|
@@ -29,7 +29,7 @@ class Markov:
|
|
return self.generate()
|
|
return self.generate()
|
|
|
|
|
|
text = " ".join(words)
|
|
text = " ".join(words)
|
|
- text = re.sub(r"(?:^| )?((\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
|
|
|
|
|
|
+ text = re.sub(r"(?:^| )?((\!\?\?)|(\!\?)|(\?\!\!)|(\?\?\!)|(\?\!)|(\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
|
|
text = text.strip()
|
|
text = text.strip()
|
|
|
|
|
|
return text
|
|
return text
|
|
@@ -44,10 +44,15 @@ class Markov:
|
|
if not text:
|
|
if not text:
|
|
return
|
|
return
|
|
|
|
|
|
- text = text.replace("\n", " ")
|
|
|
|
|
|
+ if "\n" in text:
|
|
|
|
+ for line in text.split("\n"):
|
|
|
|
+ self.extend_corpus(line)
|
|
|
|
+
|
|
|
|
+ return
|
|
|
|
+
|
|
text = re.sub(r"(@[a-z0-9_]+,?)", "", text)
|
|
text = re.sub(r"(@[a-z0-9_]+,?)", "", text)
|
|
text = re.sub("https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)", "", text)
|
|
text = re.sub("https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)", "", text)
|
|
- text = re.sub(r"((\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
|
|
|
|
|
|
+ text = re.sub(r"((\!\?\?)|(\!\?)|(\?\!\!)|(\?\?\!)|(\?\!)|(\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
|
|
text = text.split(" ")
|
|
text = text.split(" ")
|
|
text = map(lambda word: word.strip(), text)
|
|
text = map(lambda word: word.strip(), text)
|
|
text = filter(bool, text)
|
|
text = filter(bool, text)
|
|
@@ -57,7 +62,7 @@ class Markov:
|
|
self.corpus.insert(0, text)
|
|
self.corpus.insert(0, text)
|
|
|
|
|
|
if len(self.corpus) > config.MARKOV_CORPUS_SIZE:
|
|
if len(self.corpus) > config.MARKOV_CORPUS_SIZE:
|
|
- self.corpus = self.corpus[: config.MARKOV_CORPUS_SIZE]
|
|
|
|
|
|
+ self.corpus.pop(-1)
|
|
|
|
|
|
self.counter += 1
|
|
self.counter += 1
|
|
|
|
|