txlyre 2 months ago
parent
commit
8571c161a8
1 changed file with 9 additions and 4 deletions
  1. 9 4
      markov.py

+ 9 - 4
markov.py

@@ -29,7 +29,7 @@ class Markov:
             return self.generate()
 
         text = " ".join(words)
-        text = re.sub(r"(?:^| )?((\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
+        text = re.sub(r"(?:^| )?((\!\?\?)|(\!\?)|(\?\!\!)|(\?\?\!)|(\?\!)|(\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
         text = text.strip()
 
         return text
@@ -44,10 +44,15 @@ class Markov:
         if not text:
             return
 
-        text = text.replace("\n", " ")
+        if "\n" in text:
+          for line in text.split("\n"):
+            self.extend_corpus(line)
+
+          return
+
         text = re.sub(r"(@[a-z0-9_]+,?)", "", text)
         text = re.sub("https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)", "", text)
-        text = re.sub(r"((\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
+        text = re.sub(r"((\!\?\?)|(\!\?)|(\?\!\!)|(\?\?\!)|(\?\!)|(\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
         text = text.split(" ")
         text = map(lambda word: word.strip(), text)
         text = filter(bool, text)
@@ -57,7 +62,7 @@ class Markov:
             self.corpus.insert(0, text)
 
         if len(self.corpus) > config.MARKOV_CORPUS_SIZE:
-            self.corpus = self.corpus[: config.MARKOV_CORPUS_SIZE]
+            self.corpus.pop(-1)
 
         self.counter += 1