txlyre 6 months ago
parent
commit
44652d8b90
1 changed file with 3 additions and 0 deletions
  1. 3 0
      markov.py

+ 3 - 0
markov.py

@@ -1,4 +1,5 @@
 import os.path
+import re
 import atexit
 
 import ujson
@@ -40,6 +41,8 @@ class Markov:
             return
 
         text = text.replace("\n", " ")
+        text = re.sub(r"(@[a-z0-9_]+,?)", "", text)
+        text = re.sub(r"((\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—])", r" \1 ", text)
         text = text.split(" ")
         text = map(lambda word: word.strip(), text)
         text = filter(bool, text)