|
@@ -1,4 +1,5 @@
|
|
|
import os.path
|
|
|
+import re
|
|
|
import atexit
|
|
|
|
|
|
import ujson
|
|
@@ -40,6 +41,8 @@ class Markov:
|
|
|
return
|
|
|
|
|
|
text = text.replace("\n", " ")
|
|
|
+ text = re.sub(r"(@[a-z0-9_]+,?)", "", text)
|
|
|
+ text = re.sub(r"((\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—])", r" \1 ", text)
|
|
|
text = text.split(" ")
|
|
|
text = map(lambda word: word.strip(), text)
|
|
|
text = filter(bool, text)
|