txlyre 6 月之前
父节点
当前提交
31b295212b
共有 1 个文件被更改，包括 3 次插入和 2 次删除
  1. 3 2
      markov.py

+ 3 - 2
markov.py

@@ -29,7 +29,7 @@ class Markov:
             return self.generate()
 
         text = " ".join(words)
-        text = re.sub(r"(?:^| )?((?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
+        text = re.sub(r"(?:^| )?((\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
         text = text.strip()
 
         return text
@@ -46,7 +46,8 @@ class Markov:
 
         text = text.replace("\n", " ")
         text = re.sub(r"(@[a-z0-9_]+,?)", "", text)
-        text = re.sub(r"((?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
+        text = re.sub("https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)", "", text)
+        text = re.sub(r"((\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
         text = text.split(" ")
         text = map(lambda word: word.strip(), text)
         text = filter(bool, text)