# markov.py (2.4 KB)
  1. import os.path
  2. import re
  3. import atexit
  4. import ujson
  5. import markovify
  6. from config import config
  7. class Markov:
  8. def __init__(self):
  9. self.counter = 0
  10. self.corpus = []
  11. self.chain = None
  12. self.load()
  13. atexit.register(self.save)
  14. @property
  15. def is_ready(self):
  16. return self.chain is not None
  17. def generate(self):
  18. words = self.chain.walk()
  19. if not words:
  20. return self.generate()
  21. text = " ".join(words)
  22. text = re.sub(r"(?:^| )?((\!\?\?)|(\!\?)|(\?\!\!)|(\?\?\!)|(\?\!)|(\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|([.?!,:;\(\)\"'\$\+\-–—…]))(?: |$)", r"\1 ", text)
  23. text = text.strip()
  24. return text
  25. def rebuild(self):
  26. self.chain = markovify.Chain(self.corpus, config.MARKOV_STATE_SIZE).compile()
  27. self.counter = 0
  28. def extend_corpus(self, text):
  29. text = text.strip()
  30. if not text:
  31. return
  32. if "\n" in text:
  33. for line in text.split("\n"):
  34. self.extend_corpus(line)
  35. return
  36. text = re.sub(r"(@[A-Za-z0-9_]+,?)", "", text)
  37. text = re.sub("https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)", "", text)
  38. text = re.sub(r"((\!\?\?)|(\!\?)|(\?\!\!)|(\?\?\!)|(\?\!)|(\?\.\.)|(\.{2,})|(\!{2,})|(\?{2,})|[.?!,:;\(\)\"'\$\+\-–—…])", r" \1 ", text)
  39. text = text.split(" ")
  40. text = map(lambda word: word.strip(), text)
  41. text = filter(bool, text)
  42. text = list(text)
  43. if text not in self.corpus:
  44. self.corpus.insert(0, text)
  45. if len(self.corpus) > config.MARKOV_CORPUS_SIZE:
  46. self.corpus.pop(-1)
  47. self.counter += 1
  48. if self.counter % config.MARKOV_REBUILD_RATE == 0:
  49. self.rebuild()
  50. def load(self):
  51. if os.path.isfile(config.MARKOV_CHAIN_PATH):
  52. with open(config.MARKOV_CHAIN_PATH, "r") as f:
  53. self.chain = markovify.Chain.from_json(f.read())
  54. if os.path.isfile(config.MARKOV_CORPUS_PATH):
  55. with open(config.MARKOV_CORPUS_PATH, "r") as f:
  56. self.corpus = ujson.load(f)
  57. def save(self):
  58. if self.chain:
  59. with open(config.MARKOV_CHAIN_PATH, "w") as f:
  60. f.write(self.chain.to_json())
  61. with open(config.MARKOV_CORPUS_PATH, "w") as f:
  62. ujson.dump(self.corpus, f)