markov.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. import os.path
  2. import atexit
  3. import ujson
  4. import markovify
  5. from config import config
  6. class Markov:
  7. def __init__(self):
  8. self.counter = 0
  9. self.corpus = []
  10. self.chain = None
  11. self.load()
  12. atexit.register(self.save)
  13. @property
  14. def is_ready(self):
  15. return self.chain is not None
  16. def generate(self):
  17. words = self.chain.walk()
  18. if not words:
  19. return self.generate()
  20. return " ".join(words)
  21. def rebuild(self):
  22. self.chain = markovify.Chain(self.corpus, config.MARKOV_STATE_SIZE).compile()
  23. def extend_corpus(self, text):
  24. text = text.strip()
  25. if not text:
  26. return
  27. text = text.replace("\n", " ")
  28. text = text.split(" ")
  29. text = map(lambda word: word.strip(), text)
  30. text = filter(bool, text)
  31. text = list(text)
  32. self.corpus.insert(0, text)
  33. if len(self.corpus) > config.MARKOV_CORPUS_SIZE:
  34. self.corpus = self.corpus[: config.MARKOV_CORPUS_SIZE]
  35. self.counter += 1
  36. if self.counter % config.MARKOV_REBUILD_RATE == 0:
  37. self.counter = 0
  38. self.rebuild()
  39. def load(self):
  40. if os.path.isfile(config.MARKOV_CHAIN_PATH):
  41. with open(config.MARKOV_CHAIN_PATH, "r") as f:
  42. self.chain = markovify.Chain.from_json(f.read())
  43. if os.path.isfile(config.MARKOV_CORPUS_PATH):
  44. with open(config.MARKOV_CORPUS_PATH, "r") as f:
  45. self.corpus = ujson.load(f)
  46. def save(self):
  47. if self.chain:
  48. with open(config.MARKOV_CHAIN_PATH, "w") as f:
  49. f.write(self.chain.to_json())
  50. with open(config.MARKOV_CORPUS_PATH, "w") as f:
  51. ujson.dump(self.corpus, f)