txlyre 2 년 전
커밋
c941809f40
2개의 변경된 파일 275개의 추가작업 그리고 0개의 파일을 삭제
  1. 41 0
      bot.py
  2. 234 0
      latinica.py

+ 41 - 0
bot.py

@@ -0,0 +1,41 @@
+import os
+
+import telebot
+
+from latinica import Converter
+
+# Shared bot client used by every handler below. The API token is read from
+# the TELEGRAM_API_TOKEN environment variable (os.getenv yields None when it
+# is unset), and 'MARKDOWN' is handed to the client as its parse_mode.
+bot = telebot.TeleBot(os.getenv('TELEGRAM_API_TOKEN'), parse_mode='MARKDOWN')
+
+@bot.message_handler(commands=['start', 'help'])
+def handle_start(message):
+  """Reply to the /start and /help commands with a short usage note.
+
+  The reply is a fixed string (Russian written in Latin script) that ends
+  with an example invocation of the /cy2la command.
+  """
+  bot.reply_to(message, 'Danný bot sozdan dlja togo, čtoby perevodit́ tekst na russkom jazyke s kirillicy na latinicu.\nPrimer ispoĺzovanija:\n/cy2la Привет, мир!')
+
+@bot.message_handler(commands=['cy2la'])
+def handle_cy2la(message):
+  """Transliterate the text that follows the /cy2la command and reply with it.
+
+  Replies with an error message when nothing follows the command; otherwise
+  replies with the converted text wrapped in a Markdown code fence.
+  """
+  # Split the command off on any whitespace (not only a space) so that a tab
+  # or newline right after "/cy2la" does not swallow the first word; with
+  # maxsplit=1 the remaining text keeps its inner whitespace.
+  parts = message.text.split(None, 1)
+  text = parts[1].strip() if len(parts) > 1 else ''
+
+  if not text:
+    bot.reply_to(message, 'Tekst soobŝenija ne dolžen byt́ pustym.')
+
+    return
+
+  result = Converter(text).convert()
+  # A triple backtick inside the result would close the code fence of the
+  # reply early, so collapse it to a single backtick.
+  result = result.replace('```', '`').strip()
+
+  bot.reply_to(message, f'Rezultat:\n```\n{result}```')
+
+@bot.inline_handler(lambda _: True)
+def query_text(inline_query):
+  """Answer an inline query with one article containing the transliteration.
+
+  The handler filter accepts every inline query. The converted text is used
+  both as the article title and as the message that gets sent when the
+  article is chosen.
+  """
+  text = inline_query.query.strip()
+  result = Converter(text).convert()
+
+  if not result:
+    # An empty query converts to an empty string, which cannot serve as an
+    # article title or message text; there is nothing to offer yet.
+    return
+
+  try:
+    r = telebot.types.InlineQueryResultArticle('1', result, telebot.types.InputTextMessageContent(result))
+    bot.answer_inline_query(inline_query.id, [r])
+  except Exception:
+    # Answering is best effort (the query may have expired, the network may
+    # be down), but record the failure instead of discarding it silently.
+    telebot.logger.exception('Failed to answer inline query')
+
+# Start the bot's polling loop. This is a module-level call, so it runs as
+# soon as the file is executed (or imported).
+bot.infinity_polling()

+ 234 - 0
latinica.py

@@ -0,0 +1,234 @@
+import sys
+
+# Ordered transliteration table. Converter.convert() tries the entries from
+# top to bottom at the current position and applies the first one whose
+# pattern matches, so the order of the entries matters.
+#
+# Each entry is (pattern, replacement).
+#
+# Pattern elements (compared against the lower-cased source character):
+#   'x'   that exact letter
+#   'V'   any letter in VOWELS
+#   'C'   any letter in CONSONANTS
+#   'B'   a word-boundary character (see is_word_boundary)
+#   '^x'  same as 'x' (letter or class), but the source character must also
+#         be upper-case
+#   'S'   only as the first element: the rule applies only at offset 0 of
+#         the source; it consumes nothing and is not counted as a matched
+#         character
+#   '*'   any character
+#
+# Replacement:
+#   str    emitted character by character
+#   tuple  items emitted in order: a str is literal text, a negative int -k
+#          is the k-th matched character (1-based) passed through the
+#          single-letter rules, a non-negative int k is the matched
+#          character at index k as captured (lower-cased)
+#
+# Converter._insert re-applies upper case to emitted items based on the case
+# of the source characters. '\u0341' is a combining mark that attaches to the
+# Latin letter emitted just before it.
+RULES = [
+  # Reflexive verb endings.
+  (('т', 'ь', 'с', 'я'), 't\u0341sá'),
+  (('т', 'ь', 'с', 'ю'), 't\u0341sú'),
+  (('т', 'с', 'я'), 'tsá'), 
+  (('т', 'с', 'ю'), 'tsú'),
+
+  # Sequences directly followed by a word boundary; the boundary character
+  # itself is consumed and re-emitted via -3.
+  (('ы', 'й', 'B'), ('ý', -3)),
+  (('ч', 'ь', 'B'), (-1, -3)),
+  (('ш', 'ь', 'B'), (-1, -3)),
+  (('л', 'ь', 'B'), ('lj', -3)),
+
+  # л / н followed by two iotated vowels.
+  (('л', 'я', 'я'), (-1, -2, 'á')),
+  (('л', 'ю', 'ю'), (-1, -2, 'ú')),
+  (('л', 'я', 'ю'), (-1, -2, 'ú')),
+  (('л', 'ю', 'я'), (-1, -2, 'á')),
+
+  (('н', 'я', 'я'), (-1, -2, 'á')),
+  (('н', 'ю', 'ю'), (-1, -2, 'ú')),
+  (('н', 'я', 'ю'), (-1, -2, 'ú')),
+  (('н', 'ю', 'я'), (-1, -2, 'á')),
+
+  # е at the very start of the source, or right after a word boundary.
+  (('S', 'е'), (-1, '\u0341')),
+  (('B', 'е'), (-1, -2, '\u0341')),
+
+  # Two iotated vowels in a row.
+  (('я', 'я'), (-1, 'á')),
+  (('ю', 'ю'), (-1, 'ú')),
+  (('я', 'ю'), (-1, 'ú')),
+  (('ю', 'я'), (-1, 'á')),
+
+  (('д', 'ж'), 'đ'),
+  #(('к', 'с'), 'x'),
+  (('ч', 'ь'), (-1, -2)),
+  (('ш', 'ь'), (-1, -2)),
+  (('C', 'ь'), (-1, '\u0341')),
+
+  (('л', 'ё'), (-1, -2)),
+
+  # Consonant + я.
+  (('н', 'я'), (-1, -2)),
+  (('л', 'я'), (-1, -2)),
+  (('ш', 'я'), (-1, -2)),
+  (('щ', 'я'), (-1, -2)),
+  (('ж', 'я'), (-1, -2)),
+  (('^C', '^я'), (-1, 'ja')),
+  (('C', 'я'), (-1, 'ía')),
+
+  # Consonant + ю.
+  (('н', 'ю'), (-1, -2)),
+  (('л', 'ю'), (-1, -2)),
+  (('ш', 'ю'), (-1, -2)),
+  (('щ', 'ю'), (-1, -2)),
+  (('ж', 'ю'), (-1, -2)),
+  (('^C', '^ю'), (-1, 'ju')),
+  (('C', 'ю'), (-1, 'íu')),
+
+  # Consonant + ё.
+  (('н', 'ё'), (-1, -2)),
+  (('л', 'ё'), (-1, -2)),
+  (('ш', 'ё'), (-1, -2)),
+  (('щ', 'ё'), (-1, -2)),
+  (('ж', 'ё'), (-1, -2)),
+  (('^C', '^ё'), (-1, 'jo')),
+  (('C', 'ё'), (-1, 'ío')),
+
+  # Vowel + е.
+  (('V', 'е'), (-1, -2, '\u0341')),
+
+  # Single-letter fallbacks; Converter._convert_letter also looks letters up
+  # in these one-element patterns.
+  (('а',), 'a'),
+  (('б',), 'b'),
+  (('в',), 'v'),
+  (('г',), 'g'),
+  (('д',), 'd'),
+  (('е',), 'e'),
+  (('^ё',), ('jo',)),
+  (('ё',), 'jo'),
+  (('ж',), 'ž'),
+  (('з',), 'z'),
+  (('и',), 'i'),
+  (('й',), 'j'),
+  (('к',), 'k'),
+  (('л',), 'l'),
+  (('м',), 'm'),
+  (('н',), 'n'),
+  (('о',), 'o'),
+  (('п',), 'p'),
+  (('р',), 'r'),
+  (('с',), 's'),
+  (('т',), 't'),
+  (('у',), 'u'),
+  (('ф',), 'f'),
+  (('х',), 'h'),
+  (('ц',), 'c'),
+  (('ч',), 'č'),
+  (('ш',), 'š'),
+  (('щ',), 'ŝ'),
+  (('ъ',), '\''),
+  (('ы',), 'y'),
+  (('ь',), '\''),
+  (('э',), 'ě'),
+  (('^ю',), ('ju',)),
+  (('^я',), ('ja',)),
+  (('ю',), 'ju'),
+  (('я',), 'ja'),
+
+  # Catch-all: copy any other character through.
+  ('*', (0,))
+]
+
+# Lower-case letters accepted by the 'V' pattern class (see is_vowel).
+VOWELS = 'аеёийоуыэюя'
+# Lower-case letters accepted by the 'C' pattern class (see is_consonant);
+# the signs ь and ъ are part of this set.
+CONSONANTS = 'бвгджзклмнпрстфхцчшщьъ'
+
+def is_vowel(letter):
+  """Tell whether *letter* occurs in VOWELS."""
+  found = letter in VOWELS
+
+  return found
+
+def is_consonant(letter):
+  """Tell whether *letter* occurs in CONSONANTS."""
+  found = letter in CONSONANTS
+
+  return found
+
+def is_word_boundary(letter):
+  """Tell whether *letter* separates words.
+
+  None (no character at all) counts as a boundary, together with the space,
+  tab, newline, common punctuation and closing brackets listed below.
+  """
+  boundaries = (
+    None,
+    ' ', '\t', '\n',
+    '.', ',', '!', '?', ':', ';',
+    '/', '&',
+    ')', ']', '}',
+  )
+
+  return letter in boundaries
+
+class Converter:
+  """Rule-driven converter that rewrites a source string using RULES.
+
+  The source is scanned left to right. At every position the first entry of
+  RULES whose pattern matches is applied: the matched characters are
+  consumed and the entry's replacement is appended to the output.
+  """
+
+  def __init__(self, source):
+    """Prepare a conversion of the string *source*."""
+    self._source = source
+    self._output = ''
+
+    # Index of the next unread character of the source.
+    self._position = 0
+    # Index at which the most recently matched pattern started.
+    self._start_position = 0
+
+    # Lower-cased source characters captured by the latest match attempt.
+    self._matched_letters = []
+
+  def _look(self, n=0, lower=True):
+    """Return the character *n* places from the current position.
+
+    Returns None when that index lies outside the source. The character is
+    lower-cased unless *lower* is false.
+    """
+    position = self._position + n
+
+    if not 0 <= position < len(self._source):
+      return None
+
+    character = self._source[position]
+
+    return character.lower() if lower else character
+
+  def _match_letter(self, letter, n=0):
+    """Test one pattern element against the character *n* places ahead.
+
+    The character looked at is appended to ``_matched_letters`` whether or
+    not it matches. *letter* is a pattern element as used in RULES, or None
+    to test for the end of the source.
+    """
+    letter_ahead = self._look(n)
+
+    self._matched_letters.append(letter_ahead)
+
+    # The end of the source is a word boundary too (is_word_boundary lists
+    # None); test this before the generic None handling below, which would
+    # otherwise reject it and keep word-final rules from firing on the last
+    # word of the input.
+    if letter == 'B' and letter_ahead is None:
+      return True
+
+    if not letter_ahead or not letter:
+      return letter is letter_ahead
+
+    if letter[0] == '^':
+      letter = letter[1]
+
+      # The recursive call records the character again; drop this copy.
+      self._matched_letters.pop()
+
+      return self._match_letter(letter, n) and self._look(n, lower=False).isupper()
+
+    if letter == '*':
+      return True
+
+    if letter == 'V' and is_vowel(letter_ahead):
+      return True
+
+    if letter == 'C' and is_consonant(letter_ahead):
+      return True
+
+    if letter == 'B' and is_word_boundary(letter_ahead):
+      return True
+
+    return letter == letter_ahead
+
+  def _match(self, pattern):
+    """Try *pattern* at the current position; consume it on success.
+
+    Returns True and advances the position past the matched characters when
+    every element matches, otherwise returns False and leaves the position
+    alone.
+    """
+    self._matched_letters = []
+
+    if pattern[0] == 'S':
+      # 'S' anchors the rule to the very start of the source and consumes
+      # nothing itself.
+      if self._position != 0:
+        return False
+
+      pattern = pattern[1:]
+
+    for n, letter in enumerate(pattern):
+      if not self._match_letter(letter, n):
+        return False
+
+    self._start_position = self._position
+    self._position += len(pattern)
+
+    return True
+
+  def _convert_letter(self, letter):
+    """Return the replacement of the one-element rule for *letter*.
+
+    Returns '' for a missing letter (None or '') and *letter* itself when no
+    one-element rule names it.
+    """
+    if not letter:
+      return ''
+
+    for pattern, replacement in RULES:
+      if len(pattern) == 1 and pattern[0] == letter:
+        return replacement
+
+    return letter
+
+  def _is_upper(self, n=0):
+    """Tell whether the source character *n* places into the last match is upper-case.
+
+    Returns False when that index lies outside the source.
+    """
+    position = self._start_position + n
+
+    if not 0 <= position < len(self._source):
+      return False
+
+    return self._source[position].isupper()
+
+  def _insert(self, letters):
+    """Append the replacement *letters* of the rule that just matched to the output.
+
+    *letters* is a str or a tuple of str/int items (see RULES). The item at
+    index n is upper-cased when the source character n places into the match
+    is upper-case; an item starting with 'j' is only capitalized.
+    """
+    buffer = ''
+
+    for n, letter in enumerate(letters):
+      if isinstance(letter, int):
+        if letter < 0:
+          letter = self._convert_letter(self._matched_letters[abs(letter) - 1])
+        else:
+          letter = self._matched_letters[letter]
+
+      # An end-of-input boundary converts to '', which has no first
+      # character to inspect and nothing to upper-case.
+      if letter and self._is_upper(n):
+        letter = letter.capitalize() if letter[0] == 'j' else letter.upper()
+
+      buffer += letter
+
+    self._output += buffer
+
+  def convert(self):
+    """Run the conversion over the whole source and return the output string."""
+    while True:
+      for pattern, replacement in RULES:
+        if self._match(pattern):
+          self._insert(replacement)
+
+          break
+
+      # Stop once the end of the source has been reached.
+      if self._match((None,)):
+        break
+
+    return self._output
+
+if __name__ == '__main__':
+  # Filter mode: convert everything read from stdin and write it to stdout.
+  source_text = sys.stdin.read()
+  sys.stdout.write(Converter(source_text).convert())