# latinica.py - rule-based transliteration of Cyrillic text into Latin script.

  1. import sys
  2. RULES = [
  3. (('т', 'ь', 'с', 'я'), 't\u0341sá'),
  4. (('т', 'ь', 'с', 'ю'), 't\u0341sú'),
  5. (('т', 'с', 'я'), 'tsá'),
  6. (('т', 'с', 'ю'), 'tsú'),
  7. (('ы', 'й', 'B'), ('ý', -3)),
  8. (('ч', 'ь', 'B'), (-1, -3)),
  9. (('ш', 'ь', 'B'), (-1, -3)),
  10. (('л', 'ь', 'B'), ('lj', -3)),
  11. (('л', 'я', 'я'), (-1, -2, 'á')),
  12. (('л', 'ю', 'ю'), (-1, -2, 'ú')),
  13. (('л', 'я', 'ю'), (-1, -2, 'ú')),
  14. (('л', 'ю', 'я'), (-1, -2, 'á')),
  15. (('н', 'я', 'я'), (-1, -2, 'á')),
  16. (('н', 'ю', 'ю'), (-1, -2, 'ú')),
  17. (('н', 'я', 'ю'), (-1, -2, 'ú')),
  18. (('н', 'ю', 'я'), (-1, -2, 'á')),
  19. (('S', 'е'), (-1, '\u0341')),
  20. (('B', 'е'), (-1, -2, '\u0341')),
  21. (('я', 'я'), (-1, 'á')),
  22. (('ю', 'ю'), (-1, 'ú')),
  23. (('я', 'ю'), (-1, 'ú')),
  24. (('ю', 'я'), (-1, 'á')),
  25. (('д', 'ж'), 'đ'),
  26. #(('к', 'с'), 'x'),
  27. (('ч', 'ь'), (-1, -2)),
  28. (('ш', 'ь'), (-1, -2)),
  29. (('щ', 'ь'), (-1, -2)),
  30. (('C', 'ь'), (-1, '\u0341')),
  31. (('л', 'ё'), (-1, -2)),
  32. (('н', 'я'), (-1, -2)),
  33. (('л', 'я'), (-1, -2)),
  34. (('ш', 'я'), (-1, -2)),
  35. (('щ', 'я'), (-1, -2)),
  36. (('ж', 'я'), (-1, -2)),
  37. (('ч', 'я'), (-1, -2)),
  38. (('^C', '^я'), (-1, 'ja')),
  39. (('C', 'я'), (-1, 'ía')),
  40. (('н', 'ю'), (-1, -2)),
  41. (('л', 'ю'), (-1, -2)),
  42. (('ш', 'ю'), (-1, -2)),
  43. (('щ', 'ю'), (-1, -2)),
  44. (('ж', 'ю'), (-1, -2)),
  45. (('ч', 'ю'), (-1, -2)),
  46. (('^C', '^ю'), (-1, 'ju')),
  47. (('C', 'ю'), (-1, 'íu')),
  48. (('н', 'ё'), (-1, -2)),
  49. (('л', 'ё'), (-1, -2)),
  50. (('ш', 'ё'), (-1, -2)),
  51. (('щ', 'ё'), (-1, -2)),
  52. (('ж', 'ё'), (-1, -2)),
  53. (('ч', 'ё'), (-1, -2)),
  54. (('^C', '^ё'), (-1, 'jo')),
  55. (('C', 'ё'), (-1, 'ío')),
  56. (('V', 'е'), (-1, -2, '\u0341')),
  57. (('а',), 'a'),
  58. (('б',), 'b'),
  59. (('в',), 'v'),
  60. (('г',), 'g'),
  61. (('д',), 'd'),
  62. (('е',), 'e'),
  63. (('^ё',), ('jo',)),
  64. (('ё',), 'jo'),
  65. (('ж',), 'ž'),
  66. (('з',), 'z'),
  67. (('и',), 'i'),
  68. (('й',), 'j'),
  69. (('к',), 'k'),
  70. (('л',), 'l'),
  71. (('м',), 'm'),
  72. (('н',), 'n'),
  73. (('о',), 'o'),
  74. (('п',), 'p'),
  75. (('р',), 'r'),
  76. (('с',), 's'),
  77. (('т',), 't'),
  78. (('у',), 'u'),
  79. (('ф',), 'f'),
  80. (('х',), 'h'),
  81. (('ц',), 'c'),
  82. (('ч',), 'č'),
  83. (('ш',), 'š'),
  84. (('щ',), 'ŝ'),
  85. (('ъ',), '\''),
  86. (('ы',), 'y'),
  87. (('ь',), '\''),
  88. (('э',), 'ě'),
  89. (('^ю',), ('ju',)),
  90. (('^я',), ('ja',)),
  91. (('ю',), 'ju'),
  92. (('я',), 'ja'),
  93. ('*', (0,))
  94. ]
  95. VOWELS = 'аеёийоуыэюя'
  96. CONSONANTS = 'бвгджзклмнпрстфхцчшщьъ'
  97. def is_vowel(letter):
  98. return letter in VOWELS
  99. def is_consonant(letter):
  100. return letter in CONSONANTS
  101. def is_word_boundary(letter):
  102. return letter in (None, ' ', '.', ',', '!', '?', ':', ';', '/', '&', ')', ']', '}', '\n', '\t')
  103. class Converter:
  104. def __init__(self, source):
  105. self._source = source
  106. self._output = ''
  107. self._position = 0
  108. self._start_position = 0
  109. self._matched_letters = []
  110. def _look(self, n=0, lower=True):
  111. position = self._position + n
  112. if position >= len(self._source)\
  113. or position < 0:
  114. return None
  115. return self._source[position].lower() if lower else self._source[position]
  116. def _match_letter(self, letter, n=0):
  117. letter_ahead = self._look(n)
  118. self._matched_letters.append(letter_ahead)
  119. if not letter_ahead or not letter:
  120. if letter == 'B':
  121. return True
  122. return letter is letter_ahead
  123. if letter[0] == '^':
  124. letter = letter[1]
  125. self._matched_letters.pop()
  126. return self._match_letter(letter, n) and self._look(n, lower=False).isupper()
  127. if letter == '*':
  128. return True
  129. if letter == 'V' and is_vowel(letter_ahead):
  130. return True
  131. elif letter == 'C' and is_consonant(letter_ahead):
  132. return True
  133. elif letter == 'B' and is_word_boundary(letter_ahead):
  134. return True
  135. if letter == letter_ahead:
  136. return True
  137. return False
  138. def _match(self, pattern):
  139. self._matched_letters = []
  140. if pattern[0] == 'S':
  141. if self._position != 0:
  142. return False
  143. pattern = pattern[1:]
  144. for n in range(len(pattern)):
  145. if not self._match_letter(pattern[n], n):
  146. return False
  147. self._start_position = self._position
  148. self._position += len(pattern)
  149. return True
  150. def _convert_letter(self, letter):
  151. if not letter:
  152. return ''
  153. for pattern, replacement in RULES:
  154. if len(pattern) == 1\
  155. and pattern[0] == letter:
  156. return replacement
  157. return letter
  158. def _is_upper(self, n=0):
  159. position = self._start_position + n
  160. if position >= len(self._source)\
  161. or position < 0:
  162. return False
  163. return self._source[position].isupper()
  164. def _insert(self, letters):
  165. buffer = ''
  166. for letter, n in zip(letters, range(len(letters))):
  167. if type(letter) is int:
  168. if letter < 0:
  169. letter = self._convert_letter(self._matched_letters[abs(letter) - 1])
  170. else:
  171. letter = self._matched_letters[letter]
  172. buffer += (letter.capitalize() if letter and letter[0] == 'j' else letter.upper()) if self._is_upper(n) else letter
  173. self._output += buffer
  174. def convert(self):
  175. while True:
  176. for pattern, replacement in RULES:
  177. if self._match(pattern):
  178. self._insert(replacement)
  179. break
  180. if self._match((None,)):
  181. break
  182. return self._output
  183. if __name__ == '__main__':
  184. converter = Converter(sys.stdin.read())
  185. sys.stdout.write(converter.convert())