123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241 |
- import sys
# Ordered transliteration table consumed by ``Converter``.
#
# Each entry is ``(pattern, replacement)``.  Rules are tried from top to
# bottom and the first one that matches at the current position wins, so
# longer / more specific patterns must stay above the generic ones.
#
# Pattern symbols (one per source character unless noted):
#   'x'   a literal letter, compared against the lowercased source character
#   '^x'  same as 'x' (or 'C'), but the source character must be uppercase
#   'C'   any character listed in CONSONANTS
#   'V'   any character listed in VOWELS
#   'B'   a word boundary (see is_word_boundary) or the end of the input
#   'S'   only as first symbol: the rule applies at the very start of the input
#   '*'   any character
#
# Replacement forms:
#   str    emitted character by character
#   tuple  items are emitted in order; a str item is emitted as one unit,
#          an int item refers back to a matched character
#          (see Converter._insert for the exact meaning of the numbers)
#
# '\u0341' is the combining acute tone mark that attaches to the preceding
# output letter.
RULES = [
    # Multi-letter sequences starting with 'т'.
    (('т', 'ь', 'с', 'я'), 't\u0341sá'),
    (('т', 'ь', 'с', 'ю'), 't\u0341sú'),
    (('т', 'с', 'я'), 'tsá'),
    (('т', 'с', 'ю'), 'tsú'),

    # Two-letter endings directly followed by a word boundary.
    (('ы', 'й', 'B'), ('ý', -3)),
    (('ч', 'ь', 'B'), (-1, -3)),
    (('ш', 'ь', 'B'), (-1, -3)),
    (('л', 'ь', 'B'), ('lj', -3)),

    # 'л' / 'н' followed by two of 'я' / 'ю'.
    (('л', 'я', 'я'), (-1, -2, 'á')),
    (('л', 'ю', 'ю'), (-1, -2, 'ú')),
    (('л', 'я', 'ю'), (-1, -2, 'ú')),
    (('л', 'ю', 'я'), (-1, -2, 'á')),
    (('н', 'я', 'я'), (-1, -2, 'á')),
    (('н', 'ю', 'ю'), (-1, -2, 'ú')),
    (('н', 'я', 'ю'), (-1, -2, 'ú')),
    (('н', 'ю', 'я'), (-1, -2, 'á')),

    # 'е' at the start of the input or right after a word boundary.
    (('S', 'е'), (-1, '\u0341')),
    (('B', 'е'), (-1, -2, '\u0341')),

    # Pairs of 'я' / 'ю'.
    (('я', 'я'), (-1, 'á')),
    (('ю', 'ю'), (-1, 'ú')),
    (('я', 'ю'), (-1, 'ú')),
    (('ю', 'я'), (-1, 'á')),

    (('д', 'ж'), 'đ'),
    # Disabled rule, kept for reference:
    # (('к', 'с'), 'x'),

    # Consonant followed by the soft sign.
    (('ч', 'ь'), (-1, -2)),
    (('ш', 'ь'), (-1, -2)),
    (('щ', 'ь'), (-1, -2)),
    (('C', 'ь'), (-1, '\u0341')),

    # Consonant followed by 'я'; the listed consonants keep the plain
    # single-letter output and shadow the generic 'C' rule below them.
    (('н', 'я'), (-1, -2)),
    (('л', 'я'), (-1, -2)),
    (('ш', 'я'), (-1, -2)),
    (('щ', 'я'), (-1, -2)),
    (('ж', 'я'), (-1, -2)),
    (('ч', 'я'), (-1, -2)),
    (('^C', '^я'), (-1, 'ja')),
    (('C', 'я'), (-1, 'ía')),

    # Consonant followed by 'ю'.
    (('н', 'ю'), (-1, -2)),
    (('л', 'ю'), (-1, -2)),
    (('ш', 'ю'), (-1, -2)),
    (('щ', 'ю'), (-1, -2)),
    (('ж', 'ю'), (-1, -2)),
    (('ч', 'ю'), (-1, -2)),
    (('^C', '^ю'), (-1, 'ju')),
    (('C', 'ю'), (-1, 'íu')),

    # Consonant followed by 'ё'.
    (('н', 'ё'), (-1, -2)),
    (('л', 'ё'), (-1, -2)),
    (('ш', 'ё'), (-1, -2)),
    (('щ', 'ё'), (-1, -2)),
    (('ж', 'ё'), (-1, -2)),
    (('ч', 'ё'), (-1, -2)),
    (('^C', '^ё'), (-1, 'jo')),
    (('C', 'ё'), (-1, 'ío')),

    # Vowel followed by 'е'.
    (('V', 'е'), (-1, -2, '\u0341')),

    # Single letters.  The '^' variants use a one-item tuple so the two-letter
    # output is cased as a unit instead of character by character.
    (('а',), 'a'),
    (('б',), 'b'),
    (('в',), 'v'),
    (('г',), 'g'),
    (('д',), 'd'),
    (('е',), 'e'),
    (('^ё',), ('jo',)),
    (('ё',), 'jo'),
    (('ж',), 'ž'),
    (('з',), 'z'),
    (('и',), 'i'),
    (('й',), 'j'),
    (('к',), 'k'),
    (('л',), 'l'),
    (('м',), 'm'),
    (('н',), 'n'),
    (('о',), 'o'),
    (('п',), 'p'),
    (('р',), 'r'),
    (('с',), 's'),
    (('т',), 't'),
    (('у',), 'u'),
    (('ф',), 'f'),
    (('х',), 'h'),
    (('ц',), 'c'),
    (('ч',), 'č'),
    (('ш',), 'š'),
    (('щ',), 'ŝ'),
    (('ъ',), '\''),
    (('ы',), 'y'),
    (('ь',), '\''),
    (('э',), 'ě'),
    (('^ю',), ('ju',)),
    (('^я',), ('ja',)),
    (('ю',), 'ju'),
    (('я',), 'ja'),

    # Catch-all: anything not handled above is passed through.
    (('*',), (0,)),
]

# Character classes behind the 'V' and 'C' pattern symbols.  Note that the
# soft and hard signs are listed among the consonants.
VOWELS = 'аеёийоуыэюя'
CONSONANTS = 'бвгджзклмнпрстфхцчшщьъ'
def is_vowel(letter):
    """Return True when *letter* occurs in the module-level ``VOWELS`` string.

    The check is a plain ``in`` test against a string, so *letter* must be a
    ``str``; the matcher only calls this with a lowercased source character.
    """
    return letter in VOWELS
def is_consonant(letter):
    """Return True when *letter* occurs in the module-level ``CONSONANTS`` string.

    The check is a plain ``in`` test against a string, so *letter* must be a
    ``str``; the matcher only calls this with a lowercased source character.
    """
    return letter in CONSONANTS
def is_word_boundary(letter):
    """Return True when *letter* is treated as the end of a word.

    ``None`` counts as a boundary as well; that is the value the converter's
    look-ahead yields for positions outside the input.  Only whitespace,
    closing brackets and a fixed set of punctuation marks are recognised;
    opening brackets and quotes are not.
    """
    boundary_marks = (
        None, ' ', '.', ',', '!', '?', ':', ';', '/', '&',
        ')', ']', '}', '\n', '\t',
    )
    return letter in boundary_marks
class Converter:
    """Transliterate a string by repeatedly applying the first matching rule.

    The converter walks ``source`` from left to right.  At every position the
    entries of the module-level ``RULES`` table are tried in order; the first
    pattern that matches is consumed and its replacement is appended to the
    output.

    Pattern symbols: a literal letter (compared case-insensitively), ``'^x'``
    (``x`` must be uppercase in the source), ``'C'`` / ``'V'`` / ``'B'``
    (consonant / vowel / word boundary, as decided by ``is_consonant``,
    ``is_vowel`` and ``is_word_boundary``), ``'*'`` (any character), a leading
    ``'S'`` (rule only applies at position 0) and ``None`` (end of input).

    A ``'B'`` that is the *last* symbol of a longer pattern is a zero-width
    look-ahead: the boundary character is checked but not consumed, so it is
    still available to the next rule (for example one that starts with
    ``'B'``).

    Replacement items: a ``str`` is emitted as is; a negative int ``-k``
    emits the k-th matched character converted through its single-letter
    rule; a non-negative int ``k`` copies the source character at offset
    ``k`` of the match verbatim.  References to a character that was only
    looked at (the trailing ``'B'``) emit nothing.
    """

    def __init__(self, source):
        """Prepare a conversion of *source*; nothing is converted yet."""
        self._source = source
        self._output = ''
        # Index of the next unconsumed source character.
        self._position = 0
        # Index where the most recent successful match started.
        self._start_position = 0
        # Lowercased characters (or None) seen by the most recent match attempt.
        self._matched_letters = []
        # Number of source characters the most recent match actually consumed.
        self._consumed = 0

    def _look(self, n=0, lower=True):
        """Return the character *n* places ahead, or None outside the input.

        The character is lowercased unless *lower* is false.
        """
        position = self._position + n
        if not 0 <= position < len(self._source):
            return None
        letter = self._source[position]
        return letter.lower() if lower else letter

    def _match_letter(self, letter, n=0):
        """Test pattern symbol *letter* against the character *n* places ahead.

        The lowercased look-ahead character is recorded in
        ``_matched_letters`` whether or not the symbol matches.
        """
        letter_ahead = self._look(n)
        self._matched_letters.append(letter_ahead)

        if not letter_ahead or not letter:
            # Past the end of the input (or matching the end-of-input symbol):
            # only 'B' and None can succeed here.
            if letter == 'B':
                return True
            return letter is None and letter_ahead is None

        if letter[0] == '^':
            # Uppercase-only variant: match the bare symbol, then check the
            # case of the original character.  Drop the entry recorded above
            # because the recursive call records it again.
            self._matched_letters.pop()
            return (self._match_letter(letter[1], n)
                    and self._look(n, lower=False).isupper())

        if letter == '*':
            return True
        if letter == 'V' and is_vowel(letter_ahead):
            return True
        if letter == 'C' and is_consonant(letter_ahead):
            return True
        if letter == 'B' and is_word_boundary(letter_ahead):
            return True
        return letter == letter_ahead

    def _match(self, pattern):
        """Try *pattern* at the current position and consume it on success.

        Returns True when every symbol matched.  In that case
        ``_start_position`` is set to the start of the match and
        ``_position`` is advanced past the consumed characters.
        """
        self._matched_letters = []
        if pattern[0] == 'S':
            if self._position != 0:
                return False
            pattern = pattern[1:]

        for offset, symbol in enumerate(pattern):
            if not self._match_letter(symbol, offset):
                return False

        consumed = len(pattern)
        if consumed > 1 and pattern[-1] == 'B':
            # A trailing boundary is only a look-ahead.  Leaving it in the
            # input lets the next rule see it, e.g. a rule for a letter that
            # directly follows a word boundary.
            consumed -= 1

        self._start_position = self._position
        self._consumed = consumed
        self._position += consumed
        return True

    def _convert_letter(self, letter):
        """Return the single-letter transliteration of *letter*.

        Falls back to *letter* itself when no single-symbol rule with a
        string replacement exists; ``None`` / empty input yields ``''``.
        """
        if not letter:
            return ''
        for pattern, replacement in RULES:
            # Only string replacements are usable here; tuple replacements
            # (such as the catch-all's) are instructions, not output text.
            if (len(pattern) == 1 and pattern[0] == letter
                    and isinstance(replacement, str)):
                return replacement
        return letter

    def _is_upper(self, n=0):
        """Tell whether the source character *n* places into the last match is uppercase."""
        position = self._start_position + n
        if not 0 <= position < len(self._source):
            return False
        return self._source[position].isupper()

    def _insert(self, letters):
        """Append the replacement *letters* for the last match to the output.

        The n-th replacement item is uppercased when the n-th source
        character of the match is uppercase; items starting with ``'j'`` are
        only capitalized, so a two-letter output becomes e.g. ``'Ja'``.
        """
        pieces = []
        for n, letter in enumerate(letters):
            if isinstance(letter, int):
                index = -letter - 1 if letter < 0 else letter
                if (index >= self._consumed
                        or self._matched_letters[index] is None):
                    # Refers to a look-ahead-only character or to the end of
                    # the input: there is nothing to emit for it here.
                    continue
                if letter >= 0:
                    # Copy the original character untouched.  Going through
                    # lower() and upper() is not a round trip for every
                    # character (for instance 'ẞ' -> 'ß' -> 'SS').
                    pieces.append(self._source[self._start_position + index])
                    continue
                letter = self._convert_letter(self._matched_letters[index])

            if self._is_upper(n):
                letter = (letter.capitalize() if letter[:1] == 'j'
                          else letter.upper())
            pieces.append(letter)
        self._output += ''.join(pieces)

    def convert(self):
        """Convert the remaining input and return the complete output string."""
        while self._look() is not None:
            for pattern, replacement in RULES:
                if self._match(pattern):
                    self._insert(replacement)
                    break
            else:
                # No rule applies (only possible when the table has no
                # catch-all): pass the character through so the loop always
                # makes progress.
                self._output += self._source[self._position]
                self._position += 1
        return self._output
# Script entry point: transliterate everything read from standard input and
# write the result to standard output.
if __name__ == '__main__':
    text = sys.stdin.read()
    sys.stdout.write(Converter(text).convert())
|