txlyre пре 2 година
родитељ
комит
1643e8ec3d
1 измењених фајлова са 235 додато и 234 уклоњено
  1. 235 234
      latinica.py

+ 235 - 234
latinica.py

@@ -1,234 +1,235 @@
-import sys
-
-RULES = [
-  (('т', 'ь', 'с', 'я'), 't\u0341sá'),
-  (('т', 'ь', 'с', 'ю'), 't\u0341sú'),
-  (('т', 'с', 'я'), 'tsá'), 
-  (('т', 'с', 'ю'), 'tsú'),
-
-  (('ы', 'й', 'B'), ('ý', -3)),
-  (('ч', 'ь', 'B'), (-1, -3)),
-  (('ш', 'ь', 'B'), (-1, -3)),
-  (('л', 'ь', 'B'), ('lj', -3)),
-
-  (('л', 'я', 'я'), (-1, -2, 'á')),
-  (('л', 'ю', 'ю'), (-1, -2, 'ú')),
-  (('л', 'я', 'ю'), (-1, -2, 'ú')),
-  (('л', 'ю', 'я'), (-1, -2, 'á')),
-
-  (('н', 'я', 'я'), (-1, -2, 'á')),
-  (('н', 'ю', 'ю'), (-1, -2, 'ú')),
-  (('н', 'я', 'ю'), (-1, -2, 'ú')),
-  (('н', 'ю', 'я'), (-1, -2, 'á')),
-
-  (('S', 'е'), (-1, '\u0341')),
-  (('B', 'е'), (-1, -2, '\u0341')),
-
-  (('я', 'я'), (-1, 'á')),
-  (('ю', 'ю'), (-1, 'ú')),
-  (('я', 'ю'), (-1, 'ú')),
-  (('ю', 'я'), (-1, 'á')),
-
-  (('д', 'ж'), 'đ'),
-  #(('к', 'с'), 'x'),
-  (('ч', 'ь'), (-1, -2)),
-  (('ш', 'ь'), (-1, -2)),
-  (('C', 'ь'), (-1, '\u0341')),
-
-  (('л', 'ё'), (-1, -2)),
-
-  (('н', 'я'), (-1, -2)),
-  (('л', 'я'), (-1, -2)),
-  (('ш', 'я'), (-1, -2)),
-  (('щ', 'я'), (-1, -2)),
-  (('ж', 'я'), (-1, -2)),
-  (('^C', '^я'), (-1, 'ja')),
-  (('C', 'я'), (-1, 'ía')),
-
-  (('н', 'ю'), (-1, -2)),
-  (('л', 'ю'), (-1, -2)),
-  (('ш', 'ю'), (-1, -2)),
-  (('щ', 'ю'), (-1, -2)),
-  (('ж', 'ю'), (-1, -2)),
-  (('^C', '^ю'), (-1, 'ju')),
-  (('C', 'ю'), (-1, 'íu')),
-
-  (('н', 'ё'), (-1, -2)),
-  (('л', 'ё'), (-1, -2)),
-  (('ш', 'ё'), (-1, -2)),
-  (('щ', 'ё'), (-1, -2)),
-  (('ж', 'ё'), (-1, -2)),
-  (('^C', '^ё'), (-1, 'jo')),
-  (('C', 'ё'), (-1, 'ío')),
-
-  (('V', 'е'), (-1, -2, '\u0341')),
-
-  (('а',), 'a'),
-  (('б',), 'b'),
-  (('в',), 'v'),
-  (('г',), 'g'),
-  (('д',), 'd'),
-  (('е',), 'e'),
-  (('^ё',), ('jo',)),
-  (('ё',), 'jo'),
-  (('ж',), 'ž'),
-  (('з',), 'z'),
-  (('и',), 'i'),
-  (('й',), 'j'),
-  (('к',), 'k'),
-  (('л',), 'l'),
-  (('м',), 'm'),
-  (('н',), 'n'),
-  (('о',), 'o'),
-  (('п',), 'p'),
-  (('р',), 'r'),
-  (('с',), 's'),
-  (('т',), 't'),
-  (('у',), 'u'),
-  (('ф',), 'f'),
-  (('х',), 'h'),
-  (('ц',), 'c'),
-  (('ч',), 'č'),
-  (('ш',), 'š'),
-  (('щ',), 'ŝ'),
-  (('ъ',), '\''),
-  (('ы',), 'y'),
-  (('ь',), '\''),
-  (('э',), 'ě'),
-  (('^ю',), ('ju',)),
-  (('^я',), ('ja',)),
-  (('ю',), 'ju'),
-  (('я',), 'ja'),
-
-  ('*', (0,))
-]
-
-VOWELS = 'аеёийоуыэюя'
-CONSONANTS = 'бвгджзклмнпрстфхцчшщьъ'
-
-def is_vowel(letter):
-  return letter in VOWELS
-
-def is_consonant(letter):
-  return letter in CONSONANTS
-
-def is_word_boundary(letter):
-  return letter in (None, ' ', '.', ',', '!', '?', ':', ';', '/', '&', ')', ']', '}', '\n', '\t')
-
-class Converter:
-  def __init__(self, source):
-    self._source = source
-    self._output = ''
-
-    self._position = 0
-    self._start_position = 0
-
-    self._matched_letters = []
-
-  def _look(self, n=0, lower=True):
-    position = self._position + n
-
-    if position >= len(self._source)\
-    or position < 0:
-      return None
-
-    return self._source[position].lower() if lower else self._source[position]
-
-  def _match_letter(self, letter, n=0):
-    letter_ahead = self._look(n)
-
-    self._matched_letters.append(letter_ahead)
-
-    if not letter_ahead or not letter:
-      return letter is letter_ahead
-
-    if letter[0] == '^':
-      letter = letter[1]
-
-      self._matched_letters.pop()
-
-      return self._match_letter(letter, n) and self._look(n, lower=False).isupper()
-        
-    if letter == '*':
-      return True
-
-    if letter == 'V' and is_vowel(letter_ahead):
-      return True
-    elif letter == 'C' and is_consonant(letter_ahead):
-      return True
-    elif letter == 'B' and is_word_boundary(letter_ahead):
-      return True
-
-    if letter == letter_ahead:
-      return True
-
-    return False
-
-  def _match(self, pattern):
-    self._matched_letters = []
-
-    if pattern[0] == 'S':
-      if self._position != 0:
-        return False
-
-      pattern = pattern[1:]
-
-    for n in range(len(pattern)):
-      if not self._match_letter(pattern[n], n):
-        return False
-
-    self._start_position = self._position
-    self._position += len(pattern)
-
-    return True
-
-  def _convert_letter(self, letter):
-    if not letter:
-      return ''
-
-    for pattern, replacement in RULES:
-      if len(pattern) == 1\
-     and pattern[0] == letter:
-        return replacement
-
-    return letter
-
-  def _is_upper(self, n=0):
-    position = self._start_position + n
-
-    if position >= len(self._source)\
-    or position < 0:
-      return False
-
-    return self._source[position].isupper()
-
-  def _insert(self, letters):
-    buffer = ''
-
-    for letter, n in zip(letters, range(len(letters))):
-      if type(letter) is int:
-        if letter < 0:
-          letter = self._convert_letter(self._matched_letters[abs(letter) - 1])
-        else:
-          letter = self._matched_letters[letter]
-     
-      buffer += (letter.capitalize() if letter[0] == 'j' else letter.upper()) if self._is_upper(n) else letter
-
-    self._output += buffer
-
-  def convert(self):
-    while True:
-      for pattern, replacement in RULES:
-        if self._match(pattern): 
-          self._insert(replacement)
-
-          break
-
-      if self._match((None,)):
-        break
-
-    return self._output
-
-if __name__ == '__main__':
-  converter = Converter(sys.stdin.read())
-  sys.stdout.write(converter.convert())
+import sys
+
+RULES = [
+  (('т', 'ь', 'с', 'я'), 't\u0341sá'),
+  (('т', 'ь', 'с', 'ю'), 't\u0341sú'),
+  (('т', 'с', 'я'), 'tsá'), 
+  (('т', 'с', 'ю'), 'tsú'),
+
+  (('ы', 'й', 'B'), ('ý', -3)),
+  (('ч', 'ь', 'B'), (-1, -3)),
+  (('ш', 'ь', 'B'), (-1, -3)),
+  (('л', 'ь', 'B'), ('lj', -3)),
+
+  (('л', 'я', 'я'), (-1, -2, 'á')),
+  (('л', 'ю', 'ю'), (-1, -2, 'ú')),
+  (('л', 'я', 'ю'), (-1, -2, 'ú')),
+  (('л', 'ю', 'я'), (-1, -2, 'á')),
+
+  (('н', 'я', 'я'), (-1, -2, 'á')),
+  (('н', 'ю', 'ю'), (-1, -2, 'ú')),
+  (('н', 'я', 'ю'), (-1, -2, 'ú')),
+  (('н', 'ю', 'я'), (-1, -2, 'á')),
+
+  (('S', 'е'), (-1, '\u0341')),
+  (('B', 'е'), (-1, -2, '\u0341')),
+
+  (('я', 'я'), (-1, 'á')),
+  (('ю', 'ю'), (-1, 'ú')),
+  (('я', 'ю'), (-1, 'ú')),
+  (('ю', 'я'), (-1, 'á')),
+
+  (('д', 'ж'), 'đ'),
+  #(('к', 'с'), 'x'),
+  (('ч', 'ь'), (-1, -2)),
+  (('ш', 'ь'), (-1, -2)),
+  (('щ', 'ь'), (-1, -2)),
+  (('C', 'ь'), (-1, '\u0341')),
+
+  (('л', 'ё'), (-1, -2)),
+
+  (('н', 'я'), (-1, -2)),
+  (('л', 'я'), (-1, -2)),
+  (('ш', 'я'), (-1, -2)),
+  (('щ', 'я'), (-1, -2)),
+  (('ж', 'я'), (-1, -2)),
+  (('^C', '^я'), (-1, 'ja')),
+  (('C', 'я'), (-1, 'ía')),
+
+  (('н', 'ю'), (-1, -2)),
+  (('л', 'ю'), (-1, -2)),
+  (('ш', 'ю'), (-1, -2)),
+  (('щ', 'ю'), (-1, -2)),
+  (('ж', 'ю'), (-1, -2)),
+  (('^C', '^ю'), (-1, 'ju')),
+  (('C', 'ю'), (-1, 'íu')),
+
+  (('н', 'ё'), (-1, -2)),
+  (('л', 'ё'), (-1, -2)),
+  (('ш', 'ё'), (-1, -2)),
+  (('щ', 'ё'), (-1, -2)),
+  (('ж', 'ё'), (-1, -2)),
+  (('^C', '^ё'), (-1, 'jo')),
+  (('C', 'ё'), (-1, 'ío')),
+
+  (('V', 'е'), (-1, -2, '\u0341')),
+
+  (('а',), 'a'),
+  (('б',), 'b'),
+  (('в',), 'v'),
+  (('г',), 'g'),
+  (('д',), 'd'),
+  (('е',), 'e'),
+  (('^ё',), ('jo',)),
+  (('ё',), 'jo'),
+  (('ж',), 'ž'),
+  (('з',), 'z'),
+  (('и',), 'i'),
+  (('й',), 'j'),
+  (('к',), 'k'),
+  (('л',), 'l'),
+  (('м',), 'm'),
+  (('н',), 'n'),
+  (('о',), 'o'),
+  (('п',), 'p'),
+  (('р',), 'r'),
+  (('с',), 's'),
+  (('т',), 't'),
+  (('у',), 'u'),
+  (('ф',), 'f'),
+  (('х',), 'h'),
+  (('ц',), 'c'),
+  (('ч',), 'č'),
+  (('ш',), 'š'),
+  (('щ',), 'ŝ'),
+  (('ъ',), '\''),
+  (('ы',), 'y'),
+  (('ь',), '\''),
+  (('э',), 'ě'),
+  (('^ю',), ('ju',)),
+  (('^я',), ('ja',)),
+  (('ю',), 'ju'),
+  (('я',), 'ja'),
+
+  ('*', (0,))
+]
+
+VOWELS = 'аеёийоуыэюя'
+CONSONANTS = 'бвгджзклмнпрстфхцчшщьъ'
+
+def is_vowel(letter):
+  return letter in VOWELS
+
+def is_consonant(letter):
+  return letter in CONSONANTS
+
+def is_word_boundary(letter):
+  return letter in (None, ' ', '.', ',', '!', '?', ':', ';', '/', '&', ')', ']', '}', '\n', '\t')
+
+class Converter:
+  def __init__(self, source):
+    self._source = source
+    self._output = ''
+
+    self._position = 0
+    self._start_position = 0
+
+    self._matched_letters = []
+
+  def _look(self, n=0, lower=True):
+    position = self._position + n
+
+    if position >= len(self._source)\
+    or position < 0:
+      return None
+
+    return self._source[position].lower() if lower else self._source[position]
+
+  def _match_letter(self, letter, n=0):
+    letter_ahead = self._look(n)
+
+    self._matched_letters.append(letter_ahead)
+
+    if not letter_ahead or not letter:
+      return letter is letter_ahead
+
+    if letter[0] == '^':
+      letter = letter[1]
+
+      self._matched_letters.pop()
+
+      return self._match_letter(letter, n) and self._look(n, lower=False).isupper()
+        
+    if letter == '*':
+      return True
+
+    if letter == 'V' and is_vowel(letter_ahead):
+      return True
+    elif letter == 'C' and is_consonant(letter_ahead):
+      return True
+    elif letter == 'B' and is_word_boundary(letter_ahead):
+      return True
+
+    if letter == letter_ahead:
+      return True
+
+    return False
+
+  def _match(self, pattern):
+    self._matched_letters = []
+
+    if pattern[0] == 'S':
+      if self._position != 0:
+        return False
+
+      pattern = pattern[1:]
+
+    for n in range(len(pattern)):
+      if not self._match_letter(pattern[n], n):
+        return False
+
+    self._start_position = self._position
+    self._position += len(pattern)
+
+    return True
+
+  def _convert_letter(self, letter):
+    if not letter:
+      return ''
+
+    for pattern, replacement in RULES:
+      if len(pattern) == 1\
+     and pattern[0] == letter:
+        return replacement
+
+    return letter
+
+  def _is_upper(self, n=0):
+    position = self._start_position + n
+
+    if position >= len(self._source)\
+    or position < 0:
+      return False
+
+    return self._source[position].isupper()
+
+  def _insert(self, letters):
+    buffer = ''
+
+    for letter, n in zip(letters, range(len(letters))):
+      if type(letter) is int:
+        if letter < 0:
+          letter = self._convert_letter(self._matched_letters[abs(letter) - 1])
+        else:
+          letter = self._matched_letters[letter]
+     
+      buffer += (letter.capitalize() if letter[0] == 'j' else letter.upper()) if self._is_upper(n) else letter
+
+    self._output += buffer
+
+  def convert(self):
+    while True:
+      for pattern, replacement in RULES:
+        if self._match(pattern): 
+          self._insert(replacement)
+
+          break
+
+      if self._match((None,)):
+        break
+
+    return self._output
+
+if __name__ == '__main__':
+  converter = Converter(sys.stdin.read())
+  sys.stdout.write(converter.convert())