wma.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. #!/usr/bin/python
  2. import sys
  3. import struct
  4. import lark
  5. GRAMMAR = r"""
  6. start: _NL? command ((_NL+|";") command)* _NL?
  7. command: LABEL? operation
  8. | LABEL (arg|mixed)
  9. | LABEL
  10. | INCLUDE
  11. ?operation: "nop" -> nop
  12. | ("mvj"|"mj") arg arg arg -> mj
  13. | "sblez" arg arg arg -> sjlez
  14. | "ablez" arg arg arg -> ajlez
  15. | "sblz" arg arg arg -> sjlz
  16. | "bles" arg arg arg -> jles
  17. | ("nbnz"|"tjt") arg arg -> tjt
  18. | ("dbnz"|"djt") arg arg -> djt
  19. | "sslez" arg arg -> sslez
  20. | "aslez" arg arg -> aslez
  21. | ("ibnc"|"ije") arg arg arg -> ije
  22. | "vblz" arg arg arg -> djlz
  23. | "xblz" arg arg arg -> xjlz
  24. | "dslz" arg -> dslz
  25. | ("ssgt"|"ssl") arg arg -> ssl
  26. | "mbnz" arg arg arg -> mbnz
  27. | "modbz" arg arg arg -> modbz
  28. | ("aja"|"aj") arg arg -> aj
  29. | "la" arg arg -> la
  30. | "ld" arg arg -> ld
  31. | "ia" arg -> ia
  32. | "jmc" arg -> jmc
  33. | ("jw"|"ja") arg -> ja
  34. | ("push"|"psh") arg -> psh
  35. | "pd" arg -> pd
  36. | "pop" arg -> pop
  37. | "shlbnz" arg arg arg -> shlbnz
  38. | "shrbnz" arg arg arg -> shrbnz
  39. | "nbz" arg arg -> nbz
  40. | "anz" arg arg arg -> anz
  41. | "abgz" arg arg arg -> abgz
  42. | "swp" arg arg -> swp
  43. | "add" arg arg -> h_add
  44. | "sub" arg arg -> h_sub
  45. | "inc" arg -> h_inc
  46. | "dec" arg -> h_dec
  47. | ("mov"|"mv") arg arg -> h_mov
  48. | ("jmp"|"j") arg -> h_jmp
  49. | "hlt" -> h_halt
  50. | "out" arg -> h_out
  51. | "in" arg -> h_in
  52. mixed: arg arg+
  53. ?arg: INTEGER
  54. | CHAR
  55. | CHARS
  56. | QMARK
  57. | LABELOFFSET
  58. | OFFSET
  59. | NAME
  60. | rep
  61. rep: arg "*" COUNT
  62. COUNT: /[0-9]+/
  63. INTEGER: /-?[0-9]+/
  64. CHAR: "'" /./ "'"
  65. CHARS: "\"" /[^"]*/ "\""
  66. QMARK: "?"
  67. OFFSET: "$" /(-|\+)[0-9]+/
  68. LABELOFFSET: "$" /(-|\+)[A-Za-z][a-zA-Z0-9_]*/
  69. LABEL: /[A-Za-z][a-zA-Z0-9_]*:/
  70. NAME: /[A-Za-z][a-zA-Z0-9_]*/
  71. INCLUDE: "+" /.+/
  72. _NL: /\n+/
  73. IG: /[ \t\r]+/
  74. COM: /#.*[^\n]/
  75. %ignore IG
  76. %ignore COM
  77. """
  78. class WMA:
  79. def __init__(self):
  80. self.buffer = []
  81. self.size = 0
  82. self.parser = lark.Lark(GRAMMAR)
  83. def emit(self, *ops):
  84. self.buffer.extend(ops)
  85. if type(ops[0]) is tuple and ops[0][0]:
  86. return
  87. self.size += len(ops)
  88. def compile_arg(self, arg):
  89. if type(arg) is lark.Tree:
  90. if arg.data == "mixed":
  91. for subnode in arg.children:
  92. self.compile_arg(subnode)
  93. elif arg.data == "rep":
  94. count = int(arg.children[1].value)
  95. for _ in range(count):
  96. self.compile_arg(arg.children[0])
  97. elif arg.type == "INTEGER":
  98. self.emit(int(arg.value))
  99. elif arg.type == "CHAR":
  100. self.emit(ord(arg.value[1]))
  101. elif arg.type == "CHARS":
  102. for char in arg.value[1:-1]:
  103. self.emit(ord(char))
  104. elif arg.type == "QMARK":
  105. self.emit(self.size+2)
  106. elif arg.type == "OFFSET":
  107. self.emit(self.size+int(arg.value[1:])+1)
  108. elif arg.type == "LABELOFFSET":
  109. self.emit((False, arg.value[2:], arg.value[1]))
  110. elif arg.type == "NAME":
  111. if arg.value == "IO":
  112. self.emit(-1)
  113. elif arg.value == "Z":
  114. self.emit(-2)
  115. elif arg.value == "O":
  116. self.emit(-3)
  117. elif arg.value == "N":
  118. self.emit(-4)
  119. elif arg.value == "J":
  120. self.emit(-5)
  121. elif arg.value == "T":
  122. self.emit(-6)
  123. elif arg.value == "SP":
  124. self.emit(-7)
  125. elif arg.value == "EZ":
  126. self.emit(-8)
  127. elif arg.value == "SZ":
  128. self.emit(-9)
  129. elif arg.value == "MZ":
  130. self.emit(-10)
  131. elif arg.value == "JZ":
  132. self.emit(-11)
  133. elif arg.value == "W":
  134. self.emit(-12)
  135. elif arg.value == "MM":
  136. self.emit(-13)
  137. else:
  138. self.emit((False, arg.value))
  139. def compile_operation(self, op):
  140. if op.data == "nop":
  141. self.emit(0)
  142. elif op.data == "mj":
  143. self.emit(1)
  144. self.compile_arg(op.children[0])
  145. self.compile_arg(op.children[1])
  146. self.compile_arg(op.children[2])
  147. elif op.data == "sjlez":
  148. self.emit(2)
  149. self.compile_arg(op.children[0])
  150. self.compile_arg(op.children[1])
  151. self.compile_arg(op.children[2])
  152. elif op.data == "ajlez":
  153. self.emit(3)
  154. self.compile_arg(op.children[0])
  155. self.compile_arg(op.children[1])
  156. self.compile_arg(op.children[2])
  157. elif op.data == "sjlz":
  158. self.emit(4)
  159. self.compile_arg(op.children[0])
  160. self.compile_arg(op.children[1])
  161. self.compile_arg(op.children[2])
  162. elif op.data == "jles":
  163. self.emit(5)
  164. self.compile_arg(op.children[0])
  165. self.compile_arg(op.children[1])
  166. self.compile_arg(op.children[2])
  167. elif op.data == "tjt":
  168. self.emit(6)
  169. self.compile_arg(op.children[0])
  170. self.compile_arg(op.children[1])
  171. elif op.data == "djt":
  172. self.emit(7)
  173. self.compile_arg(op.children[0])
  174. self.compile_arg(op.children[1])
  175. elif op.data == "sslez":
  176. self.emit(8)
  177. self.compile_arg(op.children[0])
  178. self.compile_arg(op.children[1])
  179. elif op.data == "aslez":
  180. self.emit(9)
  181. self.compile_arg(op.children[0])
  182. self.compile_arg(op.children[1])
  183. elif op.data == "ije":
  184. self.emit(10)
  185. self.compile_arg(op.children[0])
  186. self.compile_arg(op.children[1])
  187. self.compile_arg(op.children[2])
  188. elif op.data == "djlz":
  189. self.emit(11)
  190. self.compile_arg(op.children[0])
  191. self.compile_arg(op.children[1])
  192. self.compile_arg(op.children[2])
  193. elif op.data == "xjlz":
  194. self.emit(12)
  195. self.compile_arg(op.children[0])
  196. self.compile_arg(op.children[1])
  197. self.compile_arg(op.children[2])
  198. elif op.data == "dslz":
  199. self.emit(13)
  200. self.compile_arg(op.children[0])
  201. elif op.data == "ssl":
  202. self.emit(14)
  203. self.compile_arg(op.children[0])
  204. self.compile_arg(op.children[1])
  205. elif op.data == "mbnz":
  206. self.emit(15)
  207. self.compile_arg(op.children[0])
  208. self.compile_arg(op.children[1])
  209. self.compile_arg(op.children[2])
  210. elif op.data == "modbz":
  211. self.emit(16)
  212. self.compile_arg(op.children[0])
  213. self.compile_arg(op.children[1])
  214. self.compile_arg(op.children[2])
  215. elif op.data == "aj":
  216. self.emit(17)
  217. self.compile_arg(op.children[0])
  218. self.compile_arg(op.children[1])
  219. elif op.data == "la":
  220. self.emit(18)
  221. self.compile_arg(op.children[0])
  222. self.compile_arg(op.children[1])
  223. elif op.data == "ld":
  224. self.emit(19)
  225. self.compile_arg(op.children[0])
  226. self.compile_arg(op.children[1])
  227. elif op.data == "ia":
  228. self.emit(20)
  229. self.compile_arg(op.children[0])
  230. elif op.data == "jmc":
  231. self.emit(21)
  232. self.compile_arg(op.children[0])
  233. elif op.data == "ja":
  234. self.emit(22)
  235. self.compile_arg(op.children[0])
  236. elif op.data == "psh":
  237. self.emit(23)
  238. self.compile_arg(op.children[0])
  239. elif op.data == "pd":
  240. self.emit(24)
  241. self.compile_arg(op.children[0])
  242. elif op.data == "pop":
  243. self.emit(25)
  244. self.compile_arg(op.children[0])
  245. elif op.data == "shlbnz":
  246. self.emit(26)
  247. self.compile_arg(op.children[0])
  248. self.compile_arg(op.children[1])
  249. self.compile_arg(op.children[2])
  250. elif op.data == "shrbnz":
  251. self.emit(27)
  252. self.compile_arg(op.children[0])
  253. self.compile_arg(op.children[1])
  254. self.compile_arg(op.children[2])
  255. elif op.data == "nbz":
  256. self.emit(28)
  257. self.compile_arg(op.children[0])
  258. self.compile_arg(op.children[1])
  259. elif op.data == "anz":
  260. self.emit(29)
  261. self.compile_arg(op.children[0])
  262. self.compile_arg(op.children[1])
  263. self.compile_arg(op.children[2])
  264. elif op.data == "abgz":
  265. self.emit(30)
  266. self.compile_arg(op.children[0])
  267. self.compile_arg(op.children[1])
  268. self.compile_arg(op.children[2])
  269. elif op.data == "swp":
  270. self.emit(31)
  271. self.compile_arg(op.children[0])
  272. self.compile_arg(op.children[1])
  273. elif op.data == "h_add":
  274. self.emit(3)
  275. self.compile_arg(op.children[0])
  276. self.compile_arg(op.children[1])
  277. self.emit(self.size+2)
  278. elif op.data == "h_sub":
  279. self.emit(2)
  280. self.compile_arg(op.children[0])
  281. self.compile_arg(op.children[1])
  282. self.emit(self.size+2)
  283. elif op.data == "h_inc":
  284. self.emit(3)
  285. self.emit(-3)
  286. self.compile_arg(op.children[0])
  287. self.emit(self.size+2)
  288. elif op.data == "h_dec":
  289. self.emit(2)
  290. self.emit(-3)
  291. self.compile_arg(op.children[0])
  292. self.emit(self.size+2)
  293. elif op.data == "h_mov":
  294. self.emit(1)
  295. self.compile_arg(op.children[0])
  296. self.compile_arg(op.children[1])
  297. self.emit(self.size+2)
  298. elif op.data == "h_jmp":
  299. self.emit(1)
  300. self.emit(0)
  301. self.emit(0)
  302. self.compile_arg(op.children[0])
  303. elif op.data == "h_halt":
  304. self.emit(1)
  305. self.emit(0)
  306. self.emit(0)
  307. self.emit(-1)
  308. elif op.data == "h_out":
  309. self.emit(1)
  310. self.compile_arg(op.children[0])
  311. self.emit(-1)
  312. self.emit(self.size+2)
  313. elif op.data == "h_in":
  314. self.emit(1)
  315. self.emit(-1)
  316. self.compile_arg(op.children[0])
  317. self.emit(self.size+2)
  318. def compile_labels(self):
  319. labels = {}
  320. position = 0
  321. while position < len(self.buffer):
  322. this = self.buffer[position]
  323. if type(this) is tuple and this[0]:
  324. label = this[1]
  325. if label in labels:
  326. raise Exception(f"Duplicated label: {label}.")
  327. elif label in ("IO", "Z", "O", "N", "J", "T", "SP", "EZ", "SZ", "MZ", "JZ", "W", "MM"):
  328. raise Exception(f"Register override: {label}.")
  329. self.buffer.pop(position)
  330. labels[label] = position + 1
  331. position += 1
  332. position = 0
  333. while position < len(self.buffer):
  334. this = self.buffer[position]
  335. if type(this) is tuple and not this[0]:
  336. label = this[1]
  337. if label not in labels:
  338. raise Exception(f"Undefined label/register: {label}.")
  339. if len(this) == 3:
  340. self.buffer[position] = 1 + (position + labels[label] if this[2] == '+' else position - labels[label])
  341. else:
  342. self.buffer[position] = labels[label]
  343. position += 1
  344. def encode(self):
  345. return struct.pack(f"<{'q'*self.size}", *self.buffer)
  346. def precompile(self, source):
  347. ast = self.parser.parse(source)
  348. for command in ast.children:
  349. if len(command.children) == 2:
  350. label = command.children[0].value[:-1]
  351. self.emit((True,label))
  352. if type(command.children[1]) is lark.Tree and command.children[1].data != "mixed":
  353. self.compile_operation(command.children[1])
  354. else:
  355. self.compile_arg(command.children[1])
  356. else:
  357. if type(command.children[0]) is lark.Token:
  358. if command.children[0].type == "LABEL":
  359. label = command.children[0].value[:-1]
  360. self.emit((True,label))
  361. else:
  362. with open(command.children[0].value[1:], "r") as f:
  363. self.precompile(f.read())
  364. else:
  365. self.compile_operation(command.children[0])
  366. def compile(self, source):
  367. source += "\n"
  368. for reg in "ABCDEFGHIXYK":
  369. source += f"{reg}:0"
  370. if reg != "K":
  371. source += ";"
  372. self.precompile(source)
  373. self.compile_labels()
  374. return self.encode()
  375. wma = WMA()
  376. try:
  377. if len(sys.argv) == 3:
  378. with open(sys.argv[1], "r") as fin:
  379. with open(sys.argv[2], "wb") as fout:
  380. fout.write(wma.compile(fin.read()))
  381. else:
  382. sys.stdout.buffer.write(wma.compile(sys.stdin.read()))
  383. except Exception as e:
  384. print(e)
  385. sys.exit(1)