wma.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. #!/usr/bin/python
  2. import sys
  3. import struct
  4. import lark
  5. GRAMMAR = r"""
  6. start: _NL? command ((_NL+|";") command)* _NL?
  7. command: LABEL? operation
  8. | LABEL (arg|mixed)
  9. | LABEL
  10. | INCLUDE
  11. ?operation: "org" INTEGER -> org
  12. | "nop" -> nop
  13. | ("mvj"|"mj") arg arg arg -> mj
  14. | "sblez" arg arg arg -> sjlez
  15. | "ablez" arg arg arg -> ajlez
  16. | "sblz" arg arg arg -> sjlz
  17. | "bles" arg arg arg -> jles
  18. | ("nbnz"|"tjt") arg arg -> tjt
  19. | ("dbnz"|"djt") arg arg -> djt
  20. | "sslez" arg arg -> sslez
  21. | "aslez" arg arg -> aslez
  22. | ("ibnc"|"ije") arg arg arg -> ije
  23. | "vblz" arg arg arg -> djlz
  24. | "xblz" arg arg arg -> xjlz
  25. | "dslz" arg -> dslz
  26. | ("ssgt"|"ssl") arg arg -> ssl
  27. | "mbnz" arg arg arg -> mbnz
  28. | "modbz" arg arg arg -> modbz
  29. | ("aja"|"aj") arg arg -> aj
  30. | "la" arg arg -> la
  31. | "ld" arg arg -> ld
  32. | "ia" arg -> ia
  33. | "jmc" arg -> jmc
  34. | ("jw"|"ja"|"call") arg -> ja
  35. | ("push"|"psh") arg -> psh
  36. | "pd" arg -> pd
  37. | "pop" arg -> pop
  38. | "shlbnz" arg arg arg -> shlbnz
  39. | "shrbnz" arg arg arg -> shrbnz
  40. | "nbz" arg arg -> nbz
  41. | "anz" arg arg arg -> anz
  42. | "abgz" arg arg arg -> abgz
  43. | "swp" arg arg -> swp
  44. | "str" arg arg -> str
  45. | "add" arg arg -> h_add
  46. | "sub" arg arg -> h_sub
  47. | "inc" arg -> h_inc
  48. | "dec" arg -> h_dec
  49. | ("mov"|"mv") arg arg -> h_mov
  50. | ("jmp"|"j") arg -> h_jmp
  51. | "hlt" -> h_halt
  52. | "out" arg -> h_out
  53. | "outn" arg -> h_outn
  54. | "in" arg -> h_in
  55. | "dir" arg -> h_dir
  56. | "ret" -> h_ret
  57. | "peek" arg -> h_peek
  58. mixed: arg arg+
  59. ?arg: INTEGER
  60. | DOUBLE
  61. | CHAR
  62. | CHARS
  63. | OFFSET
  64. | LABELOFFSET
  65. | QMARK
  66. | EMARK
  67. | NAMEOFFSET
  68. | NAME
  69. | rep
  70. rep: (INTEGER|DOUBLE|CHAR|CHARS|NAME|NAMEOFFSET) "*" COUNT
  71. COUNT: /[0-9]+/
  72. INTEGER: /-?[0-9]+/
  73. DOUBLE: /-?[0-9]+\.[0-9]+/
  74. CHAR: "'" /./ "'"
  75. CHARS: "\"" /[^"]*/ "\""
  76. QMARK: "?"
  77. EMARK: "!"
  78. OFFSET: "?" /(-|\+)[0-9]+/
  79. LABELOFFSET: "?" /(-|\+)[A-Za-z_][a-zA-Z0-9_]*/
  80. NAMEOFFSET: /[A-Za-z_][a-zA-Z0-9_]*(-|\+)[0-9]+/
  81. LABEL: /[A-Za-z_][a-zA-Z0-9_]*:/
  82. NAME: /[A-Za-z_][a-zA-Z0-9_]*/
  83. INCLUDE: "+" /.+/
  84. _NL: /\n+/
  85. IG: /[ \t\r]+/
  86. COM: /#.*[^\n]/
  87. %ignore IG
  88. %ignore COM
  89. """
  90. class WMA:
  91. def __init__(self):
  92. self.origin = 1
  93. self.buffer = []
  94. self.size = 0
  95. self.parser = lark.Lark(GRAMMAR, parser='lalr')
  96. self.used_regs = []
  97. self.labels = {}
  98. def add_label(self, label):
  99. if label in self.labels:
  100. raise Exception(f"Duplicated label: {label}.")
  101. elif label in ("PC", "IO", "Z", "O", "N", "J", "T", "SP", "EZ", "SZ", "MZ", "JZ", "W", "MM", "DR", "ZZ"):
  102. raise Exception(f"Register override: {label}.")
  103. self.labels[label] = len(self.buffer)
  104. def emit(self, *ops):
  105. self.buffer.extend(ops)
  106. if type(ops[0]) is tuple and ops[0][0]:
  107. return
  108. self.size += len(ops)
  109. def compile_arg(self, arg):
  110. if type(arg) is lark.Tree:
  111. if arg.data == "mixed":
  112. for subnode in arg.children:
  113. self.compile_arg(subnode)
  114. elif arg.data == "rep":
  115. count = int(arg.children[1].value)
  116. for _ in range(count):
  117. self.compile_arg(arg.children[0])
  118. elif arg.type == "INTEGER":
  119. self.emit(int(arg.value))
  120. elif arg.type == "DOUBLE":
  121. self.emit(float(arg.value))
  122. elif arg.type == "CHAR":
  123. self.emit(ord(arg.value[1]))
  124. elif arg.type == "CHARS":
  125. for char in arg.value[1:-1]:
  126. self.emit(ord(char))
  127. elif arg.type == "QMARK":
  128. self.emit(self.origin + self.size)
  129. elif arg.type == "EMARK":
  130. self.emit(self.origin + self.size + 1)
  131. elif arg.type == "OFFSET":
  132. self.emit(self.origin + self.size + int(arg.value[1:]))
  133. elif arg.type == "LABELOFFSET":
  134. self.emit((False, arg.value[2:], arg.value[1]))
  135. elif arg.type == "NAMEOFFSET":
  136. n, o = arg.value.split(
  137. '+' if '+' in arg.value else '-'
  138. )
  139. self.emit((False, n, '+' if '+' in arg.value else '-', int(o)))
  140. elif arg.type == "NAME":
  141. if arg.value == "PC":
  142. self.emit(0)
  143. elif arg.value == "IO":
  144. self.emit(-1)
  145. elif arg.value == "Z":
  146. self.emit(-2)
  147. elif arg.value == "O":
  148. self.emit(-3)
  149. elif arg.value == "N":
  150. self.emit(-4)
  151. elif arg.value == "J":
  152. self.emit(-5)
  153. elif arg.value == "T":
  154. self.emit(-6)
  155. elif arg.value == "SP":
  156. self.emit(-7)
  157. elif arg.value == "EZ":
  158. self.emit(-8)
  159. elif arg.value == "SZ":
  160. self.emit(-9)
  161. elif arg.value == "MZ":
  162. self.emit(-10)
  163. elif arg.value == "JZ":
  164. self.emit(-11)
  165. elif arg.value == "W":
  166. self.emit(-12)
  167. elif arg.value == "MM":
  168. self.emit(-13)
  169. elif arg.value == "DR":
  170. self.emit(-14)
  171. elif arg.value == "ZZ":
  172. self.emit(-15)
  173. else:
  174. self.used_regs.append(arg.value)
  175. self.emit((False, arg.value))
  176. def compile_operation(self, op):
  177. if op.data == "org":
  178. self.org = int(op.children[0].value)
  179. elif op.data == "nop":
  180. self.emit(0)
  181. elif op.data == "mj":
  182. self.emit(1)
  183. self.compile_arg(op.children[0])
  184. self.compile_arg(op.children[1])
  185. self.compile_arg(op.children[2])
  186. elif op.data == "sjlez":
  187. self.emit(2)
  188. self.compile_arg(op.children[0])
  189. self.compile_arg(op.children[1])
  190. self.compile_arg(op.children[2])
  191. elif op.data == "ajlez":
  192. self.emit(3)
  193. self.compile_arg(op.children[0])
  194. self.compile_arg(op.children[1])
  195. self.compile_arg(op.children[2])
  196. elif op.data == "sjlz":
  197. self.emit(4)
  198. self.compile_arg(op.children[0])
  199. self.compile_arg(op.children[1])
  200. self.compile_arg(op.children[2])
  201. elif op.data == "jles":
  202. self.emit(5)
  203. self.compile_arg(op.children[0])
  204. self.compile_arg(op.children[1])
  205. self.compile_arg(op.children[2])
  206. elif op.data == "tjt":
  207. self.emit(6)
  208. self.compile_arg(op.children[0])
  209. self.compile_arg(op.children[1])
  210. elif op.data == "djt":
  211. self.emit(7)
  212. self.compile_arg(op.children[0])
  213. self.compile_arg(op.children[1])
  214. elif op.data == "sslez":
  215. self.emit(8)
  216. self.compile_arg(op.children[0])
  217. self.compile_arg(op.children[1])
  218. elif op.data == "aslez":
  219. self.emit(9)
  220. self.compile_arg(op.children[0])
  221. self.compile_arg(op.children[1])
  222. elif op.data == "ije":
  223. self.emit(10)
  224. self.compile_arg(op.children[0])
  225. self.compile_arg(op.children[1])
  226. self.compile_arg(op.children[2])
  227. elif op.data == "djlz":
  228. self.emit(11)
  229. self.compile_arg(op.children[0])
  230. self.compile_arg(op.children[1])
  231. self.compile_arg(op.children[2])
  232. elif op.data == "xjlz":
  233. self.emit(12)
  234. self.compile_arg(op.children[0])
  235. self.compile_arg(op.children[1])
  236. self.compile_arg(op.children[2])
  237. elif op.data == "dslz":
  238. self.emit(13)
  239. self.compile_arg(op.children[0])
  240. elif op.data == "ssl":
  241. self.emit(14)
  242. self.compile_arg(op.children[0])
  243. self.compile_arg(op.children[1])
  244. elif op.data == "mbnz":
  245. self.emit(15)
  246. self.compile_arg(op.children[0])
  247. self.compile_arg(op.children[1])
  248. self.compile_arg(op.children[2])
  249. elif op.data == "modbz":
  250. self.emit(16)
  251. self.compile_arg(op.children[0])
  252. self.compile_arg(op.children[1])
  253. self.compile_arg(op.children[2])
  254. elif op.data == "aj":
  255. self.emit(17)
  256. self.compile_arg(op.children[0])
  257. self.compile_arg(op.children[1])
  258. elif op.data == "la":
  259. self.emit(18)
  260. self.compile_arg(op.children[0])
  261. self.compile_arg(op.children[1])
  262. elif op.data == "ld":
  263. self.emit(19)
  264. self.compile_arg(op.children[0])
  265. self.compile_arg(op.children[1])
  266. elif op.data == "ia":
  267. self.emit(20)
  268. self.compile_arg(op.children[0])
  269. elif op.data == "jmc":
  270. self.emit(21)
  271. self.compile_arg(op.children[0])
  272. elif op.data == "ja":
  273. self.emit(22)
  274. self.compile_arg(op.children[0])
  275. elif op.data == "psh":
  276. self.emit(23)
  277. self.compile_arg(op.children[0])
  278. elif op.data == "pd":
  279. self.emit(24)
  280. self.compile_arg(op.children[0])
  281. elif op.data == "pop":
  282. self.emit(25)
  283. self.compile_arg(op.children[0])
  284. elif op.data == "shlbnz":
  285. self.emit(26)
  286. self.compile_arg(op.children[0])
  287. self.compile_arg(op.children[1])
  288. self.compile_arg(op.children[2])
  289. elif op.data == "shrbnz":
  290. self.emit(27)
  291. self.compile_arg(op.children[0])
  292. self.compile_arg(op.children[1])
  293. self.compile_arg(op.children[2])
  294. elif op.data == "nbz":
  295. self.emit(28)
  296. self.compile_arg(op.children[0])
  297. self.compile_arg(op.children[1])
  298. elif op.data == "anz":
  299. self.emit(29)
  300. self.compile_arg(op.children[0])
  301. self.compile_arg(op.children[1])
  302. self.compile_arg(op.children[2])
  303. elif op.data == "abgz":
  304. self.emit(30)
  305. self.compile_arg(op.children[0])
  306. self.compile_arg(op.children[1])
  307. self.compile_arg(op.children[2])
  308. elif op.data == "swp":
  309. self.emit(31)
  310. self.compile_arg(op.children[0])
  311. self.compile_arg(op.children[1])
  312. elif op.data == "str":
  313. self.emit(32)
  314. self.compile_arg(op.children[0])
  315. self.compile_arg(op.children[1])
  316. elif op.data == "h_add":
  317. self.emit(3)
  318. self.compile_arg(op.children[0])
  319. self.compile_arg(op.children[1])
  320. self.emit(self.size+2)
  321. elif op.data == "h_sub":
  322. self.emit(2)
  323. self.compile_arg(op.children[0])
  324. self.compile_arg(op.children[1])
  325. self.emit(self.size+2)
  326. elif op.data == "h_inc":
  327. self.emit(3)
  328. self.emit(-3)
  329. self.compile_arg(op.children[0])
  330. self.emit(self.size+2)
  331. elif op.data == "h_dec":
  332. self.emit(2)
  333. self.emit(-3)
  334. self.compile_arg(op.children[0])
  335. self.emit(self.size+2)
  336. elif op.data == "h_mov":
  337. if type(op.children[0]) is lark.Token and op.children[0].type in ("INTEGER", "DOUBLE", "CHAR"):
  338. self.emit(19)
  339. self.compile_arg(op.children[0])
  340. self.compile_arg(op.children[1])
  341. else:
  342. self.emit(1)
  343. self.compile_arg(op.children[0])
  344. self.compile_arg(op.children[1])
  345. self.emit(self.size+2)
  346. elif op.data == "h_jmp":
  347. self.emit(1)
  348. self.emit(0)
  349. self.emit(0)
  350. self.compile_arg(op.children[0])
  351. elif op.data == "h_halt":
  352. self.emit(1)
  353. self.emit(0)
  354. self.emit(0)
  355. self.emit(-1)
  356. elif op.data == "h_out":
  357. self.emit(1)
  358. self.compile_arg(op.children[0])
  359. self.emit(-1)
  360. self.emit(self.size+2)
  361. elif op.data == "h_outn":
  362. self.emit(1)
  363. self.compile_arg(op.children[0])
  364. self.emit(-2)
  365. self.emit(self.size+2)
  366. elif op.data == "h_in":
  367. self.emit(1)
  368. self.emit(-1)
  369. self.compile_arg(op.children[0])
  370. self.emit(self.size+2)
  371. elif op.data == "h_dir":
  372. self.emit(19)
  373. self.compile_arg(op.children[0])
  374. self.emit(-14)
  375. elif op.data == "h_ret":
  376. self.emit(1)
  377. self.emit(-15)
  378. self.emit(-15)
  379. self.emit(-5)
  380. elif op.data == "h_peek":
  381. self.emit(25)
  382. self.compile_arg(op.children[0])
  383. self.emit(23)
  384. self.compile_arg(op.children[0])
  385. def compile_labels(self):
  386. position = 0
  387. while position < len(self.buffer):
  388. this = self.buffer[position]
  389. if type(this) is tuple and not this[0]:
  390. label = this[1]
  391. if label not in self.labels:
  392. raise Exception(f"Undefined label/register: {label}.")
  393. if len(this) == 3:
  394. self.buffer[position] = self.origin + (position + self.labels[label] if this[2] == '+' else position - selfmlabels[label])
  395. elif len(this) == 4:
  396. self.buffer[position] = self.origin + (self.labels[label] + this[3] if this[2] == '+' else self.labels[label] - this[3])
  397. else:
  398. self.buffer[position] = self.origin + self.labels[label]
  399. position += 1
  400. def encode(self):
  401. buffer = b""
  402. for b in self.buffer:
  403. if type(b) is float:
  404. b = struct.pack("<d", b)
  405. else:
  406. b = struct.pack("<q", b)
  407. buffer += b
  408. return buffer
  409. def precompile(self, source):
  410. ast = self.parser.parse(source)
  411. for command in ast.children:
  412. if len(command.children) == 2:
  413. label = command.children[0].value[:-1]
  414. self.add_label(label)
  415. if type(command.children[1]) is lark.Tree and command.children[1].data not in ("mixed", "rep"):
  416. self.compile_operation(command.children[1])
  417. else:
  418. self.compile_arg(command.children[1])
  419. else:
  420. if type(command.children[0]) is lark.Token:
  421. if command.children[0].type == "LABEL":
  422. label = command.children[0].value[:-1]
  423. self.add_label(label)
  424. self.labels[label] = len(self.buffer)
  425. else:
  426. with open(command.children[0].value[1:], "r") as f:
  427. self.precompile(f.read())
  428. else:
  429. self.compile_operation(command.children[0])
  430. def compile(self, source):
  431. self.precompile(source)
  432. for reg in "ABCDEFGHIXYKRS":
  433. if reg in self.used_regs:
  434. self.add_label(reg)
  435. self.buffer.append(0)
  436. self.add_label("END")
  437. self.compile_labels()
  438. return self.encode()
  439. wma = WMA()
  440. try:
  441. if len(sys.argv) == 3:
  442. with open(sys.argv[1], "r") as fin:
  443. with open(sys.argv[2], "wb") as fout:
  444. fout.write(wma.compile(fin.read()))
  445. else:
  446. sys.stdout.buffer.write(wma.compile(sys.stdin.read()))
  447. except Exception as e:
  448. print(e)
  449. sys.exit(1)