wma.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. #!/usr/bin/python
  2. import sys
  3. import struct
  4. import lark
  5. GRAMMAR = r"""
  6. start: _NL? command ((_NL+|";") command)* _NL?
  7. command: LABEL? operation
  8. | LABEL (arg|mixed)
  9. | LABEL
  10. | INCLUDE
  11. ?operation: "org" INTEGER -> org
  12. | "nop" -> nop
  13. | ("mvj"|"mj") arg arg arg -> mj
  14. | "sblez" arg arg arg -> sjlez
  15. | "ablez" arg arg arg -> ajlez
  16. | "sblz" arg arg arg -> sjlz
  17. | "bles" arg arg arg -> jles
  18. | ("nbnz"|"tjt") arg arg -> tjt
  19. | ("dbnz"|"djt") arg arg -> djt
  20. | "sslez" arg arg -> sslez
  21. | "aslez" arg arg -> aslez
  22. | ("ibnc"|"ije") arg arg arg -> ije
  23. | "vblz" arg arg arg -> djlz
  24. | "xblz" arg arg arg -> xjlz
  25. | "dslz" arg -> dslz
  26. | ("ssgt"|"ssl") arg arg -> ssl
  27. | "mbnz" arg arg arg -> mbnz
  28. | "modbz" arg arg arg -> modbz
  29. | ("aja"|"aj") arg arg -> aj
  30. | "la" arg arg -> la
  31. | "ld" arg arg -> ld
  32. | "ia" arg -> ia
  33. | "jmc" arg -> jmc
  34. | ("jw"|"ja"|"call") arg -> ja
  35. | ("push"|"psh") arg -> psh
  36. | "pd" arg -> pd
  37. | "pop" arg -> pop
  38. | "shlbnz" arg arg arg -> shlbnz
  39. | "shrbnz" arg arg arg -> shrbnz
  40. | "nbz" arg arg -> nbz
  41. | "anz" arg arg arg -> anz
  42. | "abgz" arg arg arg -> abgz
  43. | "swp" arg arg -> swp
  44. | "str" arg arg -> str
  45. | "add" arg arg -> h_add
  46. | "sub" arg arg -> h_sub
  47. | "inc" arg -> h_inc
  48. | "dec" arg -> h_dec
  49. | ("mov"|"mv") arg arg -> h_mov
  50. | ("jmp"|"j") arg -> h_jmp
  51. | "hlt" -> h_halt
  52. | "out" arg -> h_out
  53. | "outn" arg -> h_outn
  54. | "in" arg -> h_in
  55. | "dir" arg -> h_dir
  56. | "ret" -> h_ret
  57. mixed: arg arg+
  58. ?arg: INTEGER
  59. | DOUBLE
  60. | CHAR
  61. | CHARS
  62. | OFFSET
  63. | LABELOFFSET
  64. | QMARK
  65. | EMARK
  66. | NAMEOFFSET
  67. | NAME
  68. | rep
  69. rep: (INTEGER|DOUBLE|CHAR|CHARS|NAME|NAMEOFFSET) "*" COUNT
  70. COUNT: /[0-9]+/
  71. INTEGER: /-?[0-9]+/
  72. DOUBLE: /-?[0-9]+\.[0-9]+/
  73. CHAR: "'" /./ "'"
  74. CHARS: "\"" /[^"]*/ "\""
  75. QMARK: "?"
  76. EMARK: "!"
  77. OFFSET: "?" /(-|\+)[0-9]+/
  78. LABELOFFSET: "?" /(-|\+)[A-Za-z_][a-zA-Z0-9_]*/
  79. NAMEOFFSET: /[A-Za-z_][a-zA-Z0-9_]*(-|\+)[0-9]+/
  80. LABEL: /[A-Za-z_][a-zA-Z0-9_]*:/
  81. NAME: /[A-Za-z_][a-zA-Z0-9_]*/
  82. INCLUDE: "+" /.+/
  83. _NL: /\n+/
  84. IG: /[ \t\r]+/
  85. COM: /#.*[^\n]/
  86. %ignore IG
  87. %ignore COM
  88. """
  89. class WMA:
  90. def __init__(self):
  91. self.origin = 1
  92. self.buffer = []
  93. self.size = 0
  94. self.parser = lark.Lark(GRAMMAR, parser='lalr')
  95. self.used_regs = []
  96. self.labels = {}
  97. def add_label(self, label):
  98. if label in self.labels:
  99. raise Exception(f"Duplicated label: {label}.")
  100. elif label in ("PC", "IO", "Z", "O", "N", "J", "T", "SP", "EZ", "SZ", "MZ", "JZ", "W", "MM", "DR", "ZZ"):
  101. raise Exception(f"Register override: {label}.")
  102. self.labels[label] = len(self.buffer)
  103. def emit(self, *ops):
  104. self.buffer.extend(ops)
  105. if type(ops[0]) is tuple and ops[0][0]:
  106. return
  107. self.size += len(ops)
  108. def compile_arg(self, arg):
  109. if type(arg) is lark.Tree:
  110. if arg.data == "mixed":
  111. for subnode in arg.children:
  112. self.compile_arg(subnode)
  113. elif arg.data == "rep":
  114. count = int(arg.children[1].value)
  115. for _ in range(count):
  116. self.compile_arg(arg.children[0])
  117. elif arg.type == "INTEGER":
  118. self.emit(int(arg.value))
  119. elif arg.type == "DOUBLE":
  120. self.emit(float(arg.value))
  121. elif arg.type == "CHAR":
  122. self.emit(ord(arg.value[1]))
  123. elif arg.type == "CHARS":
  124. for char in arg.value[1:-1]:
  125. self.emit(ord(char))
  126. elif arg.type == "QMARK":
  127. self.emit(self.origin + self.size)
  128. elif arg.type == "EMARK":
  129. self.emit(self.origin + self.size + 1)
  130. elif arg.type == "OFFSET":
  131. self.emit(self.origin + self.size + int(arg.value[1:]))
  132. elif arg.type == "LABELOFFSET":
  133. self.emit((False, arg.value[2:], arg.value[1]))
  134. elif arg.type == "NAMEOFFSET":
  135. n, o = arg.value.split(
  136. '+' if '+' in arg.value else '-'
  137. )
  138. self.emit((False, n, '+' if '+' in arg.value else '-', int(o)))
  139. elif arg.type == "NAME":
  140. if arg.value == "PC":
  141. self.emit(0)
  142. elif arg.value == "IO":
  143. self.emit(-1)
  144. elif arg.value == "Z":
  145. self.emit(-2)
  146. elif arg.value == "O":
  147. self.emit(-3)
  148. elif arg.value == "N":
  149. self.emit(-4)
  150. elif arg.value == "J":
  151. self.emit(-5)
  152. elif arg.value == "T":
  153. self.emit(-6)
  154. elif arg.value == "SP":
  155. self.emit(-7)
  156. elif arg.value == "EZ":
  157. self.emit(-8)
  158. elif arg.value == "SZ":
  159. self.emit(-9)
  160. elif arg.value == "MZ":
  161. self.emit(-10)
  162. elif arg.value == "JZ":
  163. self.emit(-11)
  164. elif arg.value == "W":
  165. self.emit(-12)
  166. elif arg.value == "MM":
  167. self.emit(-13)
  168. elif arg.value == "DR":
  169. self.emit(-14)
  170. elif arg.value == "ZZ":
  171. self.emit(-15)
  172. else:
  173. self.used_regs.append(arg.value)
  174. self.emit((False, arg.value))
  175. def compile_operation(self, op):
  176. if op.data == "org":
  177. self.org = int(op.children[0].value)
  178. elif op.data == "nop":
  179. self.emit(0)
  180. elif op.data == "mj":
  181. self.emit(1)
  182. self.compile_arg(op.children[0])
  183. self.compile_arg(op.children[1])
  184. self.compile_arg(op.children[2])
  185. elif op.data == "sjlez":
  186. self.emit(2)
  187. self.compile_arg(op.children[0])
  188. self.compile_arg(op.children[1])
  189. self.compile_arg(op.children[2])
  190. elif op.data == "ajlez":
  191. self.emit(3)
  192. self.compile_arg(op.children[0])
  193. self.compile_arg(op.children[1])
  194. self.compile_arg(op.children[2])
  195. elif op.data == "sjlz":
  196. self.emit(4)
  197. self.compile_arg(op.children[0])
  198. self.compile_arg(op.children[1])
  199. self.compile_arg(op.children[2])
  200. elif op.data == "jles":
  201. self.emit(5)
  202. self.compile_arg(op.children[0])
  203. self.compile_arg(op.children[1])
  204. self.compile_arg(op.children[2])
  205. elif op.data == "tjt":
  206. self.emit(6)
  207. self.compile_arg(op.children[0])
  208. self.compile_arg(op.children[1])
  209. elif op.data == "djt":
  210. self.emit(7)
  211. self.compile_arg(op.children[0])
  212. self.compile_arg(op.children[1])
  213. elif op.data == "sslez":
  214. self.emit(8)
  215. self.compile_arg(op.children[0])
  216. self.compile_arg(op.children[1])
  217. elif op.data == "aslez":
  218. self.emit(9)
  219. self.compile_arg(op.children[0])
  220. self.compile_arg(op.children[1])
  221. elif op.data == "ije":
  222. self.emit(10)
  223. self.compile_arg(op.children[0])
  224. self.compile_arg(op.children[1])
  225. self.compile_arg(op.children[2])
  226. elif op.data == "djlz":
  227. self.emit(11)
  228. self.compile_arg(op.children[0])
  229. self.compile_arg(op.children[1])
  230. self.compile_arg(op.children[2])
  231. elif op.data == "xjlz":
  232. self.emit(12)
  233. self.compile_arg(op.children[0])
  234. self.compile_arg(op.children[1])
  235. self.compile_arg(op.children[2])
  236. elif op.data == "dslz":
  237. self.emit(13)
  238. self.compile_arg(op.children[0])
  239. elif op.data == "ssl":
  240. self.emit(14)
  241. self.compile_arg(op.children[0])
  242. self.compile_arg(op.children[1])
  243. elif op.data == "mbnz":
  244. self.emit(15)
  245. self.compile_arg(op.children[0])
  246. self.compile_arg(op.children[1])
  247. self.compile_arg(op.children[2])
  248. elif op.data == "modbz":
  249. self.emit(16)
  250. self.compile_arg(op.children[0])
  251. self.compile_arg(op.children[1])
  252. self.compile_arg(op.children[2])
  253. elif op.data == "aj":
  254. self.emit(17)
  255. self.compile_arg(op.children[0])
  256. self.compile_arg(op.children[1])
  257. elif op.data == "la":
  258. self.emit(18)
  259. self.compile_arg(op.children[0])
  260. self.compile_arg(op.children[1])
  261. elif op.data == "ld":
  262. self.emit(19)
  263. self.compile_arg(op.children[0])
  264. self.compile_arg(op.children[1])
  265. elif op.data == "ia":
  266. self.emit(20)
  267. self.compile_arg(op.children[0])
  268. elif op.data == "jmc":
  269. self.emit(21)
  270. self.compile_arg(op.children[0])
  271. elif op.data == "ja":
  272. self.emit(22)
  273. self.compile_arg(op.children[0])
  274. elif op.data == "psh":
  275. self.emit(23)
  276. self.compile_arg(op.children[0])
  277. elif op.data == "pd":
  278. self.emit(24)
  279. self.compile_arg(op.children[0])
  280. elif op.data == "pop":
  281. self.emit(25)
  282. self.compile_arg(op.children[0])
  283. elif op.data == "shlbnz":
  284. self.emit(26)
  285. self.compile_arg(op.children[0])
  286. self.compile_arg(op.children[1])
  287. self.compile_arg(op.children[2])
  288. elif op.data == "shrbnz":
  289. self.emit(27)
  290. self.compile_arg(op.children[0])
  291. self.compile_arg(op.children[1])
  292. self.compile_arg(op.children[2])
  293. elif op.data == "nbz":
  294. self.emit(28)
  295. self.compile_arg(op.children[0])
  296. self.compile_arg(op.children[1])
  297. elif op.data == "anz":
  298. self.emit(29)
  299. self.compile_arg(op.children[0])
  300. self.compile_arg(op.children[1])
  301. self.compile_arg(op.children[2])
  302. elif op.data == "abgz":
  303. self.emit(30)
  304. self.compile_arg(op.children[0])
  305. self.compile_arg(op.children[1])
  306. self.compile_arg(op.children[2])
  307. elif op.data == "swp":
  308. self.emit(31)
  309. self.compile_arg(op.children[0])
  310. self.compile_arg(op.children[1])
  311. elif op.data == "str":
  312. self.emit(32)
  313. self.compile_arg(op.children[0])
  314. self.compile_arg(op.children[1])
  315. elif op.data == "h_add":
  316. self.emit(3)
  317. self.compile_arg(op.children[0])
  318. self.compile_arg(op.children[1])
  319. self.emit(self.size+2)
  320. elif op.data == "h_sub":
  321. self.emit(2)
  322. self.compile_arg(op.children[0])
  323. self.compile_arg(op.children[1])
  324. self.emit(self.size+2)
  325. elif op.data == "h_inc":
  326. self.emit(3)
  327. self.emit(-3)
  328. self.compile_arg(op.children[0])
  329. self.emit(self.size+2)
  330. elif op.data == "h_dec":
  331. self.emit(2)
  332. self.emit(-3)
  333. self.compile_arg(op.children[0])
  334. self.emit(self.size+2)
  335. elif op.data == "h_mov":
  336. if type(op.children[0]) is lark.Token and op.children[0].type in ("INTEGER", "DOUBLE", "CHAR"):
  337. self.emit(19)
  338. self.compile_arg(op.children[0])
  339. self.compile_arg(op.children[1])
  340. else:
  341. self.emit(1)
  342. self.compile_arg(op.children[0])
  343. self.compile_arg(op.children[1])
  344. self.emit(self.size+2)
  345. elif op.data == "h_jmp":
  346. self.emit(1)
  347. self.emit(0)
  348. self.emit(0)
  349. self.compile_arg(op.children[0])
  350. elif op.data == "h_halt":
  351. self.emit(1)
  352. self.emit(0)
  353. self.emit(0)
  354. self.emit(-1)
  355. elif op.data == "h_out":
  356. self.emit(1)
  357. self.compile_arg(op.children[0])
  358. self.emit(-1)
  359. self.emit(self.size+2)
  360. elif op.data == "h_outn":
  361. self.emit(1)
  362. self.compile_arg(op.children[0])
  363. self.emit(-2)
  364. self.emit(self.size+2)
  365. elif op.data == "h_in":
  366. self.emit(1)
  367. self.emit(-1)
  368. self.compile_arg(op.children[0])
  369. self.emit(self.size+2)
  370. elif op.data == "h_dir":
  371. self.emit(19)
  372. self.compile_arg(op.children[0])
  373. self.emit(-14)
  374. elif op.data == "h_ret":
  375. self.emit(1)
  376. self.emit(-2)
  377. self.emit(-2)
  378. self.emit(-5)
  379. def compile_labels(self):
  380. position = 0
  381. while position < len(self.buffer):
  382. this = self.buffer[position]
  383. if type(this) is tuple and not this[0]:
  384. label = this[1]
  385. if label not in self.labels:
  386. raise Exception(f"Undefined label/register: {label}.")
  387. if len(this) == 3:
  388. self.buffer[position] = self.origin + (position + self.labels[label] if this[2] == '+' else position - selfmlabels[label])
  389. elif len(this) == 4:
  390. self.buffer[position] = self.origin + (self.labels[label] + this[3] if this[2] == '+' else self.labels[label] - this[3])
  391. else:
  392. self.buffer[position] = self.origin + self.labels[label]
  393. position += 1
  394. def encode(self):
  395. buffer = b""
  396. for b in self.buffer:
  397. if type(b) is float:
  398. b = struct.pack("<d", b)
  399. else:
  400. b = struct.pack("<q", b)
  401. buffer += b
  402. return buffer
  403. def precompile(self, source):
  404. ast = self.parser.parse(source)
  405. for command in ast.children:
  406. if len(command.children) == 2:
  407. label = command.children[0].value[:-1]
  408. self.add_label(label)
  409. if type(command.children[1]) is lark.Tree and command.children[1].data not in ("mixed", "rep"):
  410. self.compile_operation(command.children[1])
  411. else:
  412. self.compile_arg(command.children[1])
  413. else:
  414. if type(command.children[0]) is lark.Token:
  415. if command.children[0].type == "LABEL":
  416. label = command.children[0].value[:-1]
  417. self.add_label(label)
  418. self.labels[label] = len(self.buffer)
  419. else:
  420. with open(command.children[0].value[1:], "r") as f:
  421. self.precompile(f.read())
  422. else:
  423. self.compile_operation(command.children[0])
  424. def compile(self, source):
  425. self.precompile(source)
  426. for reg in "ABCDEFGHIXYKRS":
  427. if reg in self.used_regs:
  428. self.add_label(reg)
  429. self.buffer.append(0)
  430. self.compile_labels()
  431. return self.encode()
  432. wma = WMA()
  433. try:
  434. if len(sys.argv) == 3:
  435. with open(sys.argv[1], "r") as fin:
  436. with open(sys.argv[2], "wb") as fout:
  437. fout.write(wma.compile(fin.read()))
  438. else:
  439. sys.stdout.buffer.write(wma.compile(sys.stdin.read()))
  440. except Exception as e:
  441. print(e)
  442. sys.exit(1)