wma.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. #!/usr/bin/python
  2. import sys
  3. import struct
  4. import lark
  5. GRAMMAR = r"""
  6. start: _NL? command ((_NL+|";") command)* _NL?
  7. command: LABEL? operation
  8. | LABEL (arg|mixed)
  9. | LABEL
  10. | INCLUDE
  11. ?operation: "nop" -> nop
  12. | ("mvj"|"mj") arg arg arg -> mj
  13. | "sblez" arg arg arg -> sjlez
  14. | "ablez" arg arg arg -> ajlez
  15. | "sblz" arg arg arg -> sjlz
  16. | "bles" arg arg arg -> jles
  17. | ("nbnz"|"tjt") arg arg -> tjt
  18. | ("dbnz"|"djt") arg arg -> djt
  19. | "sslez" arg arg -> sslez
  20. | "aslez" arg arg -> aslez
  21. | ("ibnc"|"ije") arg arg arg -> ije
  22. | "vblz" arg arg arg -> djlz
  23. | "xblz" arg arg arg -> xjlz
  24. | "dslz" arg -> dslz
  25. | ("ssgt"|"ssl") arg arg -> ssl
  26. | "mbnz" arg arg arg -> mbnz
  27. | "modbz" arg arg arg -> modbz
  28. | ("aja"|"aj") arg arg -> aj
  29. | "la" arg arg -> la
  30. | "ld" arg arg -> ld
  31. | "ia" arg -> ia
  32. | "jmc" arg -> jmc
  33. | ("jw"|"ja") arg -> ja
  34. | ("push"|"psh") arg -> psh
  35. | "pd" arg -> pd
  36. | "pop" arg -> pop
  37. | "shlbnz" arg arg arg -> shlbnz
  38. | "shrbnz" arg arg arg -> shrbnz
  39. | "nbz" arg arg -> nbz
  40. | "anz" arg arg arg -> anz
  41. | "abgz" arg arg arg -> abgz
  42. | "swp" arg arg -> swp
  43. | "add" arg arg -> h_add
  44. | "sub" arg arg -> h_sub
  45. | "inc" arg -> h_inc
  46. | "dec" arg -> h_dec
  47. | ("mov"|"mv") arg arg -> h_mov
  48. | ("jmp"|"j") arg -> h_jmp
  49. | "hlt" -> h_halt
  50. | "out" arg -> h_out
  51. | "outn" arg -> h_outn
  52. | "in" arg -> h_in
  53. mixed: arg arg+
  54. ?arg: INTEGER
  55. | DOUBLE
  56. | CHAR
  57. | CHARS
  58. | OFFSET
  59. | LABELOFFSET
  60. | QMARK
  61. | NAMEOFFSET
  62. | NAME
  63. | rep
  64. rep: arg "*" COUNT
  65. COUNT: /[0-9]+/
  66. INTEGER: /-?[0-9]+/
  67. DOUBLE: /-?[0-9]+\.[0-9]+/
  68. CHAR: "'" /./ "'"
  69. CHARS: "\"" /[^"]*/ "\""
  70. QMARK: "?"
  71. OFFSET: "?" /(-|\+)[0-9]+/
  72. LABELOFFSET: "?" /(-|\+)[A-Za-z][a-zA-Z0-9_]*/
  73. NAMEOFFSET: /[A-Za-z][a-zA-Z0-9_]*(-|\+)[0-9]+/
  74. LABEL: /[A-Za-z][a-zA-Z0-9_]*:/
  75. NAME: /[A-Za-z][a-zA-Z0-9_]*/
  76. INCLUDE: "+" /.+/
  77. _NL: /\n+/
  78. IG: /[ \t\r]+/
  79. COM: /#.*[^\n]/
  80. %ignore IG
  81. %ignore COM
  82. """
  83. class WMA:
  84. def __init__(self):
  85. self.buffer = []
  86. self.size = 0
  87. self.parser = lark.Lark(GRAMMAR)
  88. def emit(self, *ops):
  89. self.buffer.extend(ops)
  90. if type(ops[0]) is tuple and ops[0][0]:
  91. return
  92. self.size += len(ops)
  93. def compile_arg(self, arg):
  94. if type(arg) is lark.Tree:
  95. if arg.data == "mixed":
  96. for subnode in arg.children:
  97. self.compile_arg(subnode)
  98. elif arg.data == "rep":
  99. count = int(arg.children[1].value)
  100. for _ in range(count):
  101. self.compile_arg(arg.children[0])
  102. elif arg.type == "INTEGER":
  103. self.emit(int(arg.value))
  104. elif arg.type == "DOUBLE":
  105. self.emit(float(arg.value))
  106. elif arg.type == "CHAR":
  107. self.emit(ord(arg.value[1]))
  108. elif arg.type == "CHARS":
  109. for char in arg.value[1:-1]:
  110. self.emit(ord(char))
  111. elif arg.type == "QMARK":
  112. self.emit(self.size+2)
  113. elif arg.type == "OFFSET":
  114. self.emit(self.size+int(arg.value[1:])+1)
  115. elif arg.type == "LABELOFFSET":
  116. self.emit((False, arg.value[2:], arg.value[1]))
  117. elif arg.type == "NAMEOFFSET":
  118. n, o = arg.value.split(
  119. '+' if '+' in arg.value else '-'
  120. )
  121. self.emit((False, n, '+' if '+' in arg.value else '-', int(o)))
  122. elif arg.type == "NAME":
  123. if arg.value == "IO":
  124. self.emit(-1)
  125. elif arg.value == "Z":
  126. self.emit(-2)
  127. elif arg.value == "O":
  128. self.emit(-3)
  129. elif arg.value == "N":
  130. self.emit(-4)
  131. elif arg.value == "J":
  132. self.emit(-5)
  133. elif arg.value == "T":
  134. self.emit(-6)
  135. elif arg.value == "SP":
  136. self.emit(-7)
  137. elif arg.value == "EZ":
  138. self.emit(-8)
  139. elif arg.value == "SZ":
  140. self.emit(-9)
  141. elif arg.value == "MZ":
  142. self.emit(-10)
  143. elif arg.value == "JZ":
  144. self.emit(-11)
  145. elif arg.value == "W":
  146. self.emit(-12)
  147. elif arg.value == "MM":
  148. self.emit(-13)
  149. elif arg.value == "DR":
  150. self.emit(-14)
  151. elif arg.value == "ZZ":
  152. self.emit(-15)
  153. else:
  154. self.emit((False, arg.value))
  155. def compile_operation(self, op):
  156. if op.data == "nop":
  157. self.emit(0)
  158. elif op.data == "mj":
  159. self.emit(1)
  160. self.compile_arg(op.children[0])
  161. self.compile_arg(op.children[1])
  162. self.compile_arg(op.children[2])
  163. elif op.data == "sjlez":
  164. self.emit(2)
  165. self.compile_arg(op.children[0])
  166. self.compile_arg(op.children[1])
  167. self.compile_arg(op.children[2])
  168. elif op.data == "ajlez":
  169. self.emit(3)
  170. self.compile_arg(op.children[0])
  171. self.compile_arg(op.children[1])
  172. self.compile_arg(op.children[2])
  173. elif op.data == "sjlz":
  174. self.emit(4)
  175. self.compile_arg(op.children[0])
  176. self.compile_arg(op.children[1])
  177. self.compile_arg(op.children[2])
  178. elif op.data == "jles":
  179. self.emit(5)
  180. self.compile_arg(op.children[0])
  181. self.compile_arg(op.children[1])
  182. self.compile_arg(op.children[2])
  183. elif op.data == "tjt":
  184. self.emit(6)
  185. self.compile_arg(op.children[0])
  186. self.compile_arg(op.children[1])
  187. elif op.data == "djt":
  188. self.emit(7)
  189. self.compile_arg(op.children[0])
  190. self.compile_arg(op.children[1])
  191. elif op.data == "sslez":
  192. self.emit(8)
  193. self.compile_arg(op.children[0])
  194. self.compile_arg(op.children[1])
  195. elif op.data == "aslez":
  196. self.emit(9)
  197. self.compile_arg(op.children[0])
  198. self.compile_arg(op.children[1])
  199. elif op.data == "ije":
  200. self.emit(10)
  201. self.compile_arg(op.children[0])
  202. self.compile_arg(op.children[1])
  203. self.compile_arg(op.children[2])
  204. elif op.data == "djlz":
  205. self.emit(11)
  206. self.compile_arg(op.children[0])
  207. self.compile_arg(op.children[1])
  208. self.compile_arg(op.children[2])
  209. elif op.data == "xjlz":
  210. self.emit(12)
  211. self.compile_arg(op.children[0])
  212. self.compile_arg(op.children[1])
  213. self.compile_arg(op.children[2])
  214. elif op.data == "dslz":
  215. self.emit(13)
  216. self.compile_arg(op.children[0])
  217. elif op.data == "ssl":
  218. self.emit(14)
  219. self.compile_arg(op.children[0])
  220. self.compile_arg(op.children[1])
  221. elif op.data == "mbnz":
  222. self.emit(15)
  223. self.compile_arg(op.children[0])
  224. self.compile_arg(op.children[1])
  225. self.compile_arg(op.children[2])
  226. elif op.data == "modbz":
  227. self.emit(16)
  228. self.compile_arg(op.children[0])
  229. self.compile_arg(op.children[1])
  230. self.compile_arg(op.children[2])
  231. elif op.data == "aj":
  232. self.emit(17)
  233. self.compile_arg(op.children[0])
  234. self.compile_arg(op.children[1])
  235. elif op.data == "la":
  236. self.emit(18)
  237. self.compile_arg(op.children[0])
  238. self.compile_arg(op.children[1])
  239. elif op.data == "ld":
  240. self.emit(19)
  241. self.compile_arg(op.children[0])
  242. self.compile_arg(op.children[1])
  243. elif op.data == "ia":
  244. self.emit(20)
  245. self.compile_arg(op.children[0])
  246. elif op.data == "jmc":
  247. self.emit(21)
  248. self.compile_arg(op.children[0])
  249. elif op.data == "ja":
  250. self.emit(22)
  251. self.compile_arg(op.children[0])
  252. elif op.data == "psh":
  253. self.emit(23)
  254. self.compile_arg(op.children[0])
  255. elif op.data == "pd":
  256. self.emit(24)
  257. self.compile_arg(op.children[0])
  258. elif op.data == "pop":
  259. self.emit(25)
  260. self.compile_arg(op.children[0])
  261. elif op.data == "shlbnz":
  262. self.emit(26)
  263. self.compile_arg(op.children[0])
  264. self.compile_arg(op.children[1])
  265. self.compile_arg(op.children[2])
  266. elif op.data == "shrbnz":
  267. self.emit(27)
  268. self.compile_arg(op.children[0])
  269. self.compile_arg(op.children[1])
  270. self.compile_arg(op.children[2])
  271. elif op.data == "nbz":
  272. self.emit(28)
  273. self.compile_arg(op.children[0])
  274. self.compile_arg(op.children[1])
  275. elif op.data == "anz":
  276. self.emit(29)
  277. self.compile_arg(op.children[0])
  278. self.compile_arg(op.children[1])
  279. self.compile_arg(op.children[2])
  280. elif op.data == "abgz":
  281. self.emit(30)
  282. self.compile_arg(op.children[0])
  283. self.compile_arg(op.children[1])
  284. self.compile_arg(op.children[2])
  285. elif op.data == "swp":
  286. self.emit(31)
  287. self.compile_arg(op.children[0])
  288. self.compile_arg(op.children[1])
  289. elif op.data == "h_add":
  290. self.emit(3)
  291. self.compile_arg(op.children[0])
  292. self.compile_arg(op.children[1])
  293. self.emit(self.size+2)
  294. elif op.data == "h_sub":
  295. self.emit(2)
  296. self.compile_arg(op.children[0])
  297. self.compile_arg(op.children[1])
  298. self.emit(self.size+2)
  299. elif op.data == "h_inc":
  300. self.emit(3)
  301. self.emit(-3)
  302. self.compile_arg(op.children[0])
  303. self.emit(self.size+2)
  304. elif op.data == "h_dec":
  305. self.emit(2)
  306. self.emit(-3)
  307. self.compile_arg(op.children[0])
  308. self.emit(self.size+2)
  309. elif op.data == "h_mov":
  310. self.emit(1)
  311. self.compile_arg(op.children[0])
  312. self.compile_arg(op.children[1])
  313. self.emit(self.size+2)
  314. elif op.data == "h_jmp":
  315. self.emit(1)
  316. self.emit(0)
  317. self.emit(0)
  318. self.compile_arg(op.children[0])
  319. elif op.data == "h_halt":
  320. self.emit(1)
  321. self.emit(0)
  322. self.emit(0)
  323. self.emit(-1)
  324. elif op.data == "h_out":
  325. self.emit(1)
  326. self.compile_arg(op.children[0])
  327. self.emit(-1)
  328. self.emit(self.size+2)
  329. elif op.data == "h_outn":
  330. self.emit(1)
  331. self.compile_arg(op.children[0])
  332. self.emit(-2)
  333. self.emit(self.size+2)
  334. elif op.data == "h_in":
  335. self.emit(1)
  336. self.emit(-1)
  337. self.compile_arg(op.children[0])
  338. self.emit(self.size+2)
  339. def compile_labels(self):
  340. labels = {}
  341. position = 0
  342. while position < len(self.buffer):
  343. this = self.buffer[position]
  344. if type(this) is tuple and this[0]:
  345. label = this[1]
  346. if label in labels:
  347. raise Exception(f"Duplicated label: {label}.")
  348. elif label in ("IO", "Z", "O", "N", "J", "T", "SP", "EZ", "SZ", "MZ", "JZ", "W", "MM", "DR", "ZZ"):
  349. raise Exception(f"Register override: {label}.")
  350. self.buffer.pop(position)
  351. labels[label] = position + 1
  352. position += 1
  353. position = 0
  354. while position < len(self.buffer):
  355. this = self.buffer[position]
  356. if type(this) is tuple and not this[0]:
  357. label = this[1]
  358. if label not in labels:
  359. raise Exception(f"Undefined label/register: {label}.")
  360. if len(this) == 3:
  361. self.buffer[position] = 1 + (position + labels[label] if this[2] == '+' else position - labels[label])
  362. elif len(this) == 4:
  363. self.buffer[position] = labels[label] + this[3] if this[2] == '+' else labels[label] - this[3]
  364. else:
  365. self.buffer[position] = labels[label]
  366. position += 1
  367. def encode(self):
  368. buffer = b""
  369. for b in self.buffer:
  370. if type(b) is float:
  371. b = struct.pack("<d", b)
  372. else:
  373. b = struct.pack("<q", b)
  374. buffer += b
  375. return buffer
  376. def precompile(self, source):
  377. ast = self.parser.parse(source)
  378. for command in ast.children:
  379. if len(command.children) == 2:
  380. label = command.children[0].value[:-1]
  381. self.emit((True,label))
  382. if type(command.children[1]) is lark.Tree and command.children[1].data != "mixed":
  383. self.compile_operation(command.children[1])
  384. else:
  385. self.compile_arg(command.children[1])
  386. else:
  387. if type(command.children[0]) is lark.Token:
  388. if command.children[0].type == "LABEL":
  389. label = command.children[0].value[:-1]
  390. self.emit((True,label))
  391. else:
  392. with open(command.children[0].value[1:], "r") as f:
  393. self.precompile(f.read())
  394. else:
  395. self.compile_operation(command.children[0])
  396. def compile(self, source):
  397. source += "\n"
  398. for reg in "ABCDEFGHIXYK":
  399. source += f"{reg}:0"
  400. if reg != "K":
  401. source += ";"
  402. self.precompile(source)
  403. self.compile_labels()
  404. return self.encode()
  405. wma = WMA()
  406. try:
  407. if len(sys.argv) == 3:
  408. with open(sys.argv[1], "r") as fin:
  409. with open(sys.argv[2], "wb") as fout:
  410. fout.write(wma.compile(fin.read()))
  411. else:
  412. sys.stdout.buffer.write(wma.compile(sys.stdin.read()))
  413. except Exception as e:
  414. print(e)
  415. sys.exit(1)