asm-nseel-ppc-gcc.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041
  1. #if EEL_F_SIZE == 8
  // Code stub: load the double at 0(r3) into f1, call through CTR, then push the
  // f1 result at 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  // Presumably wraps a double f(double) call -- TODO confirm against the caller.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // callee address, patched when this stub is copied -- confirm in the code generator.
  2. void nseel_asm_1pdd(void)
  3. {
  4. __asm__(
  5. "addis r5, 0, 0xdead\n" // r5 = 0xdead0000
  6. "ori r5, r5, 0xbeef\n" // r5 = 0xdeadbeef
  7. "lfd f1, 0(r3)\n" // f1 = double at r3
  8. "mtctr r5\n" // CTR = call target
  9. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  10. "bctrl\n" // call through CTR
  11. "addi r1, r1, 64\n" // restore r1
  12. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  13. "mr r3, r16\n" // r3 = address of the stored result
  14. :: );
  15. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  16. void nseel_asm_1pdd_end(void){}
  // Code stub: load f2 from 0(r3) and f1 from 0(r14), call through CTR, then push
  // the f1 result at 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  // Presumably wraps a double f(double, double) call -- TODO confirm.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // callee address -- confirm in the code generator.
  17. void nseel_asm_2pdd(void)
  18. {
  19. __asm__(
  20. "addis r7, 0, 0xdead\n" // r7 = 0xdead0000
  21. "ori r7, r7, 0xbeef\n" // r7 = 0xdeadbeef
  22. "lfd f2, 0(r3)\n" // f2 = double at r3
  23. "lfd f1, 0(r14)\n" // f1 = double at r14
  24. "mtctr r7\n" // CTR = call target
  25. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  26. "bctrl\n" // call through CTR
  27. "addi r1, r1, 64\n" // restore r1
  28. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  29. "mr r3, r16\n" // r3 = address of the stored result
  30. :: );
  31. }; // NOTE(review): stray ';' after the function body
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  32. void nseel_asm_2pdd_end(void){}
  // Code stub: same two-double call shape as a (r14, r3) -> f1 call, but the f1
  // result is written back in place to 0(r14) and r3 is set to r14; r16 is not
  // advanced.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // callee address -- confirm in the code generator.
  33. void nseel_asm_2pdds(void)
  34. {
  35. __asm__(
  36. "addis r5, 0, 0xdead\n" // r5 = 0xdead0000
  37. "ori r5, r5, 0xbeef\n" // r5 = 0xdeadbeef
  38. "lfd f2, 0(r3)\n" // f2 = double at r3
  39. "lfd f1, 0(r14)\n" // f1 = double at r14
  40. "mtctr r5\n" // CTR = call target
  41. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  42. "bctrl\n" // call through CTR
  43. "addi r1, r1, 64\n" // restore r1
  44. "stfd f1, 0(r14)\n" // overwrite the double at r14 with the result
  45. "mr r3, r14\n" // r3 = r14
  46. :: );
  47. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  48. void nseel_asm_2pdds_end(void){}
  49. #else // 32 bit floating point calls
  50. #error mac only can do 64 bit floats for now
  51. #endif
  // Code stub: call through CTR with two pointer arguments (r3 = r14, r4 = the
  // incoming r3), then push the f1 result at 8(r16) and leave r3 pointing at it.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // callee address -- confirm in the code generator.
  52. void nseel_asm_2pp(void)
  53. {
  54. // r3=firstparm, r4=second parm, returns in f1
  55. __asm__(
  56. "addis r5, 0, 0xdead\n" // r5 = 0xdead0000
  57. "ori r5, r5, 0xbeef\n" // r5 = 0xdeadbeef
  58. "mtctr r5\n" // CTR = call target
  59. "mr r4, r3\n" // second argument = incoming r3
  60. "mr r3, r14\n" // first argument = r14
  61. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  62. "bctrl\n" // call through CTR
  63. "addi r1, r1, 64\n" // restore r1
  64. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  65. "mr r3, r16\n" // r3 = address of the stored result
  66. :: );
  67. }; // NOTE(review): stray ';' after the function body
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  68. void nseel_asm_2pp_end(void){}
  // Code stub: call through CTR with r3 passed through unchanged, then push the
  // f1 result at 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // callee address -- confirm in the code generator.
  69. void nseel_asm_1pp(void)
  70. {
  71. __asm__(
  72. "addis r5, 0, 0xdead\n" // r5 = 0xdead0000
  73. "ori r5, r5, 0xbeef\n" // r5 = 0xdeadbeef
  74. "mtctr r5\n" // CTR = call target
  75. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  76. "bctrl\n" // call through CTR
  77. "addi r1, r1, 64\n" // restore r1
  78. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  79. "mr r3, r16\n" // r3 = address of the stored result
  80. :: );
  81. }; // NOTE(review): stray ';' after the function body
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  82. void nseel_asm_1pp_end(void){}
  83. //---------------------------------------------------------------------------------------------------------------
  84. // do nothing, eh
  // Intentionally empty stub: contains no instructions of its own.
  85. void nseel_asm_exec2(void)
  86. {
  87. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  88. void nseel_asm_exec2_end(void) { }
  // Code stub: approximate 1/sqrt(x) for the double at 0(r3) using the hardware
  // reciprocal-square-root estimate, push the result at 8(r16) and point r3 at it.
  89. void nseel_asm_invsqrt(void)
  90. {
  91. __asm__(
  92. "lfd f1, 0(r3)\n" // f1 = double at r3
  93. "frsqrte f1, f1\n" // less accurate than our x86 equivalent, but invsqrt() is inherently inaccurate anyway
  94. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  95. "mr r3, r16\n" // r3 = address of the stored result
  96. );
  97. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  98. void nseel_asm_invsqrt_end(void) {}
  99. //---------------------------------------------------------------------------------------------------------------
  // Code stub: square the double at 0(r3), push the result at 8(r16) (r16
  // advances by 8) and leave r3 pointing at it.
  100. void nseel_asm_sqr(void)
  101. {
  102. __asm__(
  103. "lfd f1, 0(r3)\n" // f1 = double at r3
  104. "fmul f1, f1, f1\n" // f1 = f1 * f1
  105. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  106. "mr r3, r16\n" // r3 = address of the stored result
  107. );
  108. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  109. void nseel_asm_sqr_end(void) {}
  110. //---------------------------------------------------------------------------------------------------------------
  // Code stub: take the absolute value of the double at 0(r3), push the result
  // at 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  111. void nseel_asm_abs(void)
  112. {
  113. __asm__(
  114. "lfd f1, 0(r3)\n" // f1 = double at r3
  115. "fabs f1, f1\n" // f1 = |f1|
  116. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  117. "mr r3, r16\n" // r3 = address of the stored result
  118. );
  119. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  120. void nseel_asm_abs_end(void) {}
  121. //---------------------------------------------------------------------------------------------------------------
  // Code stub: copy the double at 0(r3) to 0(r14). r3 and r16 are left untouched.
  122. void nseel_asm_assign(void)
  123. {
  124. __asm__(
  125. "lfd f1, 0(r3)\n" // f1 = double at r3 (source)
  126. "stfd f1, 0(r14)\n" // store it at r14 (destination)
  127. );
  128. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  129. void nseel_asm_assign_end(void) {}
  130. //---------------------------------------------------------------------------------------------------------------
  // Code stub: add the doubles at 0(r3) and 0(r14), push the sum at 8(r16)
  // (r16 advances by 8) and leave r3 pointing at it.
  131. void nseel_asm_add(void)
  132. {
  133. __asm__(
  134. "lfd f1, 0(r3)\n" // f1 = double at r3
  135. "lfd f2, 0(r14)\n" // f2 = double at r14
  136. "fadd f1, f1, f2\n" // f1 = f1 + f2
  137. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  138. "mr r3, r16\n" // r3 = address of the stored result
  139. );
  140. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  141. void nseel_asm_add_end(void) {}
  // Code stub: in-place add. Adds the double at 0(r3) to the double at 0(r14),
  // writes the sum back to 0(r14) and sets r3 = r14. r16 is not advanced.
  142. void nseel_asm_add_op(void)
  143. {
  144. __asm__(
  145. "lfd f1, 0(r3)\n" // f1 = double at r3
  146. "lfd f2, 0(r14)\n" // f2 = double at r14
  147. "fadd f1, f1, f2\n" // f1 = f1 + f2
  148. "stfd f1, 0(r14)\n" // overwrite the double at r14
  149. "mr r3, r14\n" // r3 = r14
  150. );
  151. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  152. void nseel_asm_add_op_end(void) {}
  153. //---------------------------------------------------------------------------------------------------------------
  // Code stub: compute (double at r14) - (double at r3), push the difference at
  // 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  154. void nseel_asm_sub(void)
  155. {
  156. __asm__(
  157. "lfd f1, 0(r3)\n" // f1 = double at r3 (subtrahend)
  158. "lfd f2, 0(r14)\n" // f2 = double at r14 (minuend)
  159. "fsub f1, f2, f1\n" // f1 = f2 - f1
  160. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  161. "mr r3, r16\n" // r3 = address of the stored result
  162. );
  163. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  164. void nseel_asm_sub_end(void) {}
  // Code stub: in-place subtract. Computes (double at r14) - (double at r3),
  // writes the result back to 0(r14) and sets r3 = r14. r16 is not advanced.
  165. void nseel_asm_sub_op(void)
  166. {
  167. __asm__(
  168. "lfd f1, 0(r3)\n" // f1 = double at r3 (subtrahend)
  169. "lfd f2, 0(r14)\n" // f2 = double at r14 (minuend)
  170. "fsub f1, f2, f1\n" // f1 = f2 - f1
  171. "stfd f1, 0(r14)\n" // overwrite the double at r14
  172. "mr r3, r14\n" // r3 = r14
  173. );
  174. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  175. void nseel_asm_sub_op_end(void) {}
  176. //---------------------------------------------------------------------------------------------------------------
  // Code stub: multiply the doubles at 0(r14) and 0(r3), push the product at
  // 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  177. void nseel_asm_mul(void)
  178. {
  179. __asm__(
  180. "lfd f1, 0(r3)\n" // f1 = double at r3
  181. "lfd f2, 0(r14)\n" // f2 = double at r14
  182. "fmul f1, f2, f1\n" // f1 = f2 * f1
  183. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  184. "mr r3, r16\n" // r3 = address of the stored result
  185. );
  186. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  187. void nseel_asm_mul_end(void) {}
  // Code stub: in-place multiply. Multiplies the double at 0(r14) by the double
  // at 0(r3), writes the product back to 0(r14) and sets r3 = r14.
  188. void nseel_asm_mul_op(void)
  189. {
  190. __asm__(
  191. "lfd f1, 0(r3)\n" // f1 = double at r3
  192. "lfd f2, 0(r14)\n" // f2 = double at r14
  193. "fmul f1, f2, f1\n" // f1 = f2 * f1
  194. "stfd f1, 0(r14)\n" // overwrite the double at r14
  195. "mr r3, r14\n" // r3 = r14
  196. );
  197. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  198. void nseel_asm_mul_op_end(void) {}
  199. //---------------------------------------------------------------------------------------------------------------
  // Code stub: compute (double at r14) / (double at r3), push the quotient at
  // 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  200. void nseel_asm_div(void)
  201. {
  202. __asm__(
  203. "lfd f1, 0(r3)\n" // f1 = double at r3 (divisor)
  204. "lfd f2, 0(r14)\n" // f2 = double at r14 (dividend)
  205. "fdiv f1, f2, f1\n" // f1 = f2 / f1
  206. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  207. "mr r3, r16\n" // r3 = address of the stored result
  208. );
  209. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  210. void nseel_asm_div_end(void) {}
  // Code stub: in-place divide. Computes (double at r14) / (double at r3),
  // writes the quotient back to 0(r14) and sets r3 = r14. r16 is not advanced.
  211. void nseel_asm_div_op(void)
  212. {
  213. __asm__(
  214. "lfd f1, 0(r3)\n" // f1 = double at r3 (divisor)
  215. "lfd f2, 0(r14)\n" // f2 = double at r14 (dividend)
  216. "fdiv f1, f2, f1\n" // f1 = f2 / f1
  217. "stfd f1, 0(r14)\n" // overwrite the double at r14
  218. "mr r3, r14\n" // r3 = r14
  219. );
  220. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  221. void nseel_asm_div_op_end(void) {}
  222. //---------------------------------------------------------------------------------------------------------------
  // Code stub: integer modulo of two doubles. Takes |value at r14| and
  // |value at r3|, truncates both to 32-bit integers, computes
  // int(r14 value) % int(r3 value), converts the remainder back to a double,
  // pushes it at 8(r16) (r16 advances by 8) and leaves r3 pointing at it.
  // The bytes at 8(r16)..23(r16) are used as scratch space before the push.
  // NOTE(review): there is no check for a zero divisor; divw leaves its result
  // undefined in that case.
  223. void nseel_asm_mod(void)
  224. {
  225. __asm__(
  226. "lfd f1, 0(r3)\n" // f1 = double at r3 (divisor)
  227. "lfd f2, 0(r14)\n" // f2 = double at r14 (dividend)
  228. "fabs f1, f1\n"
  229. "fabs f2, f2\n"
  230. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  231. "fctiwz f2, f2\n"
  232. "stfd f1, 8(r16)\n" // spill both to scratch so the integer words can be reloaded
  233. "stfd f2, 16(r16)\n"
  234. "lwz r10, 12(r16)\n" // second word of each spilled double holds the integer
  235. "lwz r11, 20(r16)\n" // r10 and r11 now hold the two integers
  236. "divw r12, r11, r10\n" // r12 = r11 / r10
  237. "mullw r12, r12, r10\n" // r12 = quotient * r10
  238. "subf r10, r12, r11\n" // r10 = r11 - r12 (the remainder)
  239. "addis r11, 0, 0x4330\n" // r11 = 0x43300000
  240. "addis r12, 0, 0x8000\n" // r12 = 0x80000000
  241. "xoris r10, r10, 0x8000\n" // flip the sign bit of the remainder
  242. "stw r11, 8(r16)\n" // 0x43300000
  243. "stw r10, 12(r16)\n" // our integer sign flipped
  244. "stw r11, 16(r16)\n" // 0x43300000
  245. "stw r12, 20(r16)\n" // 0x80000000
  246. "lfd f1, 8(r16)\n" // reload the two constructed bit patterns as doubles
  247. "lfd f2, 16(r16)\n"
  248. "fsub f1, f1, f2\n" // their difference is the integer as a double
  249. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  250. "mr r3, r16\n" // r3 = address of the stored result
  251. );
  252. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  253. void nseel_asm_mod_end(void) {}
  // Code stub: in-place integer modulo. Takes |value at r14| and |value at r3|,
  // truncates both to 32-bit integers, computes int(r14 value) % int(r3 value),
  // converts the remainder back to a double, writes it to 0(r14) and sets
  // r3 = r14. The bytes at 8(r16)..23(r16) are used as scratch; r16 is unchanged.
  // NOTE(review): there is no check for a zero divisor; divw leaves its result
  // undefined in that case.
  254. void nseel_asm_mod_op(void)
  255. {
  256. __asm__(
  257. "lfd f1, 0(r3)\n" // f1 = double at r3 (divisor)
  258. "lfd f2, 0(r14)\n" // f2 = double at r14 (dividend)
  259. "fabs f1, f1\n"
  260. "fabs f2, f2\n"
  261. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  262. "fctiwz f2, f2\n"
  263. "stfd f1, 8(r16)\n" // spill both to scratch so the integer words can be reloaded
  264. "stfd f2, 16(r16)\n"
  265. "lwz r10, 12(r16)\n" // second word of each spilled double holds the integer
  266. "lwz r11, 20(r16)\n" // r10 and r11 now hold the two integers
  267. "divw r12, r11, r10\n" // r12 = r11 / r10
  268. "mullw r12, r12, r10\n" // r12 = quotient * r10
  269. "subf r10, r12, r11\n" // r10 = r11 - r12 (the remainder)
  270. "addis r11, 0, 0x4330\n" // r11 = 0x43300000
  271. "addis r12, 0, 0x8000\n" // r12 = 0x80000000
  272. "xoris r10, r10, 0x8000\n" // flip the sign bit of the remainder
  273. "stw r11, 8(r16)\n" // 0x43300000
  274. "stw r10, 12(r16)\n" // our integer sign flipped
  275. "stw r11, 16(r16)\n" // 0x43300000
  276. "stw r12, 20(r16)\n" // 0x80000000
  277. "lfd f1, 8(r16)\n" // reload the two constructed bit patterns as doubles
  278. "lfd f2, 16(r16)\n"
  279. "fsub f1, f1, f2\n" // their difference is the integer as a double
  280. "stfd f1, 0(r14)\n" // overwrite the double at r14
  281. "mr r3, r14\n" // r3 = r14
  282. );
  283. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  284. void nseel_asm_mod_op_end(void) {}
  285. //---------------------------------------------------------------------------------------------------------------
  // Code stub: bitwise OR of two doubles. Truncates the doubles at 0(r3) and
  // 0(r14) to 32-bit integers, ORs them, converts the result back to a double,
  // pushes it at 8(r16) (r16 advances by 8) and leaves r3 pointing at it.
  // The bytes at 8(r16)..23(r16) are used as scratch space before the push.
  286. void nseel_asm_or(void)
  287. {
  288. __asm__(
  289. "lfd f1, 0(r3)\n" // f1 = double at r3
  290. "lfd f2, 0(r14)\n" // f2 = double at r14
  291. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  292. "fctiwz f2, f2\n"
  293. "stfd f1, 8(r16)\n" // spill both to scratch so the integer words can be reloaded
  294. "stfd f2, 16(r16)\n"
  295. "lwz r10, 12(r16)\n" // second word of each spilled double holds the integer
  296. "lwz r11, 20(r16)\n" // r10 and r11 now hold the two integers
  297. "or r10, r10, r11\n" // r10 has the result
  298. "addis r11, 0, 0x4330\n" // r11 = 0x43300000
  299. "addis r12, 0, 0x8000\n" // r12 = 0x80000000
  300. "xoris r10, r10, 0x8000\n" // flip the sign bit of the result
  301. "stw r11, 8(r16)\n" // 0x43300000
  302. "stw r10, 12(r16)\n" // our integer sign flipped
  303. "stw r11, 16(r16)\n" // 0x43300000
  304. "stw r12, 20(r16)\n" // 0x80000000
  305. "lfd f1, 8(r16)\n" // reload the two constructed bit patterns as doubles
  306. "lfd f2, 16(r16)\n"
  307. "fsub f1, f1, f2\n" // their difference is the integer as a double
  308. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  309. "mr r3, r16\n" // r3 = address of the stored result
  310. );
  311. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  312. void nseel_asm_or_end(void) {}
  // Code stub: in-place bitwise OR. Truncates the doubles at 0(r3) and 0(r14)
  // to 32-bit integers, ORs them, converts the result back to a double, writes
  // it to 0(r14) and sets r3 = r14. The bytes at 8(r16)..23(r16) are used as
  // scratch; r16 itself is unchanged.
  313. void nseel_asm_or_op(void)
  314. {
  315. __asm__(
  316. "lfd f1, 0(r3)\n" // f1 = double at r3
  317. "lfd f2, 0(r14)\n" // f2 = double at r14
  318. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  319. "fctiwz f2, f2\n"
  320. "stfd f1, 8(r16)\n" // spill both to scratch so the integer words can be reloaded
  321. "stfd f2, 16(r16)\n"
  322. "lwz r10, 12(r16)\n" // second word of each spilled double holds the integer
  323. "lwz r11, 20(r16)\n" // r10 and r11 now hold the two integers
  324. "or r10, r10, r11\n" // r10 has the result
  325. "addis r11, 0, 0x4330\n" // r11 = 0x43300000
  326. "addis r12, 0, 0x8000\n" // r12 = 0x80000000
  327. "xoris r10, r10, 0x8000\n" // flip the sign bit of the result
  328. "stw r11, 8(r16)\n" // 0x43300000
  329. "stw r10, 12(r16)\n" // our integer sign flipped
  330. "stw r11, 16(r16)\n" // 0x43300000
  331. "stw r12, 20(r16)\n" // 0x80000000
  332. "lfd f1, 8(r16)\n" // reload the two constructed bit patterns as doubles
  333. "lfd f2, 16(r16)\n"
  334. "fsub f1, f1, f2\n" // their difference is the integer as a double
  335. "stfd f1, 0(r14)\n" // overwrite the double at r14
  336. "mr r3, r14\n" // r3 = r14
  337. );
  338. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  339. void nseel_asm_or_op_end(void) {}
  340. //---------------------------------------------------------------------------------------------------------------
  // Code stub: bitwise AND of two doubles. Truncates the doubles at 0(r3) and
  // 0(r14) to 32-bit integers, ANDs them, converts the result back to a double,
  // pushes it at 8(r16) (r16 advances by 8) and leaves r3 pointing at it.
  // The bytes at 8(r16)..23(r16) are used as scratch space before the push.
  341. void nseel_asm_and(void)
  342. {
  343. __asm__(
  344. "lfd f1, 0(r3)\n" // f1 = double at r3
  345. "lfd f2, 0(r14)\n" // f2 = double at r14
  346. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  347. "fctiwz f2, f2\n"
  348. "stfd f1, 8(r16)\n" // spill both to scratch so the integer words can be reloaded
  349. "stfd f2, 16(r16)\n"
  350. "lwz r10, 12(r16)\n" // second word of each spilled double holds the integer
  351. "lwz r11, 20(r16)\n" // r10 and r11 now hold the two integers
  352. "and r10, r10, r11\n" // r10 has the result
  353. "addis r11, 0, 0x4330\n" // r11 = 0x43300000
  354. "addis r12, 0, 0x8000\n" // r12 = 0x80000000
  355. "xoris r10, r10, 0x8000\n" // flip the sign bit of the result
  356. "stw r11, 8(r16)\n" // 0x43300000
  357. "stw r10, 12(r16)\n" // our integer sign flipped
  358. "stw r11, 16(r16)\n" // 0x43300000
  359. "stw r12, 20(r16)\n" // 0x80000000
  360. "lfd f1, 8(r16)\n" // reload the two constructed bit patterns as doubles
  361. "lfd f2, 16(r16)\n"
  362. "fsub f1, f1, f2\n" // their difference is the integer as a double
  363. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  364. "mr r3, r16\n" // r3 = address of the stored result
  365. );}
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  366. void nseel_asm_and_end(void) {}
  // Code stub: in-place bitwise AND. Truncates the doubles at 0(r3) and 0(r14)
  // to 32-bit integers, ANDs them, converts the result back to a double, writes
  // it to 0(r14) and sets r3 = r14. The bytes at 8(r16)..23(r16) are used as
  // scratch; r16 itself is unchanged.
  367. void nseel_asm_and_op(void)
  368. {
  369. __asm__(
  370. "lfd f1, 0(r3)\n" // f1 = double at r3
  371. "lfd f2, 0(r14)\n" // f2 = double at r14
  372. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  373. "fctiwz f2, f2\n"
  374. "stfd f1, 8(r16)\n" // spill both to scratch so the integer words can be reloaded
  375. "stfd f2, 16(r16)\n"
  376. "lwz r10, 12(r16)\n" // second word of each spilled double holds the integer
  377. "lwz r11, 20(r16)\n" // r10 and r11 now hold the two integers
  378. "and r10, r10, r11\n" // r10 has the result
  379. "addis r11, 0, 0x4330\n" // r11 = 0x43300000
  380. "addis r12, 0, 0x8000\n" // r12 = 0x80000000
  381. "xoris r10, r10, 0x8000\n" // flip the sign bit of the result
  382. "stw r11, 8(r16)\n" // 0x43300000
  383. "stw r10, 12(r16)\n" // our integer sign flipped
  384. "stw r11, 16(r16)\n" // 0x43300000
  385. "stw r12, 20(r16)\n" // 0x80000000
  386. "lfd f1, 8(r16)\n" // reload the two constructed bit patterns as doubles
  387. "lfd f2, 16(r16)\n"
  388. "fsub f1, f1, f2\n" // their difference is the integer as a double
  389. "stfd f1, 0(r14)\n" // overwrite the double at r14
  390. "mr r3, r14\n" // r3 = r14
  391. );
  392. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  393. void nseel_asm_and_op_end(void) {}
  394. //---------------------------------------------------------------------------------------------------------------
  // Unary plus: empty stub, contains no instructions of its own.
  395. void nseel_asm_uplus(void) // this is the same as doing nothing, it seems
  396. {
  397. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  398. void nseel_asm_uplus_end(void) {}
  399. //---------------------------------------------------------------------------------------------------------------
  // Code stub: negate the double at 0(r3), push the result at 8(r16) (r16
  // advances by 8) and leave r3 pointing at it.
  400. void nseel_asm_uminus(void)
  401. {
  402. __asm__(
  403. "lfd f1, 0(r3)\n" // f1 = double at r3
  404. "fneg f1, f1\n" // f1 = -f1
  405. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  406. "mr r3, r16\n" // r3 = address of the stored result
  407. );
  408. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  409. void nseel_asm_uminus_end(void) {}
  410. //---------------------------------------------------------------------------------------------------------------
  // Code stub: sign of the double at 0(r3), compared against a reference double
  // loaded through r5. Pushes -1.0 if the value is less than the reference,
  // +1.0 if it is greater, and the value itself otherwise; the result goes to
  // 8(r16) (r16 advances by 8) and r3 is left pointing at it.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // address of the reference double (presumably 0.0) -- confirm in the code generator.
  411. void nseel_asm_sign(void)
  412. {
  413. __asm__(
  414. "lfd f1, 0(r3)\n" // f1 = double at r3
  415. "addis r5, 0, 0xdead\n" // r5 = 0xdead0000
  416. "ori r5, r5, 0xbeef\n" // r5 = 0xdeadbeef
  417. "lfd f2, 0(r5)\n" // f2 = reference double at r5
  418. "lis r9, 0xbff0\n" // r9 = 0xbff00000, high word of -1.0
  419. "fcmpu cr7, f1, f2\n"
  420. "blt- cr7, 0f\n" // f1 < f2: store -1.0
  421. "ble- cr7, 1f\n" // not greater (and not less): store f1 itself
  422. " lis r9, 0x3ff0\n" // f1 > f2: r9 = 0x3ff00000, high word of +1.0
  423. "0:\n"
  424. " li r10, 0\n" // low word of +/-1.0 is zero
  425. " stwu r9, 8(r16)\n" // r16 += 8, store the high word there
  426. " stw r10, 4(r16)\n" // store the low word after it
  427. " b 2f\n"
  428. "1:\n"
  429. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 unchanged
  430. "2:\n"
  431. " mr r3, r16\n" // r3 = address of the stored result
  432. ::
  433. );
  434. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  435. void nseel_asm_sign_end(void) {}
  436. //---------------------------------------------------------------------------------------------------------------
  // Code stub: boolean NOT. Compares |double at r3| against a threshold double
  // loaded through the first placeholder, then loads the result from one of two
  // further placeholder addresses (second if |x| is not below the threshold,
  // third if it is), pushes it at 8(r16) and leaves r3 pointing at it.
  // NOTE(review): the three 0xdeadbeef constants look like placeholders patched
  // in order of appearance; presumably threshold, then the "false" and "true"
  // constants -- confirm in the code generator.
  437. void nseel_asm_bnot(void)
  438. {
  439. __asm__(
  440. "addis r5, 0, 0xdead\n" // placeholder 1: address of the threshold
  441. "ori r5, r5, 0xbeef\n"
  442. "lfd f2, 0(r5)\n" // f2 = threshold
  443. "lfd f1, 0(r3)\n" // f1 = double at r3
  444. "fabs f1, f1\n"
  445. "fcmpu cr7, f1, f2\n"
  446. "blt cr7, 0f\n" // |x| < threshold: take the second branch
  447. "addis r5, 0, 0xdead\n" // placeholder 2: result when |x| is not below the threshold
  448. "ori r5, r5, 0xbeef\n"
  449. "lfd f1, 0(r5)\n"
  450. "b 1f\n"
  451. "0:\n"
  452. "addis r5, 0, 0xdead\n" // placeholder 3: result when |x| is below the threshold
  453. "ori r5, r5, 0xbeef\n"
  454. "lfd f1, 0(r5)\n"
  455. "1:\n"
  456. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  457. " mr r3, r16\n" // r3 = address of the stored result
  458. ::
  459. );
  460. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  461. void nseel_asm_bnot_end(void) {}
  462. //---------------------------------------------------------------------------------------------------------------
  // Code stub: conditional call. Compares |double at r3| against a threshold
  // double loaded through the first placeholder and calls the address in r6
  // when |x| is not below the threshold, or the address in r7 when it is.
  // r1 is not adjusted around the call here.
  // NOTE(review): the three 0xdeadbeef constants look like placeholders patched
  // in order of appearance (threshold address, then the two branch targets) --
  // confirm in the code generator.
  463. void nseel_asm_if(void)
  464. {
  465. __asm__(
  466. "addis r5, 0, 0xdead\n" // placeholder 1: address of the threshold
  467. "ori r5, r5, 0xbeef\n"
  468. "lfd f2, 0(r5)\n" // f2 = threshold
  469. "lfd f1, 0(r3)\n" // f1 = double at r3
  470. "addis r6, 0, 0xdead\n" // placeholder 2: target when |x| >= threshold
  471. "ori r6, r6, 0xbeef\n"
  472. "addis r7, 0, 0xdead\n" // placeholder 3: target when |x| < threshold
  473. "ori r7, r7, 0xbeef\n"
  474. "fabs f1, f1\n"
  475. "fcmpu cr7, f1, f2\n"
  476. "blt cr7, 0f\n"
  477. " mtctr r6\n"
  478. "b 1f\n"
  479. "0:\n"
  480. " mtctr r7\n"
  481. "1:\n"
  482. "bctrl\n" // call whichever target was selected
  483. :: );
  484. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  485. void nseel_asm_if_end(void) {}
  486. //---------------------------------------------------------------------------------------------------------------
  // Code stub: counted loop. Truncates the double at 0(r3) to an integer count,
  // skips everything if the count is <= 0, clamps it to
  // NSEEL_LOOPFUNC_SUPPORT_MAXLEN, then calls the address in r6 that many times.
  // r5 (remaining count), r6 (target) and r16 are saved on the stack around each
  // call and restored afterwards. 8(r16) is used as scratch for the conversion.
  // NOTE(review): the 0xdead/0xbeef immediates look like a placeholder for the
  // loop body address -- confirm in the code generator.
  487. void nseel_asm_repeat(void)
  488. {
  489. __asm__(
  490. "addis r6, 0, 0xdead\n" // r6 = 0xdeadbeef (call target placeholder)
  491. "ori r6, r6, 0xbeef\n"
  492. "addis r7, 0, ha16(%0)\n" // r7 = NSEEL_LOOPFUNC_SUPPORT_MAXLEN
  493. "addi r7, r7, lo16(%0)\n"
  494. "lfd f1, 0(r3)\n" // f1 = double at r3
  495. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  496. "stfd f1, 8(r16)\n"
  497. "lwz r5, 12(r16)\n" // r5 has count now
  498. "cmpwi cr0, r5, 0\n"
  499. "ble cr0, 1f\n" // count <= 0: skip the loop entirely
  500. "cmpw cr0, r7, r5\n"
  501. "bge cr0, 0f\n" // max >= count: keep the count as is
  502. "mr r5, r7\n" // set r5 to max if we have to
  503. "0:\n"
  504. "stw r5, -4(r1)\n" // save count, target and r16 below r1 ...
  505. "stw r6, -8(r1)\n"
  506. "stwu r16, -12(r1)\n" // ... and move r1 down by 12 to cover them
  507. "mtctr r6\n"
  508. "bctrl\n" // call the target
  509. "lwz r16, 0(r1)\n" // restore r16, target and count
  510. "lwz r6, 4(r1)\n"
  511. "lwz r5, 8(r1)\n"
  512. "addi r1, r1, 12\n" // restore r1
  513. "addi r5, r5, -1\n" // one iteration done
  514. "cmpwi cr0, r5, 0\n"
  515. "bgt cr0, 0b\n" // loop while the count is still positive
  516. "1:\n"
  517. ::"g" (NSEEL_LOOPFUNC_SUPPORT_MAXLEN)
  518. );
  519. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  520. void nseel_asm_repeat_end(void) {}
  // Code stub: conditional loop. Calls the address in r6 repeatedly; after each
  // call it compares |double at r3| against a threshold double loaded through
  // r7 and stops when the value is below the threshold, or after
  // NSEEL_LOOPFUNC_SUPPORT_MAXLEN iterations. r5 (remaining count), r6 (target)
  // and r16 are saved on the stack around each call and restored afterwards.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders (loop body
  // address, then threshold address) -- confirm in the code generator.
  521. void nseel_asm_repeatwhile(void)
  522. {
  523. __asm__(
  524. "addis r6, 0, 0xdead\n" // placeholder 1: call target
  525. "ori r6, r6, 0xbeef\n"
  526. "addis r5, 0, ha16(%0)\n" // r5 = NSEEL_LOOPFUNC_SUPPORT_MAXLEN (iteration cap)
  527. "addi r5, r5, lo16(%0)\n"
  528. "0:\n"
  529. "stw r5, -4(r1)\n" // save count, target and r16 below r1 ...
  530. "stw r6, -8(r1)\n"
  531. "stwu r16, -12(r1)\n" // ... and move r1 down by 12 to cover them
  532. "mtctr r6\n"
  533. "bctrl\n" // call the target
  534. "lwz r16, 0(r1)\n" // restore r16, target and count
  535. "lwz r6, 4(r1)\n"
  536. "lwz r5, 8(r1)\n"
  537. "addi r1, r1, 12\n" // restore r1
  538. "addi r5, r5, -1\n" // one iteration done
  539. "addis r7, 0, 0xdead\n" // placeholder 2: address of the threshold
  540. "ori r7, r7, 0xbeef\n"
  541. "lfd f2, 0(r7)\n" // f2 = threshold
  542. "lfd f1, 0(r3)\n" // f1 = double at r3 after the call
  543. "fabs f1, f1\n"
  544. "fcmpu cr7, f1, f2\n"
  545. "blt cr7, 1f\n" // |x| < threshold: leave the loop
  546. "cmpwi cr0, r5, 0\n"
  547. "bgt cr0, 0b\n" // iterations remaining: go again
  548. "1:\n"
  549. ::"g" (NSEEL_LOOPFUNC_SUPPORT_MAXLEN)
  550. );
  551. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  552. void nseel_asm_repeatwhile_end(void) {}
  // Code stub: short-circuit boolean AND. If |double at r3| is below the
  // threshold the result is 0 and the call is skipped; otherwise the address in
  // r6 is called and |double at the new r3| is tested the same way. The result
  // (0, or the double loaded through the last placeholder) is pushed at 8(r16)
  // and r3 is left pointing at it. r1 is not adjusted around the call here.
  // NOTE(review): the four 0xdeadbeef constants look like placeholders patched
  // in order of appearance (threshold, call target, threshold, "true" constant)
  // -- confirm in the code generator.
  553. void nseel_asm_band(void)
  554. {
  555. __asm__(
  556. "addis r5, 0, 0xdead\n" // placeholder 1: address of the threshold
  557. "ori r5, r5, 0xbeef\n"
  558. "lfd f2, 0(r5)\n" // f2 = threshold
  559. "lfd f1, 0(r3)\n" // f1 = double at r3
  560. "fabs f1, f1\n"
  561. "fcmpu cr7, f1, f2\n"
  562. "blt cr7, 0f\n" // first operand is "false": result 0, skip the call
  563. "addis r6, 0, 0xdead\n" // placeholder 2: call target
  564. "ori r6, r6, 0xbeef\n"
  565. " mtctr r6\n"
  566. " bctrl\n" // call the target
  567. " addis r5, 0, 0xdead\n" // placeholder 3: address of the threshold again
  568. " ori r5, r5, 0xbeef\n"
  569. " lfd f2, 0(r5)\n"
  570. " lfd f1, 0(r3)\n" // f1 = double at r3 after the call
  571. " fabs f1, f1\n"
  572. " fcmpu cr7, f1, f2\n"
  573. " bge cr7, 1f\n" // second operand not below the threshold: result "true"
  574. "0:\n"
  575. " fsub f1, f1, f1\n" // set f1 to 0!
  576. " b 2f\n"
  577. "1:\n"
  578. " addis r5, 0, 0xdead\n" // set f1 to 1
  579. " ori r5, r5, 0xbeef\n"
  580. " lfd f1, 0(r5)\n"
  581. "2:\n"
  582. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  583. "mr r3, r16\n" // r3 = address of the stored result
  584. :: );
  585. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  586. void nseel_asm_band_end(void) {}
  // Code stub: short-circuit boolean OR. If |double at r3| is not below the
  // threshold the result is the "true" constant and the call is skipped;
  // otherwise the address in r6 is called and |double at the new r3| is tested
  // the same way. The result (0, or the double loaded through the last
  // placeholder) is pushed at 8(r16) and r3 is left pointing at it.
  // r1 is not adjusted around the call here.
  // NOTE(review): the four 0xdeadbeef constants look like placeholders patched
  // in order of appearance (threshold, call target, threshold, "true" constant)
  // -- confirm in the code generator.
  587. void nseel_asm_bor(void)
  588. {
  589. __asm__(
  590. "addis r5, 0, 0xdead\n" // placeholder 1: address of the threshold
  591. "ori r5, r5, 0xbeef\n"
  592. "lfd f2, 0(r5)\n" // f2 = threshold
  593. "lfd f1, 0(r3)\n" // f1 = double at r3
  594. "fabs f1, f1\n"
  595. "fcmpu cr7, f1, f2\n"
  596. "bge cr7, 0f\n" // first operand is "true": skip the call
  597. "addis r6, 0, 0xdead\n" // placeholder 2: call target
  598. "ori r6, r6, 0xbeef\n"
  599. " mtctr r6\n"
  600. " bctrl\n" // call the target
  601. " addis r5, 0, 0xdead\n" // placeholder 3: address of the threshold again
  602. " ori r5, r5, 0xbeef\n"
  603. " lfd f2, 0(r5)\n"
  604. " lfd f1, 0(r3)\n" // f1 = double at r3 after the call
  605. " fabs f1, f1\n"
  606. " fcmpu cr7, f1, f2\n"
  607. " blt cr7, 1f\n" // second operand below the threshold: result 0
  608. "0:\n"
  609. " addis r5, 0, 0xdead\n" // set f1 to 1
  610. " ori r5, r5, 0xbeef\n"
  611. " lfd f1, 0(r5)\n"
  612. " b 2f\n"
  613. "1:\n"
  614. " fsub f1, f1, f1\n" // set f1 to 0!
  615. "2:\n"
  616. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  617. "mr r3, r16\n" // r3 = address of the stored result
  618. :: );
  619. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  620. void nseel_asm_bor_end(void) {}
  621. //---------------------------------------------------------------------------------------------------------------
  // Code stub: approximate equality. Computes |(double at r3) - (double at r14)|,
  // compares it against a threshold double loaded through the first placeholder,
  // and loads the result from the second placeholder address when the difference
  // is not below the threshold, or from the third when it is. The result is
  // pushed at 8(r16) and r3 is left pointing at it.
  // NOTE(review): the three 0xdeadbeef constants look like placeholders patched
  // in order of appearance; which one holds "true" is decided by whoever fills
  // them in -- confirm in the code generator.
  622. void nseel_asm_equal(void)
  623. {
  624. __asm__(
  625. "lfd f1, 0(r3)\n" // f1 = double at r3
  626. "lfd f2, 0(r14)\n" // f2 = double at r14
  627. "addis r5, 0, 0xdead\n" // placeholder 1: address of the threshold
  628. "ori r5, r5, 0xbeef\n"
  629. "fsub f1, f1, f2\n"
  630. "fabs f1, f1\n" // f1 = |difference|
  631. "lfd f2, 0(r5)\n" // f2 = threshold
  632. "fcmpu cr7, f1, f2\n"
  633. "blt cr7, 0f\n"
  634. "addis r5, 0, 0xdead\n" // placeholder 2: result when the difference is not below the threshold
  635. "ori r5, r5, 0xbeef\n"
  636. "b 1f\n"
  637. "0:\n"
  638. "addis r5, 0, 0xdead\n" // placeholder 3: result when the difference is below the threshold
  639. "ori r5, r5, 0xbeef\n"
  640. "1:\n"
  641. "lfd f1, 0(r5)\n" // f1 = selected result constant
  642. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  643. " mr r3, r16\n" // r3 = address of the stored result
  644. ::
  645. );
  646. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  647. void nseel_asm_equal_end(void) {}
  648. //
  649. //---------------------------------------------------------------------------------------------------------------
  // Code stub: approximate inequality. Computes |(double at r3) - (double at r14)|,
  // compares it against a threshold double loaded through the first placeholder,
  // and loads the result from the second placeholder address when the difference
  // is not below the threshold, or from the third when it is. The result is
  // pushed at 8(r16) and r3 is left pointing at it.
  // NOTE(review): the instruction sequence is identical to nseel_asm_equal, so
  // any difference in outcome must come from the values patched into the three
  // 0xdeadbeef placeholders -- confirm in the code generator.
  650. void nseel_asm_notequal(void)
  651. {
  652. __asm__(
  653. "lfd f1, 0(r3)\n" // f1 = double at r3
  654. "lfd f2, 0(r14)\n" // f2 = double at r14
  655. "addis r5, 0, 0xdead\n" // placeholder 1: address of the threshold
  656. "ori r5, r5, 0xbeef\n"
  657. "fsub f1, f1, f2\n"
  658. "fabs f1, f1\n" // f1 = |difference|
  659. "lfd f2, 0(r5)\n" // f2 = threshold
  660. "fcmpu cr7, f1, f2\n"
  661. "blt cr7, 0f\n"
  662. "addis r5, 0, 0xdead\n" // placeholder 2: result when the difference is not below the threshold
  663. "ori r5, r5, 0xbeef\n"
  664. "b 1f\n"
  665. "0:\n"
  666. "addis r5, 0, 0xdead\n" // placeholder 3: result when the difference is below the threshold
  667. "ori r5, r5, 0xbeef\n"
  668. "1:\n"
  669. "lfd f1, 0(r5)\n" // f1 = selected result constant
  670. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  671. " mr r3, r16\n" // r3 = address of the stored result
  672. ::
  673. );
  674. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  675. void nseel_asm_notequal_end(void) {}
  676. //---------------------------------------------------------------------------------------------------------------
  // Code stub: less-than comparison. Tests (double at r14) < (double at r3) and
  // loads the result from the second placeholder address when the test holds,
  // or from the first when it does not. The result is pushed at 8(r16) and r3
  // is left pointing at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance, presumably the "false" then "true" constants -- confirm.
  677. void nseel_asm_below(void)
  678. {
  679. __asm__(
  680. "lfd f1, 0(r3)\n" // f1 = double at r3 (right-hand side)
  681. "lfd f2, 0(r14)\n" // f2 = double at r14 (left-hand side)
  682. "fcmpu cr7, f2, f1\n"
  683. "blt cr7, 0f\n" // f2 < f1
  684. "addis r5, 0, 0xdead\n" // placeholder 1: result when the test fails
  685. "ori r5, r5, 0xbeef\n"
  686. "lfd f1, 0(r5)\n"
  687. "b 1f\n"
  688. "0:\n"
  689. "addis r5, 0, 0xdead\n" // placeholder 2: result when the test holds
  690. "ori r5, r5, 0xbeef\n"
  691. "lfd f1, 0(r5)\n"
  692. "1:\n"
  693. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  694. " mr r3, r16\n" // r3 = address of the stored result
  695. ::
  696. );
  697. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  698. void nseel_asm_below_end(void) {}
  699. //---------------------------------------------------------------------------------------------------------------
  // Code stub: less-than-or-equal comparison. Branches on "not greater than"
  // for (double at r14) versus (double at r3) and loads the result from the
  // second placeholder address when the branch is taken, or from the first
  // when it is not. The result is pushed at 8(r16) and r3 is left pointing at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance, presumably the "false" then "true" constants -- confirm.
  700. void nseel_asm_beloweq(void)
  701. {
  702. __asm__(
  703. "lfd f1, 0(r3)\n" // f1 = double at r3 (right-hand side)
  704. "lfd f2, 0(r14)\n" // f2 = double at r14 (left-hand side)
  705. "fcmpu cr7, f2, f1\n"
  706. "ble cr7, 0f\n" // f2 not greater than f1
  707. "addis r5, 0, 0xdead\n" // placeholder 1: result when the test fails
  708. "ori r5, r5, 0xbeef\n"
  709. "lfd f1, 0(r5)\n"
  710. "b 1f\n"
  711. "0:\n"
  712. "addis r5, 0, 0xdead\n" // placeholder 2: result when the test holds
  713. "ori r5, r5, 0xbeef\n"
  714. "lfd f1, 0(r5)\n"
  715. "1:\n"
  716. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  717. " mr r3, r16\n" // r3 = address of the stored result
  718. ::
  719. );
  720. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  721. void nseel_asm_beloweq_end(void) {}
  722. //---------------------------------------------------------------------------------------------------------------
  // Code stub: greater-than comparison. Tests (double at r14) > (double at r3)
  // and loads the result from the second placeholder address when the test
  // holds, or from the first when it does not. The result is pushed at 8(r16)
  // and r3 is left pointing at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance, presumably the "false" then "true" constants -- confirm.
  723. void nseel_asm_above(void)
  724. {
  725. __asm__(
  726. "lfd f1, 0(r3)\n" // f1 = double at r3 (right-hand side)
  727. "lfd f2, 0(r14)\n" // f2 = double at r14 (left-hand side)
  728. "fcmpu cr7, f2, f1\n"
  729. "bgt cr7, 0f\n" // f2 > f1
  730. "addis r5, 0, 0xdead\n" // placeholder 1: result when the test fails
  731. "ori r5, r5, 0xbeef\n"
  732. "lfd f1, 0(r5)\n"
  733. "b 1f\n"
  734. "0:\n"
  735. "addis r5, 0, 0xdead\n" // placeholder 2: result when the test holds
  736. "ori r5, r5, 0xbeef\n"
  737. "lfd f1, 0(r5)\n"
  738. "1:\n"
  739. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  740. " mr r3, r16\n" // r3 = address of the stored result
  741. ::
  742. );
  743. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  744. void nseel_asm_above_end(void) {}
  // Code stub: greater-than-or-equal comparison. Branches on "not less than"
  // for (double at r14) versus (double at r3) and loads the result from the
  // second placeholder address when the branch is taken, or from the first
  // when it is not. The result is pushed at 8(r16) and r3 is left pointing at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance, presumably the "false" then "true" constants -- confirm.
  745. void nseel_asm_aboveeq(void)
  746. {
  747. __asm__(
  748. "lfd f1, 0(r3)\n" // f1 = double at r3 (right-hand side)
  749. "lfd f2, 0(r14)\n" // f2 = double at r14 (left-hand side)
  750. "fcmpu cr7, f2, f1\n"
  751. "bge cr7, 0f\n" // f2 not less than f1
  752. "addis r5, 0, 0xdead\n" // placeholder 1: result when the test fails
  753. "ori r5, r5, 0xbeef\n"
  754. "lfd f1, 0(r5)\n"
  755. "b 1f\n"
  756. "0:\n"
  757. "addis r5, 0, 0xdead\n" // placeholder 2: result when the test holds
  758. "ori r5, r5, 0xbeef\n"
  759. "lfd f1, 0(r5)\n"
  760. "1:\n"
  761. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  762. " mr r3, r16\n" // r3 = address of the stored result
  763. ::
  764. );
  765. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  766. void nseel_asm_aboveeq_end(void) {}
  // Code stub: minimum of the doubles at 0(r3) and 0(r14). Pushes the smaller
  // value at 8(r16) (r16 advances by 8) and leaves r3 pointing at it.
  767. void nseel_asm_min(void)
  768. {
  769. __asm__(
  770. "lfd f1, 0(r3)\n" // f1 = double at r3
  771. "lfd f2, 0(r14)\n" // f2 = double at r14
  772. "fcmpu cr7, f2, f1\n"
  773. "bgt cr7, 0f\n" // f2 > f1: f1 is already the minimum
  774. "fmr f1, f2\n" // otherwise take f2
  775. "0:\n"
  776. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  777. " mr r3, r16\n" // r3 = address of the stored result
  778. );
  779. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  780. void nseel_asm_min_end(void) {}
  // Code stub: maximum of the doubles at 0(r3) and 0(r14). Pushes the larger
  // value at 8(r16) (r16 advances by 8) and leaves r3 pointing at it.
  781. void nseel_asm_max(void)
  782. {
  783. __asm__(
  784. "lfd f1, 0(r3)\n" // f1 = double at r3
  785. "lfd f2, 0(r14)\n" // f2 = double at r14
  786. "fcmpu cr7, f2, f1\n"
  787. "blt cr7, 0f\n" // f2 < f1: f1 is already the maximum
  788. "fmr f1, f2\n" // otherwise take f2
  789. "0:\n"
  790. " stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  791. " mr r3, r16\n" // r3 = address of the stored result
  792. );
  793. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  794. void nseel_asm_max_end(void) {}
  // Code stub: call through CTR with four integer-register arguments:
  // r3 = first placeholder constant, r4 = r15, r5 = r14, r6 = the incoming r3.
  // Nothing is stored afterwards; r3 is whatever the callee leaves in it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance (presumably a context pointer, then the callee address)
  // -- confirm in the code generator.
  795. void _asm_generic3parm(void)
  796. {
  797. __asm__(
  798. "mr r6, r3\n" // r6 = incoming r3
  799. "addis r3, 0, 0xdead\n" // placeholder 1 -> r3
  800. "ori r3, r3, 0xbeef\n"
  801. "addis r7, 0, 0xdead\n" // placeholder 2 -> r7 (call target)
  802. "ori r7, r7, 0xbeef\n"
  803. "mr r4, r15\n"
  804. "mr r5, r14\n"
  805. "mtctr r7\n"
  806. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  807. "bctrl\n" // call through CTR
  808. "addi r1, r1, 64\n" // restore r1
  809. ::
  810. );
  811. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  812. void _asm_generic3parm_end(void) {}
  // Code stub: call through CTR with four integer-register arguments
  // (r3 = first placeholder constant, r4 = r15, r5 = r14, r6 = the incoming r3),
  // then push the f1 result at 8(r16) (r16 advances by 8) and leave r3 pointing
  // at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance (presumably a context pointer, then the callee address)
  // -- confirm in the code generator.
  813. void _asm_generic3parm_retd(void)
  814. {
  815. __asm__(
  816. "mr r6, r3\n" // r6 = incoming r3
  817. "addis r3, 0, 0xdead\n" // placeholder 1 -> r3
  818. "ori r3, r3, 0xbeef\n"
  819. "addis r7, 0, 0xdead\n" // placeholder 2 -> r7 (call target)
  820. "ori r7, r7, 0xbeef\n"
  821. "mr r4, r15\n"
  822. "mr r5, r14\n"
  823. "mtctr r7\n"
  824. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  825. "bctrl\n" // call through CTR
  826. "addi r1, r1, 64\n" // restore r1
  827. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  828. "mr r3, r16\n" // r3 = address of the stored result
  829. ::
  830. );
  831. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  832. void _asm_generic3parm_retd_end(void) {}
  // Code stub: call through CTR with three integer-register arguments:
  // r3 = first placeholder constant, r4 = r14, r5 = the incoming r3.
  // Nothing is stored afterwards; r3 is whatever the callee leaves in it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance (presumably a context pointer, then the callee address)
  // -- confirm in the code generator.
  833. void _asm_generic2parm(void) // this probably needs to be fixed for ppc
  834. {
  835. __asm__(
  836. "mr r5, r3\n" // r5 = incoming r3
  837. "addis r3, 0, 0xdead\n" // placeholder 1 -> r3
  838. "ori r3, r3, 0xbeef\n"
  839. "addis r7, 0, 0xdead\n" // placeholder 2 -> r7 (call target)
  840. "ori r7, r7, 0xbeef\n"
  841. "mr r4, r14\n"
  842. "mtctr r7\n"
  843. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  844. "bctrl\n" // call through CTR
  845. "addi r1, r1, 64\n" // restore r1
  846. ::
  847. );
  848. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  849. void _asm_generic2parm_end(void) {}
  // Code stub: call through CTR with three integer-register arguments
  // (r3 = first placeholder constant, r4 = r14, r5 = the incoming r3), then push
  // the f1 result at 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance (presumably a context pointer, then the callee address)
  // -- confirm in the code generator.
  850. void _asm_generic2parm_retd(void)
  851. {
  852. __asm__(
  853. "mr r5, r3\n" // r5 = incoming r3
  854. "addis r3, 0, 0xdead\n" // placeholder 1 -> r3
  855. "ori r3, r3, 0xbeef\n"
  856. "addis r7, 0, 0xdead\n" // placeholder 2 -> r7 (call target)
  857. "ori r7, r7, 0xbeef\n"
  858. "mr r4, r14\n"
  859. "mtctr r7\n"
  860. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  861. "bctrl\n" // call through CTR
  862. "addi r1, r1, 64\n" // restore r1
  863. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  864. "mr r3, r16\n" // r3 = address of the stored result
  865. ::
  866. );
  867. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  868. void _asm_generic2parm_retd_end(void) {}
  // Code stub: call through CTR with two integer-register arguments:
  // r3 = first placeholder constant, r4 = the incoming r3.
  // Nothing is stored afterwards; r3 is whatever the callee leaves in it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance (presumably a context pointer, then the callee address)
  // -- confirm in the code generator.
  869. void _asm_generic1parm(void) // this probably needs to be fixed for ppc
  870. {
  871. __asm__(
  872. "mr r4, r3\n" // r4 = incoming r3
  873. "addis r3, 0, 0xdead\n" // placeholder 1 -> r3
  874. "ori r3, r3, 0xbeef\n"
  875. "addis r7, 0, 0xdead\n" // placeholder 2 -> r7 (call target)
  876. "ori r7, r7, 0xbeef\n"
  877. "mtctr r7\n"
  878. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  879. "bctrl\n" // call through CTR
  880. "addi r1, r1, 64\n" // restore r1
  881. ::
  882. );
  883. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  884. void _asm_generic1parm_end(void) {}
  // Code stub: call through CTR with two integer-register arguments
  // (r3 = first placeholder constant, r4 = the incoming r3), then push the f1
  // result at 8(r16) (r16 advances by 8) and leave r3 pointing at it.
  // NOTE(review): the two 0xdeadbeef constants look like placeholders patched in
  // order of appearance (presumably a context pointer, then the callee address)
  // -- confirm in the code generator.
  885. void _asm_generic1parm_retd(void)
  886. {
  887. __asm__(
  888. "mr r4, r3\n" // r4 = incoming r3
  889. "addis r3, 0, 0xdead\n" // placeholder 1 -> r3
  890. "ori r3, r3, 0xbeef\n"
  891. "addis r7, 0, 0xdead\n" // placeholder 2 -> r7 (call target)
  892. "ori r7, r7, 0xbeef\n"
  893. "mtctr r7\n"
  894. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  895. "bctrl\n" // call through CTR
  896. "addi r1, r1, 64\n" // restore r1
  897. "stfdu f1, 8(r16)\n" // r16 += 8, store f1 there
  898. "mr r3, r16\n" // r3 = address of the stored result
  899. ::
  900. );
  901. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  902. void _asm_generic1parm_retd_end(void) {}
  // Code stub: indexed lookup through a helper call. Adds a double loaded
  // through the second placeholder to the double at 0(r3), truncates the sum to
  // a 32-bit integer and calls the target with r3 = first placeholder constant
  // and r4 = that integer. If the call returns a non-zero r3 it is kept as is;
  // if it returns 0, a zero double is pushed at 8(r16) and r3 is pointed at it.
  // 8(r16) is also used as scratch for the conversion before the call.
  // NOTE(review): the three 0xdeadbeef constants look like placeholders patched
  // in order of appearance (context pointer, address of the added constant,
  // callee address); the added constant is presumably a small rounding offset
  // -- confirm in the code generator.
  903. void _asm_megabuf(void)
  904. {
  905. __asm__(
  906. "lfd f1, 0(r3)\n" // f1 = double at r3 (the index)
  907. "addis r3, 0, 0xdead\n" // set up context pointer
  908. "ori r3, r3, 0xbeef\n"
  909. "addis r4, 0, 0xdead\n" // placeholder 2: address of the constant to add
  910. "ori r4, r4, 0xbeef\n"
  911. "lfd f2, 0(r4)\n"
  912. "fadd f1, f2, f1\n" // f1 = constant + index
  913. "addis r7, 0, 0xdead\n" // placeholder 3 -> r7 (call target)
  914. "ori r7, r7, 0xbeef\n"
  915. "mtctr r7\n"
  916. "fctiwz f1, f1\n" // convert to 32-bit integer, rounding toward zero
  917. "stfd f1, 8(r16)\n" // spill to scratch so the integer word can be reloaded
  918. "lwz r4, 12(r16)\n" // r4 = integer index
  919. "subi r1, r1, 64\n" // move r1 down 64 bytes around the call
  920. "bctrl\n" // call through CTR
  921. "addi r1, r1, 64\n" // restore r1
  922. "cmpi cr0, r3, 0\n"
  923. "bne cr0, 0f\n" // non-zero return: keep r3 as the result
  924. "sub r5, r5, r5\n" // r5 = 0
  925. "stwu r5, 8(r16)\n" // r16 += 8, store a zero word there ...
  926. "stw r5, 4(r16)\n" // ... and another after it (together a 0.0 double)
  927. "mr r3, r16\n" // r3 = address of the stored zero
  928. "0:\n"
  929. ::
  930. );
  931. }
  // Empty function defined right after the stub; presumably marks where the
  // stub's code ends -- TODO confirm.
  932. void _asm_megabuf_end(void) {}