moveframe.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. #include <windows.h>
  2. #include <math.h>
  // Pi. Guarded so that a definition already supplied by <math.h> (MSVC only
  // provides one when _USE_MATH_DEFINES is set) is not redefined.
  #ifndef M_PI
  #define M_PI 3.14159265358979323846
  #endif

  // Random number source; the pointer itself is defined in another file.
  extern int (*warand)(void);

  // Number of warp-grid points per axis; overwritten by moveframe_init().
  static int XRES=24;
  static int YRES=16;

  // Frame width and height in pixels, stored by moveframe_init().
  static int g_w,g_h;

  // m_wmul[y] holds y * frame width; allocated by moveframe_init().
  static int *m_wmul;

  // Grid and interpolation tables; points just past the m_wmul rows inside
  // the same allocation.
  static int *m_tab;
  10. static int __inline myftol(double d)
  11. {
  12. int a;
  13. __asm
  14. {
  15. fld d
  16. fistp a
  17. mov eax, a
  18. }
  19. }
  // Averages a window of visualization samples and returns it normalized.
  //
  //   visdata  two consecutive blocks of 576 bytes (indices 0..575 and
  //            576..1151 are read)
  //   bc       index of the window centre
  //   bw       window width in samples (values below 1 are treated as 1)
  //   ch       0 = sum both blocks, 1 = first block only, 2 = second block
  //            only; any other value returns 0.0
  //   xorv     each byte b contributes (b ^ xorv) - xorv to the sum
  //
  // The window is clipped to the 0..575 range. If nothing is left after
  // clipping (the window lies entirely left of the data) the result is 0.0;
  // previously that case divided by a zero or negative width and could
  // return NaN.
  static double getvis(unsigned char *visdata, int bc, int bw, int ch, int xorv)
  {
      int x = 0;
      int accum = 0;
      if (ch && ch != 1 && ch != 2) return 0.0;
      if (bw < 1) bw=1;
      bc-=bw/2;                 // move from the centre to the left edge
      if (bc < 0)
      {
          bw+=bc;               // drop the part that sticks out on the left
          bc=0;
      }
      if (bc > 575) bc=575;
      if (bc+bw > 576) bw=576-bc;
      if (bw < 1) return 0.0;   // empty window: avoid 0/0 and negative divisors
      if (!ch)
      {
          for (x = 0; x < bw; x ++)
          {
              accum+=(visdata[bc]^xorv)-xorv;
              accum+=(visdata[bc+576]^xorv)-xorv;
              bc++;
          }
          // two samples were added per position, hence 255 rather than 127.5
          return (double)accum / ((double)bw*255.0);
      }
      else
      {
          if (ch == 2) visdata+=576;
          for (x = 0; x < bw; x ++) accum+=(visdata[bc++]^xorv)-xorv;
          return (double)accum / ((double)bw*127.5);
      }
  }
  51. static unsigned char *m_visdata;
  52. static double getosc(double band, double bandw)
  53. {
  54. return getvis((unsigned char *)m_visdata,myftol(band*576.0),
  55. myftol(bandw*576.0),0,128);
  56. }
  // Tick count captured by moveframe_init(), kept as a double.
  static double fx_gstarttime;

  // Seconds elapsed since fx_gstarttime; refreshed by fx_init().
  static double fx_curtime;

  // Scale used by effect 9; fx_init() sets it to cos(fx_curtime)*2 + 3.
  static double ef10_sc;
  // Returns -1.0 when p is below zero and 1.0 otherwise (zero maps to 1.0).
  static __inline double sign(double p)
  {
      return (p < 0.0) ? -1.0 : 1.0;
  }
  64. static __inline double my_asin(double v)
  65. {
  66. double tmp;
  67. __asm {
  68. fld qword ptr [v]
  69. fmul qword ptr [v]
  70. fld st(0) //Duplicate X**2 on tos.
  71. fld1 //Compute 1-X**2.
  72. fsubr
  73. fdiv //Compute X**2/(1-X**2).
  74. fsqrt //Compute sqrt(x**2/(1-X**2)).
  75. fld1 //To compute full arctangent.
  76. fpatan //Compute atan of the above.
  77. fstp qword ptr [tmp]
  78. }
  79. return tmp;
  80. }
  81. //#define TEST_FX 24
  82. #define RME(n,x) case n: x break;
  83. static void __fxsfunc(int which, double &d, double &r)
  84. {
  85. double t = 0.0;
  86. switch (which)
  87. {
  88. RME(0, r+=(0.1-0.2*d)*(cos(fx_curtime)); d*=0.96;)
  89. RME(1, d*=0.99*(1.0-sin(r*3+fx_curtime*3)/32.0); r+=0.03*sin(d * M_PI * 4.0 +fx_curtime*0.5);)
  90. RME(2, d*=0.94+(cos(r*32.0)*0.06);)
  91. RME(3, d*=1.01+(cos(r*4.0)*0.04); r+=0.03*sin(d * M_PI * 4);)
  92. RME(4, r+=0.1*sin(d*M_PI*5);)
  93. RME(5, t=sin(d*M_PI); d-=8*t*t*t*t*t*0.01; )
  94. RME(6, d*=0.95+(cos(r*5.0 - M_PI/2.50)*0.03); )
  95. RME(7, r+=0.1*cos(fx_curtime); d*=0.96+cos(d*M_PI)*0.05; )
  96. RME(8, t=tan(fx_curtime*0.2);
  97. if (t < -20.0) t=-20.0;
  98. if (t > 20.0) t=20.0;
  99. r+=0.07*cos(d*M_PI)*t;
  100. )
  101. RME(9, t=d; d=d+0.05*cos(r*ef10_sc)+getosc(r,0.2)*0.5; r=r+cos(t*3.14159*ef10_sc)*0.1; )
  102. RME(10, d=atan(d); )
  103. RME(11, d=sin(d); )
  104. RME(12, r=r+sin(d*3.14159*4)*0.1; d=(0.99+0.04*cos(r*32))*d; )
  105. RME(13,d=d-0.01*(fabs(sin(d*3.14159*8))+0.1);)
  106. RME(14,d=d*(1.0+0.5*getosc(d,0.4)); )
  107. RME(15,d=0.3; )
  108. RME(16,d=0.1*cos(r*4.0+fx_curtime*1.3)+d; )
  109. RME(17,r=cos(cos(fx_curtime)*d*M_PI*17)*0.1+r; d=d*cos(r*5.0+fx_curtime)*0.1+d; )
  110. RME(18,r+=sin(r*2.0+cos(fx_curtime*0.2)*8)*0.15; d=d*0.98; )
  111. RME(19, t=sin(r-M_PI*0.5 + fx_curtime*0.3); if (t < 0.1) t=0.1; d=d-0.3*d*t; )
  112. RME(20, d=0.92*d*(1.0+0.09*sin(r)); r=r+0.1*(1.0-d)*(1.0-d)*(1.0-d); )
  113. RME(21, d=my_asin(d*0.75)*0.95/0.75;)
  114. RME(22,r=r+(0.2*sin(r*8+fx_curtime)); d=d*0.99;)
  115. RME(23,
  116. r=r+sin(r*4+cos(fx_curtime)*3)*sin(d*3.14159*7+cos(fx_curtime*0.2)*7)*0.3;
  117. d=d*(0.97+cos(r*32.7)*0.03);
  118. )
  119. RME(24,
  120. r=r+cos(fx_curtime*0.3)*0.1;
  121. t=r;
  122. if (t < 0.0) t+=3.14159;
  123. if (t > 3.14159) t-=3.14159;
  124. d=atan2(d,t/2)*0.9;
  125. )
  126. }
  127. }
  // Number of effects; any stored effect index >= NUM_FX means "slot unused".
  #define NUM_FX 25

  // Number of effect slots that can be chained in one pass.
  #define COMBINE_FX 4

  // Blend factor between the previous (0.0) and current (1.0) effect set.
  static double fx_weight;

  // Effect indices of the current and of the previous selection.
  static int cur_fx[COMBINE_FX];
  static int last_fx[COMBINE_FX];

  // Centre offsets: [0] = current, [1] = previous; second index 0 = x, 1 = y.
  static double fx_offs[2][2];

  // Tick values describing the active selection: when it started, when it is
  // replaced (fx_end) and when the blend reaches full weight (fx_end2).
  static unsigned int fx_start;
  static unsigned int fx_end;
  static unsigned int fx_end2;
  135. static void fx_init(void)
  136. {
  137. if (GetTickCount()>=fx_end)
  138. {
  139. memcpy(last_fx,cur_fx,sizeof(last_fx));
  140. fx_start=GetTickCount();
  141. fx_end2=fx_start+400+(warand()&16383);
  142. fx_end=fx_start + 4*(fx_end2-fx_start);
  143. if (fx_end > fx_start+8000) fx_end=fx_start+8000;
  144. if (fx_end2 > fx_end) fx_end2=fx_end;
  145. int x;
  146. for (x = 0; x < COMBINE_FX; x ++)
  147. {
  148. cur_fx[x]=warand()%((NUM_FX-x) + x*x*(NUM_FX/3));
  149. if (cur_fx[x] == 15) cur_fx[x]=warand()%((NUM_FX-x) + x*x*(NUM_FX/3));
  150. }
  151. for (x = 1; x < COMBINE_FX; x ++)
  152. {
  153. if (cur_fx[x] >= NUM_FX) break;
  154. }
  155. for (; x < COMBINE_FX; x ++) cur_fx[x]=NUM_FX;
  156. #if 0
  157. static char buf[1024] = {0};
  158. wsprintf(buf,"picked: ");
  159. for (x = 0; x < COMBINE_FX; x ++)
  160. {
  161. if (cur_fx[x]>=NUM_FX) break;
  162. wsprintf(buf+strlen(buf),"%d,",cur_fx[x]);
  163. }
  164. wsprintf(buf+strlen(buf),"\n");
  165. OutputDebugString(buf);
  166. #endif
  167. // for testing
  168. #ifdef TEST_FX
  169. for (x = 1; x < COMBINE_FX; x++)
  170. {
  171. cur_fx[x]=NUM_FX;
  172. }
  173. cur_fx[0]=TEST_FX;
  174. #endif
  175. for (x = 0; x < 2; x ++)
  176. {
  177. fx_offs[1][x]=fx_offs[0][x];
  178. if (!(warand()&7))
  179. fx_offs[0][x]=((warand()%101) - 50)/750.0;
  180. else fx_offs[0][x]=0;
  181. }
  182. static int ff;
  183. if (!ff)
  184. {
  185. last_fx[0]=warand()%NUM_FX;
  186. for (x = 1; x < COMBINE_FX; x ++)
  187. {
  188. last_fx[x]=NUM_FX;
  189. }
  190. ff++;
  191. }
  192. }
  193. fx_curtime=(double)(GetTickCount()-fx_gstarttime)/1000.0;
  194. fx_weight=(double)(GetTickCount()-fx_start)/(double)(fx_end2-fx_start);
  195. if (fx_weight > 1.0) fx_weight=1.0;
  196. ef10_sc=cos(fx_curtime)*2 + 3;
  197. }
  198. static void fx_apply(double &d, double &r) // 1 if rect
  199. {
  200. double d2=d, r2=r;
  201. int x;
  202. for (x = 0; x < COMBINE_FX; x ++)
  203. {
  204. if (last_fx[x] < NUM_FX)
  205. {
  206. __fxsfunc(last_fx[x],d2,r2);
  207. }
  208. if (cur_fx[x] < NUM_FX)
  209. {
  210. __fxsfunc(cur_fx[x],d,r);
  211. }
  212. }
  213. d=d*fx_weight + d2*(1.0-fx_weight);
  214. r=r*fx_weight + r2*(1.0-fx_weight);
  215. }
  216. static int mmx_fadeval[2]={1,1};
  217. static unsigned int const mmx_blend4_revn[2]={0xff00ff,0xff00ff};
  218. static int const mmx_blend4_zero;
  219. ////// NEW FASTER (HOPEFULLY) - THANKS FOR THE IDEA RYAN! :)
  220. static int mask1[2]={0x0000ffff,0};
  221. static int mask2[2]={0xffff0000,0};
  222. static int revy[2]={0,0xff00ff};
  223. static int mask3[2]={0xffffffff,0};
  224. static int mask4[2]={0,0xffffffff};
  225. static int subma=0x000000FF;
  #ifdef CLOOP
  // Blends the 2x2 byte neighbourhood at i (i[0], i[1], i[w], i[w+1]) and
  // returns the low byte of the result after subtracting mmx_fadeval with
  // saturation. The weights come from bits 8..15 of xp and yp.
  // MSVC x86 inline assembly using MMX; the caller issues emms afterwards.
  //
  // The result is now stored and returned explicitly instead of being left
  // in EAX while control falls off the end of a non-void function.
  static __inline unsigned char FASTMMXBLEND(unsigned char *i, unsigned int w, int xp, int yp)
  {
      unsigned int blended;
      __asm
      {
          movd mm1, [xp]
          mov eax, i
          psrlw mm1, 8 // mm1 = -0XP
          mov esi, w
          movd mm3, [yp]
          punpcklwd mm1,mm1 // mm1=00XP-00XP
          psrlw mm3, 8 // mm3 = -0YP
          sub ecx, ecx
          movd mm2, [subma] // mm2=0000-00FF
          mov cl, [eax]
          psubw mm2, mm1 // mm2=00??-00XI
          mov ch, [eax+1]
          punpcklwd mm3,mm3 // mm3=00YP-00YP
          pand mm1, [mask2] // mm1=00XP-0000
          punpckldq mm3, mm3 //mm3=00YP-00YP-00YP-00YP
          shl ecx, 16
          movq mm4, [revy] // mm4=0000-0000-00FF-00FF
          pand mm2, [mask1] // mm2=0000-00XI
          psubw mm4, mm3 // mm4=00YI-00YI-00??-00??
          mov cl, [eax+esi]
          pand mm3, [mask3] // mm3=0000-0000-00YP-00YP
          por mm1, mm2 // mm1=00XP-00XI
          pand mm4, [mask4] // mm4=00YI-00YI-0000-0000
          mov ch, [eax+esi+1]
          por mm3, mm4 // mm3=00YP-00YP-00YI-00YI
          punpckldq mm1, mm1 //mm1=00XP-00XI-00XP-00XI
          pmullw mm1, mm3
          movd mm0, ecx
          punpcklbw mm0, [mmx_blend4_zero]
          psrlw mm1, 8
          Pmaddwd mm0, mm1
          // empty
          // stall
          // stall
          psrld mm0, 8
          // empty
          movq mm1, mm0
          // empty
          // NOTE(review): mnemonic kept exactly as found; the data flow (bring
          // the high dword down before adding) suggests psrlq - confirm.
          psrl mm1, 32
          // empty
          paddusb mm0, mm1
          // empty
          psubusb mm0, [mmx_fadeval]
          // empty
          movd eax, mm0
          mov blended, eax
      }
      return (unsigned char)blended;
  }
  #endif
  279. void moveframe_init(int w, int h, int divx, int divy, int fadeval)
  280. {
  281. int x = 0;
  282. XRES=divx+1;
  283. YRES=divy+1;
  284. if (XRES&1) XRES&=~1;
  285. if (YRES&1) YRES&=~1;
  286. if (XRES<2) XRES=2;
  287. if (YRES<2) YRES=2;
  288. if (XRES>128) XRES=128;
  289. if (YRES>128) YRES=128;
  290. fx_gstarttime=(double)GetTickCount();
  291. m_wmul = (int*)GlobalAlloc(GPTR,h*sizeof(int)+(XRES*YRES*2 + XRES*4 + 4)*sizeof(int));
  292. m_tab=m_wmul + h;
  293. for(x = 0; x < h; x ++)
  294. m_wmul[x]=x*w;
  295. g_w=w;
  296. g_h=h;
  297. mmx_fadeval[0]=fadeval;
  298. mmx_fadeval[1]=fadeval;
  299. }
  300. void moveframe_quit()
  301. {
  302. if (m_wmul) GlobalFree(m_wmul);
  303. m_wmul=NULL;
  304. }
  305. void moveframe(unsigned char *inptr, unsigned char *outptr, unsigned char *visdata)
  306. {
  307. m_visdata=visdata;
  308. int w=g_w;
  309. int h=g_h;
  310. int w_adj=(w-2)<<16;
  311. int h_adj=(h-2)<<16;
  312. fx_init();
  313. int x = 0;
  314. int y = 0;
  315. int *tabptr=m_tab;
  316. double xsc=2.0/w,ysc=2.0/h;
  317. double dw2=((double)w*32768.0);
  318. double dh2=((double)h*32768.0);
  319. double max_screen_d=sqrt((double)(w*w+h*h))*0.5;
  320. double divmax_d=1.0/max_screen_d;
  321. max_screen_d *= 65536.0;
  322. double xo=fx_offs[0][0]*fx_weight + fx_offs[1][0]*(1.0-fx_weight);
  323. double yo=fx_offs[0][1]*fx_weight + fx_offs[1][1]*(1.0-fx_weight);
  324. int yc_pos, yc_dpos, xc_pos, xc_dpos;
  325. yc_pos=0;
  326. xc_dpos = (w<<16)/(XRES-1);
  327. yc_dpos = (h<<16)/(YRES-1);
  328. for (y = 0; y < YRES; y ++)
  329. {
  330. xc_pos=0;
  331. for (x = 0; x < XRES; x ++)
  332. {
  333. double xd = 0, yd = 0;
  334. xd=((double)xc_pos-dw2)*(1.0/65536.0);
  335. yd=((double)yc_pos-dh2)*(1.0/65536.0);
  336. xc_pos+=xc_dpos;
  337. double var_d=sqrt(xd*xd+yd*yd)*divmax_d;
  338. double var_r=atan2(yd,xd) + M_PI*0.5;
  339. int tmp1 = 0, tmp2 = 0;
  340. fx_apply(var_d,var_r);
  341. var_d *= max_screen_d;
  342. var_r -= M_PI*0.5;
  343. tmp1=myftol(dw2*(1.0+xo) + cos(var_r) * var_d);
  344. tmp2=myftol(dh2*(1.0+yo) + sin(var_r) * var_d);
  345. if (tmp1 < 0) tmp1=0;
  346. if (tmp1 > w_adj) tmp1=w_adj;
  347. if (tmp2 < 0) tmp2=0;
  348. if (tmp2 > h_adj) tmp2=h_adj;
  349. *tabptr++ = tmp1;
  350. *tabptr++ = tmp2;
  351. }
  352. yc_pos+=yc_dpos;
  353. }
  354. // yay, the table is generated. now we do a fixed point
  355. // interpolation of the whole thing and pray.
  356. int *interptab=m_tab+XRES*YRES*2;
  357. int *rdtab=m_tab;
  358. int yseek=1;
  359. yc_pos=0;
  360. xc_dpos=(w<<16)/(XRES-1);
  361. yc_dpos=(h<<16)/(YRES-1);
  362. int lypos=0;
  363. int yl=h;
  364. while (yl>0)
  365. {
  366. yc_pos+=yc_dpos;
  367. yseek=(yc_pos>>16)-lypos;
  368. if (!yseek) goto done;
  369. lypos=yc_pos>>16;
  370. int l=XRES;
  371. int *stab=interptab;
  372. int xr3=XRES*2;
  373. while (l--)
  374. {
  375. int tmp1, tmp2;
  376. tmp1=rdtab[0];
  377. tmp2=rdtab[1];
  378. stab[0]=tmp1;
  379. stab[1]=tmp2;
  380. stab[2]=(rdtab[XRES*2]-tmp1)/yseek;
  381. stab[3]=(rdtab[XRES*2+1]-tmp2)/yseek;
  382. rdtab+=2;
  383. stab+=4;
  384. }
  385. if (yseek > yl) yseek=yl;
  386. yl-=yseek;
  387. if (yseek > 0) while (yseek--)
  388. {
  389. int d_x;
  390. int d_y;
  391. int seek;
  392. int *seektab=interptab;
  393. int xp,yp;
  394. int l=w;
  395. int lpos=0;
  396. int xc_pos=0;
  397. while (l>0)
  398. {
  399. xc_pos+=xc_dpos;
  400. seek=(xc_pos>>16)-lpos;
  401. if (!seek) goto done;
  402. lpos=xc_pos>>16;
  403. xp=seektab[0];
  404. yp=seektab[1];
  405. d_x=(seektab[4]-xp)/(seek);
  406. d_y=(seektab[5]-yp)/(seek);
  407. seektab[0] += seektab[2];
  408. seektab[1] += seektab[3];
  409. seektab+=4;
  410. if (seek>l) seek=l;
  411. l-=seek;
  412. if (seek>0)
  413. {
  414. // normal loop
  415. #ifdef CLOOP
  416. while (seek--)
  417. {
  418. *outptr++=FASTMMXBLEND(inptr+(xp>>16)+m_wmul[yp>>16],w,xp,yp);
  419. xp+=d_x; yp+=d_y;
  420. }
  421. #else
  422. __asm
  423. {
  424. mov edx, seek
  425. mov edi, outptr
  426. mov esi, w
  427. align 16
  428. myLoop1:
  429. mov eax, m_wmul
  430. mov ebx, [yp]
  431. movd mm3, ebx
  432. mov ecx, [xp]
  433. shr ebx, 16
  434. movd mm1, ecx
  435. mov eax, [eax+ebx*4];
  436. shr ecx, 16
  437. psrlw mm1, 8 // mm1 = -0XP
  438. add eax, ecx
  439. punpcklwd mm1,mm1 // mm1=00XP-00XP
  440. add eax, [inptr]
  441. psrlw mm3, 8 // mm3 = -0YP
  442. movd mm2, [subma] // mm2=0000-00FF
  443. psubw mm2, mm1 // mm2=00??-00XI
  444. mov cx, [eax]
  445. punpcklwd mm3,mm3 // mm3=00YP-00YP
  446. pand mm1, [mask2] // mm1=00XP-0000
  447. punpckldq mm3, mm3 //mm3=00YP-00YP-00YP-00YP
  448. shl ecx, 16
  449. movq mm4, [revy] // mm4=0000-0000-00FF-00FF
  450. pand mm2, [mask1] // mm2=0000-00XI
  451. mov cx, [eax+esi]
  452. por mm1, mm2 // mm1=00XP-00XI
  453. psubw mm4, mm3 // mm4=00YI-00YI-00??-00??
  454. pand mm3, [mask3] // mm3=0000-0000-00YP-00YP
  455. pand mm4, [mask4] // mm4=00YI-00YI-0000-0000
  456. por mm3, mm4 // mm3=00YP-00YP-00YI-00YI
  457. punpckldq mm1, mm1 //mm1=00XP-00XI-00XP-00XI
  458. pmullw mm1, mm3
  459. movd mm0, ecx
  460. // empty
  461. // stall
  462. punpcklbw mm0, [mmx_blend4_zero]
  463. psrlw mm1, 8
  464. Pmaddwd mm0, mm1
  465. mov eax, [xp]
  466. add eax, [d_x]
  467. mov ebx, [yp]
  468. mov [xp], eax
  469. add ebx, [d_y]
  470. psrld mm0, 8
  471. mov [yp], ebx
  472. movq mm1, mm0
  473. // empty
  474. psrl mm1, 32
  475. // empty
  476. paddusb mm0, mm1
  477. // empty
  478. psubusb mm0, [mmx_fadeval]
  479. // empty
  480. movd ecx, mm0
  481. mov [edi], cl
  482. inc edi
  483. dec edx
  484. jnz myLoop1
  485. mov outptr, edi
  486. }
  487. #endif
  488. }
  489. }
  490. // adjust final (rightmost elem) part of seektab
  491. seektab[0] += seektab[2];
  492. seektab[1] += seektab[3];
  493. }
  494. }
  495. done:
  496. __asm emms;
  497. }