1
0

r_blur.cpp 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883
  1. /*
  2. LICENSE
  3. -------
  4. Copyright 2005 Nullsoft, Inc.
  5. All rights reserved.
  6. Redistribution and use in source and binary forms, with or without modification,
  7. are permitted provided that the following conditions are met:
  8. * Redistributions of source code must retain the above copyright notice,
  9. this list of conditions and the following disclaimer.
  10. * Redistributions in binary form must reproduce the above copyright notice,
  11. this list of conditions and the following disclaimer in the documentation
  12. and/or other materials provided with the distribution.
  13. * Neither the name of Nullsoft nor the names of its contributors may be used to
  14. endorse or promote products derived from this software without specific prior written permission.
  15. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  16. IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  17. FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  18. CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  21. IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  22. OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. */
  24. // alphachannel safe 11/21/99
  25. #include <windows.h>
  26. #include <commctrl.h>
  27. #include "r_defs.h"
  28. #include "resource.h"
  29. #include "timing.h"
  30. #include "../Agave/Language/api_language.h"
  31. #ifndef LASER
  32. #define C_THISCLASS C_BlurClass
  33. #define MOD_NAME "Trans / Blur"
  34. static const int zero=0;
  35. class C_THISCLASS : public C_RBASE2 {
  36. protected:
  37. public:
  38. C_THISCLASS();
  39. virtual ~C_THISCLASS();
  40. virtual int render(char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h);
  41. virtual char *get_desc() { static char desc[128]; return (!desc[0]?WASABI_API_LNGSTRING_BUF(IDS_TRANS_BLUR,desc,128):desc); }
  42. virtual HWND conf(HINSTANCE hInstance, HWND hwndParent);
  43. virtual void load_config(unsigned char *data, int len);
  44. virtual int save_config(unsigned char *data);
  45. virtual int smp_getflags() { return 1; }
  46. virtual int smp_begin(int max_threads, char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h);
  47. virtual void smp_render(int this_thread, int max_threads, char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h);
  48. virtual int smp_finish(char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h); // return value is that of render() for fbstuff etc
  49. int enabled;
  50. int roundmode;
  51. };
  // Serialisation helpers. Both macros expect an `unsigned char *data` buffer
  // and an integer byte offset `pos` to be in scope at the point of use; they
  // do not advance `pos` themselves.
  //
  // PUT_INT(y): stores the low 32 bits of y at data[pos..pos+3], least
  // significant byte first. Wrapped in do/while(0) so it behaves as a single
  // statement (safe under if/else), and the argument is evaluated exactly once.
  #define PUT_INT(y) do { \
      const unsigned int put_int_value_ = (unsigned int)(y); \
      data[pos]   = (unsigned char)( put_int_value_        & 255); \
      data[pos+1] = (unsigned char)((put_int_value_ >> 8)  & 255); \
      data[pos+2] = (unsigned char)((put_int_value_ >> 16) & 255); \
      data[pos+3] = (unsigned char)((put_int_value_ >> 24) & 255); \
    } while (0)

  // GET_INT(): reads the 32-bit little-endian value at data[pos..pos+3] and
  // yields it as an int. The bytes are widened to unsigned before shifting so
  // that a top byte >= 0x80 does not overflow a signed int.
  #define GET_INT() ((int)( (unsigned int)data[pos] \
                          | ((unsigned int)data[pos+1]<<8) \
                          | ((unsigned int)data[pos+2]<<16) \
                          | ((unsigned int)data[pos+3]<<24) ))
  54. void C_THISCLASS::load_config(unsigned char *data, int len)
  55. {
  56. int pos=0;
  57. if (len-pos >= 4) { enabled=GET_INT(); pos+=4; }
  58. if (len-pos >= 4) { roundmode=GET_INT(); pos+=4; }
  59. else roundmode=0;
  60. }
  61. int C_THISCLASS::save_config(unsigned char *data)
  62. {
  63. int pos=0;
  64. PUT_INT(enabled); pos+=4;
  65. PUT_INT(roundmode); pos+=4;
  66. return pos;
  67. }
  68. C_THISCLASS::C_THISCLASS()
  69. {
  70. roundmode=0;
  71. enabled=1;
  72. }
  73. C_THISCLASS::~C_THISCLASS()
  74. {
  75. }
  // Packed per-byte division helpers for 32-bit pixels.
  //
  // Shifting a whole 32-bit pixel right by n would let the low n bits of each
  // byte leak into the byte below it. MASK_SHn clears the low n bits of the
  // three upper bytes first (the lowest byte's bits simply fall off the end),
  // so DIV_2/4/8/16 divide all four bytes independently, truncating.
  //
  // The masks are built from unsigned literals: the original signed form
  // yielded negative ints, which is a narrowing conversion in the braced
  // initialisers of the unsigned arrays below (ill-formed since C++11) and
  // would turn the shift into an arithmetic one for signed operands.
  // The resulting bit patterns are unchanged.
  #define MASK_SH1 (~(((1u<<7)|(1u<<15)|(1u<<23))<<1))
  #define MASK_SH2 (~(((3u<<6)|(3u<<14)|(3u<<22))<<2))
  #define MASK_SH3 (~(((7u<<5)|(7u<<13)|(7u<<21))<<3))
  #define MASK_SH4 (~(((15u<<4)|(15u<<12)|(15u<<20))<<4))

  // 64-bit (two-pixel) copies of the masks, used as memory operands by the
  // MMX loops in smp_render().
  static unsigned int mmx_mask1[2]={MASK_SH1,MASK_SH1};
  static unsigned int mmx_mask2[2]={MASK_SH2,MASK_SH2};
  static unsigned int mmx_mask3[2]={MASK_SH3,MASK_SH3};
  static unsigned int mmx_mask4[2]={MASK_SH4,MASK_SH4};

  #define DIV_2(x) ((( x ) & MASK_SH1)>>1)
  #define DIV_4(x) ((( x ) & MASK_SH2)>>2)
  #define DIV_8(x) ((( x ) & MASK_SH3)>>3)
  #define DIV_16(x) ((( x ) & MASK_SH4)>>4)
  88. void C_THISCLASS::smp_render(int this_thread, int max_threads, char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h)
  89. {
  90. if (!enabled) return;
  91. timingEnter(0);
  92. unsigned int *f = (unsigned int *) framebuffer;
  93. unsigned int *of = (unsigned int *) fbout;
  94. if (max_threads < 1) max_threads=1;
  95. int start_l = ( this_thread * h ) / max_threads;
  96. int end_l;
  97. if (this_thread >= max_threads - 1) end_l = h;
  98. else end_l = ( (this_thread+1) * h ) / max_threads;
  99. int outh=end_l-start_l;
  100. if (outh<1) return;
  101. int skip_pix=start_l*w;
  102. f += skip_pix;
  103. of+= skip_pix;
  104. int at_top=0, at_bottom=0;
  105. if (!this_thread) at_top=1;
  106. if (this_thread >= max_threads - 1) at_bottom=1;
  107. if (enabled == 2)
  108. {
  109. // top line
  110. if (at_top)
  111. {
  112. unsigned int *f2=f+w;
  113. int x;
  114. int adj_tl=0, adj_tl2=0;
  115. if (roundmode) { adj_tl = 0x03030303; adj_tl2 = 0x04040404; }
  116. // top left
  117. *of++=DIV_2(f[0])+DIV_4(f[0])+DIV_8(f[1])+DIV_8(f2[0]) + adj_tl; f++; f2++;
  118. // top center
  119. x=(w-2)/4;
  120. while (x--)
  121. {
  122. of[0]=DIV_2(f[0]) + DIV_8(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0]) + adj_tl2;
  123. of[1]=DIV_2(f[1]) + DIV_8(f[1]) + DIV_8(f[2]) + DIV_8(f[0]) + DIV_8(f2[1]) + adj_tl2;
  124. of[2]=DIV_2(f[2]) + DIV_8(f[2]) + DIV_8(f[3]) + DIV_8(f[1]) + DIV_8(f2[2]) + adj_tl2;
  125. of[3]=DIV_2(f[3]) + DIV_8(f[3]) + DIV_8(f[4]) + DIV_8(f[2]) + DIV_8(f2[3]) + adj_tl2;
  126. f+=4;
  127. f2+=4;
  128. of+=4;
  129. }
  130. x=(w-2)&3;
  131. while (x--)
  132. {
  133. *of++=DIV_2(f[0]) + DIV_8(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0]) + adj_tl2;
  134. f++;
  135. f2++;
  136. }
  137. // top right
  138. *of++=DIV_2(f[0])+DIV_4(f[0]) + DIV_8(f[-1])+DIV_8(f2[0]) + adj_tl; f++; f2++;
  139. }
  140. // middle block
  141. {
  142. int y=outh-at_top-at_bottom;
  143. unsigned int adj_tl1=0,adj_tl2=0;
  144. unsigned __int64 adj2=0;
  145. if (roundmode) { adj_tl1=0x04040404; adj_tl2=0x05050505; adj2=0x0505050505050505i64; }
  146. while (y--)
  147. {
  148. int x;
  149. unsigned int *f2=f+w;
  150. unsigned int *f3=f-w;
  151. // left edge
  152. *of++=DIV_2(f[0])+DIV_8(f[0])+DIV_8(f[1])+DIV_8(f2[0])+DIV_8(f3[0])+adj_tl1; f++; f2++; f3++;
  153. // middle of line
  154. #ifdef NO_MMX
  155. x=(w-2)/4;
  156. if (roundmode)
  157. {
  158. while (x--)
  159. {
  160. of[0]=DIV_2(f[0]) + DIV_4(f[0]) + DIV_16(f[1]) + DIV_16(f[-1]) + DIV_16(f2[0]) + DIV_16(f3[0]) + 0x05050505;
  161. of[1]=DIV_2(f[1]) + DIV_4(f[1]) + DIV_16(f[2]) + DIV_16(f[0]) + DIV_16(f2[1]) + DIV_16(f3[1]) + 0x05050505;
  162. of[2]=DIV_2(f[2]) + DIV_4(f[2]) + DIV_16(f[3]) + DIV_16(f[1]) + DIV_16(f2[2]) + DIV_16(f3[2]) + 0x05050505;
  163. of[3]=DIV_2(f[3]) + DIV_4(f[3]) + DIV_16(f[4]) + DIV_16(f[2]) + DIV_16(f2[3]) + DIV_16(f3[3]) + 0x05050505;
  164. f+=4;
  165. f2+=4;
  166. f3+=4;
  167. of+=4;
  168. }
  169. }
  170. else
  171. {
  172. while (x--)
  173. {
  174. of[0]=DIV_2(f[0]) + DIV_4(f[0]) + DIV_16(f[1]) + DIV_16(f[-1]) + DIV_16(f2[0]) + DIV_16(f3[0]);
  175. of[1]=DIV_2(f[1]) + DIV_4(f[1]) + DIV_16(f[2]) + DIV_16(f[0]) + DIV_16(f2[1]) + DIV_16(f3[1]);
  176. of[2]=DIV_2(f[2]) + DIV_4(f[2]) + DIV_16(f[3]) + DIV_16(f[1]) + DIV_16(f2[2]) + DIV_16(f3[2]);
  177. of[3]=DIV_2(f[3]) + DIV_4(f[3]) + DIV_16(f[4]) + DIV_16(f[2]) + DIV_16(f2[3]) + DIV_16(f3[3]);
  178. f+=4;
  179. f2+=4;
  180. f3+=4;
  181. of+=4;
  182. }
  183. }
  184. #else
  185. {
  186. __asm
  187. {
  188. mov ecx, w
  189. mov edx, ecx
  190. mov ebx, edx
  191. neg ebx
  192. mov esi, f
  193. mov edi, of
  194. sub ecx, 2
  195. shr ecx, 2
  196. movq mm1, [esi-4]
  197. align 16
  198. mmx_light_blur_loop:
  199. movq mm0, [esi]
  200. movq mm2, [esi+4]
  201. pand mm0, mmx_mask1
  202. movq mm5, mm2
  203. psrl mm0, 1
  204. movq mm7, [esi+8]
  205. movq mm4, mm0
  206. pand mm1, mmx_mask4
  207. pand mm4, mmx_mask1
  208. movq mm3, [esi+edx*4]
  209. psrl mm4, 1
  210. paddb mm0, mm4
  211. pand mm2, mmx_mask4
  212. movq mm4, [esi+ebx*4]
  213. pand mm3, mmx_mask4
  214. pand mm4, mmx_mask4
  215. psrl mm1, 4
  216. pand mm7, mmx_mask1
  217. movq mm6, [esi+12]
  218. psrl mm2, 4
  219. add esi, 16
  220. psrl mm3, 4
  221. paddb mm0, mm1
  222. psrl mm4, 4
  223. movq mm1, mm6
  224. psrl mm7, 1
  225. paddb mm2, mm3
  226. paddb mm0, mm4
  227. movq mm3, [esi+edx*4-8]
  228. paddb mm0, mm2
  229. movq mm4, [esi+ebx*4-8]
  230. paddb mm0, [adj2]
  231. pand mm6, mmx_mask4
  232. movq [edi],mm0
  233. pand mm5, mmx_mask4
  234. movq mm0, mm7
  235. pand mm3, mmx_mask4
  236. psrl mm6, 4
  237. pand mm0, mmx_mask1
  238. pand mm4, mmx_mask4
  239. psrl mm5, 4
  240. psrl mm0, 1
  241. paddb mm7, mm6
  242. paddb mm7, mm0
  243. add edi, 16
  244. psrl mm3, 4
  245. psrl mm4, 4
  246. paddb mm5, mm3
  247. paddb mm7, mm4
  248. dec ecx
  249. paddb mm7, mm5
  250. paddb mm7, [adj2]
  251. movq [edi-8],mm7
  252. jnz mmx_light_blur_loop
  253. mov of, edi
  254. mov f, esi
  255. };
  256. f2=f+w; // update these bitches
  257. f3=f-w;
  258. }
  259. #endif
  260. x=(w-2)&3;
  261. while (x--)
  262. {
  263. *of++=DIV_2(f[0]) + DIV_4(f[0]) + DIV_16(f[1]) + DIV_16(f[-1]) + DIV_16(f2[0]) + DIV_16(f3[0]) + adj_tl2;
  264. f++;
  265. f2++;
  266. f3++;
  267. }
  268. // right block
  269. *of++=DIV_2(f[0])+DIV_8(f[0])+DIV_8(f[-1])+DIV_8(f2[0])+DIV_8(f3[0])+adj_tl1; f++;
  270. }
  271. }
  272. // bottom block
  273. if (at_bottom)
  274. {
  275. unsigned int *f2=f-w;
  276. int x;
  277. int adj_tl=0, adj_tl2=0;
  278. if (roundmode) { adj_tl = 0x03030303; adj_tl2 = 0x04040404; }
  279. // bottom left
  280. *of++=DIV_2(f[0])+DIV_4(f[0])+DIV_8(f[1])+DIV_8(f2[0]) + adj_tl; f++; f2++;
  281. // bottom center
  282. x=(w-2)/4;
  283. while (x--)
  284. {
  285. of[0]=DIV_2(f[0]) + DIV_8(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0]) + adj_tl2;
  286. of[1]=DIV_2(f[1]) + DIV_8(f[1]) + DIV_8(f[2]) + DIV_8(f[0]) + DIV_8(f2[1]) + adj_tl2;
  287. of[2]=DIV_2(f[2]) + DIV_8(f[2]) + DIV_8(f[3]) + DIV_8(f[1]) + DIV_8(f2[2])+adj_tl2;
  288. of[3]=DIV_2(f[3]) + DIV_8(f[3]) + DIV_8(f[4]) + DIV_8(f[2]) + DIV_8(f2[3])+adj_tl2;
  289. f+=4;
  290. f2+=4;
  291. of+=4;
  292. }
  293. x=(w-2)&3;
  294. while (x--)
  295. {
  296. *of++=DIV_2(f[0]) + DIV_8(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0])+adj_tl2;
  297. f++;
  298. f2++;
  299. }
  300. // bottom right
  301. *of++=DIV_2(f[0])+DIV_4(f[0]) + DIV_8(f[-1])+DIV_8(f2[0])+adj_tl; f++; f2++;
  302. }
  303. }
  304. else if (enabled == 3) // more blur
  305. {
  306. // top line
  307. if (at_top) {
  308. unsigned int *f2=f+w;
  309. int x;
  310. int adj_tl=0, adj_tl2=0;
  311. if (roundmode) { adj_tl = 0x02020202; adj_tl2 = 0x01010101; }
  312. // top left
  313. *of++=DIV_2(f[1])+DIV_2(f2[0]) + adj_tl2; f++; f2++;
  314. // top center
  315. x=(w-2)/4;
  316. while (x--)
  317. {
  318. of[0]=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_2(f2[0]) + adj_tl;
  319. of[1]=DIV_4(f[2]) + DIV_4(f[0]) + DIV_2(f2[1]) +adj_tl;
  320. of[2]=DIV_4(f[3]) + DIV_4(f[1]) + DIV_2(f2[2]) + adj_tl;
  321. of[3]=DIV_4(f[4]) + DIV_4(f[2]) + DIV_2(f2[3]) + adj_tl;
  322. f+=4;
  323. f2+=4;
  324. of+=4;
  325. }
  326. x=(w-2)&3;
  327. while (x--)
  328. {
  329. *of++=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_2(f2[0])+adj_tl;
  330. f++;
  331. f2++;
  332. }
  333. // top right
  334. *of++=DIV_2(f[-1])+DIV_2(f2[0])+adj_tl2; f++; f2++;
  335. }
  336. // middle block
  337. {
  338. int y=outh-at_top-at_bottom;
  339. int adj_tl1=0,adj_tl2=0;
  340. unsigned __int64 adj2=0;
  341. if (roundmode) { adj_tl1=0x02020202; adj_tl2=0x03030303; adj2=0x0303030303030303i64; }
  342. while (y--)
  343. {
  344. int x;
  345. unsigned int *f2=f+w;
  346. unsigned int *f3=f-w;
  347. // left edge
  348. *of++=DIV_2(f[1])+DIV_4(f2[0])+DIV_4(f3[0]) + adj_tl1; f++; f2++; f3++;
  349. // middle of line
  350. #ifdef NO_MMX
  351. x=(w-2)/4;
  352. if (roundmode)
  353. {
  354. while (x--)
  355. {
  356. of[0]=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + DIV_4(f3[0]) + 0x03030303;
  357. of[1]=DIV_4(f[2]) + DIV_4(f[0]) + DIV_4(f2[1]) + DIV_4(f3[1]) + 0x03030303;
  358. of[2]=DIV_4(f[3]) + DIV_4(f[1]) + DIV_4(f2[2]) + DIV_4(f3[2]) + 0x03030303;
  359. of[3]=DIV_4(f[4]) + DIV_4(f[2]) + DIV_4(f2[3]) + DIV_4(f3[3]) + 0x03030303;
  360. f+=4; f2+=4; f3+=4; of+=4;
  361. }
  362. }
  363. else
  364. {
  365. while (x--)
  366. {
  367. of[0]=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + DIV_4(f3[0]);
  368. of[1]=DIV_4(f[2]) + DIV_4(f[0]) + DIV_4(f2[1]) + DIV_4(f3[1]);
  369. of[2]=DIV_4(f[3]) + DIV_4(f[1]) + DIV_4(f2[2]) + DIV_4(f3[2]);
  370. of[3]=DIV_4(f[4]) + DIV_4(f[2]) + DIV_4(f2[3]) + DIV_4(f3[3]);
  371. f+=4; f2+=4; f3+=4; of+=4;
  372. }
  373. }
  374. #else
  375. {
  376. __asm
  377. {
  378. mov ecx, w
  379. mov edx, ecx
  380. mov ebx, edx
  381. neg ebx
  382. mov esi, f
  383. mov edi, of
  384. sub ecx, 2
  385. shr ecx, 2
  386. movq mm1, [esi-4]
  387. align 16
  388. mmx_heavy_blur_loop:
  389. movq mm2, [esi+4]
  390. pxor mm0, mm0
  391. movq mm5, mm2
  392. pxor mm7, mm7
  393. movq mm3, [esi+edx*4]
  394. pand mm1, mmx_mask2
  395. movq mm4, [esi+ebx*4]
  396. pand mm2, mmx_mask2
  397. pand mm3, mmx_mask2
  398. pand mm4, mmx_mask2
  399. psrl mm1, 2
  400. movq mm6, [esi+12]
  401. psrl mm2, 2
  402. psrl mm3, 2
  403. paddb mm0, mm1
  404. psrl mm4, 2
  405. movq mm1, mm6
  406. paddb mm2, mm3
  407. paddb mm0, mm4
  408. movq mm3, [esi+edx*4+8]
  409. paddb mm0, mm2
  410. movq mm4, [esi+ebx*4+8]
  411. paddb mm0, [adj2]
  412. pand mm6, mmx_mask2
  413. movq [edi],mm0
  414. pand mm5, mmx_mask2
  415. pand mm3, mmx_mask2
  416. add esi, 16
  417. psrl mm6, 2
  418. pand mm4, mmx_mask2
  419. psrl mm5, 2
  420. paddb mm7, mm6
  421. psrl mm3, 2
  422. add edi, 16
  423. psrl mm4, 2
  424. paddb mm5, mm3
  425. paddb mm7, mm4
  426. paddb mm7, mm5
  427. paddb mm7, [adj2]
  428. movq [edi-8],mm7
  429. dec ecx
  430. jnz mmx_heavy_blur_loop
  431. mov of, edi
  432. mov f, esi
  433. };
  434. f2=f+w; // update these bitches
  435. f3=f-w;
  436. }
  437. #endif
  438. x=(w-2)&3;
  439. while (x--)
  440. {
  441. *of++=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + DIV_4(f3[0]) + adj_tl2;
  442. f++;
  443. f2++;
  444. f3++;
  445. }
  446. // right block
  447. *of++=DIV_2(f[-1])+DIV_4(f2[0])+DIV_4(f3[0]) + adj_tl1; f++;
  448. }
  449. }
  450. // bottom block
  451. if (at_bottom)
  452. {
  453. unsigned int *f2=f-w;
  454. int x;
  455. int adj_tl=0, adj_tl2=0;
  456. if (roundmode) { adj_tl = 0x02020202; adj_tl2 = 0x01010101; }
  457. // bottom left
  458. *of++=DIV_2(f[1])+DIV_2(f2[0]) + adj_tl2; f++; f2++;
  459. // bottom center
  460. x=(w-2)/4;
  461. while (x--)
  462. {
  463. of[0]=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_2(f2[0])+adj_tl;
  464. of[1]=DIV_4(f[2]) + DIV_4(f[0]) + DIV_2(f2[1])+adj_tl;
  465. of[2]=DIV_4(f[3]) + DIV_4(f[1]) + DIV_2(f2[2])+adj_tl;
  466. of[3]=DIV_4(f[4]) + DIV_4(f[2]) + DIV_2(f2[3])+adj_tl;
  467. f+=4;
  468. f2+=4;
  469. of+=4;
  470. }
  471. x=(w-2)&3;
  472. while (x--)
  473. {
  474. *of++=DIV_4(f[1]) + DIV_4(f[-1]) + DIV_2(f2[0])+adj_tl;
  475. f++;
  476. f2++;
  477. }
  478. // bottom right
  479. *of++=DIV_2(f[-1])+DIV_2(f2[0])+adj_tl2; f++; f2++;
  480. }
  481. }
  482. else
  483. {
  484. // top line
  485. if (at_top)
  486. {
  487. unsigned int *f2=f+w;
  488. int x;
  489. int adj_tl=0, adj_tl2=0;
  490. if (roundmode) { adj_tl = 0x02020202; adj_tl2 = 0x03030303; }
  491. // top left
  492. *of++=DIV_2(f[0])+DIV_4(f[1])+DIV_4(f2[0]) + adj_tl; f++; f2++;
  493. // top center
  494. x=(w-2)/4;
  495. while (x--)
  496. {
  497. of[0]=DIV_4(f[0]) + DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + adj_tl2;
  498. of[1]=DIV_4(f[1]) + DIV_4(f[2]) + DIV_4(f[0]) + DIV_4(f2[1]) + adj_tl2;
  499. of[2]=DIV_4(f[2]) + DIV_4(f[3]) + DIV_4(f[1]) + DIV_4(f2[2]) + adj_tl2;
  500. of[3]=DIV_4(f[3]) + DIV_4(f[4]) + DIV_4(f[2]) + DIV_4(f2[3]) + adj_tl2;
  501. f+=4;
  502. f2+=4;
  503. of+=4;
  504. }
  505. x=(w-2)&3;
  506. while (x--)
  507. {
  508. *of++=DIV_4(f[0]) + DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + adj_tl2;
  509. f++;
  510. f2++;
  511. }
  512. // top right
  513. *of++=DIV_2(f[0])+DIV_4(f[-1])+DIV_4(f2[0]) + adj_tl; f++; f2++;
  514. }
  515. // middle block
  516. {
  517. int y=outh-at_top-at_bottom;
  518. int adj_tl1=0,adj_tl2=0;
  519. unsigned __int64 adj2=0;
  520. if (roundmode) { adj_tl1=0x03030303; adj_tl2=0x04040404; adj2=0x0404040404040404i64; }
  521. while (y--)
  522. {
  523. int x;
  524. unsigned int *f2=f+w;
  525. unsigned int *f3=f-w;
  526. // left edge
  527. *of++=DIV_4(f[0])+DIV_4(f[1])+DIV_4(f2[0])+DIV_4(f3[0])+adj_tl1; f++; f2++; f3++;
  528. // middle of line
  529. #ifdef NO_MMX
  530. x=(w-2)/4;
  531. if (roundmode)
  532. {
  533. while (x--)
  534. {
  535. of[0]=DIV_2(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0]) + DIV_8(f3[0]) + 0x04040404;
  536. of[1]=DIV_2(f[1]) + DIV_8(f[2]) + DIV_8(f[0]) + DIV_8(f2[1]) + DIV_8(f3[1]) + 0x04040404;
  537. of[2]=DIV_2(f[2]) + DIV_8(f[3]) + DIV_8(f[1]) + DIV_8(f2[2]) + DIV_8(f3[2]) + 0x04040404;
  538. of[3]=DIV_2(f[3]) + DIV_8(f[4]) + DIV_8(f[2]) + DIV_8(f2[3]) + DIV_8(f3[3]) + 0x04040404;
  539. f+=4; f2+=4; f3+=4; of+=4;
  540. }
  541. }
  542. else
  543. {
  544. while (x--)
  545. {
  546. of[0]=DIV_2(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0]) + DIV_8(f3[0]);
  547. of[1]=DIV_2(f[1]) + DIV_8(f[2]) + DIV_8(f[0]) + DIV_8(f2[1]) + DIV_8(f3[1]);
  548. of[2]=DIV_2(f[2]) + DIV_8(f[3]) + DIV_8(f[1]) + DIV_8(f2[2]) + DIV_8(f3[2]);
  549. of[3]=DIV_2(f[3]) + DIV_8(f[4]) + DIV_8(f[2]) + DIV_8(f2[3]) + DIV_8(f3[3]);
  550. f+=4; f2+=4; f3+=4; of+=4;
  551. }
  552. }
  553. #else
  554. {
  555. __asm
  556. {
  557. mov ecx, w
  558. mov edx, ecx
  559. mov ebx, edx
  560. neg ebx
  561. mov esi, f
  562. mov edi, of
  563. sub ecx, 2
  564. shr ecx, 2
  565. movq mm1, [esi-4]
  566. align 16
  567. mmx_normal_blur_loop:
  568. movq mm0, [esi]
  569. movq mm2, [esi+4]
  570. pand mm0, mmx_mask1
  571. movq mm5, mm2
  572. movq mm7, [esi+8]
  573. pand mm1, mmx_mask3
  574. movq mm3, [esi+edx*4]
  575. pand mm2, mmx_mask3
  576. movq mm4, [esi+ebx*4]
  577. pand mm3, mmx_mask3
  578. psrl mm0, 1
  579. pand mm4, mmx_mask3
  580. psrl mm1, 3
  581. pand mm7, mmx_mask1
  582. movq mm6, [esi+12]
  583. psrl mm2, 3
  584. add esi, 16
  585. psrl mm3, 3
  586. paddb mm0, mm1
  587. psrl mm4, 3
  588. movq mm1, mm6
  589. paddb mm2, mm3
  590. paddb mm0, mm4
  591. movq mm3, [esi+edx*4-8]
  592. paddb mm0, mm2
  593. movq mm4, [esi+ebx*4-8]
  594. paddb mm0, [adj2]
  595. pand mm6, mmx_mask3
  596. movq [edi],mm0
  597. pand mm5, mmx_mask3
  598. psrl mm7, 1
  599. pand mm3, mmx_mask3
  600. psrl mm6, 3
  601. pand mm4, mmx_mask3
  602. psrl mm5, 3
  603. paddb mm7, mm6
  604. add edi, 16
  605. psrl mm3, 3
  606. psrl mm4, 3
  607. paddb mm5, mm3
  608. paddb mm7, mm4
  609. dec ecx
  610. paddb mm7, mm5
  611. paddb mm7, [adj2]
  612. movq [edi-8],mm7
  613. jnz mmx_normal_blur_loop
  614. mov of, edi
  615. mov f, esi
  616. };
  617. f2=f+w; // update these bitches
  618. f3=f-w;
  619. }
  620. #endif
  621. x=(w-2)&3;
  622. while (x--)
  623. {
  624. *of++=DIV_2(f[0]) + DIV_8(f[1]) + DIV_8(f[-1]) + DIV_8(f2[0]) + DIV_8(f3[0]) + adj_tl2;
  625. f++;
  626. f2++;
  627. f3++;
  628. }
  629. // right block
  630. *of++=DIV_4(f[0])+DIV_4(f[-1])+DIV_4(f2[0])+DIV_4(f3[0]) + adj_tl1; f++;
  631. }
  632. }
  633. // bottom block
  634. if (at_bottom)
  635. {
  636. unsigned int *f2=f-w;
  637. int adj_tl=0, adj_tl2=0;
  638. if (roundmode) { adj_tl = 0x02020202; adj_tl2 = 0x03030303; }
  639. int x;
  640. // bottom left
  641. *of++=DIV_2(f[0])+DIV_4(f[1])+DIV_4(f2[0]) + adj_tl; f++; f2++;
  642. // bottom center
  643. x=(w-2)/4;
  644. while (x--)
  645. {
  646. of[0]=DIV_4(f[0]) + DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + adj_tl2;
  647. of[1]=DIV_4(f[1]) + DIV_4(f[2]) + DIV_4(f[0]) + DIV_4(f2[1]) + adj_tl2;
  648. of[2]=DIV_4(f[2]) + DIV_4(f[3]) + DIV_4(f[1]) + DIV_4(f2[2]) + adj_tl2;
  649. of[3]=DIV_4(f[3]) + DIV_4(f[4]) + DIV_4(f[2]) + DIV_4(f2[3]) + adj_tl2;
  650. f+=4;
  651. f2+=4;
  652. of+=4;
  653. }
  654. x=(w-2)&3;
  655. while (x--)
  656. {
  657. *of++=DIV_4(f[0]) + DIV_4(f[1]) + DIV_4(f[-1]) + DIV_4(f2[0]) + adj_tl2;
  658. f++;
  659. f2++;
  660. }
  661. // bottom right
  662. *of++=DIV_2(f[0])+DIV_4(f[-1])+DIV_4(f2[0]) + adj_tl; f++; f2++;
  663. }
  664. }
  665. #ifndef NO_MMX
  666. __asm emms;
  667. #endif
  668. timingLeave(0);
  669. }
  670. int C_THISCLASS::smp_begin(int max_threads, char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h)
  671. {
  672. if (!enabled) return 0;
  673. return max_threads;
  674. }
  675. int C_THISCLASS::smp_finish(char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h) // return value is that of render() for fbstuff etc
  676. {
  677. return !!enabled;
  678. }
  679. int C_THISCLASS::render(char visdata[2][2][576], int isBeat, int *framebuffer, int *fbout, int w, int h)
  680. {
  681. smp_begin(1,visdata,isBeat,framebuffer,fbout,w,h);
  682. if (isBeat & 0x80000000) return 0;
  683. smp_render(0,1,visdata,isBeat,framebuffer,fbout,w,h);
  684. return smp_finish(visdata,isBeat,framebuffer,fbout,w,h);
  685. }
  686. C_RBASE *R_Blur(char *desc)
  687. {
  688. if (desc) { strcpy(desc,MOD_NAME); return NULL; }
  689. return (C_RBASE *) new C_THISCLASS();
  690. }
  691. static C_THISCLASS *g_this;
  692. static BOOL CALLBACK g_DlgProc(HWND hwndDlg, UINT uMsg, WPARAM wParam,LPARAM lParam)
  693. {
  694. switch (uMsg)
  695. {
  696. case WM_INITDIALOG:
  697. if (g_this->enabled==2) CheckDlgButton(hwndDlg,IDC_RADIO3,BST_CHECKED);
  698. else if (g_this->enabled==3) CheckDlgButton(hwndDlg,IDC_RADIO4,BST_CHECKED);
  699. else if (g_this->enabled) CheckDlgButton(hwndDlg,IDC_RADIO2,BST_CHECKED);
  700. else CheckDlgButton(hwndDlg,IDC_RADIO1,BST_CHECKED);
  701. if (g_this->roundmode==0) CheckDlgButton(hwndDlg,IDC_ROUNDDOWN,BST_CHECKED);
  702. else CheckDlgButton(hwndDlg,IDC_ROUNDUP,BST_CHECKED);
  703. return 1;
  704. case WM_COMMAND:
  705. if (LOWORD(wParam) == IDC_RADIO1)
  706. if (IsDlgButtonChecked(hwndDlg,IDC_RADIO1))
  707. g_this->enabled=0;
  708. if (LOWORD(wParam) == IDC_RADIO2)
  709. if (IsDlgButtonChecked(hwndDlg,IDC_RADIO2))
  710. g_this->enabled=1;
  711. if (LOWORD(wParam) == IDC_RADIO3)
  712. if (IsDlgButtonChecked(hwndDlg,IDC_RADIO3))
  713. g_this->enabled=2;
  714. if (LOWORD(wParam) == IDC_RADIO4)
  715. if (IsDlgButtonChecked(hwndDlg,IDC_RADIO4))
  716. g_this->enabled=3;
  717. if (LOWORD(wParam) == IDC_ROUNDUP)
  718. if (IsDlgButtonChecked(hwndDlg,IDC_ROUNDUP))
  719. g_this->roundmode=1;
  720. if (LOWORD(wParam) == IDC_ROUNDDOWN)
  721. if (IsDlgButtonChecked(hwndDlg,IDC_ROUNDDOWN))
  722. g_this->roundmode=0;
  723. return 0;
  724. return 0;
  725. }
  726. return 0;
  727. }
  728. HWND C_THISCLASS::conf(HINSTANCE hInstance, HWND hwndParent)
  729. {
  730. g_this = this;
  731. return WASABI_API_CREATEDIALOG(IDD_CFG_BLUR,hwndParent,g_DlgProc);
  732. }
  733. #else
  734. C_RBASE *R_Blur(char *desc) { return NULL; }
  735. #endif