123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542 |
- #ifndef _BLENDING_H_
- #define _BLENDING_H_
- //#include <bfc/common.h>
- #include <bfc/platform/types.h>
- #include <tataki/export.h>
- #ifdef _WIN64
- #define NO_MMX
- #endif
- class TATAKIAPI Blenders
- {
- public:
- static void init();
- static unsigned int inline BLEND_ADJ1(unsigned int a, unsigned int b, int alpha);
- static unsigned int inline BLEND_ADJ2(unsigned int a, unsigned int b);
- static unsigned int inline BLEND_ADJ3(unsigned int a, unsigned int b, int alpha);
- static unsigned int inline BLEND_MUL(unsigned int a, int v);
- static unsigned int inline BLEND_AVG(unsigned int a, unsigned int b);
- static unsigned int inline BLEND4(unsigned int *p1, unsigned int w, int xp, int yp);
- #ifndef NO_MMX
- static int inline MMX_AVAILABLE() { return mmx_available; }
- static unsigned int inline BLEND_ADJ1_MMX(unsigned int a, unsigned int b, int alpha);
- static unsigned int inline BLEND_ADJ2_MMX(unsigned int a, unsigned int b);
- static unsigned int inline BLEND_ADJ3_MMX(unsigned int a, unsigned int b, int alpha);
- static unsigned int inline BLEND_MUL_MMX(unsigned int a, int v);
- static unsigned int inline BLEND_AVG_MMX(unsigned int a, unsigned int b);
- static unsigned int inline BLEND4_MMX(unsigned int *p1, unsigned int w, int xp, int yp);
- static void inline BLEND_MMX_END()
- {
- #if defined(WIN32) && !defined(_WIN64)
- if (mmx_available) __asm emms;
- #endif
- #ifdef LINUX
- if (mmx_available) __asm__ volatile ( "emms" : : );
- #endif
- }
- #endif
- //private:
- static uint8_t alphatable[256][256];
- #ifndef NO_MMX
- static int mmx_available;
- #endif
- };
- // NON MMX
- // average blend of a and b.
- unsigned int inline Blenders::BLEND_AVG(unsigned int a, unsigned int b)
- {
- return ((a >> 1)&~((1 << 7) | (1 << 15) | (1 << 23))) + ((b >> 1)&~((1 << 7) | (1 << 15) | (1 << 23)));
- }
- // multiplies 32 bit color A by scalar V (0-255)
- unsigned int inline Blenders::BLEND_MUL(unsigned int a, int v)
- {
- register int t;
- t = Blenders::alphatable[a & 0xFF][v];
- t |= Blenders::alphatable[(a & 0xFF00) >> 8][v] << 8;
- t |= Blenders::alphatable[(a & 0xFF0000) >> 16][v] << 16;
- t |= Blenders::alphatable[(a & 0xFF000000) >> 24][v] << 24;
- return t;
- }
- // V is scalar (0-255), (1.0-V)*b + V*a
- unsigned int inline Blenders::BLEND_ADJ1(unsigned int a, unsigned int b, int v)
- {
- register int t;
- t = Blenders::alphatable[b & 0xFF][0xFF - v] + Blenders::alphatable[a & 0xFF][v];
- t |= (Blenders::alphatable[(b & 0xFF00) >> 8][0xFF - v] + Blenders::alphatable[(a & 0xFF00) >> 8][v]) << 8;
- t |= (Blenders::alphatable[(b & 0xFF0000) >> 16][0xFF - v] + Blenders::alphatable[(a & 0xFF0000) >> 16][v]) << 16;
- t |= (Blenders::alphatable[(b & 0xFF000000) >> 24][0xFF - v] + Blenders::alphatable[(a & 0xFF000000) >> 24][v]) << 24;
- return t;
- }
- // returns a*(1.0-Alpha(b)) + b
- unsigned int inline Blenders::BLEND_ADJ2(unsigned int a, unsigned int b)
- {
- register int t, z;
- int v = 0xff - ((b >> 24) & 0xff);
- t = Blenders::alphatable[a & 0xFF][v] + (b & 0xFF);
- if (t > 0xFF) t = 0xff;
- z = (Blenders::alphatable[(a & 0xFF00) >> 8][v] << 8) + (b & 0xFF00);
- if (z > 0xFF00) z = 0xff00;
- t |= z;
- z = (Blenders::alphatable[(a & 0xFF0000) >> 16][v] << 16) + ((b & 0xFF0000));
- if (z > 0xFF0000) z = 0xff0000;
- t |= z;
- z = (Blenders::alphatable[(a & 0xFF000000) >> 24][v]) + ((b & 0xFF000000) >> 24);
- if (z > 0xFF) z = 0xff;
- return t | (z << 24);
- }
- // returns a*(1-Alpha(b)*W) + b*W, clamped (W is scalar 0-0xff).
- unsigned int inline Blenders::BLEND_ADJ3(unsigned int a, unsigned int b, int w)
- {
- register int t, z;
- int v = 0xff - Blenders::alphatable[(b >> 24) & 0xff][w];
- t = Blenders::alphatable[a & 0xFF][v] + Blenders::alphatable[b & 0xFF][w];
- if (t > 0xFF) t = 0xFF;
- z = Blenders::alphatable[(a & 0xFF00) >> 8][v] + Blenders::alphatable[(b & 0xFF00) >> 8][w];
- if (z > 0xFF) z = 0xFF;
- t |= z << 8;
- z = Blenders::alphatable[(a & 0xFF0000) >> 16][v] + Blenders::alphatable[(b & 0xFF0000) >> 16][w];
- if (z > 0xFF) z = 0xFF;
- t |= z << 16;
- z = Blenders::alphatable[(a & 0xFF000000) >> 24][v] + Blenders::alphatable[(b & 0xFF000000) >> 24][w];
- if (z > 0xFF) z = 0xFF;
- return t | (z << 24);
- }
- unsigned int __inline Blenders::BLEND4(unsigned int *p1, unsigned int w, int xp, int yp)
- {
- register int t;
- uint8_t a1, a2, a3, a4;
- xp = (xp >> 8) & 0xff;
- yp = (yp >> 8) & 0xff;
- a1 = alphatable[255 - xp][255 - yp];
- a2 = alphatable[xp][255 - yp];
- a3 = alphatable[255 - xp][yp];
- a4 = alphatable[xp][yp];
- t = alphatable[p1[0] & 0xff][a1] + alphatable[p1[1] & 0xff][a2] + alphatable[p1[w] & 0xff][a3] + alphatable[p1[w + 1] & 0xff][a4];
- t |= (alphatable[(p1[0] >> 8) & 0xff][a1] + alphatable[(p1[1] >> 8) & 0xff][a2] + alphatable[(p1[w] >> 8) & 0xff][a3] + alphatable[(p1[w + 1] >> 8) & 0xff][a4]) << 8;
- t |= (alphatable[(p1[0] >> 16) & 0xff][a1] + alphatable[(p1[1] >> 16) & 0xff][a2] + alphatable[(p1[w] >> 16) & 0xff][a3] + alphatable[(p1[w + 1] >> 16) & 0xff][a4]) << 16;
- t |= (alphatable[(p1[0] >> 24) & 0xff][a1] + alphatable[(p1[1] >> 24) & 0xff][a2] + alphatable[(p1[w] >> 24) & 0xff][a3] + alphatable[(p1[w + 1] >> 24) & 0xff][a4]) << 24;
- return t;
- }
- #ifndef NO_MMX
#ifdef WIN32
// Lower the warning level for the MMX section and silence C4799 ("function
// has no EMMS instruction"); the matching pop follows the MMX functions.
#pragma warning( push, 1 )
#pragma warning(disable: 4799)
#define MMX_CONST const
#else
#define MMX_CONST
#endif
// 64-bit operands for the MMX code below, stored as two 32-bit halves.
// Four 16-bit lanes of 0x0100 (256): a weight is subtracted from this to get
// its complement.
static unsigned int MMX_CONST Blenders__mmx_revn2[2] = {0x01000100, 0x01000100};
// All zero bits: unpacking against it widens 8-bit channels to 16-bit lanes.
// Initialised explicitly because a const object needs an initializer in C++.
static unsigned int MMX_CONST Blenders__mmx_zero[2] = {0, 0};
// 1 in the lowest 16-bit lane only.
static unsigned int MMX_CONST Blenders__mmx_one[2] = {1, 0};
#undef MMX_CONST
- /// MMX
- // average blend of a and b.
- unsigned int inline Blenders::BLEND_AVG_MMX(unsigned int a, unsigned int b)
- {
- return ((a >> 1)&~((1 << 7) | (1 << 15) | (1 << 23))) + ((b >> 1)&~((1 << 7) | (1 << 15) | (1 << 23)));
- }
- // multiplies 32 bit color A by scalar V (0-255)
- unsigned int inline Blenders::BLEND_MUL_MMX(unsigned int a, int v)
- {
- #ifdef WIN32
- __asm
- {
- movd mm3, [v] // VVVVVVVV
- movd mm0, [a]
- packuswb mm3, mm3 // 0000HHVV
- punpcklbw mm0, [Blenders__mmx_zero]
- punpcklwd mm3, mm3 // HHVVHHVV
- punpckldq mm3, mm3 // HHVVHHVV HHVVHHVV
- pmullw mm0, mm3
- psrlw mm0, 8
- packuswb mm0, mm0
- movd eax, mm0
- }
- #else
- __asm__ volatile (
- "movd %0, %%mm3\n"
- "movd %1, %%mm0\n"
- "packuswb %%mm3, %%mm3\n"
- "punpcklbw (Blenders__mmx_zero), %%mm0\n"
- "punpcklwd %%mm3, %%mm3\n"
- "punpckldq %%mm3, %%mm3\n"
- "pmullw %%mm3, %%mm0\n"
- "psrlw $8, %%mm0\n"
- "packuswb %%mm0, %%mm0\n"
- "movd %%mm0, %%eax\n"
- :
- : "m" (v), "m" (a)
- : "%mm0", "%mm3" );
- #endif
- }
- // V is scalar (0-255), (1.0-V)*b + V*a
- unsigned int inline Blenders::BLEND_ADJ1_MMX(unsigned int a, unsigned int b, int v)
- {
- #ifdef WIN32
- __asm
- {
- movd mm3, [v] // VVVVVVVV
- movd mm0, [a]
- packuswb mm3, mm3 // 0000HHVV
- movd mm1, [b]
- paddusw mm3, [Blenders__mmx_one]
- movq mm4, [Blenders__mmx_revn2]
- punpcklwd mm3, mm3 // HHVVHHVV
- punpcklbw mm0, [Blenders__mmx_zero]
- punpckldq mm3, mm3 // HHVVHHVV HHVVHHVV
- punpcklbw mm1, [Blenders__mmx_zero]
- psubw mm4, mm3
- pmullw mm0, mm3
- pmullw mm1, mm4
- paddw mm0, mm1
- psrlw mm0, 8
- packuswb mm0, mm0
- movd eax, mm0
- }
- #else
- __asm__ volatile (
- "movd %0, %%mm3\n"
- "movd %1, %%mm0\n"
- "packuswb %%mm3, %%mm3\n"
- "movd %2, %%mm1\n"
- "paddusw (Blenders__mmx_one), %%mm3\n"
- "movq (Blenders__mmx_revn2), %%mm4\n"
- "punpcklwd %%mm3, %%mm3\n"
- "punpcklbw (Blenders__mmx_zero), %%mm0\n"
- "punpckldq %%mm3, %%mm3\n"
- "punpcklbw (Blenders__mmx_zero), %%mm1\n"
- "psubw %%mm3, %%mm4\n"
- "pmullw %%mm3, %%mm0\n"
- "pmullw %%mm4, %%mm1\n"
- "paddw %%mm1, %%mm0\n"
- "psrlw $8, %%mm0\n"
- "packuswb %%mm0, %%mm0\n"
- "movd %%mm0, %%eax\n"
- :
- : "m" (v), "m" (a), "m" (b)
- : "%mm0", "%mm1", "%mm3", "%mm4" );
- #endif
- }
- // returns a*(1.0-Alpha(b)) + b
- unsigned int inline Blenders::BLEND_ADJ2_MMX(unsigned int a, unsigned int b)
- {
- #ifdef WIN32
- __asm
- {
- movd mm3, [b] // VVVVVVVV
- movq mm4, [Blenders__mmx_revn2]
- movd mm0, [a]
- psrld mm3, 24
- movd mm1, [b]
- paddusw mm3, [Blenders__mmx_one]
- punpcklwd mm3, mm3 // HHVVHHVV
- punpcklbw mm0, [Blenders__mmx_zero]
- punpckldq mm3, mm3 // HHVVHHVV HHVVHHVV
- punpcklbw mm1, [Blenders__mmx_zero]
- psubw mm4, mm3
- pmullw mm0, mm4
- // stall
- // stall
- // stall
- psrlw mm0, 8
- // stall
- paddw mm0, mm1
- // stall
- packuswb mm0, mm0
- // stall
- movd eax, mm0
- }
- #else
- __asm__ volatile (
- "movd %1, %%mm3\n"
- "movq (Blenders__mmx_revn2), %%mm4\n"
- "movd %0, %%mm0\n"
- "psrld $24, %%mm3\n"
- "movd %1, %%mm1\n"
- "paddusw (Blenders__mmx_one), %%mm3\n"
- "punpcklwd %%mm3, %%mm3\n"
- "punpcklbw (Blenders__mmx_zero), %%mm0\n"
- "punpckldq %%mm3, %%mm3\n"
- "punpcklbw (Blenders__mmx_zero), %%mm1\n"
- "psubw %%mm3, %%mm4\n"
- "pmullw %%mm4, %%mm0\n"
- "psrlw $8, %%mm0\n"
- "paddw %%mm1, %%mm0\n"
- "packuswb %%mm0, %%mm0\n"
- "movd %%mm0, %%eax\n"
- :
- : "m" (a), "m" (b)
- : "%esi", "%mm0", "%mm1", "%mm3", "%mm4" );
- #endif
- }
- // returns a*(1-Alpha(b)*W) + b*W, clamped (W is scalar 0-0xff).
- unsigned int inline Blenders::BLEND_ADJ3_MMX(unsigned int a, unsigned int b, int w)
- {
- #ifdef WIN32
- __asm
- {
- movd mm3, [b] // VVVVVVVV
- movd mm5, [w]
- movd mm0, [a]
- psrld mm3, 24
- movd mm1, [b]
- paddusw mm3, [Blenders__mmx_one]
- movq mm4, [Blenders__mmx_revn2]
- pmullw mm3, mm5
- packuswb mm5, mm5
- punpcklbw mm0, [Blenders__mmx_zero]
- punpcklwd mm5, mm5
- punpcklbw mm1, [Blenders__mmx_zero]
- psrlw mm3, 8
- punpckldq mm5, mm5
- paddusw mm3, [Blenders__mmx_one]
- punpcklwd mm3, mm3 // HHVVHHVV
- punpckldq mm3, mm3 // HHVVHHVV HHVVHHVV
- psubw mm4, mm3
- pmullw mm0, mm4
- pmullw mm1, mm5
- paddusw mm0, mm1
- psrlw mm0, 8
- packuswb mm0, mm0
- movd eax, mm0
- }
- #else
- __asm__ volatile (
- "movd %2, %%mm3\n"
- "movd %0, %%mm5\n"
- "movd %1, %%mm0\n"
- "psrld $24, %%mm3\n"
- "movd %2, %%mm1\n"
- "paddusw (Blenders__mmx_one), %%mm3\n"
- "movq (Blenders__mmx_revn2), %%mm4\n"
- "pmullw %%mm5, %%mm3\n"
- "packuswb %%mm5, %%mm5 \n"
- "punpcklbw (Blenders__mmx_zero), %%mm0\n"
- "punpcklwd %%mm5, %%mm5\n"
- "punpcklbw (Blenders__mmx_zero), %%mm1\n"
- "psrlw $8, %%mm3\n"
- "punpckldq %%mm5, %%mm5\n"
- "paddusw (Blenders__mmx_one), %%mm3\n"
- "punpcklwd %%mm3, %%mm3\n"
- "punpckldq %%mm3, %%mm3\n"
- "psubw %%mm3, %%mm4\n"
- "pmullw %%mm4, %%mm0\n"
- "pmullw %%mm5, %%mm1\n"
- "paddusw %%mm1, %%mm0\n"
- "psrlw $8, %%mm0\n"
- "packuswb %%mm0, %%mm0\n"
- "movd %%mm0, %%eax\n"
- :
- : "m" (w), "m" (a), "m" (b)
- : "%mm0", "%mm1", "%mm4", "%mm3", "%mm5" );
- #endif
- }
- // does bilinear filtering. p1 is upper left pixel, w is width of framebuffer
- // xp and yp's low 16 bits are used for the subpixel positioning.
- unsigned int inline Blenders::BLEND4_MMX(unsigned int *p1, unsigned int w, int xp, int yp)
- {
- #ifdef WIN32
- __asm
- {
- movd mm6, xp
- mov eax, p1
- movd mm7, yp
- mov esi, w
- movq mm4, Blenders__mmx_revn2
- psrlw mm6, 8
- movq mm5, Blenders__mmx_revn2
- psrlw mm7, 8
- movd mm0, [eax]
- punpcklwd mm6, mm6
- movd mm1, [eax + 4]
- punpcklwd mm7, mm7
- movd mm2, [eax + esi*4]
- punpckldq mm6, mm6
- movd mm3, [eax + esi*4 + 4]
- punpckldq mm7, mm7
- punpcklbw mm0, [Blenders__mmx_zero]
- psubw mm4, mm6
- punpcklbw mm1, [Blenders__mmx_zero]
- pmullw mm0, mm4
- punpcklbw mm2, [Blenders__mmx_zero]
- pmullw mm1, mm6
- punpcklbw mm3, [Blenders__mmx_zero]
- psubw mm5, mm7
- pmullw mm2, mm4
- pmullw mm3, mm6
- paddw mm0, mm1
- // stall (mm0)
- psrlw mm0, 8
- // stall (waiting for mm3/mm2)
- paddw mm2, mm3
- pmullw mm0, mm5
- psrlw mm2, 8
- // stall (mm2)
- pmullw mm2, mm7
- // stall
- // stall (mm2)
- paddw mm0, mm2
- // stall
- psrlw mm0, 8
- // stall
- packuswb mm0, mm0
- // stall
- movd eax, mm0
- }
- #else
- __asm__ volatile (
- "movd %2, %%mm6\n"
- "mov %0, %%eax\n"
- "movd %3, %%mm7\n"
- "mov %1, %%esi\n"
- "movq (Blenders__mmx_revn2), %%mm4\n"
- "psrlw $8, %%mm6\n"
- "movq (Blenders__mmx_revn2), %%mm5\n"
- "psrlw $8, %%mm7\n"
- "movd (%%eax), %%mm0\n"
- "punpcklwd %%mm6,%%mm6\n"
- "movd 4(%%eax), %%mm1\n"
- "punpcklwd %%mm7,%%mm7\n"
- "movd (%%eax,%%esi,4), %%mm2\n"
- "punpckldq %%mm6,%%mm6\n"
- "movd 4(%%eax,%%esi,4), %%mm3\n"
- "punpckldq %%mm7,%%mm7\n"
- "punpcklbw (Blenders__mmx_zero), %%mm0\n"
- "psubw %%mm6, %%mm4\n"
- "punpcklbw (Blenders__mmx_zero), %%mm1\n"
- "pmullw %%mm4, %%mm0\n"
- "punpcklbw (Blenders__mmx_zero), %%mm2\n"
- "pmullw %%mm6, %%mm1\n"
- "punpcklbw (Blenders__mmx_zero), %%mm3\n"
- "psubw %%mm7, %%mm5\n"
- "pmullw %%mm4, %%mm2\n"
- "pmullw %%mm6, %%mm3\n"
- "paddw %%mm1, %%mm0\n"
- "psrlw $8, %%mm0\n"
- "paddw %%mm3, %%mm2\n"
- "pmullw %%mm5, %%mm0\n"
- "psrlw $8, %%mm2\n"
- "pmullw %%mm7, %%mm2\n"
- "paddw %%mm2, %%mm0\n"
- "psrlw $8, %%mm0\n"
- "packuswb %%mm0, %%mm0\n"
- "movd %%mm0, %%eax\n"
- :
- : "m" (p1), "m" (w), "m" (xp), "m" (yp)
- : "%mm0", "%mm1", "%mm4", "%mm3", "%mm5" );
- #endif
- }
- #ifdef WIN32
- #pragma warning( pop )
- #endif
- #endif // ndef NO_MMX
- #endif
|