12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267 |
- /****************************************************************************
- *
- * Module Title : scaleopt.cpp
- *
- * Description : Optimized scaling functions
- *
- ****************************************************************************/
- /****************************************************************************
- * Module Statics
- ****************************************************************************/
- __declspec(align(16)) const static unsigned short oneFifth[] = { 51, 51, 51, 51 }; // 1/5 scaled by 256, one copy per 16-bit lane
- __declspec(align(16)) const static unsigned short twoFifths[] = { 102, 102, 102, 102 }; // 2/5 scaled by 256
- __declspec(align(16)) const static unsigned short threeFifths[] = { 154, 154, 154, 154 }; // 3/5 scaled by 256
- __declspec(align(16)) const static unsigned short fourFifths[] = { 205, 205, 205, 205 }; // 4/5 scaled by 256
- __declspec(align(16)) const static unsigned short roundValues[] = { 128, 128, 128, 128 }; // rounding term added before each ">> 8"
- __declspec(align(16)) const static unsigned short fourOnes[]= { 1, 1, 1, 1}; // rounding term for the (a + b + 1) >> 1 averages
- __declspec(align(16)) const static unsigned short const45_2[] = {205, 154, 102, 51 }; // 4-to-5 horizontal: per-lane weights for the right-hand pixel of each pair
- __declspec(align(16)) const static unsigned short const45_1[] = { 51, 102, 154, 205 }; // 4-to-5 horizontal: per-lane weights for the left-hand pixel of each pair
- __declspec(align(16)) const static unsigned char mask45[] = { 0, 0, 0, 0, 0, 0, 255, 0}; // keeps only byte 6 of a quadword; the 4-to-5 tail uses it to duplicate the last pixel
- __declspec(align(16)) const static unsigned short const35_2[] = { 154, 51, 205, 102 }; // 3-to-5 horizontal: per-lane weights for the right-hand pixel of each pair
- __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, 154 }; // 3-to-5 horizontal: per-lane weights for the left-hand pixel of each pair
- #if defined(__cplusplus)
- extern "C" {
- #endif
- /****************************************************************************
- *
- * ROUTINE : HorizontalLine_3_5_Scale_MMX
- *
- * INPUTS : const unsigned char *source : First pixel of the source row.
- * unsigned int sourceWidth : Number of source pixels (bytes).
- * unsigned char *dest : First byte of the output row.
- * unsigned int destWidth : Unused.
- *
- * OUTPUTS : Five bytes are stored to dest for every three source pixels.
- *
- * RETURNS : void
- *
- * FUNCTION : 3 to 5 up-scaling of a horizontal line of pixels.
- * For a group a, b, c followed by pixel n the five outputs are
- * a,
- * (102a + 154b + 128) >> 8,
- * (205b + 51c + 128) >> 8,
- * ( 51b + 205c + 128) >> 8,
- * (154c + 102n + 128) >> 8.
- * In the final group c is used in place of n, so the last
- * output is c itself.
- *
- * SPECIAL NOTES : The main loop is bottom-tested: one group is always processed
- * by the loop before the final group is handled, whatever the
- * value of sourceWidth.
- * Every group is fetched with one 4-byte load, so the final
- * group reads one byte past its three pixels; that byte is
- * discarded before use.
- * The loop bound is an address and is compared unsigned (jb).
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void HorizontalLine_3_5_Scale_MMX
- (
- const unsigned char *source,
- unsigned int sourceWidth,
- unsigned char *dest,
- unsigned int destWidth
- )
- {
- (void) destWidth; // parameter is not used
- __asm
- {
- push ebx // ebx is used as scratch below and restored before leaving
- mov esi, source // esi = read pointer
- mov edi, dest // edi = write pointer
- 
- mov ecx, sourceWidth
- lea edx, [esi+ecx-3]; // edx = address of the final 3-pixel group
- movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx
- movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx
- 
- movq mm4, roundValues // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
- HorizLine_3_5_Loop:
- 
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03
- mov ebx, eax
- 
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
- 
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx
- 
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx
- 
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 // right-hand pixels * their weights
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 // left-hand pixels * their weights
- mov [edi], ebx // write output 00 xx xx xx (bytes 1-3 are overwritten below)
- add esi, 3 // next source group
- add edi, 5 // next output group
- paddw mm0, mm1 // weighted sums
- 
- paddw mm0, mm4 // + 128
- psrlw mm0, 8 // back to 8-bit range
- cmp esi, edx // reached the final group?
- packuswb mm0, mm7 // four result bytes in the low dword
- 
- movd DWORD Ptr [edi-4], mm0 // outputs 1-4 of the group just processed
- jb HorizLine_3_5_Loop // unsigned: esi and edx are addresses (MMX ops leave the flags intact)
- //Exit:
- mov eax, DWORD PTR [esi] // eax = 00 01 02 03 (03 lies past the row and is dropped)
- mov ebx, eax
- 
- and ebx, 0xffff00 // ebx = xx 01 02 xx
- mov ecx, eax // ecx = 00 01 02 03
- 
- and eax, 0xffff0000 // eax = xx xx 02 03
- xor ecx, eax // ecx = 00 01 xx xx
- shr ebx, 8 // ebx = 01 02 xx xx
- or eax, ebx // eax = 01 02 02 03
- shl eax, 8 // eax = xx 01 02 02
- and eax, 0xffff0000 // eax = xx xx 02 02
- or eax, ebx // eax = 01 02 02 02
- shl ebx, 16 // ebx = xx xx 01 02
- movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx
- 
- or ebx, ecx // ebx = 00 01 01 02
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx
- 
- movd mm0, ebx // mm0 = 00 01 01 02
- pmullw mm1, mm6 // right-hand pixels * their weights
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx
- pmullw mm0, mm5 // left-hand pixels * their weights
- mov [edi], ebx // write output 00 xx xx xx (bytes 1-3 are overwritten below)
- paddw mm0, mm1 // weighted sums
- paddw mm0, mm4 // + 128
- psrlw mm0, 8 // back to 8-bit range
- packuswb mm0, mm7 // four result bytes in the low dword
- movd DWORD Ptr [edi+1], mm0 // outputs 1-4; output 4 is pixel 02 because 154 + 102 = 256
- pop ebx
- 
- }
- /*
- C version of the same filter, kept for reference only. Its for loop is
- top-tested, whereas the assembly loop above is bottom-tested.
- const unsigned char *src = source;
- unsigned char *des = dest;
- unsigned int a, b, c ;
- unsigned int i;
- (void) destWidth;
- for ( i=0; i<sourceWidth-3; i+=3 )
- {
- a = src[0];
- b = src[1];
- des [0] = (UINT8) (a);
- // 2 * left + 3 * right /5
- des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8);
- c = src[2] ;
- // 4 * left + 1 * right /5
- des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8);
- // 1 * left + 4 * right /5
- des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8);
- a = src[3];
- // 3 * left + 2 * right /5
- des [4] = (UINT8) (( c * 154 + a * 102 + 128 ) >> 8);
- src += 3;
- des += 5;
- }
- a = src[0];
- b = src[1];
- des [0] = (UINT8) (a);
- // 2 * left + 3 * right /5
- des [1] = (UINT8) (( a * 102 + 154 * b + 128 ) >> 8);
- c = src[2] ;
- // 4 * left + 1 * right /5
- des [2] = (UINT8) (( b * 205 + c * 51 + 128 ) >> 8);
- // 1 * left + 4 * right /5
- des [3] = (UINT8) (( b * 51 + c * 205 + 128 ) >> 8);
- 
- des [4] = (UINT8) (c);
- */
- }
- /****************************************************************************
- *
- * ROUTINE : HorizontalLine_4_5_Scale_MMX
- *
- * INPUTS : const unsigned char *source : First pixel of the source row.
- * unsigned int sourceWidth : Number of source pixels (bytes).
- * unsigned char *dest : First byte of the output row.
- * unsigned int destWidth : Unused.
- *
- * OUTPUTS : Ten bytes are stored to dest for every eight source pixels.
- *
- * RETURNS : void
- *
- * FUNCTION : 4 to 5 up-scaling of a horizontal line of pixels.
- * For a group a, b, c, d followed by pixel n the five outputs are
- * a,
- * ( 51a + 205b + 128) >> 8,
- * (102b + 154c + 128) >> 8,
- * (154c + 102d + 128) >> 8,
- * (205d + 51n + 128) >> 8.
- * Two groups (8 pixels) are handled per step. In the final
- * step the last pixel is used in place of n, so the last
- * output is that pixel itself.
- *
- * SPECIAL NOTES : The main loop is bottom-tested: eight pixels are always
- * processed by the loop before the final eight are handled,
- * whatever the value of sourceWidth.
- * The loop reads the 8 bytes starting at source + 1, i.e. one
- * byte into the following step.
- * The loop bound is an address and is compared unsigned (jb).
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void HorizontalLine_4_5_Scale_MMX
- (
- const unsigned char *source,
- unsigned int sourceWidth,
- unsigned char *dest,
- unsigned int destWidth
- )
- {
- (void)destWidth; // parameter is not used
- __asm
- {
- 
- mov esi, source // esi = read pointer
- mov edi, dest // edi = write pointer
- 
- mov ecx, sourceWidth
- lea edx, [esi+ecx-8]; // edx = address of the final 8-pixel step
- movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx
- movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx
- 
- movq mm4, roundValues // mm4 = 80 xx 80 xx 80 xx 80 xx
- pxor mm7, mm7 // clear mm7
- HorizLine_4_5_Loop:
- 
- movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08
- 
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
- 
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
- 
- movd DWORD PTR [edi+5], mm2 // write output 05 xx xx xx (source pixel 04)
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51
- 
- paddw mm0, mm1 // weighted sums, first group
- paddw mm0, mm4 // added round values
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- add edi, 10 // next output step
- add esi, 8 // next source step
- paddw mm2, mm3 // weighted sums, second group
- paddw mm2, mm4 // added round values
- cmp esi, edx // reached the final step?
- psrlw mm2, 8
- packuswb mm2, mm7
- movd DWORD PTR [edi-4], mm2 // write output 06 07 08 09
- jb HorizLine_4_5_Loop // unsigned: esi and edx are addresses (MMX ops leave the flags intact)
- //Exit:
- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07
- movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07
- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
- movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00
- pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00
- 
- psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07
- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
- 
- movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 07
- movd DWORD PTR [edi], mm0 // write output 00 xx xx xx
- punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx
- 
- punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx
- pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205
- pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51
- punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx
- 
- movd DWORD PTR [edi+5], mm2 // write output 05 xx xx xx (source pixel 04)
- pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205
- punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 07 xx
- pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51
- 
- paddw mm0, mm1 // weighted sums, first group
- paddw mm0, mm4 // added round values
- psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx
- packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx
- movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04
- paddw mm2, mm3 // weighted sums, second group
- paddw mm2, mm4 // added round values
- psrlw mm2, 8
- 
- packuswb mm2, mm7
- movd DWORD PTR [edi+6], mm2 // write output 06 07 08 09; output 09 is pixel 07 because 205 + 51 = 256
- }
- /*
- C version of the same filter, kept for reference only. It handles four
- pixels per pass and its for loop is top-tested, whereas the assembly above
- handles eight pixels per pass in a bottom-tested loop.
- const unsigned char *src = source;
- unsigned char *des = dest;
- unsigned int a, b, c ;
- unsigned i;
- (void) destWidth;
- 
- for ( i=0; i<sourceWidth-4; i+=4 )
- {
- a = src[0];
- b = src[1];
- des [0] = (UINT8) a;
- des [1] = (UINT8) (( a * 51 + 205 * b + 128) >> 8);
- c = src[2] * 154;
- a = src[3];
- des [2] = (UINT8) (( b * 102 + c + 128) >> 8);
- des [3] = (UINT8) (( c + 102 * a + 128) >> 8);
- b = src[4];
- des [4] = (UINT8) (( a * 205 + 51 * b + 128) >> 8);
- src += 4;
- des += 5;
- }
- a = src[0];
- b = src[1];
- des [0] = (UINT8) (a);
- des [1] = (UINT8) (( a * 51 + 205 * b + 128) >> 8);
- c = src[2] * 154;
- a = src[3];
- des [2] = (UINT8) (( b * 102 + c + 128) >> 8);
- des [3] = (UINT8) (( c + 102 * a + 128) >> 8);
- des [4] = (UINT8) (a);
- */
- }
- /****************************************************************************
- *
- * ROUTINE : VerticalBand_4_5_Scale_MMX
- *
- * INPUTS : unsigned char *dest : First line of the band; scaled in place.
- * unsigned int destPitch : Byte offset from one line to the next.
- * unsigned int destWidth : Number of bytes to process in each line.
- *
- * OUTPUTS : Lines 1 to 4 of the band (counted from dest) are rewritten.
- *
- * RETURNS : void
- *
- * FUNCTION : 4 to 5 up-scaling of a 4 pixel high band of pixels.
- * With a, b, c, d the lines at offsets 0-3 and an the line at
- * offset 5 (all in units of destPitch):
- * line 1 = (a*51 + b*205 + 128) >> 8
- * line 2 = (b*102 + c*154 + 128) >> 8
- * line 3 = (c*154 + d*102 + 128) >> 8
- * line 4 = (d*205 + an*51 + 128) >> 8
- * Line 0 is left untouched.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has a "C" only
- * version.
- * Eight bytes per line are handled per iteration and the loop
- * is bottom-tested, so a whole number of 8-byte columns (at
- * least one) is always read and written.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void VerticalBand_4_5_Scale_MMX
- (
- unsigned char *dest,
- unsigned int destPitch,
- unsigned int destWidth
- )
- {
- __asm
- {
- 
- mov esi, dest // Get the source and destination pointer
- mov ecx, destPitch // Get the pitch size
- 
- lea edi, [esi+ecx*2] // two lines below
- add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, destWidth // Loop counter
- VS_4_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
- 
- movq mm5, oneFifth
- punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 1/5
- 
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 1/5
- movq mm6, fourFifths // constant
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1] (unpacked low/high)
- // mm0 --- Src[2]
- // mm7 for unpacking
- movq mm5, twoFifths
- movq mm2, mm0 // make a copy
- pmullw mm1, mm5 // b * 2/5
- movq mm6, threeFifths
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
- 
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, roundValues // + 128
- paddw mm3, roundValues // + 128
- psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
- // mm0, mm2 --- Src[2] (unpacked low/high)
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
- pmullw mm0, mm6 // c * 3/5
- movq mm5, twoFifths // mm5 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
- 
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- 
- psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
- // mm1, mm3 --- Src[3] (unpacked low/high)
- // mm7 -- cleared for unpacking
- movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group
- movq mm5, fourFifths // mm5 = 4/5
- pmullw mm1, mm5 // d * 4/5
- movq mm6, oneFifth // mm6 = 1/5
- movq mm2, mm0 // make a copy
- pmullw mm3, mm5 // d * 4/5
- punpcklbw mm0, mm7 // unpack low
- 
- pmullw mm0, mm6 // an * 1/5
- punpckhbw mm2, mm7 // unpack high
- paddw mm1, mm0 // d * 4/5 + an * 1/5
- pmullw mm2, mm6 // an * 1/5
- paddw mm3, mm2 // d * 4/5 + an * 1/5
- paddw mm1, roundValues // + 128
- paddw mm3, roundValues // + 128
- psrlw mm1, 8
- 
- psrlw mm3, 8
- packuswb mm1, mm3 // des[4]
- movq QWORD ptr [edi+ecx], mm1 // write des[4]
- add edi, 8 // next 8-byte column
- add esi, 8
- sub edx, 8 // eight bytes of width done
- jg VS_4_5_loop // repeat while width remains
- }
- }
- /****************************************************************************
- *
- * ROUTINE : LastVerticalBand_4_5_Scale_MMX
- *
- * INPUTS : unsigned char *dest : First line of the band; scaled in place.
- * unsigned int destPitch : Byte offset from one line to the next.
- * unsigned int destWidth : Number of bytes to process in each line.
- *
- * OUTPUTS : Lines 1 to 4 of the band (counted from dest) are rewritten.
- *
- * RETURNS : None
- *
- * FUNCTION : 4 to 5 up-scaling of the last 4-pixel high band in an image.
- * With a, b, c, d the lines at offsets 0-3 (in units of
- * destPitch):
- * line 1 = (a*51 + b*205 + 128) >> 8
- * line 2 = (b*102 + c*154 + 128) >> 8
- * line 3 = (c*154 + d*102 + 128) >> 8
- * line 4 = copy of d as it was before line 3 was rewritten
- * Line 0 is left untouched.
- *
- * SPECIAL NOTES : Unlike VerticalBand_4_5_Scale_MMX, no line beyond offset 4
- * is accessed. The function also has a "C" only
- * version.
- * Eight bytes per line are handled per iteration and the loop
- * is bottom-tested, so a whole number of 8-byte columns (at
- * least one) is always read and written.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void LastVerticalBand_4_5_Scale_MMX
- (
- unsigned char *dest,
- unsigned int destPitch,
- unsigned int destWidth
- )
- {
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, destPitch // Get the pitch size
- 
- lea edi, [esi+ecx*2] // two lines below
- add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, destWidth // Loop counter
- LastVS_4_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
- 
- movq mm5, oneFifth
- punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 1/5
- 
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 1/5
- movq mm6, fourFifths // constant
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 4/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 4/5
- paddw mm0, mm4 // a * 1/5 + b * 4/5
- paddw mm2, mm5 // a * 1/5 + b * 4/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1] (unpacked low/high)
- // mm0 --- Src[2]
- // mm7 for unpacking
- movq mm5, twoFifths
- movq mm2, mm0 // make a copy
- pmullw mm1, mm5 // b * 2/5
- movq mm6, threeFifths
- punpcklbw mm0, mm7 // unpack low to word
- pmullw mm3, mm5 // b * 2/5
- movq mm4, mm0 // make copy of c
- punpckhbw mm2, mm7 // unpack high to word
- pmullw mm4, mm6 // c * 3/5
- movq mm5, mm2
- pmullw mm5, mm6 // c * 3/5
- paddw mm1, mm4 // b * 2/5 + c * 3/5
- 
- paddw mm3, mm5 // b * 2/5 + c * 3/5
- paddw mm1, roundValues // + 128
- paddw mm3, roundValues // + 128
- psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- movq mm1, [edi] // mm1=Src[3];
- movq QWORD ptr [edi+ecx], mm1 // write des[4]; plain copy of Src[3]
- // mm0, mm2 --- Src[2] (unpacked low/high)
- // mm1 --- Src[3]
- // mm6 --- 3/5
- // mm7 for unpacking
- pmullw mm0, mm6 // c * 3/5
- movq mm5, twoFifths // mm5 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm6 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- movq mm4, mm1 // make a copy
- punpckhbw mm3, mm7 // unpack high
- pmullw mm4, mm5 // d * 2/5
- movq mm6, mm3 // make a copy
- pmullw mm6, mm5 // d * 2/5
- 
- paddw mm0, mm4 // c * 3/5 + d * 2/5
- paddw mm2, mm6 // c * 3/5 + d * 2/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des[3]
- movq QWORD ptr [edi], mm0 // write des[3]
- // mm1, mm3 --- Src[3] (unpacked low/high); not used again
- // mm7 -- still zero for the next iteration
- add edi, 8 // next 8-byte column
- add esi, 8
- sub edx, 8 // eight bytes of width done
- jg LastVS_4_5_loop // repeat while width remains
- }
- }
- /****************************************************************************
- *
- * ROUTINE : VerticalBand_3_5_Scale_MMX
- *
- * INPUTS : unsigned char *dest : First line of the band; scaled in place.
- * unsigned int destPitch : Byte offset from one line to the next.
- * unsigned int destWidth : Number of bytes to process in each line.
- *
- * OUTPUTS : Lines 1 to 4 of the band (counted from dest) are rewritten.
- *
- * RETURNS : void
- *
- * FUNCTION : 3 to 5 up-scaling of a 3-pixel high band of pixels.
- * With a, b, c the lines at offsets 0-2 and an the line at
- * offset 5 (all in units of destPitch):
- * line 1 = (a*102 + b*154 + 128) >> 8
- * line 2 = (b*205 + c*51 + 128) >> 8
- * line 3 = (b*51 + c*205 + 128) >> 8
- * line 4 = (c*154 + an*102 + 128) >> 8
- * Line 0 is left untouched.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has a "C" only
- * version.
- * mm7 doubles as scratch in the middle of the loop body and is
- * zeroed again before it is next needed for unpacking.
- * Eight bytes per line are handled per iteration and the loop
- * is bottom-tested, so a whole number of 8-byte columns (at
- * least one) is always read and written.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void VerticalBand_3_5_Scale_MMX
- (
- unsigned char *dest,
- unsigned int destPitch,
- unsigned int destWidth
- )
- {
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, destPitch // Get the pitch size
- 
- lea edi, [esi+ecx*2] // two lines below
- add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, destWidth // Loop counter
- VS_3_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
- 
- movq mm5, twoFifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 2/5
- 
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 2/5
- movq mm6, threeFifths // mm6 = 3/5
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
- // mm1, mm3 --- Src[1] (unpacked low/high)
- // mm0 --- Src[2]
- // mm7 for unpacking
- movq mm4, mm1 // b low
- pmullw mm1, fourFifths // b * 4/5 low
- movq mm5, mm3 // b high
- pmullw mm3, fourFifths // b * 4/5 high
- 
- movq mm2, mm0 // c
- pmullw mm4, oneFifth // b * 1/5
- punpcklbw mm0, mm7 // c low
- pmullw mm5, oneFifth // b * 1/5
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
- pmullw mm6, oneFifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high (mm7 is no longer zero from here)
- pmullw mm7, oneFifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
- pmullw mm6, fourFifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
- pmullw mm7, fourFifths // c * 4/5 high
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
- paddw mm1, roundValues // + 128
- paddw mm3, roundValues // + 128
- psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- paddw mm4, roundValues // + 128
- paddw mm5, roundValues // + 128
- psrlw mm4, 8
- psrlw mm5, 8
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
- // mm0, mm2 --- Src[2] (c), unpacked low/high
- 
- pxor mm7, mm7 // clear mm7 for unpacking
- movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group
- movq mm5, threeFifths // mm5 = 3/5
- pmullw mm0, mm5 // c * 3/5
- movq mm6, twoFifths // mm6 = 2/5
- movq mm3, mm1 // make a copy
- pmullw mm2, mm5 // c * 3/5
- punpcklbw mm1, mm7 // unpack low
- 
- pmullw mm1, mm6 // an * 2/5
- punpckhbw mm3, mm7 // unpack high
- paddw mm0, mm1 // c * 3/5 + an * 2/5
- pmullw mm3, mm6 // an * 2/5
- paddw mm2, mm3 // c * 3/5 + an * 2/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- psrlw mm0, 8
- 
- psrlw mm2, 8
- packuswb mm0, mm2 // des[4]
- movq QWORD ptr [edi+ecx], mm0 // write des[4]
- add edi, 8 // next 8-byte column
- add esi, 8
- sub edx, 8 // eight bytes of width done
- jg VS_3_5_loop // repeat while width remains
- }
- }
- /****************************************************************************
- *
- * ROUTINE : LastVerticalBand_3_5_Scale_MMX
- *
- * INPUTS : unsigned char *dest : First line of the band; scaled in place.
- * unsigned int destPitch : Byte offset from one line to the next.
- * unsigned int destWidth : Number of bytes to process in each line.
- *
- * OUTPUTS : Lines 1 to 4 of the band (counted from dest) are rewritten.
- *
- * RETURNS : void
- *
- * FUNCTION : 3 to 5 up-scaling of the last 3-pixel high band of pixels.
- * With a, b, c the lines at offsets 0-2 (in units of destPitch):
- * line 1 = (a*102 + b*154 + 128) >> 8
- * line 2 = (b*205 + c*51 + 128) >> 8
- * line 3 = (b*51 + c*205 + 128) >> 8
- * line 4 = copy of c as it was before line 2 was rewritten
- * Line 0 is left untouched.
- *
- * SPECIAL NOTES : Unlike VerticalBand_3_5_Scale_MMX, no line beyond offset 4
- * is accessed. The function also has a "C" only
- * version.
- * mm7 doubles as scratch in the middle of the loop body, so it
- * is zeroed again at the bottom of the loop; the unpack
- * instructions at the top of the next iteration rely on it
- * being zero.
- * Eight bytes per line are handled per iteration and the loop
- * is bottom-tested, so a whole number of 8-byte columns (at
- * least one) is always read and written.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void LastVerticalBand_3_5_Scale_MMX
- (
- unsigned char *dest,
- unsigned int destPitch,
- unsigned int destWidth
- )
- {
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, destPitch // Get the pitch size
- 
- lea edi, [esi+ecx*2] // two lines below
- add edi, ecx // three lines below
- pxor mm7, mm7 // clear out mm7
- mov edx, destWidth // Loop counter
- LastVS_3_5_loop:
- movq mm0, QWORD ptr [esi] // src[0];
- movq mm1, QWORD ptr [esi+ecx] // src[1];
- movq mm2, mm0 // Make a copy
- punpcklbw mm0, mm7 // unpack low to word
- 
- movq mm5, twoFifths // mm5 = 2/5
- punpckhbw mm2, mm7 // unpack high to word
- pmullw mm0, mm5 // a * 2/5
- 
- movq mm3, mm1 // make a copy
- punpcklbw mm1, mm7 // unpack low to word
- pmullw mm2, mm5 // a * 2/5
- movq mm6, threeFifths // mm6 = 3/5
- movq mm4, mm1 // copy of low b
- pmullw mm4, mm6 // b * 3/5
- punpckhbw mm3, mm7 // unpack high to word
- movq mm5, mm3 // copy of high b
- pmullw mm5, mm6 // b * 3/5
- paddw mm0, mm4 // a * 2/5 + b * 3/5
- paddw mm2, mm5 // a * 2/5 + b * 3/5
- paddw mm0, roundValues // + 128
- paddw mm2, roundValues // + 128
- psrlw mm0, 8
- psrlw mm2, 8
- packuswb mm0, mm2 // des [1]
- movq QWORD ptr [esi+ecx], mm0 // write des[1]
- movq mm0, [esi+ecx*2] // mm0 = src[2]
- 
- // mm1, mm3 --- Src[1] (unpacked low/high)
- // mm0 --- Src[2]
- // mm7 for unpacking
- movq mm4, mm1 // b low
- pmullw mm1, fourFifths // b * 4/5 low
- 
- movq QWORD ptr [edi+ecx], mm0 // write des[4]; plain copy of Src[2]
- movq mm5, mm3 // b high
- pmullw mm3, fourFifths // b * 4/5 high
- 
- movq mm2, mm0 // c
- pmullw mm4, oneFifth // b * 1/5
- punpcklbw mm0, mm7 // c low
- pmullw mm5, oneFifth // b * 1/5
- movq mm6, mm0 // make copy of c low
- punpckhbw mm2, mm7 // c high
- pmullw mm6, oneFifth // c * 1/5 low
- movq mm7, mm2 // make copy of c high (mm7 is no longer zero from here)
- pmullw mm7, oneFifth // c * 1/5 high
- paddw mm1, mm6 // b * 4/5 + c * 1/5 low
- paddw mm3, mm7 // b * 4/5 + c * 1/5 high
- movq mm6, mm0 // make copy of c low
- pmullw mm6, fourFifths // c * 4/5 low
- movq mm7, mm2 // make copy of c high
- pmullw mm7, fourFifths // c * 4/5 high
- paddw mm4, mm6 // b * 1/5 + c * 4/5 low
- paddw mm5, mm7 // b * 1/5 + c * 4/5 high
- paddw mm1, roundValues // + 128
- paddw mm3, roundValues // + 128
- psrlw mm1, 8
- psrlw mm3, 8
- packuswb mm1, mm3 // des[2]
- movq QWORD ptr [esi+ecx*2], mm1 // write des[2]
- paddw mm4, roundValues // + 128
- paddw mm5, roundValues // + 128
- psrlw mm4, 8
- psrlw mm5, 8
- packuswb mm4, mm5 // des[3]
- movq QWORD ptr [edi], mm4 // write des[3]
- // mm0, mm2 --- Src[2] (c), unpacked low/high; not used again
- 
- pxor mm7, mm7 // re-zero mm7: it held c * 4/5 and the next iteration unpacks against it
- add edi, 8 // next 8-byte column
- add esi, 8
- sub edx, 8 // eight bytes of width done
- jg LastVS_3_5_loop // repeat while width remains
- }
- }
- /****************************************************************************
- *
- * ROUTINE : VerticalBand_1_2_Scale_MMX
- *
- * INPUTS : unsigned char *dest : First line of the band; scaled in place.
- * unsigned int destPitch : Byte offset from one line to the next.
- * unsigned int destWidth : Number of bytes to process in each line.
- *
- * OUTPUTS : Line 1 of the band (dest + destPitch) is rewritten.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of a band of pixels.
- * With a the line at dest and b the line at dest + 2*destPitch:
- * line 1 = (a + b + 1) >> 1
- * Lines 0 and 2 are only read.
- *
- * SPECIAL NOTES : The routine uses the first line of the band below
- * the current band. The function also has a "C" only
- * version.
- * Eight bytes per line are handled per iteration and the loop
- * is bottom-tested, so a whole number of 8-byte columns (at
- * least one) is always read and written.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void VerticalBand_1_2_Scale_MMX
- (
- unsigned char *dest,
- unsigned int destPitch,
- unsigned int destWidth
- )
- {
- __asm
- {
- 
- mov esi, dest // Get the source and destination pointer
- mov ecx, destPitch // Get the pitch size
- pxor mm7, mm7 // clear out mm7
- mov edx, destWidth // Loop counter
- VS_1_2_loop:
- movq mm0, [esi] // get Src[0]
- movq mm1, [esi + ecx * 2] // get Src[1] (two lines below Src[0])
- movq mm2, mm0 // make copy before unpack
- movq mm3, mm1 // make copy before unpack
- punpcklbw mm0, mm7 // low Src[0]
- movq mm6, fourOnes // mm6= 1, 1, 1, 1
- punpcklbw mm1, mm7 // low Src[1]
- paddw mm0, mm1 // low (a + b)
- 
- punpckhbw mm2, mm7 // high Src[0]
- paddw mm0, mm6 // low (a + b + 1)
- punpckhbw mm3, mm7 // high Src[1]
- paddw mm2, mm3 // high (a + b )
- psraw mm0, 1 // low (a + b +1 )/2
- paddw mm2, mm6 // high (a + b + 1)
- psraw mm2, 1 // high (a + b + 1)/2
- packuswb mm0, mm2 // pack results
- movq [esi+ecx], mm0 // write out eight bytes to the line in between
- add esi, 8 // next 8-byte column
- sub edx, 8 // eight bytes of width done
- jg VS_1_2_loop // repeat while width remains
- }
- 
- }
- /****************************************************************************
- *
- * ROUTINE : LastVerticalBand_1_2_Scale_MMX
- *
- * INPUTS : unsigned char *dest : Line to duplicate.
- * unsigned int destPitch : Byte offset from one line to the next.
- * unsigned int destWidth : Number of bytes to process in the line.
- *
- * OUTPUTS : The line at dest + destPitch is rewritten.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of the last band of pixels: the line at dest
- * is copied unchanged to the line at dest + destPitch.
- *
- * SPECIAL NOTES : Unlike VerticalBand_1_2_Scale_MMX, no line below the written
- * one is accessed. The function also has a "C" only
- * version.
- * Eight bytes are copied per iteration and the loop is
- * bottom-tested, so a whole number of 8-byte columns (at least
- * one) is always read and written.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void LastVerticalBand_1_2_Scale_MMX
- (
- unsigned char *dest,
- unsigned int destPitch,
- unsigned int destWidth
- )
- {
- __asm
- {
- mov esi, dest // Get the source and destination pointer
- mov ecx, destPitch // Get the pitch size
- mov edx, destWidth // Loop counter
- LastVS_1_2_loop:
- movq mm0, [esi] // get Src[0]
- movq [esi+ecx], mm0 // write out eight bytes
- add esi, 8 // next 8-byte column
- sub edx, 8 // eight bytes of width done
- jg LastVS_1_2_loop // repeat while width remains
- }
- }
- /****************************************************************************
- *
- * ROUTINE : HorizontalLine_1_2_Scale_MMX
- *
- * INPUTS : const unsigned char *source : First pixel of the source row.
- * unsigned int sourceWidth : Number of source pixels (bytes).
- * unsigned char *dest : First byte of the output row.
- * unsigned int destWidth : Unused.
- *
- * OUTPUTS : Sixteen bytes are stored to dest for every eight source pixels.
- *
- * RETURNS : void
- *
- * FUNCTION : 1 to 2 up-scaling of a horizontal line of pixels.
- * Each source pixel s[i] produces two outputs:
- * s[i] and (s[i] + s[i+1] + 1) >> 1.
- * In the final eight pixels the last pixel stands in for its
- * missing right-hand neighbour, so it is simply repeated.
- *
- * SPECIAL NOTES : The main loop is bottom-tested and is followed
- * unconditionally by the code for the last eight pixels, so as
- * written at least sixteen source pixels are consumed and
- * thirty-two output bytes are stored.
- * The loop reads the 8 bytes starting at source + 1, i.e. one
- * byte into the following step.
- * No EMMS instruction is executed in this routine.
- *
- ****************************************************************************/
- void HorizontalLine_1_2_Scale_MMX
- (
- const unsigned char *source,
- unsigned int sourceWidth,
- unsigned char *dest,
- unsigned int destWidth
- )
- {
- (void) destWidth; // parameter is not used
- __asm
- {
- mov esi, source // esi = read pointer
- mov edi, dest // edi = write pointer
- pxor mm7, mm7 // mm7 = 0, used for unpacking
- movq mm6, fourOnes // mm6 = 1, 1, 1, 1
- mov ecx, sourceWidth // ecx = pixels still to process
- HS_1_2_Loop:
- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, [esi+1] // mm1 = 01 02 03 04 05 06 07 08
- movq mm2, mm0 // copy for the high half
- movq mm3, mm1 // copy for the high half
- movq mm4, mm0 // keep the original pixels for interleaving
- punpcklbw mm0, mm7 // low four pixels as words
- punpcklbw mm1, mm7 // low four right-hand neighbours as words
- paddw mm0, mm1 // low (a + b)
- paddw mm0, mm6 // low (a + b + 1)
- punpckhbw mm2, mm7 // high four pixels as words
- punpckhbw mm3, mm7 // high four right-hand neighbours as words
- paddw mm2, mm3 // high (a + b)
- paddw mm2, mm6 // high (a + b + 1)
- psraw mm0, 1 // low (a + b + 1) / 2
- psraw mm2, 1 // high (a + b + 1) / 2
- packuswb mm0, mm2 // mm0 = eight averaged bytes
- movq mm2, mm4 // original pixels
- punpcklbw mm2, mm0 // interleave: pixel, average, ... for pixels 0-3
- movq [edi], mm2 // write outputs 0-7
- punpckhbw mm4, mm0 // interleave: pixel, average, ... for pixels 4-7
- movq [edi+8], mm4 // write outputs 8-15
- add esi, 8 // next eight source pixels
- add edi, 16 // next sixteen output bytes
- sub ecx, 8 // eight pixels done
- cmp ecx, 8
- jg HS_1_2_Loop // loop while more than eight pixels remain
- // last eight pixel
- movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07
- movq mm1, mm0
- 
- movq mm2, mm0 // copy for the high half
- movq mm3, mm1
- psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00
- psrlq mm3, 56 // mm3 = 07 00 00 00 00 00 00 00
- 
- psllq mm3, 56 // mm3 = 00 00 00 00 00 00 00 07
- por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07
- movq mm3, mm1 // copy for the high half
- movq mm4, mm0 // keep the original pixels for interleaving
- punpcklbw mm0, mm7 // low four pixels as words
- punpcklbw mm1, mm7 // low four right-hand neighbours as words
- paddw mm0, mm1 // low (a + b)
- paddw mm0, mm6 // low (a + b + 1)
- punpckhbw mm2, mm7 // high four pixels as words
- punpckhbw mm3, mm7 // high four right-hand neighbours as words
- paddw mm2, mm3 // high (a + b)
- paddw mm2, mm6 // high (a + b + 1)
- psraw mm0, 1 // low (a + b + 1) / 2
- psraw mm2, 1 // high (a + b + 1) / 2
- packuswb mm0, mm2 // mm0 = eight averaged bytes
- movq mm2, mm4 // original pixels
- punpcklbw mm2, mm0 // interleave: pixel, average, ... for pixels 0-3
- movq [edi], mm2 // write outputs 0-7
- punpckhbw mm4, mm0 // interleave: pixel, average, ... for pixels 4-7
- movq [edi+8], mm4 // write outputs 8-15
- }
- }
-
- #if defined(__cplusplus)
- extern "C" {
- #endif
|