123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790 |
- /****************************************************************************
- *
- * Module Title : newLoopTest_asm.c
- *
- * Description : Codec specific functions
- *
- * AUTHOR : Yaowu Xu
- *
- *****************************************************************************
- * Revision History
- *
- * 1.02 YWX 03-Nov-00 Changed confusing variable name
- * 1.01 YWX 02-Nov-00 Added the set of functions
- * 1.00 YWX 19-Oct-00 configuration baseline
- *****************************************************************************
- */
- /****************************************************************************
- * Header Frames
- *****************************************************************************
- */
- #define STRICT /* Strict type checking. */
- #include "codec_common.h"
- #include <math.h>
- /****************************************************************************
- * Module constants.
- *****************************************************************************
- */
- #define MIN(a, b) (((a) < (b)) ? (a) : (b))
- #define FILTER_WEIGHT 128
- #define FILTER_SHIFT 7
- __declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64};
- __declspec(align(16)) INT16 BilinearFilters_wmt[8][16] =
- {
- { 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0 },
- { 112,112,112,112,112,112,112,112, 16, 16, 16, 16, 16, 16, 16, 16 },
- { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
- { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
- { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
- { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
- { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
- { 16, 16, 16, 16, 16, 16, 16, 16, 112,112,112,112,112,112,112,112 }
- };
- extern __declspec(align(16)) INT16 BicubicFilters_mmx[17][8][32];
- _inline
- void FilterBlock1d_h_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
- {
- __asm
- {
- mov edi, Filter
- movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm2, [edi+ 16] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers.
- mov edi,OutputPtr
- mov esi,SrcPtr
- dec esi
- mov ecx, DWORD PTR OutputHeight
- mov eax, OutputWidth ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- // kernel 0 and 3 are potentially negative taps. These negative tap filters
- // must be done first or we could have problems saturating our high value
- // tap filters
- movdqu xmm3, [esi] ; xmm3 = p-1..p14
- movdqu xmm4, xmm3 ; xmm4 = p-1..p14
- punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
- pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
- psrldq xmm4, 3 ; xmm4 = p2..p13
- movdqa xmm5, xmm4 ; xmm5 = p2..p13
- punpcklbw xmm5, xmm0 ; xmm5 = p2..p7
- pmullw xmm5, xmm7 ; xmm5 *= kernel 3 modifiers
- paddsw xmm3, xmm5 ; xmm3 += xmm5
- movdqu xmm4, [esi+1] ; xmm4 = p0..p13
- movdqa xmm5, xmm4 ; xmm5 = p0..p13
- punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
- pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
- paddsw xmm3, xmm5 ; xmm3 += xmm5
- psrldq xmm4, 1 ; xmm4 = p1..p13
- movdqa xmm5, xmm4 ; xmm5 = p1..p13
- punpcklbw xmm5, xmm0 ; xmm5 = p1..p7
- pmullw xmm5, xmm6 ; xmm5 *= kernel 2 modifiers
- paddsw xmm3, xmm5 ; xmm3 += xmm5
- paddsw xmm3, rd ; xmm3 += round value
- psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
- packuswb xmm3, xmm0 ; pack and saturate
- movdq2q mm0, xmm3
- movq [edi],mm0 ; store the results in the destination
- add esi,SrcPixelsPerLine ; next line
- add edi,eax;
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
- _inline
- void FilterBlock1d_v_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
- {
- __asm
- {
- mov edi, Filter
- movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers.
- mov edx, PixelsPerLine
- mov edi, OutputPtr
- mov esi, SrcPtr
- sub esi, PixelsPerLine
- mov ecx, DWORD PTR OutputHeight
- mov eax, OutputWidth ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- movdqu xmm3, [esi] ; xmm3 = p0..p16
- punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
- pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
- add esi, edx ; move source forward 1 line to avoid 3 * pitch
- movdqu xmm4, [esi+2*edx] ; xmm4 = p0..p16
- punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
- pmullw xmm4, xmm7 ; xmm4 *= kernel 3 modifiers.
- paddsw xmm3, xmm4 ; xmm3 += xmm4
- movdqu xmm4, [esi ] ; xmm4 = p0..p16
- punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
- pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
- paddsw xmm3, xmm4 ; xmm3 += xmm4
- movdqu xmm4, [esi +edx] ; xmm4 = p0..p16
- punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
- pmullw xmm4, xmm6 ; xmm4 *= kernel 2 modifiers.
- paddsw xmm3, xmm4 ; xmm3 += xmm4
- paddsw xmm3, rd ; xmm3 += round value
- psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
- packuswb xmm3, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm3
- movq [edi],mm0 ; store the results in the destination
- // the subsequent iterations repeat 3 out of 4 of these reads. Since the
- // recon block should be in cache this shouldn't cost much. Its obviously
- // avoidable!!!.
- add edi,eax;
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
- _inline
- void FilterBlock1d_hb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
- {
- __asm
- {
- mov edi, Filter
- movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
- mov edi,OutputPtr
- mov esi,SrcPtr
- mov ecx, DWORD PTR OutputHeight
- mov eax, OutputWidth ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- movdqu xmm3, [esi] ; xmm3 = p-1..p14
- movdqu xmm5, xmm3 ; xmm4 = p-1..p14
- punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
- pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
- psrldq xmm5, 1 ; xmm4 = p0..p13
- punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
- pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
- paddw xmm3, xmm5 ; xmm3 += xmm5
- paddw xmm3, rd ; xmm3 += round value
- psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
- packuswb xmm3, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm3
- movq [edi],mm0 ; store the results in the destination
- add esi,SrcPixelsPerLine ; next line
- add edi,eax;
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
- _inline
- void FilterBlock1d_vb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
- {
- __asm
- {
- mov edi, Filter
- movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
- mov edx, PixelsPerLine
- mov edi, OutputPtr
- mov esi, SrcPtr
- mov ecx, DWORD PTR OutputHeight
- mov eax, OutputWidth ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- movdqu xmm3, [esi] ; xmm3 = p0..p16
- punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
- pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
- movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16
- punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
- pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
- paddw xmm3, xmm4 ; xmm3 += xmm4
- paddw xmm3, rd ; xmm3 += round value
- psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
- packuswb xmm3, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm3
- movq [edi],mm0 ; store the results in the destination
- // the subsequent iterations repeat 3 out of 4 of these reads. Since the
- // recon block should be in cache this shouldn't cost much. Its obviously
- // avoidable!!!.
- add esi,edx
- add edi,eax
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
- /****************************************************************************
- *
- * ROUTINE : FilterBlock2dBil
- *
- * INPUTS : Pointer to source data
- *
- * OUTPUTS : Filtered data
- *
- * RETURNS : None.
- *
- * FUNCTION : Applies a bilinear filter on the intput data to produce
- * a predictor block (UINT16)
- *
- * SPECIAL NOTES :
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- _inline
- void FilterBlock2dBil_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
- {
- __asm
- {
- mov eax, HFilter ;
- mov edi, OutputPtr ;
- mov esi, SrcPtr ;
- lea ecx, [edi+64] ;
- mov edx, SrcPixelsPerLine ;
-
- movdqa xmm1, [eax] ;
- movdqa xmm2, [eax+16] ;
-
- mov eax, VFilter ;
- pxor xmm0, xmm0 ;
- // get the first horizontal line done ;
- movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
-
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
-
- pmullw xmm3, xmm1 ;
- punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
- pmullw xmm4, xmm2 ;
- paddw xmm3, xmm4 ;
- paddw xmm3, rd ;
- psraw xmm3, FILTER_SHIFT ; ready for output
-
- movdqa xmm5, xmm3 ;
- add esi, edx ; next line
- NextRow:
- pmullw xmm5, [eax] ;
- movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
- pmullw xmm3, xmm1 ;
- punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
- movdqa xmm6, xmm5 ;
- pmullw xmm4, xmm2 ;
- paddw xmm3, xmm4 ;
- paddw xmm3, rd ;
- psraw xmm3, FILTER_SHIFT ; ready for output
- movdqa xmm5, xmm3 ; make a copy for the next row
-
- pmullw xmm3, [eax+16] ;
- paddw xmm6, xmm3 ;
-
- paddw xmm6, rd ; xmm6 += round value
- psraw xmm6, FILTER_SHIFT ; xmm6 /= 128
- packuswb xmm6, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm6
- movq [edi], mm0 ; store the results in the destination
- add esi, edx ; next line
- add edi, 8 ;
- cmp edi, ecx ;
- jne NextRow
- }
- // First filter 1d Horizontal
- //FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
- // Now filter Verticaly
- //FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
- }
- _inline
- void FilterUnpackBlock2dBil_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
- {
- __asm
- {
- mov eax, HFilter ;
- mov edi, OutputPtr ;
- mov esi, SrcPtr ;
- lea ecx, [edi+128] ;
- mov edx, SrcPixelsPerLine ;
-
- movdqa xmm1, [eax] ;
- movdqa xmm2, [eax+16] ;
-
- mov eax, VFilter ;
- pxor xmm0, xmm0 ;
- // get the first horizontal line done ;
- movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
-
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
-
- pmullw xmm3, xmm1 ;
- punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
- pmullw xmm4, xmm2 ;
- paddw xmm3, xmm4 ;
- paddw xmm3, rd ;
- psraw xmm3, FILTER_SHIFT ; ready for output
-
- movdqa xmm5, xmm3 ;
- add esi, edx ; next line
- NextRow:
- pmullw xmm5, [eax] ;
- movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
- movdqa xmm4, xmm3 ; make a copy of current line
- punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06
- psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx
- pmullw xmm3, xmm1 ;
- punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07
- movdqa xmm6, xmm5 ;
- pmullw xmm4, xmm2 ;
- paddw xmm3, xmm4 ;
- paddw xmm3, rd ;
- psraw xmm3, FILTER_SHIFT ; ready for output
- movdqa xmm5, xmm3 ; make a copy for the next row
-
- pmullw xmm3, [eax+16] ;
- paddw xmm6, xmm3 ;
-
- paddw xmm6, rd ; xmm6 += round value
- psraw xmm6, FILTER_SHIFT ; xmm6 /= 128
- movdqu [edi], xmm6;
-
- /*
- packuswb xmm6, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm6
- movq [edi], mm0 ; store the results in the destination
- */
- add esi, edx ; next line
- add edi, 16 ;
- cmp edi, ecx ;
- jne NextRow
- }
- // First filter 1d Horizontal
- //FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter );
- // Now filter Verticaly
- //FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
- }
- _inline
- void FilterUnpackBlock1d_hb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
- {
- __asm
- {
- mov edi, Filter
- movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
- mov edi,OutputPtr
- mov esi,SrcPtr
- mov ecx, DWORD PTR OutputHeight
- mov eax, OutputWidth ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- movdqu xmm3, [esi] ; xmm3 = p-1..p14
- movdqu xmm5, xmm3 ; xmm4 = p-1..p14
- punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
- pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
- psrldq xmm5, 1 ; xmm4 = p0..p13
- punpcklbw xmm5, xmm0 ; xmm5 = p0..p7
- pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers
- paddw xmm3, xmm5 ; xmm3 += xmm5
- paddw xmm3, rd ; xmm3 += round value
- psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
-
- /*
- packuswb xmm3, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm3
- */
- movdqu [edi],xmm3 ; store the results in the destination
- add esi,SrcPixelsPerLine ; next line
- add edi,eax;
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
- _inline
- void FilterUnpackBlock1d_vb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter )
- {
- __asm
- {
- mov edi, Filter
- movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers.
- movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers.
- mov edx, PixelsPerLine
- mov edi, OutputPtr
- mov esi, SrcPtr
- mov ecx, DWORD PTR OutputHeight
- mov eax, OutputWidth ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- movdqu xmm3, [esi] ; xmm3 = p0..p16
- punpcklbw xmm3, xmm0 ; xmm3 = p0..p8
- pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers.
- movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16
- punpcklbw xmm4, xmm0 ; xmm4 = p0..p8
- pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers.
- paddw xmm3, xmm4 ; xmm3 += xmm4
- paddw xmm3, rd ; xmm3 += round value
- psraw xmm3, FILTER_SHIFT ; xmm3 /= 128
-
- /*packuswb xmm3, xmm0 ; pack and unpack to saturate
- movdq2q mm0, xmm3
- */
- movdqu [edi],xmm3 ; store the results in the destination
- // the subsequent iterations repeat 3 out of 4 of these reads. Since the
- // recon block should be in cache this shouldn't cost much. Its obviously
- // avoidable!!!.
- add esi,edx
- add edi,eax
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
-
- /****************************************************************************
- *
- * ROUTINE : FilterBlockBil_8
- *
- * INPUTS : ReconPtr1, ReconPtr12
- * Two pointers into the block of data to be filtered
- * These pointers bound the fractional pel position
- * PixelsPerLine
- * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr12
- * Modx, ModY
- * The fractional pel bits used to select a filter.
- *
- *
- * OUTPUTS : ReconRefPtr
- * A pointer to an 8x8 buffer into which UINT8 filtered data is written.
- *
- * RETURNS : None.
- *
- * FUNCTION : Produces a bilinear filtered fractional pel prediction block
- * with UINT8 output
- *
- * SPECIAL NOTES :
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY )
- {
- int diff;
- // swap pointers so ReconPtr1 smaller (above, left, above-right or above-left )
- diff=ReconPtr2-ReconPtr1;
- // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
- // This works out to be what we want... despite the pointer swapping that goes on below.
- // For example... if the X component of the vector is a +ve ModX = X%8.
- // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
- if(diff<0)
- { // swap pointers so ReconPtr1 smaller
- UINT8 *temp=ReconPtr1;
- ReconPtr1=ReconPtr2;
- ReconPtr2=temp;
- diff= (int)(ReconPtr2-ReconPtr1);
- }
- if( diff==1 )
- {
- FilterBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] );
- }
- else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
- {
- FilterBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]);
- }
- else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
- {
- FilterBlock2dBil_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- //FilterBlock2dBil_8_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- }
- else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
- {
- FilterBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- //FilterBlock2dBil_8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- }
- }
- _inline void UnpackBlock_wmt( UINT8 *SrcPtr, UINT16 *OutputPtr, UINT32 SrcPixelsPerLine )
- {
- __asm
- {
- mov edi,OutputPtr
- mov esi,SrcPtr
- mov ecx, 8
- mov eax, 16 ; destination pitch?
- pxor xmm0, xmm0 ; xmm0 = 00000000
- nextrow:
- movdqu xmm3, [esi] ; xmm3 = p-1..p14
- punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6
- movdqu [edi],xmm3 ; store the results in the destination
- add esi,SrcPixelsPerLine ; next line
- add edi,eax;
- dec ecx ; decrement count
- jnz nextrow ; next row
- }
- }
- /****************************************************************************
- *
- * ROUTINE : FilterBlock2d
- *
- * INPUTS : Pointer to source data
- *
- * OUTPUTS : Filtered data
- *
- * RETURNS : None.
- *
- * FUNCTION : Applies a 2d 4 tap filter on the intput data to produce
- * a predictor block (UINT16)
- *
- * SPECIAL NOTES :
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void FilterBlock2d_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter )
- {
- UINT8 Intermediate[256];
- // First filter 1d Horizontal
- FilterBlock1d_h_wmt(SrcPtr-SrcPixelsPerLine, Intermediate, SrcPixelsPerLine, 1, 11, 8, HFilter );
- // Now filter Verticaly
- FilterBlock1d_v_wmt(Intermediate+BLOCK_HEIGHT_WIDTH, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter);
- }
-
- /****************************************************************************
- *
- * ROUTINE : FilterBlock
- *
- * INPUTS : ReconPtr1, ReconPtr12
- * Two pointers into the block of data to be filtered
- * These pointers bound the fractional pel position
- * PixelsPerLine
- * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr12
- * Modx, ModY
- * The fractional pel bits used to select a filter.
- * UseBicubic
- * Whether to use the bicubuc filter set or the bilinear set
- *
- *
- * OUTPUTS : ReconRefPtr
- * A pointer to an 8x8 buffer into which the filtered data is written.
- *
- * RETURNS : None.
- *
- * FUNCTION : Produces a filtered fractional pel prediction block
- * using bilinear or bicubic filters
- *
- * SPECIAL NOTES :
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha )
- {
- int diff;
- UINT8 Intermediate[256];
- // swap pointers so ReconPtr1 smaller (above, left, above-right or above-left )
- diff=ReconPtr2-ReconPtr1;
- // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision).
- // This works out to be what we want... despite the pointer swapping that goes on below.
- // For example... if the X component of the vector is a +ve ModX = X%8.
- // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1.
- if(diff<0)
- { // swap pointers so ReconPtr1 smaller
- UINT8 *temp=ReconPtr1;
- ReconPtr1=ReconPtr2;
- ReconPtr2=temp;
- diff= (int)(ReconPtr2-ReconPtr1);
- }
- if(!diff)
- {
- return;
- }
- if(UseBicubic)
- {
- if( diff==1 )
- { // Fractional pixel in horizontal only
- FilterBlock1d_h_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModX] );
- }
- else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
- {
- FilterBlock1d_v_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModY]);
- }
- else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
- {
- FilterBlock2d_wmt( ReconPtr1-1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
- }
- else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
- {
- FilterBlock2d_wmt( ReconPtr1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] );
- }
- UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
- }
- else
- {
-
- if( diff==1 )
- {
- FilterUnpackBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 16, BilinearFilters_wmt[ModX] );
-
- // Fractional pixel in horizontal only
- /*
- FilterBlock1d_hb8_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] );
- UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
- */
-
- }
- else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only
- {
- FilterUnpackBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 16, BilinearFilters_wmt[ModY]);
- /*
- FilterBlock1d_vb8_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]);
- UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
- */
- }
- else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right
- {
- FilterUnpackBlock2dBil_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- /*
- FilterBlock2dBil_wmt( ReconPtr1-1, Intermediate, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
- */
- }
- else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left
- {
- FilterUnpackBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- /*
- FilterBlock2dBil_wmt( ReconPtr1, Intermediate, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] );
- UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 );
- */
- }
- }
- }
|