/**************************************************************************** * * Module Title : newLoopTest_asm.c * * Description : Codec specific functions * * AUTHOR : Yaowu Xu * ***************************************************************************** * Revision History * * 1.02 YWX 03-Nov-00 Changed confusing variable name * 1.01 YWX 02-Nov-00 Added the set of functions * 1.00 YWX 19-Oct-00 configuration baseline ***************************************************************************** */ /**************************************************************************** * Header Frames ***************************************************************************** */ #define STRICT /* Strict type checking. */ #include "codec_common.h" #include /**************************************************************************** * Module constants. ***************************************************************************** */ #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define FILTER_WEIGHT 128 #define FILTER_SHIFT 7 __declspec(align(16)) short rd[]={64,64,64,64,64,64,64,64}; __declspec(align(16)) INT16 BilinearFilters_wmt[8][16] = { { 128,128,128,128,128,128,128,128, 0, 0, 0, 0, 0, 0, 0, 0 }, { 112,112,112,112,112,112,112,112, 16, 16, 16, 16, 16, 16, 16, 16 }, { 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 }, { 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 }, { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 }, { 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 }, { 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 }, { 16, 16, 16, 16, 16, 16, 16, 16, 112,112,112,112,112,112,112,112 } }; extern __declspec(align(16)) INT16 BicubicFilters_mmx[17][8][32]; _inline void FilterBlock1d_h_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter ) { __asm { mov edi, Filter movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. movdqa xmm2, [edi+ 16] ; xmm3 *= kernel 0 modifiers. movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers. movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers. mov edi,OutputPtr mov esi,SrcPtr dec esi mov ecx, DWORD PTR OutputHeight mov eax, OutputWidth ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: // kernel 0 and 3 are potentially negative taps. These negative tap filters // must be done first or we could have problems saturating our high value // tap filters movdqu xmm3, [esi] ; xmm3 = p-1..p14 movdqu xmm4, xmm3 ; xmm4 = p-1..p14 punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6 pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers. psrldq xmm4, 3 ; xmm4 = p2..p13 movdqa xmm5, xmm4 ; xmm5 = p2..p13 punpcklbw xmm5, xmm0 ; xmm5 = p2..p7 pmullw xmm5, xmm7 ; xmm5 *= kernel 3 modifiers paddsw xmm3, xmm5 ; xmm3 += xmm5 movdqu xmm4, [esi+1] ; xmm4 = p0..p13 movdqa xmm5, xmm4 ; xmm5 = p0..p13 punpcklbw xmm5, xmm0 ; xmm5 = p0..p7 pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers paddsw xmm3, xmm5 ; xmm3 += xmm5 psrldq xmm4, 1 ; xmm4 = p1..p13 movdqa xmm5, xmm4 ; xmm5 = p1..p13 punpcklbw xmm5, xmm0 ; xmm5 = p1..p7 pmullw xmm5, xmm6 ; xmm5 *= kernel 2 modifiers paddsw xmm3, xmm5 ; xmm3 += xmm5 paddsw xmm3, rd ; xmm3 += round value psraw xmm3, FILTER_SHIFT ; xmm3 /= 128 packuswb xmm3, xmm0 ; pack and saturate movdq2q mm0, xmm3 movq [edi],mm0 ; store the results in the destination add esi,SrcPixelsPerLine ; next line add edi,eax; dec ecx ; decrement count jnz nextrow ; next row } } _inline void FilterBlock1d_v_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter ) { __asm { mov edi, Filter movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers. movdqa xmm6, [edi + 32] ; xmm3 *= kernel 0 modifiers. movdqa xmm7, [edi + 48] ; xmm3 *= kernel 0 modifiers. mov edx, PixelsPerLine mov edi, OutputPtr mov esi, SrcPtr sub esi, PixelsPerLine mov ecx, DWORD PTR OutputHeight mov eax, OutputWidth ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: movdqu xmm3, [esi] ; xmm3 = p0..p16 punpcklbw xmm3, xmm0 ; xmm3 = p0..p8 pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers. add esi, edx ; move source forward 1 line to avoid 3 * pitch movdqu xmm4, [esi+2*edx] ; xmm4 = p0..p16 punpcklbw xmm4, xmm0 ; xmm4 = p0..p8 pmullw xmm4, xmm7 ; xmm4 *= kernel 3 modifiers. paddsw xmm3, xmm4 ; xmm3 += xmm4 movdqu xmm4, [esi ] ; xmm4 = p0..p16 punpcklbw xmm4, xmm0 ; xmm4 = p0..p8 pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers. paddsw xmm3, xmm4 ; xmm3 += xmm4 movdqu xmm4, [esi +edx] ; xmm4 = p0..p16 punpcklbw xmm4, xmm0 ; xmm4 = p0..p8 pmullw xmm4, xmm6 ; xmm4 *= kernel 2 modifiers. paddsw xmm3, xmm4 ; xmm3 += xmm4 paddsw xmm3, rd ; xmm3 += round value psraw xmm3, FILTER_SHIFT ; xmm3 /= 128 packuswb xmm3, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm3 movq [edi],mm0 ; store the results in the destination // the subsequent iterations repeat 3 out of 4 of these reads. Since the // recon block should be in cache this shouldn't cost much. Its obviously // avoidable!!!. add edi,eax; dec ecx ; decrement count jnz nextrow ; next row } } _inline void FilterBlock1d_hb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter ) { __asm { mov edi, Filter movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers. mov edi,OutputPtr mov esi,SrcPtr mov ecx, DWORD PTR OutputHeight mov eax, OutputWidth ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: movdqu xmm3, [esi] ; xmm3 = p-1..p14 movdqu xmm5, xmm3 ; xmm4 = p-1..p14 punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6 pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers. psrldq xmm5, 1 ; xmm4 = p0..p13 punpcklbw xmm5, xmm0 ; xmm5 = p0..p7 pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers paddw xmm3, xmm5 ; xmm3 += xmm5 paddw xmm3, rd ; xmm3 += round value psraw xmm3, FILTER_SHIFT ; xmm3 /= 128 packuswb xmm3, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm3 movq [edi],mm0 ; store the results in the destination add esi,SrcPixelsPerLine ; next line add edi,eax; dec ecx ; decrement count jnz nextrow ; next row } } _inline void FilterBlock1d_vb8_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter ) { __asm { mov edi, Filter movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers. mov edx, PixelsPerLine mov edi, OutputPtr mov esi, SrcPtr mov ecx, DWORD PTR OutputHeight mov eax, OutputWidth ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: movdqu xmm3, [esi] ; xmm3 = p0..p16 punpcklbw xmm3, xmm0 ; xmm3 = p0..p8 pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers. movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16 punpcklbw xmm4, xmm0 ; xmm4 = p0..p8 pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers. paddw xmm3, xmm4 ; xmm3 += xmm4 paddw xmm3, rd ; xmm3 += round value psraw xmm3, FILTER_SHIFT ; xmm3 /= 128 packuswb xmm3, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm3 movq [edi],mm0 ; store the results in the destination // the subsequent iterations repeat 3 out of 4 of these reads. Since the // recon block should be in cache this shouldn't cost much. Its obviously // avoidable!!!. add esi,edx add edi,eax dec ecx ; decrement count jnz nextrow ; next row } } /**************************************************************************** * * ROUTINE : FilterBlock2dBil * * INPUTS : Pointer to source data * * OUTPUTS : Filtered data * * RETURNS : None. * * FUNCTION : Applies a bilinear filter on the intput data to produce * a predictor block (UINT16) * * SPECIAL NOTES : * * ERRORS : None. * ****************************************************************************/ _inline void FilterBlock2dBil_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter ) { __asm { mov eax, HFilter ; mov edi, OutputPtr ; mov esi, SrcPtr ; lea ecx, [edi+64] ; mov edx, SrcPixelsPerLine ; movdqa xmm1, [eax] ; movdqa xmm2, [eax+16] ; mov eax, VFilter ; pxor xmm0, xmm0 ; // get the first horizontal line done ; movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 movdqa xmm4, xmm3 ; make a copy of current line punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx pmullw xmm3, xmm1 ; punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07 pmullw xmm4, xmm2 ; paddw xmm3, xmm4 ; paddw xmm3, rd ; psraw xmm3, FILTER_SHIFT ; ready for output movdqa xmm5, xmm3 ; add esi, edx ; next line NextRow: pmullw xmm5, [eax] ; movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 movdqa xmm4, xmm3 ; make a copy of current line punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx pmullw xmm3, xmm1 ; punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07 movdqa xmm6, xmm5 ; pmullw xmm4, xmm2 ; paddw xmm3, xmm4 ; paddw xmm3, rd ; psraw xmm3, FILTER_SHIFT ; ready for output movdqa xmm5, xmm3 ; make a copy for the next row pmullw xmm3, [eax+16] ; paddw xmm6, xmm3 ; paddw xmm6, rd ; xmm6 += round value psraw xmm6, FILTER_SHIFT ; xmm6 /= 128 packuswb xmm6, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm6 movq [edi], mm0 ; store the results in the destination add esi, edx ; next line add edi, 8 ; cmp edi, ecx ; jne NextRow } // First filter 1d Horizontal //FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter ); // Now filter Verticaly //FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter); } _inline void FilterUnpackBlock2dBil_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter ) { __asm { mov eax, HFilter ; mov edi, OutputPtr ; mov esi, SrcPtr ; lea ecx, [edi+128] ; mov edx, SrcPixelsPerLine ; movdqa xmm1, [eax] ; movdqa xmm2, [eax+16] ; mov eax, VFilter ; pxor xmm0, xmm0 ; // get the first horizontal line done ; movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 movdqa xmm4, xmm3 ; make a copy of current line punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx pmullw xmm3, xmm1 ; punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07 pmullw xmm4, xmm2 ; paddw xmm3, xmm4 ; paddw xmm3, rd ; psraw xmm3, FILTER_SHIFT ; ready for output movdqa xmm5, xmm3 ; add esi, edx ; next line NextRow: pmullw xmm5, [eax] ; movdqu xmm3, [esi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 movdqa xmm4, xmm3 ; make a copy of current line punpcklbw xmm3, xmm0 ; xx 00 01 02 03 04 05 06 psrldq xmm4, 1 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 xx pmullw xmm3, xmm1 ; punpcklbw xmm4, xmm0 ; 00 01 02 03 04 05 06 07 movdqa xmm6, xmm5 ; pmullw xmm4, xmm2 ; paddw xmm3, xmm4 ; paddw xmm3, rd ; psraw xmm3, FILTER_SHIFT ; ready for output movdqa xmm5, xmm3 ; make a copy for the next row pmullw xmm3, [eax+16] ; paddw xmm6, xmm3 ; paddw xmm6, rd ; xmm6 += round value psraw xmm6, FILTER_SHIFT ; xmm6 /= 128 movdqu [edi], xmm6; /* packuswb xmm6, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm6 movq [edi], mm0 ; store the results in the destination */ add esi, edx ; next line add edi, 16 ; cmp edi, ecx ; jne NextRow } // First filter 1d Horizontal //FilterBlock1d_hb8_wmt(SrcPtr, Intermediate, SrcPixelsPerLine, 1, 9, 8, HFilter ); // Now filter Verticaly //FilterBlock1d_vb8_wmt(Intermediate, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter); } _inline void FilterUnpackBlock1d_hb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 SrcPixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter ) { __asm { mov edi, Filter movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers. mov edi,OutputPtr mov esi,SrcPtr mov ecx, DWORD PTR OutputHeight mov eax, OutputWidth ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: movdqu xmm3, [esi] ; xmm3 = p-1..p14 movdqu xmm5, xmm3 ; xmm4 = p-1..p14 punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6 pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers. psrldq xmm5, 1 ; xmm4 = p0..p13 punpcklbw xmm5, xmm0 ; xmm5 = p0..p7 pmullw xmm5, xmm2 ; xmm5 *= kernel 1 modifiers paddw xmm3, xmm5 ; xmm3 += xmm5 paddw xmm3, rd ; xmm3 += round value psraw xmm3, FILTER_SHIFT ; xmm3 /= 128 /* packuswb xmm3, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm3 */ movdqu [edi],xmm3 ; store the results in the destination add esi,SrcPixelsPerLine ; next line add edi,eax; dec ecx ; decrement count jnz nextrow ; next row } } _inline void FilterUnpackBlock1d_vb8_wmt( UINT8 *SrcPtr, INT16 *OutputPtr, UINT32 PixelsPerLine, UINT32 PixelStep, UINT32 OutputHeight, UINT32 OutputWidth, INT16 * Filter ) { __asm { mov edi, Filter movdqa xmm1, [edi] ; xmm3 *= kernel 0 modifiers. movdqa xmm2, [edi + 16] ; xmm3 *= kernel 0 modifiers. mov edx, PixelsPerLine mov edi, OutputPtr mov esi, SrcPtr mov ecx, DWORD PTR OutputHeight mov eax, OutputWidth ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: movdqu xmm3, [esi] ; xmm3 = p0..p16 punpcklbw xmm3, xmm0 ; xmm3 = p0..p8 pmullw xmm3, xmm1 ; xmm3 *= kernel 0 modifiers. movdqu xmm4, [esi +edx ] ; xmm4 = p0..p16 punpcklbw xmm4, xmm0 ; xmm4 = p0..p8 pmullw xmm4, xmm2 ; xmm4 *= kernel 1 modifiers. paddw xmm3, xmm4 ; xmm3 += xmm4 paddw xmm3, rd ; xmm3 += round value psraw xmm3, FILTER_SHIFT ; xmm3 /= 128 /*packuswb xmm3, xmm0 ; pack and unpack to saturate movdq2q mm0, xmm3 */ movdqu [edi],xmm3 ; store the results in the destination // the subsequent iterations repeat 3 out of 4 of these reads. Since the // recon block should be in cache this shouldn't cost much. Its obviously // avoidable!!!. add esi,edx add edi,eax dec ecx ; decrement count jnz nextrow ; next row } } /**************************************************************************** * * ROUTINE : FilterBlockBil_8 * * INPUTS : ReconPtr1, ReconPtr12 * Two pointers into the block of data to be filtered * These pointers bound the fractional pel position * PixelsPerLine * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr12 * Modx, ModY * The fractional pel bits used to select a filter. * * * OUTPUTS : ReconRefPtr * A pointer to an 8x8 buffer into which UINT8 filtered data is written. * * RETURNS : None. * * FUNCTION : Produces a bilinear filtered fractional pel prediction block * with UINT8 output * * SPECIAL NOTES : * * ERRORS : None. * ****************************************************************************/ void FilterBlockBil_8_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT8 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY ) { int diff; // swap pointers so ReconPtr1 smaller (above, left, above-right or above-left ) diff=ReconPtr2-ReconPtr1; // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision). // This works out to be what we want... despite the pointer swapping that goes on below. // For example... if the X component of the vector is a +ve ModX = X%8. // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1. if(diff<0) { // swap pointers so ReconPtr1 smaller UINT8 *temp=ReconPtr1; ReconPtr1=ReconPtr2; ReconPtr2=temp; diff= (int)(ReconPtr2-ReconPtr1); } if( diff==1 ) { FilterBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] ); } else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only { FilterBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]); } else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right { FilterBlock2dBil_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); //FilterBlock2dBil_8_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); } else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left { FilterBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); //FilterBlock2dBil_8_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); } } _inline void UnpackBlock_wmt( UINT8 *SrcPtr, UINT16 *OutputPtr, UINT32 SrcPixelsPerLine ) { __asm { mov edi,OutputPtr mov esi,SrcPtr mov ecx, 8 mov eax, 16 ; destination pitch? pxor xmm0, xmm0 ; xmm0 = 00000000 nextrow: movdqu xmm3, [esi] ; xmm3 = p-1..p14 punpcklbw xmm3, xmm0 ; xmm3 = p-1..p6 movdqu [edi],xmm3 ; store the results in the destination add esi,SrcPixelsPerLine ; next line add edi,eax; dec ecx ; decrement count jnz nextrow ; next row } } /**************************************************************************** * * ROUTINE : FilterBlock2d * * INPUTS : Pointer to source data * * OUTPUTS : Filtered data * * RETURNS : None. * * FUNCTION : Applies a 2d 4 tap filter on the intput data to produce * a predictor block (UINT16) * * SPECIAL NOTES : * * ERRORS : None. * ****************************************************************************/ void FilterBlock2d_wmt( UINT8 *SrcPtr, UINT8 *OutputPtr, UINT32 SrcPixelsPerLine, INT16 * HFilter, INT16 * VFilter ) { UINT8 Intermediate[256]; // First filter 1d Horizontal FilterBlock1d_h_wmt(SrcPtr-SrcPixelsPerLine, Intermediate, SrcPixelsPerLine, 1, 11, 8, HFilter ); // Now filter Verticaly FilterBlock1d_v_wmt(Intermediate+BLOCK_HEIGHT_WIDTH, OutputPtr, BLOCK_HEIGHT_WIDTH, BLOCK_HEIGHT_WIDTH, 8, 8, VFilter); } /**************************************************************************** * * ROUTINE : FilterBlock * * INPUTS : ReconPtr1, ReconPtr12 * Two pointers into the block of data to be filtered * These pointers bound the fractional pel position * PixelsPerLine * Pixels per line in the buffer pointed to by ReconPtr1 & ReconPtr12 * Modx, ModY * The fractional pel bits used to select a filter. * UseBicubic * Whether to use the bicubuc filter set or the bilinear set * * * OUTPUTS : ReconRefPtr * A pointer to an 8x8 buffer into which the filtered data is written. * * RETURNS : None. * * FUNCTION : Produces a filtered fractional pel prediction block * using bilinear or bicubic filters * * SPECIAL NOTES : * * ERRORS : None. * ****************************************************************************/ void FilterBlock_wmt( UINT8 *ReconPtr1, UINT8 *ReconPtr2, UINT16 *ReconRefPtr, UINT32 PixelsPerLine, INT32 ModX, INT32 ModY, BOOL UseBicubic, UINT8 BicubicAlpha ) { int diff; UINT8 Intermediate[256]; // swap pointers so ReconPtr1 smaller (above, left, above-right or above-left ) diff=ReconPtr2-ReconPtr1; // The ModX and ModY arguments are the bottom three bits of the signed motion vector components (at 1/8th pel precision). // This works out to be what we want... despite the pointer swapping that goes on below. // For example... if the X component of the vector is a +ve ModX = X%8. // if the X component of the vector is a -ve ModX = 8+(X%8) where X%8 is in the range -7 to -1. if(diff<0) { // swap pointers so ReconPtr1 smaller UINT8 *temp=ReconPtr1; ReconPtr1=ReconPtr2; ReconPtr2=temp; diff= (int)(ReconPtr2-ReconPtr1); } if(!diff) { return; } if(UseBicubic) { if( diff==1 ) { // Fractional pixel in horizontal only FilterBlock1d_h_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModX] ); } else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only { FilterBlock1d_v_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BicubicFilters_mmx[BicubicAlpha][ModY]); } else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right { FilterBlock2d_wmt( ReconPtr1-1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] ); } else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left { FilterBlock2d_wmt( ReconPtr1, Intermediate, PixelsPerLine, BicubicFilters_mmx[BicubicAlpha][ModX], BicubicFilters_mmx[BicubicAlpha][ModY] ); } UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 ); } else { if( diff==1 ) { FilterUnpackBlock1d_hb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, 1, 8, 16, BilinearFilters_wmt[ModX] ); // Fractional pixel in horizontal only /* FilterBlock1d_hb8_wmt(ReconPtr1, Intermediate, PixelsPerLine, 1, 8, 8, BilinearFilters_wmt[ModX] ); UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 ); */ } else if (diff == (int)(PixelsPerLine) ) // Fractional pixel in vertical only { FilterUnpackBlock1d_vb8_wmt(ReconPtr1, ReconRefPtr, PixelsPerLine, PixelsPerLine, 8, 16, BilinearFilters_wmt[ModY]); /* FilterBlock1d_vb8_wmt(ReconPtr1, Intermediate, PixelsPerLine, PixelsPerLine, 8, 8, BilinearFilters_wmt[ModY]); UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 ); */ } else if(diff == (int)(PixelsPerLine - 1)) // ReconPtr1 is Top right { FilterUnpackBlock2dBil_wmt( ReconPtr1-1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); /* FilterBlock2dBil_wmt( ReconPtr1-1, Intermediate, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 ); */ } else if(diff == (int)(PixelsPerLine + 1) ) // ReconPtr1 is Top left { FilterUnpackBlock2dBil_wmt( ReconPtr1, ReconRefPtr, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); /* FilterBlock2dBil_wmt( ReconPtr1, Intermediate, PixelsPerLine, BilinearFilters_wmt[ModX], BilinearFilters_wmt[ModY] ); UnpackBlock_wmt( Intermediate, ReconRefPtr, 8 ); */ } } }