123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- /****************************************************************************
- *
- * Module Title : WmtOptFunctions.c
- *
- * Description : willamette processor specific
- * optimised versions of functions
- *
- * AUTHOR : Yaowu Xu
- *
- * Special Note:
- *
- *****************************************************************************
- * Revision History
- *
- *
- * 1.03 YWX 07-Dec-00 Removed constants and functions that are not in use
- * Added push and pop ebx in WmtReconIntra
- * 1.02 YWX 30 Aug 00 changed to be compatible with Microsoft compiler
- * 1.01 YWX 13 JUL 00 New Willamette Optimized Functions
- * 1.00 YWX 14/06/00 Configuration baseline from OptFunctions.c
- *
- *****************************************************************************
- */
-
- /*
- Use Tim's optimized version.
- */
- /****************************************************************************
- * Header Files
- *****************************************************************************
- */
- #define STRICT // Strict type checking.
- #include "reconstruct.h"
- /****************************************************************************
- * Module constants.
- *****************************************************************************
- */
- /****************************************************************************
- * Imports.
- *****************************************************************************
- */
- /****************************************************************************
- * Exported Global Variables
- *****************************************************************************
- */
- /****************************************************************************
- * Exported Functions
- *****************************************************************************
- */
- /****************************************************************************
- * Module Statics
- *****************************************************************************
- */
- _declspec(align(16)) static UINT8 Eight128s[8] = {128,128,128,128,128,128,128,128};
- #pragma warning( disable : 4799 ) // Disable no emms instruction warning!
-
- /****************************************************************************
- * Forward References
- *****************************************************************************
- */
- /****************************************************************************
- *
- * ROUTINE : WmtReconIntra
- *
- * INPUTS : INT16 * idct
- * Pointer to the output from the idct for this block
- *
- * UINT32 stride
- * Line Length in pixels in recon and reference images
- *
- *
- *
- *
- * OUTPUTS : UINT8 * dest
- * The reconstruction buffer
- *
- * RETURNS : None
- *
- * FUNCTION : Reconstructs an intra block - wmt version
- *
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void WmtReconIntra( INT16 *TmpDataBuffer, UINT8 * dest, UINT16 * idct, UINT32 stride )
- {
- (void)TmpDataBuffer;
- __asm
- {
-
- push ebx
- mov eax,[idct] ; Signed 16 bit inputs
- mov edx,[dest] ; Unsigned 8 bit outputs
- movq xmm0,QWORD PTR [Eight128s] ; Set xmm0 to 0x000000000000008080808080808080
- pxor xmm3, xmm3 ; set xmm3 to 0
- ;
- mov ebx,[stride] ; Line stride in output buffer
- lea ecx,[eax+128] ; Endpoint in input buffer
- loop_label:
- movdqa xmm2,XMMWORD PTR [eax] ; Read the eight inputs
- packsswb xmm2,xmm3 ;
-
- pxor xmm2,xmm0 ; Convert result to unsigned (same as add 128)
- lea eax,[eax + 16] ; Step source buffer
- cmp eax,ecx ; are we done
- movq QWORD PTR [edx],xmm2 ; store results
- lea edx,[edx+ebx] ; Step output buffer
- jc loop_label ; Loop back if we are not done
- pop ebx
- }
- }
- /****************************************************************************
- *
- * ROUTINE : WmtReconInter
- *
- * INPUTS : UINT8 * RefPtr
- * The last frame reference
- *
- * INT16 * ChangePtr
- * Pointer to the change data
- *
- * UINT32 LineStep
- * Line Length in pixels in recon and ref images
- *
- * OUTPUTS : UINT8 * ReconPtr
- * The reconstruction
- *
- * RETURNS : None
- *
- * FUNCTION : Reconstructs data from last data and change
- *
- * SPECIAL NOTES :
- *
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void WmtReconInter( INT16 *TmpDataBuffer, UINT8 * ReconPtr, UINT8 * RefPtr, INT16 * ChangePtr, UINT32 LineStep )
- {
- (void) TmpDataBuffer;
- _asm {
- push edi
-
- mov ebx, [RefPtr]
- mov ecx, [ChangePtr]
- mov eax, [ReconPtr]
- mov edx, [LineStep]
- pxor xmm0, xmm0
- lea edi, [ecx + 128]
- L:
- movq xmm2, QWORD ptr [ebx] ; (+3 misaligned) 8 reference pixels
- movdqa xmm4, XMMWORD ptr [ecx] ; 8 changes
-
- punpcklbw xmm2, xmm0 ;
- add ebx, edx ; next row of reference pixels
- paddsw xmm2, xmm4 ; add in first 4 changes
- lea ecx, [ecx + 16] ; next row of changes
- packuswb xmm2, xmm0 ; pack result to unsigned 8-bit values
- cmp ecx, edi ; are we done?
- movq QWORD PTR [eax], xmm2 ; store result
- lea eax, [eax+edx] ; next row of output
- jc L ; 12c / 8 elts = 18c / 8 pixels = 2.25 c/pix
- pop edi
- }
- }
- /****************************************************************************
- *
- * ROUTINE : WmtReconInterHalfPixel2
- *
- * INPUTS : UINT8 * RefPtr1, RefPtr2
- * The last frame reference
- *
- * INT16 * ChangePtr
- * Pointer to the change data
- *
- * UINT32 LineStep
- * Line Length in pixels in recon and ref images
- *
- *
- * OUTPUTS : UINT8 * ReconPtr
- * The reconstruction
- *
- * RETURNS : None
- *
- * FUNCTION : Reconstructs data from half pixel reference data and change.
- * Half pixel data interpolated from 2 references.
- *
- * SPECIAL NOTES :
- *
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void WmtReconInterHalfPixel2( INT16 *TmpDataBuffer, UINT8 * ReconPtr,
- UINT8 * RefPtr1, UINT8 * RefPtr2,
- INT16 * ChangePtr, UINT32 LineStep )
- {
- (void)TmpDataBuffer;
- _asm {
- push esi
- push edi
- mov ecx, [ChangePtr]
- mov esi, [RefPtr1]
- mov edi, [RefPtr2]
- mov ebx, [ReconPtr]
-
- mov edx, [LineStep]
- lea eax, [ecx+128]
- pxor xmm0, xmm0
- L:
-
- movq xmm2, QWORD PTR [esi] ; (+3 misaligned) mm2 = row from ref1
- movq xmm4, QWORD PTR [edi] ; (+3 misaligned) mm4 = row from ref2
- punpcklbw xmm2, xmm0 ;
- punpcklbw xmm4, xmm0 ;
- movdqa xmm6, [ecx] ; mm6 = first 4 changes
- paddw xmm2, xmm4 ; mm2 = start (ref1 + ref2)
- psrlw xmm2, 1 ; mm2 = start (ref1 + ref2)/2
- paddw xmm2, xmm6 ; add changes to start
- lea ecx, [ecx+16] ; next row idct
- packuswb xmm2, xmm0 ; pack start|end to unsigned 8-bit
-
- add esi, edx ; next row ref1
- add edi, edx ; next row ref2
-
- cmp ecx, eax
- movq QWORD PTR [ebx], xmm2 ; store result
- ;
- lea ebx, [ebx+edx]
- jc L
- pop edi
- pop esi
- }
- }
|