123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257 |
- /****************************************************************************
- *
- * Module Title : PreProcOptFunctions.c
- *
- * Description : MMX or otherwise processor specific
- * optimised versions of pre-processor functions
- *
- * AUTHOR : Paul Wilkins
- *
- *****************************************************************************
- * Revision History
- *
- * 1.04 YWX 30-Nov-00 Added support for WMT cpu
- * 1.03 PGW 24 Jul 00 Added Column SAD function.
- * 1.02 YX 06/04/00 Optimized get row sad for xmm
- * 1.01 PGW 12/07/99 Changes to reduce uneccessary dependancies.
- * 1.00 PGW 14/06/99 Configuration baseline
- *
- *****************************************************************************
- */
- /****************************************************************************
- * Header Files
- *****************************************************************************
- */
- #define STRICT /* Strict type checking. */
- #include "preproc.h"
- #include "cpuidlib.h"
- #pragma warning( disable : 4799 ) // Disable no emms instruction warning!
- /****************************************************************************
- * Module constants.
- *****************************************************************************
- */
- /****************************************************************************
- * Imports.
- *****************************************************************************
- */
-
- /****************************************************************************
- * Exported Global Variables
- *****************************************************************************
- */
- /****************************************************************************
- * Exported Functions
- *****************************************************************************
- */
- /****************************************************************************
- * Module Statics
- *****************************************************************************
- */
- /****************************************************************************
- * Forward References
- *****************************************************************************
- */
- UINT32 MmxRowSAD( UINT8 * Src1, UINT8 * Src2 );
- extern UINT32 XmmRowSAD( UINT8 * Src1, UINT8 * Src2 );
- /****************************************************************************
- *
- * ROUTINE : MachineSpecificConfig
- *
- * INPUTS : None
- *
- * OUTPUTS : None
- *
- * RETURNS : None
- *
- * FUNCTION : Checks for machine specifc features such as MMX support
- * sets approipriate flags and function pointers.
- *
- * SPECIAL NOTES : None.
- *
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- #define MMX_ENABLED 1
- void MachineSpecificConfig(PP_INSTANCE *ppi)
- {
- UINT32 FeatureFlags = 0;
- BOOL CPUID_Supported = TRUE; // Is the CPUID instruction supported
- BOOL TestMmx = TRUE;
-
- PROCTYPE CPUType = findCPUId();
- switch(CPUType)
- {
- case X86 :
- case PPRO :
- case C6X86 :
- case C6X86MX:
- case AMDK5 :
- case MACG3 :
- case MAC68K :
- ppi->MmxEnabled = FALSE;
- ppi->XmmEnabled = FALSE;
- break;
- case PII :
- case AMDK63D:
- case AMDK6 :
- case PMMX :
- ppi->MmxEnabled = TRUE;
- ppi->XmmEnabled = FALSE;
- break;
- case XMM :
- case WMT :
- ppi->MmxEnabled = TRUE;
- ppi->XmmEnabled = TRUE;
- break;
- }
-
- //To test We force the cpu type here
- //ppi->MmxEnabled = FALSE;
- //ppi->XmmEnabled = FALSE;
- // If MMX supported then set to use MMX versions of functions else
- // use original 'C' versions.
- if (ppi->XmmEnabled)
- {
- ppi->RowSAD=XmmRowSAD;
- ppi->ColSAD = ScalarColSAD;
- }
- else if ( ppi->MmxEnabled )
- {
- ppi->RowSAD = MmxRowSAD;
- ppi->ColSAD = ScalarColSAD;
- }
- else
- {
- ppi->RowSAD = ScalarRowSAD;
- ppi->ColSAD = ScalarColSAD;
- }
- }
- /****************************************************************************
- *
- * ROUTINE : MmxRowSAD
- *
- * INPUTS : UINT8 * NewDataPtr (New Data)
- * UINT8 * RefDataPtr
- *
- * OUTPUTS :
- *
- * RETURNS : Highest of two S.A.D. values.
- *
- *
- * FUNCTION : Calculates the sum of the absolute differences for two groups of
- * four pixels and returns the larger of the two.
- *
- * SPECIAL NOTES : None.
- *
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- UINT32 MmxRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr )
- {
- UINT32 SadValue;
- UINT32 SadValue1;
- UINT32 AbsValues[2];
- // MMX code for calculating absolute difference values
- __asm
- {
- pxor mm6, mm6 ; Blank mmx6
- pxor mm7, mm7 ; Blank mmx6
- mov eax,dword ptr [NewDataPtr] ; Load base addresses
- mov ebx,dword ptr [RefDataPtr]
- // Calculate eight ABS difference values.
- movq mm0, [eax] ; Copy eight bytes to mm0
- movq mm1, [ebx] ; Copy eight bytes to mm1
- movq mm2, mm0 ; Take copy of MM0
- psubusb mm0, mm1 ; A-B to MM0
- psubusb mm1, mm2 ; B-A to MM1
- por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
- movq mm1, mm0 ; keep a copy
- // Sum together the low four bytes and the high four bytes
- punpcklbw mm0, mm6 ; unpack low four bytes to higher precision
- punpckhbw mm1, mm7 ; unpack high four bytes to higher precision
- movq mm2, mm0 ; take a copy
- movq mm3, mm1 ; take a copy
- punpcklwd mm0, mm6 ; unpack low two words to higher precision
- punpcklwd mm1, mm7 ; unpack low two words to higher precision
- punpckhwd mm2, mm6 ; unpack high low two words to higher precision
- punpckhwd mm3, mm7 ; unpack high low two words to higher precision
-
- paddd mm0, mm2 ; Accumulate intermediate results
- paddd mm1, mm3 ; Accumulate intermediate results
- movq mm2, mm0 ; take a copy
- movq mm3, mm1 ; take a copy
- punpckhdq mm0, mm6 ; Unpack and accumulate again
- punpckhdq mm1, mm7 ; Unpack and accumulate again
- punpckldq mm2, mm6
- punpckldq mm3, mm7
- paddd mm0, mm2 ; Accumulate final result
- paddd mm1, mm3 ; Accumulate final result
- // Interleave the two SAD results
- punpckldq mm0, mm1
- // Write back the abs values
- movq dword ptr [AbsValues], mm0
- }
-
- SadValue = AbsValues[0];
- SadValue1 = AbsValues[1];
- SadValue = (SadValue > SadValue1) ? SadValue : SadValue1;
- return SadValue;
- }
- /****************************************************************************
- *
- * ROUTINE : ClearMmxState()
- *
- *
- * INPUTS : None
- *
- * OUTPUTS :
- *
- * RETURNS :
- *
- *
- * FUNCTION : Clears down the MMX state
- *
- * SPECIAL NOTES : None.
- *
- *
- * ERRORS : None.
- *
- ****************************************************************************/
- void ClearMmxState(PP_INSTANCE *ppi)
- {
- if ( ppi->MmxEnabled )
- {
- __asm
- {
- emms ; Clear the MMX state.
- }
- }
- }
|