preprocfunctions.c 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. /****************************************************************************
  2. *
  3. * Module Title : PreProcOptFunctions.c
  4. *
  5. * Description : MMX or otherwise processor specific
  6. * optimised versions of pre-processor functions
  7. *
  8. * AUTHOR : Paul Wilkins
  9. *
  10. *****************************************************************************
  11. * Revision History
  12. *
  13. * 1.04 YWX 30-Nov-00 Added support for WMT cpu
  14. * 1.03 PGW 24 Jul 00 Added Column SAD function.
  15. * 1.02 YX 06/04/00 Optimized get row sad for xmm
  16. * 1.01 PGW 12/07/99 Changes to reduce unnecessary dependencies.
  17. * 1.00 PGW 14/06/99 Configuration baseline
  18. *
  19. *****************************************************************************
  20. */
  21. /****************************************************************************
  22. * Header Files
  23. *****************************************************************************
  24. */
  25. #define STRICT /* Strict type checking. */
  26. #include "preproc.h"
  27. #include "cpuidlib.h"
  28. #pragma warning( disable : 4799 ) // Disable no emms instruction warning!
  29. /****************************************************************************
  30. * Module constants.
  31. *****************************************************************************
  32. */
  33. /****************************************************************************
  34. * Imports.
  35. *****************************************************************************
  36. */
  37. /****************************************************************************
  38. * Exported Global Variables
  39. *****************************************************************************
  40. */
  41. /****************************************************************************
  42. * Exported Functions
  43. *****************************************************************************
  44. */
  45. /****************************************************************************
  46. * Module Statics
  47. *****************************************************************************
  48. */
  49. /****************************************************************************
  50. * Forward References
  51. *****************************************************************************
  52. */
  // Row SAD routines that MachineSpecificConfig() may install in ppi->RowSAD.
  // MmxRowSAD is defined further down in this file.
  UINT32 MmxRowSAD( UINT8 * Src1, UINT8 * Src2 );
  // XmmRowSAD is declared extern here; its definition is not in this section of the file.
  extern UINT32 XmmRowSAD( UINT8 * Src1, UINT8 * Src2 );
  55. /****************************************************************************
  56. *
  57. * ROUTINE : MachineSpecificConfig
  58. *
  59. * INPUTS : None
  60. *
  61. * OUTPUTS : None
  62. *
  63. * RETURNS : None
  64. *
  65. * FUNCTION : Checks for machine specifc features such as MMX support
  66. * sets approipriate flags and function pointers.
  67. *
  68. * SPECIAL NOTES : None.
  69. *
  70. *
  71. * ERRORS : None.
  72. *
  73. ****************************************************************************/
  74. #define MMX_ENABLED 1
  75. void MachineSpecificConfig(PP_INSTANCE *ppi)
  76. {
  77. UINT32 FeatureFlags = 0;
  78. BOOL CPUID_Supported = TRUE; // Is the CPUID instruction supported
  79. BOOL TestMmx = TRUE;
  80. PROCTYPE CPUType = findCPUId();
  81. switch(CPUType)
  82. {
  83. case X86 :
  84. case PPRO :
  85. case C6X86 :
  86. case C6X86MX:
  87. case AMDK5 :
  88. case MACG3 :
  89. case MAC68K :
  90. ppi->MmxEnabled = FALSE;
  91. ppi->XmmEnabled = FALSE;
  92. break;
  93. case PII :
  94. case AMDK63D:
  95. case AMDK6 :
  96. case PMMX :
  97. ppi->MmxEnabled = TRUE;
  98. ppi->XmmEnabled = FALSE;
  99. break;
  100. case XMM :
  101. case WMT :
  102. ppi->MmxEnabled = TRUE;
  103. ppi->XmmEnabled = TRUE;
  104. break;
  105. }
  106. //To test We force the cpu type here
  107. //ppi->MmxEnabled = FALSE;
  108. //ppi->XmmEnabled = FALSE;
  109. // If MMX supported then set to use MMX versions of functions else
  110. // use original 'C' versions.
  111. if (ppi->XmmEnabled)
  112. {
  113. ppi->RowSAD=XmmRowSAD;
  114. ppi->ColSAD = ScalarColSAD;
  115. }
  116. else if ( ppi->MmxEnabled )
  117. {
  118. ppi->RowSAD = MmxRowSAD;
  119. ppi->ColSAD = ScalarColSAD;
  120. }
  121. else
  122. {
  123. ppi->RowSAD = ScalarRowSAD;
  124. ppi->ColSAD = ScalarColSAD;
  125. }
  126. }
  /****************************************************************************
   *
   *  ROUTINE       :     MmxRowSAD
   *
   *  INPUTS        :     UINT8 * NewDataPtr (New Data)  - eight bytes are read
   *                      UINT8 * RefDataPtr             - eight bytes are read
   *
   *  OUTPUTS       :     None.
   *
   *  RETURNS       :     Highest of two S.A.D. values.
   *
   *  FUNCTION      :     Calculates the sum of the absolute differences for two
   *                      groups of four pixels (the first four and the last four
   *                      of the eight bytes read from each pointer) and returns
   *                      the larger of the two sums.
   *
   *  SPECIAL NOTES :     This routine does not execute emms itself; the MMX
   *                      registers are left in use on return. ClearMmxState()
   *                      in this file issues the emms instruction.
   *
   *  ERRORS        :     None.
   *
   ****************************************************************************/
  UINT32 MmxRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr )
  {
      UINT32 SadValue;
      UINT32 SadValue1;
      // Receives the two group sums: [0] = first four bytes, [1] = last four bytes.
      UINT32 AbsValues[2];
      // MMX code for calculating absolute difference values
      __asm
      {
          // Zero mm6 and mm7; they are the zero operand for every unpack below.
          // (The note on the second line says mmx6 but the instruction clears mm7.)
          pxor mm6, mm6 ; Blank mmx6
          pxor mm7, mm7 ; Blank mmx6
          mov eax,dword ptr [NewDataPtr] ; Load base addresses
          mov ebx,dword ptr [RefDataPtr]
          // Calculate eight ABS difference values.
          // Unsigned saturating subtraction in both directions leaves zero in the
          // lanes where the result would be negative, so OR-ing the two results
          // gives |A-B| per byte.
          movq mm0, [eax] ; Copy eight bytes to mm0
          movq mm1, [ebx] ; Copy eight bytes to mm1
          movq mm2, mm0 ; Take copy of MM0
          psubusb mm0, mm1 ; A-B to MM0
          psubusb mm1, mm2 ; B-A to MM1
          por mm0, mm1 ; OR MM0 and MM1 gives abs differences in MM0
          movq mm1, mm0 ; keep a copy
          // Sum together the low four bytes and the high four bytes
          // From here mm0/mm2 carry the low group and mm1/mm3 the high group.
          // Bytes are widened to words, then words to dwords, adding as we go.
          punpcklbw mm0, mm6 ; unpack low four bytes to higher precision
          punpckhbw mm1, mm7 ; unpack high four bytes to higher precision
          movq mm2, mm0 ; take a copy
          movq mm3, mm1 ; take a copy
          punpcklwd mm0, mm6 ; unpack low two words to higher precision
          punpcklwd mm1, mm7 ; unpack low two words to higher precision
          punpckhwd mm2, mm6 ; unpack high low two words to higher precision
          punpckhwd mm3, mm7 ; unpack high low two words to higher precision
          paddd mm0, mm2 ; Accumulate intermediate results
          paddd mm1, mm3 ; Accumulate intermediate results
          // Each register now holds two dword partial sums; fold them into the
          // low dword so each group ends up as a single total.
          movq mm2, mm0 ; take a copy
          movq mm3, mm1 ; take a copy
          punpckhdq mm0, mm6 ; Unpack and accumulate again
          punpckhdq mm1, mm7 ; Unpack and accumulate again
          punpckldq mm2, mm6
          punpckldq mm3, mm7
          paddd mm0, mm2 ; Accumulate final result
          paddd mm1, mm3 ; Accumulate final result
          // Interleave the two SAD results
          // (low dword = low group total, high dword = high group total)
          punpckldq mm0, mm1
          // Write back the abs values
          // NOTE(review): the operand is spelled 'dword ptr' but movq stores all
          // 64 bits, i.e. both elements of AbsValues - confirm the assembler
          // accepts this spelling on the toolchain in use.
          movq dword ptr [AbsValues], mm0
      }
      SadValue = AbsValues[0];
      SadValue1 = AbsValues[1];
      // Return whichever four-pixel group differs the most.
      SadValue = (SadValue > SadValue1) ? SadValue : SadValue1;
      return SadValue;
  }
  197. /****************************************************************************
  198. *
  199. * ROUTINE : ClearMmxState()
  200. *
  201. *
  202. * INPUTS : None
  203. *
  204. * OUTPUTS :
  205. *
  206. * RETURNS :
  207. *
  208. *
  209. * FUNCTION : Clears down the MMX state
  210. *
  211. * SPECIAL NOTES : None.
  212. *
  213. *
  214. * ERRORS : None.
  215. *
  216. ****************************************************************************/
  217. void ClearMmxState(PP_INSTANCE *ppi)
  218. {
  219. if ( ppi->MmxEnabled )
  220. {
  221. __asm
  222. {
  223. emms ; Clear the MMX state.
  224. }
  225. }
  226. }