; xmmrowsad.asm
  ;------------------------------------------------
  ; Stack frame of XmmRowSAD as addressed from ESP once the routine has
  ; pushed EBX.  Every slot is one dword, so the offsets are 0, 4, 8, 12.
  ;------------------------------------------------
  XmmRowSADParams STRUC
                  dd      ?               ; +0   1 pushed reg (saved EBX)
                  dd      ?               ; +4   return address
  NewDataPtr      dd      ?               ; +8   first argument
  RefDataPtr      dd      ?               ; +12  second argument
  XmmRowSADParams ENDS
  ;------------------------------------------------
  ; Assembler setup.  The order of these directives is significant and is
  ; kept exactly as it was.
          INCLUDE iaxmm.inc               ; presumably supplies the PSADBW / PMAXSW
                                          ; encodings used below -- confirm

          .586                            ; enable Pentium-class instructions
          .387                            ; enable x87 coprocessor instructions
          .MODEL  flat, SYSCALL, os_dos
          .MMX                            ; enable MMX instructions

  ; macros

          .DATA
  ; NOTE(review): no ENDS for TORQ_CX_DATA is visible before .CODE, and the
  ; segment holds no data here -- confirm the assembler in use accepts this.
  TORQ_CX_DATA SEGMENT PAGE PUBLIC USE32 'DATA'
          ALIGN   32

          .CODE

          NAME    XmmRowSAD

          PUBLIC  XmmRowSAD_
          PUBLIC  _XmmRowSAD

  ;------------------------------------------------
  ; local vars
  LOCAL_SPACE     EQU     0               ; not referenced by the routine below
  ;------------------------------------------------
  ;------------------------------------------------
  ; UINT32 XmmRowSAD( UINT8 * NewDataPtr, UINT8 * RefDataPtr )
  ;
  ; Reads eight bytes from each pointer, forms the sum of absolute
  ; differences (SAD) of bytes 0..3 and of bytes 4..7 separately, and
  ; returns the larger of the two sums.
  ;
  ; In:     both arguments on the stack, addressed via XmmRowSADParams
  ; Out:    EAX = max( SAD(bytes 0..3), SAD(bytes 4..7) )
  ; Saved:  EBX (pushed on entry, restored before return)
  ; Clobb:  MM0, MM1, MM2, MM3, MM4
  ; Note:   no EMMS is executed here; presumably the caller clears the
  ;         MMX state before any x87 code runs -- confirm against callers.
  ;
  ; Register roles inside the routine:
  ;   EAX = NewDataPtr, then the return value
  ;   EBX = RefDataPtr
  ;   MM0 = new bytes 0..3 as words -> low SAD -> result
  ;   MM1 = zero (unpack source)
  ;   MM2 = new bytes 4..7 as words -> high SAD
  ;   MM3 = ref bytes 0..3 as words
  ;   MM4 = ref bytes 4..7 as words
  ;------------------------------------------------
  _XmmRowSAD:
  XmmRowSAD_:
          push    ebx                     ; frame now matches XmmRowSADParams

          mov     ebx, (XmmRowSADParams PTR [esp]).RefDataPtr
          mov     eax, (XmmRowSADParams PTR [esp]).NewDataPtr

          pxor    mm1, mm1                ; zero source for the byte->word unpacks

          movq    mm0, QWORD PTR [eax]    ; eight bytes of new data
          movq    mm3, QWORD PTR [ebx]    ; eight bytes of reference data

          pop     ebx                     ; EBX is not needed past the loads

          movq    mm2, mm0                ; second copy of new data for the high half
          movq    mm4, mm3                ; second copy of ref data for the high half

          ; Widen each half to four words.  The inserted high bytes are zero
          ; in both operands, so they add nothing to the PSADBW totals and
          ; each PSADBW below yields the SAD of just four source bytes.
          punpcklbw mm0, mm1              ; new bytes 0..3
          punpckhbw mm2, mm1              ; new bytes 4..7
          punpcklbw mm3, mm1              ; ref bytes 0..3
          punpckhbw mm4, mm1              ; ref bytes 4..7

          psadbw  mm0, mm3                ; SAD of bytes 0..3
          psadbw  mm2, mm4                ; SAD of bytes 4..7

          pmaxsw  mm0, mm2                ; keep the larger of the two sums
          movd    eax, mm0                ; return value
          ret
  ;************************************************
          END