1
0

mp4dec.h 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708
  1. /* ///////////////////////////////////////////////////////////////////////
  2. //
  3. // INTEL CORPORATION PROPRIETARY INFORMATION
  4. // This software is supplied under the terms of a license agreement or
  5. // nondisclosure agreement with Intel Corporation and may not be copied
  6. // or disclosed except in accordance with the terms of that agreement.
  7. // Copyright (c) 2001-2007 Intel Corporation. All Rights Reserved.
  8. //
  9. // Description: MPEG-4 header.
  10. //
  11. */
  12. #pragma once
  13. #ifdef __cplusplus
  14. extern "C" {
  15. #endif
  /*
   * SIMD selection.
   *
   * Both intrinsic switches are forced off here, so every
   * "#if defined(USE_INTRINSIC_XMM) || defined(USE_INTRINSIC_EMM)" test in this
   * header takes its plain-C branch.
   *
   * The arrangement that is currently disabled was:
   *     with __INTEL_COMPILER : include <emmintrin.h>, define USE_INTRINSIC_EMM
   *     otherwise             : the two #undef lines below
   */
  #undef USE_INTRINSIC_EMM
  #undef USE_INTRINSIC_XMM
  /* Select the table-driven variant of mp4_DivIntraDC / mp4_DivIntraAC below. */
  #define USE_TABLE_INTRA_DIV

  /*
   * a / 2, truncated toward zero.
   * Written as a plain division so the argument is evaluated exactly once and
   * the result does not depend on how the compiler right-shifts negative values.
   */
  #define mp4_Div2(a) ((a) / 2)

  /*
   * (a >> 1) with bit 0 forced to 1 whenever a is odd.
   * NOTE: evaluates a twice; for negative a this relies on an arithmetic
   * (sign-propagating) right shift.
   */
  #define mp4_Div2Round(a) (((a) >> 1) | ((a) & 1))

  /*
   * a / b rounded to nearest, ties away from zero: half of b is added to
   * non-negative a and subtracted from negative a before the truncating divide.
   * NOTE: evaluates a twice and b up to three times.
   */
  #define mp4_DivRoundInf(a, b) ((((a) + (((a) >= 0) ? ((b) >> 1) : -((b) >> 1))) / (b)))

  #ifndef USE_TABLE_INTRA_DIV
  /* Real divisions: DC adds b/2 before dividing, AC uses mp4_DivRoundInf. */
  #define mp4_DivIntraDC(a, b) (((a) + ((b) >> 1)) / (b))
  #define mp4_DivIntraAC(a, b) mp4_DivRoundInf(a, b)
  #else
  /*
   * Division replaced by a multiply with mp4_DivIntraDivisor[b] (defined
   * elsewhere) followed by a rounding shift of 18 bits.
   * tested on (-2047..2047) // (1..46)
   */
  #define mp4_DivIntraDC(a, b) (((a) * mp4_DivIntraDivisor[b] + (1 << 17)) >> 18)
  #define mp4_DivIntraAC(a, b) mp4_DivIntraDC(a, b)
  #endif
  /*
   * Median of three signed 16-bit values.
   * a and b are ordered first; c is then compared against that pair.
   */
  __INLINE int16_t mp4_Median(int16_t a, int16_t b, int16_t c)
  {
      int16_t lo = a;
      int16_t hi = b;

      if (lo > hi) {
          lo = b;
          hi = a;
      }
      if (c >= hi)
          return hi;
      if (c < lo)
          return lo;
      return c;
  }
  42. __INLINE void mp4_ComputeChromaMV(const IppMotionVector *mvLuma, IppMotionVector *mvChroma)
  43. {
  44. mvChroma->dx = (int16_t)mp4_Div2Round(mvLuma->dx);
  45. mvChroma->dy = (int16_t)mp4_Div2Round(mvLuma->dy);
  46. }
  47. __INLINE void mp4_ComputeChromaMVQ(const IppMotionVector *mvLuma, IppMotionVector *mvChroma)
  48. {
  49. int32_t dx, dy;
  50. dx = mp4_Div2(mvLuma->dx);
  51. dy = mp4_Div2(mvLuma->dy);
  52. mvChroma->dx = (int16_t)mp4_Div2Round(dx);
  53. mvChroma->dy = (int16_t)mp4_Div2Round(dy);
  54. }
  55. __INLINE void mp4_ComputeChroma4MV(const IppMotionVector mvLuma[4], IppMotionVector *mvChroma)
  56. {
  57. int32_t dx, dy, cdx, cdy, adx, ady;
  58. dx = mvLuma[0].dx + mvLuma[1].dx + mvLuma[2].dx + mvLuma[3].dx;
  59. dy = mvLuma[0].dy + mvLuma[1].dy + mvLuma[2].dy + mvLuma[3].dy;
  60. adx = abs(dx);
  61. ady = abs(dy);
  62. cdx = mp4_cCbCrMvRound16[adx & 15] + (adx >> 4) * 2;
  63. cdy = mp4_cCbCrMvRound16[ady & 15] + (ady >> 4) * 2;
  64. mvChroma->dx = (int16_t)((dx >= 0) ? cdx : -cdx);
  65. mvChroma->dy = (int16_t)((dy >= 0) ? cdy : -cdy);
  66. }
  67. __INLINE void mp4_ComputeChroma4MVQ(const IppMotionVector mvLuma[4], IppMotionVector *mvChroma)
  68. {
  69. int32_t dx, dy, cdx, cdy, adx, ady;
  70. dx = mp4_Div2(mvLuma[0].dx) + mp4_Div2(mvLuma[1].dx) + mp4_Div2(mvLuma[2].dx) + mp4_Div2(mvLuma[3].dx);
  71. dy = mp4_Div2(mvLuma[0].dy) + mp4_Div2(mvLuma[1].dy) + mp4_Div2(mvLuma[2].dy) + mp4_Div2(mvLuma[3].dy);
  72. adx = abs(dx);
  73. ady = abs(dy);
  74. cdx = mp4_cCbCrMvRound16[adx & 15] + (adx >> 4) * 2;
  75. cdy = mp4_cCbCrMvRound16[ady & 15] + (ady >> 4) * 2;
  76. mvChroma->dx = (int16_t)((dx >= 0) ? cdx : -cdx);
  77. mvChroma->dy = (int16_t)((dy >= 0) ? cdy : -cdy);
  78. }
  /*
   * Clamp dx and store the result, narrowed to int16_t, in mvd:
   *     dx <  xmin  -> xmin
   *     dx >= xmax  -> xmax
   *     otherwise   -> dx
   *
   * dx, xmin and xmax are each evaluated exactly once (as int32_t), so
   * arguments with side effects are safe; mvd must be an assignable lvalue.
   * The macro expands to a brace block, like the other statement macros in
   * this header.
   */
  #define limitMV(dx, xmin, xmax, mvd) \
  { \
      const int32_t mp4_lim_val_ = (dx); \
      const int32_t mp4_lim_lo_ = (xmin); \
      const int32_t mp4_lim_hi_ = (xmax); \
      if (mp4_lim_val_ < mp4_lim_lo_) \
          (mvd) = (int16_t)mp4_lim_lo_; \
      else if (mp4_lim_val_ >= mp4_lim_hi_) \
          (mvd) = (int16_t)mp4_lim_hi_; \
      else \
          (mvd) = (int16_t)mp4_lim_val_; \
  }
  88. __INLINE void mp4_LimitMV(const IppMotionVector *pSrcMV, IppMotionVector *pDstMV, const IppiRect *limitRect, int32_t x, int32_t y, int32_t size)
  89. {
  90. limitMV(pSrcMV->dx, (limitRect->x - x) << 1, (limitRect->x - x + limitRect->width - size) << 1, pDstMV->dx);
  91. limitMV(pSrcMV->dy, (limitRect->y - y) << 1, (limitRect->y - y + limitRect->height - size) << 1, pDstMV->dy);
  92. }
  93. __INLINE void mp4_LimitMVQ(const IppMotionVector *pSrcMV, IppMotionVector *pDstMV, const IppiRect *limitRect, int32_t x, int32_t y, int32_t size)
  94. {
  95. limitMV(pSrcMV->dx, (limitRect->x - x) << 2, (limitRect->x - x + limitRect->width - size) << 2, pDstMV->dx);
  96. limitMV(pSrcMV->dy, (limitRect->y - y) << 2, (limitRect->y - y + limitRect->height - size) << 2, pDstMV->dy);
  97. }
  98. __INLINE void mp4_Limit4MV(const IppMotionVector *pSrcMV, IppMotionVector *pDstMV, const IppiRect *limitRect, int32_t x, int32_t y, int32_t size)
  99. {
  100. mp4_LimitMV(&pSrcMV[0], &pDstMV[0], limitRect, x , y, size);
  101. mp4_LimitMV(&pSrcMV[1], &pDstMV[1], limitRect, x + size, y, size);
  102. mp4_LimitMV(&pSrcMV[2], &pDstMV[2], limitRect, x , y + size, size);
  103. mp4_LimitMV(&pSrcMV[3], &pDstMV[3], limitRect, x + size, y + size, size);
  104. }
  105. __INLINE void mp4_Limit4MVQ(const IppMotionVector *pSrcMV, IppMotionVector *pDstMV, const IppiRect *limitRect, int32_t x, int32_t y, int32_t size)
  106. {
  107. mp4_LimitMVQ(&pSrcMV[0], &pDstMV[0], limitRect, x , y, size);
  108. mp4_LimitMVQ(&pSrcMV[1], &pDstMV[1], limitRect, x + size, y, size);
  109. mp4_LimitMVQ(&pSrcMV[2], &pDstMV[2], limitRect, x , y + size, size);
  110. mp4_LimitMVQ(&pSrcMV[3], &pDstMV[3], limitRect, x + size, y + size, size);
  111. }
  112. __INLINE void mp4_LimitFMV(const IppMotionVector *pSrcMV, IppMotionVector *pDstMV, const IppiRect *limitRect, int32_t x, int32_t y, int32_t size)
  113. {
  114. limitMV(pSrcMV->dx, (limitRect->x - x) << 1, (limitRect->x - x + limitRect->width - size) << 1, pDstMV->dx);
  115. limitMV(pSrcMV->dy << 1, (limitRect->y - y) << 1, (limitRect->y - y + limitRect->height - size) << 1, pDstMV->dy);
  116. pDstMV->dy >>= 1;
  117. }
  118. __INLINE void mp4_LimitFMVQ(const IppMotionVector *pSrcMV, IppMotionVector *pDstMV, const IppiRect *limitRect, int32_t x, int32_t y, int32_t size)
  119. {
  120. limitMV(pSrcMV->dx, (limitRect->x - x) << 2, (limitRect->x - x + limitRect->width - size) << 2, pDstMV->dx);
  121. limitMV(pSrcMV->dy << 1, (limitRect->y - y) << 2, (limitRect->y - y + limitRect->height - size) << 2, pDstMV->dy);
  122. pDstMV->dy >>= 1;
  123. }
  /*
   * Motion-vector helpers. "HP" variants split a vector into an integer part
   * (value >> 1) and a 1-bit fraction, "QP" variants into (value >> 2) and a
   * 2-bit fraction (presumably half- and quarter-sample precision).
   *
   *   MP4_MV_OFF_*  - element offset into a plane with row pitch `step`
   *   MP4_MV_ACC_*  - fractional bits packed as (dy_frac << bits) + dx_frac
   *
   * NOTE: the right shifts are applied to possibly negative components and so
   * rely on an arithmetic (sign-propagating) shift.
   */
  #define MP4_MV_OFF_HP(dx, dy, step) \
      (((dx) >> 1) + (step) * ((dy) >> 1))
  #define MP4_MV_ACC_HP(dx, dy) \
      ((((dy) & 1) << 1) + ((dx) & 1))
  #define MP4_MV_OFF_QP(dx, dy, step) \
      (((dx) >> 2) + (step) * ((dy) >> 2))
  #define MP4_MV_ACC_QP(dx, dy) \
      ((((dy) & 3) << 2) + ((dx) & 3))

  /*
   * Thin wrappers around the ippi copy/add primitives: the source pointer is
   * advanced by the integer part of *mv and the fractional part is passed on as
   * the "acc" argument. `mv` is a pointer to a vector and is evaluated several
   * times. `pSrc` is parenthesized so that expressions such as "c ? a : b" can
   * be passed as the source pointer.
   */
  #define mp4_Copy8x4HP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy8x4HP_8u_C1R((pSrc) + MP4_MV_OFF_HP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_HP((mv)->dx, (mv)->dy), rc)
  #define mp4_Copy8x8HP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy8x8HP_8u_C1R((pSrc) + MP4_MV_OFF_HP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_HP((mv)->dx, (mv)->dy), rc)
  #define mp4_Copy16x8HP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy16x8HP_8u_C1R((pSrc) + MP4_MV_OFF_HP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_HP((mv)->dx, (mv)->dy), rc)
  #define mp4_Copy16x16HP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy16x16HP_8u_C1R((pSrc) + MP4_MV_OFF_HP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_HP((mv)->dx, (mv)->dy), rc)
  #define mp4_Copy8x8QP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy8x8QP_MPEG4_8u_C1R((pSrc) + MP4_MV_OFF_QP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_QP((mv)->dx, (mv)->dy), rc)
  #define mp4_Copy16x8QP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy16x8QP_MPEG4_8u_C1R((pSrc) + MP4_MV_OFF_QP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_QP((mv)->dx, (mv)->dy), rc)
  #define mp4_Copy16x16QP_8u(pSrc, srcStep, pDst, dstStep, mv, rc) \
      ippiCopy16x16QP_MPEG4_8u_C1R((pSrc) + MP4_MV_OFF_QP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_QP((mv)->dx, (mv)->dy), rc)

  /* Motion-compensated prediction plus residual; the residual row pitch is fixed at 16. */
  #define mp4_Add8x8HP_16s8u(pSrc, srcStep, pResid, pDst, dstStep, mv, rc) \
      ippiAdd8x8HP_16s8u_C1RS(pResid, 16, (pSrc) + MP4_MV_OFF_HP((mv)->dx, (mv)->dy, srcStep), srcStep, pDst, dstStep, MP4_MV_ACC_HP((mv)->dx, (mv)->dy), rc)

  /* In-place residual add; the residual row pitch is fixed at 16. */
  #define mp4_Add8x8_16s8u(pSrcDst, pResid, srcDstStep) \
      ippiAdd8x8_16s8u_C1IRS(pResid, 16, pSrcDst, srcDstStep)
  /*
   * Add a delta to quant and clip it.
   * The delta is mp4_dquant[] indexed by the value mp4_GetBits9(pInfo, 2)
   * returns (presumably the next 2 bits of the stream); the result is then
   * clipped with mp4_CLIP to [1, 2^quant_precision - 1].
   *
   * pInfo - pointer to the decoder state; evaluated more than once
   * quant - lvalue that is updated in place
   */
  #define mp4_UpdateQuant(pInfo, quant) \
  { \
      (quant) += mp4_dquant[mp4_GetBits9(pInfo, 2)]; \
      mp4_CLIP(quant, 1, (1 << (pInfo)->VisualObject.VideoObject.quant_precision) - 1); \
  }
  /*
   * Conditional quantiser update.
   * If mp4_GetBit(pInfo) returns non-zero, a second mp4_GetBit result selects
   * -2 (for 0) or +2 (otherwise); that delta is added to quant and the result
   * is clipped with mp4_CLIP to [1, 2^quant_precision - 1].
   *
   * The expansion is wrapped in its own brace block, like mp4_UpdateQuant, so
   * the inner "if" can never capture an "else" that follows the macro call.
   *
   * pInfo - pointer to the decoder state; evaluated more than once
   * quant - lvalue that is updated in place
   */
  #define mp4_UpdateQuant_B(pInfo, quant) \
  { \
      if (mp4_GetBit(pInfo) != 0) { \
          (quant) += (mp4_GetBit(pInfo) == 0) ? -2 : 2; \
          mp4_CLIP(quant, 1, (1 << (pInfo)->VisualObject.VideoObject.quant_precision) - 1); \
      } \
  }
  /*
   * Fill an 8x8 block of bytes with the value v.
   *
   * p    - address of the top-left byte
   * step - distance in bytes between the starts of consecutive rows
   * v    - fill value
   *
   * The plain-C path writes byte by byte, like mp4_Set16x16_8u. Storing
   * through a uint32_t* would require p and step to be 4-byte aligned and would
   * access uint8_t storage through an incompatible type, neither of which this
   * interface guarantees.
   */
  __INLINE void mp4_Set8x8_8u(uint8_t *p, int32_t step, uint8_t v)
  {
  #if defined(USE_INTRINSIC_XMM) || defined(USE_INTRINSIC_EMM)
      __m64 _p_v = _mm_set1_pi8(v);
      *(__m64*)p = _p_v;
      *(__m64*)(p+step) = _p_v;
      p += 2 * step;
      *(__m64*)p = _p_v;
      *(__m64*)(p+step) = _p_v;
      p += 2 * step;
      *(__m64*)p = _p_v;
      *(__m64*)(p+step) = _p_v;
      p += 2 * step;
      *(__m64*)p = _p_v;
      *(__m64*)(p+step) = _p_v;
      _mm_empty();
  #else
      int32_t row, col;

      for (row = 0; row < 8; row ++) {
          for (col = 0; col < 8; col ++)
              p[row * step + col] = v;
      }
  #endif
  }
  /*
   * Fill a 16x16 block of bytes with val.
   *
   * p    - address of the top-left byte
   * step - distance in bytes between the starts of consecutive rows
   * val  - fill value
   */
  __INLINE void mp4_Set16x16_8u(uint8_t *p, int32_t step, uint8_t val)
  {
      int32_t rowsLeft = 16;

      while (rowsLeft-- > 0) {
          uint8_t *cursor = p;
          uint8_t *const rowEnd = p + 16;

          while (cursor != rowEnd)
              *cursor++ = val;
          p += step;
      }
  }
  /*
   * Block clear / fill helpers.
   *
   *   mp4_Zero4MV(mv)          - zero the dx/dy of four consecutive motion vectors
   *   mp4_Zero64_16s(pDst)     - zero 64 consecutive int16_t values
   *   mp4_Set64_16s(val, pDst) - store (int16_t)val into 64 consecutive int16_t values
   *
   * Three implementations are selected by USE_INTRINSIC_XMM / USE_INTRINSIC_EMM:
   * 64-bit __m64 stores, 128-bit __m128i stores, or plain C.
   * All variants must be followed by a ';' at the call site.
   */
  #if defined(USE_INTRINSIC_XMM) || defined(USE_INTRINSIC_EMM)
  /* No trailing ';' inside the macro, so it behaves like the plain-C variant. */
  #define mp4_Zero4MV(mv) \
      memset(mv, 0, 4 * sizeof(IppMotionVector))
  #if defined(USE_INTRINSIC_XMM)
  #define mp4_Zero64_16s(pDst) \
  { \
      __m64 _p_zero = _mm_setzero_si64(); \
      ((__m64*)(pDst))[0] = _p_zero; \
      ((__m64*)(pDst))[1] = _p_zero; \
      ((__m64*)(pDst))[2] = _p_zero; \
      ((__m64*)(pDst))[3] = _p_zero; \
      ((__m64*)(pDst))[4] = _p_zero; \
      ((__m64*)(pDst))[5] = _p_zero; \
      ((__m64*)(pDst))[6] = _p_zero; \
      ((__m64*)(pDst))[7] = _p_zero; \
      ((__m64*)(pDst))[8] = _p_zero; \
      ((__m64*)(pDst))[9] = _p_zero; \
      ((__m64*)(pDst))[10] = _p_zero; \
      ((__m64*)(pDst))[11] = _p_zero; \
      ((__m64*)(pDst))[12] = _p_zero; \
      ((__m64*)(pDst))[13] = _p_zero; \
      ((__m64*)(pDst))[14] = _p_zero; \
      ((__m64*)(pDst))[15] = _p_zero; \
      _m_empty(); \
  }
  #define mp4_Set64_16s(val, pDst) \
  { \
      __m64 _p_val = _mm_set1_pi16((int16_t)(val)); \
      ((__m64*)(pDst))[0] = _p_val; \
      ((__m64*)(pDst))[1] = _p_val; \
      ((__m64*)(pDst))[2] = _p_val; \
      ((__m64*)(pDst))[3] = _p_val; \
      ((__m64*)(pDst))[4] = _p_val; \
      ((__m64*)(pDst))[5] = _p_val; \
      ((__m64*)(pDst))[6] = _p_val; \
      ((__m64*)(pDst))[7] = _p_val; \
      ((__m64*)(pDst))[8] = _p_val; \
      ((__m64*)(pDst))[9] = _p_val; \
      ((__m64*)(pDst))[10] = _p_val; \
      ((__m64*)(pDst))[11] = _p_val; \
      ((__m64*)(pDst))[12] = _p_val; \
      ((__m64*)(pDst))[13] = _p_val; \
      ((__m64*)(pDst))[14] = _p_val; \
      ((__m64*)(pDst))[15] = _p_val; \
      _m_empty(); \
  }
  #elif defined(USE_INTRINSIC_EMM)
  #define mp4_Zero64_16s(pDst) \
  { \
      __m128i _p_val = _mm_setzero_si128(); \
      ((__m128i*)(pDst))[0] = _p_val; \
      ((__m128i*)(pDst))[1] = _p_val; \
      ((__m128i*)(pDst))[2] = _p_val; \
      ((__m128i*)(pDst))[3] = _p_val; \
      ((__m128i*)(pDst))[4] = _p_val; \
      ((__m128i*)(pDst))[5] = _p_val; \
      ((__m128i*)(pDst))[6] = _p_val; \
      ((__m128i*)(pDst))[7] = _p_val; \
  }
  #define mp4_Set64_16s(val, pDst) \
  { \
      __m128i _p_val = _mm_set1_epi16((int16_t)(val)); \
      ((__m128i*)(pDst))[0] = _p_val; \
      ((__m128i*)(pDst))[1] = _p_val; \
      ((__m128i*)(pDst))[2] = _p_val; \
      ((__m128i*)(pDst))[3] = _p_val; \
      ((__m128i*)(pDst))[4] = _p_val; \
      ((__m128i*)(pDst))[5] = _p_val; \
      ((__m128i*)(pDst))[6] = _p_val; \
      ((__m128i*)(pDst))[7] = _p_val; \
  }
  #endif
  #else
  #define mp4_Zero4MV(mv) \
      (mv)[0].dx = (mv)[0].dy = (mv)[1].dx = (mv)[1].dy = (mv)[2].dx = (mv)[2].dy = (mv)[3].dx = (mv)[3].dy = 0
  /*
   * Plain-C variants. They write int16_t elements directly (no 32-bit type
   * punning), evaluate each argument exactly once before the first store, and
   * use prefixed local names so that an argument mentioning a caller variable
   * such as "i" is never captured by the macro's own loop counter.
   */
  #define mp4_Zero64_16s(pDst) \
  { \
      int16_t *mp4_z64_dst_ = (int16_t*)(pDst); \
      int32_t mp4_z64_i_; \
      for (mp4_z64_i_ = 0; mp4_z64_i_ < 64; mp4_z64_i_ ++) \
          mp4_z64_dst_[mp4_z64_i_] = 0; \
  }
  #define mp4_Set64_16s(val, pDst) \
  { \
      const int16_t mp4_s64_val_ = (int16_t)(val); \
      int16_t *mp4_s64_dst_ = (int16_t*)(pDst); \
      int32_t mp4_s64_i_; \
      for (mp4_s64_i_ = 0; mp4_s64_i_ < 64; mp4_s64_i_ ++) \
          mp4_s64_dst_[mp4_s64_i_] = mp4_s64_val_; \
  }
  #endif
  /*
   * Motion-compensate one 8x8 block.
   * With a zero `pat` the prediction is only copied (mp4_Copy8x8HP_8u);
   * otherwise the residual in coeffMB is added to it (mp4_Add8x8HP_16s8u).
   */
  #define mp4_MC_HP(pat, pRef, stepRef, pCur, stepCur, coeffMB, mv, rc) \
  { \
      if (!(pat)) { \
          mp4_Copy8x8HP_8u(pRef, stepRef, pCur, stepCur, mv, rc); \
      } else { \
          mp4_Add8x8HP_16s8u(pRef, stepRef, coeffMB, pCur, stepCur, mv, rc); \
      } \
  }
  /*
   * Add the 8x8 residual in coeffMB to the block at pc (row pitch stepc),
   * but only when `pat` is non-zero; otherwise nothing happens.
   */
  #define mp4_AddResidual(pat, pc, stepc, coeffMB) \
  { \
      if ((pat) != 0) { \
          mp4_Add8x8_16s8u(pc, coeffMB, stepc); \
      } \
  }
  /*
   * Inverse-transform the six blocks of an intra macroblock into 8-bit output.
   *
   * For block i (0..5):
   *   - lnz[i] > 0 : ippiDCT8x8Inv_16s8u_C1R on the 64 coefficients at
   *                  coeffMB[i*64], writing to pFc[i] with row pitch stepFc[i];
   *   - otherwise  : only the first coefficient is used: (coeff + 4) >> 3 is
   *                  clipped to 0..255 and the whole 8x8 block is filled with it.
   *
   * All four arguments are indexed inside the macro and are parenthesized
   * here, so pointer expressions may be passed. The macro declares locals
   * named `i` and `k`; arguments must not refer to caller variables with those
   * names.
   */
  #define mp4_DCTInvCoeffsIntraMB(coeffMB, lnz, pFc, stepFc) \
  { \
      int32_t i; \
      for (i = 0; i < 6; i ++) { \
          if ((lnz)[i] > 0) { \
              ippiDCT8x8Inv_16s8u_C1R(&(coeffMB)[i*64], (pFc)[i], (stepFc)[i]); \
          } else { \
              int k = ((coeffMB)[i*64] + 4) >> 3; \
              mp4_CLIP(k, 0, 255); \
              mp4_Set8x8_8u((pFc)[i], (stepFc)[i], (uint8_t)k); \
          } \
      } \
  }
  /*
   * Decode and reconstruct the coefficients of the six blocks of an intra
   * macroblock with ippiReconstructCoeffsIntra_H263_1u16s.
   *
   * A mask `pm` starts at 32 and is shifted right once per block, so block i is
   * tested against bit (5 - i) of `pat`; that masked value is handed to the ippi
   * call and also selects which statistics counter (nB_INTRA_AC or nB_INTRA_DC)
   * is bumped. On any status other than ippStsNoErr an error is reported and
   * control jumps to the label `err`.
   *
   * pInfo   - pointer to the decoder state (bufptr / bitoff are advanced)
   * coeffMB - receives 6 * 64 coefficients
   * lnz     - array of 6 values filled in by the ippi call
   * pat     - coded-block pattern
   * quant   - quantiser passed through to the ippi call
   * err     - label to jump to on failure
   *
   * pInfo, coeffMB, lnz and pat are parenthesized wherever they take part in
   * an expression ("pat & pm" would otherwise mis-parse for an argument such as
   * "a | b"). The macro declares locals named `i` and `pm`.
   */
  #define mp4_ReconstructCoeffsIntraMB_SVH(pInfo, coeffMB, lnz, pat, quant, err) \
  { \
      int32_t i, pm = 32; \
      for (i = 0; i < 6; i ++) { \
          if (ippiReconstructCoeffsIntra_H263_1u16s(&(pInfo)->bufptr, &(pInfo)->bitoff, (coeffMB)+i*64, &(lnz)[i], (pat) & pm, quant, 0, IPPVC_SCAN_ZIGZAG, 0) != ippStsNoErr) { \
              mp4_Error("Error: decoding coefficients of Intra block"); \
              goto err; \
          } \
          if ((pat) & pm) { \
              mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTRA_AC); \
          } else { \
              mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTRA_DC); \
          } \
          pm >>= 1; \
      } \
  }
  /*
   * Decode and reconstruct the coefficients of the coded blocks of an inter
   * macroblock with ippiReconstructCoeffsInter_H263_1u16s.
   *
   * When `pat` is non-zero, a mask `pm` starts at 32 and is shifted right once
   * per block; blocks whose bit is set are decoded (counter nB_INTER_C), the
   * others only bump nB_INTER_NC. When `pat` is zero, nB_INTER_NC is bumped six
   * times and nothing is decoded. On any status other than ippStsNoErr an error
   * is reported and control jumps to the label `err`.
   *
   * pInfo   - pointer to the decoder state (bufptr / bitoff are advanced)
   * coeffMB - receives 64 coefficients per coded block, at offset i * 64
   * lnz     - array of 6 values; entry i is filled in for coded blocks
   * pat     - coded-block pattern
   * quant   - quantiser passed through to the ippi call
   * err     - label to jump to on failure
   *
   * pInfo, coeffMB, lnz and pat are parenthesized wherever they take part in
   * an expression. The macro declares locals named `i` and `pm`.
   */
  #define mp4_ReconstructCoeffsInterMB_SVH(pInfo, coeffMB, lnz, pat, quant, err) \
  { \
      if (pat) { \
          int32_t i, pm = 32; \
          for (i = 0; i < 6; i ++) { \
              if ((pat) & pm) { \
                  if (ippiReconstructCoeffsInter_H263_1u16s(&(pInfo)->bufptr, &(pInfo)->bitoff, (coeffMB)+i*64, &(lnz)[i], quant, 0) != ippStsNoErr) { \
                      mp4_Error("Error: decoding coefficients of Inter block"); \
                      goto err; \
                  } \
                  mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_C); \
              } else { \
                  mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
              } \
              pm >>= 1; \
          } \
      } else { \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
      } \
  }
  /*
   * In-place inverse DCT of the coded blocks of an inter macroblock.
   *
   * Nothing happens when `pat` is zero. Otherwise, for each of the six blocks
   * whose bit is set in `pat` (mask 32 for block 0, shifted right per block):
   *   - lastNZ[i] == 0                      : all 64 values are set to
   *                                           (coeff[0] + 4) >> 3;
   *   - lastNZ[i] <= 4  and coeff[16] == 0  : ippiDCT8x8Inv_2x2_16s_C1I;
   *   - lastNZ[i] <= 13 and coeff[32] == 0  : ippiDCT8x8Inv_4x4_16s_C1I;
   *   - otherwise                           : ippiDCT8x8Inv_16s_C1I.
   *
   * The expansion is a self-contained brace block (it used to be a bare "if"),
   * every branch is braced, and coeffMB / lastNZ / pat are parenthesized where
   * they take part in an expression. The macro declares locals named `i`,
   * `lnz`, `pm` and `coeff`; arguments must not use those names.
   */
  #define mp4_DCTInvCoeffsInterMB_SVH(coeffMB, lastNZ, pat) \
  { \
      if (pat) { \
          int32_t i, lnz, pm = 32; \
          int16_t *coeff = (coeffMB); \
          for (i = 0; i < 6; i ++) { \
              if ((pat) & pm) { \
                  lnz = (lastNZ)[i]; \
                  if (lnz != 0) { \
                      if ((lnz <= 4) && (coeff[16] == 0)) \
                          ippiDCT8x8Inv_2x2_16s_C1I(coeff); \
                      else if ((lnz <= 13) && (coeff[32] == 0)) \
                          ippiDCT8x8Inv_4x4_16s_C1I(coeff); \
                      else \
                          ippiDCT8x8Inv_16s_C1I(coeff); \
                  } else { \
                      mp4_Set64_16s((int16_t)((coeff[0] + 4) >> 3), coeff); \
                  } \
              } \
              pm >>= 1; \
              coeff += 64; \
          } \
      } \
  }
  /*
   * Decode, inverse-transform and motion-compensate a single inter block.
   *
   * pat != 0 : coefficients are reconstructed into coeffMB with
   *            ippiReconstructCoeffsInter_H263_1u16s (jump to `err` on failure),
   *            inverse-transformed in place using the reduced 2x2 / 4x4 DCT
   *            when the last-non-zero index and coeffMB[16] / coeffMB[32] allow
   *            it (or a constant fill when that index is 0), then added to the
   *            prediction taken from pRef; counter nB_INTER_C is bumped.
   * pat == 0 : the prediction is copied unchanged; counter nB_INTER_NC is
   *            bumped.
   *
   * pRef and pCur share the row pitch `step`; rounding control is fixed at 0.
   * pInfo and coeffMB are parenthesized where they are dereferenced or indexed.
   * The macro declares a local named `lnz`.
   */
  #define mp4_DecodeMCInterBlock_SVH(pInfo, quant, pat, pRef, pCur, step, coeffMB, mv, err) \
  { \
      if (pat) { \
          int32_t lnz; \
          if (ippiReconstructCoeffsInter_H263_1u16s(&(pInfo)->bufptr, &(pInfo)->bitoff, coeffMB, &lnz, quant, 0) != ippStsNoErr) { \
              mp4_Error("Error: decoding coefficients of Inter block"); \
              goto err; \
          } \
          if (lnz != 0) { \
              if ((lnz <= 4) && ((coeffMB)[16] == 0)) \
                  ippiDCT8x8Inv_2x2_16s_C1I(coeffMB); \
              else if ((lnz <= 13) && ((coeffMB)[32] == 0)) \
                  ippiDCT8x8Inv_4x4_16s_C1I(coeffMB); \
              else \
                  ippiDCT8x8Inv_16s_C1I(coeffMB); \
          } else { \
              mp4_Set64_16s((int16_t)(((coeffMB)[0] + 4) >> 3), coeffMB); \
          } \
          mp4_Add8x8HP_16s8u(pRef, step, coeffMB, pCur, step, mv, 0); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_C); \
      } else { \
          mp4_Copy8x8HP_8u(pRef, step, pCur, step, mv, 0); \
          mp4_StatisticInc_(&(pInfo)->VisualObject.Statistic.nB_INTER_NC); \
      } \
  }
  /*
   * Reset the intra prediction buffer on a new Video_packet.
   *
   * Sets dct_dc to -1 in IntraPredBuff.dcB[3..5] and in all six entries of
   * every group i = 0 .. MacroBlockPerRow (inclusive) of IntraPredBuff.block.
   * -1 is presumably the "not available for prediction" marker.
   *
   * pInfo is parenthesized so any pointer expression can be passed. The macro
   * declares locals named `b` and `i`.
   */
  #define mp4_ResetIntraPredBuffer(pInfo) \
  { \
      mp4_IntraPredBlock *b = (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB; \
      int32_t i; \
      b[3].dct_dc = b[4].dct_dc = b[5].dct_dc = -1; \
      b = (pInfo)->VisualObject.VideoObject.IntraPredBuff.block; \
      for (i = 0; i <= (pInfo)->VisualObject.VideoObject.MacroBlockPerRow; i ++) { \
          b[i*6+0].dct_dc = b[i*6+1].dct_dc = b[i*6+2].dct_dc = b[i*6+3].dct_dc = b[i*6+4].dct_dc = b[i*6+5].dct_dc = -1; \
      } \
  }
  /*
   * Reset the B-prediction blocks on a new row: dct_dc of
   * IntraPredBuff.dcB[3], dcB[4] and dcB[5] is set to -1.
   * pInfo is parenthesized so any pointer expression can be passed.
   */
  #define mp4_ResetIntraPredBblock(pInfo) \
  { \
      (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB[3].dct_dc = \
      (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB[4].dct_dc = \
      (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB[5].dct_dc = -1; \
  }
  /*
   * Mark the current macroblock as invalid for intra prediction and rotate the
   * buffer.
   *
   * `b` points at the six entries starting at block[colNum * 6 + 6]. The dct_dc
   * values of b[3..5] are first saved into dcB[3..5]; then all six entries of
   * `b` get dct_dc = -1.
   *
   * colNum is parenthesized before the multiplication, so an argument such as
   * "x + 1" indexes the intended entry. The macro declares a local named `b`.
   */
  #define mp4_UpdateIntraPredBuffInvalid(pInfo, colNum) \
  { \
      mp4_IntraPredBlock *b = &(pInfo)->VisualObject.VideoObject.IntraPredBuff.block[(colNum)*6+6]; \
      (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB[3].dct_dc = b[3].dct_dc; \
      (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB[4].dct_dc = b[4].dct_dc; \
      (pInfo)->VisualObject.VideoObject.IntraPredBuff.dcB[5].dct_dc = b[5].dct_dc; \
      b[0].dct_dc = b[1].dct_dc = b[2].dct_dc = b[3].dct_dc = b[4].dct_dc = b[5].dct_dc = -1; \
      /* (pInfo)->VisualObject.VideoObject.IntraPredBuff.quant[colNum+1] = (uint8_t)quant; */ \
  }
  /*
   * Decode, inverse-transform and motion-compensate a single inter block
   * (MPEG-4 coefficient syntax).
   *
   * Besides its parameters the macro uses these names from the expansion
   * site: pInfo, coeffMB, rvlc, scan and quant.
   *
   * pat != 0 : coefficients are reconstructed into coeffMB (jump to `err` on
   *            failure) and inverse-transformed in place:
   *              - quant_type != 0 and coeffMB[63] != 0 : full 8x8 DCT;
   *              - last-non-zero index == 0             : constant fill with
   *                                                       (coeffMB[0] + 4) >> 3;
   *              - index <= 4  and coeffMB[16] == 0     : 2x2 DCT;
   *              - index <= 13 and coeffMB[32] == 0     : 4x4 DCT;
   *              - otherwise                            : full 8x8 DCT.
   *            The result is added to the prediction from pr; counter
   *            nB_INTER_C is bumped.
   * pat == 0 : the prediction is copied unchanged; counter nB_INTER_NC is
   *            bumped.
   *
   * The 2x2 and 4x4 DCT decision is suitable for Classical Zigzag Scan only.
   * The macro declares a local named `lnz`.
   */
  #define mp4_DecodeMCBlockInter_MPEG4(pat, pr, stepr, pc, stepc, mv, rt, err) \
  { \
      if (pat) { \
          int32_t lnz; \
          if (ippiReconstructCoeffsInter_MPEG4_1u16s(&pInfo->bufptr, &pInfo->bitoff, coeffMB, &lnz, rvlc, scan, pInfo->VisualObject.VideoObject.QuantInvInterSpec, quant) != ippStsNoErr) { \
              mp4_Error("Error: decoding coefficients of Inter block"); \
              goto err; \
          } \
          if (pInfo->VisualObject.VideoObject.quant_type != 0 && (coeffMB[63] != 0)) { \
              ippiDCT8x8Inv_16s_C1I(coeffMB); \
          } else if (lnz == 0) { \
              mp4_Set64_16s((int16_t)((coeffMB[0] + 4) >> 3), coeffMB); \
          } else if ((lnz <= 4) && (coeffMB[16] == 0)) { \
              ippiDCT8x8Inv_2x2_16s_C1I(coeffMB); \
          } else if ((lnz <= 13) && (coeffMB[32] == 0)) { \
              ippiDCT8x8Inv_4x4_16s_C1I(coeffMB); \
          } else { \
              ippiDCT8x8Inv_16s_C1I(coeffMB); \
          } \
          mp4_Add8x8HP_16s8u(pr, stepr, coeffMB, pc, stepc, &mv, rt); \
          mp4_StatisticInc_(&pInfo->VisualObject.Statistic.nB_INTER_C); \
      } else { \
          mp4_Copy8x8HP_8u(pr, stepr, pc, stepc, &mv, rt); \
          mp4_StatisticInc_(&pInfo->VisualObject.Statistic.nB_INTER_NC); \
      } \
  }
  /*
   * Decode and inverse-transform a single inter block and add it in place to
   * the pixels at pc (MPEG-4 coefficient syntax).
   *
   * Besides its parameters the macro uses these names from the expansion
   * site: pInfo, coeffMB, rvlc, scan and quant.
   *
   * pat != 0 : coefficients are reconstructed into coeffMB (jump to `err` on
   *            failure) and inverse-transformed in place:
   *              - quant_type != 0 and coeffMB[63] != 0 : full 8x8 DCT;
   *              - last-non-zero index == 0             : constant fill with
   *                                                       (coeffMB[0] + 4) >> 3;
   *              - index <= 4  and coeffMB[16] == 0     : 2x2 DCT;
   *              - index <= 13 and coeffMB[32] == 0     : 4x4 DCT;
   *              - otherwise                            : full 8x8 DCT.
   *            The result is added to pc (row pitch stepc); counter nB_INTER_C
   *            is bumped.
   * pat == 0 : only counter nB_INTER_NC is bumped.
   *
   * The 2x2 and 4x4 DCT decision is suitable for Classical Zigzag Scan only.
   * The macro declares a local named `lnz`.
   */
  #define mp4_DecodeReconBlockInter_MPEG4(pat, pc, stepc, err) \
  { \
      if (pat) { \
          int32_t lnz; \
          if (ippiReconstructCoeffsInter_MPEG4_1u16s(&pInfo->bufptr, &pInfo->bitoff, coeffMB, &lnz, rvlc, scan, pInfo->VisualObject.VideoObject.QuantInvInterSpec, quant) != ippStsNoErr) { \
              mp4_Error("Error: decoding coefficients of Inter block"); \
              goto err; \
          } \
          if (pInfo->VisualObject.VideoObject.quant_type != 0 && (coeffMB[63] != 0)) { \
              ippiDCT8x8Inv_16s_C1I(coeffMB); \
          } else if (lnz == 0) { \
              mp4_Set64_16s((int16_t)((coeffMB[0] + 4) >> 3), coeffMB); \
          } else if ((lnz <= 4) && (coeffMB[16] == 0)) { \
              ippiDCT8x8Inv_2x2_16s_C1I(coeffMB); \
          } else if ((lnz <= 13) && (coeffMB[32] == 0)) { \
              ippiDCT8x8Inv_4x4_16s_C1I(coeffMB); \
          } else { \
              ippiDCT8x8Inv_16s_C1I(coeffMB); \
          } \
          mp4_Add8x8_16s8u(pc, coeffMB, stepc); \
          mp4_StatisticInc_(&pInfo->VisualObject.Statistic.nB_INTER_C); \
      } else { \
          mp4_StatisticInc_(&pInfo->VisualObject.Statistic.nB_INTER_NC); \
      } \
  }
  /*
   * Number of bits needed to represent the values 0 .. nmb - 1, with a
   * minimum of 1.
   *
   * nmb - number of macroblocks; values below 1 are treated as 1.
   *
   * Returns 1 for nmb <= 2, 2 for nmb 3..4, 3 for nmb 5..8, and so on.
   *
   * The counting is done on an unsigned copy: with a signed value, nmb <= 0
   * gives a negative "nmb - 1" whose arithmetic right shift never reaches
   * zero, so the loop would not terminate.
   */
  __INLINE int32_t mp4_GetMacroBlockNumberSize(int32_t nmb)
  {
      int32_t nb = 0;
      uint32_t last;

      if (nmb < 1)
          nmb = 1;
      last = (uint32_t)nmb - 1u;
      do {
          last >>= 1;
          nb ++;
      } while (last != 0);
      return nb;
  }
  516. __INLINE int32_t mp4_GetConvRatio(mp4_Info* pInfo)
  517. {
  518. if (mp4_GetBit(pInfo) == 1)
  519. return 0;
  520. else
  521. return (mp4_GetBit(pInfo) == 0 ? 2 : 4);
  522. }
  523. // decode cbpy for Inter nontransparent MB
  524. __INLINE mp4_Status mp4_DecodeCBPY_P(mp4_Info* pInfo, int32_t *yPattern, int32_t mbType)
  525. {
  526. uint32_t code;
  527. code = mp4_ShowBits9(pInfo, 6);
  528. if (mbType < IPPVC_MBTYPE_INTRA)
  529. *yPattern = 15 - mp4_cbpy4[code].code;
  530. else
  531. *yPattern = mp4_cbpy4[code].code;
  532. if (mp4_cbpy4[code].len == 255) {
  533. mp4_Error("Error: decoding CBPY");
  534. return MP4_STATUS_ERROR;
  535. } else {
  536. mp4_FlushBits(pInfo, mp4_cbpy4[code].len);
  537. return MP4_STATUS_OK;
  538. }
  539. }
  540. // decode cbpy for Intra nontransparent MB
  541. __INLINE mp4_Status mp4_DecodeCBPY_I(mp4_Info* pInfo, int32_t *yPattern)
  542. {
  543. uint32_t code;
  544. code = mp4_ShowBits9(pInfo, 6);
  545. *yPattern = mp4_cbpy4[code].code;
  546. if (mp4_cbpy4[code].len == 255) {
  547. mp4_Error("Error: decoding CBPY");
  548. return MP4_STATUS_ERROR;
  549. } else {
  550. mp4_FlushBits(pInfo, mp4_cbpy4[code].len);
  551. return MP4_STATUS_OK;
  552. }
  553. }
  554. extern mp4_Status mp4_DecodeMVD(mp4_Info *pInfo, int32_t *mvdx, int32_t *mvdy, int32_t fcode);
  555. extern mp4_Status mp4_DecodeMV(mp4_Info *pInfo, IppMotionVector *mv, int32_t fcode);
  556. extern mp4_Status mp4_Decode4MV(mp4_Info *pInfo, IppMotionVector *mv, int32_t fcode);
  557. extern mp4_Status mp4_DecodeMV_Direct(mp4_Info *pInfo, IppMotionVector mvC[4], IppMotionVector mvForw[4], IppMotionVector mvBack[4], int32_t TRB, int32_t TRD, int32_t modb, int32_t comb_type);
  558. extern mp4_Status mp4_DecodeMV_DirectField(mp4_Info *pInfo, int32_t mb_ftfr, int32_t mb_fbfr, IppMotionVector *mvTop, IppMotionVector *mvBottom, IppMotionVector *mvForwTop, IppMotionVector *mvForwBottom, IppMotionVector *mvBackTop, IppMotionVector *mvBackBottom, int32_t TRB, int32_t TRD, int32_t modb);
  559. extern mp4_Status mp4_DecodeIntraMB_SVH(mp4_Info *pInfo, int32_t pat, int32_t quant, uint8_t *pR[], int32_t stepR[]);
  560. extern mp4_Status mp4_DecodeIntraMB_DP(mp4_Info *pInfo, int16_t dct_dc[], int32_t x, int32_t pat, int32_t quant, int32_t dcVLC, int32_t ac_pred_flag, uint8_t *pR[], int32_t stepR[]);
  561. extern mp4_Status mp4_DecodeIntraMB(mp4_Info *pInfo, int32_t x, int32_t pat, int32_t quant, int32_t dcVLC, int32_t ac_pred_flag, uint8_t *pR[], int32_t stepR[]);
  562. extern mp4_Status mp4_DecodeInterMB_SVH(mp4_Info *pInfo, int16_t *coeffMB, int32_t quant, int32_t pat);
  563. extern mp4_Status mp4_DecodeInterMB(mp4_Info *pInfo, int16_t *coeffMB, int32_t quant, int32_t pat, int32_t scan);
  564. extern mp4_Status mp4_ReconstructCoeffsIntraMB(mp4_Info *pInfo, int32_t x, int32_t pat, int32_t quant, int32_t dcVLC, int32_t ac_pred_flag, int16_t *coeff, int32_t lnz[]);
  565. extern mp4_Status mp4_DecodeMCBPC_P(mp4_Info* pInfo, int32_t *mbType, int32_t *mbPattern, int32_t stat);
  566. extern mp4_Status mp4_PredictDecode1MV(mp4_Info *pInfo, mp4_MacroBlock *MBcurr, int32_t y, int32_t x);
  567. extern mp4_Status mp4_PredictDecode4MV(mp4_Info *pInfo, mp4_MacroBlock *MBcurr, int32_t y, int32_t x);
  568. extern mp4_Status mp4_PredictDecodeFMV(mp4_Info *pInfo, mp4_MacroBlock *MBcurr, int32_t y, int32_t x, IppMotionVector *mvT, IppMotionVector *mvB);
  569. extern mp4_Status mp4_DecodeVideoObjectPlane(mp4_Info* pInfo);
  570. extern mp4_Status mp4_DecodeVOP_I(mp4_Info* pInfo);
  571. extern mp4_Status mp4_DecodeVOP_P(mp4_Info* pInfo);
  572. extern mp4_Status mp4_DecodeVOP_B(mp4_Info* pInfo);
  573. extern mp4_Status mp4_DecodeVOP_S(mp4_Info* pInfo);
  574. extern mp4_Status mp4_DecodeVOP_I_MT(mp4_Info* pInfo);
  575. extern mp4_Status mp4_DecodeVOP_P_MT(mp4_Info* pInfo);
  576. extern mp4_Status mp4_DecodeVOP_B_MT(mp4_Info* pInfo);
  577. extern mp4_Status mp4_DecodeVOP_S_MT(mp4_Info* pInfo);
  578. extern void mp4_DCTInvCoeffsInterMB(mp4_Info *pInfo, int16_t *coeffMB, int32_t lnz[], int32_t pat, int32_t scan);
  579. extern void mp4_PadFrame(mp4_Info* pInfo);
  580. extern void mp4_OBMC(mp4_Info *pInfo, mp4_MacroBlock *pMBinfo, IppMotionVector *mvCur, int32_t colNum, int32_t rowNum, IppiRect limitRectL, uint8_t *pYc, int32_t stepYc, uint8_t *pYr, int32_t stepYr, int32_t cbpy, int16_t *coeffMB, int32_t dct_type);
  581. extern mp4_Status mp4_CheckDecodeVideoPacket(mp4_Info* pInfo, int32_t *found);
  582. extern int32_t mp4_CheckDecodeGOB_SVH(mp4_Info* pInfo);
  583. extern void mp4_CopyMacroBlocks(const mp4_Frame *rFrame, mp4_Frame *cFrame, int32_t mbPerRow, int32_t rowNum, int32_t colNum, int32_t n);
  584. extern mp4_Status mp4_Sprite_Trajectory(mp4_Info* pInfo);
  585. extern mp4_Status mp4_PredictDecodeMV(mp4_Info *pInfo, mp4_MacroBlock *MBcurr, int32_t frGOB, int32_t y, int32_t x);
  586. extern mp4_Status mp4_DecodeMCBPC_I(mp4_Info* pInfo, int32_t *mbType, int32_t *mbPattern);
  #ifdef FLOAT_POINT_IDCT
  /*
   * Floating-point replacements for the integer inverse DCT primitives.
   * When FLOAT_POINT_IDCT is defined, the four ippiDCT8x8Inv_* names used by
   * the macros in this header are redirected (at the bottom of this section)
   * to the two helpers below, which run ippiDCT8x8Inv_32f_C1I on a 16-byte
   * aligned float copy of the coefficients.
   */

  /*
   * Inverse DCT of 64 coefficients into an 8x8 block of bytes.
   * Results below 0 become 0, results above 255 become 255, everything else
   * is rounded by adding 0.5 and truncating.
   *
   * coeff - 64 input coefficients (not modified)
   * pR    - top-left byte of the destination block
   * stepR - distance in bytes between destination rows
   */
  static void fIDCT_16s8u_C1R(int16_t *coeff, uint8_t *pR, int stepR)
  {
      __ALIGN16(Ipp32f, samples, 64);
      int idx, row, col;

      for (idx = 0; idx < 64; idx ++)
          samples[idx] = coeff[idx];
      ippiDCT8x8Inv_32f_C1I(samples);
      for (row = 0; row < 8; row ++) {
          for (col = 0; col < 8; col ++) {
              const Ipp32f s = samples[row*8+col];

              if (s < 0)
                  pR[row*stepR+col] = 0;
              else if (s > 255)
                  pR[row*stepR+col] = 255;
              else
                  pR[row*stepR+col] = (int16_t)(s + 0.5f);
          }
      }
  }

  /*
   * In-place inverse DCT of 64 coefficients.
   * Each result is rounded to nearest with ties away from zero (0.5 is
   * subtracted from negative values and added to the others before truncation).
   */
  static void fIDCT_16s_C1I(int16_t *coeff)
  {
      __ALIGN16(Ipp32f, samples, 64);
      int idx;

      for (idx = 0; idx < 64; idx ++)
          samples[idx] = coeff[idx];
      ippiDCT8x8Inv_32f_C1I(samples);
      for (idx = 0; idx < 64; idx ++) {
          if (samples[idx] < 0)
              coeff[idx] = (int16_t)(samples[idx] - 0.5f);
          else
              coeff[idx] = (int16_t)(samples[idx] + 0.5f);
      }
  }

  /* The reduced 2x2 / 4x4 variants all map onto the full floating-point transform. */
  #define ippiDCT8x8Inv_16s8u_C1R(coeff, pR, stepR) \
      fIDCT_16s8u_C1R(coeff, pR, stepR)
  #define ippiDCT8x8Inv_2x2_16s_C1I(coeff) \
      fIDCT_16s_C1I(coeff)
  #define ippiDCT8x8Inv_4x4_16s_C1I(coeff) \
      fIDCT_16s_C1I(coeff)
  #define ippiDCT8x8Inv_16s_C1I(coeff) \
      fIDCT_16s_C1I(coeff)
  #endif
  618. #ifdef __cplusplus
  619. }
  620. #endif