idctpart.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. /****************************************************************************
  2. *
  3. * Module Title : idctpart.c
  4. *
  5. * Description : IDCT with multiple versions based on # of non 0 coeffs
  6. *
  7. ****************************************************************************/
  8. /****************************************************************************
  9. * Header Files
  10. ****************************************************************************/
  11. #include "dct.h"
  12. #include "string.h"
  /****************************************************************************
  * Macros
  ****************************************************************************/

  /* Short aliases for the integer types used by the IDCT routines below. */
  #define int32                 int
  #define int16                 short

  /* Bias added to column-pass sums before the final ">> 4" (half of 1 << 4). */
  #define IdctAdjustBeforeShift 8

  /*
   * Cosine multipliers scaled by 65536 (16.16 fixed point).
   * xCnSm == round( cos(n * pi / 16) * 65536 ); products are shifted right
   * by 16 after each multiply.
   */
  #define xC1S7                 64277   /* 0.9807852804 * 65536 */
  #define xC2S6                 60547   /* 0.9238795325 * 65536 */
  #define xC3S5                 54491   /* 0.8314696123 * 65536 */
  #define xC4S4                 46341   /* 0.7071067812 * 65536 */
  #define xC5S3                 36410   /* 0.5555702330 * 65536 */
  #define xC6S2                 25080   /* 0.3826834324 * 65536 */
  #define xC7S1                 12785   /* 0.1950903220 * 65536 */
  26. /****************************************************************************
  27. * Module statics
  28. ****************************************************************************/
  29. static const UINT32 dequant_index[64] =
  30. {
  31. 0, 1, 8, 16, 9, 2, 3, 10,
  32. 17, 24, 32, 25, 18, 11, 4, 5,
  33. 12, 19, 26, 33, 40, 48, 41, 34,
  34. 27, 20, 13, 6, 7, 14, 21, 28,
  35. 35, 42, 49, 56, 57, 50, 43, 36,
  36. 29, 22, 15, 23, 30, 37, 44, 51,
  37. 58, 59, 52, 45, 38, 31, 39, 46,
  38. 53, 60, 61, 54, 47, 55, 62, 63
  39. };
  #if 0 // AWG CODE NO LONGER USED IN CODEBASE.
  /* Cos and Sin constant multipliers used during DCT and IDCT */
  const double C1S7 = (double)0.9807852804032;
  const double C2S6 = (double)0.9238795325113;
  const double C3S5 = (double)0.8314696123025;
  const double C4S4 = (double)0.7071067811865;
  const double C5S3 = (double)0.5555702330196;
  const double C6S2 = (double)0.3826834323651;
  const double C7S1 = (double)0.1950903220161;

  /****************************************************************************
  * Exports
  ****************************************************************************/
  // DCT lookup tables.
  // Each xxx_TablePtr is pointed at the centre element of its table by
  // InitDctTables() so that it can be indexed with a signed value.
  INT32 * C4S4_TablePtr;
  INT32 C4S4_Table[(COEFF_MAX * 4) + 1];
  INT32 * C6S2_TablePtr;
  INT32 C6S2_Table[(COEFF_MAX * 2) + 1];
  INT32 * C2S6_TablePtr;
  INT32 C2S6_Table[(COEFF_MAX * 2) + 1];
  INT32 * C1S7_TablePtr;
  INT32 C1S7_Table[(COEFF_MAX * 2) + 1];
  INT32 * C7S1_TablePtr;
  INT32 C7S1_Table[(COEFF_MAX * 2) + 1];
  INT32 * C3S5_TablePtr;
  INT32 C3S5_Table[(COEFF_MAX * 2) + 1];
  INT32 * C5S3_TablePtr;
  INT32 C5S3_Table[(COEFF_MAX * 2) + 1];

  /****************************************************************************
  *
  * ROUTINE : FillDctTable
  *
  * INPUTS : INT32 *TablePtr : Pointer to the CENTRE element of a table
  * holding (2 * HalfRange) + 1 entries.
  * INT32 HalfRange : Largest magnitude index to fill.
  * double Multiplier : Constant each index is multiplied by.
  *
  * OUTPUTS : TablePtr[-HalfRange .. +HalfRange] are written.
  *
  * RETURNS : void
  *
  * FUNCTION : Stores i * Multiplier, rounded half away from zero, for
  * every index i in [-HalfRange, +HalfRange].
  *
  * SPECIAL NOTES : The upper bound is inclusive so that the final "+1" slot
  * of each table is computed rather than left at zero.
  *
  ****************************************************************************/
  static void FillDctTable ( INT32 *TablePtr, INT32 HalfRange, double Multiplier )
  {
      INT32 i;

      for ( i = -HalfRange; i <= HalfRange; i++ )
      {
          if ( i < 0 )
              TablePtr[i] = (INT32)((i * Multiplier) - 0.5);
          else
              TablePtr[i] = (INT32)((i * Multiplier) + 0.5);
      }
  }

  /****************************************************************************
  *
  * ROUTINE : InitDctTables
  *
  * INPUTS : None.
  *
  * OUTPUTS : None.
  *
  * RETURNS : void
  *
  * FUNCTION : Initialises lookup tables used in IDCT: centres each
  * table pointer and fills the table with rounded multiples
  * of the matching cosine constant.
  *
  * SPECIAL NOTES : NO LONGER USED IN CODEBASE.
  * The C4S4 table spans +/-(2 * COEFF_MAX); all other tables
  * span +/-COEFF_MAX.
  *
  ****************************************************************************/
  void InitDctTables ( void )
  {
      C4S4_TablePtr = &C4S4_Table[COEFF_MAX * 2];
      FillDctTable ( C4S4_TablePtr, 2 * COEFF_MAX, C4S4 );

      C6S2_TablePtr = &C6S2_Table[COEFF_MAX];
      FillDctTable ( C6S2_TablePtr, COEFF_MAX, C6S2 );

      C2S6_TablePtr = &C2S6_Table[COEFF_MAX];
      FillDctTable ( C2S6_TablePtr, COEFF_MAX, C2S6 );

      C1S7_TablePtr = &C1S7_Table[COEFF_MAX];
      FillDctTable ( C1S7_TablePtr, COEFF_MAX, C1S7 );

      C7S1_TablePtr = &C7S1_Table[COEFF_MAX];
      FillDctTable ( C7S1_TablePtr, COEFF_MAX, C7S1 );

      C3S5_TablePtr = &C3S5_Table[COEFF_MAX];
      FillDctTable ( C3S5_TablePtr, COEFF_MAX, C3S5 );

      C5S3_TablePtr = &C5S3_Table[COEFF_MAX];
      FillDctTable ( C5S3_TablePtr, COEFF_MAX, C5S3 );
  }
  #endif
  143. /****************************************************************************
  144. *
  145. * ROUTINE : dequant_slow
  146. *
  147. * INPUTS : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
  148. * INT16 *quantized_list : Pointer to quantized DCT coeffs
  149. * (in zig-zag order).
  150. *
  151. * OUTPUTS : INT32 *DCT_block : Pointer to 8x8 de-quantized block
  152. * (in 2-D raster order).
  153. *
  154. * RETURNS : void
  155. *
  156. * FUNCTION : De-quantizes an 8x8 block of quantized DCT coeffs.
  157. *
  158. * SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
  159. *
  160. ****************************************************************************/
  161. void dequant_slow ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
  162. {
  163. // Loop fully expanded for maximum speed
  164. DCT_block[dequant_index[0]] = quantized_list[0] * dequant_coeffs[0];
  165. DCT_block[dequant_index[1]] = quantized_list[1] * dequant_coeffs[1];
  166. DCT_block[dequant_index[2]] = quantized_list[2] * dequant_coeffs[2];
  167. DCT_block[dequant_index[3]] = quantized_list[3] * dequant_coeffs[3];
  168. DCT_block[dequant_index[4]] = quantized_list[4] * dequant_coeffs[4];
  169. DCT_block[dequant_index[5]] = quantized_list[5] * dequant_coeffs[5];
  170. DCT_block[dequant_index[6]] = quantized_list[6] * dequant_coeffs[6];
  171. DCT_block[dequant_index[7]] = quantized_list[7] * dequant_coeffs[7];
  172. DCT_block[dequant_index[8]] = quantized_list[8] * dequant_coeffs[8];
  173. DCT_block[dequant_index[9]] = quantized_list[9] * dequant_coeffs[9];
  174. DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
  175. DCT_block[dequant_index[11]] = quantized_list[11] * dequant_coeffs[11];
  176. DCT_block[dequant_index[12]] = quantized_list[12] * dequant_coeffs[12];
  177. DCT_block[dequant_index[13]] = quantized_list[13] * dequant_coeffs[13];
  178. DCT_block[dequant_index[14]] = quantized_list[14] * dequant_coeffs[14];
  179. DCT_block[dequant_index[15]] = quantized_list[15] * dequant_coeffs[15];
  180. DCT_block[dequant_index[16]] = quantized_list[16] * dequant_coeffs[16];
  181. DCT_block[dequant_index[17]] = quantized_list[17] * dequant_coeffs[17];
  182. DCT_block[dequant_index[18]] = quantized_list[18] * dequant_coeffs[18];
  183. DCT_block[dequant_index[19]] = quantized_list[19] * dequant_coeffs[19];
  184. DCT_block[dequant_index[20]] = quantized_list[20] * dequant_coeffs[20];
  185. DCT_block[dequant_index[21]] = quantized_list[21] * dequant_coeffs[21];
  186. DCT_block[dequant_index[22]] = quantized_list[22] * dequant_coeffs[22];
  187. DCT_block[dequant_index[23]] = quantized_list[23] * dequant_coeffs[23];
  188. DCT_block[dequant_index[24]] = quantized_list[24] * dequant_coeffs[24];
  189. DCT_block[dequant_index[25]] = quantized_list[25] * dequant_coeffs[25];
  190. DCT_block[dequant_index[26]] = quantized_list[26] * dequant_coeffs[26];
  191. DCT_block[dequant_index[27]] = quantized_list[27] * dequant_coeffs[27];
  192. DCT_block[dequant_index[28]] = quantized_list[28] * dequant_coeffs[28];
  193. DCT_block[dequant_index[29]] = quantized_list[29] * dequant_coeffs[29];
  194. DCT_block[dequant_index[30]] = quantized_list[30] * dequant_coeffs[30];
  195. DCT_block[dequant_index[31]] = quantized_list[31] * dequant_coeffs[31];
  196. DCT_block[dequant_index[32]] = quantized_list[32] * dequant_coeffs[32];
  197. DCT_block[dequant_index[33]] = quantized_list[33] * dequant_coeffs[33];
  198. DCT_block[dequant_index[34]] = quantized_list[34] * dequant_coeffs[34];
  199. DCT_block[dequant_index[35]] = quantized_list[35] * dequant_coeffs[35];
  200. DCT_block[dequant_index[36]] = quantized_list[36] * dequant_coeffs[36];
  201. DCT_block[dequant_index[37]] = quantized_list[37] * dequant_coeffs[37];
  202. DCT_block[dequant_index[38]] = quantized_list[38] * dequant_coeffs[38];
  203. DCT_block[dequant_index[39]] = quantized_list[39] * dequant_coeffs[39];
  204. DCT_block[dequant_index[40]] = quantized_list[40] * dequant_coeffs[40];
  205. DCT_block[dequant_index[41]] = quantized_list[41] * dequant_coeffs[41];
  206. DCT_block[dequant_index[42]] = quantized_list[42] * dequant_coeffs[42];
  207. DCT_block[dequant_index[43]] = quantized_list[43] * dequant_coeffs[43];
  208. DCT_block[dequant_index[44]] = quantized_list[44] * dequant_coeffs[44];
  209. DCT_block[dequant_index[45]] = quantized_list[45] * dequant_coeffs[45];
  210. DCT_block[dequant_index[46]] = quantized_list[46] * dequant_coeffs[46];
  211. DCT_block[dequant_index[47]] = quantized_list[47] * dequant_coeffs[47];
  212. DCT_block[dequant_index[48]] = quantized_list[48] * dequant_coeffs[48];
  213. DCT_block[dequant_index[49]] = quantized_list[49] * dequant_coeffs[49];
  214. DCT_block[dequant_index[50]] = quantized_list[50] * dequant_coeffs[50];
  215. DCT_block[dequant_index[51]] = quantized_list[51] * dequant_coeffs[51];
  216. DCT_block[dequant_index[52]] = quantized_list[52] * dequant_coeffs[52];
  217. DCT_block[dequant_index[53]] = quantized_list[53] * dequant_coeffs[53];
  218. DCT_block[dequant_index[54]] = quantized_list[54] * dequant_coeffs[54];
  219. DCT_block[dequant_index[55]] = quantized_list[55] * dequant_coeffs[55];
  220. DCT_block[dequant_index[56]] = quantized_list[56] * dequant_coeffs[56];
  221. DCT_block[dequant_index[57]] = quantized_list[57] * dequant_coeffs[57];
  222. DCT_block[dequant_index[58]] = quantized_list[58] * dequant_coeffs[58];
  223. DCT_block[dequant_index[59]] = quantized_list[59] * dequant_coeffs[59];
  224. DCT_block[dequant_index[60]] = quantized_list[60] * dequant_coeffs[60];
  225. DCT_block[dequant_index[61]] = quantized_list[61] * dequant_coeffs[61];
  226. DCT_block[dequant_index[62]] = quantized_list[62] * dequant_coeffs[62];
  227. DCT_block[dequant_index[63]] = quantized_list[63] * dequant_coeffs[63];
  228. }
  229. /****************************************************************************
  230. *
  231. * ROUTINE : IDctSlow
  232. *
  233. * INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
  234. * int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
  235. *
  236. * OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
  237. *
  238. * RETURNS : void
  239. *
  240. * FUNCTION : Inverse quantizes and inverse DCT's input 8x8 block
  241. * to reproduce prediction error.
  242. *
  243. * SPECIAL NOTES : None.
  244. *
  245. ****************************************************************************/
  246. void IDctSlow ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
  247. {
  248. int loop;
  249. int32 t1, t2;
  250. int32 IntermediateData[64];
  251. int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
  252. int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  253. int32 *ip = IntermediateData;
  254. int16 *op = OutputData;
  255. // dequantize the input
  256. dequant_slow ( QuantMatrix, InputData, IntermediateData );
  257. // Inverse DCT on the rows now
  258. for ( loop=0; loop<8; loop++ )
  259. {
  260. // Check for non-zero values
  261. if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] )
  262. {
  263. t1 = (int32)(xC1S7 * ip[1]);
  264. t2 = (int32)(xC7S1 * ip[7]);
  265. t1 >>= 16;
  266. t2 >>= 16;
  267. _A = t1 + t2;
  268. t1 = (int32)(xC7S1 * ip[1]);
  269. t2 = (int32)(xC1S7 * ip[7]);
  270. t1 >>= 16;
  271. t2 >>= 16;
  272. _B = t1 - t2;
  273. t1 = (int32)(xC3S5 * ip[3]);
  274. t2 = (int32)(xC5S3 * ip[5]);
  275. t1 >>= 16;
  276. t2 >>= 16;
  277. _C = t1 + t2;
  278. t1 = (int32)(xC3S5 * ip[5]);
  279. t2 = (int32)(xC5S3 * ip[3]);
  280. t1 >>= 16;
  281. t2 >>= 16;
  282. _D = t1 - t2;
  283. t1 = (int32)(xC4S4 * (_A - _C));
  284. t1 >>= 16;
  285. _Ad = t1;
  286. t1 = (int32)(xC4S4 * (_B - _D));
  287. t1 >>= 16;
  288. _Bd = t1;
  289. _Cd = _A + _C;
  290. _Dd = _B + _D;
  291. t1 = (int32)(xC4S4 * (ip[0] + ip[4]));
  292. t1 >>= 16;
  293. _E = t1;
  294. t1 = (int32)(xC4S4 * (ip[0] - ip[4]));
  295. t1 >>= 16;
  296. _F = t1;
  297. t1 = (int32)(xC2S6 * ip[2]);
  298. t2 = (int32)(xC6S2 * ip[6]);
  299. t1 >>= 16;
  300. t2 >>= 16;
  301. _G = t1 + t2;
  302. t1 = (int32)(xC6S2 * ip[2]);
  303. t2 = (int32)(xC2S6 * ip[6]);
  304. t1 >>= 16;
  305. t2 >>= 16;
  306. _H = t1 - t2;
  307. _Ed = _E - _G;
  308. _Gd = _E + _G;
  309. _Add = _F + _Ad;
  310. _Bdd = _Bd - _H;
  311. _Fd = _F - _Ad;
  312. _Hd = _Bd + _H;
  313. // Final sequence of operations over-write original inputs.
  314. ip[0] = (int16)((_Gd + _Cd ) >> 0);
  315. ip[7] = (int16)((_Gd - _Cd ) >> 0);
  316. ip[1] = (int16)((_Add + _Hd ) >> 0);
  317. ip[2] = (int16)((_Add - _Hd ) >> 0);
  318. ip[3] = (int16)((_Ed + _Dd ) >> 0);
  319. ip[4] = (int16)((_Ed - _Dd ) >> 0);
  320. ip[5] = (int16)((_Fd + _Bdd ) >> 0);
  321. ip[6] = (int16)((_Fd - _Bdd ) >> 0);
  322. }
  323. ip += 8; /* next row */
  324. }
  325. ip = IntermediateData;
  326. for ( loop=0; loop<8; loop++ )
  327. {
  328. // Check for non-zero values (bitwise | faster than logical ||)
  329. if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
  330. ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] )
  331. {
  332. t1 = (int32)(xC1S7 * ip[1*8]);
  333. t2 = (int32)(xC7S1 * ip[7*8]);
  334. t1 >>= 16;
  335. t2 >>= 16;
  336. _A = t1 + t2;
  337. t1 = (int32)(xC7S1 * ip[1*8]);
  338. t2 = (int32)(xC1S7 * ip[7*8]);
  339. t1 >>= 16;
  340. t2 >>= 16;
  341. _B = t1 - t2;
  342. t1 = (int32)(xC3S5 * ip[3*8]);
  343. t2 = (int32)(xC5S3 * ip[5*8]);
  344. t1 >>= 16;
  345. t2 >>= 16;
  346. _C = t1 + t2;
  347. t1 = (int32)(xC3S5 * ip[5*8]);
  348. t2 = (int32)(xC5S3 * ip[3*8]);
  349. t1 >>= 16;
  350. t2 >>= 16;
  351. _D = t1 - t2;
  352. t1 = (int32)(xC4S4 * (_A - _C));
  353. t1 >>= 16;
  354. _Ad = t1;
  355. t1 = (int32)(xC4S4 * (_B - _D));
  356. t1 >>= 16;
  357. _Bd = t1;
  358. _Cd = _A + _C;
  359. _Dd = _B + _D;
  360. t1 = (int32)(xC4S4 * (ip[0*8] + ip[4*8]));
  361. t1 >>= 16;
  362. _E = t1;
  363. t1 = (int32)(xC4S4 * (ip[0*8] - ip[4*8]));
  364. t1 >>= 16;
  365. _F = t1;
  366. t1 = (int32)(xC2S6 * ip[2*8]);
  367. t2 = (int32)(xC6S2 * ip[6*8]);
  368. t1 >>= 16;
  369. t2 >>= 16;
  370. _G = t1 + t2;
  371. t1 = (int32)(xC6S2 * ip[2*8]);
  372. t2 = (int32)(xC2S6 * ip[6*8]);
  373. t1 >>= 16;
  374. t2 >>= 16;
  375. _H = t1 - t2;
  376. _Ed = _E - _G;
  377. _Gd = _E + _G;
  378. _Add = _F + _Ad;
  379. _Bdd = _Bd - _H;
  380. _Fd = _F - _Ad;
  381. _Hd = _Bd + _H;
  382. _Gd += IdctAdjustBeforeShift;
  383. _Add += IdctAdjustBeforeShift;
  384. _Ed += IdctAdjustBeforeShift;
  385. _Fd += IdctAdjustBeforeShift;
  386. // Final sequence of operations over-write original inputs.
  387. op[0*8] = (int16)((_Gd + _Cd ) >> 4);
  388. op[7*8] = (int16)((_Gd - _Cd ) >> 4);
  389. op[1*8] = (int16)((_Add + _Hd ) >> 4);
  390. op[2*8] = (int16)((_Add - _Hd ) >> 4);
  391. op[3*8] = (int16)((_Ed + _Dd ) >> 4);
  392. op[4*8] = (int16)((_Ed - _Dd ) >> 4);
  393. op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
  394. op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
  395. }
  396. else
  397. {
  398. op[0*8] = 0;
  399. op[7*8] = 0;
  400. op[1*8] = 0;
  401. op[2*8] = 0;
  402. op[3*8] = 0;
  403. op[4*8] = 0;
  404. op[5*8] = 0;
  405. op[6*8] = 0;
  406. }
  407. ip++; // next column
  408. op++;
  409. }
  410. }
  411. /****************************************************************************
  412. *
  413. * ROUTINE : dequant_slow10
  414. *
  415. * INPUTS : INT16 *dequant_coeffs : Pointer to dequantization step sizes.
  416. * INT16 *quantized_list : Pointer to quantized DCT coeffs
  417. * (in zig-zag order).
  418. *
  419. * OUTPUTS : INT32 *DCT_block : Pointer to 8x8 de-quantized block
  420. * (in 2-D raster order).
  421. *
  422. * RETURNS : void
  423. *
  424. * FUNCTION : De-quantizes an 8x8 block of quantized DCT coeffs that
  425. * only has non-zero coefficients in the first 10, i.e.
  426. * only DC & AC1-9 are non-zero, AC10-63 __MUST_BE_ zero.
  427. *
  428. * SPECIAL NOTES : Uses dequant_index to invert zig-zag ordering.
  429. *
  430. ****************************************************************************/
  431. void dequant_slow10 ( INT16 *dequant_coeffs, INT16 *quantized_list, INT32 *DCT_block )
  432. {
  433. memset(DCT_block,0, 128);
  434. // Loop fully expanded for maximum speed
  435. DCT_block[dequant_index[0]] = quantized_list[0] * dequant_coeffs[0];
  436. DCT_block[dequant_index[1]] = quantized_list[1] * dequant_coeffs[1];
  437. DCT_block[dequant_index[2]] = quantized_list[2] * dequant_coeffs[2];
  438. DCT_block[dequant_index[3]] = quantized_list[3] * dequant_coeffs[3];
  439. DCT_block[dequant_index[4]] = quantized_list[4] * dequant_coeffs[4];
  440. DCT_block[dequant_index[5]] = quantized_list[5] * dequant_coeffs[5];
  441. DCT_block[dequant_index[6]] = quantized_list[6] * dequant_coeffs[6];
  442. DCT_block[dequant_index[7]] = quantized_list[7] * dequant_coeffs[7];
  443. DCT_block[dequant_index[8]] = quantized_list[8] * dequant_coeffs[8];
  444. DCT_block[dequant_index[9]] = quantized_list[9] * dequant_coeffs[9];
  445. DCT_block[dequant_index[10]] = quantized_list[10] * dequant_coeffs[10];
  446. }
  447. /****************************************************************************
  448. *
  449. * ROUTINE : IDctSlow10
  450. *
  451. * INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
  452. * int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
  453. *
  454. * OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
  455. *
  456. * RETURNS : void
  457. *
  458. * FUNCTION : Inverse quantizes and inverse DCT's input 8x8 block
  459. * with non-zero coeffs only in DC & the first 9 AC coeffs.
  460. * i.e. non-zeros ONLY in the following 10 positions:
  461. *
  462. * x x x x 0 0 0 0
  463. * x x x 0 0 0 0 0
  464. * x x 0 0 0 0 0 0
  465. * x 0 0 0 0 0 0 0
  466. * 0 0 0 0 0 0 0 0
  467. * 0 0 0 0 0 0 0 0
  468. * 0 0 0 0 0 0 0 0
  469. * 0 0 0 0 0 0 0 0
  470. *
  471. * SPECIAL NOTES : Output data is in raster, not zig-zag, order.
  472. *
  473. ****************************************************************************/
  474. void IDct10 ( int16 *InputData, int16 *QuantMatrix, int16 *OutputData )
  475. {
  476. int loop;
  477. int32 t1, t2;
  478. int32 IntermediateData[64];
  479. int32 _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
  480. int32 _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  481. int32 *ip = IntermediateData;
  482. int16 *op = OutputData;
  483. // dequantize the input
  484. dequant_slow10 ( QuantMatrix, InputData, IntermediateData );
  485. // Inverse DCT on the rows now
  486. for ( loop=0; loop<4; loop++ )
  487. {
  488. // Check for non-zero values
  489. if ( ip[0] | ip[1] | ip[2] | ip[3] )
  490. {
  491. t1 = (int32)(xC1S7 * ip[1]);
  492. t1 >>= 16;
  493. _A = t1;
  494. t1 = (int32)(xC7S1 * ip[1]);
  495. t1 >>= 16;
  496. _B = t1 ;
  497. t1 = (int32)(xC3S5 * ip[3]);
  498. t1 >>= 16;
  499. _C = t1;
  500. t2 = (int32)(xC5S3 * ip[3]);
  501. t2 >>= 16;
  502. _D = -t2;
  503. t1 = (int32)(xC4S4 * (_A - _C));
  504. t1 >>= 16;
  505. _Ad = t1;
  506. t1 = (int32)(xC4S4 * (_B - _D));
  507. t1 >>= 16;
  508. _Bd = t1;
  509. _Cd = _A + _C;
  510. _Dd = _B + _D;
  511. t1 = (int32)(xC4S4 * ip[0] );
  512. t1 >>= 16;
  513. _E = t1;
  514. _F = t1;
  515. t1 = (int32)(xC2S6 * ip[2]);
  516. t1 >>= 16;
  517. _G = t1;
  518. t1 = (int32)(xC6S2 * ip[2]);
  519. t1 >>= 16;
  520. _H = t1 ;
  521. _Ed = _E - _G;
  522. _Gd = _E + _G;
  523. _Add = _F + _Ad;
  524. _Bdd = _Bd - _H;
  525. _Fd = _F - _Ad;
  526. _Hd = _Bd + _H;
  527. // Final sequence of operations over-write original inputs.
  528. ip[0] = (int16)((_Gd + _Cd ) >> 0);
  529. ip[7] = (int16)((_Gd - _Cd ) >> 0);
  530. ip[1] = (int16)((_Add + _Hd ) >> 0);
  531. ip[2] = (int16)((_Add - _Hd ) >> 0);
  532. ip[3] = (int16)((_Ed + _Dd ) >> 0);
  533. ip[4] = (int16)((_Ed - _Dd ) >> 0);
  534. ip[5] = (int16)((_Fd + _Bdd ) >> 0);
  535. ip[6] = (int16)((_Fd - _Bdd ) >> 0);
  536. }
  537. ip += 8; /* next row */
  538. }
  539. ip = IntermediateData;
  540. for ( loop=0; loop<8; loop++ )
  541. {
  542. // Check for non-zero values (bitwise or faster than ||)
  543. if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] )
  544. {
  545. t1 = (int32)(xC1S7 * ip[1*8]);
  546. t1 >>= 16;
  547. _A = t1 ;
  548. t1 = (int32)(xC7S1 * ip[1*8]);
  549. t1 >>= 16;
  550. _B = t1 ;
  551. t1 = (int32)(xC3S5 * ip[3*8]);
  552. t1 >>= 16;
  553. _C = t1 ;
  554. t2 = (int32)(xC5S3 * ip[3*8]);
  555. t2 >>= 16;
  556. _D = - t2;
  557. t1 = (int32)(xC4S4 * (_A - _C));
  558. t1 >>= 16;
  559. _Ad = t1;
  560. t1 = (int32)(xC4S4 * (_B - _D));
  561. t1 >>= 16;
  562. _Bd = t1;
  563. _Cd = _A + _C;
  564. _Dd = _B + _D;
  565. t1 = (int32)(xC4S4 * ip[0*8]);
  566. t1 >>= 16;
  567. _E = t1;
  568. _F = t1;
  569. t1 = (int32)(xC2S6 * ip[2*8]);
  570. t1 >>= 16;
  571. _G = t1;
  572. t1 = (int32)(xC6S2 * ip[2*8]);
  573. t1 >>= 16;
  574. _H = t1;
  575. _Ed = _E - _G;
  576. _Gd = _E + _G;
  577. _Add = _F + _Ad;
  578. _Bdd = _Bd - _H;
  579. _Fd = _F - _Ad;
  580. _Hd = _Bd + _H;
  581. _Gd += IdctAdjustBeforeShift;
  582. _Add += IdctAdjustBeforeShift;
  583. _Ed += IdctAdjustBeforeShift;
  584. _Fd += IdctAdjustBeforeShift;
  585. // Final sequence of operations over-write original inputs.
  586. op[0*8] = (int16)((_Gd + _Cd ) >> 4);
  587. op[7*8] = (int16)((_Gd - _Cd ) >> 4);
  588. op[1*8] = (int16)((_Add + _Hd ) >> 4);
  589. op[2*8] = (int16)((_Add - _Hd ) >> 4);
  590. op[3*8] = (int16)((_Ed + _Dd ) >> 4);
  591. op[4*8] = (int16)((_Ed - _Dd ) >> 4);
  592. op[5*8] = (int16)((_Fd + _Bdd ) >> 4);
  593. op[6*8] = (int16)((_Fd - _Bdd ) >> 4);
  594. }
  595. else
  596. {
  597. op[0*8] = 0;
  598. op[7*8] = 0;
  599. op[1*8] = 0;
  600. op[2*8] = 0;
  601. op[3*8] = 0;
  602. op[4*8] = 0;
  603. op[5*8] = 0;
  604. op[6*8] = 0;
  605. }
  606. ip++; // next column
  607. op++;
  608. }
  609. }
  610. /****************************************************************************
  611. *
  612. * ROUTINE : IDct1
  613. *
  614. * INPUTS : int16 *InputData : Pointer to 8x8 quantized DCT coefficients.
  615. * int16 *QuantMatrix : Pointer to 8x8 quantization matrix.
  616. *
  617. * OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
  618. *
  619. * RETURNS : void
  620. *
  621. * FUNCTION : Inverse DCT's input 8x8 block with only one non-zero
  622. * coeff in the DC position:
  623. *
  624. * x 0 0 0 0 0 0 0
  625. * 0 0 0 0 0 0 0 0
  626. * 0 0 0 0 0 0 0 0
  627. * 0 0 0 0 0 0 0 0
  628. * 0 0 0 0 0 0 0 0
  629. * 0 0 0 0 0 0 0 0
  630. * 0 0 0 0 0 0 0 0
  631. * 0 0 0 0 0 0 0 0
  632. *
  633. * SPECIAL NOTES : Output data is in raster, not zig-zag, order.
  634. *
  635. ****************************************************************************/
  636. void IDct1 ( int16 *InputData, int16 *QuantMatrix, INT16 *OutputData )
  637. {
  638. INT32 loop;
  639. INT16 OutD;
  640. OutD = (INT16)((INT32)(InputData[0]*QuantMatrix[0]+15)>>5);
  641. for ( loop=0; loop<64; loop++ )
  642. OutputData[loop] = OutD;
  643. }
  #if 0
  /****************************************************************************
  *
  * ROUTINE : IDct4
  *
  * INPUTS : int16 *InputData : Pointer to 8x8 DCT coefficients, in zig-zag
  * order. NOTE: this buffer is used as the
  * work area and is overwritten.
  *
  * OUTPUTS : int16 *OutputData : Pointer to 8x8 block to hold output.
  *
  * RETURNS : void
  *
  * FUNCTION : Inverse DCT's input 8x8 block with at most four non-zero
  * coeffs in the following positions:
  *
  * x x 0 0 0 0 0 0
  * x x 0 0 0 0 0 0
  * 0 0 0 0 0 0 0 0
  * 0 0 0 0 0 0 0 0
  * 0 0 0 0 0 0 0 0
  * 0 0 0 0 0 0 0 0
  * 0 0 0 0 0 0 0 0
  * 0 0 0 0 0 0 0 0
  *
  * SPECIAL NOTES : CURRENTLY NOT USED IN CODEBASE.
  * Zig-zag entries 2 and 4 are moved to raster positions 8
  * and 9. Both source slots are then cleared: if row 0 is
  * skipped by the row pass (entries 0 and 1 both zero), a
  * stale value left in slot 4 would otherwise be picked up
  * by the column pass as a row-0 sample of column 4.
  *
  ****************************************************************************/
  void IDct4 ( int16 *InputData, int16 *OutputData )
  {
      int i, k;
      int32 a, b, a1, b1, c1, d1, e;
      int32 a2, f1;
      int16 *ip = InputData;
      int16 *op = OutputData;

      // Unzigzag the coefficents: zig-zag 2 -> raster 8, zig-zag 4 -> raster 9.
      ip[8] = ip[2];
      ip[9] = ip[4];
      ip[2] = 0;
      ip[4] = 0;  // was left stale; see SPECIAL NOTES
      ip[5] = 0;  // retained from the original code

      // Pass 1: inverse DCT on the first 2 rows only, in place.
      for ( i = 0; i < 2; i++, ip += 8 )
      {
          // Nothing to do when both leading coefficients are zero.
          if ( !( ip[0] | ip[1] ) )
              continue;

          a = (int32)(xC1S7 * ip[1]) >> 16;
          b = (int32)(xC7S1 * ip[1]) >> 16;

          a1 = (int32)(xC4S4 * a) >> 16;
          b1 = (int32)(xC4S4 * b) >> 16;
          c1 = a;
          d1 = b;

          e = (int32)(xC4S4 * ip[0]) >> 16;

          a2 = e + a1;
          f1 = e - a1;

          // All inputs have been consumed; overwrite the full row.
          ip[0] = (int16)(e + c1);
          ip[1] = (int16)(a2 + b1);
          ip[2] = (int16)(a2 - b1);
          ip[3] = (int16)(e + d1);
          ip[4] = (int16)(e - d1);
          ip[5] = (int16)(f1 + b1);
          ip[6] = (int16)(f1 - b1);
          ip[7] = (int16)(e - c1);
      }

      // Pass 2: inverse DCT on all 8 columns, written to OutputData.
      ip = InputData;
      for ( i = 0; i < 8; i++, ip++, op++ )
      {
          // Check for non-zero values (bitwise or faster than ||)
          if ( ip[0 * 8] | ip[1 * 8] )
          {
              a = (int32)(xC1S7 * ip[1 * 8]) >> 16;
              b = (int32)(xC7S1 * ip[1 * 8]) >> 16;

              a1 = (int32)(xC4S4 * a) >> 16;
              b1 = (int32)(xC4S4 * b) >> 16;
              c1 = a;
              d1 = b;

              e = (int32)(xC4S4 * ip[0 * 8]) >> 16;

              a2 = e + a1;
              f1 = e - a1;

              // Rounding bias: each output below contains exactly one of these.
              a2 += IdctAdjustBeforeShift;
              e += IdctAdjustBeforeShift;
              f1 += IdctAdjustBeforeShift;

              op[0 * 8] = (int16)((e + c1) >> 4);
              op[1 * 8] = (int16)((a2 + b1) >> 4);
              op[2 * 8] = (int16)((a2 - b1) >> 4);
              op[3 * 8] = (int16)((e + d1) >> 4);
              op[4 * 8] = (int16)((e - d1) >> 4);
              op[5 * 8] = (int16)((f1 + b1) >> 4);
              op[6 * 8] = (int16)((f1 - b1) >> 4);
              op[7 * 8] = (int16)((e - c1) >> 4);
          }
          else
          {
              // Nothing in the top two rows of this column: output is zero.
              for ( k = 0; k < 8; k++ )
                  op[k * 8] = 0;
          }
      }
  }
  #endif