fdct.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. /****************************************************************************
  2. *
  3. * Module Title : fdct.c
  4. *
  5. * Description : Fast 8x8 DCT C-Implementation.
  6. *
  7. ****************************************************************************/
  8. /****************************************************************************
  9. * Header Files
  10. ****************************************************************************/
  11. #include "dct.h"
  12. /****************************************************************************
  13. * Macros
  14. ****************************************************************************/
  // Evaluates to -1 (all bits set) when bit 31 of X is set and to 0 otherwise.
  // Relies on the compiler converting 0x80000000 to the most negative signed
  // value and on >> of a negative signed value being an arithmetic shift.
  #define SIGNBITDUPPED(X) ( (signed)((X) & 0x80000000) >> 31 )
  // Adds 0xffff to X when X is negative and leaves it unchanged otherwise, so
  // that a following ">>= 16" truncates towards zero rather than towards minus
  // infinity. X must be a modifiable lvalue and is evaluated more than once.
  // The expansion deliberately ends in ';' because several call sites in this
  // file invoke the macro without a semicolon of their own.
  #define DOROUND(X) (X) = ( (SIGNBITDUPPED(X) & (0xffff)) + (X) );
  17. /****************************************************************************
  18. * Module statics
  19. ****************************************************************************/
  20. static INT32 xC1S7 = 64277;
  21. static INT32 xC2S6 = 60547;
  22. static INT32 xC3S5 = 54491;
  23. static INT32 xC4S4 = 46341;
  24. static INT32 xC5S3 = 36410;
  25. static INT32 xC6S2 = 25080;
  26. static INT32 xC7S1 = 12785;
  27. /****************************************************************************
  28. *
  29. * ROUTINE : fdct_short_C_orig
  30. *
  31. * INPUTS : INT16 *InputData : 16-bit input data.
  32. *
  33. * OUTPUTS : INT16 *OutputData : 16-bit transform coefficients.
  34. *
  35. * RETURNS : void
  36. *
  37. * FUNCTION : Performs an 8x8 2-D fast DCT.
  38. *
  39. * The algorithm used is derived from the flowgraph for
  40. * the Vetterli and Ligtenberg fast 1-D dct given in the
  41. * JPEG reference book by Pennebaker and Mitchell.
  42. *
  43. * SPECIAL NOTES : None.
  44. *
  45. ****************************************************************************/
  46. void fdct_short_C_orig ( INT16 *InputData, INT16 *OutputData )
  47. {
  48. int loop;
  49. INT32 is07, is12, is34, is56;
  50. INT32 is0734, is1256;
  51. INT32 id07, id12, id34, id56;
  52. INT32 irot_input_x, irot_input_y;
  53. INT32 icommon_product1; // Re-used product (c4s4 * (s12 - s56)).
  54. INT32 icommon_product2; // Re-used product (c4s4 * (d12 + d56)).
  55. INT32 temp1, temp2; // intermediate variable for computation
  56. INT32 InterData[64];
  57. INT32 *ip = InterData;
  58. INT16 *op = OutputData;
  59. for ( loop=0; loop<8; loop++ )
  60. {
  61. // Pre calculate some common sums and differences.
  62. is07 = InputData[0] + InputData[7];
  63. is12 = InputData[1] + InputData[2];
  64. is34 = InputData[3] + InputData[4];
  65. is56 = InputData[5] + InputData[6];
  66. id07 = InputData[0] - InputData[7];
  67. id12 = InputData[1] - InputData[2];
  68. id34 = InputData[3] - InputData[4];
  69. id56 = InputData[5] - InputData[6];
  70. is0734 = is07 + is34;
  71. is1256 = is12 + is56;
  72. // Pre-Calculate some common product terms.
  73. icommon_product1 = xC4S4*(is12 - is56);
  74. DOROUND ( icommon_product1 )
  75. icommon_product1 >>= 16;
  76. icommon_product2 = xC4S4*(id12 + id56);
  77. DOROUND ( icommon_product2 )
  78. icommon_product2 >>= 16;
  79. ip[0] = (xC4S4*(is0734 + is1256));
  80. DOROUND ( ip[0] );
  81. ip[0] >>= 16;
  82. ip[4] = (xC4S4*(is0734 - is1256));
  83. DOROUND ( ip[4] );
  84. ip[4] >>= 16;
  85. // Define inputs to rotation for outputs 2 and 6
  86. irot_input_x = id12 - id56;
  87. irot_input_y = is07 - is34;
  88. // Apply rotation for outputs 2 and 6.
  89. temp1 = xC6S2*irot_input_x;
  90. DOROUND ( temp1 );
  91. temp1 >>= 16;
  92. temp2 = xC2S6*irot_input_y;
  93. DOROUND ( temp2 );
  94. temp2 >>= 16;
  95. ip[2] = temp1 + temp2;
  96. temp1 = xC6S2*irot_input_y;
  97. DOROUND ( temp1 );
  98. temp1 >>= 16;
  99. temp2 = xC2S6*irot_input_x;
  100. DOROUND ( temp2 );
  101. temp2 >>= 16;
  102. ip[6] = temp1 -temp2;
  103. // Define inputs to rotation for outputs 1 and 7
  104. irot_input_x = icommon_product1 + id07;
  105. irot_input_y = -( id34 + icommon_product2 );
  106. // Apply rotation for outputs 1 and 7.
  107. temp1 = xC1S7*irot_input_x;
  108. DOROUND ( temp1 );
  109. temp1 >>= 16;
  110. temp2 = xC7S1*irot_input_y;
  111. DOROUND ( temp2 );
  112. temp2 >>= 16;
  113. ip[1] = temp1 - temp2;
  114. temp1 = xC7S1*irot_input_x;
  115. DOROUND ( temp1 );
  116. temp1 >>= 16;
  117. temp2 = xC1S7*irot_input_y;
  118. DOROUND ( temp2 );
  119. temp2 >>= 16;
  120. ip[7] = temp1 + temp2;
  121. // Define inputs to rotation for outputs 3 and 5
  122. irot_input_x = id07 - icommon_product1;
  123. irot_input_y = id34 - icommon_product2;
  124. // Apply rotation for outputs 3 and 5.
  125. temp1 = xC3S5 * irot_input_x;
  126. DOROUND ( temp1 );
  127. temp1 >>= 16;
  128. temp2 = xC5S3*irot_input_y;
  129. DOROUND ( temp2 );
  130. temp2 >>= 16;
  131. ip[3] = temp1 - temp2;
  132. temp1 = xC5S3*irot_input_x;
  133. DOROUND ( temp1 );
  134. temp1 >>= 16;
  135. temp2 = xC3S5*irot_input_y;
  136. DOROUND ( temp2 );
  137. temp2 >>= 16;
  138. ip[5] = temp1 + temp2;
  139. // Increment data pointer for next row.
  140. InputData += 8;
  141. ip += 8; // advance pointer to next row
  142. }
  143. // Performed DCT on rows, now transform the columns
  144. ip = InterData;
  145. for ( loop=0; loop<8; loop++ )
  146. {
  147. // Pre calculate some common sums and differences.
  148. is07 = ip[0 * 8] + ip[7 * 8];
  149. is12 = ip[1 * 8] + ip[2 * 8];
  150. is34 = ip[3 * 8] + ip[4 * 8];
  151. is56 = ip[5 * 8] + ip[6 * 8];
  152. id07 = ip[0 * 8] - ip[7 * 8];
  153. id12 = ip[1 * 8] - ip[2 * 8];
  154. id34 = ip[3 * 8] - ip[4 * 8];
  155. id56 = ip[5 * 8] - ip[6 * 8];
  156. is0734 = is07 + is34;
  157. is1256 = is12 + is56;
  158. // Pre-Calculate some common product terms.
  159. icommon_product1 = xC4S4*(is12 - is56);
  160. icommon_product2 = xC4S4*(id12 + id56);
  161. DOROUND ( icommon_product1 )
  162. DOROUND ( icommon_product2 )
  163. icommon_product1 >>= 16;
  164. icommon_product2 >>= 16;
  165. temp1 = xC4S4*(is0734 + is1256);
  166. temp2 = xC4S4*(is0734 - is1256);
  167. DOROUND ( temp1 );
  168. DOROUND ( temp2 );
  169. temp1 >>= 16;
  170. temp2 >>= 16;
  171. op[0*8] = (INT16)temp1;
  172. op[4*8] = (INT16)temp2;
  173. // Define inputs to rotation for outputs 2 and 6
  174. irot_input_x = id12 - id56;
  175. irot_input_y = is07 - is34;
  176. // Apply rotation for outputs 2 and 6.
  177. temp1 = xC6S2*irot_input_x;
  178. DOROUND ( temp1 );
  179. temp1 >>= 16;
  180. temp2 = xC2S6*irot_input_y;
  181. DOROUND ( temp2 );
  182. temp2 >>= 16;
  183. op[2*8] = (INT16)(temp1 + temp2);
  184. temp1 = xC6S2*irot_input_y;
  185. DOROUND ( temp1 );
  186. temp1 >>= 16;
  187. temp2 = xC2S6*irot_input_x;
  188. DOROUND ( temp2 );
  189. temp2 >>= 16;
  190. op[6*8] = (INT16)(temp1 -temp2);
  191. // Define inputs to rotation for outputs 1 and 7
  192. irot_input_x = icommon_product1 + id07;
  193. irot_input_y = -( id34 + icommon_product2 );
  194. // Apply rotation for outputs 1 and 7.
  195. temp1 = xC1S7*irot_input_x;
  196. DOROUND ( temp1 );
  197. temp1 >>= 16;
  198. temp2 = xC7S1*irot_input_y;
  199. DOROUND ( temp2 );
  200. temp2 >>= 16;
  201. op[1*8] = (INT16) (temp1 - temp2);
  202. temp1 = xC7S1*irot_input_x;
  203. DOROUND ( temp1 );
  204. temp1 >>= 16;
  205. temp2 = xC1S7*irot_input_y;
  206. DOROUND ( temp2 );
  207. temp2 >>= 16;
  208. op[7*8] = (INT16)(temp1 + temp2);
  209. // Define inputs to rotation for outputs 3 and 5
  210. irot_input_x = id07 - icommon_product1;
  211. irot_input_y = id34 - icommon_product2;
  212. // Apply rotation for outputs 3 and 5.
  213. temp1 = xC3S5*irot_input_x;
  214. DOROUND ( temp1 );
  215. temp1 >>= 16;
  216. temp2 = xC5S3*irot_input_y;
  217. DOROUND ( temp2 );
  218. temp2 >>= 16;
  219. op[3*8] = (INT16)(temp1 - temp2);
  220. temp1 = xC5S3*irot_input_x;
  221. DOROUND ( temp1 );
  222. temp1 >>= 16;
  223. temp2 = xC3S5*irot_input_y;
  224. DOROUND ( temp2 );
  225. temp2 >>= 16;
  226. op[5*8] = (INT16) (temp1 + temp2);
  227. // Increment data pointer for next column.
  228. ip ++;
  229. op ++;
  230. }
  231. }
  232. /****************************************************************************
  233. *
  234. * ROUTINE : fdct_short_C
  235. *
  236. * INPUTS : INT16 *InputData : 16-bit input data.
  237. *
  238. * OUTPUTS : INT16 *OutputData : 16-bit transform coefficients.
  239. *
  240. * RETURNS : void
  241. *
  242. * FUNCTION : Performs an 8x8 2-D fast DCT.
  243. *
  244. * The function to up the precision of FDCT by number of bits
  245. * defined by FDCT_PRECISION_BITS.
  246. *
  247. * SPECIAL NOTES : None.
  248. *
  249. ****************************************************************************/
  250. void fdct_short_C ( INT16 *DCTDataBuffer, INT16 *DCT_codes )
  251. {
  252. INT32 i;
  253. // Increase precision on input to fdct
  254. for ( i = 0; i < 64; i++ )
  255. DCTDataBuffer[i] = DCTDataBuffer[i] << FDCT_PRECISION_BITS;
  256. // Transform the error signal using the forward DCT to get set of transform coefficients
  257. fdct_short_C_orig ( DCTDataBuffer, DCT_codes );
  258. // Strip off the extra bits from the DCT output.
  259. // This should ultimately be merged into the quantize process but there are also
  260. // implications for DC prediction that would then need to be sorted
  261. for ( i = 0; i < 64; i++ )
  262. {
  263. // signed shift modified so behaves like "/" (truncates towards 0 for + and -)
  264. if ( DCT_codes[i] >= 0 )
  265. DCT_codes[i] = (DCT_codes[i]) >> FDCT_PRECISION_BITS;
  266. else
  267. DCT_codes[i] = (DCT_codes[i] + FDCT_PRECISION_NEG_ADJ) >> FDCT_PRECISION_BITS;
  268. }
  269. }