pcm.cpp 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. #include "pcm.h"
  2. #include <math.h>
  3. #include <ipps.h>
  4. #include <intrin.h>
  5. #include <mmintrin.h>
  /*
   * PA_CLIP_(val, min, max): clamp the lvalue `val` in place to [min, max].
   *
   * The body is wrapped in do { } while (0) so the macro acts as one statement
   * and stays valid in `if (c) PA_CLIP_(v, lo, hi); else ...`.
   * Every argument is evaluated more than once, so pass only expressions
   * without side effects.
   */
  #define PA_CLIP_( val, min, max )\
      do { (val) = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); } while (0)
  /*
   * float_to_long: convert a double to a long.
   *
   * When _M_IX86 is defined the value is loaded onto the x87 stack and stored
   * back with fistp (which converts using the FPU's current rounding mode).
   * Otherwise a plain C cast is used, which truncates toward zero.
   * NOTE(review): the two paths can therefore differ by one for fractional
   * inputs; callers clip afterwards, so only the last bit is affected.
   */
  #if defined(_M_IX86)
  static __inline long float_to_long(double t)
  {
      long converted;
      __asm fld t
      __asm fistp converted
      return converted;
  }
  #else
  static inline long float_to_long(double t)
  {
      return (long)t;
  }
  #endif
  /*
   * Branch-free clamp of x to [a, b] (requires a <= b), updating x in place.
   *
   * Uses the identity  clamp(x) = (|x - a| + (a + b) - |x - b|) / 2.
   * The operations are kept in this exact order so rounding is unchanged.
   */
  inline static void clip(double &x, double a, double b)
  {
      const double dist_lo = fabs(x - a);
      const double dist_hi = fabs(x - b);
      x = 0.5 * ((dist_lo + (a + b)) - dist_hi);
  }
  27. static void Float32_To_Int32_Clip(void *destinationBuffer, const float *src, size_t count, double gain)
  28. {
  29. int32_t *dest = (int32_t *)destinationBuffer;
  30. gain*=65536.*32768.;
  31. while ( count-- )
  32. {
  33. /* convert to 32 bit and drop the low 8 bits */
  34. double scaled = *src++ * gain;
  35. clip( scaled, -2147483648., 2147483647.);
  36. signed long temp = (signed long) scaled;
  37. *dest++ = temp;
  38. }
  39. }
  40. static void Float32_To_Int24_Clip(void *destinationBuffer, const float *src, size_t count, double gain)
  41. {
  42. unsigned char *dest = (unsigned char*)destinationBuffer;
  43. gain*=65536.*32768.;
  44. while ( count-- )
  45. {
  46. /* convert to 32 bit and drop the low 8 bits */
  47. double scaled = *src * gain;
  48. clip( scaled, -2147483648., 2147483647.);
  49. signed long temp = (signed long) scaled;
  50. dest[0] = (unsigned char)(temp >> 8);
  51. dest[1] = (unsigned char)(temp >> 16);
  52. dest[2] = (unsigned char)(temp >> 24);
  53. src++;
  54. dest += 3;
  55. }
  56. }
  57. static void Float32_To_Int16_Clip(void *destinationBuffer, const float *src, size_t count, double gain)
  58. {
  59. int16_t *dest = (signed short*)destinationBuffer;
  60. gain*=32768.0;
  61. while ( count-- )
  62. {
  63. long samp = float_to_long((*src) * gain/* - 0.5*/);
  64. PA_CLIP_( samp, -0x8000, 0x7FFF );
  65. *dest = (int16_t) samp;
  66. src ++;
  67. dest ++;
  68. }
  69. }
  70. static void Float32_To_UInt8_Clip(void *destinationBuffer, const float *src, size_t count, double gain)
  71. {
  72. uint8_t *dest = (uint8_t *)destinationBuffer;
  73. gain*=128.0;
  74. while ( count-- )
  75. {
  76. long samp = float_to_long((*src) * gain/* - 0.5*/) + 128;
  77. PA_CLIP_( samp, 0, 255);
  78. *dest = (uint8_t) samp;
  79. src ++;
  80. dest ++;
  81. }
  82. }
  83. int nsutil_pcm_FloatToInt_Interleaved_Gain(void *pcm, const float *input, int bps, size_t num_samples, float gain)
  84. {
  85. switch(bps)
  86. {
  87. case 8:
  88. Float32_To_UInt8_Clip(pcm, input, num_samples, gain);
  89. return 0;
  90. case 16:
  91. Float32_To_Int16_Clip(pcm, input, num_samples, gain);
  92. return 0;
  93. case 24:
  94. Float32_To_Int24_Clip(pcm, input, num_samples, gain);
  95. return 0;
  96. case 32:
  97. Float32_To_Int32_Clip(pcm, input, num_samples, gain);
  98. return 0;
  99. }
  100. return 0;
  101. }
  102. int nsutil_pcm_FloatToInt_Interleaved(void *pcm, const float *input, int bps, size_t num_samples)
  103. {
  104. switch(bps)
  105. {
  106. case 8:
  107. Float32_To_UInt8_Clip(pcm, input, num_samples, 1.0f);
  108. return 0;
  109. case 16:
  110. Float32_To_Int16_Clip(pcm, input, num_samples, 1.0f);
  111. return 0;
  112. case 24:
  113. Float32_To_Int24_Clip(pcm, input, num_samples, 1.0f);
  114. return 0;
  115. case 32:
  116. Float32_To_Int32_Clip(pcm, input, num_samples, 1.0f);
  117. return 0;
  118. }
  119. return 0;
  120. }
  /*
   * Convert `num_samples` integer PCM samples of `bps` bits each to floats
   * normalised so that integer full scale maps to [-1.0, 1.0).
   *
   * output      - receives num_samples floats
   * pcm         - source samples: 8-bit unsigned (offset 128), 16-bit signed,
   *               24-bit signed packed in 3 bytes (least significant first),
   *               or 32-bit signed
   * bps         - 8, 16, 24 or 32; any other value writes nothing
   * num_samples - number of samples to convert
   *
   * Always returns 0.
   */
  int nsutil_pcm_IntToFloat_Interleaved(float *output, const void *pcm, int bps, size_t num_samples)
  {
      switch (bps)
      {
      case 8:
          {
              const uint8_t *samples8 = (const uint8_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  /* full scale of an 8-bit sample is 128, matching the
                     factor used when converting float to 8-bit */
                  output[x] = (float)(samples8[x] - 128) * 0.0078125f /* 1/128 */;
              }
          }
          break;
      case 16:
          {
              const int16_t *samples16 = (const int16_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  output[x] = (float)samples16[x] * 0.000030517578125f /* 1/32768 */;
              }
          }
          break;
      case 24:
          {
              const uint8_t *samples8 = (const uint8_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  /* Place the three bytes in the top of a 32-bit word, then
                     reinterpret as signed. Assembling in unsigned 32 bits keeps
                     the sign correct even where long is wider than 32 bits and
                     avoids shifting into the sign bit. */
                  uint32_t bits = ((uint32_t)samples8[0] << 8)
                                | ((uint32_t)samples8[1] << 16)
                                | ((uint32_t)samples8[2] << 24);
                  output[x] = (float)(int32_t)bits * 4.656612873077393e-10f /* 1/2147483648 */;
                  samples8 += 3;
              }
          }
          break;
      case 32:
          {
              const int32_t *samples32 = (const int32_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  output[x] = (float)samples32[x] * 4.656612873077393e-10f /* 1/2147483648 */;
              }
          }
          break;
      default:
          break;
      }
      return 0;
  }
  /*
   * Convert `num_samples` integer PCM samples of `bps` bits each to floats,
   * normalising integer full scale to [-1.0, 1.0) and multiplying by `gain`.
   *
   * output      - receives num_samples floats
   * pcm         - source samples: 8-bit unsigned (offset 128), 16-bit signed,
   *               24-bit signed packed in 3 bytes (least significant first),
   *               or 32-bit signed
   * bps         - 8, 16, 24 or 32; any other value writes nothing
   * num_samples - number of samples to convert
   * gain        - linear factor folded into the normalisation constant
   *
   * Always returns 0.
   */
  int nsutil_pcm_IntToFloat_Interleaved_Gain(float *output, const void *pcm, int bps, size_t num_samples, float gain)
  {
      switch (bps)
      {
      case 8:
          {
              /* full scale of an 8-bit sample is 128, matching the factor
                 used when converting float to 8-bit */
              gain /= 128.0f;
              const uint8_t *samples8 = (const uint8_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  output[x] = (float)(samples8[x] - 128) * gain;
              }
          }
          break;
      case 16:
          {
              gain /= 32768.0f;
              const int16_t *samples16 = (const int16_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  output[x] = (float)samples16[x] * gain;
              }
          }
          break;
      case 24:
          {
              gain /= 2147483648.0f;
              const uint8_t *samples8 = (const uint8_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  /* Place the three bytes in the top of a 32-bit word, then
                     reinterpret as signed. Assembling in unsigned 32 bits keeps
                     the sign correct even where long is wider than 32 bits and
                     avoids shifting into the sign bit. */
                  uint32_t bits = ((uint32_t)samples8[0] << 8)
                                | ((uint32_t)samples8[1] << 16)
                                | ((uint32_t)samples8[2] << 24);
                  output[x] = (float)(int32_t)bits * gain;
                  samples8 += 3;
              }
          }
          break;
      case 32:
          {
              gain /= 2147483648.0f;
              const int32_t *samples32 = (const int32_t *)pcm;
              for (size_t x = 0; x != num_samples; x++)
              {
                  output[x] = (float)samples32[x] * gain;
              }
          }
          break;
      default:
          break;
      }
      return 0;
  }
  /*
   * Widen `num_samples` signed 8-bit samples to signed 16-bit by placing each
   * sample in the high byte of the output word (i.e. multiplying by 256).
   *
   * output      - receives num_samples int16_t values
   * pcm         - source int8_t samples
   * num_samples - number of samples to convert
   *
   * Sixteen samples at a time are handled with SSE2; the remainder is done in
   * a scalar loop. Neither buffer has to be 16-byte aligned: unaligned
   * load/store intrinsics are used because callers pass arbitrary pointers.
   *
   * Always returns 0.
   */
  int nsutil_pcm_S8ToS16_Interleaved(int16_t *output, const int8_t *pcm, size_t num_samples)
  {
      const __m128i sse_zero = _mm_setzero_si128();
      while (num_samples > 15)
      {
          const __m128i sse_8 = _mm_loadu_si128((const __m128i *)pcm);
          pcm += 16;
          /* interleaving zero (low byte) with the sample (high byte) yields sample << 8 */
          _mm_storeu_si128((__m128i *)output, _mm_unpacklo_epi8(sse_zero, sse_8));
          output += 8;
          _mm_storeu_si128((__m128i *)output, _mm_unpackhi_epi8(sse_zero, sse_8));
          output += 8;
          num_samples -= 16;
      }
      while (num_samples--)
      {
          /* multiply rather than shift: left-shifting a negative value is not portable */
          *output++ = (int16_t)(*pcm++ * 256);
      }
      return 0;
  }
  /*
   * Convert `num_samples` unsigned 8-bit samples (offset 128) to signed
   * 16-bit: each output is (sample - 128) * 256.
   *
   * output      - receives num_samples int16_t values
   * pcm         - source uint8_t samples
   * num_samples - number of samples to convert
   *
   * Sixteen samples at a time are handled with SSE2; the remainder is done in
   * a scalar loop. Neither buffer has to be 16-byte aligned: unaligned
   * load/store intrinsics are used because callers pass arbitrary pointers.
   *
   * Always returns 0.
   */
  int nsutil_pcm_U8ToS16_Interleaved(int16_t *output, const uint8_t *pcm, size_t num_samples)
  {
      const __m128i sse_zero = _mm_setzero_si128();
      const __m128i sse_128 = _mm_set1_epi8(-128);
      while (num_samples > 15)
      {
          __m128i sse_8 = _mm_loadu_si128((const __m128i *)pcm);
          /* wrapping byte add of -128 turns offset-binary into two's complement */
          sse_8 = _mm_add_epi8(sse_8, sse_128);
          pcm += 16;
          /* interleaving zero (low byte) with the sample (high byte) yields sample << 8 */
          _mm_storeu_si128((__m128i *)output, _mm_unpacklo_epi8(sse_zero, sse_8));
          output += 8;
          _mm_storeu_si128((__m128i *)output, _mm_unpackhi_epi8(sse_zero, sse_8));
          output += 8;
          num_samples -= 16;
      }
      while (num_samples--)
      {
          /* multiply rather than shift: left-shifting a negative value is not portable */
          *output++ = (int16_t)((*pcm++ - 128) * 256);
      }
      return 0;
  }