TDStretch.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106
  1. ///////////////////////////////////////////////////////////////////////////////
  2. ///
  3. /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
  4. /// while maintaining the original pitch by using a time domain WSOLA-like
  5. /// method with several performance-increasing tweaks.
  6. ///
  7. /// Notes : MMX optimized functions reside in a separate, platform-specific
  8. /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
  9. ///
  10. /// This source file contains OpenMP optimizations that allow speeding up the
  11. /// corss-correlation algorithm by executing it in several threads / CPU cores
  12. /// in parallel. See the following article link for more detailed discussion
  13. /// about SoundTouch OpenMP optimizations:
  14. /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
  15. ///
  16. /// Author : Copyright (c) Olli Parviainen
  17. /// Author e-mail : oparviai 'at' iki.fi
  18. /// SoundTouch WWW: http://www.surina.net/soundtouch
  19. ///
  20. ////////////////////////////////////////////////////////////////////////////////
  21. //
  22. // License :
  23. //
  24. // SoundTouch audio processing library
  25. // Copyright (c) Olli Parviainen
  26. //
  27. // This library is free software; you can redistribute it and/or
  28. // modify it under the terms of the GNU Lesser General Public
  29. // License as published by the Free Software Foundation; either
  30. // version 2.1 of the License, or (at your option) any later version.
  31. //
  32. // This library is distributed in the hope that it will be useful,
  33. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  34. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  35. // Lesser General Public License for more details.
  36. //
  37. // You should have received a copy of the GNU Lesser General Public
  38. // License along with this library; if not, write to the Free Software
  39. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  40. //
  41. ////////////////////////////////////////////////////////////////////////////////
  42. #include <string.h>
  43. #include <limits.h>
  44. #include <assert.h>
  45. #include <math.h>
  46. #include <float.h>
  47. #include "STTypes.h"
  48. #include "cpu_detect.h"
  49. #include "TDStretch.h"
  50. using namespace soundtouch;
  51. #define max(x, y) (((x) > (y)) ? (x) : (y))
  52. /*****************************************************************************
  53. *
  54. * Constant definitions
  55. *
  56. *****************************************************************************/
  57. // Table for the hierarchical mixing position seeking algorithm
  58. const short _scanOffsets[5][24]={
  59. { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
  60. 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
  61. {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
  62. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  63. { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
  64. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  65. { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
  66. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
  67. { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111,
  68. 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}};
  69. /*****************************************************************************
  70. *
  71. * Implementation of the class 'TDStretch'
  72. *
  73. *****************************************************************************/
  74. TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
  75. {
  76. bQuickSeek = false;
  77. channels = 2;
  78. pMidBuffer = NULL;
  79. pMidBufferUnaligned = NULL;
  80. overlapLength = 0;
  81. bAutoSeqSetting = true;
  82. bAutoSeekSetting = true;
  83. tempo = 1.0f;
  84. setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
  85. setTempo(1.0f);
  86. clear();
  87. }
  88. TDStretch::~TDStretch()
  89. {
  90. delete[] pMidBufferUnaligned;
  91. }
  92. // Sets routine control parameters. These control are certain time constants
  93. // defining how the sound is stretched to the desired duration.
  94. //
  95. // 'sampleRate' = sample rate of the sound
  96. // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
  97. // 'seekwindowMS' = seeking window length for scanning the best overlapping
  98. // position (default = 28 ms)
  99. // 'overlapMS' = overlapping length (default = 12 ms)
  100. void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
  101. int aSeekWindowMS, int aOverlapMS)
  102. {
  103. // accept only positive parameter values - if zero or negative, use old values instead
  104. if (aSampleRate > 0)
  105. {
  106. if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");
  107. this->sampleRate = aSampleRate;
  108. }
  109. if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
  110. if (aSequenceMS > 0)
  111. {
  112. this->sequenceMs = aSequenceMS;
  113. bAutoSeqSetting = false;
  114. }
  115. else if (aSequenceMS == 0)
  116. {
  117. // if zero, use automatic setting
  118. bAutoSeqSetting = true;
  119. }
  120. if (aSeekWindowMS > 0)
  121. {
  122. this->seekWindowMs = aSeekWindowMS;
  123. bAutoSeekSetting = false;
  124. }
  125. else if (aSeekWindowMS == 0)
  126. {
  127. // if zero, use automatic setting
  128. bAutoSeekSetting = true;
  129. }
  130. calcSeqParameters();
  131. calculateOverlapLength(overlapMs);
  132. // set tempo to recalculate 'sampleReq'
  133. setTempo(tempo);
  134. }
  135. /// Get routine control parameters, see setParameters() function.
  136. /// Any of the parameters to this function can be NULL, in such case corresponding parameter
  137. /// value isn't returned.
  138. void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
  139. {
  140. if (pSampleRate)
  141. {
  142. *pSampleRate = sampleRate;
  143. }
  144. if (pSequenceMs)
  145. {
  146. *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
  147. }
  148. if (pSeekWindowMs)
  149. {
  150. *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
  151. }
  152. if (pOverlapMs)
  153. {
  154. *pOverlapMs = overlapMs;
  155. }
  156. }
  157. // Overlaps samples in 'midBuffer' with the samples in 'pInput'
  158. void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
  159. {
  160. int i;
  161. SAMPLETYPE m1, m2;
  162. m1 = (SAMPLETYPE)0;
  163. m2 = (SAMPLETYPE)overlapLength;
  164. for (i = 0; i < overlapLength ; i ++)
  165. {
  166. pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
  167. m1 += 1;
  168. m2 -= 1;
  169. }
  170. }
  171. void TDStretch::clearMidBuffer()
  172. {
  173. memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
  174. }
  175. void TDStretch::clearInput()
  176. {
  177. inputBuffer.clear();
  178. clearMidBuffer();
  179. isBeginning = true;
  180. maxnorm = 0;
  181. maxnormf = 1e8;
  182. skipFract = 0;
  183. }
  184. // Clears the sample buffers
  185. void TDStretch::clear()
  186. {
  187. outputBuffer.clear();
  188. clearInput();
  189. }
  190. // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
  191. // to enable
  192. void TDStretch::enableQuickSeek(bool enable)
  193. {
  194. bQuickSeek = enable;
  195. }
  196. // Returns nonzero if the quick seeking algorithm is enabled.
  197. bool TDStretch::isQuickSeekEnabled() const
  198. {
  199. return bQuickSeek;
  200. }
  201. // Seeks for the optimal overlap-mixing position.
  202. int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
  203. {
  204. if (bQuickSeek)
  205. {
  206. return seekBestOverlapPositionQuick(refPos);
  207. }
  208. else
  209. {
  210. return seekBestOverlapPositionFull(refPos);
  211. }
  212. }
  213. // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
  214. // of 'ovlPos'.
  215. inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
  216. {
  217. #ifndef USE_MULTICH_ALWAYS
  218. if (channels == 1)
  219. {
  220. // mono sound.
  221. overlapMono(pOutput, pInput + ovlPos);
  222. }
  223. else if (channels == 2)
  224. {
  225. // stereo sound
  226. overlapStereo(pOutput, pInput + 2 * ovlPos);
  227. }
  228. else
  229. #endif // USE_MULTICH_ALWAYS
  230. {
  231. assert(channels > 0);
  232. overlapMulti(pOutput, pInput + channels * ovlPos);
  233. }
  234. }
  235. // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
  236. // routine
  237. //
  238. // The best position is determined as the position where the two overlapped
  239. // sample sequences are 'most alike', in terms of the highest cross-correlation
  240. // value over the overlapping period
  241. int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
  242. {
  243. int bestOffs;
  244. double bestCorr;
  245. int i;
  246. double norm;
  247. bestCorr = -FLT_MAX;
  248. bestOffs = 0;
  249. // Scans for the best correlation value by testing each possible position
  250. // over the permitted range.
  251. bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
  252. bestCorr = (bestCorr + 0.1) * 0.75;
  253. #pragma omp parallel for
  254. for (i = 1; i < seekLength; i ++)
  255. {
  256. double corr;
  257. // Calculates correlation value for the mixing position corresponding to 'i'
  258. #if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
  259. // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
  260. // iterate the loop in sequential order
  261. // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
  262. corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
  263. #else
  264. // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
  265. // as "calcCrossCorr", but saves time by reusing & updating previously stored
  266. // "norm" value
  267. corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
  268. #endif
  269. // heuristic rule to slightly favour values close to mid of the range
  270. double tmp = (double)(2 * i - seekLength) / (double)seekLength;
  271. corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
  272. // Checks for the highest correlation value
  273. if (corr > bestCorr)
  274. {
  275. // For optimal performance, enter critical section only in case that best value found.
  276. // in such case repeat 'if' condition as it's possible that parallel execution may have
  277. // updated the bestCorr value in the mean time
  278. #pragma omp critical
  279. if (corr > bestCorr)
  280. {
  281. bestCorr = corr;
  282. bestOffs = i;
  283. }
  284. }
  285. }
  286. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  287. adaptNormalizer();
  288. #endif
  289. // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
  290. clearCrossCorrState();
  291. return bestOffs;
  292. }
  293. // Quick seek algorithm for improved runtime-performance: First roughly scans through the
  294. // correlation area, and then scan surroundings of two best preliminary correlation candidates
  295. // with improved precision
  296. //
  297. // Based on testing:
  298. // - This algorithm gives on average 99% as good match as the full algorithm
  299. // - this quick seek algorithm finds the best match on ~90% of cases
  300. // - on those 10% of cases when this algorithm doesn't find best match,
  301. // it still finds on average ~90% match vs. the best possible match
  302. int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
  303. {
  304. #define _MIN(a, b) (((a) < (b)) ? (a) : (b))
  305. #define SCANSTEP 16
  306. #define SCANWIND 8
  307. int bestOffs;
  308. int i;
  309. int bestOffs2;
  310. float bestCorr, corr;
  311. float bestCorr2;
  312. double norm;
  313. // note: 'float' types used in this function in case that the platform would need to use software-fp
  314. bestCorr =
  315. bestCorr2 = -FLT_MAX;
  316. bestOffs =
  317. bestOffs2 = SCANWIND;
  318. // Scans for the best correlation value by testing each possible position
  319. // over the permitted range. Look for two best matches on the first pass to
  320. // increase possibility of ideal match.
  321. //
  322. // Begin from "SCANSTEP" instead of SCANWIND to make the calculation
  323. // catch the 'middlepoint' of seekLength vector as that's the a-priori
  324. // expected best match position
  325. //
  326. // Roughly:
  327. // - 15% of cases find best result directly on the first round,
  328. // - 75% cases find better match on 2nd round around the best match from 1st round
  329. // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round
  330. for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)
  331. {
  332. // Calculates correlation value for the mixing position corresponding
  333. // to 'i'
  334. corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
  335. // heuristic rule to slightly favour values close to mid of the seek range
  336. float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
  337. corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
  338. // Checks for the highest correlation value
  339. if (corr > bestCorr)
  340. {
  341. // found new best match. keep the previous best as 2nd best match
  342. bestCorr2 = bestCorr;
  343. bestOffs2 = bestOffs;
  344. bestCorr = corr;
  345. bestOffs = i;
  346. }
  347. else if (corr > bestCorr2)
  348. {
  349. // not new best, but still new 2nd best match
  350. bestCorr2 = corr;
  351. bestOffs2 = i;
  352. }
  353. }
  354. // Scans surroundings of the found best match with small stepping
  355. int end = _MIN(bestOffs + SCANWIND + 1, seekLength);
  356. for (i = bestOffs - SCANWIND; i < end; i++)
  357. {
  358. if (i == bestOffs) continue; // this offset already calculated, thus skip
  359. // Calculates correlation value for the mixing position corresponding
  360. // to 'i'
  361. corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
  362. // heuristic rule to slightly favour values close to mid of the range
  363. float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
  364. corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
  365. // Checks for the highest correlation value
  366. if (corr > bestCorr)
  367. {
  368. bestCorr = corr;
  369. bestOffs = i;
  370. }
  371. }
  372. // Scans surroundings of the 2nd best match with small stepping
  373. end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);
  374. for (i = bestOffs2 - SCANWIND; i < end; i++)
  375. {
  376. if (i == bestOffs2) continue; // this offset already calculated, thus skip
  377. // Calculates correlation value for the mixing position corresponding
  378. // to 'i'
  379. corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
  380. // heuristic rule to slightly favour values close to mid of the range
  381. float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
  382. corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
  383. // Checks for the highest correlation value
  384. if (corr > bestCorr)
  385. {
  386. bestCorr = corr;
  387. bestOffs = i;
  388. }
  389. }
  390. // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
  391. clearCrossCorrState();
  392. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  393. adaptNormalizer();
  394. #endif
  395. return bestOffs;
  396. }
  397. /// For integer algorithm: adapt normalization factor divider with music so that
  398. /// it'll not be pessimistically restrictive that can degrade quality on quieter sections
  399. /// yet won't cause integer overflows either
  400. void TDStretch::adaptNormalizer()
  401. {
  402. // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to
  403. // too low values during pauses in music
  404. if ((maxnorm > 1000) || (maxnormf > 40000000))
  405. {
  406. //norm averaging filter
  407. maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm;
  408. if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16))
  409. {
  410. // large values, so increase divider
  411. overlapDividerBitsNorm++;
  412. if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase
  413. }
  414. else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0))
  415. {
  416. // extra small values, decrease divider
  417. overlapDividerBitsNorm--;
  418. }
  419. }
  420. maxnorm = 0;
  421. }
  422. /// clear cross correlation routine state if necessary
  423. void TDStretch::clearCrossCorrState()
  424. {
  425. // default implementation is empty.
  426. }
  427. /// Calculates processing sequence length according to tempo setting
  428. void TDStretch::calcSeqParameters()
  429. {
  430. // Adjust tempo param according to tempo, so that variating processing sequence length is used
  431. // at various tempo settings, between the given low...top limits
  432. #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
  433. #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
  434. // sequence-ms setting values at above low & top tempo
  435. #define AUTOSEQ_AT_MIN 90.0
  436. #define AUTOSEQ_AT_MAX 40.0
  437. #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
  438. #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
  439. // seek-window-ms setting values at above low & top tempoq
  440. #define AUTOSEEK_AT_MIN 20.0
  441. #define AUTOSEEK_AT_MAX 15.0
  442. #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
  443. #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
  444. #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
  445. double seq, seek;
  446. if (bAutoSeqSetting)
  447. {
  448. seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
  449. seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
  450. sequenceMs = (int)(seq + 0.5);
  451. }
  452. if (bAutoSeekSetting)
  453. {
  454. seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
  455. seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
  456. seekWindowMs = (int)(seek + 0.5);
  457. }
  458. // Update seek window lengths
  459. seekWindowLength = (sampleRate * sequenceMs) / 1000;
  460. if (seekWindowLength < 2 * overlapLength)
  461. {
  462. seekWindowLength = 2 * overlapLength;
  463. }
  464. seekLength = (sampleRate * seekWindowMs) / 1000;
  465. }
  466. // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
  467. // tempo, larger faster tempo.
  468. void TDStretch::setTempo(double newTempo)
  469. {
  470. int intskip;
  471. tempo = newTempo;
  472. // Calculate new sequence duration
  473. calcSeqParameters();
  474. // Calculate ideal skip length (according to tempo value)
  475. nominalSkip = tempo * (seekWindowLength - overlapLength);
  476. intskip = (int)(nominalSkip + 0.5);
  477. // Calculate how many samples are needed in the 'inputBuffer' to
  478. // process another batch of samples
  479. //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
  480. sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
  481. }
  482. // Sets the number of channels, 1 = mono, 2 = stereo
  483. void TDStretch::setChannels(int numChannels)
  484. {
  485. if (!verifyNumberOfChannels(numChannels) ||
  486. (channels == numChannels)) return;
  487. channels = numChannels;
  488. inputBuffer.setChannels(channels);
  489. outputBuffer.setChannels(channels);
  490. // re-init overlap/buffer
  491. overlapLength=0;
  492. setParameters(sampleRate);
  493. }
  494. // nominal tempo, no need for processing, just pass the samples through
  495. // to outputBuffer
  496. /*
  497. void TDStretch::processNominalTempo()
  498. {
  499. assert(tempo == 1.0f);
  500. if (bMidBufferDirty)
  501. {
  502. // If there are samples in pMidBuffer waiting for overlapping,
  503. // do a single sliding overlapping with them in order to prevent a
  504. // clicking distortion in the output sound
  505. if (inputBuffer.numSamples() < overlapLength)
  506. {
  507. // wait until we've got overlapLength input samples
  508. return;
  509. }
  510. // Mix the samples in the beginning of 'inputBuffer' with the
  511. // samples in 'midBuffer' using sliding overlapping
  512. overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
  513. outputBuffer.putSamples(overlapLength);
  514. inputBuffer.receiveSamples(overlapLength);
  515. clearMidBuffer();
  516. // now we've caught the nominal sample flow and may switch to
  517. // bypass mode
  518. }
  519. // Simply bypass samples from input to output
  520. outputBuffer.moveSamples(inputBuffer);
  521. }
  522. */
  523. // Processes as many processing frames of the samples 'inputBuffer', store
  524. // the result into 'outputBuffer'
  525. void TDStretch::processSamples()
  526. {
  527. int ovlSkip;
  528. int offset = 0;
  529. int temp;
  530. /* Removed this small optimization - can introduce a click to sound when tempo setting
  531. crosses the nominal value
  532. if (tempo == 1.0f)
  533. {
  534. // tempo not changed from the original, so bypass the processing
  535. processNominalTempo();
  536. return;
  537. }
  538. */
  539. // Process samples as long as there are enough samples in 'inputBuffer'
  540. // to form a processing frame.
  541. while ((int)inputBuffer.numSamples() >= sampleReq)
  542. {
  543. if (isBeginning == false)
  544. {
  545. // apart from the very beginning of the track,
  546. // scan for the best overlapping position & do overlap-add
  547. offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
  548. // Mix the samples in the 'inputBuffer' at position of 'offset' with the
  549. // samples in 'midBuffer' using sliding overlapping
  550. // ... first partially overlap with the end of the previous sequence
  551. // (that's in 'midBuffer')
  552. overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
  553. outputBuffer.putSamples((uint)overlapLength);
  554. offset += overlapLength;
  555. }
  556. else
  557. {
  558. // Adjust processing offset at beginning of track by not perform initial overlapping
  559. // and compensating that in the 'input buffer skip' calculation
  560. isBeginning = false;
  561. int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
  562. #ifdef ST_SIMD_AVOID_UNALIGNED
  563. // in SIMD mode, round the skip amount to value corresponding to aligned memory address
  564. if (channels == 1)
  565. {
  566. skip &= -4;
  567. }
  568. else if (channels == 2)
  569. {
  570. skip &= -2;
  571. }
  572. #endif
  573. skipFract -= skip;
  574. if (skipFract <= -nominalSkip)
  575. {
  576. skipFract = -nominalSkip;
  577. }
  578. }
  579. // ... then copy sequence samples from 'inputBuffer' to output:
  580. // crosscheck that we don't have buffer overflow...
  581. if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
  582. {
  583. continue; // just in case, shouldn't really happen
  584. }
  585. // length of sequence
  586. temp = (seekWindowLength - 2 * overlapLength);
  587. outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
  588. // Copies the end of the current sequence from 'inputBuffer' to
  589. // 'midBuffer' for being mixed with the beginning of the next
  590. // processing sequence and so on
  591. assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
  592. memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),
  593. channels * sizeof(SAMPLETYPE) * overlapLength);
  594. // Remove the processed samples from the input buffer. Update
  595. // the difference between integer & nominal skip step to 'skipFract'
  596. // in order to prevent the error from accumulating over time.
  597. skipFract += nominalSkip; // real skip size
  598. ovlSkip = (int)skipFract; // rounded to integer skip
  599. skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
  600. inputBuffer.receiveSamples((uint)ovlSkip);
  601. }
  602. }
  603. // Adds 'numsamples' pcs of samples from the 'samples' memory position into
  604. // the input of the object.
  605. void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
  606. {
  607. // Add the samples into the input buffer
  608. inputBuffer.putSamples(samples, nSamples);
  609. // Process the samples in input buffer
  610. processSamples();
  611. }
  612. /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
  613. void TDStretch::acceptNewOverlapLength(int newOverlapLength)
  614. {
  615. int prevOvl;
  616. assert(newOverlapLength >= 0);
  617. prevOvl = overlapLength;
  618. overlapLength = newOverlapLength;
  619. if (overlapLength > prevOvl)
  620. {
  621. delete[] pMidBufferUnaligned;
  622. pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
  623. // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
  624. pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
  625. clearMidBuffer();
  626. }
  627. }
  628. // Operator 'new' is overloaded so that it automatically creates a suitable instance
  629. // depending on if we've a MMX/SSE/etc-capable CPU available or not.
  630. void * TDStretch::operator new(size_t s)
  631. {
  632. // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
  633. ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
  634. return newInstance();
  635. }
  636. TDStretch * TDStretch::newInstance()
  637. {
  638. uint uExtensions;
  639. uExtensions = detectCPUextensions();
  640. // Check if MMX/SSE instruction set extensions supported by CPU
  641. #ifdef SOUNDTOUCH_ALLOW_MMX
  642. // MMX routines available only with integer sample types
  643. if (uExtensions & SUPPORT_MMX)
  644. {
  645. return ::new TDStretchMMX;
  646. }
  647. else
  648. #endif // SOUNDTOUCH_ALLOW_MMX
  649. #ifdef SOUNDTOUCH_ALLOW_SSE
  650. if (uExtensions & SUPPORT_SSE)
  651. {
  652. // SSE support
  653. return ::new TDStretchSSE;
  654. }
  655. else
  656. #endif // SOUNDTOUCH_ALLOW_SSE
  657. {
  658. // ISA optimizations not supported, use plain C version
  659. return ::new TDStretch;
  660. }
  661. }
  662. //////////////////////////////////////////////////////////////////////////////
  663. //
  664. // Integer arithmetic specific algorithm implementations.
  665. //
  666. //////////////////////////////////////////////////////////////////////////////
  667. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  668. // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
  669. // version of the routine.
  670. void TDStretch::overlapStereo(short *poutput, const short *input) const
  671. {
  672. int i;
  673. short temp;
  674. int cnt2;
  675. for (i = 0; i < overlapLength ; i ++)
  676. {
  677. temp = (short)(overlapLength - i);
  678. cnt2 = 2 * i;
  679. poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
  680. poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
  681. }
  682. }
  683. // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
  684. // version of the routine.
  685. void TDStretch::overlapMulti(short *poutput, const short *input) const
  686. {
  687. short m1;
  688. int i = 0;
  689. for (m1 = 0; m1 < overlapLength; m1 ++)
  690. {
  691. short m2 = (short)(overlapLength - m1);
  692. for (int c = 0; c < channels; c ++)
  693. {
  694. poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
  695. i++;
  696. }
  697. }
  698. }
  699. // Calculates the x having the closest 2^x value for the given value
  700. static int _getClosest2Power(double value)
  701. {
  702. return (int)(log(value) / log(2.0) + 0.5);
  703. }
  704. /// Calculates overlap period length in samples.
  705. /// Integer version rounds overlap length to closest power of 2
  706. /// for a divide scaling operation.
  707. void TDStretch::calculateOverlapLength(int aoverlapMs)
  708. {
  709. int newOvl;
  710. assert(aoverlapMs >= 0);
  711. // calculate overlap length so that it's power of 2 - thus it's easy to do
  712. // integer division by right-shifting. Term "-1" at end is to account for
  713. // the extra most significatnt bit left unused in result by signed multiplication
  714. overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
  715. if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;
  716. if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;
  717. newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above
  718. acceptNewOverlapLength(newOvl);
  719. overlapDividerBitsNorm = overlapDividerBitsPure;
  720. // calculate sloping divider so that crosscorrelation operation won't
  721. // overflow 32-bit register. Max. sum of the crosscorrelation sum without
  722. // divider would be 2^30*(N^3-N)/3, where N = overlap length
  723. slopingDivider = (newOvl * newOvl - 1) / 3;
  724. }
  725. double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)
  726. {
  727. long corr;
  728. unsigned long lnorm;
  729. int i;
  730. #ifdef ST_SIMD_AVOID_UNALIGNED
  731. // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
  732. if (((ulongptr)mixingPos) & 15) return -1e50;
  733. #endif
  734. // hint compiler autovectorization that loop length is divisible by 8
  735. int ilength = (channels * overlapLength) & -8;
  736. corr = lnorm = 0;
  737. // Same routine for stereo and mono
  738. for (i = 0; i < ilength; i += 2)
  739. {
  740. corr += (mixingPos[i] * compare[i] +
  741. mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
  742. lnorm += (mixingPos[i] * mixingPos[i] +
  743. mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
  744. // do intermediate scalings to avoid integer overflow
  745. }
  746. if (lnorm > maxnorm)
  747. {
  748. // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
  749. #pragma omp critical
  750. if (lnorm > maxnorm)
  751. {
  752. maxnorm = lnorm;
  753. }
  754. }
  755. // Normalize result by dividing by sqrt(norm) - this step is easiest
  756. // done using floating point operation
  757. norm = (double)lnorm;
  758. return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
  759. }
  760. /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
  761. double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
  762. {
  763. long corr;
  764. long lnorm;
  765. int i;
  766. // hint compiler autovectorization that loop length is divisible by 8
  767. int ilength = (channels * overlapLength) & -8;
  768. // cancel first normalizer tap from previous round
  769. lnorm = 0;
  770. for (i = 1; i <= channels; i ++)
  771. {
  772. lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
  773. }
  774. corr = 0;
  775. // Same routine for stereo and mono.
  776. for (i = 0; i < ilength; i += 2)
  777. {
  778. corr += (mixingPos[i] * compare[i] +
  779. mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
  780. }
  781. // update normalizer with last samples of this round
  782. for (int j = 0; j < channels; j ++)
  783. {
  784. i --;
  785. lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
  786. }
  787. norm += (double)lnorm;
  788. if (norm > maxnorm)
  789. {
  790. maxnorm = (unsigned long)norm;
  791. }
  792. // Normalize result by dividing by sqrt(norm) - this step is easiest
  793. // done using floating point operation
  794. return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
  795. }
  796. #endif // SOUNDTOUCH_INTEGER_SAMPLES
  797. //////////////////////////////////////////////////////////////////////////////
  798. //
  799. // Floating point arithmetic specific algorithm implementations.
  800. //
  801. #ifdef SOUNDTOUCH_FLOAT_SAMPLES
  802. // Overlaps samples in 'midBuffer' with the samples in 'pInput'
  803. void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
  804. {
  805. int i;
  806. float fScale;
  807. float f1;
  808. float f2;
  809. fScale = 1.0f / (float)overlapLength;
  810. f1 = 0;
  811. f2 = 1.0f;
  812. for (i = 0; i < 2 * (int)overlapLength ; i += 2)
  813. {
  814. pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
  815. pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
  816. f1 += fScale;
  817. f2 -= fScale;
  818. }
  819. }
  820. // Overlaps samples in 'midBuffer' with the samples in 'input'.
  821. void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
  822. {
  823. int i;
  824. float fScale;
  825. float f1;
  826. float f2;
  827. fScale = 1.0f / (float)overlapLength;
  828. f1 = 0;
  829. f2 = 1.0f;
  830. i=0;
  831. for (int i2 = 0; i2 < overlapLength; i2 ++)
  832. {
  833. // note: Could optimize this slightly by taking into account that always channels > 2
  834. for (int c = 0; c < channels; c ++)
  835. {
  836. pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
  837. i++;
  838. }
  839. f1 += fScale;
  840. f2 -= fScale;
  841. }
  842. }
  843. /// Calculates overlapInMsec period length in samples.
  844. void TDStretch::calculateOverlapLength(int overlapInMsec)
  845. {
  846. int newOvl;
  847. assert(overlapInMsec >= 0);
  848. newOvl = (sampleRate * overlapInMsec) / 1000;
  849. if (newOvl < 16) newOvl = 16;
  850. // must be divisible by 8
  851. newOvl -= newOvl % 8;
  852. acceptNewOverlapLength(newOvl);
  853. }
  854. /// Calculate cross-correlation
  855. double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
  856. {
  857. float corr;
  858. float norm;
  859. int i;
  860. #ifdef ST_SIMD_AVOID_UNALIGNED
  861. // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
  862. if (((ulongptr)mixingPos) & 15) return -1e50;
  863. #endif
  864. // hint compiler autovectorization that loop length is divisible by 8
  865. int ilength = (channels * overlapLength) & -8;
  866. corr = norm = 0;
  867. // Same routine for stereo and mono
  868. for (i = 0; i < ilength; i ++)
  869. {
  870. corr += mixingPos[i] * compare[i];
  871. norm += mixingPos[i] * mixingPos[i];
  872. }
  873. anorm = norm;
  874. return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
  875. }
  876. /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
  877. double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
  878. {
  879. float corr;
  880. int i;
  881. corr = 0;
  882. // cancel first normalizer tap from previous round
  883. for (i = 1; i <= channels; i ++)
  884. {
  885. norm -= mixingPos[-i] * mixingPos[-i];
  886. }
  887. // hint compiler autovectorization that loop length is divisible by 8
  888. int ilength = (channels * overlapLength) & -8;
  889. // Same routine for stereo and mono
  890. for (i = 0; i < ilength; i ++)
  891. {
  892. corr += mixingPos[i] * compare[i];
  893. }
  894. // update normalizer with last samples of this round
  895. for (int j = 0; j < channels; j ++)
  896. {
  897. i --;
  898. norm += mixingPos[i] * mixingPos[i];
  899. }
  900. return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
  901. }
  902. #endif // SOUNDTOUCH_FLOAT_SAMPLES