BPMDetect.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. ////////////////////////////////////////////////////////////////////////////////
  2. ///
  3. /// Beats-per-minute (BPM) detection routine.
  4. ///
  5. /// The beat detection algorithm works as follows:
  6. /// - Use function 'inputSamples' to input a chunks of samples to the class for
  7. /// analysis. It's a good idea to enter a large sound file or stream in smallish
  8. /// chunks of around few kilosamples in order not to extinguish too much RAM memory.
  9. /// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden,
  10. /// which is basically ok as low (bass) frequencies mostly determine the beat rate.
  11. /// Simple averaging is used for anti-alias filtering because the resulting signal
  12. /// quality isn't of that high importance.
  13. /// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
  14. /// taking absolute value that's smoothed by sliding average. Signal levels that
  15. /// are below a couple of times the general RMS amplitude level are cut away to
  16. /// leave only notable peaks there.
  17. /// - Repeating sound patterns (e.g. beats) are detected by calculating short-term
  18. /// autocorrelation function of the enveloped signal.
  19. /// - After whole sound data file has been analyzed as above, the bpm level is
  20. /// detected by function 'getBpm' that finds the highest peak of the autocorrelation
  21. /// function, calculates it's precise location and converts this reading to bpm's.
  22. ///
  23. /// Author : Copyright (c) Olli Parviainen
  24. /// Author e-mail : oparviai 'at' iki.fi
  25. /// SoundTouch WWW: http://www.surina.net/soundtouch
  26. ///
  27. ////////////////////////////////////////////////////////////////////////////////
  28. //
  29. // License :
  30. //
  31. // SoundTouch audio processing library
  32. // Copyright (c) Olli Parviainen
  33. //
  34. // This library is free software; you can redistribute it and/or
  35. // modify it under the terms of the GNU Lesser General Public
  36. // License as published by the Free Software Foundation; either
  37. // version 2.1 of the License, or (at your option) any later version.
  38. //
  39. // This library is distributed in the hope that it will be useful,
  40. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  41. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  42. // Lesser General Public License for more details.
  43. //
  44. // You should have received a copy of the GNU Lesser General Public
  45. // License along with this library; if not, write to the Free Software
  46. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  47. //
  48. ////////////////////////////////////////////////////////////////////////////////
  49. #define _USE_MATH_DEFINES
  50. #include <math.h>
  51. #include <assert.h>
  52. #include <string.h>
  53. #include <stdio.h>
  54. #include <cfloat>
  55. #include "FIFOSampleBuffer.h"
  56. #include "PeakFinder.h"
  57. #include "BPMDetect.h"
  58. using namespace soundtouch;
  59. // algorithm input sample block size
  60. static const int INPUT_BLOCK_SIZE = 2048;
  61. // decimated sample block size
  62. static const int DECIMATED_BLOCK_SIZE = 256;
  63. /// Target sample rate after decimation
  64. static const int TARGET_SRATE = 1000;
  65. /// XCorr update sequence size, update in about 200msec chunks
  66. static const int XCORR_UPDATE_SEQUENCE = (int)(TARGET_SRATE / 5);
  67. /// Moving average N size
  68. static const int MOVING_AVERAGE_N = 15;
  69. /// XCorr decay time constant, decay to half in 30 seconds
  70. /// If it's desired to have the system adapt quicker to beat rate
  71. /// changes within a continuing music stream, then the
  72. /// 'xcorr_decay_time_constant' value can be reduced, yet that
  73. /// can increase possibility of glitches in bpm detection.
  74. static const double XCORR_DECAY_TIME_CONSTANT = 30.0;
  75. /// Data overlap factor for beat detection algorithm
  76. static const int OVERLAP_FACTOR = 4;
  77. static const double TWOPI = (2 * M_PI);
  78. ////////////////////////////////////////////////////////////////////////////////
  79. // Enable following define to create bpm analysis file:
  80. //#define _CREATE_BPM_DEBUG_FILE
  81. #ifdef _CREATE_BPM_DEBUG_FILE
  82. static void _SaveDebugData(const char *name, const float *data, int minpos, int maxpos, double coeff)
  83. {
  84. FILE *fptr = fopen(name, "wt");
  85. int i;
  86. if (fptr)
  87. {
  88. printf("\nWriting BPM debug data into file %s\n", name);
  89. for (i = minpos; i < maxpos; i ++)
  90. {
  91. fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]);
  92. }
  93. fclose(fptr);
  94. }
  95. }
  96. void _SaveDebugBeatPos(const char *name, const std::vector<BEAT> &beats)
  97. {
  98. printf("\nWriting beat detections data into file %s\n", name);
  99. FILE *fptr = fopen(name, "wt");
  100. if (fptr)
  101. {
  102. for (uint i = 0; i < beats.size(); i++)
  103. {
  104. BEAT b = beats[i];
  105. fprintf(fptr, "%lf\t%lf\n", b.pos, b.strength);
  106. }
  107. fclose(fptr);
  108. }
  109. }
  110. #else
  111. #define _SaveDebugData(name, a,b,c,d)
  112. #define _SaveDebugBeatPos(name, b)
  113. #endif
  114. // Hamming window
  115. void hamming(float *w, int N)
  116. {
  117. for (int i = 0; i < N; i++)
  118. {
  119. w[i] = (float)(0.54 - 0.46 * cos(TWOPI * i / (N - 1)));
  120. }
  121. }
  122. ////////////////////////////////////////////////////////////////////////////////
  123. //
  124. // IIR2_filter - 2nd order IIR filter
  125. IIR2_filter::IIR2_filter(const double *lpf_coeffs)
  126. {
  127. memcpy(coeffs, lpf_coeffs, 5 * sizeof(double));
  128. memset(prev, 0, sizeof(prev));
  129. }
  130. float IIR2_filter::update(float x)
  131. {
  132. prev[0] = x;
  133. double y = x * coeffs[0];
  134. for (int i = 4; i >= 1; i--)
  135. {
  136. y += coeffs[i] * prev[i];
  137. prev[i] = prev[i - 1];
  138. }
  139. prev[3] = y;
  140. return (float)y;
  141. }
  142. // IIR low-pass filter coefficients, calculated with matlab/octave cheby2(2,40,0.05)
  143. const double _LPF_coeffs[5] = { 0.00996655391939, -0.01944529148401, 0.00996655391939, 1.96867605796247, -0.96916387431724 };
  144. ////////////////////////////////////////////////////////////////////////////////
  145. BPMDetect::BPMDetect(int numChannels, int aSampleRate) :
  146. beat_lpf(_LPF_coeffs)
  147. {
  148. beats.reserve(250); // initial reservation to prevent frequent reallocation
  149. this->sampleRate = aSampleRate;
  150. this->channels = numChannels;
  151. decimateSum = 0;
  152. decimateCount = 0;
  153. // choose decimation factor so that result is approx. 1000 Hz
  154. decimateBy = sampleRate / TARGET_SRATE;
  155. if ((decimateBy <= 0) || (decimateBy * DECIMATED_BLOCK_SIZE < INPUT_BLOCK_SIZE))
  156. {
  157. ST_THROW_RT_ERROR("Too small samplerate");
  158. }
  159. // Calculate window length & starting item according to desired min & max bpms
  160. windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
  161. windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM_RANGE);
  162. assert(windowLen > windowStart);
  163. // allocate new working objects
  164. xcorr = new float[windowLen];
  165. memset(xcorr, 0, windowLen * sizeof(float));
  166. pos = 0;
  167. peakPos = 0;
  168. peakVal = 0;
  169. init_scaler = 1;
  170. beatcorr_ringbuffpos = 0;
  171. beatcorr_ringbuff = new float[windowLen];
  172. memset(beatcorr_ringbuff, 0, windowLen * sizeof(float));
  173. // allocate processing buffer
  174. buffer = new FIFOSampleBuffer();
  175. // we do processing in mono mode
  176. buffer->setChannels(1);
  177. buffer->clear();
  178. // calculate hamming windows
  179. hamw = new float[XCORR_UPDATE_SEQUENCE];
  180. hamming(hamw, XCORR_UPDATE_SEQUENCE);
  181. hamw2 = new float[XCORR_UPDATE_SEQUENCE / 2];
  182. hamming(hamw2, XCORR_UPDATE_SEQUENCE / 2);
  183. }
  184. BPMDetect::~BPMDetect()
  185. {
  186. delete[] xcorr;
  187. delete[] beatcorr_ringbuff;
  188. delete[] hamw;
  189. delete[] hamw2;
  190. delete buffer;
  191. }
  192. /// convert to mono, low-pass filter & decimate to about 500 Hz.
  193. /// return number of outputted samples.
  194. ///
  195. /// Decimation is used to remove the unnecessary frequencies and thus to reduce
  196. /// the amount of data needed to be processed as calculating autocorrelation
  197. /// function is a very-very heavy operation.
  198. ///
  199. /// Anti-alias filtering is done simply by averaging the samples. This is really a
  200. /// poor-man's anti-alias filtering, but it's not so critical in this kind of application
  201. /// (it'd also be difficult to design a high-quality filter with steep cut-off at very
  202. /// narrow band)
  203. int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
  204. {
  205. int count, outcount;
  206. LONG_SAMPLETYPE out;
  207. assert(channels > 0);
  208. assert(decimateBy > 0);
  209. outcount = 0;
  210. for (count = 0; count < numsamples; count ++)
  211. {
  212. int j;
  213. // convert to mono and accumulate
  214. for (j = 0; j < channels; j ++)
  215. {
  216. decimateSum += src[j];
  217. }
  218. src += j;
  219. decimateCount ++;
  220. if (decimateCount >= decimateBy)
  221. {
  222. // Store every Nth sample only
  223. out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
  224. decimateSum = 0;
  225. decimateCount = 0;
  226. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  227. // check ranges for sure (shouldn't actually be necessary)
  228. if (out > 32767)
  229. {
  230. out = 32767;
  231. }
  232. else if (out < -32768)
  233. {
  234. out = -32768;
  235. }
  236. #endif // SOUNDTOUCH_INTEGER_SAMPLES
  237. dest[outcount] = (SAMPLETYPE)out;
  238. outcount ++;
  239. }
  240. }
  241. return outcount;
  242. }
  243. // Calculates autocorrelation function of the sample history buffer
  244. void BPMDetect::updateXCorr(int process_samples)
  245. {
  246. int offs;
  247. SAMPLETYPE *pBuffer;
  248. assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
  249. assert(process_samples == XCORR_UPDATE_SEQUENCE);
  250. pBuffer = buffer->ptrBegin();
  251. // calculate decay factor for xcorr filtering
  252. float xcorr_decay = (float)pow(0.5, 1.0 / (XCORR_DECAY_TIME_CONSTANT * TARGET_SRATE / process_samples));
  253. // prescale pbuffer
  254. float tmp[XCORR_UPDATE_SEQUENCE];
  255. for (int i = 0; i < process_samples; i++)
  256. {
  257. tmp[i] = hamw[i] * hamw[i] * pBuffer[i];
  258. }
  259. #pragma omp parallel for
  260. for (offs = windowStart; offs < windowLen; offs ++)
  261. {
  262. float sum;
  263. int i;
  264. sum = 0;
  265. for (i = 0; i < process_samples; i ++)
  266. {
  267. sum += tmp[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary
  268. }
  269. xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable time constant.
  270. xcorr[offs] += (float)fabs(sum);
  271. }
  272. }
  273. // Detect individual beat positions
  274. void BPMDetect::updateBeatPos(int process_samples)
  275. {
  276. SAMPLETYPE *pBuffer;
  277. assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
  278. pBuffer = buffer->ptrBegin();
  279. assert(process_samples == XCORR_UPDATE_SEQUENCE / 2);
  280. // static double thr = 0.0003;
  281. double posScale = (double)this->decimateBy / (double)this->sampleRate;
  282. int resetDur = (int)(0.12 / posScale + 0.5);
  283. // prescale pbuffer
  284. float tmp[XCORR_UPDATE_SEQUENCE / 2];
  285. for (int i = 0; i < process_samples; i++)
  286. {
  287. tmp[i] = hamw2[i] * hamw2[i] * pBuffer[i];
  288. }
  289. #pragma omp parallel for
  290. for (int offs = windowStart; offs < windowLen; offs++)
  291. {
  292. float sum = 0;
  293. for (int i = 0; i < process_samples; i++)
  294. {
  295. sum += tmp[i] * pBuffer[offs + i];
  296. }
  297. beatcorr_ringbuff[(beatcorr_ringbuffpos + offs) % windowLen] += (float)((sum > 0) ? sum : 0); // accumulate only positive correlations
  298. }
  299. int skipstep = XCORR_UPDATE_SEQUENCE / OVERLAP_FACTOR;
  300. // compensate empty buffer at beginning by scaling coefficient
  301. float scale = (float)windowLen / (float)(skipstep * init_scaler);
  302. if (scale > 1.0f)
  303. {
  304. init_scaler++;
  305. }
  306. else
  307. {
  308. scale = 1.0f;
  309. }
  310. // detect beats
  311. for (int i = 0; i < skipstep; i++)
  312. {
  313. LONG_SAMPLETYPE max = 0;
  314. float sum = beatcorr_ringbuff[beatcorr_ringbuffpos];
  315. sum -= beat_lpf.update(sum);
  316. if (sum > peakVal)
  317. {
  318. // found new local largest value
  319. peakVal = sum;
  320. peakPos = pos;
  321. }
  322. if (pos > peakPos + resetDur)
  323. {
  324. // largest value not updated for 200msec => accept as beat
  325. peakPos += skipstep;
  326. if (peakVal > 0)
  327. {
  328. // add detected beat to end of "beats" vector
  329. BEAT temp = { (float)(peakPos * posScale), (float)(peakVal * scale) };
  330. beats.push_back(temp);
  331. }
  332. peakVal = 0;
  333. peakPos = pos;
  334. }
  335. beatcorr_ringbuff[beatcorr_ringbuffpos] = 0;
  336. pos++;
  337. beatcorr_ringbuffpos = (beatcorr_ringbuffpos + 1) % windowLen;
  338. }
  339. }
  340. #define max(x,y) ((x) > (y) ? (x) : (y))
  341. void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
  342. {
  343. SAMPLETYPE decimated[DECIMATED_BLOCK_SIZE];
  344. // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration
  345. while (numSamples > 0)
  346. {
  347. int block;
  348. int decSamples;
  349. block = (numSamples > INPUT_BLOCK_SIZE) ? INPUT_BLOCK_SIZE : numSamples;
  350. // decimate. note that converts to mono at the same time
  351. decSamples = decimate(decimated, samples, block);
  352. samples += block * channels;
  353. numSamples -= block;
  354. buffer->putSamples(decimated, decSamples);
  355. }
  356. // when the buffer has enough samples for processing...
  357. int req = max(windowLen + XCORR_UPDATE_SEQUENCE, 2 * XCORR_UPDATE_SEQUENCE);
  358. while ((int)buffer->numSamples() >= req)
  359. {
  360. // ... update autocorrelations...
  361. updateXCorr(XCORR_UPDATE_SEQUENCE);
  362. // ...update beat position calculation...
  363. updateBeatPos(XCORR_UPDATE_SEQUENCE / 2);
  364. // ... and remove proceessed samples from the buffer
  365. int n = XCORR_UPDATE_SEQUENCE / OVERLAP_FACTOR;
  366. buffer->receiveSamples(n);
  367. }
  368. }
  369. void BPMDetect::removeBias()
  370. {
  371. int i;
  372. // Remove linear bias: calculate linear regression coefficient
  373. // 1. calc mean of 'xcorr' and 'i'
  374. double mean_i = 0;
  375. double mean_x = 0;
  376. for (i = windowStart; i < windowLen; i++)
  377. {
  378. mean_x += xcorr[i];
  379. }
  380. mean_x /= (windowLen - windowStart);
  381. mean_i = 0.5 * (windowLen - 1 + windowStart);
  382. // 2. calculate linear regression coefficient
  383. double b = 0;
  384. double div = 0;
  385. for (i = windowStart; i < windowLen; i++)
  386. {
  387. double xt = xcorr[i] - mean_x;
  388. double xi = i - mean_i;
  389. b += xt * xi;
  390. div += xi * xi;
  391. }
  392. b /= div;
  393. // subtract linear regression and resolve min. value bias
  394. float minval = FLT_MAX; // arbitrary large number
  395. for (i = windowStart; i < windowLen; i ++)
  396. {
  397. xcorr[i] -= (float)(b * i);
  398. if (xcorr[i] < minval)
  399. {
  400. minval = xcorr[i];
  401. }
  402. }
  403. // subtract min.value
  404. for (i = windowStart; i < windowLen; i ++)
  405. {
  406. xcorr[i] -= minval;
  407. }
  408. }
  409. // Calculate N-point moving average for "source" values
  410. void MAFilter(float *dest, const float *source, int start, int end, int N)
  411. {
  412. for (int i = start; i < end; i++)
  413. {
  414. int i1 = i - N / 2;
  415. int i2 = i + N / 2 + 1;
  416. if (i1 < start) i1 = start;
  417. if (i2 > end) i2 = end;
  418. double sum = 0;
  419. for (int j = i1; j < i2; j ++)
  420. {
  421. sum += source[j];
  422. }
  423. dest[i] = (float)(sum / (i2 - i1));
  424. }
  425. }
  426. float BPMDetect::getBpm()
  427. {
  428. double peakPos;
  429. double coeff;
  430. PeakFinder peakFinder;
  431. // remove bias from xcorr data
  432. removeBias();
  433. coeff = 60.0 * ((double)sampleRate / (double)decimateBy);
  434. // save bpm debug data if debug data writing enabled
  435. _SaveDebugData("soundtouch-bpm-xcorr.txt", xcorr, windowStart, windowLen, coeff);
  436. // Smoothen by N-point moving-average
  437. float *data = new float[windowLen];
  438. memset(data, 0, sizeof(float) * windowLen);
  439. MAFilter(data, xcorr, windowStart, windowLen, MOVING_AVERAGE_N);
  440. // find peak position
  441. peakPos = peakFinder.detectPeak(data, windowStart, windowLen);
  442. // save bpm debug data if debug data writing enabled
  443. _SaveDebugData("soundtouch-bpm-smoothed.txt", data, windowStart, windowLen, coeff);
  444. delete[] data;
  445. assert(decimateBy != 0);
  446. if (peakPos < 1e-9) return 0.0; // detection failed.
  447. _SaveDebugBeatPos("soundtouch-detected-beats.txt", beats);
  448. // calculate BPM
  449. float bpm = (float)(coeff / peakPos);
  450. return (bpm >= MIN_BPM && bpm <= MAX_BPM_VALID) ? bpm : 0;
  451. }
  452. /// Get beat position arrays. Note: The array includes also really low beat detection values
  453. /// in absence of clear strong beats. Consumer may wish to filter low values away.
  454. /// - "pos" receive array of beat positions
  455. /// - "values" receive array of beat detection strengths
  456. /// - max_num indicates max.size of "pos" and "values" array.
  457. ///
  458. /// You can query a suitable array sized by calling this with NULL in "pos" & "values".
  459. ///
  460. /// \return number of beats in the arrays.
  461. int BPMDetect::getBeats(float *pos, float *values, int max_num)
  462. {
  463. int num = (int)beats.size();
  464. if ((!pos) || (!values)) return num; // pos or values NULL, return just size
  465. for (int i = 0; (i < num) && (i < max_num); i++)
  466. {
  467. pos[i] = beats[i].pos;
  468. values[i] = beats[i].strength;
  469. }
  470. return num;
  471. }