1
0

CDSPBlockConvolver.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. //$ nobt
  2. //$ nocpp
  3. /**
  4. * @file CDSPBlockConvolver.h
  5. *
  6. * @brief Single-block overlap-save convolution processor class.
  7. *
  8. * This file includes single-block overlap-save convolution processor class.
  9. *
  10. * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
  11. * See the "LICENSE" file for license.
  12. */
  13. #ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
  14. #define R8B_CDSPBLOCKCONVOLVER_INCLUDED
  15. #include "CDSPFIRFilter.h"
  16. #include "CDSPProcessor.h"
  17. namespace r8b {
  18. /**
  19. * @brief Single-block overlap-save convolution processing class.
  20. *
  21. * Class that implements single-block overlap-save convolution processing. The
  22. * length of a single FFT block used depends on the length of the filter
  23. * kernel.
  24. *
  25. * The rationale behind "single-block" processing is that increasing the FFT
  26. * block length by 2 is more efficient than performing convolution at the same
  27. * FFT block length but using two blocks.
  28. *
  29. * This class also implements a built-in resampling by any whole-number
  30. * factor, which simplifies the overall resampling objects topology.
  31. */
  32. class CDSPBlockConvolver : public CDSPProcessor
  33. {
  34. public:
  35. /**
  36. * Constructor initializes internal variables and constants of *this
  37. * object.
  38. *
  39. * @param aFilter Pre-calculated filter data. Reference to this object is
  40. * inhertied by *this object, and the object will be released when *this
  41. * object is destroyed. If upsampling is used, filter's gain should be
  42. * equal to the upsampling factor.
  43. * @param aUpFactor The upsampling factor, positive value. E.g. value of 2
  44. * means 2x upsampling should be performed over the input data.
  45. * @param aDownFactor The downsampling factor, positive value. E.g. value
  46. * of 2 means 2x downsampling should be performed over the output data.
  47. * @param PrevLatency Latency, in samples (any value >=0), which was left
  48. * in the output signal by a previous process. This value is usually
  49. * non-zero if the minimum-phase filters are in use. This value is always
  50. * zero if the linear-phase filters are in use.
  51. * @param aDoConsumeLatency "True" if the output latency should be
  52. * consumed. Does not apply to the fractional part of the latency (if such
  53. * part is available).
  54. */
  55. CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
  56. const int aDownFactor, const double PrevLatency = 0.0,
  57. const bool aDoConsumeLatency = true )
  58. : Filter( &aFilter )
  59. , UpFactor( aUpFactor )
  60. , DownFactor( aDownFactor )
  61. , DoConsumeLatency( aDoConsumeLatency )
  62. , BlockLen2( 2 << Filter -> getBlockLenBits() )
  63. {
  64. R8BASSERT( UpFactor > 0 );
  65. R8BASSERT( DownFactor > 0 );
  66. R8BASSERT( PrevLatency >= 0.0 );
  67. int fftinBits;
  68. UpShift = getBitOccupancy( UpFactor ) - 1;
  69. if(( 1 << UpShift ) == UpFactor )
  70. {
  71. fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
  72. PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
  73. UpFactor;
  74. InputLen = BlockLen2 - PrevInputLen * UpFactor;
  75. }
  76. else
  77. {
  78. UpShift = -1;
  79. fftinBits = Filter -> getBlockLenBits() + 1;
  80. PrevInputLen = Filter -> getKernelLen() - 1;
  81. InputLen = BlockLen2 - PrevInputLen;
  82. }
  83. OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
  84. LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
  85. Latency = (int) LatencyFrac;
  86. const int InLatency = Latency + Filter -> getLatency() - OutOffset;
  87. LatencyFrac -= Latency;
  88. LatencyFrac /= DownFactor;
  89. Latency += InputLen + Filter -> getLatency();
  90. int fftoutBits;
  91. InputDelay = 0;
  92. DownSkipInit = 0;
  93. DownShift = getBitOccupancy( DownFactor ) - 1;
  94. if(( 1 << DownShift ) == DownFactor )
  95. {
  96. fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
  97. if( DownFactor > 1 )
  98. {
  99. if( UpShift > 0 )
  100. {
  101. // This case never happens in practice due to mutual
  102. // exclusion of "power of 2" DownFactor and UpFactor
  103. // values.
  104. R8BASSERT( UpShift == 0 );
  105. }
  106. else
  107. {
  108. // Make sure InputLen is divisible by DownFactor.
  109. const int ilc = InputLen & ( DownFactor - 1 );
  110. PrevInputLen += ilc;
  111. InputLen -= ilc;
  112. Latency -= ilc;
  113. // Correct InputDelay for input and filter's latency.
  114. const int lc = InLatency & ( DownFactor - 1 );
  115. if( lc > 0 )
  116. {
  117. InputDelay = DownFactor - lc;
  118. }
  119. if( !DoConsumeLatency )
  120. {
  121. Latency /= DownFactor;
  122. }
  123. }
  124. }
  125. }
  126. else
  127. {
  128. fftoutBits = Filter -> getBlockLenBits() + 1;
  129. DownShift = -1;
  130. if( !DoConsumeLatency && DownFactor > 1 )
  131. {
  132. DownSkipInit = Latency % DownFactor;
  133. Latency /= DownFactor;
  134. }
  135. }
  136. R8BASSERT( Latency >= 0 );
  137. fftin = new CDSPRealFFTKeeper( fftinBits );
  138. if( fftoutBits == fftinBits )
  139. {
  140. fftout = fftin;
  141. }
  142. else
  143. {
  144. ffto2 = new CDSPRealFFTKeeper( fftoutBits );
  145. fftout = ffto2;
  146. }
  147. WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
  148. CurInput = &WorkBlocks[ 0 ];
  149. CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
  150. // CurOutput are address-aligned.
  151. PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
  152. clear();
  153. R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
  154. "fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
  155. UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
  156. getLatency() );
  157. }
  158. virtual ~CDSPBlockConvolver()
  159. {
  160. Filter -> unref();
  161. }
  162. virtual int getLatency() const
  163. {
  164. return( DoConsumeLatency ? 0 : Latency );
  165. }
  166. virtual double getLatencyFrac() const
  167. {
  168. return( LatencyFrac );
  169. }
  170. virtual int getMaxOutLen( const int MaxInLen ) const
  171. {
  172. R8BASSERT( MaxInLen >= 0 );
  173. return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
  174. }
  175. virtual void clear()
  176. {
  177. memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
  178. if( DoConsumeLatency )
  179. {
  180. LatencyLeft = Latency;
  181. }
  182. else
  183. {
  184. LatencyLeft = 0;
  185. if( DownShift > 0 )
  186. {
  187. memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
  188. sizeof( CurOutput[ 0 ]));
  189. }
  190. else
  191. {
  192. memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
  193. sizeof( CurOutput[ 0 ]));
  194. memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
  195. sizeof( CurOutput[ 0 ]));
  196. }
  197. }
  198. memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
  199. InDataLeft = InputLen - InputDelay;
  200. UpSkip = 0;
  201. DownSkip = DownSkipInit;
  202. }
  203. virtual int process( double* ip, int l0, double*& op0 )
  204. {
  205. R8BASSERT( l0 >= 0 );
  206. R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
  207. double* op = op0;
  208. int l = l0 * UpFactor;
  209. l0 = 0;
  210. while( l > 0 )
  211. {
  212. const int Offs = InputLen - InDataLeft;
  213. if( l < InDataLeft )
  214. {
  215. InDataLeft -= l;
  216. if( UpShift >= 0 )
  217. {
  218. memcpy( &CurInput[ Offs >> UpShift ], ip,
  219. ( l >> UpShift ) * sizeof( CurInput[ 0 ]));
  220. }
  221. else
  222. {
  223. copyUpsample( ip, &CurInput[ Offs ], l );
  224. }
  225. copyToOutput( Offs - OutOffset, op, l, l0 );
  226. break;
  227. }
  228. const int b = InDataLeft;
  229. l -= b;
  230. InDataLeft = InputLen;
  231. int ilu;
  232. if( UpShift >= 0 )
  233. {
  234. const int bu = b >> UpShift;
  235. memcpy( &CurInput[ Offs >> UpShift ], ip,
  236. bu * sizeof( CurInput[ 0 ]));
  237. ip += bu;
  238. ilu = InputLen >> UpShift;
  239. }
  240. else
  241. {
  242. copyUpsample( ip, &CurInput[ Offs ], b );
  243. ilu = InputLen;
  244. }
  245. const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
  246. memcpy( &CurInput[ ilu ], PrevInput, pil );
  247. memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
  248. (*fftin) -> forward( CurInput );
  249. if( UpShift > 0 )
  250. {
  251. #if R8B_FLOATFFT
  252. mirrorInputSpectrum( (float*) CurInput );
  253. #else // R8B_FLOATFFT
  254. mirrorInputSpectrum( CurInput );
  255. #endif // R8B_FLOATFFT
  256. }
  257. if( Filter -> isZeroPhase() )
  258. {
  259. (*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
  260. CurInput );
  261. }
  262. else
  263. {
  264. (*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
  265. CurInput );
  266. }
  267. if( DownShift > 0 )
  268. {
  269. const int z = BlockLen2 >> DownShift;
  270. #if R8B_FLOATFFT
  271. float* const kb = (float*) Filter -> getKernelBlock();
  272. float* const p = (float*) CurInput;
  273. #else // R8B_FLOATFFT
  274. const double* const kb = Filter -> getKernelBlock();
  275. double* const p = CurInput;
  276. #endif // R8B_FLOATFFT
  277. p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
  278. }
  279. (*fftout) -> inverse( CurInput );
  280. copyToOutput( Offs - OutOffset, op, b, l0 );
  281. double* const tmp = CurInput;
  282. CurInput = CurOutput;
  283. CurOutput = tmp;
  284. }
  285. return( l0 );
  286. }
  287. private:
  288. CDSPFIRFilter* Filter; ///< Filter in use.
  289. ///<
  290. CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
  291. ///< the input spectrum (can embed the "power of 2" upsampling).
  292. ///<
  293. CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
  294. ///<
  295. CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
  296. ///< signal (can embed the "power of 2" downsampling), may point to
  297. ///< either "fftin" or "ffto2".
  298. ///<
  299. int UpFactor; ///< Upsampling factor.
  300. ///<
  301. int DownFactor; ///< Downsampling factor.
  302. ///<
  303. bool DoConsumeLatency; ///< "True" if the output latency should be
  304. ///< consumed. Does not apply to the fractional part of the latency
  305. ///< (if such part is available).
  306. ///<
  307. int BlockLen2; ///< Equals block length * 2.
  308. ///<
  309. int OutOffset; ///< Output offset, depends on filter's introduced latency.
  310. ///<
  311. int PrevInputLen; ///< The length of previous input data saved, used for
  312. ///< overlap.
  313. ///<
  314. int InputLen; ///< The number of input samples that should be accumulated
  315. ///< before the input block is processed.
  316. ///<
  317. int Latency; ///< Processing latency, in samples.
  318. ///<
  319. double LatencyFrac; ///< Fractional latency, in samples, that is left in
  320. ///< the output signal.
  321. ///<
  322. int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
  323. ///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
  324. ///<
  325. int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
  326. ///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
  327. ///< 1.
  328. ///<
  329. int InputDelay; ///< Additional input delay, in samples. Used to make the
  330. ///< output delay divisible by DownShift. Used only if UpShift <= 0
  331. ///< and DownShift > 0.
  332. ///<
  333. CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
  334. ///< output data blocks, overall capacity = BlockLen2 * 2 +
  335. ///< PrevInputLen. Used in the flip-flop manner.
  336. ///<
  337. double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
  338. ///<
  339. double* CurInput; ///< Input data buffer, capacity = BlockLen2.
  340. ///<
  341. double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
  342. ///<
  343. int InDataLeft; ///< Samples left before processing input and output FFT
  344. ///< blocks. Initialized to InputLen on clear.
  345. ///<
  346. int LatencyLeft; ///< Latency in samples left to skip.
  347. ///<
  348. int UpSkip; ///< The current upsampling sample skip (value in the range
  349. ///< 0 to UpFactor - 1).
  350. ///<
  351. int DownSkip; ///< The current downsampling sample skip (value in the
  352. ///< range 0 to DownFactor - 1). Not used if DownShift > 0.
  353. ///<
  354. int DownSkipInit; ///< The initial DownSkip value after clear().
  355. ///<
  356. /**
  357. * Function copies samples from the input buffer to the output buffer
  358. * while inserting zeros inbetween them to perform the whole-numbered
  359. * upsampling.
  360. *
  361. * @param[in,out] ip0 Input buffer. Will be advanced on function's return.
  362. * @param[out] op Output buffer.
  363. * @param l0 The number of samples to fill in the output buffer, including
  364. * both input samples and interpolation (zero) samples.
  365. */
  366. void copyUpsample( double*& ip0, double* op, int l0 )
  367. {
  368. int b = min( UpSkip, l0 );
  369. if( b != 0 )
  370. {
  371. UpSkip -= b;
  372. l0 -= b;
  373. *op = 0.0;
  374. op++;
  375. while( --b != 0 )
  376. {
  377. *op = 0.0;
  378. op++;
  379. }
  380. }
  381. double* ip = ip0;
  382. const int upf = UpFactor;
  383. int l = l0 / upf;
  384. int lz = l0 - l * upf;
  385. if( upf == 3 )
  386. {
  387. while( l != 0 )
  388. {
  389. op[ 0 ] = *ip;
  390. op[ 1 ] = 0.0;
  391. op[ 2 ] = 0.0;
  392. ip++;
  393. op += upf;
  394. l--;
  395. }
  396. }
  397. else
  398. if( upf == 5 )
  399. {
  400. while( l != 0 )
  401. {
  402. op[ 0 ] = *ip;
  403. op[ 1 ] = 0.0;
  404. op[ 2 ] = 0.0;
  405. op[ 3 ] = 0.0;
  406. op[ 4 ] = 0.0;
  407. ip++;
  408. op += upf;
  409. l--;
  410. }
  411. }
  412. else
  413. {
  414. const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
  415. while( l != 0 )
  416. {
  417. *op = *ip;
  418. ip++;
  419. memset( op + 1, 0, zc );
  420. op += upf;
  421. l--;
  422. }
  423. }
  424. if( lz != 0 )
  425. {
  426. *op = *ip;
  427. ip++;
  428. op++;
  429. UpSkip = upf - lz;
  430. while( --lz != 0 )
  431. {
  432. *op = 0.0;
  433. op++;
  434. }
  435. }
  436. ip0 = ip;
  437. }
  438. /**
  439. * Function copies sample data from the CurOutput buffer to the specified
  440. * output buffer and advances its position. If necessary, this function
  441. * "consumes" latency and performs downsampling.
  442. *
  443. * @param Offs CurOutput buffer offset, can be negative.
  444. * @param[out] op0 Output buffer pointer, will be advanced.
  445. * @param b The number of output samples available, including those which
  446. * are discarded during whole-number downsampling.
  447. * @param l0 The overall output sample count, will be increased.
  448. */
  449. void copyToOutput( int Offs, double*& op0, int b, int& l0 )
  450. {
  451. if( Offs < 0 )
  452. {
  453. if( Offs + b <= 0 )
  454. {
  455. Offs += BlockLen2;
  456. }
  457. else
  458. {
  459. copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
  460. b += Offs;
  461. Offs = 0;
  462. }
  463. }
  464. if( LatencyLeft != 0 )
  465. {
  466. if( LatencyLeft >= b )
  467. {
  468. LatencyLeft -= b;
  469. return;
  470. }
  471. Offs += LatencyLeft;
  472. b -= LatencyLeft;
  473. LatencyLeft = 0;
  474. }
  475. const int df = DownFactor;
  476. if( DownShift > 0 )
  477. {
  478. int Skip = Offs & ( df - 1 );
  479. if( Skip > 0 )
  480. {
  481. Skip = df - Skip;
  482. b -= Skip;
  483. Offs += Skip;
  484. }
  485. if( b > 0 )
  486. {
  487. b = ( b + df - 1 ) >> DownShift;
  488. memcpy( op0, &CurOutput[ Offs >> DownShift ],
  489. b * sizeof( op0[ 0 ]));
  490. op0 += b;
  491. l0 += b;
  492. }
  493. }
  494. else
  495. {
  496. if( df > 1 )
  497. {
  498. const double* ip = &CurOutput[ Offs + DownSkip ];
  499. int l = ( b + df - 1 - DownSkip ) / df;
  500. DownSkip += l * df - b;
  501. double* op = op0;
  502. l0 += l;
  503. op0 += l;
  504. while( l > 0 )
  505. {
  506. *op = *ip;
  507. ip += df;
  508. op++;
  509. l--;
  510. }
  511. }
  512. else
  513. {
  514. memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
  515. op0 += b;
  516. l0 += b;
  517. }
  518. }
  519. }
  520. /**
  521. * Function performs input spectrum mirroring which is used to perform a
  522. * fast "power of 2" upsampling. Such mirroring is equivalent to insertion
  523. * of zeros into the input signal.
  524. *
  525. * @param p Spectrum data block to mirror.
  526. * @tparam T Buffer's element type.
  527. */
  528. template< typename T >
  529. void mirrorInputSpectrum( T* const p )
  530. {
  531. const int bl1 = BlockLen2 >> UpShift;
  532. const int bl2 = bl1 + bl1;
  533. int i;
  534. for( i = bl1 + 2; i < bl2; i += 2 )
  535. {
  536. p[ i ] = p[ bl2 - i ];
  537. p[ i + 1 ] = -p[ bl2 - i + 1 ];
  538. }
  539. p[ bl1 ] = p[ 1 ];
  540. p[ bl1 + 1 ] = 0.0;
  541. p[ 1 ] = p[ 0 ];
  542. for( i = 1; i < UpShift; i++ )
  543. {
  544. const int z = bl1 << i;
  545. memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
  546. p[ z + 1 ] = 0.0;
  547. }
  548. }
  549. };
  550. } // namespace r8b
  551. #endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED