//$ nobt //$ nocpp /** * @file CDSPFracInterpolator.h * * @brief Fractional delay interpolator and filter bank classes. * * This file includes fractional delay interpolator class. * * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev * See the "LICENSE" file for license. */ #ifndef R8B_CDSPFRACINTERPOLATOR_INCLUDED #define R8B_CDSPFRACINTERPOLATOR_INCLUDED #include "CDSPSincFilterGen.h" #include "CDSPProcessor.h" namespace r8b { #if R8B_FLTTEST extern int InterpFilterFracs; ///< Force this number of fractional filter ///< positions. -1 - use default. ///< #endif // R8B_FLTTEST /** * @brief Sinc function-based fractional delay filter bank class. * * Class implements storage and initialization of a bank of sinc-based * fractional delay filters, expressed as 0th, 1st, 2nd or 3rd order * polynomial interpolation coefficients. The filters are windowed by the * "Kaiser" power-raised window function. */ class CDSPFracDelayFilterBank : public R8B_BASECLASS { R8BNOCTOR( CDSPFracDelayFilterBank ); friend class CDSPFracDelayFilterBankCache; public: /** * Constructor. * * @param aFilterFracs The number of fractional delay positions to sample, * -1 - use default. * @param aElementSize The size of each filter's tap, in "double" values. * This parameter corresponds to the complexity of interpolation. 4 should * be set for 3rd order, 3 for 2nd order, 2 for linear interpolation, 1 * for whole-numbered stepping. * @param aInterpPoints The number of points the interpolation is based * on. This value should not be confused with the ElementSize. Set to 2 * for linear or no interpolation. * @param aReqAtten Required filter attentuation. * @param aIsThird "True" if one-third filter is required. */ CDSPFracDelayFilterBank( const int aFilterFracs, const int aElementSize, const int aInterpPoints, const double aReqAtten, const bool aIsThird ) : InitFilterFracs( aFilterFracs ) , ElementSize( aElementSize ) , InterpPoints( aInterpPoints ) , ReqAtten( aReqAtten ) , IsThird( aIsThird ) , Next( NULL ) , RefCount( 1 ) { R8BASSERT( ElementSize >= 1 && ElementSize <= 4 ); // Kaiser window function Params, for half and third-band. const double* const Params = getWinParams( ReqAtten, IsThird, FilterLen ); FilterSize = FilterLen * ElementSize; if( InitFilterFracs == -1 ) { FilterFracs = (int) ceil( pow( 6.4, ReqAtten / 50.0 )); #if R8B_FLTTEST if( InterpFilterFracs != -1 ) { FilterFracs = InterpFilterFracs; } #endif // R8B_FLTTEST } else { FilterFracs = InitFilterFracs; } Table.alloc( FilterSize * ( FilterFracs + InterpPoints )); CDSPSincFilterGen sinc; sinc.Len2 = FilterLen / 2; double* p = Table; const int pc2 = InterpPoints / 2; int i; for( i = -pc2 + 1; i <= FilterFracs + pc2; i++ ) { sinc.FracDelay = (double) ( FilterFracs - i ) / FilterFracs; sinc.initFrac( CDSPSincFilterGen :: wftKaiser, Params, true ); sinc.generateFrac( p, &CDSPSincFilterGen :: calcWindowKaiser, ElementSize ); normalizeFIRFilter( p, FilterLen, 1.0, ElementSize ); p += FilterSize; } const int TablePos2 = FilterSize; const int TablePos3 = FilterSize * 2; const int TablePos4 = FilterSize * 3; const int TablePos5 = FilterSize * 4; const int TablePos6 = FilterSize * 5; const int TablePos7 = FilterSize * 6; const int TablePos8 = FilterSize * 7; double* const TableEnd = Table + ( FilterFracs + 1 ) * FilterSize; p = Table; if( InterpPoints == 8 ) { if( ElementSize == 3 ) { // Calculate 2nd order spline (polynomial) interpolation // coefficients using 8 points. while( p < TableEnd ) { calcSpline2p8Coeffs( p, p[ 0 ], p[ TablePos2 ], p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ], p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]); p += ElementSize; } #if defined( R8B_SIMD_ISH ) shuffle2_3( Table, TableEnd ); #endif // SIMD } else if( ElementSize == 4 ) { // Calculate 3rd order spline (polynomial) interpolation // coefficients using 8 points. while( p < TableEnd ) { calcSpline3p8Coeffs( p, p[ 0 ], p[ TablePos2 ], p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ], p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]); p += ElementSize; } #if defined( R8B_SIMD_ISH ) shuffle2_4( Table, TableEnd ); #endif // SIMD } } else { if( ElementSize == 2 ) { // Calculate linear interpolation coefficients. while( p < TableEnd ) { p[ 1 ] = p[ TablePos2 ] - p[ 0 ]; p += ElementSize; } #if defined( R8B_SIMD_ISH ) shuffle2_2( Table, TableEnd ); #endif // SIMD } } R8BCONSOLE( "CDSPFracDelayFilterBank: fracs=%i order=%i taps=%i " "att=%.1f third=%i\n", FilterFracs, ElementSize - 1, FilterLen, ReqAtten, (int) IsThird ); } ~CDSPFracDelayFilterBank() { delete Next; } /** * Function "rounds" the specified attenuation to the nearest effective * value. * * @param[in,out] att Required filter attentuation. Will be rounded to the * nearest value. * @param aIsThird "True" if one-third filter is required. */ static void roundReqAtten( double& att, const bool aIsThird ) { int tmp; getWinParams( att, aIsThird, tmp ); } /** * Function returns the length of the filter. Always an even number, not * less than 6. */ int getFilterLen() const { return( FilterLen ); } /** * Function returns the number of fractional positions sampled by the * bank. */ int getFilterFracs() const { return( FilterFracs ); } /** * @param i Filter index, in the range 0 to FilterFracs, inclusive. * @return Reference to the filter. */ const double& operator []( const int i ) const { R8BASSERT( i >= 0 && i <= FilterFracs ); return( Table[ i * FilterSize ]); } /** * This function should be called when the filter bank obtained via the * filter bank cache is no longer needed. */ void unref(); private: int FilterLen; ///< Filter length. Always an even number, not less than 6. ///< int FilterFracs; ///< Fractional position count. ///< int InitFilterFracs; ///< Fractional position count as supplied to the ///< constructor, may equal -1. ///< int ElementSize; ///< Filter element size. ///< int InterpPoints; ///< Interpolation points to use. ///< double ReqAtten; ///< Filter's attentuation. ///< bool IsThird; ///< "True" if one-third filter is in use. ///< int FilterSize; ///< This constant specifies the "size" of a single filter ///< in "double" elements. ///< CFixedBuffer< double > Table; ///< The table of fractional delay filters ///< for all discrete fractional x = 0..1 sample positions, and ///< interpolation coefficients. ///< CDSPFracDelayFilterBank* Next; ///< Next filter bank in cache's list. ///< int RefCount; ///< The number of references made to *this filter bank. ///< Not considered for "static" filter bank objects. ///< /** * Function returns windowing function parameters for the specified * attenuation and filter type. * * @param[in,out] att Required filter attentuation. Will be rounded to the * nearest value. * @param aIsThird "True" if one-third filter is required. * @param[out] fltlen Resulting filter length. */ static const double* getWinParams( double& att, const bool aIsThird, int& fltlen ) { static const int Coeffs2Base = 8; static const int Coeffs2Count = 12; static const double Coeffs2[ Coeffs2Count ][ 3 ] = { { 4.1308468534586913, 1.1752580009977263, 55.5446 }, // 0.0256 { 4.4241520324148826, 1.8004881791443044, 81.4191 }, // 0.0886 { 5.2615232289173663, 1.8133318236025469, 96.3392 }, // 0.0481 { 5.9433751227216174, 1.8730186391986436, 111.1315 }, // 0.0264 { 6.8308658290513815, 1.8549555110340281, 125.4653 }, // 0.0146 { 7.6648458290312904, 1.8565766090828464, 139.7379 }, // 0.0081 { 8.2038728664307605, 1.9269521820570166, 154.0532 }, // 0.0045 { 8.7865150946655142, 1.9775307667441668, 168.2101 }, // 0.0025 { 9.5945017884101773, 1.9718456992078597, 182.1076 }, // 0.0014 { 10.5163141145985240, 1.9504067820201083, 195.5668 }, // 0.0008 { 10.2382465206362470, 2.1608923446870087, 209.0610 }, // 0.0004 { 10.9976060250714000, 2.1536533525688935, 222.5010 }, // 0.0003 }; static const int Coeffs3Base = 6; static const int Coeffs3Count = 10; static const double Coeffs3[ Coeffs3Count ][ 3 ] = { { 3.9888564562781847, 1.5869927184268915, 66.5701 }, // 0.0467 { 4.6986694038145007, 1.8086068597928262, 86.4715 }, // 0.0136 { 5.5995071329337822, 1.8930163360942349, 106.1195 }, // 0.0040 { 6.3627287800257228, 1.9945748322093975, 125.2307 }, // 0.0012 { 7.4299550711428308, 1.9893400572347544, 144.3469 }, // 0.0004 { 8.0667715944075642, 2.0928201458699909, 163.4099 }, // 0.0001 { 8.7469970226288822, 2.1640279784268355, 181.0694 }, // 0.0000 { 10.0823430069835230, 2.0896678025321922, 199.2880 }, // 0.0000 { 10.9222206090489510, 2.1221681162186004, 216.6865 }, // 0.0000 { 21.2017743894772010, 1.1856768080118900, 233.9188 }, // 0.0000 }; const double* Params; int i = 0; if( aIsThird ) { while( i != Coeffs3Count - 1 && Coeffs3[ i ][ 2 ] < att ) { i++; } Params = &Coeffs3[ i ][ 0 ]; att = Coeffs3[ i ][ 2 ]; fltlen = Coeffs3Base + i * 2; } else { while( i != Coeffs2Count - 1 && Coeffs2[ i ][ 2 ] < att ) { i++; } Params = &Coeffs2[ i ][ 0 ]; att = Coeffs2[ i ][ 2 ]; fltlen = Coeffs2Base + i * 2; } return( Params ); } /** * Function shuffles 2 order-2 filter points for SIMD operation. * * @param p Filter table start pointer. * @param pe Filter table end pointer. */ static void shuffle2_2( double* p, double* const pe ) { while( p != pe ) { const double t = p[ 2 ]; p[ 2 ] = p[ 1 ]; p[ 1 ] = t; p += 4; } } /** * Function shuffles 2 order-3 filter points for SIMD operation. * * @param p Filter table start pointer. * @param pe Filter table end pointer. */ static void shuffle2_3( double* p, double* const pe ) { while( p != pe ) { const double t1 = p[ 1 ]; const double t2 = p[ 2 ]; const double t3 = p[ 3 ]; const double t4 = p[ 4 ]; p[ 1 ] = t3; p[ 2 ] = t1; p[ 3 ] = t4; p[ 4 ] = t2; p += 6; } } /** * Function shuffles 2 order-4 filter points for SIMD operation. * * @param p Filter table start pointer. * @param pe Filter table end pointer. */ static void shuffle2_4( double* p, double* const pe ) { while( p != pe ) { const double t1 = p[ 1 ]; const double t2 = p[ 2 ]; const double t3 = p[ 3 ]; const double t4 = p[ 4 ]; const double t5 = p[ 5 ]; const double t6 = p[ 6 ]; p[ 1 ] = t4; p[ 2 ] = t1; p[ 3 ] = t5; p[ 4 ] = t2; p[ 5 ] = t6; p[ 6 ] = t3; p += 8; } } }; /** * @brief Fractional delay filter cache class. * * Class implements cache storage of fractional delay filter banks. */ class CDSPFracDelayFilterBankCache : public R8B_BASECLASS { R8BNOCTOR( CDSPFracDelayFilterBankCache ); friend class CDSPFracDelayFilterBank; public: /** * @return The number of filters present in the cache now. This value can * be monitored for debugging "forgotten" filters. */ static int getObjCount() { R8BSYNC( StateSync ); return( ObjCount ); } /** * Function calculates or returns reference to a previously calculated * (cached) fractional delay filter bank. * * @param aFilterFracs The number of fractional delay positions to sample, * -1 - use default. * @param aElementSize The size of each filter's tap, in "double" values. * @param aInterpPoints The number of points the interpolation is based * on. * @param ReqAtten Required filter attentuation. * @param IsThird "True" if one-third filter is required. * @param IsStatic "True" if a permanent static filter should be returned * that is never removed from the cache until application terminates. */ static CDSPFracDelayFilterBank& getFilterBank( const int aFilterFracs, const int aElementSize, const int aInterpPoints, double ReqAtten, const bool IsThird, const bool IsStatic ) { CDSPFracDelayFilterBank :: roundReqAtten( ReqAtten, IsThird ); R8BSYNC( StateSync ); if( IsStatic ) { CDSPFracDelayFilterBank* CurObj = StaticObjects; while( CurObj != NULL ) { if( CurObj -> InitFilterFracs == aFilterFracs && CurObj -> ElementSize == aElementSize && CurObj -> InterpPoints == aInterpPoints && CurObj -> ReqAtten == ReqAtten && CurObj -> IsThird == IsThird ) { return( *CurObj ); } CurObj = CurObj -> Next; } // Create a new filter bank and build it. CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize, aInterpPoints, ReqAtten, IsThird ); // Insert the bank at the start of the list. CurObj -> Next = StaticObjects.unkeep(); StaticObjects = CurObj; return( *CurObj ); } CDSPFracDelayFilterBank* PrevObj = NULL; CDSPFracDelayFilterBank* CurObj = Objects; while( CurObj != NULL ) { if( CurObj -> InitFilterFracs == aFilterFracs && CurObj -> ElementSize == aElementSize && CurObj -> InterpPoints == aInterpPoints && CurObj -> ReqAtten == ReqAtten && CurObj -> IsThird == IsThird ) { break; } if( CurObj -> Next == NULL && ObjCount >= R8B_FRACBANK_CACHE_MAX ) { if( CurObj -> RefCount == 0 ) { // Delete the last bank which is not used. PrevObj -> Next = NULL; delete CurObj; ObjCount--; } else { // Move the last bank to the top of the list since it // seems to be in use for a long time. PrevObj -> Next = NULL; CurObj -> Next = Objects.unkeep(); Objects = CurObj; } CurObj = NULL; break; } PrevObj = CurObj; CurObj = CurObj -> Next; } if( CurObj != NULL ) { CurObj -> RefCount++; if( PrevObj == NULL ) { return( *CurObj ); } // Remove the bank from the list temporarily. PrevObj -> Next = CurObj -> Next; } else { // Create a new filter bank (with RefCount == 1) and build it. CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize, aInterpPoints, ReqAtten, IsThird ); ObjCount++; } // Insert the bank at the start of the list. CurObj -> Next = Objects.unkeep(); Objects = CurObj; return( *CurObj ); } private: static CSyncObject StateSync; ///< Cache state synchronizer. ///< static CPtrKeeper< CDSPFracDelayFilterBank* > Objects; ///< The chain of ///< cached objects. ///< static CPtrKeeper< CDSPFracDelayFilterBank* > StaticObjects; ///< The ///< chain of static objects. ///< static int ObjCount; ///< The number of objects currently preset in the ///< Objects cache. ///< }; // --------------------------------------------------------------------------- // CDSPFracDelayFilterBank PUBLIC // --------------------------------------------------------------------------- inline void CDSPFracDelayFilterBank :: unref() { R8BSYNC( CDSPFracDelayFilterBankCache :: StateSync ); RefCount--; } /** * @param l Number 1. * @param s Number 2. * @param[out] GCD Resulting GCD. * @return "True" if the greatest common denominator of 2 numbers was * found. */ inline bool findGCD( double l, double s, double& GCD ) { int it = 0; while( it < 50 ) { if( s <= 0.0 ) { GCD = l; return( true ); } const double r = l - s; l = s; s = ( r < 0.0 ? -r : r ); it++; } return( false ); } /** * Function evaluates source and destination sample rate ratio and returns * the required input and output stepping. Function returns "false" if * whole stepping cannot be used to perform interpolation using these sample * rates. * * @param SSampleRate Source sample rate. * @param DSampleRate Destination sample rate. * @param[out] ResInStep Resulting input step. * @param[out] ResOutStep Resulting output step. * @return "True" if stepping was acquired. */ inline bool getWholeStepping( const double SSampleRate, const double DSampleRate, int& ResInStep, int& ResOutStep ) { double GCD; if( !findGCD( SSampleRate, DSampleRate, GCD ) || GCD < 1.0 ) { return( false ); } const double InStep0 = SSampleRate / GCD; ResInStep = (int) InStep0; const double OutStep0 = DSampleRate / GCD; ResOutStep = (int) OutStep0; if( InStep0 != ResInStep || OutStep0 != ResOutStep ) { return( false ); } if( ResOutStep > 1500 ) { // Do not allow large output stepping due to low cache // performance of large filter banks. return( false ); } return( true ); } /** * @brief Fractional delay filter bank-based interpolator class. * * Class implements the fractional delay interpolator. This implementation at * first puts the input signal into a ring buffer and then performs * interpolation. The interpolation is performed using sinc-based fractional * delay filters. These filters are contained in a bank, and for higher * precision they are interpolated between adjacent filters. * * To increase sample timing precision, this class uses "resettable counter" * approach. This gives zero overall sample timing error. With the * R8B_FASTTIMING configuration option enabled, the sample timing experiences * a very minor drift. */ class CDSPFracInterpolator : public CDSPProcessor { public: /** * Constructor initalizes the interpolator. It is important to call the * getMaxOutLen() function afterwards to obtain the optimal output buffer * length. * * @param aSrcSampleRate Source sample rate. * @param aDstSampleRate Destination sample rate. * @param ReqAtten Required filter attentuation. * @param IsThird "True" if one-third filter is required. * @param PrevLatency Latency, in samples (any value >=0), which was left * in the output signal by a previous process. This latency will be * consumed completely. */ CDSPFracInterpolator( const double aSrcSampleRate, const double aDstSampleRate, const double ReqAtten, const bool IsThird, const double PrevLatency ) : SrcSampleRate( aSrcSampleRate ) , DstSampleRate( aDstSampleRate ) #if R8B_FASTTIMING , FracStep( aSrcSampleRate / aDstSampleRate ) #endif // R8B_FASTTIMING { R8BASSERT( SrcSampleRate > 0.0 ); R8BASSERT( DstSampleRate > 0.0 ); R8BASSERT( PrevLatency >= 0.0 ); R8BASSERT( BufLenBits >= 5 ); InitFracPos = PrevLatency; Latency = (int) InitFracPos; InitFracPos -= Latency; R8BASSERT( Latency >= 0 ); #if R8B_FLTTEST IsWhole = false; LatencyFrac = 0.0; FilterBank = new CDSPFracDelayFilterBank( -1, 3, 8, ReqAtten, IsThird ); #else // R8B_FLTTEST IsWhole = getWholeStepping( SrcSampleRate, DstSampleRate, InStep, OutStep ); if( IsWhole ) { InitFracPosW = (int) ( InitFracPos * OutStep ); LatencyFrac = InitFracPos - (double) InitFracPosW / OutStep; FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank( OutStep, 1, 2, ReqAtten, IsThird, false ); } else { LatencyFrac = 0.0; FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank( -1, 3, 8, ReqAtten, IsThird, true ); } #endif // R8B_FLTTEST FilterLen = FilterBank -> getFilterLen(); fl2 = FilterLen >> 1; fll = fl2 - 1; flo = fll + fl2; flb = BufLen - fll; R8BASSERT(( 1 << BufLenBits ) >= FilterLen * 3 ); static const CConvolveFn FltConvFn0[ 13 ] = { &CDSPFracInterpolator :: convolve0< 6 >, &CDSPFracInterpolator :: convolve0< 8 >, &CDSPFracInterpolator :: convolve0< 10 >, &CDSPFracInterpolator :: convolve0< 12 >, &CDSPFracInterpolator :: convolve0< 14 >, &CDSPFracInterpolator :: convolve0< 16 >, &CDSPFracInterpolator :: convolve0< 18 >, &CDSPFracInterpolator :: convolve0< 20 >, &CDSPFracInterpolator :: convolve0< 22 >, &CDSPFracInterpolator :: convolve0< 24 >, &CDSPFracInterpolator :: convolve0< 26 >, &CDSPFracInterpolator :: convolve0< 28 >, &CDSPFracInterpolator :: convolve0< 30 > }; convfn = ( IsWhole ? FltConvFn0[ fl2 - 3 ] : &CDSPFracInterpolator :: convolve2 ); R8BCONSOLE( "CDSPFracInterpolator: src=%.2f dst=%.2f taps=%i " "fracs=%i whole=%i third=%i step=%.6f\n", SrcSampleRate, DstSampleRate, FilterLen, ( IsWhole ? OutStep : FilterBank -> getFilterFracs() ), (int) IsWhole, (int) IsThird, aSrcSampleRate / aDstSampleRate ); clear(); } virtual ~CDSPFracInterpolator() { #if R8B_FLTTEST delete FilterBank; #else // R8B_FLTTEST FilterBank -> unref(); #endif // R8B_FLTTEST } virtual int getLatency() const { return( 0 ); } virtual double getLatencyFrac() const { return( LatencyFrac ); } virtual int getMaxOutLen( const int MaxInLen ) const { R8BASSERT( MaxInLen >= 0 ); return( (int) ceil( MaxInLen * DstSampleRate / SrcSampleRate ) + 1 ); } virtual void clear() { LatencyLeft = Latency; BufLeft = 0; WritePos = 0; ReadPos = flb; // Set "read" position to account for filter's // latency at zero fractional delay. memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ])); if( IsWhole ) { InPosFracW = InitFracPosW; } else { InPosFrac = InitFracPos; #if !R8B_FASTTIMING InCounter = 0; InPosInt = 0; InPosShift = InitFracPos * DstSampleRate / SrcSampleRate; #endif // !R8B_FASTTIMING } } virtual int process( double* ip, int l, double*& op0 ) { R8BASSERT( l >= 0 ); R8BASSERT( ip != op0 || l == 0 || SrcSampleRate > DstSampleRate ); if( LatencyLeft != 0 ) { if( LatencyLeft >= l ) { LatencyLeft -= l; return( 0 ); } l -= LatencyLeft; ip += LatencyLeft; LatencyLeft = 0; } double* op = op0; while( l > 0 ) { // Add new input samples to both halves of the ring buffer. const int b = min( min( l, BufLen - WritePos ), flb - BufLeft ); double* const wp1 = Buf + WritePos; memcpy( wp1, ip, b * sizeof( wp1[ 0 ])); if( WritePos < flo ) { const int c = min( b, flo - WritePos ); memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ])); } ip += b; WritePos = ( WritePos + b ) & BufLenMask; l -= b; BufLeft += b; // Produce as many output samples as possible. op = ( *this.*convfn )( op ); } #if !R8B_FASTTIMING if( !IsWhole && InCounter > 1000 ) { // Reset the interpolation position counter to achieve a // higher sample timing precision. InCounter = 0; InPosInt = 0; InPosShift = InPosFrac * DstSampleRate / SrcSampleRate; } #endif // !R8B_FASTTIMING return( (int) ( op - op0 )); } private: static const int BufLenBits = 8; ///< The length of the ring buffer, ///< expressed as Nth power of 2. This value can be reduced if it is ///< known that only short input buffers will be passed to the ///< interpolator. The minimum value of this parameter is 5, and ///< 1 << BufLenBits should be at least 3 times larger than the ///< FilterLen. However, this condition can be easily met if the input ///< signal is suitably downsampled first before the interpolation is ///< performed. ///< static const int BufLen = 1 << BufLenBits; ///< The length of the ring ///< buffer. The actual length is twice as long to allow "beyond max ///< position" positioning. ///< static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer ///< position wrapping. ///< int FilterLen; ///< Filter length, in taps. Even value. ///< int fl2; ///< Right-side (half) filter length. ///< int fll; ///< Input latency. ///< int flo; ///< Overrun length. ///< int flb; ///< Initial read position and maximal buffer write length. ///< double Buf[ BufLen + 29 ]; ///< The ring buffer, including overrun ///< protection for maximal filter length. ///< double SrcSampleRate; ///< Source sample rate. ///< double DstSampleRate; ///< Destination sample rate. ///< bool IsWhole; ///< "True" if whole-number stepping is in use. ///< int InStep; ///< Input whole-number stepping. ///< int OutStep; ///< Output whole-number stepping (corresponds to filter bank ///< size). ///< double InitFracPos; ///< Initial fractional position, in samples, in the ///< range [0; 1). ///< int InitFracPosW; ///< Initial fractional position for whole-number ///< stepping. ///< int Latency; ///< Initial latency that should be removed from the input. ///< double LatencyFrac; ///< Left-over fractional latency. ///< int BufLeft; ///< The number of samples left in the buffer to process. ///< int WritePos; ///< The current buffer write position. Incremented together ///< with the BufLeft variable. ///< int ReadPos; ///< The current buffer read position. ///< int LatencyLeft; ///< Input latency left to remove. ///< double InPosFrac; ///< Interpolation position (fractional part). ///< int InPosFracW; ///< Interpolation position (fractional part) for ///< whole-number stepping. Corresponds to the index into the filter ///< bank. ///< CDSPFracDelayFilterBank* FilterBank; ///< Filter bank in use, may be ///< whole-number stepping filter bank or static bank. ///< #if R8B_FASTTIMING double FracStep; ///< Fractional sample timing step. #else // R8B_FASTTIMING int InCounter; ///< Interpolation step counter. ///< int InPosInt; ///< Interpolation position (integer part). ///< double InPosShift; ///< Interpolation position fractional shift. ///< #endif // R8B_FASTTIMING typedef double*( CDSPFracInterpolator :: *CConvolveFn )( double* op ); ///< ///< Convolution function type. ///< CConvolveFn convfn; ///< Convolution function in use. ///< /** * Convolution function for 0th order resampling. * * @param[out] op Output buffer. * @return Advanced "op" value. * @tparam fltlen Filter length, in taps. */ template< int fltlen > double* convolve0( double* op ) { while( BufLeft > fl2 ) { const double* const ftp = &(*FilterBank)[ InPosFracW ]; const double* const rp = Buf + ReadPos; int i; #if defined( R8B_SSE2 ) && !defined( __INTEL_COMPILER ) __m128d s = _mm_setzero_pd(); for( i = 0; i < fltlen; i += 2 ) { const __m128d m = _mm_mul_pd( _mm_load_pd( ftp + i ), _mm_loadu_pd( rp + i )); s = _mm_add_pd( s, m ); } _mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 ))); #elif defined( R8B_NEON ) float64x2_t s = vdupq_n_f64( 0.0 ); for( i = 0; i < fltlen; i += 2 ) { s = vmlaq_f64( s, vld1q_f64( ftp + i ), vld1q_f64( rp + i )); } *op = vaddvq_f64( s ); #else // SIMD double s = 0.0; for( i = 0; i < fltlen; i++ ) { s += ftp[ i ] * rp[ i ]; } *op = s; #endif // SIMD op++; InPosFracW += InStep; const int PosIncr = InPosFracW / OutStep; InPosFracW -= PosIncr * OutStep; ReadPos = ( ReadPos + PosIncr ) & BufLenMask; BufLeft -= PosIncr; } return( op ); } /** * Convolution function for 2nd order resampling. * * @param[out] op Output buffer. * @return Advanced "op" value. */ double* convolve2( double* op ) { const CDSPFracDelayFilterBank& fb = *FilterBank; const int fltlen = FilterLen; while( BufLeft > fl2 ) { double x = InPosFrac * fb.getFilterFracs(); const int fti = (int) x; // Function table index. x -= fti; // Coefficient for interpolation between // adjacent fractional delay filters. const double x2d = x * x; const double* ftp = &fb[ fti ]; const double* const rp = Buf + ReadPos; int i; #if defined( R8B_SSE2 ) && defined( R8B_SIMD_ISH ) const __m128d x1 = _mm_set1_pd( x ); const __m128d x2 = _mm_set1_pd( x2d ); __m128d s = _mm_setzero_pd(); for( i = 0; i < fltlen; i += 2 ) { const __m128d ftp2 = _mm_load_pd( ftp + 2 ); const __m128d xx1 = _mm_mul_pd( ftp2, x1 ); const __m128d ftp4 = _mm_load_pd( ftp + 4 ); const __m128d xx2 = _mm_mul_pd( ftp4, x2 ); const __m128d ftp0 = _mm_load_pd( ftp ); ftp += 6; const __m128d rpi = _mm_loadu_pd( rp + i ); const __m128d xxs = _mm_add_pd( ftp0, _mm_add_pd( xx1, xx2 )); s = _mm_add_pd( s, _mm_mul_pd( rpi, xxs )); } _mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 ))); #elif defined( R8B_NEON ) && defined( R8B_SIMD_ISH ) const float64x2_t x1 = vdupq_n_f64( x ); const float64x2_t x2 = vdupq_n_f64( x2d ); float64x2_t s = vdupq_n_f64( 0.0 ); for( i = 0; i < fltlen; i += 2 ) { const float64x2_t ftp2 = vld1q_f64( ftp + 2 ); const float64x2_t xx1 = vmulq_f64( ftp2, x1 ); const float64x2_t ftp4 = vld1q_f64( ftp + 4 ); const float64x2_t xx2 = vmulq_f64( ftp4, x2 ); const float64x2_t ftp0 = vld1q_f64( ftp ); ftp += 6; const float64x2_t rpi = vld1q_f64( rp + i ); const float64x2_t xxs = vaddq_f64( ftp0, vaddq_f64( xx1, xx2 )); s = vmlaq_f64( s, rpi, xxs ); } *op = vaddvq_f64( s ); #else // SIMD double s = 0.0; for( i = 0; i < fltlen; i++ ) { s += ( ftp[ 0 ] + ftp[ 1 ] * x + ftp[ 2 ] * x2d ) * rp[ i ]; ftp += 3; } *op = s; #endif // SIMD op++; #if R8B_FASTTIMING InPosFrac += FracStep; const int PosIncr = (int) InPosFrac; InPosFrac -= PosIncr; #else // R8B_FASTTIMING InCounter++; const double NextInPos = ( InCounter + InPosShift ) * SrcSampleRate / DstSampleRate; const int NextInPosInt = (int) NextInPos; const int PosIncr = NextInPosInt - InPosInt; InPosInt = NextInPosInt; InPosFrac = NextInPos - NextInPosInt; #endif // R8B_FASTTIMING ReadPos = ( ReadPos + PosIncr ) & BufLenMask; BufLeft -= PosIncr; } return( op ); } }; // --------------------------------------------------------------------------- } // namespace r8b #endif // R8B_CDSPFRACINTERPOLATOR_INCLUDED