|
#include "main.h"
#include "SABuffer.h"
#include <math.h>
#include <string.h>
#include "WinampAttributes.h"
#include "fft.h"
- extern int _srate;
#ifdef _M_IX86
// Converts a float to int with the x87 FISTP instruction. FISTP honours the
// FPU's current rounding mode, which is round-to-nearest (ties to even) unless
// the control word has been changed.
__inline static int lrint(float flt)
{
    int intgr;
    _asm
    {
        fld flt
        fistp intgr
    }
    return intgr;
}
#else
// Portable counterpart of the x86 version above: rounds to the nearest
// integer, with ties going to the even neighbour, so that every build target
// produces the same values. (A plain (int) cast would truncate toward zero
// instead and disagree with the x86 path for any fractional part above 0.5.)
// The input must fit in an int.
__inline static int lrint(float flt)
{
    int whole = (int)flt;            // truncates toward zero
    float frac = flt - (float)whole; // in (-1, 1), carries the sign of flt

    if (frac > 0.5f || (frac == 0.5f && (whole & 1)))
        ++whole;
    else if (frac < -0.5f || (frac == -0.5f && (whole & 1)))
        --whole;
    return whole;
}
#endif
// Branch-free min(x, b): (b - x) + |b - x| is twice the positive part of the
// gap, so subtracting half of it from b yields x when x < b and b otherwise.
// quantizes to 23 bits - use appropriately
inline static float fastmin(float x, const float b)
{
    const float gap = b - x;
    const float doubled_positive_gap = gap + (float)fabs(gap);
    const float positive_gap = doubled_positive_gap * 0.5f;
    return b - positive_gap;
}
// In-place statement form of fastmin(): replaces the lvalue x with min(x, b).
// Arguments are parenthesised so that compound expressions (e.g. a ?: for b)
// expand correctly, and the body is wrapped in do { } while (0) so that
// "FASTMIN(v, lim);" behaves as one statement, including inside if/else.
// Note: x and b are each evaluated more than once - pass side-effect-free
// expressions only.
#define FASTMIN(x,b) \
    do { \
        (x) = (b) - (x); \
        (x) += (float)fabs(x); \
        (x) *= 0.5f; \
        (x) = (b) - (x); \
    } while (0)
// Branch-free clamp of x to the range [a, b] (expects a <= b):
// (|x - a| - |x - b| + a + b) / 2 equals a below the range, b above it,
// and x itself in between.
inline static float fastclip(float x, const float a, const float b)
{
    const float dist_to_low = (float)fabs(x - a);
    const float dist_to_high = (float)fabs(x - b);
    float acc = dist_to_low + (a + b);
    acc -= dist_to_high;
    return acc * 0.5f;
}
- void makeOscData(char *tempdata, char *data_buf, int little_block, int channels, int bits)
- {
- float dd = little_block/75.0f;
- int x,c;
- int stride=bits/8; // number of bytes between samples
- // we're calculating using only the most significant byte,
- // because we only end up with 6 bit data anyway
- // if you want full resolution, check out CVS tag BETA_2005_1122_182830, file: vis.c
- char *ptr, *sbuf = data_buf;
- for (x = 0; x < 75; x ++)
- {
- float val=0;
- int index =(int)((float)x * dd); // calculate the nearest sample for this point, interpolation is too expensive for this use
- ptr=&sbuf[index*stride*channels+stride-1]; // find first sample, and offset for little endian
- for (c=0;c<channels;c++)
- {
- val += (float)*ptr / 8.0f; // we want our final value to be -32 to 32
- ptr+=stride; // jump to the next sample (channels are interleaved)
- }
- tempdata[x] = (char)lrint(val / (float)channels); // average the channels
- }
- }
// Fast approximation of 2^val for an IEEE-754 double.
// The integer part of val becomes the exponent field and (1 + fractional
// part) becomes the mantissa, i.e. the result is a straight line between
// consecutive powers of two: exact for integer val, up to roughly 6% too
// large in between. Intended for about -1022 <= val < 1024; outside that
// range the exponent field overflows and the result is meaningless.
//
// The bit pattern is accessed through memcpy rather than through a casted
// int pointer, so the code does not depend on byte order and does not break
// strict-aliasing rules.
inline double fast_exp2(const double val)
{
    int biasedExp;   // value for the 11-bit exponent field of the result
    double mantissa; // 1 + fractional part of val, nominally in [1, 2)

    if (val >= 0)
    {
        const int whole = (int)val;
        mantissa = val - (whole - 1);
        biasedExp = whole + 1023;
    }
    else
    {
        // Adding the bias first makes the value positive, so the truncating
        // cast acts like floor() for val > -1023.
        const int shifted = (int)(val + 1023);
        mantissa = val - (shifted - 1024);
        biasedExp = shifted;
    }

    // Floating-point rounding can push the mantissa just outside [1, 2)
    // (e.g. val = -1e-14 gives 0.99999...). Its exponent field would then not
    // be the one the code below assumes, and the result would be off by a
    // factor of two. Pull it back to the nearest valid value.
    if (mantissa < 1.0)
    {
        mantissa = 1.0;
    }
    else if (mantissa >= 2.0)
    {
        mantissa = 1.0;
        ++biasedExp;
    }

    unsigned long long bits;
    memcpy(&bits, &mantissa, sizeof bits);

    // The exponent occupies bits 20..30 of the upper 32-bit half.
    unsigned int high = (unsigned int)(bits >> 32);
    high &= ~(2047u << 20);
    high += (unsigned int)biasedExp << 20;
    bits = (bits & 0xFFFFFFFFULL) | ((unsigned long long)high << 32);

    double result;
    memcpy(&result, &bits, sizeof result);
    return result;
}
// The hand-written versions below use MSVC inline assembly, which only exists
// for 32-bit x86 targets (the same _M_IX86 guard used for lrint in this
// file). Every other target gets the portable implementations in the #else
// branch so the helpers are available everywhere.
#if defined(_M_IX86)
// Approximate square root: reciprocal of the SSE reciprocal-square-root
// estimate.
// ~6 clocks on Pentium M vs. ~24 for single precision sqrtf
static inline float squareroot_sse_11bits(float x)
{
    float z;
    _asm
    {
        rsqrtss xmm0, x
        rcpss xmm0, xmm0
        movss z, xmm0 // z ~= sqrt(x) to 0.038%
    }
    return z;
}
// floor(x) as an int without changing the FPU rounding mode: computes
// 2*x - 0.5, converts it with FISTP and arithmetic-shifts the result right by
// one. With the default round-to-nearest mode this yields floor(x).
static inline int floor_int(double x)
{
    int i;
    static const float round_toward_m_i = -0.5f;
    __asm
    {
        fld x
        fadd st, st(0)
        fadd round_toward_m_i
        fistp i
        sar i, 1
    }
    return (i);
}
#else
// Portable fallback: full-precision square root (at least as accurate as the
// x86 estimate above).
static inline float squareroot_sse_11bits(float x)
{
    return (float)sqrt((double)x);
}
// Portable fallback: largest integer not greater than x. The cast truncates
// toward zero, so negative non-integers need one extra step down.
// x must fit in an int.
static inline int floor_int(double x)
{
    const int truncated = (int)x;
    return (x < (double)truncated) ? truncated - 1 : truncated;
}
#endif
- /*
- static inline float hermite(float x, float y0, float y1, float y2, float y3)
- {
- // 4-point, 3rd-order Hermite (x-form)
- float c0 = y1;
- float c1 = 0.5f * (y2 - y0);
- float c2 = y0 - 2.5f * y1 + 2.f * y2 - 0.5f * y3;
- float c3 = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);
- return ((c3 * x + c2) * x + c1) * x + c0;
- }
- */
- /*
- static const float c_half = 0.5f;
- __declspec(naked) static float hermite(float frac_pos, const float* pntr)
- {
- __asm
- {
- push ecx;
- mov ecx, dword ptr[esp + 12]; //////////////////////////////////////////////////////////////////////////////////////////////////
- add ecx, 0x04; // ST(0) ST(1) ST(2) ST(3) ST(4) ST(5) ST(6) ST(7)
- fld dword ptr [ecx+4]; // x1
- fsub dword ptr [ecx-4]; // x1-xm1
- fld dword ptr [ecx]; // x0 x1-xm1
- fsub dword ptr [ecx+4]; // v x1-xm1
- fld dword ptr [ecx+8]; // x2 v x1-xm1
- fsub dword ptr [ecx]; // x2-x0 v x1-xm1
- fxch st(2); // x1-m1 v x2-x0
- fmul c_half; // c v x2-x0
- fxch st(2); // x2-x0 v c
- fmul c_half; // 0.5*(x2-x0) v c
- fxch st(2); // c v 0.5*(x2-x0)
- fst st(3); // c v 0.5*(x2-x0) c
- fadd st(0), st(1); // w v 0.5*(x2-x0) c
- fxch st(2); // 0.5*(x2-x0) v w c
- faddp st(1), st(0); // v+.5(x2-x0) w c
- fadd st(0), st(1); // a w c
- fadd st(1), st(0); // a b_neg c
- fmul dword ptr [esp+8]; // a*frac b_neg c
- fsubrp st(1), st(0); // a*f-b c
- fmul dword ptr [esp+8]; // (a*f-b)*f c
- faddp st(1), st(0); // res-x0/f
- fmul dword ptr [esp+8]; // res-x0
- fadd dword ptr [ecx]; // res
- pop ecx;
- ret;
- }
- }
- */
// 4-point, 3rd-order Hermite interpolation (x-form).
// y0..y3 are four consecutive samples; x is the position between y1 and y2
// (x = 0 returns y1, x = 1 returns y2).
inline float hermite(float x, float y0, float y1, float y2, float y3)
{
    const float constant = y1;
    const float linear = 0.5f * (y2 - y0);
    const float cubic = 1.5f * (y1 - y2) + 0.5f * (y3 - y0);
    const float quadratic = y0 - y1 + linear - cubic;

    // Horner evaluation of cubic*x^3 + quadratic*x^2 + linear*x + constant
    float acc = cubic * x + quadratic;
    acc = acc * x + linear;
    acc = acc * x + constant;
    return acc;
}
// Approximates 2^y for a single-precision float.
// The integer part of y is written straight into the exponent field of the
// float; the fractional part is handled by a quadratic that equals 1 at
// frac = 0 and 2 at frac = 1.
static inline float fpow2(const float y)
{
    union
    {
        float f;
        int i;
    } pun;

    const int whole = lrint(floor(y));
    // The original carried a disabled "if (y < 0) whole -= 1" correction,
    // noted as unnecessary because callers guarantee y >= 0.
    const float frac = y - (float)whole;

    pun.i = (whole + 127) << 23; // bit pattern of 2^whole (bias 127, 23 mantissa bits)
    pun.f *= 0.33977f*frac*frac + (1.0f-0.33977f)*frac + 1.0f;
    return pun.f;
}
- //#define SAPOW(x) (powf(2.f, (float)(x)/12.f))
- #define SAPOW(x) (fpow2((float)(x)/12.f))
- //#define WARP(x) ((powf(1.1f, (float)(x)/12.f) - 1.) * bla)
- #define WARP(x) ((SAPOW(x) - 1.f) * bla)
- void makeSpecData(unsigned char *tempdata, float *wavetrum)
- {
- //WARP(75);
- float bla = (255.f/SAPOW(75.f));
- fft_9(wavetrum);
- float spec_scale=0.5;
- if (config_replaygain)
- { // benski> i'm sure there's some math identity we can use to optimize this.
- spec_scale/=pow(10.0f, config_replaygain_non_rg_gain.GetFloat() / 20.0f);
- }
- for (int i=0;i<256;i++)
- {
- //int lookup=2*i;
- float sinT = wavetrum[2*i];
- float cosT = wavetrum[2*i+1];
- wavetrum[i] = sqrt(sinT*sinT+cosT*cosT)*spec_scale;
- }
- float next = WARP(0)+1 ;
- for (int x = 0; x < 75; x ++)
- {
- //float prev = 1.+(pow(2.,(float)x/12.) -1.) * bla;
- float binF = next;
- next = WARP(x+1) +1;
- float thisValue = 0;
- int bin = lrint(floor(binF));
- int end = lrint(floor(next));
- end = min(end, 255);
- float mult = ((float)(bin+1))-binF;
- bool herm=true;
- do
- {
- if (bin == end)
- {
- mult = (next-binF);
- herm=true;
- }
- if (herm)
- {
- float C=0, D=0;
- if (bin<255)
- {
- C=wavetrum[bin+1];
- if (bin<254)
- D=wavetrum[bin+2];
- }
- //float samples[4] = { wavetrum[lookupA], wavetrum[lookupB], wavetrum[lookupC], wavetrum[lookupD] };
- //thisValue += hermite(binF-bin, samples) * mult;
- thisValue += hermite(binF-bin, wavetrum[bin-1], wavetrum[bin], C, D) * mult;
- }
- else
- {
- thisValue += wavetrum[bin];
- }
- herm=false;
- bin++;
- binF=(float)bin;
- }
- while (bin <= end);
- tempdata[x]=lrint(fastmin(thisValue, 255.f));
- }
- }
- ////////////////////////////////
- SABuffer saBuffer;
- void sa_addpcmdata(void *_data_buf, int numChannels, int numBits, int ts)
- {
- char *data_buf = reinterpret_cast<char *>(_data_buf);
- char tempdata[75*2] = {0};
- __declspec(align(16)) float wavetrum[512];
- //extern int sa_curmode;
- int vis_Csa=sa_override ? 3 : sa_curmode;
- switch (vis_Csa)
- {
- case 4:
- tempdata[0] = 0;
- tempdata[1] = 0;
- sa_add(tempdata,ts,4);
- return;
- case 2:
- makeOscData(tempdata,data_buf,576,numChannels, numBits);
- sa_add(tempdata,ts,2);
- return ;
- case 3:
- makeOscData(tempdata+75,data_buf,576,numChannels, numBits);
- // fall through!
- case 1:
- calcVuData((unsigned char*)tempdata, data_buf, numChannels, numBits);
- vu_add(tempdata, ts);
- break;
- }
- bool done=false;
- size_t samples=576;
- while (samples)
- {
- unsigned int copied = saBuffer.AddToBuffer(data_buf, numChannels, numBits, ts, (unsigned int) samples);
- samples-=copied;
- data_buf+=(copied*(numBits/8)*numChannels);
- if (saBuffer.Full())
- {
- saBuffer.WindowToFFTBuffer(wavetrum);
- if (!done)
- {
- if (vis_Csa == 3)
- {
- makeSpecData((unsigned char*)tempdata, wavetrum);
- sa_add(tempdata, ts, 0x80000003);
- }
- else if (vis_Csa == 1)
- {
- makeSpecData((unsigned char*)tempdata, wavetrum);
- sa_add(tempdata, ts, 1);
- }
- }
- //done=true;
- saBuffer.CopyHalf();
- ts+=MulDiv(SABUFFER_WINDOW_INCREMENT,1000,_srate);
- }
- }
- }
|