Reverb.cpp 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. /*
  2. * Reverb.cpp
  3. * ----------
  4. * Purpose: Mixing code for reverb.
  5. * Notes : Ugh... This should really be removed at some point.
  6. * Authors: Olivier Lapicque
  7. * OpenMPT Devs
  8. * The OpenMPT source code is released under the BSD license. Read LICENSE for more details.
  9. */
  10. #include "stdafx.h"
  11. #ifndef NO_REVERB
  12. #include "Reverb.h"
  13. #include "../soundlib/MixerLoops.h"
  14. #include "mpt/base/numbers.hpp"
  15. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  16. #include <emmintrin.h>
  17. #endif
  18. #endif // NO_REVERB
  19. OPENMPT_NAMESPACE_BEGIN
  20. #ifndef NO_REVERB
  21. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  22. // Load two 32-bit values
  23. static MPT_FORCEINLINE __m128i Load64SSE(const int32 *x) { return _mm_loadl_epi64(reinterpret_cast<const __m128i *>(x)); }
  24. // Load four 16-bit values
  25. static MPT_FORCEINLINE __m128i Load64SSE(const LR16 (&x)[2]) { return _mm_loadl_epi64(&reinterpret_cast<const __m128i &>(x)); }
  26. // Store two 32-bit or four 16-bit values from register
  27. static MPT_FORCEINLINE void Store64SSE(int32 *dst, __m128i src) { return _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), src); }
  28. static MPT_FORCEINLINE void Store64SSE(LR16 (&dst)[2], __m128i src) { return _mm_storel_epi64(&reinterpret_cast<__m128i &>(dst), src); }
  29. #endif
  30. CReverb::CReverb()
  31. {
  32. // Reverb mix buffers
  33. MemsetZero(g_RefDelay);
  34. MemsetZero(g_LateReverb);
  35. }
  36. static int32 OnePoleLowPassCoef(int32 scale, float g, float F_c, float F_s)
  37. {
  38. if(g > 0.999999f) return 0;
  39. g *= g;
  40. double scale_over_1mg = scale / (1.0 - g);
  41. double cosw = std::cos((2.0 * mpt::numbers::pi) * F_c / F_s);
  42. return mpt::saturate_round<int32>((1.0 - (std::sqrt((g + g) * (1.0 - cosw) - g * g * (1.0 - cosw * cosw)) + g * cosw)) * scale_over_1mg);
  43. }
  44. static float mBToLinear(int32 value_mB)
  45. {
  46. if(!value_mB) return 1;
  47. if(value_mB <= -100000) return 0;
  48. const double val = value_mB * 3.321928094887362304 / (100.0 * 20.0); // log2(10)/(100*20)
  49. return static_cast<float>(std::pow(2.0, val - static_cast<int32>(0.5 + val)));
  50. }
  51. static int32 mBToLinear(int32 scale, int32 value_mB)
  52. {
  53. return mpt::saturate_round<int32>(mBToLinear(value_mB) * scale);
  54. }
  55. static constexpr std::pair<SNDMIX_REVERB_PROPERTIES, const char *> ReverbPresets[NUM_REVERBTYPES] =
  56. {
  57. // Examples simulating General MIDI 2'musical' reverb presets
  58. // Name (Decay time) Description
  59. // Plate (1.3s) A plate reverb simulation.
  60. {{ -1000, -200, 1.30f,0.90f, 0,0.002f, 0,0.010f,100.0f, 75.0f }, "GM Plate"},
  61. // Small Room (1.1s) A small size room with a length of 5m or so.
  62. {{ -1000, -600, 1.10f,0.83f, -400,0.005f, 500,0.010f,100.0f,100.0f }, "GM Small Room"},
  63. // Medium Room (1.3s) A medium size room with a length of 10m or so.
  64. {{ -1000, -600, 1.30f,0.83f, -1000,0.010f, -200,0.020f,100.0f,100.0f }, "GM Medium Room"},
  65. // Large Room (1.5s) A large size room suitable for live performances.
  66. {{ -1000, -600, 1.50f,0.83f, -1600,0.020f, -1000,0.040f,100.0f,100.0f }, "GM Large Room"},
  67. // Medium Hall (1.8s) A medium size concert hall.
  68. {{ -1000, -600, 1.80f,0.70f, -1300,0.015f, -800,0.030f,100.0f,100.0f }, "GM Medium Hall"},
  69. // Large Hall (1.8s) A large size concert hall suitable for a full orchestra.
  70. {{ -1000, -600, 1.80f,0.70f, -2000,0.030f, -1400,0.060f,100.0f,100.0f }, "GM Large Hall"},
  71. {{ -1000, -100, 1.49f,0.83f, -2602,0.007f, 200,0.011f,100.0f,100.0f }, "Generic"},
  72. {{ -1000,-6000, 0.17f,0.10f, -1204,0.001f, 207,0.002f,100.0f,100.0f }, "Padded Cell"},
  73. {{ -1000, -454, 0.40f,0.83f, -1646,0.002f, 53,0.003f,100.0f,100.0f }, "Room"},
  74. {{ -1000,-1200, 1.49f,0.54f, -370,0.007f, 1030,0.011f,100.0f, 60.0f }, "Bathroom"},
  75. {{ -1000,-6000, 0.50f,0.10f, -1376,0.003f, -1104,0.004f,100.0f,100.0f }, "Living Room"},
  76. {{ -1000, -300, 2.31f,0.64f, -711,0.012f, 83,0.017f,100.0f,100.0f }, "Stone Room"},
  77. {{ -1000, -476, 4.32f,0.59f, -789,0.020f, -289,0.030f,100.0f,100.0f }, "Auditorium"},
  78. {{ -1000, -500, 3.92f,0.70f, -1230,0.020f, -2,0.029f,100.0f,100.0f }, "Concert Hall"},
  79. {{ -1000, 0, 2.91f,1.30f, -602,0.015f, -302,0.022f,100.0f,100.0f }, "Cave"},
  80. {{ -1000, -698, 7.24f,0.33f, -1166,0.020f, 16,0.030f,100.0f,100.0f }, "Arena"},
  81. {{ -1000,-1000,10.05f,0.23f, -602,0.020f, 198,0.030f,100.0f,100.0f }, "Hangar"},
  82. {{ -1000,-4000, 0.30f,0.10f, -1831,0.002f, -1630,0.030f,100.0f,100.0f }, "Carpeted Hallway"},
  83. {{ -1000, -300, 1.49f,0.59f, -1219,0.007f, 441,0.011f,100.0f,100.0f }, "Hallway"},
  84. {{ -1000, -237, 2.70f,0.79f, -1214,0.013f, 395,0.020f,100.0f,100.0f }, "Stone Corridor"},
  85. {{ -1000, -270, 1.49f,0.86f, -1204,0.007f, -4,0.011f,100.0f,100.0f }, "Alley"},
  86. {{ -1000,-3300, 1.49f,0.54f, -2560,0.162f, -613,0.088f, 79.0f,100.0f }, "Forest"},
  87. {{ -1000, -800, 1.49f,0.67f, -2273,0.007f, -2217,0.011f, 50.0f,100.0f }, "City"},
  88. {{ -1000,-2500, 1.49f,0.21f, -2780,0.300f, -2014,0.100f, 27.0f,100.0f }, "Mountains"},
  89. {{ -1000,-1000, 1.49f,0.83f,-10000,0.061f, 500,0.025f,100.0f,100.0f }, "Quarry"},
  90. {{ -1000,-2000, 1.49f,0.50f, -2466,0.179f, -2514,0.100f, 21.0f,100.0f }, "Plain"},
  91. {{ -1000, 0, 1.65f,1.50f, -1363,0.008f, -1153,0.012f,100.0f,100.0f }, "Parking Lot"},
  92. {{ -1000,-1000, 2.81f,0.14f, 429,0.014f, 648,0.021f, 80.0f, 60.0f }, "Sewer Pipe"},
  93. {{ -1000,-4000, 1.49f,0.10f, -449,0.007f, 1700,0.011f,100.0f,100.0f }, "Underwater"},
  94. };
  95. mpt::ustring GetReverbPresetName(uint32 preset)
  96. {
  97. return (preset < NUM_REVERBTYPES) ? mpt::ToUnicode(mpt::Charset::ASCII, ReverbPresets[preset].second) : mpt::ustring{};
  98. }
  99. const SNDMIX_REVERB_PROPERTIES *GetReverbPreset(uint32 preset)
  100. {
  101. return (preset < NUM_REVERBTYPES) ? &ReverbPresets[preset].first : nullptr;
  102. }
  103. //////////////////////////////////////////////////////////////////////////
  104. //
  105. // I3DL2 environmental reverb support
  106. //
  107. struct REFLECTIONPRESET
  108. {
  109. int32 lDelayFactor;
  110. int16 sGainLL, sGainRR, sGainLR, sGainRL;
  111. };
  112. const REFLECTIONPRESET gReflectionsPreset[ENVIRONMENT_NUMREFLECTIONS] =
  113. {
  114. // %Delay, ll, rr, lr, rl
  115. {0, 9830, 6554, 0, 0},
  116. {10, 6554, 13107, 0, 0},
  117. {24, -9830, 13107, 0, 0},
  118. {36, 13107, -6554, 0, 0},
  119. {54, 16384, 16384, -1638, -1638},
  120. {61, -13107, 8192, -328, -328},
  121. {73, -11468, -11468, -3277, 3277},
  122. {87, 13107, -9830, 4916, -4916}
  123. };
  124. ////////////////////////////////////////////////////////////////////////////////////
  125. //
  126. // Implementation
  127. //
  128. MPT_FORCEINLINE int32 ftol(float f) { return static_cast<int32>(f); }
  129. static void I3dl2_to_Generic(
  130. const SNDMIX_REVERB_PROPERTIES *pReverb,
  131. EnvironmentReverb *pRvb,
  132. float flOutputFreq,
  133. int32 lMinRefDelay,
  134. int32 lMaxRefDelay,
  135. int32 lMinRvbDelay,
  136. int32 lMaxRvbDelay,
  137. int32 lTankLength)
  138. {
  139. float flDelayFactor, flDelayFactorHF, flDecayTimeHF;
  140. int32 lDensity, lTailDiffusion;
  141. // Common parameters
  142. pRvb->ReverbLevel = pReverb->lReverb;
  143. pRvb->ReflectionsLevel = pReverb->lReflections;
  144. pRvb->RoomHF = pReverb->lRoomHF;
  145. // HACK: Somewhat normalize the reverb output level
  146. int32 lMaxLevel = (pRvb->ReverbLevel > pRvb->ReflectionsLevel) ? pRvb->ReverbLevel : pRvb->ReflectionsLevel;
  147. if (lMaxLevel < -600)
  148. {
  149. lMaxLevel += 600;
  150. pRvb->ReverbLevel -= lMaxLevel;
  151. pRvb->ReflectionsLevel -= lMaxLevel;
  152. }
  153. // Pre-Diffusion factor (for both reflections and late reverb)
  154. lDensity = 8192 + ftol(79.31f * pReverb->flDensity);
  155. pRvb->PreDiffusion = lDensity;
  156. // Late reverb diffusion
  157. lTailDiffusion = ftol((0.15f + pReverb->flDiffusion * (0.36f*0.01f)) * 32767.0f);
  158. if (lTailDiffusion > 0x7f00) lTailDiffusion = 0x7f00;
  159. pRvb->TankDiffusion = lTailDiffusion;
  160. // Verify reflections and reverb delay parameters
  161. float flRefDelay = pReverb->flReflectionsDelay;
  162. if (flRefDelay > 0.100f) flRefDelay = 0.100f;
  163. int32 lReverbDelay = ftol(pReverb->flReverbDelay * flOutputFreq);
  164. int32 lReflectionsDelay = ftol(flRefDelay * flOutputFreq);
  165. int32 lReverbDecayTime = ftol(pReverb->flDecayTime * flOutputFreq);
  166. if (lReflectionsDelay < lMinRefDelay)
  167. {
  168. lReverbDelay -= (lMinRefDelay - lReflectionsDelay);
  169. lReflectionsDelay = lMinRefDelay;
  170. }
  171. if (lReflectionsDelay > lMaxRefDelay)
  172. {
  173. lReverbDelay += (lReflectionsDelay - lMaxRefDelay);
  174. lReflectionsDelay = lMaxRefDelay;
  175. }
  176. // Adjust decay time when adjusting reverb delay
  177. if (lReverbDelay < lMinRvbDelay)
  178. {
  179. lReverbDecayTime -= (lMinRvbDelay - lReverbDelay);
  180. lReverbDelay = lMinRvbDelay;
  181. }
  182. if (lReverbDelay > lMaxRvbDelay)
  183. {
  184. lReverbDecayTime += (lReverbDelay - lMaxRvbDelay);
  185. lReverbDelay = lMaxRvbDelay;
  186. }
  187. pRvb->ReverbDelay = lReverbDelay;
  188. pRvb->ReverbDecaySamples = lReverbDecayTime;
  189. // Setup individual reflections delay and gains
  190. for (uint32 iRef=0; iRef<ENVIRONMENT_NUMREFLECTIONS; iRef++)
  191. {
  192. EnvironmentReflection &ref = pRvb->Reflections[iRef];
  193. ref.Delay = lReflectionsDelay + (gReflectionsPreset[iRef].lDelayFactor * lReverbDelay + 50)/100;
  194. ref.GainLL = gReflectionsPreset[iRef].sGainLL;
  195. ref.GainRL = gReflectionsPreset[iRef].sGainRL;
  196. ref.GainLR = gReflectionsPreset[iRef].sGainLR;
  197. ref.GainRR = gReflectionsPreset[iRef].sGainRR;
  198. }
  199. // Late reverb decay time
  200. if (lTankLength < 10) lTankLength = 10;
  201. flDelayFactor = (lReverbDecayTime <= lTankLength) ? 1.0f : ((float)lTankLength / (float)lReverbDecayTime);
  202. pRvb->ReverbDecay = ftol(std::pow(0.001f, flDelayFactor) * 32768.0f);
  203. // Late Reverb Decay HF
  204. flDecayTimeHF = (float)lReverbDecayTime * pReverb->flDecayHFRatio;
  205. flDelayFactorHF = (flDecayTimeHF <= (float)lTankLength) ? 1.0f : ((float)lTankLength / flDecayTimeHF);
  206. pRvb->flReverbDamping = std::pow(0.001f, flDelayFactorHF);
  207. }
  208. void CReverb::Shutdown(MixSampleInt &gnRvbROfsVol, MixSampleInt &gnRvbLOfsVol)
  209. {
  210. gnReverbSend = false;
  211. gnRvbLOfsVol = 0;
  212. gnRvbROfsVol = 0;
  213. // Clear out all reverb state
  214. g_bLastInPresent = false;
  215. g_bLastOutPresent = false;
  216. g_nLastRvbIn_xl = g_nLastRvbIn_xr = 0;
  217. g_nLastRvbIn_yl = g_nLastRvbIn_yr = 0;
  218. g_nLastRvbOut_xl = g_nLastRvbOut_xr = 0;
  219. MemsetZero(gnDCRRvb_X1);
  220. MemsetZero(gnDCRRvb_Y1);
  221. // Zero internal buffers
  222. MemsetZero(g_LateReverb.Diffusion1);
  223. MemsetZero(g_LateReverb.Diffusion2);
  224. MemsetZero(g_LateReverb.Delay1);
  225. MemsetZero(g_LateReverb.Delay2);
  226. MemsetZero(g_RefDelay.RefDelayBuffer);
  227. MemsetZero(g_RefDelay.PreDifBuffer);
  228. MemsetZero(g_RefDelay.RefOut);
  229. }
  230. void CReverb::Initialize(bool bReset, MixSampleInt &gnRvbROfsVol, MixSampleInt &gnRvbLOfsVol, uint32 MixingFreq)
  231. {
  232. if (m_Settings.m_nReverbType >= NUM_REVERBTYPES) m_Settings.m_nReverbType = 0;
  233. const SNDMIX_REVERB_PROPERTIES *rvbPreset = &ReverbPresets[m_Settings.m_nReverbType].first;
  234. if ((rvbPreset != m_currentPreset) || (bReset))
  235. {
  236. // Reverb output frequency is half of the dry output rate
  237. float flOutputFrequency = (float)MixingFreq;
  238. EnvironmentReverb rvb;
  239. // Reset reverb parameters
  240. m_currentPreset = rvbPreset;
  241. I3dl2_to_Generic(rvbPreset, &rvb, flOutputFrequency,
  242. RVBMINREFDELAY, RVBMAXREFDELAY,
  243. RVBMINRVBDELAY, RVBMAXRVBDELAY,
  244. ( RVBDIF1L_LEN + RVBDIF1R_LEN
  245. + RVBDIF2L_LEN + RVBDIF2R_LEN
  246. + RVBDLY1L_LEN + RVBDLY1R_LEN
  247. + RVBDLY2L_LEN + RVBDLY2R_LEN) / 2);
  248. // Store reverb decay time (in samples) for reverb auto-shutdown
  249. gnReverbDecaySamples = rvb.ReverbDecaySamples;
  250. // Room attenuation at high frequencies
  251. int32 nRoomLP;
  252. nRoomLP = OnePoleLowPassCoef(32768, mBToLinear(rvb.RoomHF), 5000, flOutputFrequency);
  253. g_RefDelay.nCoeffs.c.l = (int16)nRoomLP;
  254. g_RefDelay.nCoeffs.c.r = (int16)nRoomLP;
  255. // Pre-Diffusion factor (for both reflections and late reverb)
  256. g_RefDelay.nPreDifCoeffs.c.l = (int16)(rvb.PreDiffusion*2);
  257. g_RefDelay.nPreDifCoeffs.c.r = (int16)(rvb.PreDiffusion*2);
  258. // Setup individual reflections delay and gains
  259. for (uint32 iRef=0; iRef<8; iRef++)
  260. {
  261. SWRvbReflection &ref = g_RefDelay.Reflections[iRef];
  262. ref.DelayDest = rvb.Reflections[iRef].Delay;
  263. ref.Delay = ref.DelayDest;
  264. ref.Gains[0].c.l = rvb.Reflections[iRef].GainLL;
  265. ref.Gains[0].c.r = rvb.Reflections[iRef].GainRL;
  266. ref.Gains[1].c.l = rvb.Reflections[iRef].GainLR;
  267. ref.Gains[1].c.r = rvb.Reflections[iRef].GainRR;
  268. }
  269. g_LateReverb.nReverbDelay = rvb.ReverbDelay;
  270. // Reflections Master Gain
  271. uint32 lReflectionsGain = 0;
  272. if (rvb.ReflectionsLevel > -9000)
  273. {
  274. lReflectionsGain = mBToLinear(32768, rvb.ReflectionsLevel);
  275. }
  276. g_RefDelay.lMasterGain = lReflectionsGain;
  277. // Late reverb master gain
  278. uint32 lReverbGain = 0;
  279. if (rvb.ReverbLevel > -9000)
  280. {
  281. lReverbGain = mBToLinear(32768, rvb.ReverbLevel);
  282. }
  283. g_LateReverb.lMasterGain = lReverbGain;
  284. // Late reverb diffusion
  285. uint32 nTailDiffusion = rvb.TankDiffusion;
  286. if (nTailDiffusion > 0x7f00) nTailDiffusion = 0x7f00;
  287. g_LateReverb.nDifCoeffs[0].c.l = (int16)nTailDiffusion;
  288. g_LateReverb.nDifCoeffs[0].c.r = (int16)nTailDiffusion;
  289. g_LateReverb.nDifCoeffs[1].c.l = (int16)nTailDiffusion;
  290. g_LateReverb.nDifCoeffs[1].c.r = (int16)nTailDiffusion;
  291. g_LateReverb.Dif2InGains[0].c.l = 0x7000;
  292. g_LateReverb.Dif2InGains[0].c.r = 0x1000;
  293. g_LateReverb.Dif2InGains[1].c.l = 0x1000;
  294. g_LateReverb.Dif2InGains[1].c.r = 0x7000;
  295. // Late reverb decay time
  296. int32 nReverbDecay = rvb.ReverbDecay;
  297. Limit(nReverbDecay, 0, 0x7ff0);
  298. g_LateReverb.nDecayDC[0].c.l = (int16)nReverbDecay;
  299. g_LateReverb.nDecayDC[0].c.r = 0;
  300. g_LateReverb.nDecayDC[1].c.l = 0;
  301. g_LateReverb.nDecayDC[1].c.r = (int16)nReverbDecay;
  302. // Late Reverb Decay HF
  303. float fReverbDamping = rvb.flReverbDamping * rvb.flReverbDamping;
  304. int32 nDampingLowPass;
  305. nDampingLowPass = OnePoleLowPassCoef(32768, fReverbDamping, 5000, flOutputFrequency);
  306. Limit(nDampingLowPass, 0x100, 0x7f00);
  307. g_LateReverb.nDecayLP[0].c.l = (int16)nDampingLowPass;
  308. g_LateReverb.nDecayLP[0].c.r = 0;
  309. g_LateReverb.nDecayLP[1].c.l = 0;
  310. g_LateReverb.nDecayLP[1].c.r = (int16)nDampingLowPass;
  311. }
  312. if (bReset)
  313. {
  314. gnReverbSamples = 0;
  315. Shutdown(gnRvbROfsVol, gnRvbLOfsVol);
  316. }
  317. // Wait at least 5 seconds before shutting down the reverb
  318. if (gnReverbDecaySamples < MixingFreq*5)
  319. {
  320. gnReverbDecaySamples = MixingFreq*5;
  321. }
  322. }
  323. void CReverb::TouchReverbSendBuffer(MixSampleInt *MixReverbBuffer, MixSampleInt &gnRvbROfsVol, MixSampleInt &gnRvbLOfsVol, uint32 nSamples)
  324. {
  325. if(!gnReverbSend)
  326. { // and we did not clear the buffer yet, do it now because we will get new data
  327. StereoFill(MixReverbBuffer, nSamples, gnRvbROfsVol, gnRvbLOfsVol);
  328. }
  329. gnReverbSend = true; // we will have to process reverb
  330. }
  331. // Reverb
  332. void CReverb::Process(MixSampleInt *MixSoundBuffer, MixSampleInt *MixReverbBuffer, MixSampleInt &gnRvbROfsVol, MixSampleInt &gnRvbLOfsVol, uint32 nSamples)
  333. {
  334. if((!gnReverbSend) && (!gnReverbSamples))
  335. { // no data is sent to reverb and reverb decayed completely
  336. return;
  337. }
  338. if(!gnReverbSend)
  339. { // no input data in MixReverbBuffer, so the buffer got not cleared in TouchReverbSendBuffer(), do it now for decay
  340. StereoFill(MixReverbBuffer, nSamples, gnRvbROfsVol, gnRvbLOfsVol);
  341. }
  342. uint32 nIn, nOut;
  343. // Dynamically adjust reverb master gains
  344. int32 lMasterGain;
  345. lMasterGain = ((g_RefDelay.lMasterGain * m_Settings.m_nReverbDepth) >> 4);
  346. if (lMasterGain > 0x7fff) lMasterGain = 0x7fff;
  347. g_RefDelay.ReflectionsGain.c.l = (int16)lMasterGain;
  348. g_RefDelay.ReflectionsGain.c.r = (int16)lMasterGain;
  349. lMasterGain = ((g_LateReverb.lMasterGain * m_Settings.m_nReverbDepth) >> 4);
  350. if (lMasterGain > 0x10000) lMasterGain = 0x10000;
  351. g_LateReverb.RvbOutGains[0].c.l = (int16)((lMasterGain+0x7f) >> 3); // l->l
  352. g_LateReverb.RvbOutGains[0].c.r = (int16)((lMasterGain+0xff) >> 4); // r->l
  353. g_LateReverb.RvbOutGains[1].c.l = (int16)((lMasterGain+0xff) >> 4); // l->r
  354. g_LateReverb.RvbOutGains[1].c.r = (int16)((lMasterGain+0x7f) >> 3); // r->r
  355. // Process Dry/Wet Mix
  356. int32 lMaxRvbGain = (g_RefDelay.lMasterGain > g_LateReverb.lMasterGain) ? g_RefDelay.lMasterGain : g_LateReverb.lMasterGain;
  357. if (lMaxRvbGain > 32768) lMaxRvbGain = 32768;
  358. int32 lDryVol = (36 - m_Settings.m_nReverbDepth)>>1;
  359. if (lDryVol < 8) lDryVol = 8;
  360. if (lDryVol > 16) lDryVol = 16;
  361. lDryVol = 16 - (((16-lDryVol) * lMaxRvbGain) >> 15);
  362. ReverbDryMix(MixSoundBuffer, MixReverbBuffer, lDryVol, nSamples);
  363. // Downsample 2x + 1st stage of lowpass filter
  364. nIn = ReverbProcessPreFiltering1x(MixReverbBuffer, nSamples);
  365. nOut = nIn;
  366. // Main reverb processing: split into small chunks (needed for short reverb delays)
  367. // Reverb Input + Low-Pass stage #2 + Pre-diffusion
  368. if (nIn > 0) ProcessPreDelay(&g_RefDelay, MixReverbBuffer, nIn);
  369. // Process Reverb Reflections and Late Reverberation
  370. int32 *pRvbOut = MixReverbBuffer;
  371. uint32 nRvbSamples = nOut;
  372. while (nRvbSamples > 0)
  373. {
  374. uint32 nPosRef = g_RefDelay.nRefOutPos & SNDMIX_REVERB_DELAY_MASK;
  375. uint32 nPosRvb = (nPosRef - g_LateReverb.nReverbDelay) & SNDMIX_REVERB_DELAY_MASK;
  376. uint32 nmax1 = (SNDMIX_REVERB_DELAY_MASK+1) - nPosRef;
  377. uint32 nmax2 = (SNDMIX_REVERB_DELAY_MASK+1) - nPosRvb;
  378. nmax1 = (nmax1 < nmax2) ? nmax1 : nmax2;
  379. uint32 n = nRvbSamples;
  380. if (n > nmax1) n = nmax1;
  381. if (n > 64) n = 64;
  382. // Reflections output + late reverb delay
  383. ProcessReflections(&g_RefDelay, &g_RefDelay.RefOut[nPosRef], pRvbOut, n);
  384. // Late Reverberation
  385. ProcessLateReverb(&g_LateReverb, &g_RefDelay.RefOut[nPosRvb], pRvbOut, n);
  386. // Update delay positions
  387. g_RefDelay.nRefOutPos = (g_RefDelay.nRefOutPos + n) & SNDMIX_REVERB_DELAY_MASK;
  388. g_RefDelay.nDelayPos = (g_RefDelay.nDelayPos + n) & SNDMIX_REFLECTIONS_DELAY_MASK;
  389. pRvbOut += n*2;
  390. nRvbSamples -= n;
  391. }
  392. // Adjust nDelayPos, in case nIn != nOut
  393. g_RefDelay.nDelayPos = (g_RefDelay.nDelayPos - nOut + nIn) & SNDMIX_REFLECTIONS_DELAY_MASK;
  394. // Upsample 2x
  395. ReverbProcessPostFiltering1x(MixReverbBuffer, MixSoundBuffer, nSamples);
  396. // Automatically shut down if needed
  397. if(gnReverbSend) gnReverbSamples = gnReverbDecaySamples; // reset decay counter
  398. else if(gnReverbSamples > nSamples) gnReverbSamples -= nSamples; // decay
  399. else // decayed
  400. {
  401. Shutdown(gnRvbROfsVol, gnRvbLOfsVol);
  402. gnReverbSamples = 0;
  403. }
  404. gnReverbSend = false; // no input data in MixReverbBuffer
  405. }
  406. void CReverb::ReverbDryMix(int32 * MPT_RESTRICT pDry, int32 * MPT_RESTRICT pWet, int lDryVol, uint32 nSamples)
  407. {
  408. for (uint32 i=0; i<nSamples; i++)
  409. {
  410. pDry[i*2] += (pWet[i*2]>>4) * lDryVol;
  411. pDry[i*2+1] += (pWet[i*2+1]>>4) * lDryVol;
  412. }
  413. }
  414. uint32 CReverb::ReverbProcessPreFiltering2x(int32 * MPT_RESTRICT pWet, uint32 nSamples)
  415. {
  416. uint32 nOutSamples = 0;
  417. int lowpass = g_RefDelay.nCoeffs.c.l;
  418. int y1_l = g_nLastRvbIn_yl, y1_r = g_nLastRvbIn_yr;
  419. uint32 n = nSamples;
  420. if (g_bLastInPresent)
  421. {
  422. int x1_l = g_nLastRvbIn_xl, x1_r = g_nLastRvbIn_xr;
  423. int x2_l = pWet[0], x2_r = pWet[1];
  424. x1_l = (x1_l+x2_l)>>13;
  425. x1_r = (x1_r+x2_r)>>13;
  426. y1_l = x1_l + (((x1_l - y1_l)*lowpass)>>15);
  427. y1_r = x1_r + (((x1_r - y1_r)*lowpass)>>15);
  428. pWet[0] = y1_l;
  429. pWet[1] = y1_r;
  430. pWet+=2;
  431. n--;
  432. nOutSamples = 1;
  433. g_bLastInPresent = false;
  434. }
  435. if (n & 1)
  436. {
  437. n--;
  438. g_nLastRvbIn_xl = pWet[n*2];
  439. g_nLastRvbIn_xr = pWet[n*2+1];
  440. g_bLastInPresent = true;
  441. }
  442. n >>= 1;
  443. for (uint32 i=0; i<n; i++)
  444. {
  445. int x1_l = pWet[i*4];
  446. int x2_l = pWet[i*4+2];
  447. x1_l = (x1_l+x2_l)>>13;
  448. int x1_r = pWet[i*4+1];
  449. int x2_r = pWet[i*4+3];
  450. x1_r = (x1_r+x2_r)>>13;
  451. y1_l = x1_l + (((x1_l - y1_l)*lowpass)>>15);
  452. y1_r = x1_r + (((x1_r - y1_r)*lowpass)>>15);
  453. pWet[i*2] = y1_l;
  454. pWet[i*2+1] = y1_r;
  455. }
  456. g_nLastRvbIn_yl = y1_l;
  457. g_nLastRvbIn_yr = y1_r;
  458. return nOutSamples + n;
  459. }
  460. uint32 CReverb::ReverbProcessPreFiltering1x(int32 * MPT_RESTRICT pWet, uint32 nSamples)
  461. {
  462. int lowpass = g_RefDelay.nCoeffs.c.l;
  463. int y1_l = g_nLastRvbIn_yl, y1_r = g_nLastRvbIn_yr;
  464. for (uint32 i=0; i<nSamples; i++)
  465. {
  466. int x_l = pWet[i*2] >> 12;
  467. int x_r = pWet[i*2+1] >> 12;
  468. y1_l = x_l + (((x_l - y1_l)*lowpass)>>15);
  469. y1_r = x_r + (((x_r - y1_r)*lowpass)>>15);
  470. pWet[i*2] = y1_l;
  471. pWet[i*2+1] = y1_r;
  472. }
  473. g_nLastRvbIn_yl = y1_l;
  474. g_nLastRvbIn_yr = y1_r;
  475. return nSamples;
  476. }
  477. void CReverb::ReverbProcessPostFiltering2x(const int32 * MPT_RESTRICT pRvb, int32 * MPT_RESTRICT pDry, uint32 nSamples)
  478. {
  479. uint32 n0 = nSamples, n;
  480. int x1_l = g_nLastRvbOut_xl, x1_r = g_nLastRvbOut_xr;
  481. if (g_bLastOutPresent)
  482. {
  483. pDry[0] += x1_l;
  484. pDry[1] += x1_r;
  485. pDry += 2;
  486. n0--;
  487. g_bLastOutPresent = false;
  488. }
  489. n = n0 >> 1;
  490. for (uint32 i=0; i<n; i++)
  491. {
  492. int x_l = pRvb[i*2], x_r = pRvb[i*2+1];
  493. pDry[i*4] += (x_l + x1_l)>>1;
  494. pDry[i*4+1] += (x_r + x1_r)>>1;
  495. pDry[i*4+2] += x_l;
  496. pDry[i*4+3] += x_r;
  497. x1_l = x_l;
  498. x1_r = x_r;
  499. }
  500. if (n0 & 1)
  501. {
  502. int x_l = pRvb[n*2], x_r = pRvb[n*2+1];
  503. pDry[n*4] += (x_l + x1_l)>>1;
  504. pDry[n*4+1] += (x_r + x1_r)>>1;
  505. x1_l = x_l;
  506. x1_r = x_r;
  507. g_bLastOutPresent = true;
  508. }
  509. g_nLastRvbOut_xl = x1_l;
  510. g_nLastRvbOut_xr = x1_r;
  511. }
  512. #define DCR_AMOUNT 9
  513. // Stereo Add + DC removal
  514. void CReverb::ReverbProcessPostFiltering1x(const int32 * MPT_RESTRICT pRvb, int32 * MPT_RESTRICT pDry, uint32 nSamples)
  515. {
  516. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  517. if(CPU::HasFeatureSet(CPU::feature::sse2))
  518. {
  519. __m128i nDCRRvb_Y1 = Load64SSE(gnDCRRvb_Y1);
  520. __m128i nDCRRvb_X1 = Load64SSE(gnDCRRvb_X1);
  521. __m128i in = _mm_set1_epi32(0);
  522. while(nSamples--)
  523. {
  524. in = Load64SSE(pRvb);
  525. pRvb += 2;
  526. // x(n-1) - x(n)
  527. __m128i diff = _mm_sub_epi32(nDCRRvb_X1, in);
  528. nDCRRvb_X1 = _mm_add_epi32(nDCRRvb_Y1, _mm_sub_epi32(_mm_srai_epi32(diff, DCR_AMOUNT + 1), diff));
  529. __m128i out = _mm_add_epi32(Load64SSE(pDry), nDCRRvb_X1);
  530. nDCRRvb_Y1 = _mm_sub_epi32(nDCRRvb_X1, _mm_srai_epi32(nDCRRvb_X1, DCR_AMOUNT));
  531. nDCRRvb_X1 = in;
  532. Store64SSE(pDry, out);
  533. pDry += 2;
  534. }
  535. Store64SSE(gnDCRRvb_X1, in);
  536. Store64SSE(gnDCRRvb_Y1, nDCRRvb_Y1);
  537. return;
  538. }
  539. #endif
  540. int32 X1L = gnDCRRvb_X1[0], X1R = gnDCRRvb_X1[1];
  541. int32 Y1L = gnDCRRvb_Y1[0], Y1R = gnDCRRvb_Y1[1];
  542. int32 inL = 0, inR = 0;
  543. while(nSamples--)
  544. {
  545. inL = pRvb[0];
  546. inR = pRvb[1];
  547. pRvb += 2;
  548. int32 outL = pDry[0], outR = pDry[1];
  549. // x(n-1) - x(n)
  550. X1L -= inL;
  551. X1R -= inR;
  552. X1L = X1L / (1 << (DCR_AMOUNT + 1)) - X1L;
  553. X1R = X1R / (1 << (DCR_AMOUNT + 1)) - X1R;
  554. Y1L += X1L;
  555. Y1R += X1R;
  556. // add to dry mix
  557. outL += Y1L;
  558. outR += Y1R;
  559. Y1L -= Y1L / (1 << DCR_AMOUNT);
  560. Y1R -= Y1R / (1 << DCR_AMOUNT);
  561. X1L = inL;
  562. X1R = inR;
  563. pDry[0] = outL;
  564. pDry[1] = outR;
  565. pDry += 2;
  566. }
  567. gnDCRRvb_Y1[0] = Y1L;
  568. gnDCRRvb_Y1[1] = Y1R;
  569. gnDCRRvb_X1[0] = inL;
  570. gnDCRRvb_X1[1] = inR;
  571. }
  572. void CReverb::ReverbDCRemoval(int32 * MPT_RESTRICT pBuffer, uint32 nSamples)
  573. {
  574. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  575. if(CPU::HasFeatureSet(CPU::feature::sse2))
  576. {
  577. __m128i nDCRRvb_Y1 = Load64SSE(gnDCRRvb_Y1);
  578. __m128i nDCRRvb_X1 = Load64SSE(gnDCRRvb_X1);
  579. while(nSamples--)
  580. {
  581. __m128i in = Load64SSE(pBuffer);
  582. __m128i diff = _mm_sub_epi32(nDCRRvb_X1, in);
  583. __m128i out = _mm_add_epi32(nDCRRvb_Y1, _mm_sub_epi32(_mm_srai_epi32(diff, DCR_AMOUNT + 1), diff));
  584. Store64SSE(pBuffer, out);
  585. pBuffer += 2;
  586. nDCRRvb_Y1 = _mm_sub_epi32(out, _mm_srai_epi32(out, DCR_AMOUNT));
  587. nDCRRvb_X1 = in;
  588. }
  589. Store64SSE(gnDCRRvb_X1, nDCRRvb_X1);
  590. Store64SSE(gnDCRRvb_Y1, nDCRRvb_Y1);
  591. return;
  592. }
  593. #endif
  594. int32 X1L = gnDCRRvb_X1[0], X1R = gnDCRRvb_X1[1];
  595. int32 Y1L = gnDCRRvb_Y1[0], Y1R = gnDCRRvb_Y1[1];
  596. int32 inL = 0, inR = 0;
  597. while(nSamples--)
  598. {
  599. inL = pBuffer[0];
  600. inR = pBuffer[1];
  601. // x(n-1) - x(n)
  602. X1L -= inL;
  603. X1R -= inR;
  604. X1L = X1L / (1 << (DCR_AMOUNT + 1)) - X1L;
  605. X1R = X1R / (1 << (DCR_AMOUNT + 1)) - X1R;
  606. Y1L += X1L;
  607. Y1R += X1R;
  608. pBuffer[0] = Y1L;
  609. pBuffer[1] = Y1R;
  610. pBuffer += 2;
  611. Y1L -= Y1L / (1 << DCR_AMOUNT);
  612. Y1R -= Y1R / (1 << DCR_AMOUNT);
  613. X1L = inL;
  614. X1R = inR;
  615. }
  616. gnDCRRvb_Y1[0] = Y1L;
  617. gnDCRRvb_Y1[1] = Y1R;
  618. gnDCRRvb_X1[0] = inL;
  619. gnDCRRvb_X1[1] = inR;
  620. }
  621. //////////////////////////////////////////////////////////////////////////
  622. //
  623. // Pre-Delay:
  624. //
  625. // 1. Saturate and low-pass the reverb input (stage 2 of roomHF)
  626. // 2. Process pre-diffusion
  627. // 3. Insert the result in the reflections delay buffer
  628. //
  629. // Save some typing
  630. static MPT_FORCEINLINE int32 Clamp16(int32 x) { return Clamp(x, std::numeric_limits<int16>::min(), std::numeric_limits<int16>::max()); }
  631. void CReverb::ProcessPreDelay(SWRvbRefDelay * MPT_RESTRICT pPreDelay, const int32 * MPT_RESTRICT pIn, uint32 nSamples)
  632. {
  633. uint32 preDifPos = pPreDelay->nPreDifPos;
  634. uint32 delayPos = pPreDelay->nDelayPos - 1;
  635. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  636. if(CPU::HasFeatureSet(CPU::feature::sse2))
  637. {
  638. __m128i coeffs = _mm_cvtsi32_si128(pPreDelay->nCoeffs.lr);
  639. __m128i history = _mm_cvtsi32_si128(pPreDelay->History.lr);
  640. __m128i preDifCoeffs = _mm_cvtsi32_si128(pPreDelay->nPreDifCoeffs.lr);
  641. while(nSamples--)
  642. {
  643. __m128i in32 = Load64SSE(pIn); // 16-bit unsaturated reverb input [ r | l ]
  644. __m128i inSat = _mm_packs_epi32(in32, in32); // [ r | l | r | l ] (16-bit saturated)
  645. pIn += 2;
  646. // Low-pass
  647. __m128i lp = _mm_mulhi_epi16(_mm_subs_epi16(history, inSat), coeffs);
  648. __m128i preDif = _mm_cvtsi32_si128(pPreDelay->PreDifBuffer[preDifPos].lr);
  649. history = _mm_adds_epi16(_mm_adds_epi16(lp, lp), inSat);
  650. // Pre-Diffusion
  651. preDifPos = (preDifPos + 1) & SNDMIX_PREDIFFUSION_DELAY_MASK;
  652. delayPos = (delayPos + 1) & SNDMIX_REFLECTIONS_DELAY_MASK;
  653. __m128i preDif2 = _mm_subs_epi16(history, _mm_mulhi_epi16(preDif, preDifCoeffs));
  654. pPreDelay->PreDifBuffer[preDifPos].lr = _mm_cvtsi128_si32(preDif2);
  655. pPreDelay->RefDelayBuffer[delayPos].lr = _mm_cvtsi128_si32(_mm_adds_epi16(_mm_mulhi_epi16(preDifCoeffs, preDif2), preDif));
  656. }
  657. pPreDelay->nPreDifPos = preDifPos;
  658. pPreDelay->History.lr = _mm_cvtsi128_si32(history);
  659. return;
  660. }
  661. #endif
  662. const int32 coeffsL = pPreDelay->nCoeffs.c.l, coeffsR = pPreDelay->nCoeffs.c.r;
  663. const int32 preDifCoeffsL = pPreDelay->nPreDifCoeffs.c.l, preDifCoeffsR = pPreDelay->nPreDifCoeffs.c.r;
  664. int16 historyL = pPreDelay->History.c.l, historyR = pPreDelay->History.c.r;
  665. while(nSamples--)
  666. {
  667. int32 inL = Clamp16(pIn[0]);
  668. int32 inR = Clamp16(pIn[1]);
  669. pIn += 2;
  670. // Low-pass
  671. int32 lpL = (Clamp16(historyL - inL) * coeffsL) / 65536;
  672. int32 lpR = (Clamp16(historyR - inR) * coeffsR) / 65536;
  673. historyL = mpt::saturate_cast<int16>(Clamp16(lpL + lpL) + inL);
  674. historyR = mpt::saturate_cast<int16>(Clamp16(lpR + lpR) + inR);
  675. // Pre-Diffusion
  676. int32 preDifL = pPreDelay->PreDifBuffer[preDifPos].c.l;
  677. int32 preDifR = pPreDelay->PreDifBuffer[preDifPos].c.r;
  678. preDifPos = (preDifPos + 1) & SNDMIX_PREDIFFUSION_DELAY_MASK;
  679. delayPos = (delayPos + 1) & SNDMIX_REFLECTIONS_DELAY_MASK;
  680. int16 preDif2L = mpt::saturate_cast<int16>(historyL - preDifL * preDifCoeffsL / 65536);
  681. int16 preDif2R = mpt::saturate_cast<int16>(historyR - preDifR * preDifCoeffsR / 65536);
  682. pPreDelay->PreDifBuffer[preDifPos].c.l = preDif2L;
  683. pPreDelay->PreDifBuffer[preDifPos].c.r = preDif2R;
  684. pPreDelay->RefDelayBuffer[delayPos].c.l = mpt::saturate_cast<int16>(preDifCoeffsL * preDif2L / 65536 + preDifL);
  685. pPreDelay->RefDelayBuffer[delayPos].c.r = mpt::saturate_cast<int16>(preDifCoeffsR * preDif2R / 65536 + preDifR);
  686. }
  687. pPreDelay->nPreDifPos = preDifPos;
  688. pPreDelay->History.c.l = historyL;
  689. pPreDelay->History.c.r = historyR;
  690. }
  691. ////////////////////////////////////////////////////////////////////
  692. //
  693. // ProcessReflections:
  694. // First stage:
  695. // - process 4 reflections, output to pRefOut
  696. // - output results to pRefOut
  697. // Second stage:
  698. // - process another 3 reflections
  699. // - sum with pRefOut
  700. // - apply reflections master gain and accumulate in the given output
  701. //
  702. void CReverb::ProcessReflections(SWRvbRefDelay * MPT_RESTRICT pPreDelay, LR16 * MPT_RESTRICT pRefOut, int32 * MPT_RESTRICT pOut, uint32 nSamples)
  703. {
  704. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  705. if(CPU::HasFeatureSet(CPU::feature::sse2))
  706. {
  707. union
  708. {
  709. __m128i xmm;
  710. int16 i[8];
  711. } pos;
  712. const LR16 *refDelayBuffer = pPreDelay->RefDelayBuffer;
  713. #define GETDELAY(x) static_cast<int16>(pPreDelay->Reflections[x].Delay)
  714. __m128i delayPos = _mm_set_epi16(GETDELAY(7), GETDELAY(6), GETDELAY(5), GETDELAY(4), GETDELAY(3), GETDELAY(2), GETDELAY(1), GETDELAY(0));
  715. #undef GETDELAY
  716. delayPos = _mm_sub_epi16(_mm_set1_epi16(static_cast<int16>(pPreDelay->nDelayPos - 1)), delayPos);
  717. __m128i gain12 = _mm_unpacklo_epi64(Load64SSE(pPreDelay->Reflections[0].Gains), Load64SSE(pPreDelay->Reflections[1].Gains));
  718. __m128i gain34 = _mm_unpacklo_epi64(Load64SSE(pPreDelay->Reflections[2].Gains), Load64SSE(pPreDelay->Reflections[3].Gains));
  719. __m128i gain56 = _mm_unpacklo_epi64(Load64SSE(pPreDelay->Reflections[4].Gains), Load64SSE(pPreDelay->Reflections[5].Gains));
  720. __m128i gain78 = _mm_unpacklo_epi64(Load64SSE(pPreDelay->Reflections[6].Gains), Load64SSE(pPreDelay->Reflections[7].Gains));
  721. // For 28-bit final output: 16+15-3 = 28
  722. __m128i refGain = _mm_srai_epi32(_mm_set_epi32(0, 0, pPreDelay->ReflectionsGain.c.r, pPreDelay->ReflectionsGain.c.l), 3);
  723. __m128i delayInc = _mm_set1_epi16(1), delayMask = _mm_set1_epi16(SNDMIX_REFLECTIONS_DELAY_MASK);
  724. while(nSamples--)
  725. {
  726. delayPos = _mm_and_si128(_mm_add_epi16(delayInc, delayPos), delayMask);
  727. _mm_storeu_si128(&pos.xmm, delayPos);
  728. __m128i ref12 = _mm_set_epi32(refDelayBuffer[pos.i[1]].lr, refDelayBuffer[pos.i[1]].lr, refDelayBuffer[pos.i[0]].lr, refDelayBuffer[pos.i[0]].lr);
  729. __m128i ref34 = _mm_set_epi32(refDelayBuffer[pos.i[3]].lr, refDelayBuffer[pos.i[3]].lr, refDelayBuffer[pos.i[2]].lr, refDelayBuffer[pos.i[2]].lr);
  730. __m128i ref56 = _mm_set_epi32(refDelayBuffer[pos.i[5]].lr, refDelayBuffer[pos.i[5]].lr, refDelayBuffer[pos.i[4]].lr, refDelayBuffer[pos.i[4]].lr);
  731. __m128i ref78 = _mm_set_epi32(0, 0, refDelayBuffer[pos.i[6]].lr, refDelayBuffer[pos.i[6]].lr);
  732. // First stage
  733. __m128i refOut1 = _mm_add_epi32(_mm_madd_epi16(ref12, gain12), _mm_madd_epi16(ref34, gain34));
  734. refOut1 = _mm_srai_epi32(_mm_add_epi32(refOut1, _mm_shuffle_epi32(refOut1, _MM_SHUFFLE(1, 0, 3, 2))), 15);
  735. // Second stage
  736. __m128i refOut2 = _mm_add_epi32(_mm_madd_epi16(ref56, gain56), _mm_madd_epi16(ref78, gain78));
  737. refOut2 = _mm_srai_epi32(_mm_add_epi32(refOut2, _mm_shuffle_epi32(refOut2, _MM_SHUFFLE(1, 0, 3, 2))), 15);
  738. // Saturate to 16-bit and sum stages
  739. __m128i refOut = _mm_adds_epi16(_mm_packs_epi32(refOut1, refOut1), _mm_packs_epi32(refOut2, refOut2));
  740. pRefOut->lr = _mm_cvtsi128_si32(refOut);
  741. pRefOut++;
  742. __m128i out = _mm_madd_epi16(_mm_unpacklo_epi16(refOut, refOut), refGain); // Apply reflections gain
  743. // At this, point, this is the only output of the reverb
  744. Store64SSE(pOut, out);
  745. pOut += 2;
  746. }
  747. return;
  748. }
  749. #endif
  750. int pos[7];
  751. for(int i = 0; i < 7; i++)
  752. pos[i] = pPreDelay->nDelayPos - pPreDelay->Reflections[i].Delay - 1;
  753. // For 28-bit final output: 16+15-3 = 28
  754. int16 refGain = pPreDelay->ReflectionsGain.c.l / (1 << 3);
  755. while(nSamples--)
  756. {
  757. // First stage
  758. int32 refOutL = 0, refOutR = 0;
  759. for(int i = 0; i < 4; i++)
  760. {
  761. pos[i] = (pos[i] + 1) & SNDMIX_REFLECTIONS_DELAY_MASK;
  762. int16 refL = pPreDelay->RefDelayBuffer[pos[i]].c.l, refR = pPreDelay->RefDelayBuffer[pos[i]].c.r;
  763. refOutL += refL * pPreDelay->Reflections[i].Gains[0].c.l + refR * pPreDelay->Reflections[i].Gains[0].c.r;
  764. refOutR += refL * pPreDelay->Reflections[i].Gains[1].c.l + refR * pPreDelay->Reflections[i].Gains[1].c.r;
  765. }
  766. int16 stage1l = mpt::saturate_cast<int16>(refOutL / (1 << 15));
  767. int16 stage1r = mpt::saturate_cast<int16>(refOutR / (1 << 15));
  768. // Second stage
  769. refOutL = 0;
  770. refOutR = 0;
  771. for(int i = 4; i < 7; i++)
  772. {
  773. pos[i] = (pos[i] + 1) & SNDMIX_REFLECTIONS_DELAY_MASK;
  774. int16 refL = pPreDelay->RefDelayBuffer[pos[i]].c.l, refR = pPreDelay->RefDelayBuffer[pos[i]].c.r;
  775. refOutL += refL * pPreDelay->Reflections[i].Gains[0].c.l + refR * pPreDelay->Reflections[i].Gains[0].c.r;
  776. refOutR += refL * pPreDelay->Reflections[i].Gains[1].c.l + refR * pPreDelay->Reflections[i].Gains[1].c.r;
  777. }
  778. pOut[0] = (pRefOut->c.l = mpt::saturate_cast<int16>(stage1l + refOutL / (1 << 15))) * refGain;
  779. pOut[1] = (pRefOut->c.r = mpt::saturate_cast<int16>(stage1r + refOutR / (1 << 15))) * refGain;
  780. pRefOut++;
  781. pOut += 2;
  782. }
  783. }
  784. //////////////////////////////////////////////////////////////////////////
  785. //
  786. // Late reverberation (with SW reflections)
  787. //
  788. void CReverb::ProcessLateReverb(SWLateReverb * MPT_RESTRICT pReverb, LR16 * MPT_RESTRICT pRefOut, int32 * MPT_RESTRICT pMixOut, uint32 nSamples)
  789. {
  790. // Calculate delay line offset from current delay position
  791. #define DELAY_OFFSET(x) ((delayPos - (x)) & RVBDLY_MASK)
  792. #if defined(MPT_ENABLE_ARCH_INTRINSICS_SSE2)
  793. if(CPU::HasFeatureSet(CPU::feature::sse2))
  794. {
  795. int delayPos = pReverb->nDelayPos & RVBDLY_MASK;
  796. __m128i rvbOutGains = Load64SSE(pReverb->RvbOutGains);
  797. __m128i difCoeffs = Load64SSE(pReverb->nDifCoeffs);
  798. __m128i decayLP = Load64SSE(pReverb->nDecayLP);
  799. __m128i lpHistory = Load64SSE(pReverb->LPHistory);
  800. while(nSamples--)
  801. {
  802. __m128i refIn = _mm_cvtsi32_si128(pRefOut->lr); // 16-bit stereo input
  803. pRefOut++;
  804. __m128i delay2 = _mm_unpacklo_epi32(
  805. _mm_cvtsi32_si128(pReverb->Delay2[DELAY_OFFSET(RVBDLY2L_LEN)].lr),
  806. _mm_cvtsi32_si128(pReverb->Delay2[DELAY_OFFSET(RVBDLY2R_LEN)].lr));
  807. // Unsigned to avoid sign extension
  808. uint16 diff1L = pReverb->Diffusion1[DELAY_OFFSET(RVBDIF1L_LEN)].c.l;
  809. uint16 diff1R = pReverb->Diffusion1[DELAY_OFFSET(RVBDIF1R_LEN)].c.r;
  810. int32 diffusion1 = diff1L | (diff1R << 16); // diffusion1 history
  811. uint16 diff2L = pReverb->Diffusion2[DELAY_OFFSET(RVBDIF2L_LEN)].c.l;
  812. uint16 diff2R = pReverb->Diffusion2[DELAY_OFFSET(RVBDIF2R_LEN)].c.r;
  813. int32 diffusion2 = diff2L | (diff2R << 16); // diffusion2 history
  814. __m128i lpDecay = _mm_mulhi_epi16(_mm_subs_epi16(lpHistory, delay2), decayLP);
  815. lpHistory = _mm_adds_epi16(_mm_adds_epi16(lpDecay, lpDecay), delay2); // Low-passed decay
  816. // Apply decay gain
  817. __m128i histDecay = _mm_srai_epi32(_mm_madd_epi16(Load64SSE(pReverb->nDecayDC), lpHistory), 15);
  818. __m128i histDecayPacked = _mm_shuffle_epi32(_mm_packs_epi32(histDecay, histDecay), _MM_SHUFFLE(2, 0, 2, 0));
  819. __m128i histDecayIn = _mm_adds_epi16(_mm_shuffle_epi32(_mm_packs_epi32(histDecay, histDecay), _MM_SHUFFLE(2, 0, 2, 0)), _mm_srai_epi16(_mm_unpacklo_epi32(refIn, refIn), 2));
  820. __m128i histDecayInDiff = _mm_subs_epi16(histDecayIn, _mm_mulhi_epi16(_mm_cvtsi32_si128(diffusion1), difCoeffs));
  821. pReverb->Diffusion1[delayPos].lr = _mm_cvtsi128_si32(histDecayInDiff);
  822. __m128i delay1Out = _mm_adds_epi16(_mm_mulhi_epi16(difCoeffs, histDecayInDiff), _mm_cvtsi32_si128(diffusion1));
  823. // Insert the diffusion output in the reverb delay line
  824. pReverb->Delay1[delayPos].lr = _mm_cvtsi128_si32(delay1Out);
  825. __m128i histDecayInDelay = _mm_adds_epi16(histDecayIn, _mm_unpacklo_epi32(delay1Out, delay1Out));
  826. // Input to second diffuser
  827. __m128i delay1 = _mm_unpacklo_epi32(
  828. _mm_cvtsi32_si128(pReverb->Delay1[DELAY_OFFSET(RVBDLY1L_LEN)].lr),
  829. _mm_cvtsi32_si128(pReverb->Delay1[DELAY_OFFSET(RVBDLY1R_LEN)].lr));
  830. __m128i delay1Gains = _mm_srai_epi32(_mm_madd_epi16(delay1, Load64SSE(pReverb->Dif2InGains)), 15);
  831. __m128i delay1GainsSat = _mm_shuffle_epi32(_mm_packs_epi32(delay1Gains, delay1Gains), _MM_SHUFFLE(2, 0, 2, 0));
  832. __m128i histDelay1 = _mm_subs_epi16(_mm_adds_epi16(histDecayInDelay, delay1), delay1GainsSat); // accumulate with reverb output
  833. __m128i diff2out = _mm_subs_epi16(delay1GainsSat, _mm_mulhi_epi16(_mm_cvtsi32_si128(diffusion2), difCoeffs));
  834. __m128i diff2outCoeffs = _mm_mulhi_epi16(difCoeffs, diff2out);
  835. pReverb->Diffusion2[delayPos].lr = _mm_cvtsi128_si32(diff2out);
  836. __m128i mixOut = Load64SSE(pMixOut);
  837. __m128i delay2out = _mm_adds_epi16(diff2outCoeffs, _mm_cvtsi32_si128(diffusion2));
  838. pReverb->Delay2[delayPos].lr = _mm_cvtsi128_si32(delay2out);
  839. delayPos = (delayPos + 1) & RVBDLY_MASK;
  840. // Accumulate with reverb output
  841. __m128i out = _mm_add_epi32(_mm_madd_epi16(_mm_adds_epi16(histDelay1, delay2out), rvbOutGains), mixOut);
  842. Store64SSE(pMixOut, out);
  843. pMixOut += 2;
  844. }
  845. Store64SSE(pReverb->LPHistory, lpHistory);
  846. pReverb->nDelayPos = delayPos;
  847. return;
  848. }
  849. #endif
  850. int delayPos = pReverb->nDelayPos & RVBDLY_MASK;
  851. while(nSamples--)
  852. {
  853. int16 refInL = pRefOut->c.l, refInR = pRefOut->c.r;
  854. pRefOut++;
  855. int32 delay2LL = pReverb->Delay2[DELAY_OFFSET(RVBDLY2L_LEN)].c.l, delay2LR = pReverb->Delay2[DELAY_OFFSET(RVBDLY2L_LEN)].c.r;
  856. int32 delay2RL = pReverb->Delay2[DELAY_OFFSET(RVBDLY2R_LEN)].c.l, delay2RR = pReverb->Delay2[DELAY_OFFSET(RVBDLY2R_LEN)].c.r;
  857. int32 diff1L = pReverb->Diffusion1[DELAY_OFFSET(RVBDIF1L_LEN)].c.l;
  858. int32 diff1R = pReverb->Diffusion1[DELAY_OFFSET(RVBDIF1R_LEN)].c.r;
  859. int32 diff2L = pReverb->Diffusion2[DELAY_OFFSET(RVBDIF2L_LEN)].c.l;
  860. int32 diff2R = pReverb->Diffusion2[DELAY_OFFSET(RVBDIF2R_LEN)].c.r;
  861. int32 lpDecayLL = Clamp16(pReverb->LPHistory[0].c.l - delay2LL) * pReverb->nDecayLP[0].c.l / 65536;
  862. int32 lpDecayLR = Clamp16(pReverb->LPHistory[0].c.r - delay2LR) * pReverb->nDecayLP[0].c.r / 65536;
  863. int32 lpDecayRL = Clamp16(pReverb->LPHistory[1].c.l - delay2RL) * pReverb->nDecayLP[1].c.l / 65536;
  864. int32 lpDecayRR = Clamp16(pReverb->LPHistory[1].c.r - delay2RR) * pReverb->nDecayLP[1].c.r / 65536;
  865. // Low-passed decay
  866. pReverb->LPHistory[0].c.l = mpt::saturate_cast<int16>(Clamp16(lpDecayLL + lpDecayLL) + delay2LL);
  867. pReverb->LPHistory[0].c.r = mpt::saturate_cast<int16>(Clamp16(lpDecayLR + lpDecayLR) + delay2LR);
  868. pReverb->LPHistory[1].c.l = mpt::saturate_cast<int16>(Clamp16(lpDecayRL + lpDecayRL) + delay2RL);
  869. pReverb->LPHistory[1].c.r = mpt::saturate_cast<int16>(Clamp16(lpDecayRR + lpDecayRR) + delay2RR);
  870. // Apply decay gain
  871. int32 histDecayL = Clamp16((int32)pReverb->nDecayDC[0].c.l * pReverb->LPHistory[0].c.l / (1 << 15));
  872. int32 histDecayR = Clamp16((int32)pReverb->nDecayDC[1].c.r * pReverb->LPHistory[1].c.r / (1 << 15));
  873. int32 histDecayInL = Clamp16(histDecayL + refInL / 4);
  874. int32 histDecayInR = Clamp16(histDecayR + refInR / 4);
  875. int32 histDecayInDiffL = Clamp16(histDecayInL - diff1L * pReverb->nDifCoeffs[0].c.l / 65536);
  876. int32 histDecayInDiffR = Clamp16(histDecayInR - diff1R * pReverb->nDifCoeffs[0].c.r / 65536);
  877. pReverb->Diffusion1[delayPos].c.l = static_cast<int16>(histDecayInDiffL);
  878. pReverb->Diffusion1[delayPos].c.r = static_cast<int16>(histDecayInDiffR);
  879. int32 delay1L = Clamp16(pReverb->nDifCoeffs[0].c.l * histDecayInDiffL / 65536 + diff1L);
  880. int32 delay1R = Clamp16(pReverb->nDifCoeffs[0].c.r * histDecayInDiffR / 65536 + diff1R);
  881. // Insert the diffusion output in the reverb delay line
  882. pReverb->Delay1[delayPos].c.l = static_cast<int16>(delay1L);
  883. pReverb->Delay1[delayPos].c.r = static_cast<int16>(delay1R);
  884. int32 histDecayInDelayL = Clamp16(histDecayInL + delay1L);
  885. int32 histDecayInDelayR = Clamp16(histDecayInR + delay1R);
  886. // Input to second diffuser
  887. int32 delay1LL = pReverb->Delay1[DELAY_OFFSET(RVBDLY1L_LEN)].c.l, delay1LR = pReverb->Delay1[DELAY_OFFSET(RVBDLY1L_LEN)].c.r;
  888. int32 delay1RL = pReverb->Delay1[DELAY_OFFSET(RVBDLY1R_LEN)].c.l, delay1RR = pReverb->Delay1[DELAY_OFFSET(RVBDLY1R_LEN)].c.r;
  889. int32 delay1GainsL = Clamp16((delay1LL * pReverb->Dif2InGains[0].c.l + delay1LR * pReverb->Dif2InGains[0].c.r) / (1 << 15));
  890. int32 delay1GainsR = Clamp16((delay1RL * pReverb->Dif2InGains[1].c.l + delay1RR * pReverb->Dif2InGains[1].c.r) / (1 << 15));
  891. // accumulate with reverb output
  892. int32 histDelay1LL = Clamp16(Clamp16(histDecayInDelayL + delay1LL) - delay1GainsL);
  893. int32 histDelay1LR = Clamp16(Clamp16(histDecayInDelayR + delay1LR) - delay1GainsR);
  894. int32 histDelay1RL = Clamp16(Clamp16(histDecayInDelayL + delay1RL) - delay1GainsL);
  895. int32 histDelay1RR = Clamp16(Clamp16(histDecayInDelayR + delay1RR) - delay1GainsR);
  896. int32 diff2outL = Clamp16(delay1GainsL - diff2L * pReverb->nDifCoeffs[0].c.l / 65536);
  897. int32 diff2outR = Clamp16(delay1GainsR - diff2R * pReverb->nDifCoeffs[0].c.r / 65536);
  898. int32 diff2outCoeffsL = pReverb->nDifCoeffs[0].c.l * diff2outL / 65536;
  899. int32 diff2outCoeffsR = pReverb->nDifCoeffs[0].c.r * diff2outR / 65536;
  900. pReverb->Diffusion2[delayPos].c.l = static_cast<int16>(diff2outL);
  901. pReverb->Diffusion2[delayPos].c.r = static_cast<int16>(diff2outR);
  902. int32 delay2outL = Clamp16(diff2outCoeffsL + diff2L);
  903. int32 delay2outR = Clamp16(diff2outCoeffsR + diff2R);
  904. pReverb->Delay2[delayPos].c.l = static_cast<int16>(delay2outL);
  905. pReverb->Delay2[delayPos].c.r = static_cast<int16>(delay2outR);
  906. delayPos = (delayPos + 1) & RVBDLY_MASK;
  907. // Accumulate with reverb output
  908. pMixOut[0] += Clamp16(histDelay1LL + delay2outL) * pReverb->RvbOutGains[0].c.l + Clamp16(histDelay1LR + delay2outR) * pReverb->RvbOutGains[0].c.r;
  909. pMixOut[1] += Clamp16(histDelay1RL + Clamp16(diff2outCoeffsL)) * pReverb->RvbOutGains[1].c.l + Clamp16(histDelay1RR + Clamp16(diff2outCoeffsR)) * pReverb->RvbOutGains[1].c.r;
  910. pMixOut += 2;
  911. }
  912. pReverb->nDelayPos = delayPos;
  913. #undef DELAY_OFFSET
  914. }
  915. #else
  916. MPT_MSVC_WORKAROUND_LNK4221(Reverb)
  917. #endif // NO_REVERB
  918. OPENMPT_NAMESPACE_END