- /*++
- Copyright (c) Microsoft Corporation. All rights reserved.
- Module Name:
- xnamathmisc.inl
- Abstract:
- XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions.
- --*/
- #if defined(_MSC_VER) && (_MSC_VER > 1000)
- #pragma once
- #endif
- #ifndef __XNAMATHMISC_INL__
- #define __XNAMATHMISC_INL__
- /****************************************************************************
- *
- * Quaternion
- *
- ****************************************************************************/
- //------------------------------------------------------------------------------
- // Comparison operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMQuaternionEqual
- (
- FXMVECTOR Q1,
- FXMVECTOR Q2
- )
- {
- return XMVector4Equal(Q1, Q2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMQuaternionNotEqual
- (
- FXMVECTOR Q1,
- FXMVECTOR Q2
- )
- {
- return XMVector4NotEqual(Q1, Q2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMQuaternionIsNaN
- (
- FXMVECTOR Q
- )
- {
- return XMVector4IsNaN(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMQuaternionIsInfinite
- (
- FXMVECTOR Q
- )
- {
- return XMVector4IsInfinite(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMQuaternionIsIdentity
- (
- FXMVECTOR Q
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- return XMVector4Equal(Q, g_XMIdentityR3.v);
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
- return (_mm_movemask_ps(vTemp)==0x0f) ? true : false;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- // Computation operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionDot
- (
- FXMVECTOR Q1,
- FXMVECTOR Q2
- )
- {
- return XMVector4Dot(Q1, Q2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionMultiply
- (
- FXMVECTOR Q1,
- FXMVECTOR Q2
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR NegativeQ1;
- XMVECTOR Q2X;
- XMVECTOR Q2Y;
- XMVECTOR Q2Z;
- XMVECTOR Q2W;
- XMVECTOR Q1WZYX;
- XMVECTOR Q1ZWXY;
- XMVECTOR Q1YXWZ;
- XMVECTOR Result;
- CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
- CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
- CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
- NegativeQ1 = XMVectorNegate(Q1);
- Q2W = XMVectorSplatW(Q2);
- Q2X = XMVectorSplatX(Q2);
- Q2Y = XMVectorSplatY(Q2);
- Q2Z = XMVectorSplatZ(Q2);
- Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
- Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
- Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);
- Result = XMVectorMultiply(Q1, Q2W);
- Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
- Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
- Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
- static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
- static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
- // Copy to SSE registers and use as few as possible for x86
- XMVECTOR Q2X = Q2;
- XMVECTOR Q2Y = Q2;
- XMVECTOR Q2Z = Q2;
- XMVECTOR vResult = Q2;
- // Splat with one instruction
- vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
- Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
- Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
- Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
- // Retire Q1 and perform Q1*Q2W
- vResult = _mm_mul_ps(vResult,Q1);
- XMVECTOR Q1Shuffle = Q1;
- // Shuffle the copies of Q1
- Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
- // Mul by Q1WZYX
- Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
- Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
- // Flip the signs on y and z
- Q2X = _mm_mul_ps(Q2X,ControlWZYX);
- // Mul by Q1ZWXY
- Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
- Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
- // Flip the signs on z and w
- Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
- // Mul by Q1YXWZ
- Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
- vResult = _mm_add_ps(vResult,Q2X);
- // Flip the signs on x and w
- Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
- Q2Y = _mm_add_ps(Q2Y,Q2Z);
- vResult = _mm_add_ps(vResult,Q2Y);
- return vResult;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
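- // Example usage (illustrative sketch): composing two rotations.
- // XMQuaternionMultiply(Q1, Q2) returns the quaternion that applies Q1 first and
- // then Q2. Assumes the companion headers provide XMVectorSet, XMConvertToRadians
- // and XMVector3Rotate.
- //
- //     XMVECTOR qYaw   = XMQuaternionRotationAxis(XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f), XMConvertToRadians(90.0f));
- //     XMVECTOR qPitch = XMQuaternionRotationAxis(XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f), XMConvertToRadians(30.0f));
- //     XMVECTOR q      = XMQuaternionMultiply(qYaw, qPitch);       // yaw, then pitch
- //     XMVECTOR v      = XMVector3Rotate(XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f), q);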
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionLengthSq
- (
- FXMVECTOR Q
- )
- {
- return XMVector4LengthSq(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionReciprocalLength
- (
- FXMVECTOR Q
- )
- {
- return XMVector4ReciprocalLength(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionLength
- (
- FXMVECTOR Q
- )
- {
- return XMVector4Length(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionNormalizeEst
- (
- FXMVECTOR Q
- )
- {
- return XMVector4NormalizeEst(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionNormalize
- (
- FXMVECTOR Q
- )
- {
- return XMVector4Normalize(Q);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionConjugate
- (
- FXMVECTOR Q
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Result = {
- -Q.vector4_f32[0],
- -Q.vector4_f32[1],
- -Q.vector4_f32[2],
- Q.vector4_f32[3]
- };
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
- XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionInverse
- (
- FXMVECTOR Q
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Conjugate;
- XMVECTOR L;
- XMVECTOR Control;
- XMVECTOR Result;
- CONST XMVECTOR Zero = XMVectorZero();
- L = XMVector4LengthSq(Q);
- Conjugate = XMQuaternionConjugate(Q);
- Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
- L = XMVectorReciprocal(L);
- Result = XMVectorMultiply(Conjugate, L);
- Result = XMVectorSelect(Result, Zero, Control);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR Conjugate;
- XMVECTOR L;
- XMVECTOR Control;
- XMVECTOR Result;
- XMVECTOR Zero = XMVectorZero();
- L = XMVector4LengthSq(Q);
- Conjugate = XMQuaternionConjugate(Q);
- Control = XMVectorLessOrEqual(L, g_XMEpsilon);
- Result = _mm_div_ps(Conjugate,L);
- Result = XMVectorSelect(Result, Zero, Control);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
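- // Example usage (illustrative sketch): undoing a rotation. For a unit-length
- // quaternion the inverse equals the conjugate, so XMQuaternionConjugate is the
- // cheaper choice when the input is known to be normalized.
- //
- //     XMVECTOR q    = XMQuaternionRotationAxis(XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f), XM_PIDIV2);
- //     XMVECTOR qInv = XMQuaternionInverse(q);            // general case
- //     XMVECTOR qCon = XMQuaternionConjugate(q);          // same result for unit q
- //     XMVECTOR id   = XMQuaternionMultiply(q, qInv);     // ~(0,0,0,1), the identity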
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionLn
- (
- FXMVECTOR Q
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Q0;
- XMVECTOR QW;
- XMVECTOR Theta;
- XMVECTOR SinTheta;
- XMVECTOR S;
- XMVECTOR ControlW;
- XMVECTOR Result;
- static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
- QW = XMVectorSplatW(Q);
- Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
- ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
- Theta = XMVectorACos(QW);
- SinTheta = XMVectorSin(Theta);
- S = XMVectorReciprocal(SinTheta);
- S = XMVectorMultiply(Theta, S);
- Result = XMVectorMultiply(Q0, S);
- Result = XMVectorSelect(Q0, Result, ControlW);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
- static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
- // Get W only
- XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
- // W = 0
- XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
- // Use W if within bounds
- XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
- XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
- ControlW = _mm_and_ps(ControlW,vTemp2);
- // Get theta
- XMVECTOR vTheta = XMVectorACos(QW);
- // Get Sine of theta
- vTemp2 = XMVectorSin(vTheta);
- // theta/sine of theta
- vTheta = _mm_div_ps(vTheta,vTemp2);
- // Here's the answer
- vTheta = _mm_mul_ps(vTheta,Q0);
- // Was W in bounds? If not, return input as is
- vTheta = XMVectorSelect(Q0,vTheta,ControlW);
- return vTheta;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionExp
- (
- FXMVECTOR Q
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Theta;
- XMVECTOR SinTheta;
- XMVECTOR CosTheta;
- XMVECTOR S;
- XMVECTOR Control;
- XMVECTOR Zero;
- XMVECTOR Result;
- Theta = XMVector3Length(Q);
- XMVectorSinCos(&SinTheta, &CosTheta, Theta);
- S = XMVectorReciprocal(Theta);
- S = XMVectorMultiply(SinTheta, S);
- Result = XMVectorMultiply(Q, S);
- Zero = XMVectorZero();
- Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
- Result = XMVectorSelect(Result, Q, Control);
- Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR Theta;
- XMVECTOR SinTheta;
- XMVECTOR CosTheta;
- XMVECTOR S;
- XMVECTOR Control;
- XMVECTOR Zero;
- XMVECTOR Result;
- Theta = XMVector3Length(Q);
- XMVectorSinCos(&SinTheta, &CosTheta, Theta);
- S = _mm_div_ps(SinTheta,Theta);
- Result = _mm_mul_ps(Q, S);
- Zero = XMVectorZero();
- Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
- Result = XMVectorSelect(Result,Q,Control);
- Result = _mm_and_ps(Result,g_XMMask3);
- CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
- Result = _mm_or_ps(Result,CosTheta);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMINLINE XMVECTOR XMQuaternionSlerp
- (
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FLOAT t
- )
- {
- XMVECTOR T = XMVectorReplicate(t);
- return XMQuaternionSlerpV(Q0, Q1, T);
- }
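- // Example usage (illustrative sketch): interpolating between two orientations
- // with t in [0,1]. Both inputs are assumed to be unit quaternions.
- //
- //     XMVECTOR q0 = XMQuaternionIdentity();
- //     XMVECTOR q1 = XMQuaternionRotationAxis(XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f), XM_PIDIV2);
- //     XMVECTOR q  = XMQuaternionSlerp(q0, q1, 0.25f);    // a quarter of the way from q0 to q1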
- //------------------------------------------------------------------------------
- XMINLINE XMVECTOR XMQuaternionSlerpV
- (
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FXMVECTOR T
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
- XMVECTOR Omega;
- XMVECTOR CosOmega;
- XMVECTOR SinOmega;
- XMVECTOR InvSinOmega;
- XMVECTOR V01;
- XMVECTOR C1000;
- XMVECTOR SignMask;
- XMVECTOR S0;
- XMVECTOR S1;
- XMVECTOR Sign;
- XMVECTOR Control;
- XMVECTOR Result;
- XMVECTOR Zero;
- CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
- XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));
- CosOmega = XMQuaternionDot(Q0, Q1);
- Zero = XMVectorZero();
- Control = XMVectorLess(CosOmega, Zero);
- Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
- CosOmega = XMVectorMultiply(CosOmega, Sign);
- Control = XMVectorLess(CosOmega, OneMinusEpsilon);
- SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
- SinOmega = XMVectorSqrt(SinOmega);
- Omega = XMVectorATan2(SinOmega, CosOmega);
- SignMask = XMVectorSplatSignMask();
- C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
- V01 = XMVectorShiftLeft(T, Zero, 2);
- SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
- V01 = XMVectorXorInt(V01, SignMask);
- V01 = XMVectorAdd(C1000, V01);
- InvSinOmega = XMVectorReciprocal(SinOmega);
- S0 = XMVectorMultiply(V01, Omega);
- S0 = XMVectorSin(S0);
- S0 = XMVectorMultiply(S0, InvSinOmega);
- S0 = XMVectorSelect(V01, S0, Control);
- S1 = XMVectorSplatY(S0);
- S0 = XMVectorSplatX(S0);
- S1 = XMVectorMultiply(S1, Sign);
- Result = XMVectorMultiply(Q0, S0);
- Result = XMVectorMultiplyAdd(Q1, S1, Result);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
- XMVECTOR Omega;
- XMVECTOR CosOmega;
- XMVECTOR SinOmega;
- XMVECTOR V01;
- XMVECTOR S0;
- XMVECTOR S1;
- XMVECTOR Sign;
- XMVECTOR Control;
- XMVECTOR Result;
- XMVECTOR Zero;
- static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
- static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
- static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
- XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
- CosOmega = XMQuaternionDot(Q0, Q1);
- Zero = XMVectorZero();
- Control = XMVectorLess(CosOmega, Zero);
- Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
- CosOmega = _mm_mul_ps(CosOmega, Sign);
- Control = XMVectorLess(CosOmega, OneMinusEpsilon);
- SinOmega = _mm_mul_ps(CosOmega,CosOmega);
- SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
- SinOmega = _mm_sqrt_ps(SinOmega);
- Omega = XMVectorATan2(SinOmega, CosOmega);
- V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
- V01 = _mm_and_ps(V01,MaskXY);
- V01 = _mm_xor_ps(V01,SignMask2);
- V01 = _mm_add_ps(g_XMIdentityR0, V01);
- S0 = _mm_mul_ps(V01, Omega);
- S0 = XMVectorSin(S0);
- S0 = _mm_div_ps(S0, SinOmega);
- S0 = XMVectorSelect(V01, S0, Control);
- S1 = XMVectorSplatY(S0);
- S0 = XMVectorSplatX(S0);
- S1 = _mm_mul_ps(S1, Sign);
- Result = _mm_mul_ps(Q0, S0);
- S1 = _mm_mul_ps(S1, Q1);
- Result = _mm_add_ps(Result,S1);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionSquad
- (
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FXMVECTOR Q2,
- CXMVECTOR Q3,
- FLOAT t
- )
- {
- XMVECTOR T = XMVectorReplicate(t);
- return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionSquadV
- (
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FXMVECTOR Q2,
- CXMVECTOR Q3,
- CXMVECTOR T
- )
- {
- XMVECTOR Q03;
- XMVECTOR Q12;
- XMVECTOR TP;
- XMVECTOR Two;
- XMVECTOR Result;
- XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
- TP = T;
- Two = XMVectorSplatConstant(2, 0);
- Q03 = XMQuaternionSlerpV(Q0, Q3, T);
- Q12 = XMQuaternionSlerpV(Q1, Q2, T);
- TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
- TP = XMVectorMultiply(TP, Two);
- Result = XMQuaternionSlerpV(Q03, Q12, TP);
- return Result;
- }
- //------------------------------------------------------------------------------
- XMINLINE VOID XMQuaternionSquadSetup
- (
- XMVECTOR* pA,
- XMVECTOR* pB,
- XMVECTOR* pC,
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FXMVECTOR Q2,
- CXMVECTOR Q3
- )
- {
- XMVECTOR SQ0, SQ2, SQ3;
- XMVECTOR InvQ1, InvQ2;
- XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
- XMVECTOR ExpQ02, ExpQ13;
- XMVECTOR LS01, LS12, LS23;
- XMVECTOR LD01, LD12, LD23;
- XMVECTOR Control0, Control1, Control2;
- XMVECTOR NegativeOneQuarter;
- XMASSERT(pA);
- XMASSERT(pB);
- XMASSERT(pC);
- LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
- LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
- SQ2 = XMVectorNegate(Q2);
- Control1 = XMVectorLess(LS12, LD12);
- SQ2 = XMVectorSelect(Q2, SQ2, Control1);
- LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
- LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
- SQ0 = XMVectorNegate(Q0);
- LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
- LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
- SQ3 = XMVectorNegate(Q3);
- Control0 = XMVectorLess(LS01, LD01);
- Control2 = XMVectorLess(LS23, LD23);
- SQ0 = XMVectorSelect(Q0, SQ0, Control0);
- SQ3 = XMVectorSelect(Q3, SQ3, Control2);
- InvQ1 = XMQuaternionInverse(Q1);
- InvQ2 = XMQuaternionInverse(SQ2);
- LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
- LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
- LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
- LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
- NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
- ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
- ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
- ExpQ02 = XMQuaternionExp(ExpQ02);
- ExpQ13 = XMQuaternionExp(ExpQ13);
- *pA = XMQuaternionMultiply(Q1, ExpQ02);
- *pB = XMQuaternionMultiply(SQ2, ExpQ13);
- *pC = SQ2;
- }
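- // Example usage (illustrative sketch): smooth spline interpolation through a
- // run of keyframe orientations. XMQuaternionSquadSetup computes the inner
- // control points A, B, C, and XMQuaternionSquad then interpolates the segment
- // between Q1 and Q2. Q0..Q3 and t are assumed to be defined by the caller.
- //
- //     XMVECTOR A, B, C;
- //     XMQuaternionSquadSetup(&A, &B, &C, Q0, Q1, Q2, Q3);
- //     XMVECTOR q = XMQuaternionSquad(Q1, A, B, C, t);    // t in [0,1] along Q1 -> Q2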
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionBaryCentric
- (
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FXMVECTOR Q2,
- FLOAT f,
- FLOAT g
- )
- {
- XMVECTOR Q01;
- XMVECTOR Q02;
- FLOAT s;
- XMVECTOR Result;
- s = f + g;
- if ((s < 0.00001f) && (s > -0.00001f))
- {
- Result = Q0;
- }
- else
- {
- Q01 = XMQuaternionSlerp(Q0, Q1, s);
- Q02 = XMQuaternionSlerp(Q0, Q2, s);
- Result = XMQuaternionSlerp(Q01, Q02, g / s);
- }
- return Result;
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionBaryCentricV
- (
- FXMVECTOR Q0,
- FXMVECTOR Q1,
- FXMVECTOR Q2,
- CXMVECTOR F,
- CXMVECTOR G
- )
- {
- XMVECTOR Q01;
- XMVECTOR Q02;
- XMVECTOR S, GS;
- XMVECTOR Epsilon;
- XMVECTOR Result;
- XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
- XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
- Epsilon = XMVectorSplatConstant(1, 16);
- S = XMVectorAdd(F, G);
- if (XMVector4InBounds(S, Epsilon))
- {
- Result = Q0;
- }
- else
- {
- Q01 = XMQuaternionSlerpV(Q0, Q1, S);
- Q02 = XMQuaternionSlerpV(Q0, Q2, S);
- GS = XMVectorReciprocal(S);
- GS = XMVectorMultiply(G, GS);
- Result = XMQuaternionSlerpV(Q01, Q02, GS);
- }
- return Result;
- }
- //------------------------------------------------------------------------------
- // Transformation operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionIdentity()
- {
- #if defined(_XM_NO_INTRINSICS_)
- return g_XMIdentityR3.v;
- #elif defined(_XM_SSE_INTRINSICS_)
- return g_XMIdentityR3;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
- (
- FLOAT Pitch,
- FLOAT Yaw,
- FLOAT Roll
- )
- {
- XMVECTOR Angles;
- XMVECTOR Q;
- Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
- Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
- return Q;
- }
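- // Example usage (illustrative sketch): building a quaternion from Euler angles.
- // The angles are in radians: pitch about X, yaw about Y, roll about Z.
- //
- //     XMVECTOR q = XMQuaternionRotationRollPitchYaw(XMConvertToRadians(10.0f),
- //                                                   XMConvertToRadians(45.0f),
- //                                                   0.0f);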
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
- (
- FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Q, Q0, Q1;
- XMVECTOR P0, P1, Y0, Y1, R0, R1;
- XMVECTOR HalfAngles;
- XMVECTOR SinAngles, CosAngles;
- static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
- static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
- static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
- static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f};
- HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
- XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
- P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
- Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
- R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
- P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
- Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
- R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
- Q1 = XMVectorMultiply(P1, Sign);
- Q0 = XMVectorMultiply(P0, Y0);
- Q1 = XMVectorMultiply(Q1, Y1);
- Q0 = XMVectorMultiply(Q0, R0);
- Q = XMVectorMultiplyAdd(Q1, R1, Q0);
- return Q;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR Q, Q0, Q1;
- XMVECTOR P0, P1, Y0, Y1, R0, R1;
- XMVECTOR HalfAngles;
- XMVECTOR SinAngles, CosAngles;
- static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
- static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
- static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
- static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
- HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
- XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
- P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
- Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
- R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
- P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
- Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
- R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
- Q1 = _mm_mul_ps(P1, Sign);
- Q0 = _mm_mul_ps(P0, Y0);
- Q1 = _mm_mul_ps(Q1, Y1);
- Q0 = _mm_mul_ps(Q0, R0);
- Q = _mm_mul_ps(Q1, R1);
- Q = _mm_add_ps(Q,Q0);
- return Q;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionRotationNormal
- (
- FXMVECTOR NormalAxis,
- FLOAT Angle
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Q;
- XMVECTOR N;
- XMVECTOR Scale;
- N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
- XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
- Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2];
- Q = XMVectorMultiply(N, Scale);
- return Q;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
- N = _mm_or_ps(N,g_XMIdentityR3);
- XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
- XMVECTOR vSine;
- XMVECTOR vCosine;
- XMVectorSinCos(&vSine,&vCosine,Scale);
- Scale = _mm_and_ps(vSine,g_XMMask3);
- vCosine = _mm_and_ps(vCosine,g_XMMaskW);
- Scale = _mm_or_ps(Scale,vCosine);
- N = _mm_mul_ps(N,Scale);
- return N;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMQuaternionRotationAxis
- (
- FXMVECTOR Axis,
- FLOAT Angle
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Normal;
- XMVECTOR Q;
- XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
- XMASSERT(!XMVector3IsInfinite(Axis));
- Normal = XMVector3Normalize(Axis);
- Q = XMQuaternionRotationNormal(Normal, Angle);
- return Q;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR Normal;
- XMVECTOR Q;
- XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
- XMASSERT(!XMVector3IsInfinite(Axis));
- Normal = XMVector3Normalize(Axis);
- Q = XMQuaternionRotationNormal(Normal, Angle);
- return Q;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
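- // Example usage (illustrative sketch): XMQuaternionRotationAxis normalizes the
- // axis internally; XMQuaternionRotationNormal skips that step and is the
- // cheaper call when the axis is already unit length.
- //
- //     XMVECTOR axis = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);                       // already normalized
- //     XMVECTOR qA   = XMQuaternionRotationNormal(axis, XM_PIDIV4);
- //     XMVECTOR qB   = XMQuaternionRotationAxis(XMVectorSet(0.0f, 3.0f, 0.0f, 0.0f), XM_PIDIV4); // same rotation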
- //------------------------------------------------------------------------------
- XMINLINE XMVECTOR XMQuaternionRotationMatrix
- (
- CXMMATRIX M
- )
- {
- #if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
- XMVECTOR Q0, Q1, Q2;
- XMVECTOR M00, M11, M22;
- XMVECTOR CQ0, CQ1, C;
- XMVECTOR CX, CY, CZ, CW;
- XMVECTOR SQ1, Scale;
- XMVECTOR Rsq, Sqrt, VEqualsNaN;
- XMVECTOR A, B, P;
- XMVECTOR PermuteSplat, PermuteSplatT;
- XMVECTOR SignB, SignBT;
- XMVECTOR PermuteControl, PermuteControlT;
- XMVECTOR Result;
- static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
- static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
- static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
- static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
- static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
- static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
- static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
- static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
- static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
- static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
- static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
- static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
- static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
- static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
- static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
- static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
- static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
- static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
- static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
- static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
- M00 = XMVectorSplatX(M.r[0]);
- M11 = XMVectorSplatY(M.r[1]);
- M22 = XMVectorSplatZ(M.r[2]);
- Q0 = XMVectorMultiply(SignPNNP.v, M00);
- Q0 = XMVectorMultiplyAdd(SignNPNP.v, M11, Q0);
- Q0 = XMVectorMultiplyAdd(SignNNPP.v, M22, Q0);
- Q1 = XMVectorAdd(Q0, g_XMOne.v);
- Rsq = XMVectorReciprocalSqrt(Q1);
- VEqualsNaN = XMVectorIsNaN(Rsq);
- Sqrt = XMVectorMultiply(Q1, Rsq);
- Q1 = XMVectorSelect(Sqrt, Q1, VEqualsNaN);
- Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);
- SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);
- CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
- CQ1 = XMVectorPermute(Q0, g_XMEpsilon.v, Permute0Y0Z0Z1W.v);
- C = XMVectorGreaterOrEqual(CQ0, CQ1);
- CX = XMVectorSplatX(C);
- CY = XMVectorSplatY(C);
- CZ = XMVectorSplatZ(C);
- CW = XMVectorSplatW(C);
- PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
- SignB = XMVectorSelect(SignNPPP.v, SignPPNP.v, CZ);
- PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);
- PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
- SignB = XMVectorSelect(SignB, SignNPPP.v, CX);
- PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);
- PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
- SignBT = XMVectorSelect(SignB, SignPNPP.v, CY);
- PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);
- PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
- SignB = XMVectorSelect(SignB, SignBT, CX);
- PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
- PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
- SignB = XMVectorSelect(SignB, g_XMNegativeOne.v, CW);
- PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);
- Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
- P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21}
- A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03}
- B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03}
- Q2 = XMVectorMultiplyAdd(SignB, B, A);
- Q2 = XMVectorMultiply(Q2, Scale);
- Result = XMVectorPermute(Q1, Q2, PermuteControl);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
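- // Example usage (illustrative sketch): round-tripping between a rotation matrix
- // and a quaternion. XMMatrixRotationQuaternion is declared in the companion
- // matrix header.
- //
- //     XMVECTOR q = XMQuaternionRotationRollPitchYaw(0.0f, XM_PIDIV4, 0.0f);
- //     XMMATRIX m = XMMatrixRotationQuaternion(q);
- //     XMVECTOR r = XMQuaternionRotationMatrix(m);    // r matches q up to sign and precision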
- //------------------------------------------------------------------------------
- // Conversion operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE VOID XMQuaternionToAxisAngle
- (
- XMVECTOR* pAxis,
- FLOAT* pAngle,
- FXMVECTOR Q
- )
- {
- XMASSERT(pAxis);
- XMASSERT(pAngle);
- *pAxis = Q;
- #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
- *pAngle = 2.0f * acosf(XMVectorGetW(Q));
- #else
- *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
- #endif
- }
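- // Example usage (illustrative sketch): recovering the axis and angle of a
- // rotation. The returned axis is simply the xyz of Q and is not normalized;
- // q is assumed to be a unit quaternion defined by the caller.
- //
- //     XMVECTOR axis;
- //     FLOAT    angle;
- //     XMQuaternionToAxisAngle(&axis, &angle, q);
- //     axis = XMVector3Normalize(axis);               // normalize if a unit axis is required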
- /****************************************************************************
- *
- * Plane
- *
- ****************************************************************************/
- //------------------------------------------------------------------------------
- // Comparison operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMPlaneEqual
- (
- FXMVECTOR P1,
- FXMVECTOR P2
- )
- {
- return XMVector4Equal(P1, P2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMPlaneNearEqual
- (
- FXMVECTOR P1,
- FXMVECTOR P2,
- FXMVECTOR Epsilon
- )
- {
- XMVECTOR NP1 = XMPlaneNormalize(P1);
- XMVECTOR NP2 = XMPlaneNormalize(P2);
- return XMVector4NearEqual(NP1, NP2, Epsilon);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMPlaneNotEqual
- (
- FXMVECTOR P1,
- FXMVECTOR P2
- )
- {
- return XMVector4NotEqual(P1, P2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMPlaneIsNaN
- (
- FXMVECTOR P
- )
- {
- return XMVector4IsNaN(P);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMPlaneIsInfinite
- (
- FXMVECTOR P
- )
- {
- return XMVector4IsInfinite(P);
- }
- //------------------------------------------------------------------------------
- // Computation operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneDot
- (
- FXMVECTOR P,
- FXMVECTOR V
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- return XMVector4Dot(P, V);
- #elif defined(_XM_SSE_INTRINSICS_)
- __m128 vTemp2 = V;
- __m128 vTemp = _mm_mul_ps(P,vTemp2);
- vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
- vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
- vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
- vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
- return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneDotCoord
- (
- FXMVECTOR P,
- FXMVECTOR V
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR V3;
- XMVECTOR Result;
- // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
- V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
- Result = XMVector4Dot(P, V3);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
- vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
- XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
- vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
- vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
- vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
- vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
- return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
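- // Example usage (illustrative sketch): the signed distance of a point from a
- // plane. For a normalized plane (a,b,c,d), XMPlaneDotCoord returns
- // a*x + b*y + c*z + d replicated to all four lanes; a positive value means the
- // point lies on the side the normal faces.
- //
- //     XMVECTOR plane = XMPlaneNormalize(XMVectorSet(0.0f, 1.0f, 0.0f, -2.0f));   // the plane y = 2
- //     XMVECTOR point = XMVectorSet(5.0f, 7.0f, 1.0f, 1.0f);
- //     FLOAT    dist  = XMVectorGetX(XMPlaneDotCoord(plane, point));              // 5.0f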
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneDotNormal
- (
- FXMVECTOR P,
- FXMVECTOR V
- )
- {
- return XMVector3Dot(P, V);
- }
- //------------------------------------------------------------------------------
- // XMPlaneNormalizeEst uses a reciprocal estimate and
- // returns QNaN on zero and infinite vectors.
- XMFINLINE XMVECTOR XMPlaneNormalizeEst
- (
- FXMVECTOR P
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR Result;
- Result = XMVector3ReciprocalLength(P);
- Result = XMVectorMultiply(P, Result);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- // Perform the dot product
- XMVECTOR vDot = _mm_mul_ps(P,P);
- // x=Dot.y, y=Dot.z
- XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
- // Result.x = x+y
- vDot = _mm_add_ss(vDot,vTemp);
- // x=Dot.z
- vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
- // Result.x = (x+y)+z
- vDot = _mm_add_ss(vDot,vTemp);
- // Splat x
- vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
- // Reciprocal square root estimate of the length squared
- vDot = _mm_rsqrt_ps(vDot);
- // Multiply by the reciprocal length to normalize
- vDot = _mm_mul_ps(vDot,P);
- return vDot;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneNormalize
- (
- FXMVECTOR P
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
- // Prevent divide by zero
- if (fLengthSq) {
- fLengthSq = 1.0f/fLengthSq;
- }
- {
- XMVECTOR vResult = {
- P.vector4_f32[0]*fLengthSq,
- P.vector4_f32[1]*fLengthSq,
- P.vector4_f32[2]*fLengthSq,
- P.vector4_f32[3]*fLengthSq
- };
- return vResult;
- }
- #elif defined(_XM_SSE_INTRINSICS_)
- // Perform the dot product on x,y and z only
- XMVECTOR vLengthSq = _mm_mul_ps(P,P);
- XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
- vLengthSq = _mm_add_ss(vLengthSq,vTemp);
- vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
- vLengthSq = _mm_add_ss(vLengthSq,vTemp);
- vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
- // Prepare for the division
- XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
- // Failsafe on zero (Or epsilon) length planes
- // If the length is infinity, set the elements to zero
- vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
- // Reciprocal mul to perform the normalization
- vResult = _mm_div_ps(P,vResult);
- // Any that are infinity, set to zero
- vResult = _mm_and_ps(vResult,vLengthSq);
- return vResult;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneIntersectLine
- (
- FXMVECTOR P,
- FXMVECTOR LinePoint1,
- FXMVECTOR LinePoint2
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR V1;
- XMVECTOR V2;
- XMVECTOR D;
- XMVECTOR ReciprocalD;
- XMVECTOR VT;
- XMVECTOR Point;
- XMVECTOR Zero;
- XMVECTOR Control;
- XMVECTOR Result;
- V1 = XMVector3Dot(P, LinePoint1);
- V2 = XMVector3Dot(P, LinePoint2);
- D = XMVectorSubtract(V1, V2);
- ReciprocalD = XMVectorReciprocal(D);
- VT = XMPlaneDotCoord(P, LinePoint1);
- VT = XMVectorMultiply(VT, ReciprocalD);
- Point = XMVectorSubtract(LinePoint2, LinePoint1);
- Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
- Zero = XMVectorZero();
- Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
- Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR V1;
- XMVECTOR V2;
- XMVECTOR D;
- XMVECTOR VT;
- XMVECTOR Point;
- XMVECTOR Zero;
- XMVECTOR Control;
- XMVECTOR Result;
- V1 = XMVector3Dot(P, LinePoint1);
- V2 = XMVector3Dot(P, LinePoint2);
- D = _mm_sub_ps(V1, V2);
- VT = XMPlaneDotCoord(P, LinePoint1);
- VT = _mm_div_ps(VT, D);
- Point = _mm_sub_ps(LinePoint2, LinePoint1);
- Point = _mm_mul_ps(Point,VT);
- Point = _mm_add_ps(Point,LinePoint1);
- Zero = XMVectorZero();
- Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
- Result = XMVectorSelect(Point, g_XMQNaN, Control);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
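- // Example usage (illustrative sketch): intersecting the line through two points
- // with a plane. The result is QNaN when the line is parallel to the plane.
- //
- //     XMVECTOR plane = XMPlaneFromPointNormal(XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f),
- //                                             XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f));
- //     XMVECTOR p0    = XMVectorSet(0.0f,  1.0f, 0.0f, 0.0f);
- //     XMVECTOR p1    = XMVectorSet(0.0f, -1.0f, 0.0f, 0.0f);
- //     XMVECTOR hit   = XMPlaneIntersectLine(plane, p0, p1);   // the origin
- //     BOOL     miss  = XMPlaneIsNaN(hit);                     // TRUE if no intersection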
- //------------------------------------------------------------------------------
- XMINLINE VOID XMPlaneIntersectPlane
- (
- XMVECTOR* pLinePoint1,
- XMVECTOR* pLinePoint2,
- FXMVECTOR P1,
- FXMVECTOR P2
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR V1;
- XMVECTOR V2;
- XMVECTOR V3;
- XMVECTOR LengthSq;
- XMVECTOR RcpLengthSq;
- XMVECTOR Point;
- XMVECTOR P1W;
- XMVECTOR P2W;
- XMVECTOR Control;
- XMVECTOR LinePoint1;
- XMVECTOR LinePoint2;
- XMASSERT(pLinePoint1);
- XMASSERT(pLinePoint2);
- V1 = XMVector3Cross(P2, P1);
- LengthSq = XMVector3LengthSq(V1);
- V2 = XMVector3Cross(P2, V1);
- P1W = XMVectorSplatW(P1);
- Point = XMVectorMultiply(V2, P1W);
- V3 = XMVector3Cross(V1, P1);
- P2W = XMVectorSplatW(P2);
- Point = XMVectorMultiplyAdd(V3, P2W, Point);
- RcpLengthSq = XMVectorReciprocal(LengthSq);
- LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
- LinePoint2 = XMVectorAdd(LinePoint1, V1);
- Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
- *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
- *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
- #elif defined(_XM_SSE_INTRINSICS_)
- XMASSERT(pLinePoint1);
- XMASSERT(pLinePoint2);
- XMVECTOR V1;
- XMVECTOR V2;
- XMVECTOR V3;
- XMVECTOR LengthSq;
- XMVECTOR Point;
- XMVECTOR P1W;
- XMVECTOR P2W;
- XMVECTOR Control;
- XMVECTOR LinePoint1;
- XMVECTOR LinePoint2;
- V1 = XMVector3Cross(P2, P1);
- LengthSq = XMVector3LengthSq(V1);
- V2 = XMVector3Cross(P2, V1);
- P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
- Point = _mm_mul_ps(V2, P1W);
- V3 = XMVector3Cross(V1, P1);
- P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
- V3 = _mm_mul_ps(V3,P2W);
- Point = _mm_add_ps(Point,V3);
- LinePoint1 = _mm_div_ps(Point,LengthSq);
- LinePoint2 = _mm_add_ps(LinePoint1, V1);
- Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
- *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
- *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
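- // Example usage (illustrative sketch): the line where two planes meet, returned
- // as two points on that line (QNaN if the planes are parallel).
- //
- //     XMVECTOR p1 = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);       // the plane y = 0
- //     XMVECTOR p2 = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);       // the plane x = 0
- //     XMVECTOR linePoint1, linePoint2;
- //     XMPlaneIntersectPlane(&linePoint1, &linePoint2, p1, p2); // two points on the z axis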
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneTransform
- (
- FXMVECTOR P,
- CXMMATRIX M
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR X;
- XMVECTOR Y;
- XMVECTOR Z;
- XMVECTOR W;
- XMVECTOR Result;
- W = XMVectorSplatW(P);
- Z = XMVectorSplatZ(P);
- Y = XMVectorSplatY(P);
- X = XMVectorSplatX(P);
- Result = XMVectorMultiply(W, M.r[3]);
- Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
- Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
- Result = XMVectorMultiplyAdd(X, M.r[0], Result);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
- XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
- XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
- XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
- X = _mm_mul_ps(X, M.r[0]);
- Y = _mm_mul_ps(Y, M.r[1]);
- Z = _mm_mul_ps(Z, M.r[2]);
- W = _mm_mul_ps(W, M.r[3]);
- X = _mm_add_ps(X,Z);
- Y = _mm_add_ps(Y,W);
- X = _mm_add_ps(X,Y);
- return X;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
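- // Example usage (illustrative sketch): transforming a plane into world space.
- // As with D3DXPlaneTransform, M should be the inverse transpose of the matrix
- // that transforms points so the plane normal survives non-uniform scale.
- // Assumes a caller-defined world matrix and localPlane, plus the matrix helpers
- // from the companion header.
- //
- //     XMVECTOR det;
- //     XMMATRIX invTranspose = XMMatrixTranspose(XMMatrixInverse(&det, world));
- //     XMVECTOR worldPlane   = XMPlaneTransform(XMPlaneNormalize(localPlane), invTranspose);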
- //------------------------------------------------------------------------------
- XMFINLINE XMFLOAT4* XMPlaneTransformStream
- (
- XMFLOAT4* pOutputStream,
- UINT OutputStride,
- CONST XMFLOAT4* pInputStream,
- UINT InputStride,
- UINT PlaneCount,
- CXMMATRIX M
- )
- {
- return XMVector4TransformStream(pOutputStream,
- OutputStride,
- pInputStream,
- InputStride,
- PlaneCount,
- M);
- }
- //------------------------------------------------------------------------------
- // Conversion operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneFromPointNormal
- (
- FXMVECTOR Point,
- FXMVECTOR Normal
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR W;
- XMVECTOR Result;
- W = XMVector3Dot(Point, Normal);
- W = XMVectorNegate(W);
- Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR W;
- XMVECTOR Result;
- W = XMVector3Dot(Point,Normal);
- W = _mm_mul_ps(W,g_XMNegativeOne);
- Result = _mm_and_ps(Normal,g_XMMask3);
- W = _mm_and_ps(W,g_XMMaskW);
- Result = _mm_or_ps(Result,W);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMPlaneFromPoints
- (
- FXMVECTOR Point1,
- FXMVECTOR Point2,
- FXMVECTOR Point3
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR N;
- XMVECTOR D;
- XMVECTOR V21;
- XMVECTOR V31;
- XMVECTOR Result;
- V21 = XMVectorSubtract(Point1, Point2);
- V31 = XMVectorSubtract(Point1, Point3);
- N = XMVector3Cross(V21, V31);
- N = XMVector3Normalize(N);
- D = XMPlaneDotNormal(N, Point1);
- D = XMVectorNegate(D);
- Result = XMVectorSelect(D, N, g_XMSelect1110.v);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR N;
- XMVECTOR D;
- XMVECTOR V21;
- XMVECTOR V31;
- XMVECTOR Result;
- V21 = _mm_sub_ps(Point1, Point2);
- V31 = _mm_sub_ps(Point1, Point3);
- N = XMVector3Cross(V21, V31);
- N = XMVector3Normalize(N);
- D = XMPlaneDotNormal(N, Point1);
- D = _mm_mul_ps(D,g_XMNegativeOne);
- N = _mm_and_ps(N,g_XMMask3);
- D = _mm_and_ps(D,g_XMMaskW);
- Result = _mm_or_ps(D,N);
- return Result;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
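- // Example usage (illustrative sketch): building a plane from the three vertices
- // of a triangle. The winding order determines which side the normal faces, and
- // the returned plane is already normalized.
- //
- //     XMVECTOR a = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f);
- //     XMVECTOR b = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
- //     XMVECTOR c = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
- //     XMVECTOR plane = XMPlaneFromPoints(a, b, c);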
- /****************************************************************************
- *
- * Color
- *
- ****************************************************************************/
- //------------------------------------------------------------------------------
- // Comparison operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorEqual
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVector4Equal(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorNotEqual
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVector4NotEqual(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorGreater
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVector4Greater(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorGreaterOrEqual
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVector4GreaterOrEqual(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorLess
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVector4Less(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorLessOrEqual
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVector4LessOrEqual(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorIsNaN
- (
- FXMVECTOR C
- )
- {
- return XMVector4IsNaN(C);
- }
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMColorIsInfinite
- (
- FXMVECTOR C
- )
- {
- return XMVector4IsInfinite(C);
- }
- //------------------------------------------------------------------------------
- // Computation operations
- //------------------------------------------------------------------------------
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMColorNegative
- (
- FXMVECTOR vColor
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- // XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f)));
- // XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f)));
- XMVECTOR vResult = {
- 1.0f - vColor.vector4_f32[0],
- 1.0f - vColor.vector4_f32[1],
- 1.0f - vColor.vector4_f32[2],
- vColor.vector4_f32[3]
- };
- return vResult;
- #elif defined(_XM_SSE_INTRINSICS_)
- // Negate only x,y and z.
- XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
- // Add 1,1,1,0 to -x,-y,-z,w
- return _mm_add_ps(vTemp,g_XMOne3);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMColorModulate
- (
- FXMVECTOR C1,
- FXMVECTOR C2
- )
- {
- return XMVectorMultiply(C1, C2);
- }
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMColorAdjustSaturation
- (
- FXMVECTOR vColor,
- FLOAT fSaturation
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
- // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
- // Result = (C - Luminance) * Saturation + Luminance;
- FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
- XMVECTOR vResult = {
- ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
- ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
- ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
- vColor.vector4_f32[3]};
- return vResult;
- #elif defined(_XM_SSE_INTRINSICS_)
- static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
- // Mul RGB by intensity constants
- XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
- // vResult.x = vLuminance.y, vResult.y = vLuminance.y,
- // vResult.z = vLuminance.z, vResult.w = vLuminance.z
- XMVECTOR vResult = vLuminance;
- vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
- // vLuminance.x += vLuminance.y
- vLuminance = _mm_add_ss(vLuminance,vResult);
- // Splat vLuminance.z
- vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
- // vLuminance.x += vLuminance.z (Dot product)
- vLuminance = _mm_add_ss(vLuminance,vResult);
- // Splat vLuminance
- vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
- // Splat fSaturation
- XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
- // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
- vResult = _mm_sub_ps(vColor,vLuminance);
- vResult = _mm_mul_ps(vResult,vSaturation);
- vResult = _mm_add_ps(vResult,vLuminance);
- // Retain w from the source color
- vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
- vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
- return vResult;
- #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
- #endif // _XM_VMX128_INTRINSICS_
- }
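- // Example usage (illustrative sketch): desaturating a color toward its
- // luminance. A saturation of 0.0f yields grayscale, 1.0f returns the color
- // unchanged, and values above 1.0f oversaturate; alpha is preserved.
- //
- //     XMVECTOR color = XMVectorSet(0.8f, 0.2f, 0.1f, 1.0f);
- //     XMVECTOR gray  = XMColorAdjustSaturation(color, 0.0f);
- //     XMVECTOR vivid = XMColorAdjustSaturation(color, 1.5f);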
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMColorAdjustContrast
- (
- FXMVECTOR vColor,
- FLOAT fContrast
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- // Result = (vColor - 0.5f) * fContrast + 0.5f;
- XMVECTOR vResult = {
- ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
- ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
- ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
- vColor.vector4_f32[3] // Leave W untouched
- };
- return vResult;
- #elif defined(_XM_SSE_INTRINSICS_)
- XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
- XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
- vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
- vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
- // Retain w from the source color
- vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
- vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
- return vResult;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
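- // Illustrative use of XMColorAdjustContrast (example only): fContrast = 0.0f
- // pulls every channel to 0.5f, 1.0f returns the color unchanged, and values
- // above 1.0f increase contrast; alpha is preserved.
- //
- //   XMVECTOR vColor   = XMVectorSet( 0.25f, 0.5f, 0.75f, 1.0f );
- //   XMVECTOR vFlatter = XMColorAdjustContrast( vColor, 0.5f );  // { 0.375f, 0.5f, 0.625f, 1.0f }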
- /****************************************************************************
- *
- * Miscellaneous
- *
- ****************************************************************************/
- //------------------------------------------------------------------------------
- XMINLINE BOOL XMVerifyCPUSupport()
- {
- #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
- return TRUE;
- #else // _XM_SSE_INTRINSICS_
- // Note: on Windows 2000 and earlier, IsProcessorFeaturePresent cannot report SSE2,
- // so this check always fails there; detecting SSE2 on those versions of Windows
- // would require using cpuid directly
- return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
- #endif
- }
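- // Typical startup check (example only): when the library is built with SSE
- // intrinsics, call this once before using any other XNA Math function and
- // refuse to initialize (or fall back) if the required instructions are missing.
- //
- //   if ( !XMVerifyCPUSupport() )
- //   {
- //       // SSE/SSE2 unavailable on this CPU
- //       return FALSE;
- //   }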
- //------------------------------------------------------------------------------
- #define XMASSERT_LINE_STRING_SIZE 16
- XMINLINE VOID XMAssert
- (
- CONST CHAR* pExpression,
- CONST CHAR* pFileName,
- UINT LineNumber
- )
- {
- CHAR aLineString[XMASSERT_LINE_STRING_SIZE];
- CHAR* pLineString;
- UINT Line;
- aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
- aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
- for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
- Line != 0 && pLineString >= aLineString;
- Line /= 10, pLineString--)
- {
- *pLineString = (CHAR)('0' + (Line % 10));
- }
- #ifndef NO_OUTPUT_DEBUG_STRING
- OutputDebugStringA("Assertion failed: ");
- OutputDebugStringA(pExpression);
- OutputDebugStringA(", file ");
- OutputDebugStringA(pFileName);
- OutputDebugStringA(", line ");
- OutputDebugStringA(pLineString + 1);
- OutputDebugStringA("\r\n");
- #else
- DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
- #endif
- __debugbreak();
- }
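- // When an assertion expression fails in a debug build, the routine above writes
- // a message of the form (example only):
- //
- //   Assertion failed: <expression>, file <file>, line <line>
- //
- // to the debugger output and then triggers __debugbreak().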
- //------------------------------------------------------------------------------
- XMFINLINE XMVECTOR XMFresnelTerm
- (
- FXMVECTOR CosIncidentAngle,
- FXMVECTOR RefractionIndex
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR G;
- XMVECTOR D, S;
- XMVECTOR V0, V1, V2, V3;
- XMVECTOR Result;
- // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
- // c = CosIncidentAngle
- // g = sqrt(c^2 + RefractionIndex^2 - 1)
- XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
- G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
- G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
- G = XMVectorAbs(G);
- G = XMVectorSqrt(G);
- S = XMVectorAdd(G, CosIncidentAngle);
- D = XMVectorSubtract(G, CosIncidentAngle);
- V0 = XMVectorMultiply(D, D);
- V1 = XMVectorMultiply(S, S);
- V1 = XMVectorReciprocal(V1);
- V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
- V0 = XMVectorMultiply(V0, V1);
- V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
- V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
- V2 = XMVectorMultiply(V2, V2);
- V3 = XMVectorMultiply(V3, V3);
- V3 = XMVectorReciprocal(V3);
- V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
- Result = XMVectorMultiply(V0, V2);
- Result = XMVectorSaturate(Result);
- return Result;
- #elif defined(_XM_SSE_INTRINSICS_)
- // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
- // c = CosIncidentAngle
- // g = sqrt(c^2 + RefractionIndex^2 - 1)
- XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
- // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
- XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
- XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
- G = _mm_sub_ps(G,g_XMOne);
- vTemp = _mm_add_ps(vTemp,G);
- // max((0-vTemp),vTemp) == abs(vTemp)
- // The abs() is needed so sqrt() receives a non-negative argument when the
- // refraction index and cosine terms are both near zero
- G = _mm_setzero_ps();
- G = _mm_sub_ps(G,vTemp);
- G = _mm_max_ps(G,vTemp);
- // Last operation, the sqrt()
- G = _mm_sqrt_ps(G);
- // Calc G-C and G+C
- XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
- XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
- // Perform the term (0.5f *(g - c)^2) / (g + c)^2
- XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
- vTemp = _mm_mul_ps(GAddC,GAddC);
- vResult = _mm_mul_ps(vResult,g_XMOneHalf);
- vResult = _mm_div_ps(vResult,vTemp);
- // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
- GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
- GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
- GAddC = _mm_sub_ps(GAddC,g_XMOne);
- GSubC = _mm_add_ps(GSubC,g_XMOne);
- GAddC = _mm_mul_ps(GAddC,GAddC);
- GSubC = _mm_mul_ps(GSubC,GSubC);
- GAddC = _mm_div_ps(GAddC,GSubC);
- GAddC = _mm_add_ps(GAddC,g_XMOne);
- // Multiply the two term parts
- vResult = _mm_mul_ps(vResult,GAddC);
- // Clamp to 0.0 - 1.0f
- vResult = _mm_max_ps(vResult,g_XMZero);
- vResult = _mm_min_ps(vResult,g_XMOne);
- return vResult;
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
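- // Illustrative use of XMFresnelTerm (example only): at normal incidence
- // (c = 1) the formula above reduces to ((n - 1) / (n + 1))^2, so a typical
- // glass index of 1.5 gives a reflectance of about 0.04 in every lane.
- //
- //   XMVECTOR vCos = XMVectorReplicate( 1.0f );   // cosine of the incident angle
- //   XMVECTOR vEta = XMVectorReplicate( 1.5f );   // refraction index
- //   XMVECTOR vF   = XMFresnelTerm( vCos, vEta ); // ~{ 0.04f, 0.04f, 0.04f, 0.04f }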
- //------------------------------------------------------------------------------
- XMFINLINE BOOL XMScalarNearEqual
- (
- FLOAT S1,
- FLOAT S2,
- FLOAT Epsilon
- )
- {
- FLOAT Delta = S1 - S2;
- #if defined(_XM_NO_INTRINSICS_)
- UINT AbsDelta = *(UINT*)&Delta & 0x7FFFFFFF;
- return (*(FLOAT*)&AbsDelta <= Epsilon);
- #elif defined(_XM_SSE_INTRINSICS_)
- return (fabsf(Delta) <= Epsilon);
- #else
- return (__fabs(Delta) <= Epsilon);
- #endif
- }
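- // Illustrative use of XMScalarNearEqual (example only): absolute-epsilon
- // comparison, handy for checking that a squared length is close to 1.0f
- // (fLengthSq is assumed to be caller-provided data).
- //
- //   if ( XMScalarNearEqual( fLengthSq, 1.0f, 1.0e-4f ) )
- //   {
- //       // treat the vector as already normalized
- //   }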
- //------------------------------------------------------------------------------
- // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
- XMFINLINE FLOAT XMScalarModAngle
- (
- FLOAT Angle
- )
- {
- // Note: The modulo is performed with unsigned math only to work
- // around a precision error on numbers that are close to PI
- float fTemp;
- #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
- // Normalize the range from 0.0f to XM_2PI
- Angle = Angle + XM_PI;
- // Perform the modulo, unsigned
- fTemp = fabsf(Angle);
- fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
- // Restore the number to the range of -XM_PI to XM_PI-epsilon
- fTemp = fTemp - XM_PI;
- // If the modulo'd value was negative, restore negation
- if (Angle<0.0f) {
- fTemp = -fTemp;
- }
- return fTemp;
- #else
- #endif
- }
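- // Illustrative results of XMScalarModAngle (example only): the return value is
- // congruent to the input modulo XM_2PI and lies in [-XM_PI, XM_PI).
- //
- //   XMScalarModAngle( 3.0f * XM_PI );        // -XM_PI
- //   XMScalarModAngle( XM_PIDIV2 + XM_2PI );  //  XM_PIDIV2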
- //------------------------------------------------------------------------------
- XMINLINE FLOAT XMScalarSin
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT ValueMod;
- FLOAT ValueSq;
- XMVECTOR V0123, V0246, V1357, V9111315, V17192123;
- XMVECTOR V1, V7, V8;
- XMVECTOR R0, R1, R2;
- ValueMod = XMScalarModAngle(Value);
- // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
- // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
- ValueSq = ValueMod * ValueMod;
- V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
- V1 = XMVectorSplatY(V0123);
- V0246 = XMVectorMultiply(V0123, V0123);
- V1357 = XMVectorMultiply(V0246, V1);
- V7 = XMVectorSplatW(V1357);
- V8 = XMVectorMultiply(V7, V1);
- V9111315 = XMVectorMultiply(V1357, V8);
- V17192123 = XMVectorMultiply(V9111315, V8);
- R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
- R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
- R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
- return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- return sinf( Value );
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMINLINE FLOAT XMScalarCos
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT ValueMod;
- FLOAT ValueSq;
- XMVECTOR V0123, V0246, V8101214, V16182022;
- XMVECTOR V2, V6, V8;
- XMVECTOR R0, R1, R2;
- ValueMod = XMScalarModAngle(Value);
- // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
- // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
- ValueSq = ValueMod * ValueMod;
- V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
- V0246 = XMVectorMultiply(V0123, V0123);
- V2 = XMVectorSplatZ(V0123);
- V6 = XMVectorSplatW(V0246);
- V8 = XMVectorMultiply(V6, V2);
- V8101214 = XMVectorMultiply(V0246, V8);
- V16182022 = XMVectorMultiply(V8101214, V8);
- R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
- R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
- R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
- return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- return cosf(Value);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMINLINE VOID XMScalarSinCos
- (
- FLOAT* pSin,
- FLOAT* pCos,
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT ValueMod;
- FLOAT ValueSq;
- XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
- XMVECTOR V1, V2, V6, V8;
- XMVECTOR S0, S1, S2, C0, C1, C2;
- XMASSERT(pSin);
- XMASSERT(pCos);
- ValueMod = XMScalarModAngle(Value);
- // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
- // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
- // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
- // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
- ValueSq = ValueMod * ValueMod;
- V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
- V1 = XMVectorSplatY(V0123);
- V2 = XMVectorSplatZ(V0123);
- V0246 = XMVectorMultiply(V0123, V0123);
- V1357 = XMVectorMultiply(V0246, V1);
- V6 = XMVectorSplatW(V0246);
- V8 = XMVectorMultiply(V6, V2);
- V8101214 = XMVectorMultiply(V0246, V8);
- V9111315 = XMVectorMultiply(V1357, V8);
- V16182022 = XMVectorMultiply(V8101214, V8);
- V17192123 = XMVectorMultiply(V9111315, V8);
- C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
- S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
- C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
- S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
- C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
- S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
- *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0];
- *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- XMASSERT(pSin);
- XMASSERT(pCos);
- *pSin = sinf(Value);
- *pCos = cosf(Value);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
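- // Illustrative use of XMScalarSinCos (example only): computing both values in
- // a single call, e.g. when building a 2D rotation.
- //
- //   FLOAT fSin, fCos;
- //   XMScalarSinCos( &fSin, &fCos, XM_PIDIV4 );  // both ~0.7071f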
- //------------------------------------------------------------------------------
- XMINLINE FLOAT XMScalarASin
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT AbsValue, Value2, Value3, D;
- XMVECTOR AbsV, R0, R1, Result;
- XMVECTOR V3;
- *(UINT*)&AbsValue = *(UINT*)&Value & 0x7FFFFFFF;
- Value2 = Value * AbsValue;
- Value3 = Value * Value2;
- D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);
- AbsV = XMVectorReplicate(AbsValue);
- V3.vector4_f32[0] = Value3;
- V3.vector4_f32[1] = 1.0f;
- V3.vector4_f32[2] = Value3;
- V3.vector4_f32[3] = 1.0f;
- R1 = XMVectorSet(D, D, Value, Value);
- R1 = XMVectorMultiply(R1, V3);
- R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
- R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);
- Result = XMVector4Dot(R0, R1);
- return Result.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- return asinf(Value);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMINLINE FLOAT XMScalarACos
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- return XM_PIDIV2 - XMScalarASin(Value);
- #elif defined(_XM_SSE_INTRINSICS_)
- return acosf(Value);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
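- // Illustrative use of XMScalarACos (example only): recovering the angle between
- // two unit-length vectors from their dot product (vN1 and vN2 are assumed to be
- // normalized caller-provided vectors).
- //
- //   XMVECTOR vDot   = XMVector3Dot( vN1, vN2 );
- //   FLOAT    fAngle = XMScalarACos( XMVectorGetX( vDot ) );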
- //------------------------------------------------------------------------------
- XMFINLINE FLOAT XMScalarSinEst
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT ValueSq;
- XMVECTOR V;
- XMVECTOR Y;
- XMVECTOR Result;
- XMASSERT(Value >= -XM_PI);
- XMASSERT(Value < XM_PI);
- // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
- ValueSq = Value * Value;
- V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
- Y = XMVectorSplatY(V);
- V = XMVectorMultiply(V, V);
- V = XMVectorMultiply(V, Y);
- Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);
- return Result.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- XMASSERT(Value >= -XM_PI);
- XMASSERT(Value < XM_PI);
- float ValueSq = Value*Value;
- XMVECTOR vValue = _mm_set_ps1(Value);
- XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
- vTemp = _mm_mul_ps(vTemp,vTemp);
- vTemp = _mm_mul_ps(vTemp,vValue);
- // vTemp = Value,Value^3,Value^5,Value^7
- vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
- vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
- vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
- vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
- vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
- vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
- #if defined(_MSC_VER) && (_MSC_VER>=1500)
- return _mm_cvtss_f32(vTemp);
- #else
- return vTemp.m128_f32[0];
- #endif
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE FLOAT XMScalarCosEst
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT ValueSq;
- XMVECTOR V;
- XMVECTOR Result;
- XMASSERT(Value >= -XM_PI);
- XMASSERT(Value < XM_PI);
- // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
- ValueSq = Value * Value;
- V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
- V = XMVectorMultiply(V, V);
- Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);
- return Result.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- XMASSERT(Value >= -XM_PI);
- XMASSERT(Value < XM_PI);
- float ValueSq = Value*Value;
- XMVECTOR vValue = _mm_setzero_ps();
- XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
- vTemp = _mm_mul_ps(vTemp,vTemp);
- // vTemp = 1.0f,Value^2,Value^4,Value^6
- vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
- vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
- vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
- vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
- vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
- vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
- #if defined(_MSC_VER) && (_MSC_VER>=1500)
- return _mm_cvtss_f32(vTemp);
- #else
- return vTemp.m128_f32[0];
- #endif
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE VOID XMScalarSinCosEst
- (
- FLOAT* pSin,
- FLOAT* pCos,
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- FLOAT ValueSq;
- XMVECTOR V, Sin, Cos;
- XMVECTOR Y;
- XMASSERT(pSin);
- XMASSERT(pCos);
- XMASSERT(Value >= -XM_PI);
- XMASSERT(Value < XM_PI);
- // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
- // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
- ValueSq = Value * Value;
- V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
- Y = XMVectorSplatY(V);
- Cos = XMVectorMultiply(V, V);
- Sin = XMVectorMultiply(Cos, Y);
- Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
- Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);
- *pCos = Cos.vector4_f32[0];
- *pSin = Sin.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- XMASSERT(pSin);
- XMASSERT(pCos);
- XMASSERT(Value >= -XM_PI);
- XMASSERT(Value < XM_PI);
- float ValueSq = Value * Value;
- XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
- XMVECTOR Sin = _mm_set_ps1(Value);
- Cos = _mm_mul_ps(Cos,Cos);
- Sin = _mm_mul_ps(Sin,Cos);
- // Cos = 1.0f,Value^2,Value^4,Value^6
- Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
- _mm_store_ss(pCos,Cos);
- // Sin = Value,Value^3,Value^5,Value^7
- Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
- _mm_store_ss(pSin,Sin);
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
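- // The SinEst/CosEst/SinCosEst variants above trade accuracy for speed and assert
- // that the input is already in [-XM_PI, XM_PI); if that is not guaranteed, wrap
- // the angle first (example only, fAngle is caller-provided data):
- //
- //   FLOAT fSin, fCos;
- //   XMScalarSinCosEst( &fSin, &fCos, XMScalarModAngle( fAngle ) );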
- //------------------------------------------------------------------------------
- XMFINLINE FLOAT XMScalarASinEst
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR VR, CR, CS;
- XMVECTOR Result;
- FLOAT AbsV, V2, D;
- CONST FLOAT OnePlusEps = 1.00000011921f;
- *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;
- V2 = Value * AbsV;
- D = OnePlusEps - AbsV;
- CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
- VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
- CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
- Result = XMVector4Dot(VR, CR);
- return Result.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- CONST FLOAT OnePlusEps = 1.00000011921f;
- FLOAT AbsV = fabsf(Value);
- FLOAT V2 = Value * AbsV; // Square with sign retained
- FLOAT D = OnePlusEps - AbsV;
- XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
- XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
- Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
- Result = XMVector4Dot(VR,Result);
- #if defined(_MSC_VER) && (_MSC_VER>=1500)
- return _mm_cvtss_f32(Result);
- #else
- return Result.m128_f32[0];
- #endif
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- //------------------------------------------------------------------------------
- XMFINLINE FLOAT XMScalarACosEst
- (
- FLOAT Value
- )
- {
- #if defined(_XM_NO_INTRINSICS_)
- XMVECTOR VR, CR, CS;
- XMVECTOR Result;
- FLOAT AbsV, V2, D;
- CONST FLOAT OnePlusEps = 1.00000011921f;
- // return XM_PIDIV2 - XMScalarASin(Value);
- *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;
- V2 = Value * AbsV;
- D = OnePlusEps - AbsV;
- CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
- VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
- CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
- Result = XMVector4Dot(VR, CR);
- return XM_PIDIV2 - Result.vector4_f32[0];
- #elif defined(_XM_SSE_INTRINSICS_)
- CONST FLOAT OnePlusEps = 1.00000011921f;
- FLOAT AbsV = fabsf(Value);
- FLOAT V2 = Value * AbsV; // Value^2 retaining sign
- FLOAT D = OnePlusEps - AbsV;
- XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
- XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
- Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
- Result = XMVector4Dot(VR,Result);
- #if defined(_MSC_VER) && (_MSC_VER>=1500)
- return XM_PIDIV2 - _mm_cvtss_f32(Result);
- #else
- return XM_PIDIV2 - Result.m128_f32[0];
- #endif
- #else // _XM_VMX128_INTRINSICS_
- #endif // _XM_VMX128_INTRINSICS_
- }
- #endif // __XNAMATHMISC_INL__