// Auto-generated by `genhbc`, do not edit!
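//
// Unrolled convolution kernels (convolve1 .. convolve14), provided in
// SSE2, NEON and plain scalar variants selected at compile time.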

#if defined( R8B_SSE2 )

R8BHBC1( convolve1 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
R8BHBC2

R8BHBC1( convolve2 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

R8BHBC1( convolve3 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
	op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
R8BHBC2

R8BHBC1( convolve4 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

R8BHBC1( convolve5 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
	op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
R8BHBC2

R8BHBC1( convolve6 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

R8BHBC1( convolve7 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
	op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
R8BHBC2

R8BHBC1( convolve8 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

R8BHBC1( convolve9 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
	op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
R8BHBC2

R8BHBC1( convolve10 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

R8BHBC1( convolve11 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
	op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
R8BHBC2

R8BHBC1( convolve12 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

R8BHBC1( convolve13 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
	op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
R8BHBC2

R8BHBC1( convolve14 )
	__m128d v1, v2, m1, s1;
	v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = m1;
	__m128d v3, v4, m3, s3;
	v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = m3;
	v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
	m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
		_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
	s3 = _mm_add_pd( s3, m3 );
	v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 );
	m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ),
		_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
	s1 = _mm_add_pd( s1, m1 );
	s1 = _mm_add_pd( s1, s3 );
	_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2

#elif defined( R8B_NEON )

R8BHBC1( convolve1 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
R8BHBC2

R8BHBC1( convolve2 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

R8BHBC1( convolve3 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
R8BHBC2

R8BHBC1( convolve4 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

R8BHBC1( convolve5 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
R8BHBC2

R8BHBC1( convolve6 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

R8BHBC1( convolve7 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
R8BHBC2

R8BHBC1( convolve8 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

R8BHBC1( convolve9 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
R8BHBC2

R8BHBC1( convolve10 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

R8BHBC1( convolve11 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
R8BHBC2

R8BHBC1( convolve12 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

R8BHBC1( convolve13 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
R8BHBC2

R8BHBC1( convolve14 )
	float64x2_t v1, v2, s1;
	s1 = vdupq_n_f64( 0.0 );
	v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	float64x2_t v3, v4, s3;
	s3 = vdupq_n_f64( 0.0 );
	v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
	s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
		vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
	v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 );
	s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ),
		vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
	s1 = vaddq_f64( s1, s3 );
	op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2

#else // SIMD

R8BHBC1( convolve1 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
R8BHBC2

R8BHBC1( convolve2 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]);
R8BHBC2

R8BHBC1( convolve3 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
R8BHBC2

R8BHBC1( convolve4 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]);
R8BHBC2

R8BHBC1( convolve5 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
R8BHBC2

R8BHBC1( convolve6 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]);
R8BHBC2

R8BHBC1( convolve7 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
R8BHBC2

R8BHBC1( convolve8 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]);
R8BHBC2

R8BHBC1( convolve9 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
		+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
R8BHBC2

R8BHBC1( convolve10 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
		+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
		+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]);
R8BHBC2

R8BHBC1( convolve11 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
		+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
		+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
		+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
R8BHBC2

R8BHBC1( convolve12 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
		+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
		+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
		+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
		+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]);
R8BHBC2

R8BHBC1( convolve13 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
		+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
		+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
		+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
		+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
		+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
R8BHBC2

R8BHBC1( convolve14 )
	op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
		+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
		+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
		+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
		+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
		+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
		+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
		+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
		+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
		+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
		+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
		+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
		+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ])
		+ flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]);
R8BHBC2

#endif // SIMD
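
// The functions above all evaluate the same symmetric tap sum,
// op[ 1 ] = sum over i of flt[ i ] * ( rp[ i + 1 ] + rp[ -i ]), unrolled
// per tap count. The sketch below is an illustrative, non-generated
// reference form of that sum: the name `convolveGeneric` and the `fltlen`
// parameter are assumptions for illustration only, and the loop framing
// normally supplied by the R8BHBC1 / R8BHBC2 macros is omitted here.

static inline double convolveGeneric( const double* const rp,
	const double* const flt, const int fltlen )
{
	double s = 0.0;
	int i;

	for( i = 0; i < fltlen; i++ )
	{
		// Each tap weighs a pair of input samples placed symmetrically
		// around the output position, as in the unrolled variants above.
		s += flt[ i ] * ( rp[ i + 1 ] + rp[ -i ]);
	}

	return( s );
}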