1 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
2 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
/*
 * SSE4.1 kernel: multiplies each 8-bit complex sample of aVector by the
 * complex conjugate of the matching sample of bVector and produces 16-bit
 * complex results, four points per SIMD iteration, with a scalar loop for the
 * remaining (num_points % 4) points.
 *
 * cVector    - destination of the 16-bit complex results.
 * aVector    - first input; interleaved int8 real/imag pairs (real first, as
 *              read by the scalar tail below).
 * bVector    - second input, same layout; this is the operand that gets
 *              conjugated.
 * num_points - number of complex points to process.
 *
 * NOTE(review): this listing is incomplete. The declarations of the cursors
 * `a`, `b` and `c`, the end of the SIMD loop (including any cursor advance),
 * and the end of the scalar tail loop and of the function are not visible
 * here. Presumably `a`/`b`/`c` walk aVector/bVector/cVector - confirm against
 * the full file.
 */
17 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(
lv_16sc_t* cVector,
const lv_8sc_t* aVector,
const lv_8sc_t* bVector,
unsigned int num_points){
18 unsigned int number = 0;
/* Each SIMD iteration consumes 8 bytes (four int8 complex points) per input. */
19 const unsigned int quarterPoints = num_points / 4;
21 __m128i x, y, realz, imagz;
/*
 * _mm_set_epi16 lists lanes from highest to lowest, so lane 0 is +1, lane 1
 * is -1, and so on: even (real) lanes keep their sign, odd (imag) lanes are
 * negated when this mask is applied with _mm_sign_epi16.
 */
25 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
27 for(;number < quarterPoints; number++){
/* Load 8 bytes from each input and sign-extend int8 -> int16:
 * x = [aRe0, aIm0, aRe1, aIm1, ...], y = [bRe0, bIm0, bRe1, bIm1, ...]. */
29 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
30 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
/* Pairwise multiply-add gives aRe*bRe + aIm*bIm per point as a 32-bit value:
 * the real part of a * conj(b). */
33 realz = _mm_madd_epi16(x,y);
/* Negate the imaginary lanes of y: y = [bRe, -bIm, ...] (conjugate of b). */
36 y = _mm_sign_epi16(y, conjugateSign);
/* Swap the two 16-bit lanes of every pair: y = [-bIm, bRe, ...]. */
39 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
/* Pairwise multiply-add gives aIm*bRe - aRe*bIm per point: the imaginary
 * part of a * conj(b). */
42 imagz = _mm_madd_epi16(x,y);
/*
 * Interleave the 32-bit results as [re0, im0, re1, im1] / [re2, im2, re3, im3],
 * narrow to int16 with signed saturation, and write all four points with an
 * aligned 128-bit store (the address held in `c` must be 16-byte aligned).
 */
44 _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz)));
/* NOTE(review): the remainder of the SIMD loop body and its closing brace are
 * missing from this listing. */
/* Scalar tail: resume at the first point not covered by the SIMD loop. */
51 number = quarterPoints * 4;
/* View the remaining points as raw int16 / int8 component streams. */
52 int16_t* c16Ptr = (int16_t*)&cVector[number];
53 int8_t* a8Ptr = (int8_t*)&aVector[number];
54 int8_t* b8Ptr = (int8_t*)&bVector[number];
55 for(; number < num_points; number++){
/* Read the real then imaginary component of each operand, widened to float. */
56 float aReal = (float)*a8Ptr++;
57 float aImag = (float)*a8Ptr++;
59 float bReal = (float)*b8Ptr++;
60 float bImag = (float)*b8Ptr++;
/* NOTE(review): the rest of the tail loop (the multiply and the stores through
 * c16Ptr) and the end of the function are missing from this listing. */
70 #ifdef LV_HAVE_GENERIC
78 static inline void volk_8ic_x2_multiply_conjugate_16ic_a_generic(
lv_16sc_t* cVector,
const lv_8sc_t* aVector,
const lv_8sc_t* bVector,
unsigned int num_points){
79 unsigned int number = 0;
80 int16_t* c16Ptr = (int16_t*)cVector;
81 int8_t* a8Ptr = (int8_t*)aVector;
82 int8_t* b8Ptr = (int8_t*)bVector;
83 for(number =0; number < num_points; number++){
84 float aReal = (float)*a8Ptr++;
85 float aImag = (float)*a8Ptr++;
87 float bReal = (float)*b8Ptr++;
88 float bImag = (float)*b8Ptr++;