GNU Radio 3.4.0 C++ API
|
00001 #ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H 00002 #define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <volk/volk_complex.h> 00007 00008 #if LV_HAVE_SSE4_1 00009 #include <smmintrin.h> 00010 /*! 00011 \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector 00012 \param cVector The complex vector where the results will be stored 00013 \param aVector One of the complex vectors to be multiplied 00014 \param bVector The complex vector which will be converted to complex conjugate and multiplied 00015 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00016 */ 00017 static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_sse4_1(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ 00018 unsigned int number = 0; 00019 const unsigned int quarterPoints = num_points / 4; 00020 00021 __m128i x, y, realz, imagz; 00022 lv_16sc_t* c = cVector; 00023 const lv_8sc_t* a = aVector; 00024 const lv_8sc_t* b = bVector; 00025 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1); 00026 const int shuffleMask = _MM_SHUFFLE(2,3,0,1); 00027 00028 for(;number < quarterPoints; number++){ 00029 // Convert into 8 bit values into 16 bit values 00030 x = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)a)); 00031 y = _mm_cvtepi8_epi16(_mm_movpi64_epi64(*(__m64*)b)); 00032 00033 // Calculate the ar*cr - ai*(-ci) portions 00034 realz = _mm_madd_epi16(x,y); 00035 00036 // Calculate the complex conjugate of the cr + ci j values 00037 y = _mm_sign_epi16(y, conjugateSign); 00038 00039 // Shift the order of the cr and ci values 00040 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, shuffleMask ), shuffleMask); 00041 00042 // Calculate the ar*(-ci) + cr*(ai) 00043 imagz = _mm_madd_epi16(x,y); 00044 00045 _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, imagz), _mm_unpackhi_epi32(realz, imagz))); 00046 00047 a += 4; 00048 b += 4; 00049 c += 4; 00050 } 00051 00052 number = quarterPoints * 4; 00053 int16_t* c16Ptr = (int16_t*)&cVector[number]; 00054 int8_t* a8Ptr = (int8_t*)&aVector[number]; 00055 int8_t* b8Ptr = (int8_t*)&bVector[number]; 00056 for(; number < num_points; number++){ 00057 float aReal = (float)*a8Ptr++; 00058 float aImag = (float)*a8Ptr++; 00059 lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); 00060 float bReal = (float)*b8Ptr++; 00061 float bImag = (float)*b8Ptr++; 00062 lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); 00063 lv_32fc_t temp = aVal * bVal; 00064 00065 *c16Ptr++ = (int16_t)lv_creal(temp); 00066 *c16Ptr++ = (int16_t)lv_cimag(temp); 00067 } 00068 } 00069 #endif /* LV_HAVE_SSE4_1 */ 00070 00071 #if LV_HAVE_GENERIC 00072 /*! 00073 \brief Multiplys the one complex vector with the complex conjugate of the second complex vector and stores their results in the third vector 00074 \param cVector The complex vector where the results will be stored 00075 \param aVector One of the complex vectors to be multiplied 00076 \param bVector The complex vector which will be converted to complex conjugate and multiplied 00077 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00078 */ 00079 static inline void volk_8ic_x2_multiply_conjugate_16ic_a16_generic(lv_16sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ 00080 unsigned int number = 0; 00081 int16_t* c16Ptr = (int16_t*)cVector; 00082 int8_t* a8Ptr = (int8_t*)aVector; 00083 int8_t* b8Ptr = (int8_t*)bVector; 00084 for(number =0; number < num_points; number++){ 00085 float aReal = (float)*a8Ptr++; 00086 float aImag = (float)*a8Ptr++; 00087 lv_32fc_t aVal = lv_32fc_init(aReal, aImag ); 00088 float bReal = (float)*b8Ptr++; 00089 float bImag = (float)*b8Ptr++; 00090 lv_32fc_t bVal = lv_32fc_init( bReal, -bImag ); 00091 lv_32fc_t temp = aVal * bVal; 00092 00093 *c16Ptr++ = (int16_t)lv_creal(temp); 00094 *c16Ptr++ = (int16_t)lv_cimag(temp); 00095 } 00096 } 00097 #endif /* LV_HAVE_GENERIC */ 00098 00099 00100 00101 00102 #endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a16_H */