GNU Radio 3.4.0 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_stddev_32f_a16_H 00002 #define INCLUDED_volk_32f_s32f_stddev_32f_a16_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <math.h> 00007 00008 #if LV_HAVE_SSE4_1 00009 #include <smmintrin.h> 00010 /*! 00011 \brief Calculates the standard deviation of the input buffer using the supplied mean 00012 \param stddev The calculated standard deviation 00013 \param inputBuffer The buffer of points to calculate the std deviation for 00014 \param mean The mean of the input buffer 00015 \param num_points The number of values in input buffer to used in the stddev calculation 00016 */ 00017 static inline void volk_32f_s32f_stddev_32f_a16_sse4_1(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ 00018 float returnValue = 0; 00019 if(num_points > 0){ 00020 unsigned int number = 0; 00021 const unsigned int sixteenthPoints = num_points / 16; 00022 00023 const float* aPtr = inputBuffer; 00024 00025 float squareBuffer[4] __attribute__((aligned(128))); 00026 00027 __m128 squareAccumulator = _mm_setzero_ps(); 00028 __m128 aVal1, aVal2, aVal3, aVal4; 00029 __m128 cVal1, cVal2, cVal3, cVal4; 00030 for(;number < sixteenthPoints; number++) { 00031 aVal1 = _mm_load_ps(aPtr); aPtr += 4; 00032 cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1); 00033 00034 aVal2 = _mm_load_ps(aPtr); aPtr += 4; 00035 cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2); 00036 00037 aVal3 = _mm_load_ps(aPtr); aPtr += 4; 00038 cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4); 00039 00040 aVal4 = _mm_load_ps(aPtr); aPtr += 4; 00041 cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8); 00042 00043 cVal1 = _mm_or_ps(cVal1, cVal2); 00044 cVal3 = _mm_or_ps(cVal3, cVal4); 00045 cVal1 = _mm_or_ps(cVal1, cVal3); 00046 00047 squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2 00048 } 00049 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00050 returnValue = squareBuffer[0]; 00051 returnValue += squareBuffer[1]; 00052 returnValue += squareBuffer[2]; 00053 returnValue += squareBuffer[3]; 00054 00055 number = sixteenthPoints * 16; 00056 for(;number < num_points; number++){ 00057 returnValue += (*aPtr) * (*aPtr); 00058 aPtr++; 00059 } 00060 returnValue /= num_points; 00061 returnValue -= (mean * mean); 00062 returnValue = sqrt(returnValue); 00063 } 00064 *stddev = returnValue; 00065 } 00066 #endif /* LV_HAVE_SSE4_1 */ 00067 00068 #if LV_HAVE_SSE 00069 #include <xmmintrin.h> 00070 /*! 00071 \brief Calculates the standard deviation of the input buffer using the supplied mean 00072 \param stddev The calculated standard deviation 00073 \param inputBuffer The buffer of points to calculate the std deviation for 00074 \param mean The mean of the input buffer 00075 \param num_points The number of values in input buffer to used in the stddev calculation 00076 */ 00077 static inline void volk_32f_s32f_stddev_32f_a16_sse(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ 00078 float returnValue = 0; 00079 if(num_points > 0){ 00080 unsigned int number = 0; 00081 const unsigned int quarterPoints = num_points / 4; 00082 00083 const float* aPtr = inputBuffer; 00084 00085 float squareBuffer[4] __attribute__((aligned(128))); 00086 00087 __m128 squareAccumulator = _mm_setzero_ps(); 00088 __m128 aVal = _mm_setzero_ps(); 00089 for(;number < quarterPoints; number++) { 00090 aVal = _mm_load_ps(aPtr); // aVal = x 00091 aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2 00092 squareAccumulator = _mm_add_ps(squareAccumulator, aVal); 00093 aPtr += 4; 00094 } 00095 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00096 returnValue = squareBuffer[0]; 00097 returnValue += squareBuffer[1]; 00098 returnValue += squareBuffer[2]; 00099 returnValue += squareBuffer[3]; 00100 00101 number = quarterPoints * 4; 00102 for(;number < num_points; number++){ 00103 returnValue += (*aPtr) * (*aPtr); 00104 aPtr++; 00105 } 00106 returnValue /= num_points; 00107 returnValue -= (mean * mean); 00108 returnValue = sqrt(returnValue); 00109 } 00110 *stddev = returnValue; 00111 } 00112 #endif /* LV_HAVE_SSE */ 00113 00114 #if LV_HAVE_GENERIC 00115 /*! 00116 \brief Calculates the standard deviation of the input buffer using the supplied mean 00117 \param stddev The calculated standard deviation 00118 \param inputBuffer The buffer of points to calculate the std deviation for 00119 \param mean The mean of the input buffer 00120 \param num_points The number of values in input buffer to used in the stddev calculation 00121 */ 00122 static inline void volk_32f_s32f_stddev_32f_a16_generic(float* stddev, const float* inputBuffer, const float mean, unsigned int num_points){ 00123 float returnValue = 0; 00124 if(num_points > 0){ 00125 const float* aPtr = inputBuffer; 00126 unsigned int number = 0; 00127 00128 for(number = 0; number < num_points; number++){ 00129 returnValue += (*aPtr) * (*aPtr); 00130 aPtr++; 00131 } 00132 00133 returnValue /= num_points; 00134 returnValue -= (mean * mean); 00135 returnValue = sqrt(returnValue); 00136 } 00137 *stddev = returnValue; 00138 } 00139 #endif /* LV_HAVE_GENERIC */ 00140 00141 00142 00143 00144 #endif /* INCLUDED_volk_32f_s32f_stddev_32f_a16_H */