/* GNU Radio 3.4.0 C++ API */
00001 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H 00002 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <math.h> 00007 00008 #if LV_HAVE_SSE4_1 00009 #include <smmintrin.h> 00010 /*! 00011 \brief Calculates the standard deviation and mean of the input buffer 00012 \param stddev The calculated standard deviation 00013 \param mean The mean of the input buffer 00014 \param inputBuffer The buffer of points to calculate the std deviation for 00015 \param num_points The number of values in input buffer to used in the stddev and mean calculations 00016 */ 00017 static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse4_1(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ 00018 float returnValue = 0; 00019 float newMean = 0; 00020 if(num_points > 0){ 00021 unsigned int number = 0; 00022 const unsigned int sixteenthPoints = num_points / 16; 00023 00024 const float* aPtr = inputBuffer; 00025 float meanBuffer[4] __attribute__((aligned(128))); 00026 float squareBuffer[4] __attribute__((aligned(128))); 00027 00028 __m128 accumulator = _mm_setzero_ps(); 00029 __m128 squareAccumulator = _mm_setzero_ps(); 00030 __m128 aVal1, aVal2, aVal3, aVal4; 00031 __m128 cVal1, cVal2, cVal3, cVal4; 00032 for(;number < sixteenthPoints; number++) { 00033 aVal1 = _mm_load_ps(aPtr); aPtr += 4; 00034 cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1); 00035 accumulator = _mm_add_ps(accumulator, aVal1); // accumulator += x 00036 00037 aVal2 = _mm_load_ps(aPtr); aPtr += 4; 00038 cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2); 00039 accumulator = _mm_add_ps(accumulator, aVal2); // accumulator += x 00040 00041 aVal3 = _mm_load_ps(aPtr); aPtr += 4; 00042 cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4); 00043 accumulator = _mm_add_ps(accumulator, aVal3); // accumulator += x 00044 00045 aVal4 = _mm_load_ps(aPtr); aPtr += 4; 00046 cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8); 00047 accumulator = _mm_add_ps(accumulator, 
aVal4); // accumulator += x 00048 00049 cVal1 = _mm_or_ps(cVal1, cVal2); 00050 cVal3 = _mm_or_ps(cVal3, cVal4); 00051 cVal1 = _mm_or_ps(cVal1, cVal3); 00052 00053 squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2 00054 } 00055 _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container 00056 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00057 newMean = meanBuffer[0]; 00058 newMean += meanBuffer[1]; 00059 newMean += meanBuffer[2]; 00060 newMean += meanBuffer[3]; 00061 returnValue = squareBuffer[0]; 00062 returnValue += squareBuffer[1]; 00063 returnValue += squareBuffer[2]; 00064 returnValue += squareBuffer[3]; 00065 00066 number = sixteenthPoints * 16; 00067 for(;number < num_points; number++){ 00068 returnValue += (*aPtr) * (*aPtr); 00069 newMean += *aPtr++; 00070 } 00071 newMean /= num_points; 00072 returnValue /= num_points; 00073 returnValue -= (newMean * newMean); 00074 returnValue = sqrt(returnValue); 00075 } 00076 *stddev = returnValue; 00077 *mean = newMean; 00078 } 00079 #endif /* LV_HAVE_SSE4_1 */ 00080 00081 #if LV_HAVE_SSE 00082 #include <xmmintrin.h> 00083 /*! 
00084 \brief Calculates the standard deviation and mean of the input buffer 00085 \param stddev The calculated standard deviation 00086 \param mean The mean of the input buffer 00087 \param inputBuffer The buffer of points to calculate the std deviation for 00088 \param num_points The number of values in input buffer to used in the stddev and mean calculations 00089 */ 00090 static inline void volk_32f_stddev_and_mean_32f_x2_a16_sse(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ 00091 float returnValue = 0; 00092 float newMean = 0; 00093 if(num_points > 0){ 00094 unsigned int number = 0; 00095 const unsigned int quarterPoints = num_points / 4; 00096 00097 const float* aPtr = inputBuffer; 00098 float meanBuffer[4] __attribute__((aligned(128))); 00099 float squareBuffer[4] __attribute__((aligned(128))); 00100 00101 __m128 accumulator = _mm_setzero_ps(); 00102 __m128 squareAccumulator = _mm_setzero_ps(); 00103 __m128 aVal = _mm_setzero_ps(); 00104 for(;number < quarterPoints; number++) { 00105 aVal = _mm_load_ps(aPtr); // aVal = x 00106 accumulator = _mm_add_ps(accumulator, aVal); // accumulator += x 00107 aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2 00108 squareAccumulator = _mm_add_ps(squareAccumulator, aVal); 00109 aPtr += 4; 00110 } 00111 _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container 00112 _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container 00113 newMean = meanBuffer[0]; 00114 newMean += meanBuffer[1]; 00115 newMean += meanBuffer[2]; 00116 newMean += meanBuffer[3]; 00117 returnValue = squareBuffer[0]; 00118 returnValue += squareBuffer[1]; 00119 returnValue += squareBuffer[2]; 00120 returnValue += squareBuffer[3]; 00121 00122 number = quarterPoints * 4; 00123 for(;number < num_points; number++){ 00124 returnValue += (*aPtr) * (*aPtr); 00125 newMean += *aPtr++; 00126 } 00127 newMean /= num_points; 00128 returnValue /= num_points; 00129 
returnValue -= (newMean * newMean); 00130 returnValue = sqrt(returnValue); 00131 } 00132 *stddev = returnValue; 00133 *mean = newMean; 00134 } 00135 #endif /* LV_HAVE_SSE */ 00136 00137 #if LV_HAVE_GENERIC 00138 /*! 00139 \brief Calculates the standard deviation and mean of the input buffer 00140 \param stddev The calculated standard deviation 00141 \param mean The mean of the input buffer 00142 \param inputBuffer The buffer of points to calculate the std deviation for 00143 \param num_points The number of values in input buffer to used in the stddev and mean calculations 00144 */ 00145 static inline void volk_32f_stddev_and_mean_32f_x2_a16_generic(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points){ 00146 float returnValue = 0; 00147 float newMean = 0; 00148 if(num_points > 0){ 00149 const float* aPtr = inputBuffer; 00150 unsigned int number = 0; 00151 00152 for(number = 0; number < num_points; number++){ 00153 returnValue += (*aPtr) * (*aPtr); 00154 newMean += *aPtr++; 00155 } 00156 newMean /= num_points; 00157 returnValue /= num_points; 00158 returnValue -= (newMean * newMean); 00159 returnValue = sqrt(returnValue); 00160 } 00161 *stddev = returnValue; 00162 *mean = newMean; 00163 } 00164 #endif /* LV_HAVE_GENERIC */ 00165 00166 00167 00168 00169 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a16_H */