1 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
2 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
/*!
  \brief Multiplies each input float by a scalar and converts the product to a
         saturated 32-bit signed integer (SSE2, unaligned loads and stores).

  Products are rounded using the current MXCSR rounding mode (round to nearest
  even unless the caller changed it); the scalar tail uses the same conversion
  instruction so a sample's result does not depend on its index.
  Products >= 2^31 yield INT32_MAX. Products below -2^31, and NaN, yield
  INT32_MIN.

  \param outputVector Destination for num_points int32_t results
  \param inputVector  Source of num_points float samples
  \param scalar       Factor applied to every sample before conversion
  \param num_points   Number of samples to convert
*/
static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector,
                                                    const float* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;

  /* INT32_MAX is not representable as a float (2147483647 rounds up to 2^31),
     so positive overflow is detected against 2^31 instead of clamping to it. */
  const float upperBound = 2147483648.0f;
  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vUpperBound = _mm_set_ps1(upperBound);

  for(; number < quarterPoints; number++){
    const __m128 scaled = _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar);
    /* All-ones in every lane whose product is >= 2^31 (false for NaN). */
    const __m128i tooLarge = _mm_castps_si128(_mm_cmpge_ps(scaled, vUpperBound));
    /* The conversion returns 0x80000000 for every out-of-range or NaN lane,
       which is already the right answer for negative overflow. XOR with the
       all-ones mask flips it to 0x7FFFFFFF for positive overflow. */
    const __m128i converted = _mm_xor_si128(_mm_cvtps_epi32(scaled), tooLarge);

    _mm_storeu_si128((__m128i*)outputVectorPtr, converted);
    inputVectorPtr += 4;
    outputVectorPtr += 4;
  }

  /* Remaining 0-3 samples, converted with the scalar form of the same
     instruction so rounding and saturation match the vector lanes. */
  for(number = quarterPoints * 4; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= upperBound){
      outputVector[number] = INT32_MAX;
    }
    else{
      outputVector[number] = (int32_t)_mm_cvtss_si32(_mm_set_ss(r));
    }
  }
}
58 #include <xmmintrin.h>
/*!
  \brief Multiplies each input float by a scalar and converts the product to a
         saturated 32-bit signed integer (SSE multiply, unaligned loads).

  Full groups of four samples are multiplied with SSE; a trailing group of one
  to three samples is multiplied in scalar code. Every product then goes
  through the same conversion: it is truncated toward zero, products >= 2^31
  yield INT32_MAX, and products <= -2^31 as well as NaN yield INT32_MIN.

  \param outputVector Destination for num_points int32_t results
  \param inputVector  Source of num_points float samples
  \param scalar       Factor applied to every sample before conversion
  \param num_points   Number of samples to convert
*/
static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points){
  /* INT32_MAX is not representable as a float (2147483647 rounds up to 2^31),
     and casting 2^31 to int32_t is undefined, so the bounds are the exact
     powers of two and saturation is done by comparison, not by clamping. */
  const float upperBound = 2147483648.0f;
  const float lowerBound = -2147483648.0f;
  const __m128 vScalar = _mm_set_ps1(scalar);

  const float* inputVectorPtr = inputVector;
  int32_t* outputVectorPtr = outputVector;
  unsigned int remaining = num_points;
  float scaled[4];

  while(remaining > 0){
    const unsigned int count = (remaining < 4) ? remaining : 4;
    unsigned int lane;

    if(count == 4){
      /* Unaligned store: the local buffer carries no alignment guarantee. */
      _mm_storeu_ps(scaled, _mm_mul_ps(_mm_loadu_ps(inputVectorPtr), vScalar));
    }
    else{
      for(lane = 0; lane < count; lane++){
        scaled[lane] = inputVectorPtr[lane] * scalar;
      }
    }

    for(lane = 0; lane < count; lane++){
      const float r = scaled[lane];
      if(r >= upperBound){
        outputVectorPtr[lane] = INT32_MAX;
      }
      else if(r > lowerBound){
        outputVectorPtr[lane] = (int32_t)r;
      }
      else{
        /* r <= -2^31, or r is NaN (both comparisons above are false). */
        outputVectorPtr[lane] = INT32_MIN;
      }
    }

    inputVectorPtr += count;
    outputVectorPtr += count;
    remaining -= count;
  }
}
111 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each input float by a scalar and converts the product to a
         saturated 32-bit signed integer (portable C).

  In-range products are truncated toward zero. Products >= 2^31 yield
  INT32_MAX; products <= -2^31, and NaN, yield INT32_MIN.

  \param outputVector Destination for num_points int32_t results
  \param inputVector  Source of num_points float samples
  \param scalar       Factor applied to every sample before conversion
  \param num_points   Number of samples to convert
*/
static inline void volk_32f_s32f_convert_32i_u_generic(int32_t* outputVector,
                                                       const float* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points){
  /* INT32_MAX is not representable as a float (2147483647 rounds up to 2^31),
     and casting 2^31 to int32_t is undefined, so the bounds are the exact
     powers of two and saturation is done by comparison, not by clamping. */
  const float upperBound = 2147483648.0f;
  const float lowerBound = -2147483648.0f;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    const float r = inputVector[number] * scalar;
    if(r >= upperBound){
      outputVector[number] = INT32_MAX;
    }
    else if(r > lowerBound){
      outputVector[number] = (int32_t)r;
    }
    else{
      /* r <= -2^31, or r is NaN (both comparisons above are false). */
      outputVector[number] = INT32_MIN;
    }
  }
}