1 #ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
2 #define INCLUDED_volk_8i_s32f_convert_32f_a_H
/*!
 * \brief SSE4.1 kernel: converts 8-bit signed integers to floats and divides
 *        each converted value by \p scalar.
 *
 * Full groups of 16 inputs are handled with SIMD; the remaining
 * (num_points % 16) elements are handled by a scalar tail loop. The division
 * is carried out as a multiplication by the reciprocal 1.0 / scalar, which is
 * computed once.
 *
 * The SIMD path uses aligned loads and stores (_mm_load_si128 / _mm_store_ps),
 * so both buffers must be 16-byte aligned whenever num_points >= 16.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer holding num_points int8_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector,
                                                     const int8_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    unsigned int number = 0;
    unsigned int quarter;
    const unsigned int sixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    const int8_t* inputVectorPtr = inputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    __m128 ret;
    __m128i inputVal;
    __m128i interimVal;

    for (; number < sixteenthPoints; number++) {
        /* One aligned load brings in 16 input bytes. */
        inputVal = _mm_load_si128((const __m128i*)inputVectorPtr);

        /* Each pass converts the low 4 bytes, then shifts the next 4 down. */
        for (quarter = 0; quarter < 4; quarter++) {
            interimVal = _mm_cvtepi8_epi32(inputVal); /* sign-extend 4 x int8 -> int32 */
            ret = _mm_cvtepi32_ps(interimVal);
            ret = _mm_mul_ps(ret, invScalar);
            _mm_store_ps(outputVectorPtr, ret);
            outputVectorPtr += 4; /* advance, or every store would hit the same slot */

            inputVal = _mm_srli_si128(inputVal, 4); /* byte shift, not bit shift */
        }

        inputVectorPtr += 16;
    }

    /* Scalar tail for the elements that do not fill a 16-wide group. */
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]) * iScalar;
    }
}
69 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Portable kernel: converts 8-bit signed integers to floats and divides
 *        each converted value by \p scalar.
 *
 * Every output element is (float)input[i] multiplied by the reciprocal
 * 1.0 / scalar; the reciprocal is computed once, before the loop.
 *
 * \param outputVector Destination buffer; receives num_points floats.
 * \param inputVector  Source buffer holding num_points int8_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert; 0 writes nothing.
 */
static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector,
                                                      const int8_t* inputVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const int8_t* inputVectorPtr = inputVector;
    unsigned int number = 0;
    const float iScalar = 1.0 / scalar;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
    }
}
/*
 * ORC-backed implementation, defined outside this header.
 * NOTE(review): the wrapper below hands it the reciprocal of the divisor in
 * the `scalar` slot, so it presumably multiplies rather than divides --
 * confirm against the implementation.
 */
extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector,
                                                const int8_t* inputVector,
                                                const float scalar,
                                                unsigned int num_points);

/*!
 * \brief ORC entry point: forwards to volk_8i_s32f_convert_32f_a_orc_impl,
 *        passing 1.0 / scalar in place of \p scalar.
 *
 * \param outputVector Destination buffer, forwarded unchanged.
 * \param inputVector  Source buffer of int8_t values, forwarded unchanged.
 * \param scalar       Divisor; its reciprocal is what the implementation receives.
 * \param num_points   Element count, forwarded unchanged.
 */
static inline void volk_8i_s32f_convert_32f_a_orc(float* outputVector,
                                                  const int8_t* inputVector,
                                                  const float scalar,
                                                  unsigned int num_points)
{
    float invscalar = 1.0 / scalar;
    volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points);
}