The conclusion is:
- NEON intrinsics support converting float32 values to int32 fixed-point values in a given Q format (number of fractional bits). The reverse conversion is also supported.
- The NEON conversion intrinsics already clip (saturate) float values that fall outside of [-1, 1].
- The issue cost is 1 cycle per 32x2 vector on the Cortex-A9.
Source code:
#include <inttypes.h>
#include <stdio.h>

#include "arm_neon.h"

/*
 * Demonstrates the NEON float32 <-> fixed-point int32 conversion intrinsics.
 *
 * Loads four floats, converts them to signed fixed-point values with q
 * fractional bits using vcvtq_n_s32_f32(), prints the raw results in hex,
 * then converts them back with vcvtq_n_f32_s32() and prints the
 * round-tripped floats.
 */
void test_float_int32_converter(void)
{
    /*
     * The fractional-bit count is passed to the vcvtq_n_* intrinsics as an
     * immediate, so it has to be an integer constant expression. An enum
     * constant is one in C; a `const int` object is not.
     */
    enum { q = 31 };

    float f[] = {-1.0f, -0.5f, 0.0f, 0.5f};
    int32_t i[] = {0, 0, 0, 0};

    /* float32x4 -> int32x4 with q fractional bits. */
    float32x4_t vf = vld1q_f32(&f[0]);
    int32x4_t vi = vcvtq_n_s32_f32(vf, q);
    vst1q_s32(&i[0], vi);

    printf("Q=%d\n", q);
    printf("src float 32x4: \t%f,\t%f,\t%f,\t%f\n", f[0], f[1], f[2], f[3]);
    /* Hex conversions take unsigned arguments, so show the bit patterns
     * through uint32_t with the matching <inttypes.h> specifier. */
    printf("dst int 32x4: \t%8.8" PRIx32 ",\t%8.8" PRIx32 ",\t%8.8" PRIx32
           ",\t%8.8" PRIx32 "\n",
           (uint32_t)i[0], (uint32_t)i[1], (uint32_t)i[2], (uint32_t)i[3]);

    /* int32x4 -> float32x4, reusing the same fractional-bit count. */
    vf = vcvtq_n_f32_s32(vi, q);
    vst1q_f32(&f[0], vf);
    printf("dst float 32x4: \t%f,\t%f,\t%f,\t%f\n", f[0], f[1], f[2], f[3]);
}
Build:
armcc --arm_linux_paths --arm_linux_config_file=/home/jxion/arm_linux_config.xml main.c --cpu=Cortex-A9 -o test.exe
|
Result on Pandaboard:
Q=31
src float 32x4: -1.000000, -0.500000, 0.000000, 0.500000
dst int 32x4: 80000000, c0000000, 00000000, 40000000
dst float 32x4: -1.000000, -0.500000, 0.000000, 0.500000 |
Q=31
src float 32x4: -1.200000, -0.500000, 1.000000, 1.500000
dst int 32x4: 80000000, c0000000, 7fffffff, 7fffffff
dst float 32x4: -1.000000, -0.500000, 1.000000, 1.000000