我认为它看起来会类似于:
/*
 * Load four consecutive bytes from p and widen each one to a 16-bit lane.
 *
 * p must point to at least 4 readable bytes; no alignment is required.
 * Returns a vector whose lanes 0..3 hold p[0]..p[3], zero-extended
 * (vmovl_u8 is an unsigned widen) and reinterpreted as int16.
 */
inline int16x4_t LoadAndConvert4(const uint8_t * p)
{
    /*
     * Copy the bytes instead of dereferencing (uint32_t *)p: that cast
     * drops const, breaks the effective-type (strict aliasing) rule and
     * may be a misaligned access. A byte-wise copy keeps the native byte
     * order, so the value matches what the direct 32-bit load produced.
     */
    uint32_t packed = 0;
    unsigned char *dst = (unsigned char *)&packed;
    for (unsigned i = 0; i < sizeof packed; ++i) {
        dst[i] = p[i];
    }

    /* Broadcast the 32-bit word, view it as 8 bytes, widen, keep the low half. */
    return vreinterpret_s16_u16(vget_low_u16(vmovl_u8(
        vreinterpret_u8_u32(vdup_n_u32(packed)))));
}
Run Code Online (Sandbox Code Playgroud)
或者一步一步:
/*
 * Load four consecutive bytes from p and widen each one to a 16-bit lane,
 * written out one intrinsic per step.
 *
 * p must point to at least 4 readable bytes; no alignment is required.
 * Returns a vector whose lanes 0..3 hold p[0]..p[3], zero-extended
 * (vmovl_u8 is an unsigned widen) and reinterpreted as int16.
 */
inline int16x4_t LoadAndConvert4(const uint8_t * p)
{
    /*
     * Copy the bytes instead of dereferencing (uint32_t *)p: that cast
     * drops const, breaks the effective-type (strict aliasing) rule and
     * may be a misaligned access. A byte-wise copy keeps the native byte
     * order, so the value matches what the direct 32-bit load produced.
     */
    uint32_t u32 = 0;
    unsigned char *bytes = (unsigned char *)&u32;
    for (unsigned i = 0; i < sizeof u32; ++i) {
        bytes[i] = p[i];
    }

    /* Each step consumes the variable declared on the line before it. */
    uint32x2_t u32x2 = vdup_n_u32(u32);            /* both 32-bit lanes = the 4 bytes */
    uint8x8_t u8x8 = vreinterpret_u8_u32(u32x2);   /* same bits viewed as 8 bytes */
    uint16x8_t u16x8 = vmovl_u8(u8x8);             /* widen every byte to 16 bits */
    uint16x4_t u16x4 = vget_low_u16(u16x8);        /* keep the first four lanes */
    return vreinterpret_s16_u16(u16x4);
}
Run Code Online (Sandbox Code Playgroud)