我正在尝试优化以下代码（complex.cpp）：
/*
 * Complex value stored as a pair of single-precision floats.
 *
 * Note: despite the "d" in the type name, both members are float, not double.
 * Member order is significant: the inline assembly in ComplexConv appears to
 * load the members through the struct pointer at byte offsets 0 (re) and
 * 4 (im), so do not reorder or widen them without updating that code.
 */
typedef struct {
    float re, im; /* re: real part, im: imaginary part */
} dcmplx;
dcmplx ComplexConv(int len, dcmplx *hat, dcmplx *buf)
{
int i;
dcmplx z, xout;
xout.re = xout.im = 0.0;
asm volatile (
"movs r3, #0\n\t"
".loop:\n\t"
"vldr s11, [%[hat], #4]\n\t"
"vldr s13, [%[hat]]\n\t"
"vneg.f32 s11, s11\n\t"
"vldr s15, [%[buf], #4]\n\t"
"vldr s12, [%[buf]]\n\t"
"vmul.f32 s14, s15, s13\n\t"
"vmul.f32 s15, s11, s15\n\t"
"adds %[hat], #8\n\t"
"vmla.f32 s14, s11, s12\n\t"
"vnmls.f32 s15, s12, s13\n\t"
"adds %[buf], #8\n\t"
"vadd.f32 s1, s1, s14\n\t"
"vadd.f32 s0, …Run Code Online (Sandbox Code Playgroud)