Message ID | 5297785C.5030509@linaro.org |
---|---|
State | Superseded |
Headers | show |
On 28 November 2013 17:07, Will Newton <will.newton@linaro.org> wrote: > > This adds support for the ARMv8 Advanced SIMD VMAXNM and VMINNM > instructions. > > Signed-off-by: Will Newton <will.newton@linaro.org> > --- > target-arm/helper.h | 3 +++ > target-arm/neon_helper.c | 24 ++++++++++++++++++++++++ > target-arm/translate.c | 23 +++++++++++++++++------ > 3 files changed, 44 insertions(+), 6 deletions(-) > > diff --git a/target-arm/helper.h b/target-arm/helper.h > index d459a39..3ecbbd2 100644 > --- a/target-arm/helper.h > +++ b/target-arm/helper.h > @@ -355,6 +355,9 @@ DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) > DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr) > DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr) > > +DEF_HELPER_3(neon_maxnm_f32, i32, i32, i32, ptr) > +DEF_HELPER_3(neon_minnm_f32, i32, i32, i32, ptr) > + > /* iwmmxt_helper.c */ > DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) > DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) > diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c > index b028cc2..cc55e83 100644 > --- a/target-arm/neon_helper.c > +++ b/target-arm/neon_helper.c > @@ -2008,3 +2008,27 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) > env->vfp.regs[rm] = make_float64(m0); > env->vfp.regs[rd] = make_float64(d0); > } > + > +uint32_t HELPER(neon_maxnm_f32)(uint32_t a, uint32_t b, void *fpstp) > +{ > + float_status *fpst = fpstp; > + float32 af = make_float32(a); > + float32 bf = make_float32(b); > + if (float32_is_quiet_nan(af) && !float32_is_quiet_nan(bf)) > + return b; > + else if (float32_is_quiet_nan(bf) && !float32_is_quiet_nan(af)) > + return a; > + return float32_val(float32_max(af, bf, fpst)); Since these instructions are implementing an IEEE specified function (maxNum(), minNum()) we should implement the IEEE semantics as a new set of functions float*_maxnum() and float*_minnum() in fpu/, and then just call those functions, rather than doing the NaN special casing here. Your implementation also gives the wrong answer for VMAXNM(QNaN,denormalized-number) when in flush-to-zero mode -- you aren't squashing denormal inputs the way the pseudocode says you should. > +} > + > +uint32_t HELPER(neon_minnm_f32)(uint32_t a, uint32_t b, void *fpstp) > +{ > + float_status *fpst = fpstp; > + float32 af = make_float32(a); > + float32 bf = make_float32(b); > + if (float32_is_quiet_nan(af) && !float32_is_quiet_nan(bf)) > + return b; > + else if (float32_is_quiet_nan(bf) && !float32_is_quiet_nan(af)) > + return a; > + return float32_val(float32_min(af, bf, fpst)); > +} > diff --git a/target-arm/translate.c b/target-arm/translate.c > index cac7668..9a4e7f4 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -4540,7 +4540,7 @@ static void gen_neon_narrow_op(int op, int u, int size, > #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ > #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ > #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ > -#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */ > +#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ Let's rename this #define to NEON_3R_FLOAT_MISC... > > static const uint8_t neon_3r_sizes[] = { > [NEON_3R_VHADD] = 0x7, > @@ -4835,7 +4835,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins > } > break; > case NEON_3R_VRECPS_VRSQRTS: > - if (u) { > + /* Encoding shared with VMAXNM/VMINNM in ARMv8 */ > + if (u && (!arm_feature(env, ARM_FEATURE_V8) || (size & 1))) { > return 1; > } > break; > @@ -5125,10 +5126,20 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins > break; > } > case NEON_3R_VRECPS_VRSQRTS: > - if (size == 0) > - gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); > - else > - gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); > + if (u) { > + /* VMAXNM/VMINNM */ > + TCGv_ptr fpstatus = get_fpstatus_ptr(1); > + if (size == 0) > + gen_helper_neon_maxnm_f32(tmp, tmp, tmp2, fpstatus); > + else > + gen_helper_neon_minnm_f32(tmp, tmp, tmp2, fpstatus); > + tcg_temp_free_ptr(fpstatus); > + } else { > + if (size == 0) > + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); > + else > + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); > + } > break; if statements need braces, even for single-statement arms. (this includes fixing up existing code where your patch touches it, as here.) scripts/checkpatch.pl will tell you about this kind of style nit. thanks -- PMM
On 28 November 2013 17:07, Will Newton <will.newton@linaro.org> wrote: > +uint32_t HELPER(neon_maxnm_f32)(uint32_t a, uint32_t b, void *fpstp) > +{ > + float_status *fpst = fpstp; > + float32 af = make_float32(a); > + float32 bf = make_float32(b); > + if (float32_is_quiet_nan(af) && !float32_is_quiet_nan(bf)) > + return b; > + else if (float32_is_quiet_nan(bf) && !float32_is_quiet_nan(af)) > + return a; > + return float32_val(float32_max(af, bf, fpst)); > +} > + > +uint32_t HELPER(neon_minnm_f32)(uint32_t a, uint32_t b, void *fpstp) > +{ > + float_status *fpst = fpstp; > + float32 af = make_float32(a); > + float32 bf = make_float32(b); > + if (float32_is_quiet_nan(af) && !float32_is_quiet_nan(bf)) > + return b; > + else if (float32_is_quiet_nan(bf) && !float32_is_quiet_nan(af)) > + return a; > + return float32_val(float32_min(af, bf, fpst)); > +} Forgot to mention -- these are identical to the helpers in patch 3. We don't need duplicates... thanks -- PMM
diff --git a/target-arm/helper.h b/target-arm/helper.h index d459a39..3ecbbd2 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -355,6 +355,9 @@ DEF_HELPER_3(neon_cgt_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_acge_f32, i32, i32, i32, ptr) DEF_HELPER_3(neon_acgt_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_maxnm_f32, i32, i32, i32, ptr) +DEF_HELPER_3(neon_minnm_f32, i32, i32, i32, ptr) + /* iwmmxt_helper.c */ DEF_HELPER_2(iwmmxt_maddsq, i64, i64, i64) DEF_HELPER_2(iwmmxt_madduq, i64, i64, i64) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index b028cc2..cc55e83 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -2008,3 +2008,27 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm) env->vfp.regs[rm] = make_float64(m0); env->vfp.regs[rd] = make_float64(d0); } + +uint32_t HELPER(neon_maxnm_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float32 af = make_float32(a); + float32 bf = make_float32(b); + if (float32_is_quiet_nan(af) && !float32_is_quiet_nan(bf)) + return b; + else if (float32_is_quiet_nan(bf) && !float32_is_quiet_nan(af)) + return a; + return float32_val(float32_max(af, bf, fpst)); +} + +uint32_t HELPER(neon_minnm_f32)(uint32_t a, uint32_t b, void *fpstp) +{ + float_status *fpst = fpstp; + float32 af = make_float32(a); + float32 bf = make_float32(b); + if (float32_is_quiet_nan(af) && !float32_is_quiet_nan(bf)) + return b; + else if (float32_is_quiet_nan(bf) && !float32_is_quiet_nan(af)) + return a; + return float32_val(float32_min(af, bf, fpst)); +} diff --git a/target-arm/translate.c b/target-arm/translate.c index cac7668..9a4e7f4 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4540,7 +4540,7 @@ static void gen_neon_narrow_op(int op, int u, int size, #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */ #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */ #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */ -#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */ +#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */ static const uint8_t neon_3r_sizes[] = { [NEON_3R_VHADD] = 0x7, @@ -4835,7 +4835,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins } break; case NEON_3R_VRECPS_VRSQRTS: - if (u) { + /* Encoding shared with VMAXNM/VMINNM in ARMv8 */ + if (u && (!arm_feature(env, ARM_FEATURE_V8) || (size & 1))) { return 1; } break; @@ -5125,10 +5126,20 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins break; } case NEON_3R_VRECPS_VRSQRTS: - if (size == 0) - gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); - else - gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); + if (u) { + /* VMAXNM/VMINNM */ + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + if (size == 0) + gen_helper_neon_maxnm_f32(tmp, tmp, tmp2, fpstatus); + else + gen_helper_neon_minnm_f32(tmp, tmp, tmp2, fpstatus); + tcg_temp_free_ptr(fpstatus); + } else { + if (size == 0) + gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env); + else + gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env); + } break; case NEON_3R_VFM: {
This adds support for the ARMv8 Advanced SIMD VMAXNM and VMINNM instructions. Signed-off-by: Will Newton <will.newton@linaro.org> --- target-arm/helper.h | 3 +++ target-arm/neon_helper.c | 24 ++++++++++++++++++++++++ target-arm/translate.c | 23 +++++++++++++++++------ 3 files changed, 44 insertions(+), 6 deletions(-)