Message ID | 1297268850-5777-7-git-send-email-peter.maydell@linaro.org |
---|---|
State | Superseded |
Headers | show |
On Wed, Feb 09, 2011 at 04:27:30PM +0000, Peter Maydell wrote: > The Neon half-precision conversion operations (VCVT.F16.F32 and > VCVT.F32.F16) use ARM standard floating-point arithmetic, unlike > the VFP versions (VCVTB and VCVTT). > > Signed-off-by: Peter Maydell <peter.maydell@linaro.org> > --- > target-arm/helper.c | 26 ++++++++++++++++++++++---- > target-arm/helpers.h | 2 ++ > target-arm/translate.c | 16 ++++++++-------- > 3 files changed, 32 insertions(+), 12 deletions(-) Reviewed-by: Aurelien Jarno <aurelien@aurel32.net> > diff --git a/target-arm/helper.c b/target-arm/helper.c > index 503278c..d36f0f3 100644 > --- a/target-arm/helper.c > +++ b/target-arm/helper.c > @@ -2623,9 +2623,8 @@ VFP_CONV_FIX(ul, s, float32, uint32, u) > #undef VFP_CONV_FIX > > /* Half precision conversions. */ > -float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env) > +static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s) > { > - float_status *s = &env->vfp.fp_status; > int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0; > float32 r = float16_to_float32(a, ieee, s); > if (ieee) { > @@ -2634,9 +2633,8 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env) > return r; > } > > -uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env) > +static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s) > { > - float_status *s = &env->vfp.fp_status; > int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0; > float16 r = float32_to_float16(a, ieee, s); > if (ieee) { > @@ -2645,6 +2643,26 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env) > return r; > } > > +float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env) > +{ > + return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status); > +} > + > +float32 HELPER(neon_fcvt_f32_to_f16)(uint32_t a, CPUState *env) > +{ > + return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status); > +} > + > +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, 
CPUState *env) > +{ > + return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status); > +} > + > +float32 HELPER(vfp_fcvt_f32_to_f16)(uint32_t a, CPUState *env) > +{ > + return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status); > +} > + > float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env) > { > float_status *s = &env->vfp.fp_status; > diff --git a/target-arm/helpers.h b/target-arm/helpers.h > index 8a2564e..40264b4 100644 > --- a/target-arm/helpers.h > +++ b/target-arm/helpers.h > @@ -129,6 +129,8 @@ DEF_HELPER_3(vfp_ultod, f64, f64, i32, env) > > DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env) > DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env) > +DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env) > +DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env) > > DEF_HELPER_3(recps_f32, f32, f32, f32, env) > DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) > diff --git a/target-arm/translate.c b/target-arm/translate.c > index e4649e6..a867f55 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -5495,17 +5495,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) > tmp = new_tmp(); > tmp2 = new_tmp(); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0)); > - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1)); > - gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); > + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); > tcg_gen_shli_i32(tmp2, tmp2, 16); > tcg_gen_or_i32(tmp2, tmp2, tmp); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2)); > - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > + gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3)); > neon_store_reg(rd, 0, tmp2); > tmp2 = new_tmp(); > - gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env); > + gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, 
cpu_env); > tcg_gen_shli_i32(tmp2, tmp2, 16); > tcg_gen_or_i32(tmp2, tmp2, tmp); > neon_store_reg(rd, 1, tmp2); > @@ -5518,17 +5518,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) > tmp = neon_load_reg(rm, 0); > tmp2 = neon_load_reg(rm, 1); > tcg_gen_ext16u_i32(tmp3, tmp); > - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0)); > tcg_gen_shri_i32(tmp3, tmp, 16); > - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1)); > dead_tmp(tmp); > tcg_gen_ext16u_i32(tmp3, tmp2); > - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2)); > tcg_gen_shri_i32(tmp3, tmp2, 16); > - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > + gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env); > tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3)); > dead_tmp(tmp2); > dead_tmp(tmp3); > -- > 1.7.1 > > >
On 9 February 2011 16:27, Peter Maydell <peter.maydell@linaro.org> wrote:
> -uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
> +static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
> +float32 HELPER(neon_fcvt_f32_to_f16)(uint32_t a, CPUState *env)
> +float32 HELPER(vfp_fcvt_f32_to_f16)(uint32_t a, CPUState *env)

Just noticed this accidental change, due to a cut-n-paste error: the _to_f16 helpers should still be returning uint32_t, not float32.

-- PMM
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 503278c..d36f0f3 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2623,9 +2623,8 @@ VFP_CONV_FIX(ul, s, float32, uint32, u)
 #undef VFP_CONV_FIX
 
 /* Half precision conversions. */
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
+static float32 do_fcvt_f16_to_f32(uint32_t a, CPUState *env, float_status *s)
 {
-    float_status *s = &env->vfp.fp_status;
     int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
     float32 r = float16_to_float32(a, ieee, s);
     if (ieee) {
@@ -2634,9 +2633,8 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
     return r;
 }
 
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
+static uint32_t do_fcvt_f32_to_f16(float32 a, CPUState *env, float_status *s)
 {
-    float_status *s = &env->vfp.fp_status;
     int ieee = (env->vfp.xregs[ARM_VFP_FPSCR] & (1 << 26)) == 0;
     float16 r = float32_to_float16(a, ieee, s);
     if (ieee) {
@@ -2645,6 +2643,26 @@ uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
     return r;
 }
 
+float32 HELPER(neon_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
+{
+    return do_fcvt_f16_to_f32(a, env, &env->vfp.standard_fp_status);
+}
+
+uint32_t HELPER(neon_fcvt_f32_to_f16)(float32 a, CPUState *env)
+{
+    return do_fcvt_f32_to_f16(a, env, &env->vfp.standard_fp_status);
+}
+
+float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, CPUState *env)
+{
+    return do_fcvt_f16_to_f32(a, env, &env->vfp.fp_status);
+}
+
+uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, CPUState *env)
+{
+    return do_fcvt_f32_to_f16(a, env, &env->vfp.fp_status);
+}
+
 float32 HELPER(recps_f32)(float32 a, float32 b, CPUState *env)
 {
     float_status *s = &env->vfp.fp_status;
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index 8a2564e..40264b4 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -129,6 +129,8 @@ DEF_HELPER_3(vfp_ultod, f64, f64, i32, env)
 
 DEF_HELPER_2(vfp_fcvt_f16_to_f32, f32, i32, env)
 DEF_HELPER_2(vfp_fcvt_f32_to_f16, i32, f32, env)
+DEF_HELPER_2(neon_fcvt_f16_to_f32, f32, i32, env)
+DEF_HELPER_2(neon_fcvt_f32_to_f16, i32, f32, env)
 
 DEF_HELPER_3(recps_f32, f32, f32, f32, env)
 DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index e4649e6..a867f55 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -5495,17 +5495,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     tmp = new_tmp();
                     tmp2 = new_tmp();
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
                     neon_store_reg(rd, 0, tmp2);
                     tmp2 = new_tmp();
-                    gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
+                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
                     tcg_gen_shli_i32(tmp2, tmp2, 16);
                     tcg_gen_or_i32(tmp2, tmp2, tmp);
                     neon_store_reg(rd, 1, tmp2);
@@ -5518,17 +5518,17 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     tmp = neon_load_reg(rm, 0);
                     tmp2 = neon_load_reg(rm, 1);
                     tcg_gen_ext16u_i32(tmp3, tmp);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
                     tcg_gen_shri_i32(tmp3, tmp, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
                     dead_tmp(tmp);
                     tcg_gen_ext16u_i32(tmp3, tmp2);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
                     tcg_gen_shri_i32(tmp3, tmp2, 16);
-                    gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
+                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
                     dead_tmp(tmp2);
                     dead_tmp(tmp3);
The Neon half-precision conversion operations (VCVT.F16.F32 and
VCVT.F32.F16) use ARM standard floating-point arithmetic, unlike
the VFP versions (VCVTB and VCVTT).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c    |   26 ++++++++++++++++++++++----
 target-arm/helpers.h   |    2 ++
 target-arm/translate.c |   16 ++++++++--------
 3 files changed, 32 insertions(+), 12 deletions(-)