Message ID | 1390908155-23475-11-git-send-email-will.newton@linaro.org |
---|---|
State | Superseded |
Headers | show |
On 28 January 2014 11:22, Will Newton <will.newton@linaro.org> wrote: > Add support for the AArch32 floating-point half-precision to double- > precision conversion VCVTB and VCVTT instructions. > > Signed-off-by: Will Newton <will.newton@linaro.org> > --- > target-arm/translate.c | 62 ++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 48 insertions(+), 14 deletions(-) > > diff --git a/target-arm/translate.c b/target-arm/translate.c > index e701c0f..dfda2c4 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -3142,14 +3142,16 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) > VFP_DREG_N(rn, insn); > } > > - if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) { > + if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) || > + ((rn & 0x1e) == 0x6))) { > /* Integer or single precision destination. */ > rd = VFP_SREG_D(insn); You should update the comment here, since the destination is halfprec in the case you've added. > } else { > VFP_DREG_D(rd, insn); > } > if (op == 15 && > - (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) { > + (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) || > + ((rn & 0x1e) == 0x4))) { > /* VCVT from int is always from S reg regardless of dp bit. > * VCVT with immediate frac_bits has same format as SREG_M > */ Again, the comment needs updating since you've added an extra case. 
> @@ -3241,12 +3243,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) > case 5: > case 6: > case 7: > - /* VCVTB, VCVTT: only present with the halfprec extension, > - * UNPREDICTABLE if bit 8 is set (we choose to UNDEF) > + /* VCVTB, VCVTT: only present with the halfprec extension > + * UNPREDICTABLE if bit 8 is set prior to ARMv8 > + * (we choose to UNDEF) > */ > - if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) { > + if ((dp && !arm_feature(env, ARM_FEATURE_V8)) || > + !arm_feature(env, ARM_FEATURE_VFP_FP16)) { > return 1; > } > + if ((rn & 0x1e) == 0x4) { > + /* Single precision source */ > + gen_mov_F0_vreg(0, rm); > + break; > + } This is confusing, because actually you're using this to catch the case of a half-precision source. I think it will be clearer to say: if (!extract32(rn, 1, 1)) { /* Half precision source */ gen_mov_F0_vreg(0, rm); break; } which then catches the half-prec sources for both to-single and to-double, and leaves just the "source is as per dp flag" cases to fall through. (We didn't need to catch halfprec sources separately when we only had 32<->16 since they both get handled similarly, but now that we have the possibility of dp!=0, it's clearer to handle them explicitly.) > /* Otherwise fall through */ > default: > /* One source operand. 
*/ > @@ -3394,21 +3403,39 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) > case 3: /* sqrt */ > gen_vfp_sqrt(dp); > break; > - case 4: /* vcvtb.f32.f16 */ > + case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */ > tmp = gen_vfp_mrs(); > tcg_gen_ext16u_i32(tmp, tmp); > - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); > + if (dp) { > + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, > + cpu_env); > + } else { > + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, > + cpu_env); > + } > tcg_temp_free_i32(tmp); > break; > - case 5: /* vcvtt.f32.f16 */ > + case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */ > tmp = gen_vfp_mrs(); > tcg_gen_shri_i32(tmp, tmp, 16); > - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); > + if (dp) { > + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, > + cpu_env); > + } else { > + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, > + cpu_env); > + } > tcg_temp_free_i32(tmp); > break; > - case 6: /* vcvtb.f16.f32 */ > + case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */ > tmp = tcg_temp_new_i32(); > - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > + if (dp) { > + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, > + cpu_env); > + } else { > + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, > + cpu_env); > + } > gen_mov_F0_vreg(0, rd); > tmp2 = gen_vfp_mrs(); > tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); > @@ -3416,9 +3443,15 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) > tcg_temp_free_i32(tmp2); > gen_vfp_msr(tmp); > break; > - case 7: /* vcvtt.f16.f32 */ > + case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */ > tmp = tcg_temp_new_i32(); > - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); > + if (dp) { > + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, > + cpu_env); > + } else { > + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, > + cpu_env); > + } > tcg_gen_shli_i32(tmp, tmp, 16); > gen_mov_F0_vreg(0, rd); > tmp2 = gen_vfp_mrs(); > @@ -3553,7 +3586,8 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext 
*s, uint32_t insn) > /* Write back the result. */ > if (op == 15 && (rn >= 8 && rn <= 11)) > ; /* Comparison, do nothing. */ > - else if (op == 15 && dp && ((rn & 0x1c) == 0x18)) > + else if (op == 15 && dp && ((rn & 0x1c) == 0x18 || > + (rn & 0x1e) == 0x6)) > /* VCVT double to int: always integer result. */ > gen_mov_vreg_F0(0, rd); > else if (op == 15 && rn == 15) Again, you need to update the comment because you've added a case. Incidentally, my testing of this patch revealed a bug in the softfloat float<->float conversion functions: if the input is a NaN with the sign bit set and the top N bits of the mantissa clear (where N is the number of mantissa bits in the destination fp format), then we will incorrectly return a NaN with sign bit clear and topmost mantissa bit only set, whereas the ARM ARM demands that the signbit be the same as the input. This is because the softfloat code for handling "what does an input NaN get turned into for float/float conversion" (ie the float*ToCommonNaN and commonNaNToFloat* functions) don't do what the ARM ARM specifies; they should probably be specifiable by the target architecture. At the moment we try to work around this by always calling float*_maybe_silence_nan() after the conversion, but this turns out to be not sufficient to fix up the semantics... thanks -- PMM
diff --git a/target-arm/translate.c b/target-arm/translate.c index e701c0f..dfda2c4 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -3142,14 +3142,16 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) VFP_DREG_N(rn, insn); } - if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) { + if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) || + ((rn & 0x1e) == 0x6))) { /* Integer or single precision destination. */ rd = VFP_SREG_D(insn); } else { VFP_DREG_D(rd, insn); } if (op == 15 && - (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) { + (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) || + ((rn & 0x1e) == 0x4))) { /* VCVT from int is always from S reg regardless of dp bit. * VCVT with immediate frac_bits has same format as SREG_M */ @@ -3241,12 +3243,19 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) case 5: case 6: case 7: - /* VCVTB, VCVTT: only present with the halfprec extension, - * UNPREDICTABLE if bit 8 is set (we choose to UNDEF) + /* VCVTB, VCVTT: only present with the halfprec extension + * UNPREDICTABLE if bit 8 is set prior to ARMv8 + * (we choose to UNDEF) */ - if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) { + if ((dp && !arm_feature(env, ARM_FEATURE_V8)) || + !arm_feature(env, ARM_FEATURE_VFP_FP16)) { return 1; } + if ((rn & 0x1e) == 0x4) { + /* Single precision source */ + gen_mov_F0_vreg(0, rm); + break; + } /* Otherwise fall through */ default: /* One source operand. 
*/ @@ -3394,21 +3403,39 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) case 3: /* sqrt */ gen_vfp_sqrt(dp); break; - case 4: /* vcvtb.f32.f16 */ + case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */ tmp = gen_vfp_mrs(); tcg_gen_ext16u_i32(tmp, tmp); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, + cpu_env); + } else { + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, + cpu_env); + } tcg_temp_free_i32(tmp); break; - case 5: /* vcvtt.f32.f16 */ + case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */ tmp = gen_vfp_mrs(); tcg_gen_shri_i32(tmp, tmp, 16); - gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp, + cpu_env); + } else { + gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, + cpu_env); + } tcg_temp_free_i32(tmp); break; - case 6: /* vcvtb.f16.f32 */ + case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */ tmp = tcg_temp_new_i32(); - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, + cpu_env); + } else { + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, + cpu_env); + } gen_mov_F0_vreg(0, rd); tmp2 = gen_vfp_mrs(); tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000); @@ -3416,9 +3443,15 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) tcg_temp_free_i32(tmp2); gen_vfp_msr(tmp); break; - case 7: /* vcvtt.f16.f32 */ + case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */ tmp = tcg_temp_new_i32(); - gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env); + if (dp) { + gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d, + cpu_env); + } else { + gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, + cpu_env); + } tcg_gen_shli_i32(tmp, tmp, 16); gen_mov_F0_vreg(0, rd); tmp2 = gen_vfp_mrs(); @@ -3553,7 +3586,8 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) /* Write back the result. */ if (op == 15 && (rn >= 8 && rn <= 11)) ; /* Comparison, do nothing. 
*/ - else if (op == 15 && dp && ((rn & 0x1c) == 0x18)) + else if (op == 15 && dp && ((rn & 0x1c) == 0x18 || + (rn & 0x1e) == 0x6)) /* VCVT double to int: always integer result. */ gen_mov_vreg_F0(0, rd); else if (op == 15 && rn == 15)
Add support for the AArch32 floating-point half-precision to double- precision conversion VCVTB and VCVTT instructions. Signed-off-by: Will Newton <will.newton@linaro.org> --- target-arm/translate.c | 62 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 14 deletions(-)