| Field | Value |
|---|---|
| Message ID | 524D6838.5080607@linaro.org |
| State | Superseded |
| Headers | show |
On 3 October 2013 21:51, Will Newton <will.newton@linaro.org> wrote: > > This adds support for the VSEL floating point selection instruction > which was added in ARMv8. It is based on the previous patch[1] from > Mans Rullgard, but attempts to address the feedback given on that patch. > > [1] http://lists.nongnu.org/archive/html/qemu-devel/2013-06/msg03117.html This sort of commentary about previous patch versions should go below the '---', not in the commit message. > > Signed-off-by: Will Newton <will.newton@linaro.org> > --- > target-arm/translate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 105 insertions(+) > > Changes in v2: > - Integrate vsel decoding into disas_vfp_insn > > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 998bde2..5e49334 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -2880,6 +2880,98 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) > rm = VFP_SREG_M(insn); > } > > + if ((insn & 0x0f800e50) == 0x0e000a00) { > + /* vsel */ > + uint32_t cc = (insn >> 20) & 3; > + TCGv_i32 tmp, zero; > + > + /* ARMv8 VFP. */ > + if (!arm_feature(env, ARM_FEATURE_V8)) > + return 1; scripts/checkpatch.pl will tell you that omitting the braces is a coding style violation. 
> + > + zero = tcg_const_tl(0); > + > + if (dp) { > + TCGv_i64 ftmp1, ftmp2, ftmp3; > + > + ftmp1 = tcg_temp_new_i64(); > + ftmp2 = tcg_temp_new_i64(); > + ftmp3 = tcg_temp_new_i64(); > + tcg_gen_ld_f64(ftmp1, cpu_env, vfp_reg_offset(dp, rn)); > + tcg_gen_ld_f64(ftmp2, cpu_env, vfp_reg_offset(dp, rm)); > + switch (cc) { > + case 0: /* eq: Z */ > + tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero, > + ftmp1, ftmp2); > + break; > + case 1: /* vs: V */ > + tcg_gen_movcond_i64(TCG_COND_LT, ftmp3, cpu_VF, zero, > + ftmp1, ftmp2); > + break; > + case 2: /* ge: N == V -> N ^ V == 0 */ > + tmp = tcg_temp_new_i32(); > + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); > + tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero, > + ftmp1, ftmp2); > + tcg_temp_free_i32(tmp); > + break; > + case 3: /* gt: !Z && N == V */ > + tcg_gen_movcond_i64(TCG_COND_NE, ftmp3, cpu_ZF, zero, > + ftmp1, ftmp2); > + tmp = tcg_temp_new_i32(); > + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); > + tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero, > + ftmp3, ftmp2); > + tcg_temp_free_i32(tmp); > + break; > + } > + tcg_gen_st_f64(ftmp3, cpu_env, vfp_reg_offset(dp, rd)); > + tcg_temp_free_i64(ftmp1); > + tcg_temp_free_i64(ftmp2); > + tcg_temp_free_i64(ftmp3); > + } else { > + TCGv_i32 ftmp1, ftmp2, ftmp3; > + > + ftmp1 = tcg_temp_new_i32(); > + ftmp2 = tcg_temp_new_i32(); > + ftmp3 = tcg_temp_new_i32(); > + tcg_gen_ld_f32(ftmp1, cpu_env, vfp_reg_offset(dp, rn)); > + tcg_gen_ld_f32(ftmp2, cpu_env, vfp_reg_offset(dp, rm)); > + switch (cc) { > + case 0: /* eq: Z */ > + tcg_gen_movcond_i32(TCG_COND_EQ, ftmp3, cpu_ZF, zero, > + ftmp1, ftmp2); > + break; > + case 1: /* vs: V */ > + tcg_gen_movcond_i32(TCG_COND_LT, ftmp3, cpu_VF, zero, > + ftmp1, ftmp2); > + break; > + case 2: /* ge: N == V -> N ^ V == 0 */ > + tmp = tcg_temp_new_i32(); > + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); > + tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero, > + ftmp1, ftmp2); > + tcg_temp_free_i32(tmp); > + break; > + case 3: /* gt: !Z && N == V */ 
> + tcg_gen_movcond_i32(TCG_COND_NE, ftmp3, cpu_ZF, zero, > + ftmp1, ftmp2); > + tmp = tcg_temp_new_i32(); > + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); > + tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero, > + ftmp3, ftmp2); > + tcg_temp_free_i32(tmp); > + break; > + } > + tcg_gen_st_f32(ftmp3, cpu_env, vfp_reg_offset(dp, rd)); > + tcg_temp_free_i32(ftmp1); > + tcg_temp_free_i32(ftmp2); > + tcg_temp_free_i32(ftmp3); > + } > + > + return 0; > + } > + > veclen = s->vec_len; > if (op == 15 && rn > 3) > veclen = 0; > @@ -6756,6 +6848,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) > goto illegal_op; > return; > } > + if ((insn & 0x0f800e50) == 0x0e000a00) { > + /* ARMv8 VFP. */ > + ARCH(8); > + > + if (disas_vfp_insn(env, s, insn)) > + goto illegal_op; > + } This isn't what I meant. If our decoding matches up with the ARM ARM then this instruction pattern should already fall into disas_vfp_insn(), and we shouldn't need an extra check and call. (If it's not correct then we should adjust our decode so it does.) thanks -- PMM
On 3 October 2013 13:59, Peter Maydell <peter.maydell@linaro.org> wrote: > On 3 October 2013 21:51, Will Newton <will.newton@linaro.org> wrote: >> >> This adds support for the VSEL floating point selection instruction >> which was added in ARMv8. It is based on the previous patch[1] from >> Mans Rullgard, but attempts to address the feedback given on that patch. >> >> [1] http://lists.nongnu.org/archive/html/qemu-devel/2013-06/msg03117.html > > This sort of commentary about previous patch versions should go below > the '---', not in the commit message. > >> >> Signed-off-by: Will Newton <will.newton@linaro.org> >> --- >> target-arm/translate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 105 insertions(+) >> >> Changes in v2: >> - Integrate vsel decoding into disas_vfp_insn >> >> diff --git a/target-arm/translate.c b/target-arm/translate.c >> index 998bde2..5e49334 100644 >> --- a/target-arm/translate.c >> +++ b/target-arm/translate.c >> @@ -2880,6 +2880,98 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) >> rm = VFP_SREG_M(insn); >> } >> >> + if ((insn & 0x0f800e50) == 0x0e000a00) { >> + /* vsel */ >> + uint32_t cc = (insn >> 20) & 3; >> + TCGv_i32 tmp, zero; >> + >> + /* ARMv8 VFP. */ >> + if (!arm_feature(env, ARM_FEATURE_V8)) >> + return 1; > > scripts/checkpatch.pl will tell you that omitting the braces > is a coding style violation. Ok, I'll fix that. 
>> + >> + zero = tcg_const_tl(0); >> + >> + if (dp) { >> + TCGv_i64 ftmp1, ftmp2, ftmp3; >> + >> + ftmp1 = tcg_temp_new_i64(); >> + ftmp2 = tcg_temp_new_i64(); >> + ftmp3 = tcg_temp_new_i64(); >> + tcg_gen_ld_f64(ftmp1, cpu_env, vfp_reg_offset(dp, rn)); >> + tcg_gen_ld_f64(ftmp2, cpu_env, vfp_reg_offset(dp, rm)); >> + switch (cc) { >> + case 0: /* eq: Z */ >> + tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero, >> + ftmp1, ftmp2); >> + break; >> + case 1: /* vs: V */ >> + tcg_gen_movcond_i64(TCG_COND_LT, ftmp3, cpu_VF, zero, >> + ftmp1, ftmp2); >> + break; >> + case 2: /* ge: N == V -> N ^ V == 0 */ >> + tmp = tcg_temp_new_i32(); >> + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); >> + tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero, >> + ftmp1, ftmp2); >> + tcg_temp_free_i32(tmp); >> + break; >> + case 3: /* gt: !Z && N == V */ >> + tcg_gen_movcond_i64(TCG_COND_NE, ftmp3, cpu_ZF, zero, >> + ftmp1, ftmp2); >> + tmp = tcg_temp_new_i32(); >> + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); >> + tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero, >> + ftmp3, ftmp2); >> + tcg_temp_free_i32(tmp); >> + break; >> + } >> + tcg_gen_st_f64(ftmp3, cpu_env, vfp_reg_offset(dp, rd)); >> + tcg_temp_free_i64(ftmp1); >> + tcg_temp_free_i64(ftmp2); >> + tcg_temp_free_i64(ftmp3); >> + } else { >> + TCGv_i32 ftmp1, ftmp2, ftmp3; >> + >> + ftmp1 = tcg_temp_new_i32(); >> + ftmp2 = tcg_temp_new_i32(); >> + ftmp3 = tcg_temp_new_i32(); >> + tcg_gen_ld_f32(ftmp1, cpu_env, vfp_reg_offset(dp, rn)); >> + tcg_gen_ld_f32(ftmp2, cpu_env, vfp_reg_offset(dp, rm)); >> + switch (cc) { >> + case 0: /* eq: Z */ >> + tcg_gen_movcond_i32(TCG_COND_EQ, ftmp3, cpu_ZF, zero, >> + ftmp1, ftmp2); >> + break; >> + case 1: /* vs: V */ >> + tcg_gen_movcond_i32(TCG_COND_LT, ftmp3, cpu_VF, zero, >> + ftmp1, ftmp2); >> + break; >> + case 2: /* ge: N == V -> N ^ V == 0 */ >> + tmp = tcg_temp_new_i32(); >> + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); >> + tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero, >> + ftmp1, ftmp2); >> + 
tcg_temp_free_i32(tmp); >> + break; >> + case 3: /* gt: !Z && N == V */ >> + tcg_gen_movcond_i32(TCG_COND_NE, ftmp3, cpu_ZF, zero, >> + ftmp1, ftmp2); >> + tmp = tcg_temp_new_i32(); >> + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); >> + tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero, >> + ftmp3, ftmp2); >> + tcg_temp_free_i32(tmp); >> + break; >> + } >> + tcg_gen_st_f32(ftmp3, cpu_env, vfp_reg_offset(dp, rd)); >> + tcg_temp_free_i32(ftmp1); >> + tcg_temp_free_i32(ftmp2); >> + tcg_temp_free_i32(ftmp3); >> + } >> + >> + return 0; >> + } >> + >> veclen = s->vec_len; >> if (op == 15 && rn > 3) >> veclen = 0; >> @@ -6756,6 +6848,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) >> goto illegal_op; >> return; >> } >> + if ((insn & 0x0f800e50) == 0x0e000a00) { >> + /* ARMv8 VFP. */ >> + ARCH(8); >> + >> + if (disas_vfp_insn(env, s, insn)) >> + goto illegal_op; >> + } > > This isn't what I meant. If our decoding matches up with the ARM ARM > then this instruction pattern should already fall into disas_vfp_insn(), > and we shouldn't need an extra check and call. (If it's not correct then > we should adjust our decode so it does.) I'll respin the patch pulling the calls to disas_vfp_insn up a level which I think you alluded to in the original review. It still needs an additional call to disas_vfp_insn in the ARM case as condition code == 0xf is dealt with separately from the others. Let me know if this is not what you were looking for. Thanks,
On 10/03/2013 05:51 AM, Will Newton wrote: > + case 0: /* eq: Z */ > + tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero, > + ftmp1, ftmp2); > + break; Does this compile when configured with --enable-debug? It shouldn't, since movcond_i64 takes 5 _i64 variables, and your comparison variables are _i32. r~
On 3 October 2013 23:31, Will Newton <will.newton@linaro.org> wrote: > On 3 October 2013 13:59, Peter Maydell <peter.maydell@linaro.org> wrote: >> This isn't what I meant. If our decoding matches up with the ARM ARM >> then this instruction pattern should already fall into disas_vfp_insn(), >> and we shouldn't need an extra check and call. (If it's not correct then >> we should adjust our decode so it does.) > > I'll respin the patch pulling the calls to disas_vfp_insn up a level > which I think you alluded to in the original review. It still needs an > additional call to disas_vfp_insn in the ARM case as condition code == > 0xf is dealt with separately from the others. Let me know if this is > not what you were looking for. Ah, that means the ARM ARM table is incorrect, because it implies that VSEL is conditional (which it definitely isn't). I need to look at where the new insns are in the T32/A32 encodings in more detail, then, which I don't have time for just at the moment. Pulling the disas_vfp_insn calls out of disas_coproc is a good idea anyway, though (it should be a separate patch to the one which adds VSEL). -- PMM
On 3 October 2013 15:34, Richard Henderson <rth@twiddle.net> wrote: > On 10/03/2013 05:51 AM, Will Newton wrote: >> + case 0: /* eq: Z */ >> + tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero, >> + ftmp1, ftmp2); >> + break; > > Does this compile when configured with --enable-debug? > > It shouldn't, since movcond_i64 takes 5 _i64 variables, > and your comparison variables are _i32. No, thanks for picking that up. I was wondering if that was valid and the code seemed to work. What's the best way to work around the problem? Just extend everything up to 64bits?
On 10/03/2013 08:10 AM, Will Newton wrote: > No, thanks for picking that up. I was wondering if that was valid and > the code seemed to work. What's the best way to work around the > problem? Just extend everything up to 64bits? For the simple conditions, yes. For the more complex ones, you might want to do the computation in 32-bit and extend the result. r~
On 10/03/2013 10:28 AM, Richard Henderson wrote: > For the simple conditions, yes. For the more complex ones, > you might want to do the computation in 32-bit and extend > the result. Alternately, compute the condition with setcond_i32 and only extend that result to 64 bits. That means doing something different with GT with its compound. Maybe xor_i32 tmp, vf, nf setcond_i32 tmp, tmp, zero, ge movcond_i32 tmp, zf, zero, tmp, zero, ne (tmp = z ? tmp : 0) r~
On 3 October 2013 15:37, Peter Maydell <peter.maydell@linaro.org> wrote: > Ah, that means the ARM ARM table is incorrect, because it implies > that VSEL is conditional (which it definitely isn't). I need to look > at where the new insns are in the T32/A32 encodings in more > detail, then, which I don't have time for just at the moment. Yes, these are in what would be the CDP2 space in both T32 and A32. So, quick sketch of what I think we should do: * move the disas_vfp_insn() calls outside disas_coproc_insn() (and in the thumb decode case, to before the "if bit 28 set then goto illegal_op" check) (basically what you have in this patch is fine) * add a call to disas_vfp_insn() in the unconditional code (what you have there in this patch is fine, but remember that QEMU coding style mandates braces; use scripts/checkpatch.pl.) * in disas_vfp_insn(), just after the "is vfp disabled?" check, add: if (extract32(insn, 28, 4) == 0xf) { /* Encodings with T=1 (Thumb) or unconditional (ARM): * only used in v8 and above */ return 1; } That all goes into patch 1 of 2, which is just doing refactoring and makes no changes in behaviour. * then in patch 2 of the series, actually add the VSEL support, by replacing that 'return 1' with 'return disas_vfp_v8_insn(env, s, insn);' and implementing that function with the VSEL support. [It seems better to me to have this separate rather than fully integrated into the existing logic of disas_vfp_insn because we know that no new insn is ever going to use the legacy/deprecated vfp vector support. And the function is already 800 lines long...] thanks -- PMM
diff --git a/target-arm/translate.c b/target-arm/translate.c index 998bde2..5e49334 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -2880,6 +2880,98 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn) rm = VFP_SREG_M(insn); } + if ((insn & 0x0f800e50) == 0x0e000a00) { + /* vsel */ + uint32_t cc = (insn >> 20) & 3; + TCGv_i32 tmp, zero; + + /* ARMv8 VFP. */ + if (!arm_feature(env, ARM_FEATURE_V8)) + return 1; + + zero = tcg_const_tl(0); + + if (dp) { + TCGv_i64 ftmp1, ftmp2, ftmp3; + + ftmp1 = tcg_temp_new_i64(); + ftmp2 = tcg_temp_new_i64(); + ftmp3 = tcg_temp_new_i64(); + tcg_gen_ld_f64(ftmp1, cpu_env, vfp_reg_offset(dp, rn)); + tcg_gen_ld_f64(ftmp2, cpu_env, vfp_reg_offset(dp, rm)); + switch (cc) { + case 0: /* eq: Z */ + tcg_gen_movcond_i64(TCG_COND_EQ, ftmp3, cpu_ZF, zero, + ftmp1, ftmp2); + break; + case 1: /* vs: V */ + tcg_gen_movcond_i64(TCG_COND_LT, ftmp3, cpu_VF, zero, + ftmp1, ftmp2); + break; + case 2: /* ge: N == V -> N ^ V == 0 */ + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero, + ftmp1, ftmp2); + tcg_temp_free_i32(tmp); + break; + case 3: /* gt: !Z && N == V */ + tcg_gen_movcond_i64(TCG_COND_NE, ftmp3, cpu_ZF, zero, + ftmp1, ftmp2); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_movcond_i64(TCG_COND_GE, ftmp3, tmp, zero, + ftmp3, ftmp2); + tcg_temp_free_i32(tmp); + break; + } + tcg_gen_st_f64(ftmp3, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i64(ftmp1); + tcg_temp_free_i64(ftmp2); + tcg_temp_free_i64(ftmp3); + } else { + TCGv_i32 ftmp1, ftmp2, ftmp3; + + ftmp1 = tcg_temp_new_i32(); + ftmp2 = tcg_temp_new_i32(); + ftmp3 = tcg_temp_new_i32(); + tcg_gen_ld_f32(ftmp1, cpu_env, vfp_reg_offset(dp, rn)); + tcg_gen_ld_f32(ftmp2, cpu_env, vfp_reg_offset(dp, rm)); + switch (cc) { + case 0: /* eq: Z */ + tcg_gen_movcond_i32(TCG_COND_EQ, ftmp3, cpu_ZF, zero, + ftmp1, ftmp2); + break; + case 1: /* vs: V */ + 
tcg_gen_movcond_i32(TCG_COND_LT, ftmp3, cpu_VF, zero, + ftmp1, ftmp2); + break; + case 2: /* ge: N == V -> N ^ V == 0 */ + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero, + ftmp1, ftmp2); + tcg_temp_free_i32(tmp); + break; + case 3: /* gt: !Z && N == V */ + tcg_gen_movcond_i32(TCG_COND_NE, ftmp3, cpu_ZF, zero, + ftmp1, ftmp2); + tmp = tcg_temp_new_i32(); + tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); + tcg_gen_movcond_i32(TCG_COND_GE, ftmp3, tmp, zero, + ftmp3, ftmp2); + tcg_temp_free_i32(tmp); + break; + } + tcg_gen_st_f32(ftmp3, cpu_env, vfp_reg_offset(dp, rd)); + tcg_temp_free_i32(ftmp1); + tcg_temp_free_i32(ftmp2); + tcg_temp_free_i32(ftmp3); + } + + return 0; + } + veclen = s->vec_len; if (op == 15 && rn > 3) veclen = 0; @@ -6756,6 +6848,13 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s) goto illegal_op; return; } + if ((insn & 0x0f800e50) == 0x0e000a00) { + /* ARMv8 VFP. */ + ARCH(8); + + if (disas_vfp_insn(env, s, insn)) + goto illegal_op; + } if (((insn & 0x0f30f000) == 0x0510f000) || ((insn & 0x0f30f010) == 0x0710f000)) { if ((insn & (1 << 22)) == 0) { @@ -8768,6 +8867,12 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28); if (disas_neon_data_insn(env, s, insn)) goto illegal_op; + } else if ((insn & 0x0f800e50) == 0x0e000a00) { + /* ARMv8 VFP. */ + ARCH(8); + + if (disas_vfp_insn(env, s, insn)) + goto illegal_op; } else { if (insn & (1 << 28)) goto illegal_op;
This adds support for the VSEL floating point selection instruction which was added in ARMv8. It is based on the previous patch[1] from Mans Rullgard, but attempts to address the feedback given on that patch. [1] http://lists.nongnu.org/archive/html/qemu-devel/2013-06/msg03117.html Signed-off-by: Will Newton <will.newton@linaro.org> --- target-arm/translate.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) Changes in v2: - Integrate vsel decoding into disas_vfp_insn