Message ID | 20171218172425.18200-11-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | ARM v8.1 simd + v8.3 complex insns | expand |
On 18 December 2017 at 17:24, Richard Henderson <richard.henderson@linaro.org> wrote: > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/translate.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 65 insertions(+) > > diff --git a/target/arm/translate.c b/target/arm/translate.c > index 1a0b0eaced..e57844c019 100644 > --- a/target/arm/translate.c > +++ b/target/arm/translate.c > @@ -7662,6 +7662,65 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) > return 0; > } > > +/* ARMv8.3 reclaims a portion of the LDC2/STC2 coprocessor 8 space. */ > + > +static int disas_neon_insn_cp8_3same(DisasContext *s, uint32_t insn) > +{ > + void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); > + int rd, rn, rm, rot, size, opr_sz; > + TCGv_ptr fpst; > + bool q; > + > + /* FIXME: this access check should not take precedence over UNDEF > + * for invalid encodings; we will generate incorrect syndrome information > + * for attempts to execute invalid vfp/neon encodings with FP disabled. > + */ > + if (s->fp_excp_el) { > + gen_exception_insn(s, 4, EXCP_UDEF, > + syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); > + return 0; > + } > + if (!s->vfp_enabled) { > + return 1; > + } > + if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { > + return 1; > + } > + > + q = extract32(insn, 6, 1); > + size = extract32(insn, 20, 1); > + VFP_DREG_D(rd, insn); > + VFP_DREG_N(rn, insn); > + VFP_DREG_M(rm, insn); > + if ((rd | rn | rm) & q) { > + return 1; > + } > + > + if (extract32(insn, 21, 1)) { > + /* VCMLA */ > + rot = extract32(insn, 23, 2); > + fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; > + } else if (extract32(insn, 23, 1)) { > + /* VCADD */ > + rot = extract32(insn, 24, 1); > + fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; > + } else { > + /* Assuming the register fields remain, only bit 24 remains undecoded: > + * 1111_110x_0d0s_nnnn_dddd_1000_nqm0_mmmm > + */ > + return 1; > + } > + > + opr_sz = (1 + q) * 8; > + fpst = get_fpstatus_ptr(1); > + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), > + vfp_reg_offset(1, rn), > + vfp_reg_offset(1, rm), fpst, > + opr_sz, opr_sz, rot, fn_gvec_ptr); > + tcg_temp_free_ptr(fpst); > + return 0; > +} > + > static int disas_coproc_insn(DisasContext *s, uint32_t insn) > { > int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2; > @@ -8406,6 +8465,12 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) > } > } > } > + } else if ((insn & 0x0e000f10) == 0x0c000800) { I think we should guard this with an ARM_FEATURE_V8 check, so that for pre-v8 CPUs we fall back to the usual "treat it as a coprocessor" codepath. (In theory it should work out the same either way, but specifically limiting this to v8 makes it easier to be sure that it's not changing the behaviour where it shouldn't.) > + /* ARMv8.3 neon ldc2/stc2 coprocessor 8 extension. */ > + if (disas_neon_insn_cp8_3same(s, insn)) { > + goto illegal_op; > + } This doesn't seem to line up with the Arm ARM decode. Your pattern and mask give op0 = 0x, op1 = 100, op2 = 0 and also bit 8 = 0. The ARM has 3reg-same decoded with op0 = 0x, op1 = 1x0, op2 = x (and some insns in the 3reg-same group have bit 8 == 1, like VSDOT and VUDOT.) > + return; > } else if ((insn & 0x0fe00000) == 0x0c400000) { > /* Coprocessor double register transfer. */ > ARCH(5TE); How are you proposing to do the Thumb decoding? Try to share some of the 3same-vs-2reg+scalar decode part, or just have them both do a similar kind of decode and call the disas_neon_insn_cp8_3same/cp8_index functions? thanks -- PMM
On 18 December 2017 at 17:24, Richard Henderson <richard.henderson@linaro.org> wrote: > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/translate.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 65 insertions(+) > > diff --git a/target/arm/translate.c b/target/arm/translate.c > index 1a0b0eaced..e57844c019 100644 > --- a/target/arm/translate.c > +++ b/target/arm/translate.c > @@ -7662,6 +7662,65 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) > return 0; > } > > +/* ARMv8.3 reclaims a portion of the LDC2/STC2 coprocessor 8 space. */ > + > +static int disas_neon_insn_cp8_3same(DisasContext *s, uint32_t insn) > +{ > + void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); > + int rd, rn, rm, rot, size, opr_sz; > + TCGv_ptr fpst; > + bool q; > + > + /* FIXME: this access check should not take precedence over UNDEF > + * for invalid encodings; we will generate incorrect syndrome information > + * for attempts to execute invalid vfp/neon encodings with FP disabled. > + */ (Forgot this bit before hitting send on the other email...) Unlike the sprawling disas_vfp_insn(), we're in a position to get the order of checks right here. Just move it and the vfp_enabled test a bit further down... > + if (s->fp_excp_el) { > + gen_exception_insn(s, 4, EXCP_UDEF, > + syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); > + return 0; > + } > + if (!s->vfp_enabled) { > + return 1; > + } > + if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { > + return 1; > + } > + > + q = extract32(insn, 6, 1); > + size = extract32(insn, 20, 1); > + VFP_DREG_D(rd, insn); > + VFP_DREG_N(rn, insn); > + VFP_DREG_M(rm, insn); > + if ((rd | rn | rm) & q) { > + return 1; > + } > + > + if (extract32(insn, 21, 1)) { > + /* VCMLA */ > + rot = extract32(insn, 23, 2); > + fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; > + } else if (extract32(insn, 23, 1)) { > + /* VCADD */ > + rot = extract32(insn, 24, 1); > + fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; > + } else { > + /* Assuming the register fields remain, only bit 24 remains undecoded: > + * 1111_110x_0d0s_nnnn_dddd_1000_nqm0_mmmm > + */ > + return 1; > + } ...to here. > + > + opr_sz = (1 + q) * 8; > + fpst = get_fpstatus_ptr(1); > + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), > + vfp_reg_offset(1, rn), > + vfp_reg_offset(1, rm), fpst, > + opr_sz, opr_sz, rot, fn_gvec_ptr); > + tcg_temp_free_ptr(fpst); > + return 0; > +} > + thanks -- PMM
On 01/15/2018 10:46 AM, Peter Maydell wrote: > This doesn't seem to line up with the Arm ARM decode. Your > pattern and mask give > op0 = 0x, op1 = 100, op2 = 0 and also bit 8 = 0. > The ARM has 3reg-same decoded with > op0 = 0x, op1 = 1x0, op2 = x > > (and some insns in the 3reg-same group have bit 8 == 1, like > VSDOT and VUDOT.) Ah, more v8.2 instructions that I wasn't even looking at... > How are you proposing to do the Thumb decoding? Try to share > some of the 3same-vs-2reg+scalar decode part, or just have > them both do a similar kind of decode and call the > disas_neon_insn_cp8_3same/cp8_index functions? Hmm. I thought this was working via the "translate into the equivalent ARM encoding" path. But it couldn't possibly be doing so, since disas_neon_insn_cp8_3same is not a subroutine of disas_neon_data_insn. I guess I'll have to verify that RISU is testing what I thought it was... r~
diff --git a/target/arm/translate.c b/target/arm/translate.c index 1a0b0eaced..e57844c019 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7662,6 +7662,65 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 0; } +/* ARMv8.3 reclaims a portion of the LDC2/STC2 coprocessor 8 space. */ + +static int disas_neon_insn_cp8_3same(DisasContext *s, uint32_t insn) +{ + void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); + int rd, rn, rm, rot, size, opr_sz; + TCGv_ptr fpst; + bool q; + + /* FIXME: this access check should not take precedence over UNDEF + * for invalid encodings; we will generate incorrect syndrome information + * for attempts to execute invalid vfp/neon encodings with FP disabled. + */ + if (s->fp_excp_el) { + gen_exception_insn(s, 4, EXCP_UDEF, + syn_fp_access_trap(1, 0xe, false), s->fp_excp_el); + return 0; + } + if (!s->vfp_enabled) { + return 1; + } + if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) { + return 1; + } + + q = extract32(insn, 6, 1); + size = extract32(insn, 20, 1); + VFP_DREG_D(rd, insn); + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + if ((rd | rn | rm) & q) { + return 1; + } + + if (extract32(insn, 21, 1)) { + /* VCMLA */ + rot = extract32(insn, 23, 2); + fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; + } else if (extract32(insn, 23, 1)) { + /* VCADD */ + rot = extract32(insn, 24, 1); + fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; + } else { + /* Assuming the register fields remain, only bit 24 remains undecoded: + * 1111_110x_0d0s_nnnn_dddd_1000_nqm0_mmmm + */ + return 1; + } + + opr_sz = (1 + q) * 8; + fpst = get_fpstatus_ptr(1); + tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), + vfp_reg_offset(1, rn), + vfp_reg_offset(1, rm), fpst, + opr_sz, opr_sz, rot, fn_gvec_ptr); + tcg_temp_free_ptr(fpst); + return 0; +} + static int disas_coproc_insn(DisasContext *s, uint32_t insn) { int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2; @@ -8406,6 +8465,12 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } } } + } else if ((insn & 0x0e000f10) == 0x0c000800) { + /* ARMv8.3 neon ldc2/stc2 coprocessor 8 extension. */ + if (disas_neon_insn_cp8_3same(s, insn)) { + goto illegal_op; + } + return; } else if ((insn & 0x0fe00000) == 0x0c400000) { /* Coprocessor double register transfer. */ ARCH(5TE);
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/translate.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) -- 2.14.3