[v2,10/11] target/arm: Decode aa32 armv8.3 3-same

Message ID	20171218172425.18200-11-richard.henderson@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 2001:4830:134:3::11 as permitted sender) client-ip=2001:4830:134:3::11; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Date: Mon, 18 Dec 2017 09:24:24 -0800 Message-Id: <20171218172425.18200-11-richard.henderson@linaro.org> In-Reply-To: <20171218172425.18200-1-richard.henderson@linaro.org> References: <20171218172425.18200-1-richard.henderson@linaro.org> Subject: [Qemu-devel] [PATCH v2 10/11] target/arm: Decode aa32 armv8.3 3-same Precedence: list Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	ARM v8.1 simd + v8.3 complex insns \| expand [v2,00/11] ARM v8.1 simd + v8.3 complex insns [v2,01/11] target/arm: Add ARM_FEATURE_V8_1_SIMD [v2,02/11] target/arm: Decode aa64 armv8.1 scalar three same extra [v2,03/11] target/arm: Decode aa64 armv8.1 three same extra [v2,04/11] target/arm: Decode aa64 armv8.1 scalar/vector x indexed element [v2,05/11] target/arm: Decode aa32 armv8.1 three same [v2,06/11] target/arm: Decode aa32 armv8.1 two reg and a scalar [v2,07/11] target/arm: Add ARM_FEATURE_V8_FCMA [v2,08/11] target/arm: Decode aa64 armv8.3 fcadd [v2,09/11] target/arm: Decode aa64 armv8.3 fcmla [v2,10/11] target/arm: Decode aa32 armv8.3 3-same [v2,11/11] target/arm: Decode aa32 armv8.3 2-reg-index

Message ID

20171218172425.18200-11-richard.henderson@linaro.org

State

New

Headers

Received-SPF: pass (google.com: domain of
	qemu-devel-bounces+patch=linaro.org@nongnu.org designates
	2001:4830:134:3::11 as permitted sender)
	client-ip=2001:4830:134:3::11; 
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Mon, 18 Dec 2017 09:24:24 -0800
Message-Id: <20171218172425.18200-11-richard.henderson@linaro.org>
In-Reply-To: <20171218172425.18200-1-richard.henderson@linaro.org>
References: <20171218172425.18200-1-richard.henderson@linaro.org>
Subject: [Qemu-devel] [PATCH v2 10/11] target/arm: Decode aa32 armv8.3 3-same
Precedence: list
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org
Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>

Series

ARM v8.1 simd + v8.3 complex insns | expand

Comments

Peter Maydell Jan. 15, 2018, 6:46 p.m. UTC | #1

On 18 December 2017 at 17:24, Richard Henderson
<richard.henderson@linaro.org> wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/arm/translate.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++

>  1 file changed, 65 insertions(+)

>

> diff --git a/target/arm/translate.c b/target/arm/translate.c

> index 1a0b0eaced..e57844c019 100644

> --- a/target/arm/translate.c

> +++ b/target/arm/translate.c

> @@ -7662,6 +7662,65 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>      return 0;

>  }

>

> +/* ARMv8.3 reclaims a portion of the LDC2/STC2 coprocessor 8 space.  */

> +

> +static int disas_neon_insn_cp8_3same(DisasContext *s, uint32_t insn)

> +{

> +    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

> +    int rd, rn, rm, rot, size, opr_sz;

> +    TCGv_ptr fpst;

> +    bool q;

> +

> +    /* FIXME: this access check should not take precedence over UNDEF

> +     * for invalid encodings; we will generate incorrect syndrome information

> +     * for attempts to execute invalid vfp/neon encodings with FP disabled.

> +     */

> +    if (s->fp_excp_el) {

> +        gen_exception_insn(s, 4, EXCP_UDEF,

> +                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);

> +        return 0;

> +    }

> +    if (!s->vfp_enabled) {

> +        return 1;

> +    }

> +    if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {

> +        return 1;

> +    }

> +

> +    q = extract32(insn, 6, 1);

> +    size = extract32(insn, 20, 1);

> +    VFP_DREG_D(rd, insn);

> +    VFP_DREG_N(rn, insn);

> +    VFP_DREG_M(rm, insn);

> +    if ((rd | rn | rm) & q) {

> +        return 1;

> +    }

> +

> +    if (extract32(insn, 21, 1)) {

> +        /* VCMLA */

> +        rot = extract32(insn, 23, 2);

> +        fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;

> +    } else if (extract32(insn, 23, 1)) {

> +        /* VCADD */

> +        rot = extract32(insn, 24, 1);

> +        fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;

> +    } else {

> +        /* Assuming the register fields remain, only bit 24 remains undecoded:

> +         * 1111_110x_0d0s_nnnn_dddd_1000_nqm0_mmmm

> +         */

> +        return 1;

> +    }

> +

> +    opr_sz = (1 + q) * 8;

> +    fpst = get_fpstatus_ptr(1);

> +    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),

> +                       vfp_reg_offset(1, rn),

> +                       vfp_reg_offset(1, rm), fpst,

> +                       opr_sz, opr_sz, rot, fn_gvec_ptr);

> +    tcg_temp_free_ptr(fpst);

> +    return 0;

> +}

> +

>  static int disas_coproc_insn(DisasContext *s, uint32_t insn)

>  {

>      int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;

> @@ -8406,6 +8465,12 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)

>                      }

>                  }

>              }

> +        } else if ((insn & 0x0e000f10) == 0x0c000800) {


I think we should guard this with an ARM_FEATURE_V8 check, so
that for pre-v8 CPUs we fall back to the usual "treat it as a
coprocessor" codepath. (In theory it should work out the same
either way, but specifically limiting this to v8 makes it easier
to be sure that it's not changing the behaviour where it shouldn't.)

> +            /* ARMv8.3 neon ldc2/stc2 coprocessor 8 extension.  */

> +            if (disas_neon_insn_cp8_3same(s, insn)) {

> +                goto illegal_op;

> +            }


This doesn't seem to line up with the Arm ARM decode. Your
pattern and mask give
  op0 = 0x, op1 = 100, op2 = 0 and also bit 8 = 0.
The ARM has 3reg-same decoded with
  op0 = 0x, op1 = 1x0, op2 = x

(and some insns in the 3reg-same group have bit 8 == 1, like
VSDOT and VUDOT.)

> +            return;

>          } else if ((insn & 0x0fe00000) == 0x0c400000) {

>              /* Coprocessor double register transfer.  */

>              ARCH(5TE);


How are you proposing to do the Thumb decoding? Try to share
some of the 3same-vs-2reg+scalar decode part, or just have
them both do a similar kind of decode and call the
disas_neon_insn_cp8_3same/cp8_index functions?

thanks
-- PMM

Peter Maydell Jan. 15, 2018, 6:49 p.m. UTC | #2

On 18 December 2017 at 17:24, Richard Henderson
<richard.henderson@linaro.org> wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/arm/translate.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++

>  1 file changed, 65 insertions(+)

>

> diff --git a/target/arm/translate.c b/target/arm/translate.c

> index 1a0b0eaced..e57844c019 100644

> --- a/target/arm/translate.c

> +++ b/target/arm/translate.c

> @@ -7662,6 +7662,65 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>      return 0;

>  }

>

> +/* ARMv8.3 reclaims a portion of the LDC2/STC2 coprocessor 8 space.  */

> +

> +static int disas_neon_insn_cp8_3same(DisasContext *s, uint32_t insn)

> +{

> +    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

> +    int rd, rn, rm, rot, size, opr_sz;

> +    TCGv_ptr fpst;

> +    bool q;

> +

> +    /* FIXME: this access check should not take precedence over UNDEF

> +     * for invalid encodings; we will generate incorrect syndrome information

> +     * for attempts to execute invalid vfp/neon encodings with FP disabled.

> +     */


(Forgot this bit before hitting send on the other email...)

Unlike the sprawling disas_vfp_insn(), we're in a position to get the
order of checks right here. Just move it and the vfp_enabled test a
bit further down...

> +    if (s->fp_excp_el) {

> +        gen_exception_insn(s, 4, EXCP_UDEF,

> +                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);

> +        return 0;

> +    }

> +    if (!s->vfp_enabled) {

> +        return 1;

> +    }

> +    if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {

> +        return 1;

> +    }

> +

> +    q = extract32(insn, 6, 1);

> +    size = extract32(insn, 20, 1);

> +    VFP_DREG_D(rd, insn);

> +    VFP_DREG_N(rn, insn);

> +    VFP_DREG_M(rm, insn);

> +    if ((rd | rn | rm) & q) {

> +        return 1;

> +    }

> +

> +    if (extract32(insn, 21, 1)) {

> +        /* VCMLA */

> +        rot = extract32(insn, 23, 2);

> +        fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;

> +    } else if (extract32(insn, 23, 1)) {

> +        /* VCADD */

> +        rot = extract32(insn, 24, 1);

> +        fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;

> +    } else {

> +        /* Assuming the register fields remain, only bit 24 remains undecoded:

> +         * 1111_110x_0d0s_nnnn_dddd_1000_nqm0_mmmm

> +         */

> +        return 1;

> +    }


...to here.

> +

> +    opr_sz = (1 + q) * 8;

> +    fpst = get_fpstatus_ptr(1);

> +    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),

> +                       vfp_reg_offset(1, rn),

> +                       vfp_reg_offset(1, rm), fpst,

> +                       opr_sz, opr_sz, rot, fn_gvec_ptr);

> +    tcg_temp_free_ptr(fpst);

> +    return 0;

> +}

> +


thanks
-- PMM

Richard Henderson Jan. 15, 2018, 7:10 p.m. UTC | #3

On 01/15/2018 10:46 AM, Peter Maydell wrote:
> This doesn't seem to line up with the Arm ARM decode. Your

> pattern and mask give

>   op0 = 0x, op1 = 100, op2 = 0 and also bit 8 = 0.

> The ARM has 3reg-same decoded with

>   op0 = 0x, op1 = 1x0, op2 = x

> 

> (and some insns in the 3reg-same group have bit 8 == 1, like

> VSDOT and VUDOT.)

Ah, more v8.2 instructions that I wasn't even looking at...

> How are you proposing to do the Thumb decoding? Try to share

> some of the 3same-vs-2reg+scalar decode part, or just have

> them both do a similar kind of decode and call the

> disas_neon_insn_cp8_3same/cp8_index functions?

Hmm.  I thought this was working via the "translate into the equivalent ARM
encoding" path.  But it couldn't possibly be doing so, since
disas_neon_insn_cp8_3same is not a subroutine of disas_neon_data_insn.

I guess I'll have to verify that RISU is testing what I thought it was...

r~

diff --git a/target/arm/translate.c b/target/arm/translate.c
index 1a0b0eaced..e57844c019 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -7662,6 +7662,65 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
     return 0;
 }
 
+/* ARMv8.3 reclaims a portion of the LDC2/STC2 coprocessor 8 space.  */
+
+static int disas_neon_insn_cp8_3same(DisasContext *s, uint32_t insn)
+{
+    void (*fn_gvec_ptr)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+    int rd, rn, rm, rot, size, opr_sz;
+    TCGv_ptr fpst;
+    bool q;
+
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (s->fp_excp_el) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+        return 0;
+    }
+    if (!s->vfp_enabled) {
+        return 1;
+    }
+    if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
+        return 1;
+    }
+
+    q = extract32(insn, 6, 1);
+    size = extract32(insn, 20, 1);
+    VFP_DREG_D(rd, insn);
+    VFP_DREG_N(rn, insn);
+    VFP_DREG_M(rm, insn);
+    if ((rd | rn | rm) & q) {
+        return 1;
+    }
+
+    if (extract32(insn, 21, 1)) {
+        /* VCMLA */
+        rot = extract32(insn, 23, 2);
+        fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
+    } else if (extract32(insn, 23, 1)) {
+        /* VCADD */
+        rot = extract32(insn, 24, 1);
+        fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
+    } else {
+        /* Assuming the register fields remain, only bit 24 remains undecoded:
+         * 1111_110x_0d0s_nnnn_dddd_1000_nqm0_mmmm
+         */
+        return 1;
+    }
+
+    opr_sz = (1 + q) * 8;
+    fpst = get_fpstatus_ptr(1);
+    tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
+                       vfp_reg_offset(1, rn),
+                       vfp_reg_offset(1, rm), fpst,
+                       opr_sz, opr_sz, rot, fn_gvec_ptr);
+    tcg_temp_free_ptr(fpst);
+    return 0;
+}
+
 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
 {
     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
@@ -8406,6 +8465,12 @@  static void disas_arm_insn(DisasContext *s, unsigned int insn)
                     }
                 }
             }
+        } else if ((insn & 0x0e000f10) == 0x0c000800) {
+            /* ARMv8.3 neon ldc2/stc2 coprocessor 8 extension.  */
+            if (disas_neon_insn_cp8_3same(s, insn)) {
+                goto illegal_op;
+            }
+            return;
         } else if ((insn & 0x0fe00000) == 0x0c400000) {
             /* Coprocessor double register transfer.  */
             ARCH(5TE);

[v2,10/11] target/arm: Decode aa32 armv8.3 3-same

Commit Message

Comments

Patch