[v2,05/11] target/arm: Decode aa32 armv8.1 three same

Message ID	20171218172425.18200-6-richard.henderson@linaro.org
State	Superseded
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 2001:4830:134:3::11 as permitted sender) client-ip=2001:4830:134:3::11; From: Richard Henderson <richard.henderson@linaro.org> To: qemu-devel@nongnu.org Date: Mon, 18 Dec 2017 09:24:19 -0800 Message-Id: <20171218172425.18200-6-richard.henderson@linaro.org> In-Reply-To: <20171218172425.18200-1-richard.henderson@linaro.org> References: <20171218172425.18200-1-richard.henderson@linaro.org> Subject: [Qemu-devel] [PATCH v2 05/11] target/arm: Decode aa32 armv8.1 three same Precedence: list Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	ARM v8.1 simd + v8.3 complex insns \| expand [v2,00/11] ARM v8.1 simd + v8.3 complex insns [v2,01/11] target/arm: Add ARM_FEATURE_V8_1_SIMD [v2,02/11] target/arm: Decode aa64 armv8.1 scalar three same extra [v2,03/11] target/arm: Decode aa64 armv8.1 three same extra [v2,04/11] target/arm: Decode aa64 armv8.1 scalar/vector x indexed element [v2,05/11] target/arm: Decode aa32 armv8.1 three same [v2,06/11] target/arm: Decode aa32 armv8.1 two reg and a scalar [v2,07/11] target/arm: Add ARM_FEATURE_V8_FCMA [v2,08/11] target/arm: Decode aa64 armv8.3 fcadd [v2,09/11] target/arm: Decode aa64 armv8.3 fcmla [v2,10/11] target/arm: Decode aa32 armv8.3 3-same [v2,11/11] target/arm: Decode aa32 armv8.3 2-reg-index

Message ID

20171218172425.18200-6-richard.henderson@linaro.org

State

Superseded

Headers

Received-SPF: pass (google.com: domain of
	qemu-devel-bounces+patch=linaro.org@nongnu.org designates
	2001:4830:134:3::11 as permitted sender)
	client-ip=2001:4830:134:3::11; 
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Date: Mon, 18 Dec 2017 09:24:19 -0800
Message-Id: <20171218172425.18200-6-richard.henderson@linaro.org>
In-Reply-To: <20171218172425.18200-1-richard.henderson@linaro.org>
References: <20171218172425.18200-1-richard.henderson@linaro.org>
Subject: [Qemu-devel] [PATCH v2 05/11] target/arm: Decode aa32 armv8.1 three
	same
Precedence: list
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org
Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org
Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>

Series

ARM v8.1 simd + v8.3 complex insns | expand

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate.c | 85 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 19 deletions(-)

--
2.14.3

Comments

Peter Maydell Jan. 15, 2018, 5:37 p.m. UTC | #1

On 18 December 2017 at 17:24, Richard Henderson
<richard.henderson@linaro.org> wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/arm/translate.c | 85 +++++++++++++++++++++++++++++++++++++++-----------

>  1 file changed, 66 insertions(+), 19 deletions(-)

>

> diff --git a/target/arm/translate.c b/target/arm/translate.c

> index c690658493..a9587ae242 100644

> --- a/target/arm/translate.c

> +++ b/target/arm/translate.c

> @@ -25,6 +25,7 @@

>  #include "disas/disas.h"

>  #include "exec/exec-all.h"

>  #include "tcg-op.h"

> +#include "tcg-op-gvec.h"

>  #include "qemu/log.h"

>  #include "qemu/bitops.h"

>  #include "arm_ldst.h"

> @@ -5364,9 +5365,9 @@ static void gen_neon_narrow_op(int op, int u, int size,

>  #define NEON_3R_VPMAX 20

>  #define NEON_3R_VPMIN 21

>  #define NEON_3R_VQDMULH_VQRDMULH 22

> -#define NEON_3R_VPADD 23

> +#define NEON_3R_VPADD_VQRDMLAH 23

>  #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */

> -#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */

> +#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS : float fused multiply-add */


If this case includes VQRDMLSH as well now, then the comment needs updating.
I would suggest just /* VFMA, VFMS, VQRDMLSH */

>  #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */

>  #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */

>  #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */


> @@ -5630,12 +5647,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>          if (q && ((rd | rn | rm) & 1)) {

>              return 1;

>          }

> -        /*

> -         * The SHA-1/SHA-256 3-register instructions require special treatment

> -         * here, as their size field is overloaded as an op type selector, and

> -         * they all consume their input in a single pass.

> -         */

> -        if (op == NEON_3R_SHA) {

> +        switch (op) {

> +        case NEON_3R_SHA:

> +            /* The SHA-1/SHA-256 3-register instructions require special

> +             * treatment here, as their size field is overloaded as an

> +             * op type selector, and they all consume their input in a

> +             * single pass.  */


You've lost the newline before the '*/' here.

>              if (!q) {

>                  return 1;

>              }

> @@ -5672,6 +5689,40 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)

>              tcg_temp_free_i32(tmp2);

>              tcg_temp_free_i32(tmp3);

>              return 0;

> +

> +        case NEON_3R_VPADD_VQRDMLAH:

> +            if (!u) {

> +                break;  /* VPADD */

> +            }

> +            /* VQRDMLAH */

> +            switch (size) {

> +            case 1:

> +                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,

> +                                     q, rd, rn, rm);

> +            case 2:

> +                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,

> +                                     q, rd, rn, rm);

> +            }

> +            return 1;

> +

> +        case NEON_3R_VFM_VQRDMLSH:

> +            if (!u) {

> +                /* VFM, VFMS */

> +                if ((5 & (1 << size)) == 0) {


You could write this 'if (size == 1)' (since the neon_3r_sizes[]
check has already ruled out bit 3 being set)...

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>


thanks
-- PMM

diff --git a/target/arm/translate.c b/target/arm/translate.c
index c690658493..a9587ae242 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -25,6 +25,7 @@ 
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg-op.h"
+#include "tcg-op-gvec.h"
 #include "qemu/log.h"
 #include "qemu/bitops.h"
 #include "arm_ldst.h"
@@ -5364,9 +5365,9 @@  static void gen_neon_narrow_op(int op, int u, int size,
 #define NEON_3R_VPMAX 20
 #define NEON_3R_VPMIN 21
 #define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD 23
+#define NEON_3R_VPADD_VQRDMLAH 23
 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
+#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS : float fused multiply-add */
 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
@@ -5398,9 +5399,9 @@  static const uint8_t neon_3r_sizes[] = {
     [NEON_3R_VPMAX] = 0x7,
     [NEON_3R_VPMIN] = 0x7,
     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
-    [NEON_3R_VPADD] = 0x7,
+    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
     [NEON_3R_SHA] = 0xf, /* size field encodes op type */
-    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
+    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
     [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
@@ -5579,6 +5580,22 @@  static const uint8_t neon_2rm_sizes[] = {
     [NEON_2RM_VCVT_UF] = 0x4,
 };
 
+
+/* Expand v8.1 simd helper.  */
+static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
+                         int q, int rd, int rn, int rm)
+{
+    if (arm_dc_feature(s, ARM_FEATURE_V8_1_SIMD)) {
+        int opr_sz = (1 + q) * 8;
+        tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
+                           vfp_reg_offset(1, rn),
+                           vfp_reg_offset(1, rm), cpu_env,
+                           opr_sz, opr_sz, 0, fn);
+        return 0;
+    }
+    return 1;
+}
+
 /* Translate a NEON data processing instruction.  Return nonzero if the
    instruction is invalid.
    We process data in a mixture of 32-bit and 64-bit chunks.
@@ -5630,12 +5647,12 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         if (q && ((rd | rn | rm) & 1)) {
             return 1;
         }
-        /*
-         * The SHA-1/SHA-256 3-register instructions require special treatment
-         * here, as their size field is overloaded as an op type selector, and
-         * they all consume their input in a single pass.
-         */
-        if (op == NEON_3R_SHA) {
+        switch (op) {
+        case NEON_3R_SHA:
+            /* The SHA-1/SHA-256 3-register instructions require special
+             * treatment here, as their size field is overloaded as an
+             * op type selector, and they all consume their input in a
+             * single pass.  */
             if (!q) {
                 return 1;
             }
@@ -5672,6 +5689,40 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             tcg_temp_free_i32(tmp2);
             tcg_temp_free_i32(tmp3);
             return 0;
+
+        case NEON_3R_VPADD_VQRDMLAH:
+            if (!u) {
+                break;  /* VPADD */
+            }
+            /* VQRDMLAH */
+            switch (size) {
+            case 1:
+                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
+                                     q, rd, rn, rm);
+            case 2:
+                return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
+                                     q, rd, rn, rm);
+            }
+            return 1;
+
+        case NEON_3R_VFM_VQRDMLSH:
+            if (!u) {
+                /* VFM, VFMS */
+                if ((5 & (1 << size)) == 0) {
+                    return 1;
+                }
+                break;
+            }
+            /* VQRDMLSH */
+            switch (size) {
+            case 1:
+                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
+                                     q, rd, rn, rm);
+            case 2:
+                return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
+                                     q, rd, rn, rm);
+            }
+            return 1;
         }
         if (size == 3 && op != NEON_3R_LOGIC) {
             /* 64-bit element instructions. */
@@ -5757,11 +5808,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 rm = rtmp;
             }
             break;
-        case NEON_3R_VPADD:
-            if (u) {
-                return 1;
-            }
-            /* Fall through */
+        case NEON_3R_VPADD_VQRDMLAH:
         case NEON_3R_VPMAX:
         case NEON_3R_VPMIN:
             pairwise = 1;
@@ -5795,8 +5842,8 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 return 1;
             }
             break;
-        case NEON_3R_VFM:
-            if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
+        case NEON_3R_VFM_VQRDMLSH:
+            if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
                 return 1;
             }
             break;
@@ -5993,7 +6040,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
             }
             break;
-        case NEON_3R_VPADD:
+        case NEON_3R_VPADD_VQRDMLAH:
             switch (size) {
             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
@@ -6092,7 +6139,7 @@  static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
               }
             }
             break;
-        case NEON_3R_VFM:
+        case NEON_3R_VFM_VQRDMLSH:
         {
             /* VFMA, VFMS: fused multiply-add */
             TCGv_ptr fpstatus = get_fpstatus_ptr(1);

[v2,05/11] target/arm: Decode aa32 armv8.1 three same

Commit Message

Comments

Patch