diff mbox series

[v2,37/81] tcg/arm: Add full [US]XT[BH] into {s}extract

Message ID 20250107080112.1175095-38-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg: Merge *_i32 and *_i64 opcodes | expand

Commit Message

Richard Henderson Jan. 7, 2025, 8 a.m. UTC
The armv6 uxt and sxt opcodes have a 2-bit rotate field
which supports extractions of len = {8,16} from ofs = {0,8,16,24}.
For the unsigned extract, special case ofs = 0, len <= 8 as AND.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target-has.h | 17 +++++++++++++
 tcg/arm/tcg-target.c.inc | 54 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 65 insertions(+), 6 deletions(-)

Comments

Philippe Mathieu-Daudé Jan. 9, 2025, 10:57 p.m. UTC | #1
On 7/1/25 09:00, Richard Henderson wrote:
> The armv6 uxt and sxt opcodes have a 2-bit rotate field
> which supports extractions from ofs = {0,8,16,24}.
> Special case ofs = 0, len <= 8 as AND.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/arm/tcg-target-has.h | 17 +++++++++++++
>   tcg/arm/tcg-target.c.inc | 54 +++++++++++++++++++++++++++++++++++-----
>   2 files changed, 65 insertions(+), 6 deletions(-)
> 
> diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
> index 316185500d..d6ca35ed1a 100644

Missing:

-- >8 --
@@ -41,8 +41,8 @@ extern bool use_neon_instructions;
  #define TCG_TARGET_HAS_ctz_i32          use_armv7_instructions
  #define TCG_TARGET_HAS_ctpop_i32        0
  #define TCG_TARGET_HAS_deposit_i32      use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32      use_armv7_instructions
-#define TCG_TARGET_HAS_sextract_i32     use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32      1
+#define TCG_TARGET_HAS_sextract_i32     1
  #define TCG_TARGET_HAS_extract2_i32     1
  #define TCG_TARGET_HAS_negsetcond_i32   1
  #define TCG_TARGET_HAS_mulu2_i32        1
---

> --- a/tcg/arm/tcg-target-has.h
> +++ b/tcg/arm/tcg-target-has.h
> @@ -82,4 +82,21 @@ extern bool use_neon_instructions;
>   #define TCG_TARGET_HAS_cmpsel_vec       0
>   #define TCG_TARGET_HAS_tst_vec          1
>   
> +static inline bool
> +tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
> +{
> +    if (use_armv7_instructions) {
> +        return true;  /* SBFX or UBFX */
> +    }
> +    switch (len) {
> +    case 8:   /* SXTB or UXTB */
> +    case 16:  /* SXTH or UXTH */
> +        return (ofs % 8) == 0;
> +    }
> +    return false;
> +}
> +
> +#define TCG_TARGET_extract_valid   tcg_target_extract_valid
> +#define TCG_TARGET_sextract_valid  tcg_target_extract_valid
> +
>   #endif
Philippe Mathieu-Daudé Jan. 15, 2025, 8:06 p.m. UTC | #2
On 9/1/25 23:57, Philippe Mathieu-Daudé wrote:
> On 7/1/25 09:00, Richard Henderson wrote:
>> The armv6 uxt and sxt opcodes have a 2-bit rotate field
>> which supports extractions from ofs = {0,8,16,24}.
>> Special case ofs = 0, len <= 8 as AND.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   tcg/arm/tcg-target-has.h | 17 +++++++++++++
>>   tcg/arm/tcg-target.c.inc | 54 +++++++++++++++++++++++++++++++++++-----
>>   2 files changed, 65 insertions(+), 6 deletions(-)
>>
>> diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
>> index 316185500d..d6ca35ed1a 100644
> 
> Missing:
> 
> -- >8 --
> @@ -41,8 +41,8 @@ extern bool use_neon_instructions;
>   #define TCG_TARGET_HAS_ctz_i32          use_armv7_instructions
>   #define TCG_TARGET_HAS_ctpop_i32        0
>   #define TCG_TARGET_HAS_deposit_i32      use_armv7_instructions
> -#define TCG_TARGET_HAS_extract_i32      use_armv7_instructions
> -#define TCG_TARGET_HAS_sextract_i32     use_armv7_instructions
> +#define TCG_TARGET_HAS_extract_i32      1
> +#define TCG_TARGET_HAS_sextract_i32     1
>   #define TCG_TARGET_HAS_extract2_i32     1
>   #define TCG_TARGET_HAS_negsetcond_i32   1
>   #define TCG_TARGET_HAS_mulu2_i32        1
> ---

With that chunk squashed:
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff mbox series

Patch

diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
index 316185500d..d6ca35ed1a 100644
--- a/tcg/arm/tcg-target-has.h
+++ b/tcg/arm/tcg-target-has.h
@@ -82,4 +82,21 @@  extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_cmpsel_vec       0
 #define TCG_TARGET_HAS_tst_vec          1
 
+static inline bool  /* reports whether an {s}extract of (ofs, len) is accepted */
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    if (use_armv7_instructions) {
+        return true;  /* SBFX or UBFX; type, ofs and len are not inspected */
+    }
+    switch (len) {  /* armv6 path: only byte and halfword widths */
+    case 8:   /* SXTB or UXTB */
+    case 16:  /* SXTH or UXTH */
+        return (ofs % 8) == 0;  /* 2-bit rotate field: ofs = {0,8,16,24} */
+    }
+    return false;  /* every other len is rejected without armv7 */
+}
+
+#define TCG_TARGET_extract_valid   tcg_target_extract_valid
+#define TCG_TARGET_sextract_valid  tcg_target_extract_valid
+
 #endif
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 9cfb733a14..12dad7307f 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1036,19 +1036,61 @@  static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
 static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
                             TCGReg rn, int ofs, int len)
 {
-    /* ubfx */
-    tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
-              | (ofs << 7) | ((len - 1) << 16));
+    /* Unsigned field at ofs 0, at most 8 bits: per gcc, AND can be faster. */
+    if (ofs == 0 && len <= 8) {
+        tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn,
+                        encode_imm_nofail((1 << len) - 1));
+        return;
+    }
+
+    if (use_armv7_instructions) {
+        /* ubfx: ofs is placed at bit 7, len - 1 at bit 16 */
+        tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
+                  | (ofs << 7) | ((len - 1) << 16));
+        return;
+    }
+
+    assert(ofs % 8 == 0);  /* armv6 rotate field: ofs = {0,8,16,24} only */
+    switch (len) {
+    case 8:
+        /* uxtb; for byte-aligned ofs, ofs << 7 puts ofs / 8 at bit 10 */
+        tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+        break;
+    case 16:
+        /* uxth; for byte-aligned ofs, ofs << 7 puts ofs / 8 at bit 10 */
+        tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+        break;
+    default:
+        g_assert_not_reached();  /* no non-armv7 encoding emitted for other len */
+    }
 }
 
 static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
                              TCGReg rn, int ofs, int len)
 {
-    /* sbfx */
-    tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
-              | (ofs << 7) | ((len - 1) << 16));
+    if (use_armv7_instructions) {
+        /* sbfx: ofs is placed at bit 7, len - 1 at bit 16 */
+        tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
+                  | (ofs << 7) | ((len - 1) << 16));
+        return;
+    }
+
+    assert(ofs % 8 == 0);  /* armv6 rotate field: ofs = {0,8,16,24} only */
+    switch (len) {
+    case 8:
+        /* sxtb; for byte-aligned ofs, ofs << 7 puts ofs / 8 at bit 10 */
+        tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+        break;
+    case 16:
+        /* sxth; for byte-aligned ofs, ofs << 7 puts ofs / 8 at bit 10 */
+        tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+        break;
+    default:
+        g_assert_not_reached();  /* no non-armv7 encoding emitted for other len */
+    }
 }
 
+
 static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
                           TCGReg rd, TCGReg rn, int32_t offset)
 {