diff mbox series

[v2,34/81] tcg/i386: Fold the ext{8,16,32}[us] cases into {s}extract

Message ID 20250107080112.1175095-35-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg: Merge *_i32 and *_i64 opcodes | expand

Commit Message

Richard Henderson Jan. 7, 2025, 8 a.m. UTC
Accept byte and word extensions with the extract opcodes.
This is preparatory to removing the specialized extracts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target-has.h | 49 +++++++++++++++++++++++++++----
 tcg/tcg-has.h             | 12 +++++---
 tcg/optimize.c            |  8 +++--
 tcg/tcg-op.c              | 12 +++-----
 tcg/i386/tcg-target.c.inc | 62 +++++++++++++++++++++++++++++----------
 5 files changed, 107 insertions(+), 36 deletions(-)

Comments

Philippe Mathieu-Daudé Jan. 15, 2025, 9:56 p.m. UTC | #1
On 7/1/25 09:00, Richard Henderson wrote:
> Accept byte and word extensions with the extract opcodes.
> This is preparatory to removing the specialized extracts.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   tcg/i386/tcg-target-has.h | 49 +++++++++++++++++++++++++++----
>   tcg/tcg-has.h             | 12 +++++---
>   tcg/optimize.c            |  8 +++--
>   tcg/tcg-op.c              | 12 +++-----
>   tcg/i386/tcg-target.c.inc | 62 +++++++++++++++++++++++++++++----------
>   5 files changed, 107 insertions(+), 36 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
> index 3ea2eab807..ad69f957a7 100644
> --- a/tcg/i386/tcg-target-has.h
> +++ b/tcg/i386/tcg-target-has.h
> @@ -80,7 +80,7 @@
>   #define TCG_TARGET_HAS_ctpop_i64        have_popcnt
>   #define TCG_TARGET_HAS_deposit_i64      1
>   #define TCG_TARGET_HAS_extract_i64      1
> -#define TCG_TARGET_HAS_sextract_i64     0
> +#define TCG_TARGET_HAS_sextract_i64     1
>   #define TCG_TARGET_HAS_extract2_i64     1
>   #define TCG_TARGET_HAS_negsetcond_i64   1
>   #define TCG_TARGET_HAS_add2_i64         1
> @@ -130,10 +130,47 @@
>        (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
>   #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
>   
> -/* Check for the possibility of high-byte extraction and, for 64-bit,
> -   zero-extending 32-bit right-shift.  */
> -#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
> -#define TCG_TARGET_extract_i64_valid(ofs, len) \
> -    (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
> +/*
> + * Check for the possibility of low byte/word extraction, high-byte extraction
> + * and zero-extending 32-bit right-shift.
> + *
> + * We cannot sign-extend from high byte to 64-bits without using the
> + * REX prefix that explicitly excludes access to the high-byte registers.
> + */
> +static inline bool
> +tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
> +{
> +    switch (ofs) {
> +    case 0:
> +        switch (len) {
> +        case 8:
> +        case 16:
> +            return true;
> +        case 32:
> +            return type == TCG_TYPE_I64;
> +        }
> +        return false;
> +    case 8:
> +        return len == 8 && type == TCG_TYPE_I32;
> +    }
> +    return false;
> +}
> +#define TCG_TARGET_sextract_valid  tcg_target_sextract_valid
> +
> +static inline bool
> +tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
> +{
> +    if (type == TCG_TYPE_I64 && ofs + len == 32) {
> +        return true;
> +    }
> +    switch (ofs) {
> +    case 0:
> +        return len == 8 || len == 16;
> +    case 8:
> +        return len == 8;
> +    }
> +    return false;
> +}
> +#define TCG_TARGET_extract_valid  tcg_target_extract_valid
>   
>   #endif
> diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
> index 65b6a0b0cf..8ed35be8c3 100644
> --- a/tcg/tcg-has.h
> +++ b/tcg/tcg-has.h
> @@ -56,11 +56,15 @@
>   #ifndef TCG_TARGET_deposit_i64_valid
>   #define TCG_TARGET_deposit_i64_valid(ofs, len) 1
>   #endif
> -#ifndef TCG_TARGET_extract_i32_valid
> -#define TCG_TARGET_extract_i32_valid(ofs, len) 1
> +#ifndef TCG_TARGET_extract_valid
> +#define TCG_TARGET_extract_valid(type, ofs, len) \
> +    ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
> +     : TCG_TARGET_HAS_extract_i64)
>   #endif
> -#ifndef TCG_TARGET_extract_i64_valid
> -#define TCG_TARGET_extract_i64_valid(ofs, len) 1
> +#ifndef TCG_TARGET_sextract_valid
> +#define TCG_TARGET_sextract_valid(type, ofs, len) \
> +    ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
> +     : TCG_TARGET_HAS_sextract_i64)
>   #endif
>   
>   /* Only one of DIV or DIV2 should be defined.  */
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index c363c5c04b..cd8ad712c4 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -2362,8 +2362,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
>           xor_opc = INDEX_op_xor_i32;
>           shr_opc = INDEX_op_shr_i32;
>           neg_opc = INDEX_op_neg_i32;
> -        if (TCG_TARGET_extract_i32_valid(sh, 1)) {
> +        if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
>               uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
> +        }
> +        if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
>               sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
>           }
>           break;
> @@ -2373,8 +2375,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
>           xor_opc = INDEX_op_xor_i64;
>           shr_opc = INDEX_op_shr_i64;
>           neg_opc = INDEX_op_neg_i64;
> -        if (TCG_TARGET_extract_i64_valid(sh, 1)) {
> +        if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
>               uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
> +        }
> +        if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
>               sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
>           }
>           break;
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index ab5ccd8dcb..d813a7f44e 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -1014,8 +1014,7 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
>           return;
>       }
>   
> -    if (TCG_TARGET_HAS_extract_i32
> -        && TCG_TARGET_extract_i32_valid(ofs, len)) {
> +    if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) {
>           tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
>           return;
>       }
> @@ -1077,8 +1076,7 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
>           }
>       }
>   
> -    if (TCG_TARGET_HAS_sextract_i32
> -        && TCG_TARGET_extract_i32_valid(ofs, len)) {
> +    if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) {
>           tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
>           return;
>       }
> @@ -2811,8 +2809,7 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
>           goto do_shift_and;
>       }
>   
> -    if (TCG_TARGET_HAS_extract_i64
> -        && TCG_TARGET_extract_i64_valid(ofs, len)) {
> +    if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
>           tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
>           return;
>       }
> @@ -2917,8 +2914,7 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
>           return;
>       }
>   
> -    if (TCG_TARGET_HAS_sextract_i64
> -        && TCG_TARGET_extract_i64_valid(ofs, len)) {
> +    if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) {
>           tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
>           return;
>       }
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index 047c5da81c..afff56956f 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -3036,6 +3036,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
>   
>       case INDEX_op_extract_i64:
>           if (a2 + args[3] == 32) {
> +            if (a2 == 0) {
> +                tcg_out_ext32u(s, a0, a1);
> +                break;
> +            }
>               /* This is a 32-bit zero-extending right shift.  */
>               tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
>               tcg_out_shifti(s, SHIFT_SHR, a0, a2);
> @@ -3043,28 +3047,53 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
>           }
>           /* FALLTHRU */
>       case INDEX_op_extract_i32:
> -        /* On the off-chance that we can use the high-byte registers.
> -           Otherwise we emit the same ext16 + shift pattern that we
> -           would have gotten from the normal tcg-op.c expansion.  */
> -        tcg_debug_assert(a2 == 8 && args[3] == 8);
> -        if (a1 < 4 && a0 < 8) {
> -            tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
> -        } else {
> +        if (a2 == 0 && args[3] == 8) {
> +            tcg_out_ext8u(s, a0, a1);
> +        } else if (a2 == 0 && args[3] == 16) {
>               tcg_out_ext16u(s, a0, a1);
> -            tcg_out_shifti(s, SHIFT_SHR, a0, 8);
> +        } else if (a2 == 8 && args[3] == 8) {
> +            /*
> +             * On the off-chance that we can use the high-byte registers.
> +             * Otherwise we emit the same ext16 + shift pattern that we
> +             * would have gotten from the normal tcg-op.c expansion.
> +             */
> +            if (a1 < 4 && a0 < 8) {
> +                tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
> +            } else {
> +                tcg_out_ext16u(s, a0, a1);
> +                tcg_out_shifti(s, SHIFT_SHR, a0, 8);
> +            }
> +        } else {
> +            g_assert_not_reached();
> +        }
> +        break;
> +
> +    case INDEX_op_sextract_i64:
> +        if (a2 == 0 && args[3] == 8) {
> +            tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1);
> +        } else if (a2 == 0 && args[3] == 16) {
> +            tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1);
> +        } else if (a2 == 0 && args[3] == 32) {
> +            tcg_out_ext32s(s, a0, a1);
> +        } else {
> +            g_assert_not_reached();
>           }
>           break;
>   
>       case INDEX_op_sextract_i32:
> -        /* We don't implement sextract_i64, as we cannot sign-extend to
> -           64-bits without using the REX prefix that explicitly excludes
> -           access to the high-byte registers.  */
> -        tcg_debug_assert(a2 == 8 && args[3] == 8);
> -        if (a1 < 4 && a0 < 8) {
> -            tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
> -        } else {
> +        if (a2 == 0 && args[3] == 8) {
> +            tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1);
> +        } else if (a2 == 0 && args[3] == 16) {
>               tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
> -            tcg_out_shifti(s, SHIFT_SAR, a0, 8);
> +        } else if (a2 == 8 && args[3] == 8) {
> +            if (a1 < 4 && a0 < 8) {
> +                tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
> +            } else {
> +                tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
> +                tcg_out_shifti(s, SHIFT_SAR, a0, 8);
> +            }
> +        } else {
> +            g_assert_not_reached();
>           }
>           break;
>   
> @@ -3747,6 +3776,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
>       case INDEX_op_extract_i32:
>       case INDEX_op_extract_i64:
>       case INDEX_op_sextract_i32:
> +    case INDEX_op_sextract_i64:
>       case INDEX_op_ctpop_i32:
>       case INDEX_op_ctpop_i64:
>           return C_O1_I1(r, r);

To the best of my knowledge,
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff mbox series

Patch

diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
index 3ea2eab807..ad69f957a7 100644
--- a/tcg/i386/tcg-target-has.h
+++ b/tcg/i386/tcg-target-has.h
@@ -80,7 +80,7 @@ 
 #define TCG_TARGET_HAS_ctpop_i64        have_popcnt
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
-#define TCG_TARGET_HAS_sextract_i64     0
+#define TCG_TARGET_HAS_sextract_i64     1
 #define TCG_TARGET_HAS_extract2_i64     1
 #define TCG_TARGET_HAS_negsetcond_i64   1
 #define TCG_TARGET_HAS_add2_i64         1
@@ -130,10 +130,47 @@ 
      (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
 #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
 
-/* Check for the possibility of high-byte extraction and, for 64-bit,
-   zero-extending 32-bit right-shift.  */
-#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
-#define TCG_TARGET_extract_i64_valid(ofs, len) \
-    (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+/*
+ * Check for the possibility of low byte/word extraction, high-byte extraction
+ * and zero-extending 32-bit right-shift.
+ *
+ * We cannot sign-extend from high byte to 64-bits without using the
+ * REX prefix that explicitly excludes access to the high-byte registers.
+ */
+static inline bool
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    switch (ofs) {
+    case 0:
+        switch (len) {
+        case 8:
+        case 16:
+            return true;
+        case 32:
+            return type == TCG_TYPE_I64;
+        }
+        return false;
+    case 8:
+        return len == 8 && type == TCG_TYPE_I32;
+    }
+    return false;
+}
+#define TCG_TARGET_sextract_valid  tcg_target_sextract_valid
+
+static inline bool
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    if (type == TCG_TYPE_I64 && ofs + len == 32) {
+        return true;
+    }
+    switch (ofs) {
+    case 0:
+        return len == 8 || len == 16;
+    case 8:
+        return len == 8;
+    }
+    return false;
+}
+#define TCG_TARGET_extract_valid  tcg_target_extract_valid
 
 #endif
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
index 65b6a0b0cf..8ed35be8c3 100644
--- a/tcg/tcg-has.h
+++ b/tcg/tcg-has.h
@@ -56,11 +56,15 @@ 
 #ifndef TCG_TARGET_deposit_i64_valid
 #define TCG_TARGET_deposit_i64_valid(ofs, len) 1
 #endif
-#ifndef TCG_TARGET_extract_i32_valid
-#define TCG_TARGET_extract_i32_valid(ofs, len) 1
+#ifndef TCG_TARGET_extract_valid
+#define TCG_TARGET_extract_valid(type, ofs, len) \
+    ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
+     : TCG_TARGET_HAS_extract_i64)
 #endif
-#ifndef TCG_TARGET_extract_i64_valid
-#define TCG_TARGET_extract_i64_valid(ofs, len) 1
+#ifndef TCG_TARGET_sextract_valid
+#define TCG_TARGET_sextract_valid(type, ofs, len) \
+    ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
+     : TCG_TARGET_HAS_sextract_i64)
 #endif
 
 /* Only one of DIV or DIV2 should be defined.  */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index c363c5c04b..cd8ad712c4 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2362,8 +2362,10 @@  static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
         xor_opc = INDEX_op_xor_i32;
         shr_opc = INDEX_op_shr_i32;
         neg_opc = INDEX_op_neg_i32;
-        if (TCG_TARGET_extract_i32_valid(sh, 1)) {
+        if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
             uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
+        }
+        if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
             sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
         }
         break;
@@ -2373,8 +2375,10 @@  static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
         xor_opc = INDEX_op_xor_i64;
         shr_opc = INDEX_op_shr_i64;
         neg_opc = INDEX_op_neg_i64;
-        if (TCG_TARGET_extract_i64_valid(sh, 1)) {
+        if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
             uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
+        }
+        if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
             sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
         }
         break;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index ab5ccd8dcb..d813a7f44e 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1014,8 +1014,7 @@  void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
         return;
     }
 
-    if (TCG_TARGET_HAS_extract_i32
-        && TCG_TARGET_extract_i32_valid(ofs, len)) {
+    if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) {
         tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
         return;
     }
@@ -1077,8 +1076,7 @@  void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
         }
     }
 
-    if (TCG_TARGET_HAS_sextract_i32
-        && TCG_TARGET_extract_i32_valid(ofs, len)) {
+    if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) {
         tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
         return;
     }
@@ -2811,8 +2809,7 @@  void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
         goto do_shift_and;
     }
 
-    if (TCG_TARGET_HAS_extract_i64
-        && TCG_TARGET_extract_i64_valid(ofs, len)) {
+    if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
         tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
         return;
     }
@@ -2917,8 +2914,7 @@  void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
         return;
     }
 
-    if (TCG_TARGET_HAS_sextract_i64
-        && TCG_TARGET_extract_i64_valid(ofs, len)) {
+    if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) {
         tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
         return;
     }
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 047c5da81c..afff56956f 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3036,6 +3036,10 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
 
     case INDEX_op_extract_i64:
         if (a2 + args[3] == 32) {
+            if (a2 == 0) {
+                tcg_out_ext32u(s, a0, a1);
+                break;
+            }
             /* This is a 32-bit zero-extending right shift.  */
             tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
             tcg_out_shifti(s, SHIFT_SHR, a0, a2);
@@ -3043,28 +3047,53 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
         }
         /* FALLTHRU */
     case INDEX_op_extract_i32:
-        /* On the off-chance that we can use the high-byte registers.
-           Otherwise we emit the same ext16 + shift pattern that we
-           would have gotten from the normal tcg-op.c expansion.  */
-        tcg_debug_assert(a2 == 8 && args[3] == 8);
-        if (a1 < 4 && a0 < 8) {
-            tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
-        } else {
+        if (a2 == 0 && args[3] == 8) {
+            tcg_out_ext8u(s, a0, a1);
+        } else if (a2 == 0 && args[3] == 16) {
             tcg_out_ext16u(s, a0, a1);
-            tcg_out_shifti(s, SHIFT_SHR, a0, 8);
+        } else if (a2 == 8 && args[3] == 8) {
+            /*
+             * On the off-chance that we can use the high-byte registers.
+             * Otherwise we emit the same ext16 + shift pattern that we
+             * would have gotten from the normal tcg-op.c expansion.
+             */
+            if (a1 < 4 && a0 < 8) {
+                tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
+            } else {
+                tcg_out_ext16u(s, a0, a1);
+                tcg_out_shifti(s, SHIFT_SHR, a0, 8);
+            }
+        } else {
+            g_assert_not_reached();
+        }
+        break;
+
+    case INDEX_op_sextract_i64:
+        if (a2 == 0 && args[3] == 8) {
+            tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1);
+        } else if (a2 == 0 && args[3] == 16) {
+            tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1);
+        } else if (a2 == 0 && args[3] == 32) {
+            tcg_out_ext32s(s, a0, a1);
+        } else {
+            g_assert_not_reached();
         }
         break;
 
     case INDEX_op_sextract_i32:
-        /* We don't implement sextract_i64, as we cannot sign-extend to
-           64-bits without using the REX prefix that explicitly excludes
-           access to the high-byte registers.  */
-        tcg_debug_assert(a2 == 8 && args[3] == 8);
-        if (a1 < 4 && a0 < 8) {
-            tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
-        } else {
+        if (a2 == 0 && args[3] == 8) {
+            tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1);
+        } else if (a2 == 0 && args[3] == 16) {
             tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
-            tcg_out_shifti(s, SHIFT_SAR, a0, 8);
+        } else if (a2 == 8 && args[3] == 8) {
+            if (a1 < 4 && a0 < 8) {
+                tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
+            } else {
+                tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
+                tcg_out_shifti(s, SHIFT_SAR, a0, 8);
+            }
+        } else {
+            g_assert_not_reached();
         }
         break;
 
@@ -3747,6 +3776,7 @@  tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
     case INDEX_op_extract_i32:
     case INDEX_op_extract_i64:
     case INDEX_op_sextract_i32:
+    case INDEX_op_sextract_i64:
     case INDEX_op_ctpop_i32:
     case INDEX_op_ctpop_i64:
         return C_O1_I1(r, r);