Message ID | 1448629261-29938-1-git-send-email-james.greenhalgh@arm.com |
---|---|
State | New |
Headers | show |
On Fri, Nov 27, 2015 at 01:01:01PM +0000, James Greenhalgh wrote: > This patch follow Richard Henderson's advice to tighten up > CANNOT_CHANGE_MODE_CLASS for AArch64 to avoid a simplification bug in > the middle-end. > > There is nothing AArch64-specific about the testcase which triggers this, > so I'll put it in the testcase for other targets. If you see a regression, > the explanation in the PR is much more thorough and correct than I can > reproduce here, so I'd recommend starting there. In short, target > maintainers need to: > > > forbid BITS_PER_WORD (64-bit) subregs of hard registers > > > BITS_PER_WORD. See the verbiage I added to the i386 backend for this. > > We removed the CANNOT_CHANGE_MODE_CLASS macro back in January 2015. Before > then, we used it to workaround bugs in big-endian vector support > ( https://gcc.gnu.org/ml/gcc-patches/2014-12/msg01216.html ). Ideally, > we'd not need to bring this macro back, but if we can't fix the middle-end > bug this exposes, we need the workaround. > > For AArch64, doing this runs in to some trouble with two of our > instruction patterns - we end up with: > > (truncate:DI (reg:TF)) > > Which fails if it ever make it through to the simplify routines with > something nasty like: > > (and:DI (truncate:DI (reg:TF 32 v0 [ a ])) > (const_int 2305843009213693951 [0x1fffffffffffffff])) > > The simplify routines want to turn this around to look like: > > (truncate:DI (and:TF (reg:TF 32 v0 [ a ]) > (const_int 2305843009213693951 [0x1fffffffffffffff]))) > > Which then wants to further simplify the expression by first building > the constant in TF mode, and trunc_int_for_mode barfs: > > 0x7a38a5 trunc_int_for_mode(long, machine_mode) > .../gcc/explow.c:53 > > We can fix that by changing the patterns to use a zero_extract, which seems > more in line with what they actually express (extracting the two 64-bit > halves of a 128-bit value). > > Bootstrapped on aarch64-none-linux-gnu, and tested on aarch64-none-elf and > aarch64_be-none-elf without seeing any correctness regressions. > > OK? *ping* Thanks, James > > If so, we ought to get this backported to the release branches, the gcc-5 > backport applies clean (testing ongoing but looks OK so far) if the release > managers and AArch64 maintainers agree this is something that should be > backported this late in the 5.3 release cycle. > > Thanks, > James > > --- > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> > > * config/aarch64/aarch64-protos.h > (aarch64_cannot_change_mode_class): Bring back. > * config/aarch64/aarch64.c > (aarch64_cannot_change_mode_class): Likewise. > * config/aarch64/aarch64.h (CANNOT_CHANGE_MODE_CLASS): Likewise. > * config/aarch64/aarch64.md (aarch64_movdi_<mode>low): Use > zero_extract rather than truncate. > (aarch64_movdi_<mode>high): Likewise. > > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> > > * gcc.dg/torture/pr67609.c: New. > > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index e0a050c..59d3da4 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -269,6 +269,9 @@ int aarch64_get_condition_code (rtx); > bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); > int aarch64_branch_cost (bool, bool); > enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); > +bool aarch64_cannot_change_mode_class (machine_mode, > + machine_mode, > + enum reg_class); > bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); > bool aarch64_constant_address_p (rtx); > bool aarch64_expand_movmem (rtx *); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 3fe2f0f..fadb716 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -12408,6 +12408,24 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, > return ret; > } > > +/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */ > +bool > +aarch64_cannot_change_mode_class (machine_mode from, > + machine_mode to, > + enum reg_class rclass) > +{ > + /* We cannot allow word_mode subregs of full vector modes. > + Otherwise the middle-end will assume it's ok to store to > + (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits > + of the 128-bit register. However, after reload the subreg will > + be dropped leaving a plain DImode store. See PR67609 for a more > + detailed dicussion. In all other cases, we want to be premissive > + and return false. */ > + return (reg_classes_intersect_p (FP_REGS, rclass) > + && GET_MODE_SIZE (to) == UNITS_PER_WORD > + && GET_MODE_SIZE (from) > UNITS_PER_WORD); > +} > + > rtx > aarch64_reverse_mask (enum machine_mode mode) > { > diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h > index 68c006f..66b768d 100644 > --- a/gcc/config/aarch64/aarch64.h > +++ b/gcc/config/aarch64/aarch64.h > @@ -831,6 +831,9 @@ do { \ > extern void __aarch64_sync_cache_range (void *, void *); \ > __aarch64_sync_cache_range (beg, end) > > +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ > + aarch64_cannot_change_mode_class (FROM, TO, CLASS) > + > #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD > > /* Choose appropriate mode for caller saves, so we do the minimum > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 64a40ae..c3367df 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4667,7 +4667,8 @@ > > (define_insn "aarch64_movdi_<mode>low" > [(set (match_operand:DI 0 "register_operand" "=r") > - (truncate:DI (match_operand:TX 1 "register_operand" "w")))] > + (zero_extract:DI (match_operand:TX 1 "register_operand" "w") > + (const_int 64) (const_int 0)))] > "TARGET_FLOAT && (reload_completed || reload_in_progress)" > "fmov\\t%x0, %d1" > [(set_attr "type" "f_mrc") > @@ -4676,9 +4677,8 @@ > > (define_insn "aarch64_movdi_<mode>high" > [(set (match_operand:DI 0 "register_operand" "=r") > - (truncate:DI > - (lshiftrt:TX (match_operand:TX 1 "register_operand" "w") > - (const_int 64))))] > + (zero_extract:DI (match_operand:TX 1 "register_operand" "w") > + (const_int 64) (const_int 64)))] > "TARGET_FLOAT && (reload_completed || reload_in_progress)" > "fmov\\t%x0, %1.d[1]" > [(set_attr "type" "f_mrc") > diff --git a/gcc/testsuite/gcc.dg/torture/pr67609.c b/gcc/testsuite/gcc.dg/torture/pr67609.c > new file mode 100644 > index 0000000..817857d > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/torture/pr67609.c > @@ -0,0 +1,33 @@ > +/* { dg-do run } */ > + > +typedef union > +{ > + double v[2]; > + double s __attribute__ ((vector_size (16))); > +} data; > + > +data reg; > + > +void __attribute__ ((noinline)) > +set_lower (double b) > +{ > + data stack_var; > + double __attribute__ ((vector_size (16))) one = { 1.0, 1.0 }; > + stack_var.s = reg.s; > + stack_var.s += one; > + stack_var.v[0] += b; > + reg.s = stack_var.s; > +} > + > +int > +main (int argc, char ** argv) > +{ > + reg.v[0] = 1.0; > + reg.v[1] = 1.0; > + /* reg should contain { 1.0, 1.0 }. */ > + set_lower (2.0); > + /* reg should contain { 4.0, 2.0 }. */ > + if ((int) reg.v[0] != 4 || (int) reg.v[1] != 2) > + __builtin_abort (); > + return 0; > +}
On 27 November 2015 at 13:01, James Greenhalgh <james.greenhalgh@arm.com> wrote: > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> > > * config/aarch64/aarch64-protos.h > (aarch64_cannot_change_mode_class): Bring back. > * config/aarch64/aarch64.c > (aarch64_cannot_change_mode_class): Likewise. > * config/aarch64/aarch64.h (CANNOT_CHANGE_MODE_CLASS): Likewise. > * config/aarch64/aarch64.md (aarch64_movdi_<mode>low): Use > zero_extract rather than truncate. > (aarch64_movdi_<mode>high): Likewise. > > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> > > * gcc.dg/torture/pr67609.c: New. > + detailed dicussion. In all other cases, we want to be premissive s/premissive/permissive/ OK /Marcus
On Wed, Dec 09, 2015 at 01:13:20PM +0000, Marcus Shawcroft wrote: > On 27 November 2015 at 13:01, James Greenhalgh <james.greenhalgh@arm.com> wrote: > > > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> > > > > * config/aarch64/aarch64-protos.h > > (aarch64_cannot_change_mode_class): Bring back. > > * config/aarch64/aarch64.c > > (aarch64_cannot_change_mode_class): Likewise. > > * config/aarch64/aarch64.h (CANNOT_CHANGE_MODE_CLASS): Likewise. > > * config/aarch64/aarch64.md (aarch64_movdi_<mode>low): Use > > zero_extract rather than truncate. > > (aarch64_movdi_<mode>high): Likewise. > > > > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> > > > > * gcc.dg/torture/pr67609.c: New. > > > > + detailed dicussion. In all other cases, we want to be premissive > > s/premissive/permissive/ > > OK /Marcus Thanks. This has had a week or so to soak on trunk now, is it OK to backport to GCC 5 and 4.9? The patch applies as-good-as clean, with only a little bit to fix up in aarch64-protos.h to keep alphabetical order, and I've bootstrapped and tested the backports with no issue. Cheers, James
On 14 December 2015 at 11:01, James Greenhalgh <james.greenhalgh@arm.com> wrote: > On Wed, Dec 09, 2015 at 01:13:20PM +0000, Marcus Shawcroft wrote: >> On 27 November 2015 at 13:01, James Greenhalgh <james.greenhalgh@arm.com> wrote: >> >> > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> >> > >> > * config/aarch64/aarch64-protos.h >> > (aarch64_cannot_change_mode_class): Bring back. >> > * config/aarch64/aarch64.c >> > (aarch64_cannot_change_mode_class): Likewise. >> > * config/aarch64/aarch64.h (CANNOT_CHANGE_MODE_CLASS): Likewise. >> > * config/aarch64/aarch64.md (aarch64_movdi_<mode>low): Use >> > zero_extract rather than truncate. >> > (aarch64_movdi_<mode>high): Likewise. >> > >> > 2015-11-27 James Greenhalgh <james.greenhalgh@arm.com> >> > >> > * gcc.dg/torture/pr67609.c: New. >> > >> >> + detailed dicussion. In all other cases, we want to be premissive >> >> s/premissive/permissive/ >> >> OK /Marcus > > Thanks. > > This has had a week or so to soak on trunk now, is it OK to backport to GCC > 5 and 4.9? > > The patch applies as-good-as clean, with only a little bit to fix up in > aarch64-protos.h to keep alphabetical order, and I've bootstrapped and tested > the backports with no issue. OK /Marcus
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e0a050c..59d3da4 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -269,6 +269,9 @@ int aarch64_get_condition_code (rtx); bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); int aarch64_branch_cost (bool, bool); enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); +bool aarch64_cannot_change_mode_class (machine_mode, + machine_mode, + enum reg_class); bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); bool aarch64_constant_address_p (rtx); bool aarch64_expand_movmem (rtx *); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 3fe2f0f..fadb716 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -12408,6 +12408,24 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, return ret; } +/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */ +bool +aarch64_cannot_change_mode_class (machine_mode from, + machine_mode to, + enum reg_class rclass) +{ + /* We cannot allow word_mode subregs of full vector modes. + Otherwise the middle-end will assume it's ok to store to + (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits + of the 128-bit register. However, after reload the subreg will + be dropped leaving a plain DImode store. See PR67609 for a more + detailed dicussion. In all other cases, we want to be premissive + and return false. */ + return (reg_classes_intersect_p (FP_REGS, rclass) + && GET_MODE_SIZE (to) == UNITS_PER_WORD + && GET_MODE_SIZE (from) > UNITS_PER_WORD); +} + rtx aarch64_reverse_mask (enum machine_mode mode) { diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 68c006f..66b768d 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -831,6 +831,9 @@ do { \ extern void __aarch64_sync_cache_range (void *, void *); \ __aarch64_sync_cache_range (beg, end) +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + aarch64_cannot_change_mode_class (FROM, TO, CLASS) + #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD /* Choose appropriate mode for caller saves, so we do the minimum diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 64a40ae..c3367df 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4667,7 +4667,8 @@ (define_insn "aarch64_movdi_<mode>low" [(set (match_operand:DI 0 "register_operand" "=r") - (truncate:DI (match_operand:TX 1 "register_operand" "w")))] + (zero_extract:DI (match_operand:TX 1 "register_operand" "w") + (const_int 64) (const_int 0)))] "TARGET_FLOAT && (reload_completed || reload_in_progress)" "fmov\\t%x0, %d1" [(set_attr "type" "f_mrc") @@ -4676,9 +4677,8 @@ (define_insn "aarch64_movdi_<mode>high" [(set (match_operand:DI 0 "register_operand" "=r") - (truncate:DI - (lshiftrt:TX (match_operand:TX 1 "register_operand" "w") - (const_int 64))))] + (zero_extract:DI (match_operand:TX 1 "register_operand" "w") + (const_int 64) (const_int 64)))] "TARGET_FLOAT && (reload_completed || reload_in_progress)" "fmov\\t%x0, %1.d[1]" [(set_attr "type" "f_mrc") diff --git a/gcc/testsuite/gcc.dg/torture/pr67609.c b/gcc/testsuite/gcc.dg/torture/pr67609.c new file mode 100644 index 0000000..817857d --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr67609.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +typedef union +{ + double v[2]; + double s __attribute__ ((vector_size (16))); +} data; + +data reg; + +void __attribute__ ((noinline)) +set_lower (double b) +{ + data stack_var; + double __attribute__ ((vector_size (16))) one = { 1.0, 1.0 }; + stack_var.s = reg.s; + stack_var.s += one; + stack_var.v[0] += b; + reg.s = stack_var.s; +} + +int +main (int argc, char ** argv) +{ + reg.v[0] = 1.0; + reg.v[1] = 1.0; + /* reg should contain { 1.0, 1.0 }. */ + set_lower (2.0); + /* reg should contain { 4.0, 2.0 }. */ + if ((int) reg.v[0] != 4 || (int) reg.v[1] != 2) + __builtin_abort (); + return 0; +}