From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

This patch changes cse_insn to process the sets of a PARALLEL rtx one by
one, so that any destination rtx recorded in the CSE tables is
invalidated before the next set is processed.

gcc/ChangeLog:

2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* cse.c (safe_hash): Change to accept const_rtx.
	(hash_rtx_cb): Handle VEC_DUPLICATE.
	(exp_equiv_p): Likewise.
	(struct set): Add invalidate_dest_p field to record whether the
	destination still needs to be invalidated.
	(cse_insn): Rename mem_noop_insn to noop_insn and also treat a
	CC-mode register set whose source folds to the register itself
	as a no-op.  For a PARALLEL, invalidate the register set by one
	inner set before processing the next.

gcc/testsuite/ChangeLog:

2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	PR target/88834
	* gcc.target/aarch64/pr88834.c: New test.

Change-Id: I7c3a61f034128f38abe0c2b7dab5d81dec28146c
---
 gcc/cse.c                                  | 78 +++++++++++++++++++++++++++----
 gcc/testsuite/gcc.target/aarch64/pr88834.c | 14 ++++++
 2 files changed, 83 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr88834.c

@@ -570,7 +570,7 @@ static void invalidate_for_call (void);
static rtx use_related_value (rtx, struct table_elt *);
static inline unsigned canon_hash (rtx, machine_mode);
-static inline unsigned safe_hash (rtx, machine_mode);
+static inline unsigned safe_hash (const_rtx, machine_mode);
static inline unsigned hash_rtx_string (const char *);
static rtx canon_reg (rtx, rtx_insn *);
@@ -2369,6 +2369,11 @@ hash_rtx_cb (const_rtx x, machine_mode mode,
hash += fixed_hash (CONST_FIXED_VALUE (x));
return hash;
+ case VEC_DUPLICATE:
+ return hash_rtx_cb (XEXP (x, 0), VOIDmode,
+ do_not_record_p, hash_arg_in_memory_p,
+ have_reg_qty, cb);
+
case CONST_VECTOR:
{
int units;
@@ -2599,7 +2604,7 @@ canon_hash (rtx x, machine_mode mode)
and hash_arg_in_memory are not changed. */
static inline unsigned
-safe_hash (rtx x, machine_mode mode)
+safe_hash (const_rtx x, machine_mode mode)
{
int dummy_do_not_record;
return hash_rtx (x, mode, &dummy_do_not_record, NULL, true);
@@ -2630,6 +2635,18 @@ exp_equiv_p (const_rtx x, const_rtx y, int validate, bool for_gcse)
return x == y;
code = GET_CODE (x);
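+  /* A VEC_DUPLICATE is equivalent to a CONST_VECTOR whose encoding
+     repeats a single element.  */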
+ if ((code == CONST_VECTOR && GET_CODE (y) == VEC_DUPLICATE)
+ || (code == VEC_DUPLICATE && GET_CODE (y) == CONST_VECTOR))
+ {
+ if (code == VEC_DUPLICATE)
+ std::swap (x, y);
+ if (const_vector_encoded_nelts (x) != 1)
+ return 0;
+ return exp_equiv_p (CONST_VECTOR_ENCODED_ELT (x, 0), XEXP (y, 0),
+ validate, for_gcse);
+ }
if (code != GET_CODE (y))
return 0;
@@ -4192,7 +4207,10 @@ struct set
char src_in_memory;
/* Nonzero if the SET_SRC contains something
whose value cannot be predicted and understood. */
- char src_volatile;
+ char src_volatile : 1;
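+  /* Nonzero if SET_DEST still needs to be invalidated at the end of
+     processing this set; cleared once it has been invalidated early.  */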
+ char invalidate_dest_p : 1;
/* Original machine mode, in case it becomes a CONST_INT.
The size of this field should match the size of the mode
field of struct rtx_def (see rtl.h). */
@@ -4639,7 +4655,7 @@ cse_insn (rtx_insn *insn)
for (i = 0; i < n_sets; i++)
{
bool repeat = false;
- bool mem_noop_insn = false;
+ bool noop_insn = false;
rtx src, dest;
rtx src_folded;
struct table_elt *elt = 0, *p;
@@ -4736,6 +4752,7 @@ cse_insn (rtx_insn *insn)
sets[i].src = src;
sets[i].src_hash = HASH (src, mode);
sets[i].src_volatile = do_not_record;
+ sets[i].invalidate_dest_p = 1;
sets[i].src_in_memory = hash_arg_in_memory;
/* If SRC is a MEM, there is a REG_EQUIV note for SRC, and DEST is
@@ -5365,7 +5382,7 @@ cse_insn (rtx_insn *insn)
|| insn_nothrow_p (insn)))
{
SET_SRC (sets[i].rtl) = trial;
- mem_noop_insn = true;
+ noop_insn = true;
break;
}
@@ -5418,6 +5435,21 @@ cse_insn (rtx_insn *insn)
src_folded_cost = constant_pool_entries_cost;
src_folded_regcost = constant_pool_entries_regcost;
}
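+      /* Similarly, if the source of a CC-mode set folds to the
+	 destination register itself, the set is a no-op.  */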
+ else if (n_sets == 1
+ && REG_P (trial)
+ && REG_P (SET_DEST (sets[i].rtl))
+ && GET_MODE_CLASS (mode) == MODE_CC
+ && REGNO (trial) == REGNO (SET_DEST (sets[i].rtl))
+ && !side_effects_p (dest)
+ && (cfun->can_delete_dead_exceptions
+ || insn_nothrow_p (insn)))
+ {
+ SET_SRC (sets[i].rtl) = trial;
+ noop_insn = true;
+ break;
+ }
}
/* If we changed the insn too much, handle this set from scratch. */
@@ -5588,7 +5618,7 @@ cse_insn (rtx_insn *insn)
}
-  /* Similarly for no-op MEM moves.  */
-  else if (mem_noop_insn)
+  /* Similarly for no-op moves.  */
+  else if (noop_insn)
{
if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
cse_cfg_altered = true;
@@ -5760,6 +5790,29 @@ cse_insn (rtx_insn *insn)
}
elt = insert (src, classp, sets[i].src_hash, mode);
elt->in_memory = sets[i].src_in_memory;
+
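+      /* Invalidate DEST here, before any remaining sets of a PARALLEL
+	 are processed, rather than at the end of cse_insn, and record
+	 that the final invalidation can be skipped.  */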
+ if (REG_P (dest)
+ && ! reg_mentioned_p (dest, src))
+ {
+ sets[i].invalidate_dest_p = 0;
+ unsigned int regno = REGNO (dest);
+ unsigned int endregno = END_REGNO (dest);
+ unsigned int j;
+
+ for (j = regno; j < endregno; j++)
+ {
+ if (REG_IN_TABLE (j) >= 0)
+ {
+ remove_invalid_refs (j);
+ REG_IN_TABLE (j) = -1;
+ }
+ }
+ invalidate (dest, VOIDmode);
+ }
+
/* If inline asm has any clobbers, ensure we only reuse
existing inline asms and never try to put the ASM_OPERANDS
into an insn that isn't inline asm. */
@@ -5853,7 +5903,8 @@ cse_insn (rtx_insn *insn)
previous quantity's chain.
Needed for memory if this is a nonvarying address, unless
we have just done an invalidate_memory that covers even those. */
- if (REG_P (dest) || GET_CODE (dest) == SUBREG)
+ if ((REG_P (dest) || GET_CODE (dest) == SUBREG)
+ && sets[i].invalidate_dest_p)
invalidate (dest, VOIDmode);
else if (MEM_P (dest))
invalidate (dest, VOIDmode);
@@ -5887,7 +5938,7 @@ cse_insn (rtx_insn *insn)
if (!REG_P (x))
mention_regs (x);
- else
+ else if (sets[i].invalidate_dest_p)
{
/* We used to rely on all references to a register becoming
inaccessible when a register changes to a new quantity,
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */
+
+void
+f (int *restrict x, int *restrict y, int *restrict z, int n)
+{
+ for (int i = 0; i < n; i += 2)
+ {
+ x[i] = y[i] + z[i];
+ x[i + 1] = y[i + 1] - z[i + 1];
+ }
+}
+
+/* { dg-final { scan-assembler-times {ptest} 0 } } */
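
For reference, here is a sketch of the situation the patch targets, based
on PR 88834 and the test above.  This is hand-written RTL, not compiler
output; the modes, register numbers and unspec names are approximations:

;; An SVE while-loop insn is a PARALLEL that sets both the governing
;; predicate and the condition-code register:
(parallel
  [(set (reg:VNx4BI 93)
        (unspec:VNx4BI [(reg:DI 100) (reg:DI 101)] UNSPEC_WHILELO))
   (set (reg:CC CC_REGNUM) (...))])

;; A later insn recomputes the same CC value from the predicate:
(set (reg:CC CC_REGNUM)
     (unspec:CC [(reg:VNx4BI 93) (...)] UNSPEC_PTEST))

;; With the destinations of the PARALLEL invalidated one by one, CSE can
;; fold the second set to a self-copy of the CC register and delete it
;; through the new no-op path, so no ptest is left in the loop.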
-- 
2.7.4