@@ -13346,85 +13346,167 @@ aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
return;
}
-/* Given OPERANDS of consecutive load/store, check if we can merge
- them into ldp/stp. LOAD is true if they are load instructions.
- MODE is the mode of memory operands. */
+/* Extract into REG and MEM the register and memory operands of an
+ LDP/STP operation from OPERANDS. OPS gives the number of reg/mem
+ pairs to extract, and LOAD says whether we are looking at a load
+ or a store. */
-bool
-aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
- enum machine_mode mode)
+static void
+aarch64_extract_ldpstp_operands (unsigned int ops, bool load,
+ rtx *operands, rtx *reg, rtx *mem)
{
- HOST_WIDE_INT offval_1, offval_2, msize;
- enum reg_class rclass_1, rclass_2;
- rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
+ for (unsigned int i = 0; i < ops; i++)
+ {
+ unsigned int twoi = i * 2;
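+ /* The layout of OPERANDS follows from the indexing below: for a
+ load it is { reg0, mem0, reg1, mem1, ... }, while a store swaps
+ the reg/mem positions within each pair. */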
+ reg[i] = operands[load ? twoi : (twoi + 1)];
+ mem[i] = operands[load ? (twoi + 1) : twoi];
+ /* Sanity check. */
+ gcc_assert (MEM_P (mem[i]));
- if (load)
+ if (load)
+ gcc_assert (REG_P (reg[i]));
+ }
+}
+
+/* Return TRUE if each RTX in REG (which has size COUNT) is of the
+ same register class. For the purpose of this function anything
+ which does not satisfy REG_P (e.g. a const_int 0 or a const_double
+ 0.0) is considered to be in GENERAL_REGS, as such a value can be
+ stored via the zero register. */
+
+static bool
+aarch64_ldpstp_ops_same_reg_class_p (unsigned int count, rtx *reg)
+{
+ /* Check if the registers are of the same class. */
+ reg_class rclass = (REG_P (reg[0]) && FP_REGNUM_P (REGNO (reg[0])))
+ ? FP_REGS
+ : GENERAL_REGS;
+
+ for (unsigned int i = 1; i < count; i++)
{
- mem_1 = operands[1];
- mem_2 = operands[3];
- reg_1 = operands[0];
- reg_2 = operands[2];
- gcc_assert (REG_P (reg_1) && REG_P (reg_2));
- if (REGNO (reg_1) == REGNO (reg_2))
+ reg_class rc = (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
+ ? FP_REGS
+ : GENERAL_REGS;
+ if (rclass != rc)
return false;
}
- else
+
+ return true;
+}
+
+/* REG contains the set of registers, sized by COUNT, which are written
+ by a sequence of (base + offset) loads using BASE as the base register
+ and OFFSETS as the offsets. Return TRUE if any of the registers in
+ REG clobbers BASE. */
+
+static bool
+aarch64_ldpstp_load_regs_clobber_base_p (unsigned int count,
+ rtx *reg, rtx base,
+ HOST_WIDE_INT *offsets)
+{
+ for (unsigned int i = 0; i < count - 1; i++)
+ if (reg_mentioned_p (reg[i], base))
+ return true;
+
+ /* In an ascending sequence the last load may safely clobber the
+ base, as BASE has no later use within the group; only in a
+ descending sequence is that a problem. */
+ return (offsets[0] > offsets[1]
+ && reg_mentioned_p (reg[count - 1], base));
+}
+
+/* Return true if OFFSETS, which has size COUNT, is an ascending or
+ descending sequence whose consecutive elements differ by MSIZE. */
+
+static bool
+aarch64_ldpstp_offsets_consecutive_p (unsigned int count,
+ HOST_WIDE_INT *offsets,
+ HOST_WIDE_INT msize)
+{
+ bool ascending = true, descending = true;
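+ /* E.g. with MSIZE 8, { 0, 8, 16, 24 } is ascending and
+ { 24, 16, 8, 0 } is descending. */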
+ for (unsigned int i = 0; i < count; i++)
{
- mem_1 = operands[0];
- mem_2 = operands[2];
- reg_1 = operands[1];
- reg_2 = operands[3];
+ ascending &= (offsets[0] == offsets[i] - msize * i);
+ descending &= (offsets[0] == offsets[i] + msize * i);
}
- /* The mems cannot be volatile. */
- if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
- return false;
+ return ascending || descending;
+}
- /* Check if the addresses are in the form of [base+offset]. */
- extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
- if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
- return false;
- extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
- if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
- return false;
- /* Check if the bases are same. */
- if (!rtx_equal_p (base_1, base_2))
- return false;
+/* Helper function for aarch64_operands_ok_for_ldpstp and
+ aarch64_operands_adjust_ok_for_ldpstp. OPERANDS are the
+ consecutive load/store operands which we hope to merge. LOAD
+ is true if these are load instructions. MODE is the mode of the
+ memory operations. ADJUST is true in the four-insn adjust case,
+ where the base address must first be moved into a scratch
+ register. Return TRUE if the operands can be merged. */
- offval_1 = INTVAL (offset_1);
- offval_2 = INTVAL (offset_2);
- msize = GET_MODE_SIZE (mode);
- /* Check if the offsets are consecutive. */
- if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
- return false;
+static bool
+aarch64_operands_ok_for_ldpstp_1 (rtx *operands, bool load,
+ enum machine_mode mode, bool adjust)
+{
+ const unsigned int count = adjust ? 4 : 2;
+ /* Avoid alloca calls and just size the arrays for the largest
+ case we can handle. */
+ const unsigned int max_ops = 4;
+ rtx mem[max_ops], reg[max_ops], base[max_ops], offset[max_ops];
+ HOST_WIDE_INT offval[max_ops];
+ unsigned int i = 0;
+ HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
+
+ aarch64_extract_ldpstp_operands (count, load, operands, reg, mem);
- /* Check if the addresses are clobbered by load. */
if (load)
{
- if (reg_mentioned_p (reg_1, mem_1))
- return false;
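+ /* The two destination registers of a single LDP must be
+ distinct; loading into the same register twice is not a
+ valid pairing. */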
+ for (i = 0; i < count; i += 2)
+ if (REGNO (reg[i]) == REGNO (reg[i + 1]))
+ return false;
+ }
+
+ /* For the adjust case, skip if the memory operand is by itself
+ valid for ldp/stp; no adjustment through a scratch register is
+ needed then. */
+ if (adjust
+ && (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode)))
+ return false;
- /* In increasing order, the last load can clobber the address. */
- if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
+ /* The mems cannot be volatile. */
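+ /* Combining volatile accesses could change their number or
+ order. */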
+ for (i = 0; i < count; i++)
+ if (MEM_VOLATILE_P (mem[i]))
return false;
+
+ /* Check if the addresses are in the form of [base+offset]. */
+ for (i = 0; i < count; i++)
+ {
+ extract_base_offset_in_addr (mem[i], &base[i], &offset[i]);
+ if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
+ return false;
}
- if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
- rclass_1 = FP_REGS;
- else
- rclass_1 = GENERAL_REGS;
+ /* Check if the bases are the same. */
+ for (i = 1; i < count; i++)
+ if (!rtx_equal_p (base[0], base[i]))
+ return false;
- if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
- rclass_2 = FP_REGS;
- else
- rclass_2 = GENERAL_REGS;
+ for (i = 0; i < count; i++)
+ offval[i] = INTVAL (offset[i]);
- /* Check if the registers are of same class. */
- if (rclass_1 != rclass_2)
+ if (!aarch64_ldpstp_offsets_consecutive_p (count, offval, msize))
return false;
- return true;
+ /* Check if the base address is clobbered by a load. */
+ if (load && aarch64_ldpstp_load_regs_clobber_base_p (count, reg,
+ base[0], offval))
+ return false;
+
+ return aarch64_ldpstp_ops_same_reg_class_p (count, reg);
+}
+
+/* Given OPERANDS of consecutive load/store, check if we can merge
+ them into ldp/stp. LOAD is true if they are load instructions.
+ MODE is the mode of memory operands. */
+
+bool
+aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
+ enum machine_mode mode)
+{
+ return aarch64_operands_ok_for_ldpstp_1 (operands, load, mode, false);
}
/* Given OPERANDS of consecutive load/store, check if we can merge
@@ -13446,124 +13528,13 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
stp w1, w1, [scratch, 0x8]
The peephole patterns detecting this opportunity should guarantee
- the scratch register is avaliable. */
+ the scratch register is available. */
bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
enum machine_mode mode)
{
- enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
- HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
- rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
- rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
-
- if (load)
- {
- reg_1 = operands[0];
- mem_1 = operands[1];
- reg_2 = operands[2];
- mem_2 = operands[3];
- reg_3 = operands[4];
- mem_3 = operands[5];
- reg_4 = operands[6];
- mem_4 = operands[7];
- gcc_assert (REG_P (reg_1) && REG_P (reg_2)
- && REG_P (reg_3) && REG_P (reg_4));
- if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
- return false;
- }
- else
- {
- mem_1 = operands[0];
- reg_1 = operands[1];
- mem_2 = operands[2];
- reg_2 = operands[3];
- mem_3 = operands[4];
- reg_3 = operands[5];
- mem_4 = operands[6];
- reg_4 = operands[7];
- }
- /* Skip if memory operand is by itslef valid for ldp/stp. */
- if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
- return false;
-
- /* The mems cannot be volatile. */
- if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
- || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
- return false;
-
- /* Check if the addresses are in the form of [base+offset]. */
- extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
- if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
- return false;
- extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
- if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
- return false;
- extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
- if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
- return false;
- extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
- if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
- return false;
-
- /* Check if the bases are same. */
- if (!rtx_equal_p (base_1, base_2)
- || !rtx_equal_p (base_2, base_3)
- || !rtx_equal_p (base_3, base_4))
- return false;
-
- offval_1 = INTVAL (offset_1);
- offval_2 = INTVAL (offset_2);
- offval_3 = INTVAL (offset_3);
- offval_4 = INTVAL (offset_4);
- msize = GET_MODE_SIZE (mode);
- /* Check if the offsets are consecutive. */
- if ((offval_1 != (offval_2 + msize)
- || offval_1 != (offval_3 + msize * 2)
- || offval_1 != (offval_4 + msize * 3))
- && (offval_4 != (offval_3 + msize)
- || offval_4 != (offval_2 + msize * 2)
- || offval_4 != (offval_1 + msize * 3)))
- return false;
-
- /* Check if the addresses are clobbered by load. */
- if (load)
- {
- if (reg_mentioned_p (reg_1, mem_1)
- || reg_mentioned_p (reg_2, mem_2)
- || reg_mentioned_p (reg_3, mem_3))
- return false;
-
- /* In increasing order, the last load can clobber the address. */
- if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
- return false;
- }
-
- if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
- rclass_1 = FP_REGS;
- else
- rclass_1 = GENERAL_REGS;
-
- if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
- rclass_2 = FP_REGS;
- else
- rclass_2 = GENERAL_REGS;
-
- if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
- rclass_3 = FP_REGS;
- else
- rclass_3 = GENERAL_REGS;
-
- if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
- rclass_4 = FP_REGS;
- else
- rclass_4 = GENERAL_REGS;
-
- /* Check if the registers are of same class. */
- if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
- return false;
-
- return true;
+ return aarch64_operands_ok_for_ldpstp_1 (operands, load, mode, true);
}
/* Given OPERANDS of consecutive load/store, this function pairs them