diff mbox series

[02/29,arm] Perform early splitting of adddi3.

Message ID 20191018194900.34795-3-Richard.Earnshaw@arm.com
State New
Headers show
Series Rewrite DImode arithmetic support | expand

Commit Message

Richard Earnshaw (lists) Oct. 18, 2019, 7:48 p.m. UTC
This patch causes the expansion of adddi3 to split the operation
immediately for Arm and Thumb-2.  This is desirable as it frees up the
register allocator to pick whatever combination of registers suits
best and reduces the number of auxiliary patterns that we need in the
back-end.  Three of the testcases that we disabled earlier are already
fixed by this patch.  Finally, we add a new pattern to match the
canonicalization of add-with-carry when using an immediate of zero.

gcc:
	* config/arm/arm-protos.h (arm_decompose_di_binop): New prototype.
	* config/arm/arm.c (arm_decompose_di_binop): New function.
	* config/arm/arm.md (adddi3): Also accept any const_int for op2.
	If not generating Thumb-1 code, decompose the operation into 32-bit
	pieces.
	(add0si3_carryin_<optab>): New pattern.

testsuite:
	* gcc.target/arm/pr53447-1.c: Remove XFAIL.
	* gcc.target/arm/pr53447-3.c: Remove XFAIL.
	* gcc.target/arm/pr53447-4.c: Remove XFAIL.
---
 gcc/config/arm/arm-protos.h              |  1 +
 gcc/config/arm/arm.c                     | 15 +++++
 gcc/config/arm/arm.md                    | 73 ++++++++++++++++++------
 gcc/testsuite/gcc.target/arm/pr53447-1.c |  2 +-
 gcc/testsuite/gcc.target/arm/pr53447-3.c |  2 +-
 gcc/testsuite/gcc.target/arm/pr53447-4.c |  2 +-
 6 files changed, 76 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f995974f9bb..c685bcbf99c 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -57,6 +57,7 @@  extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
 extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
 						 bool high);
 extern void arm_emit_speculation_barrier_function (void);
+extern void arm_decompose_di_binop (rtx, rtx, rtx *, rtx *, rtx *, rtx *);
 
 #ifdef RTX_CODE
 extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 41567af1869..db18651346f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -14933,6 +14933,21 @@  gen_cpymem_ldrd_strd (rtx *operands)
   return true;
 }
 
+/* Split OP1 and OP2, the operands of a 64-bit binary operation, into
+   SImode low and high parts stored in *LO_OP1, *HI_OP1, *LO_OP2, *HI_OP2.
+   OP2 may be an immediate constant, so it goes via simplify_gen_subreg.  */
+void
+arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
+			rtx *lo_op2, rtx *hi_op2)
+{
+  *lo_op1 = gen_lowpart (SImode, op1);
+  *hi_op1 = gen_highpart (SImode, op1);
+  *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
+				 subreg_lowpart_offset (SImode, DImode));
+  *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
+				 subreg_highpart_offset (SImode, DImode));
+}
+
 /* Select a dominance comparison mode if possible for a test of the general
    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
    COND_OR == DOM_CC_X_AND_Y => (X && Y)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 241ba97c4ba..5ba42a13430 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -437,25 +437,53 @@  (define_expand "adddi3"
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand")
 	  (plus:DI (match_operand:DI 1 "s_register_operand")
-	           (match_operand:DI 2 "s_register_operand")))
+		   (match_operand:DI 2 "reg_or_int_operand")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
-  if (TARGET_THUMB1 && !REG_P (operands[2]))
-    operands[2] = force_reg (DImode, operands[2]);
-  "
-)
+  if (TARGET_THUMB1)
+    {
+      if (!REG_P (operands[2]))
+	operands[2] = force_reg (DImode, operands[2]);
+    }
+  else
+    {
+      rtx lo_result, hi_result, lo_dest, hi_dest;
+      rtx lo_op1, hi_op1, lo_op2, hi_op2;
+      arm_decompose_di_binop (operands[1], operands[2], &lo_op1, &hi_op1,
+			      &lo_op2, &hi_op2);
+      lo_result = lo_dest = gen_lowpart (SImode, operands[0]);
+      hi_result = hi_dest = gen_highpart (SImode, operands[0]);
+
+      if (lo_op2 == const0_rtx)
+	{
+	  lo_dest = lo_op1;
+	  if (!arm_add_operand (hi_op2, SImode))
+	    hi_op2 = force_reg (SImode, hi_op2);
+	  /* Assume hi_op2 won't also be zero.  */
+	  emit_insn (gen_addsi3 (hi_dest, hi_op1, hi_op2));
+	}
+      else
+	{
+	  if (!arm_add_operand (lo_op2, SImode))
+	    lo_op2 = force_reg (SImode, lo_op2);
+	  if (!arm_not_operand (hi_op2, SImode))
+	    hi_op2 = force_reg (SImode, hi_op2);
+
+	  emit_insn (gen_addsi3_compareC (lo_dest, lo_op1, lo_op2));
+	  if (hi_op2 == const0_rtx)
+	    emit_insn (gen_add0si3_carryin_ltu (hi_dest, hi_op1));
+	  else
+	    emit_insn (gen_addsi3_carryin_ltu (hi_dest, hi_op1, hi_op2));
+	}
 
-(define_insn "*arm_adddi3"
-  [(set (match_operand:DI 0 "s_register_operand"  "=&r,&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" " %0,0,r")
-		 (match_operand:DI 2 "s_register_operand" " r,0,r")))
-   (clobber (reg:CC CC_REGNUM))]
-  "TARGET_32BIT"
-  "adds\\t%Q0, %Q1, %Q2;adc\\t%R0, %R1, %R2"
-  [(set_attr "conds" "clob")
-   (set_attr "length" "8")
-   (set_attr "type" "multiple")]
+      if (lo_result != lo_dest)
+	emit_move_insn (lo_result, lo_dest);
+      if (hi_result != hi_dest)
+	emit_move_insn (gen_highpart (SImode, operands[0]), hi_dest);
+      DONE;
+    }
+  "
 )
 
 (define_expand "addv<mode>4"
@@ -830,7 +858,7 @@  (define_insn "*compare_addsi2_op1"
    (set_attr "type" "alus_imm,alus_sreg,alus_imm,alus_imm,alus_sreg")]
  )
 
-(define_insn "*addsi3_carryin_<optab>"
+(define_insn "addsi3_carryin_<optab>"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r,r")
         (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%l,r,r")
                           (match_operand:SI 2 "arm_not_operand" "0,rI,K"))
@@ -848,6 +876,19 @@  (define_insn "*addsi3_carryin_<optab>"
    (set_attr "type" "adc_reg,adc_reg,adc_imm")]
 )
 
+;; Canonicalization of the above when the immediate is zero: adc Rd, Rn, #0.
+(define_insn "add0si3_carryin_<optab>"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+	(plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
+		 (match_operand:SI 1 "arm_not_operand" "r")))]
+  "TARGET_32BIT"
+  "adc%?\\t%0, %1, #0"
+  [(set_attr "conds" "use")
+   (set_attr "predicable" "yes")
+   (set_attr "length" "4")
+   (set_attr "type" "adc_imm")]
+)
+
 (define_insn "*addsi3_carryin_alt2_<optab>"
   [(set (match_operand:SI 0 "s_register_operand" "=l,r,r")
         (plus:SI (plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
diff --git a/gcc/testsuite/gcc.target/arm/pr53447-1.c b/gcc/testsuite/gcc.target/arm/pr53447-1.c
index 0fd98b791fe..dc094180c85 100644
--- a/gcc/testsuite/gcc.target/arm/pr53447-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr53447-1.c
@@ -1,6 +1,6 @@ 
 /* { dg-options "-O2" }  */
 /* { dg-require-effective-target arm32 } */
-/* { dg-final { scan-assembler-not "mov" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "mov" } } */
 
 void t0p(long long * p)
 {
diff --git a/gcc/testsuite/gcc.target/arm/pr53447-3.c b/gcc/testsuite/gcc.target/arm/pr53447-3.c
index 79d3691ee14..8e48f119b74 100644
--- a/gcc/testsuite/gcc.target/arm/pr53447-3.c
+++ b/gcc/testsuite/gcc.target/arm/pr53447-3.c
@@ -1,6 +1,6 @@ 
 /* { dg-options "-O2" }  */
 /* { dg-require-effective-target arm32 } */
-/* { dg-final { scan-assembler-not "mov" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "mov" } } */
 
 
 void t0p(long long * p)
diff --git a/gcc/testsuite/gcc.target/arm/pr53447-4.c b/gcc/testsuite/gcc.target/arm/pr53447-4.c
index bfa20df7ccd..22acb97270e 100644
--- a/gcc/testsuite/gcc.target/arm/pr53447-4.c
+++ b/gcc/testsuite/gcc.target/arm/pr53447-4.c
@@ -1,6 +1,6 @@ 
 /* { dg-options "-O2" }  */
 /* { dg-require-effective-target arm32 } */
-/* { dg-final { scan-assembler-not "mov" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "mov" } } */
 
 
 void t0p(long long * p)