--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -18,12 +18,25 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* Important note about carry generation in AArch64.
+
+ Unlike some other architectures, the C flag generated by a subtract
+ operation, or by a simple compare operation, is set to 1 if the
+ result does not overflow in an unsigned sense. That is, C is set
+ when no borrow is needed from a higher word. This means that
+ unsigned overflow from an addition will set C, but unsigned
+ overflow (a borrow) from a subtraction will clear C. We use
+ CC_Cmode to represent the detection of overflow from an addition,
+ since CCmode is used for 'normal' compare (subtraction) operations.
+ For ADC, the representation becomes more complex still, since we
+ cannot use the normal idiom of comparing the result to one of the
+ input operands; instead we use CC_ADCmode to represent this case. */
CC_MODE (CCFP);
CC_MODE (CCFPE);
CC_MODE (CC_SWP);
CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */
CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */
-CC_MODE (CC_C); /* Only C bit of condition flags is valid. */
+CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */
+CC_MODE (CC_ADC); /* Unsigned overflow from an ADC (add with carry). */
CC_MODE (CC_V); /* Only V bit of condition flags is valid. */
/* Half-precision floating point for __fp16. */
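
(To make the flag polarity concrete, here is a hedged C sketch -- mine,
not part of the patch -- of the two idioms the note describes.  With
this change GCC typically emits something like "cmn x0, x1; cset w0, cs"
for the first function and "cmp x0, x1; cset w0, cc" for the second.)

    #include <stdint.h>

    /* Carry out of an addition: C is SET on unsigned overflow.  */
    int
    add_carries_out (uint64_t a, uint64_t b)
    {
      return a + b < a;   /* the LTU-against-an-input idiom */
    }

    /* Borrow in a subtraction: C is CLEAR when a borrow is needed.  */
    int
    sub_borrows (uint64_t a, uint64_t b)
    {
      return a < b;
    }
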
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7089,9 +7089,12 @@ aarch64_emit_call_insn (rtx pat)
machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
+ machine_mode mode_x = GET_MODE (x);
+ rtx_code code_x = GET_CODE (x);
+
/* All floating point compares return CCFP if it is an equality
comparison, and CCFPE otherwise. */
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
{
switch (code)
{
@@ -7122,55 +7125,65 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
using the TST instruction with the appropriate bitmask. */
if (y == const0_rtx && REG_P (x)
&& (code == EQ || code == NE)
- && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
+ && (mode_x == HImode || mode_x == QImode))
return CC_NZmode;
/* Similarly, comparisons of zero_extends from shorter modes can
be performed using an ANDS with an immediate mask. */
- if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
- && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+ if (y == const0_rtx && code_x == ZERO_EXTEND
+ && (mode_x == SImode || mode_x == DImode)
&& (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
&& (code == EQ || code == NE))
return CC_NZmode;
- if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+ if ((mode_x == SImode || mode_x == DImode)
&& y == const0_rtx
&& (code == EQ || code == NE || code == LT || code == GE)
- && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
- || GET_CODE (x) == NEG
- || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
+ && (code_x == PLUS || code_x == MINUS || code_x == AND
+ || code_x == NEG
+ || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
&& CONST_INT_P (XEXP (x, 2)))))
return CC_NZmode;
/* A compare with a shifted operand. Because of canonicalization,
the comparison will have to be swapped when we emit the assembly
code. */
- if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+ if ((mode_x == SImode || mode_x == DImode)
&& (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
- && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
- || GET_CODE (x) == LSHIFTRT
- || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
+ && (code_x == ASHIFT || code_x == ASHIFTRT
+ || code_x == LSHIFTRT
+ || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
return CC_SWPmode;
/* Similarly for a negated operand, but we can only do this for
equalities. */
- if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+ if ((mode_x == SImode || mode_x == DImode)
&& (REG_P (y) || GET_CODE (y) == SUBREG)
&& (code == EQ || code == NE)
- && GET_CODE (x) == NEG)
+ && code_x == NEG)
return CC_Zmode;
- /* A test for unsigned overflow. */
- if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
- && code == NE
- && GET_CODE (x) == PLUS
- && GET_CODE (y) == ZERO_EXTEND)
+ /* A test for unsigned overflow from an addition. */
+ if ((mode_x == DImode || mode_x == TImode)
+ && (code == LTU || code == GEU)
+ && code_x == PLUS
+ && rtx_equal_p (XEXP (x, 0), y))
return CC_Cmode;
+ /* A test for unsigned overflow from an add with carry. */
+ if ((mode_x == DImode || mode_x == TImode)
+ && (code == LTU || code == GEU)
+ && code_x == PLUS
+ && CONST_SCALAR_INT_P (y)
+ && (rtx_mode_t (y, mode_x)
+ == (wi::shwi (1, mode_x)
+ << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
+ return CC_ADCmode;
+
/* A test for signed overflow. */
- if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+ if ((mode_x == DImode || mode_x == TImode)
&& code == NE
- && GET_CODE (x) == PLUS
+ && code_x == PLUS
&& GET_CODE (y) == SIGN_EXTEND)
return CC_Vmode;
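
(The CC_ADCmode test keys on the comparison value being 1 << (bitsize/2):
here x is the double-width sum, DImode or TImode, so half its bitsize is
the width of the original operands, and an add-with-carry of two
zero-extended n-bit values carries out exactly when the widened sum
reaches 2^n.  The usual compare-with-an-input idiom fails for ADC: with
y = ~0 and carry-in 1 the truncated result equals x even though a carry
occurred.  A hedged C sketch of the invariant, for the 64-bit case:)

    #include <stdint.h>

    /* The 128-bit widened sum x + y + carry is at most 2^65 - 1, and
       the carry flag is set exactly when it is >= 2^64 -- hence the
       GEU/LTU comparison against 1 << 64.  */
    int
    adc_carries_out (uint64_t x, uint64_t y, int carry_in)
    {
      unsigned __int128 sum = (unsigned __int128) x + y + (carry_in != 0);
      return sum >= ((unsigned __int128) 1 << 64);
    }
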
@@ -7274,8 +7287,17 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
case E_CC_Cmode:
switch (comp_code)
{
- case NE: return AARCH64_CS;
- case EQ: return AARCH64_CC;
+ case LTU: return AARCH64_CS;
+ case GEU: return AARCH64_CC;
+ default: return -1;
+ }
+ break;
+
+ case E_CC_ADCmode:
+ switch (comp_code)
+ {
+ case GEU: return AARCH64_CS;
+ case LTU: return AARCH64_CC;
default: return -1;
}
break;
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1871,7 +1871,7 @@ (define_expand "uaddv<mode>4"
""
{
emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
- aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+ aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
DONE;
})
@@ -1973,7 +1973,7 @@ (define_expand "uaddvti4"
emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest);
emit_move_insn (gen_highpart (DImode, operands[0]), high_dest);
- aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+ aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]);
DONE;
})
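
(These expanders are what __builtin_add_overflow typically lowers to for
unsigned types; a hedged usage sketch of both paths:)

    #include <stdint.h>

    /* Goes through uaddvdi4: "adds" then a branch on CS -- the
       (LTU, CC_Cmode) cbranch above.  */
    int
    add_u64 (uint64_t a, uint64_t b, uint64_t *res)
    {
      return __builtin_add_overflow (a, b, res);
    }

    /* Goes through uaddvti4: "adds; adcs" then a branch on CS -- the
       (GEU, CC_ADCmode) cbranch above.  */
    int
    add_u128 (unsigned __int128 a, unsigned __int128 b,
              unsigned __int128 *res)
    {
      return __builtin_add_overflow (a, b, res);
    }
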
@@ -2010,69 +2010,36 @@ (define_insn "*addsi3_compare0_uxtw"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
-(define_insn "*add<mode>3_compareC_cconly_imm"
- [(set (reg:CC_C CC_REGNUM)
- (ne:CC_C
- (plus:<DWI>
- (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r"))
- (match_operand:<DWI> 2 "const_scalar_int_operand" ""))
- (zero_extend:<DWI>
- (plus:GPI
- (match_dup 0)
- (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))]
- "aarch64_zero_extend_const_eq (<DWI>mode, operands[2],
- <MODE>mode, operands[1])"
- "@
- cmn\\t%<w>0, %1
- cmp\\t%<w>0, #%n1"
- [(set_attr "type" "alus_imm")]
-)
-
(define_insn "*add<mode>3_compareC_cconly"
[(set (reg:CC_C CC_REGNUM)
- (ne:CC_C
- (plus:<DWI>
- (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
- (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
- (zero_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+ (compare:CC_C
+ (plus:GPI
+ (match_operand:GPI 0 "register_operand" "r,r,r")
+ (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))
+ (match_dup 0)))]
""
- "cmn\\t%<w>0, %<w>1"
- [(set_attr "type" "alus_sreg")]
-)
-
-(define_insn "*add<mode>3_compareC_imm"
- [(set (reg:CC_C CC_REGNUM)
- (ne:CC_C
- (plus:<DWI>
- (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r,r"))
- (match_operand:<DWI> 3 "const_scalar_int_operand" ""))
- (zero_extend:<DWI>
- (plus:GPI
- (match_dup 1)
- (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))))
- (set (match_operand:GPI 0 "register_operand" "=r,r")
- (plus:GPI (match_dup 1) (match_dup 2)))]
- "aarch64_zero_extend_const_eq (<DWI>mode, operands[3],
- <MODE>mode, operands[2])"
"@
- adds\\t%<w>0, %<w>1, %2
- subs\\t%<w>0, %<w>1, #%n2"
- [(set_attr "type" "alus_imm")]
+ cmn\\t%<w>0, %<w>1
+ cmn\\t%<w>0, %1
+ cmp\\t%<w>0, #%n1"
+ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
(define_insn "add<mode>3_compareC"
[(set (reg:CC_C CC_REGNUM)
(compare:CC_C
- (plus:<DWI>
- (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))
- (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
- (zero_extend:<DWI>
- (plus:GPI (match_dup 1) (match_dup 2)))))
- (set (match_operand:GPI 0 "register_operand" "=r")
+ (plus:GPI
+ (match_operand:GPI 1 "register_operand" "r,r,r")
+ (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J"))
+ (match_dup 1)))
+ (set (match_operand:GPI 0 "register_operand" "=r,r,r")
(plus:GPI (match_dup 1) (match_dup 2)))]
""
- "adds\\t%<w>0, %<w>1, %<w>2"
- [(set_attr "type" "alus_sreg")]
+ "@
+ adds\\t%<w>0, %<w>1, %<w>2
+ adds\\t%<w>0, %<w>1, %2
+ subs\\t%<w>0, %<w>1, #%n2"
+ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
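
(The rewritten patterns drop the old double-width zero_extend form in
favour of comparing the single-word sum against the first input.  The
two are equivalent because the truncated sum is below an input exactly
when the full sum wrapped; a small self-checking sketch, exhaustive at
8 bits:)

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* (compare:CC_C (plus x y) x) with LTU  <=>  carry out of x + y.  */
      for (unsigned x = 0; x < 256; x++)
        for (unsigned y = 0; y < 256; y++)
          {
            uint8_t trunc = (uint8_t) (x + y);
            assert ((trunc < x) == (x + y > 0xffu));
          }
      return 0;
    }
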
(define_insn "*add<mode>3_compareV_cconly_imm"
@@ -2470,7 +2437,7 @@ (define_expand "add<mode>3_carryin"
[(set (match_operand:GPI 0 "register_operand")
(plus:GPI
(plus:GPI
- (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0))
+ (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0))
(match_operand:GPI 1 "aarch64_reg_or_zero"))
(match_operand:GPI 2 "aarch64_reg_or_zero")))]
""
@@ -2509,7 +2476,7 @@ (define_insn "*addsi3_carryin_uxtw"
(define_expand "add<mode>3_carryinC"
[(parallel
[(set (match_dup 3)
- (compare:CC_C
+ (compare:CC_ADC
(plus:<DWI>
(plus:<DWI>
(match_dup 4)
@@ -2517,57 +2484,54 @@ (define_expand "add<mode>3_carryinC"
(match_operand:GPI 1 "register_operand" "")))
(zero_extend:<DWI>
(match_operand:GPI 2 "register_operand" "")))
- (zero_extend:<DWI>
- (plus:GPI
- (plus:GPI (match_dup 5) (match_dup 1))
- (match_dup 2)))))
+ (match_dup 6)))
(set (match_operand:GPI 0 "register_operand")
(plus:GPI
(plus:GPI (match_dup 5) (match_dup 1))
(match_dup 2)))])]
""
{
- operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM);
- operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx);
- operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx);
+ operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM);
+ rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+ operands[4] = gen_rtx_LTU (<DWI>mode, ccin, const0_rtx);
+ operands[5] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
+ operands[6] = immed_wide_int_const (wi::shwi (1, <DWI>mode)
+ << GET_MODE_BITSIZE (<MODE>mode),
+ <DWI>mode);
})
(define_insn "*add<mode>3_carryinC_zero"
- [(set (reg:CC_C CC_REGNUM)
- (compare:CC_C
+ [(set (reg:CC_ADC CC_REGNUM)
+ (compare:CC_ADC
(plus:<DWI>
(match_operand:<DWI> 2 "aarch64_carry_operation" "")
(zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
- (zero_extend:<DWI>
- (plus:GPI
- (match_operand:GPI 3 "aarch64_carry_operation" "")
- (match_dup 1)))))
+ (match_operand 4 "const_scalar_int_operand" "")))
(set (match_operand:GPI 0 "register_operand" "=r")
- (plus:GPI (match_dup 3) (match_dup 1)))]
- ""
+ (plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "")
+ (match_dup 1)))]
+ "rtx_mode_t (operands[4], <DWI>mode)
+ == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))"
"adcs\\t%<w>0, %<w>1, <w>zr"
[(set_attr "type" "adc_reg")]
)
(define_insn "*add<mode>3_carryinC"
- [(set (reg:CC_C CC_REGNUM)
- (compare:CC_C
+ [(set (reg:CC_ADC CC_REGNUM)
+ (compare:CC_ADC
(plus:<DWI>
(plus:<DWI>
(match_operand:<DWI> 3 "aarch64_carry_operation" "")
(zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
(zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
- (zero_extend:<DWI>
- (plus:GPI
- (plus:GPI
- (match_operand:GPI 4 "aarch64_carry_operation" "")
- (match_dup 1))
- (match_dup 2)))))
+ (match_operand 5 "const_scalar_int_operand" "")))
(set (match_operand:GPI 0 "register_operand" "=r")
(plus:GPI
- (plus:GPI (match_dup 4) (match_dup 1))
+ (plus:GPI (match_operand:GPI 4 "aarch64_carry_operation" "")
+ (match_dup 1))
(match_dup 2)))]
- ""
+ "rtx_mode_t (operands[5], <DWI>mode)
+ == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))"
"adcs\\t%<w>0, %<w>1, %<w>2"
[(set_attr "type" "adc_reg")]
)
@@ -2594,8 +2558,8 @@ (define_expand "add<mode>3_carryinV"
""
{
rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM);
- operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx);
- operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx);
+ operands[3] = gen_rtx_LTU (<DWI>mode, cc, const0_rtx);
+ operands[4] = gen_rtx_LTU (<MODE>mode, cc, const0_rtx);
})
(define_insn "*add<mode>3_carryinV_zero"
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -347,23 +347,37 @@ (define_special_predicate "aarch64_equality_operator"
(match_code "eq,ne"))
(define_special_predicate "aarch64_carry_operation"
- (match_code "ne,geu")
+ (match_code "ltu,geu")
{
if (XEXP (op, 1) != const0_rtx)
return false;
- machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode);
rtx op0 = XEXP (op, 0);
- return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+ if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+ return false;
+ machine_mode ccmode = GET_MODE (op0);
+ if (ccmode == CC_Cmode)
+ return GET_CODE (op) == LTU;
+ if (ccmode == CC_ADCmode || ccmode == CCmode)
+ return GET_CODE (op) == GEU;
+ return false;
})
+;; Borrow is essentially the inverse of carry, since the sense of the
+;; C flag is inverted during subtraction.  See the note in
+;; aarch64-modes.def.
(define_special_predicate "aarch64_borrow_operation"
- (match_code "eq,ltu")
+ (match_code "geu,ltu")
{
if (XEXP (op, 1) != const0_rtx)
return false;
- machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode);
rtx op0 = XEXP (op, 0);
- return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+ if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+ return false;
+ machine_mode ccmode = GET_MODE (op0);
+ if (ccmode == CC_Cmode)
+ return GET_CODE (op) == GEU;
+ if (ccmode == CC_ADCmode || ccmode == CCmode)
+ return GET_CODE (op) == LTU;
+ return false;
})
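
(Both predicates now accept a pair of codes and select by the CC mode
of the flags register rather than hard-wiring one code per mode.  A
hedged sketch of code whose expansion exercises the borrow predicate:)

    /* Double-word subtraction: "subs" sets C to NOT-borrow and "sbc"
       consumes it; aarch64_borrow_operation matches the RTL term that
       represents the borrow.  Roughly: subs x0, x0, x2; sbc x1, x1, x3.  */
    unsigned __int128
    sub128 (unsigned __int128 a, unsigned __int128 b)
    {
      return a - b;
    }
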
;; True if the operand is a memory reference suitable for a load/store exclusive.