@@ -1,3 +1,17 @@
+2011-07-21 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org>
+
+ * config/arm/arm-protos.h (arm_autoinc_modes_ok_p): Declare.
+ * config/arm/arm.c (arm_autoinc_modes_ok_p): Define.
+ * config/arm/arm.h (ARM_AUTOINC_VALID_FOR_MODE_P): New.
+ (USE_LOAD_POST_INCREMENT, USE_LOAD_PRE_INCREMENT)
+ (USE_LOAD_POST_DECREMENT, USE_LOAD_PRE_DECREMENT): Define.
+ (USE_STORE_PRE_DECREMENT, USE_STORE_PRE_INCREMENT): Likewise.
+ (USE_STORE_POST_DECREMENT, USE_STORE_POST_INCREMENT): Likewise.
+ * tree-ssa-loop-ivopts.c (rtl.h): Include.
+ (add_autoinc_candidates): Use modes to decide
+ whether we have auto-increment.
+ (get_address_cost): Likewise.
+
2011-07-21 Richard Sandiford <richard.sandiford@linaro.org>
* regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK.
@@ -2595,7 +2595,7 @@ tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_P_H) \
$(PARAMS_H) $(DIAGNOSTIC_H) $(TREE_PASS_H) $(TM_H) coretypes.h \
tree-affine.h $(TREE_INLINE_H) tree-pretty-print.h
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
- $(SYSTEM_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
+ $(SYSTEM_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
$(TREE_PASS_H) $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) \
@@ -238,6 +238,7 @@ struct tune_params
};
extern const struct tune_params *current_tune;
+extern bool arm_autoinc_modes_ok_p (enum machine_mode, RTX_CODE);
#endif /* RTX_CODE */
#endif /* ! GCC_ARM_PROTOS_H */
@@ -23988,4 +23988,39 @@ arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
return 4;
}
+bool
+arm_autoinc_modes_ok_p (enum machine_mode mode, enum rtx_code code)
+{
+ if (TARGET_SOFT_FLOAT)
+ return true;
+
+ switch (code)
+ {
+ case POST_INC:
+ case PRE_DEC:
+ if (VECTOR_MODE_P (mode))
+ {
+ if (code != PRE_DEC)
+ return true;
+ else
+ return false;
+ }
+
+ return true;
+
+ case POST_DEC:
+ case PRE_INC:
+ if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
+ return false;
+ else
+ return true;
+
+ default:
+ return false;
+
+ }
+
+ return false;
+}
+
#include "gt-arm.h"
@@ -1660,6 +1660,22 @@ typedef struct
#define HAVE_PRE_MODIFY_REG TARGET_32BIT
#define HAVE_POST_MODIFY_REG TARGET_32BIT
+#define ARM_AUTOINC_VALID_FOR_MODE_P(mode, code) \
+ (TARGET_32BIT && arm_autoinc_modes_ok_p (mode, code))
+#define USE_LOAD_POST_INCREMENT(mode) \
+ ARM_AUTOINC_VALID_FOR_MODE_P(mode, POST_INC)
+#define USE_LOAD_PRE_INCREMENT(mode) \
+ ARM_AUTOINC_VALID_FOR_MODE_P(mode, PRE_INC)
+#define USE_LOAD_POST_DECREMENT(mode) \
+ ARM_AUTOINC_VALID_FOR_MODE_P(mode, POST_DEC)
+#define USE_LOAD_PRE_DECREMENT(mode) \
+ ARM_AUTOINC_VALID_FOR_MODE_P(mode, PRE_DEC)
+
+#define USE_STORE_PRE_DECREMENT(mode) USE_LOAD_PRE_DECREMENT(mode)
+#define USE_STORE_PRE_INCREMENT(mode) USE_LOAD_PRE_INCREMENT(mode)
+#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT(mode)
+#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT(mode)
+
/* Macros to check register numbers against specific register classes. */
/* These assume that REGNO is a hard or pseudo reg number.
@@ -67,6 +67,7 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
+#include "rtl.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
@@ -2356,8 +2357,12 @@ add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
cstepi = int_cst_value (step);
mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
- if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
- || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
+ if (((USE_LOAD_PRE_INCREMENT (mem_mode)
+ || USE_STORE_PRE_INCREMENT (mem_mode))
+ && GET_MODE_SIZE (mem_mode) == cstepi)
+ || ((USE_LOAD_PRE_DECREMENT (mem_mode)
+ || USE_STORE_PRE_DECREMENT (mem_mode))
+ && GET_MODE_SIZE (mem_mode) == -cstepi))
{
enum tree_code code = MINUS_EXPR;
tree new_base;
@@ -2374,8 +2379,12 @@ add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
use->stmt);
}
- if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
- || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
+ if (((USE_LOAD_POST_INCREMENT (mem_mode)
+ || USE_STORE_POST_INCREMENT (mem_mode))
+ && GET_MODE_SIZE (mem_mode) == cstepi)
+ || ((USE_LOAD_POST_DECREMENT (mem_mode)
+ || USE_STORE_POST_DECREMENT (mem_mode))
+ && GET_MODE_SIZE (mem_mode) == -cstepi))
{
add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
use->stmt);
@@ -3329,25 +3338,29 @@ get_address_cost (bool symbol_present, bool var_present,
reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
- if (HAVE_PRE_DECREMENT)
+ if (USE_LOAD_PRE_DECREMENT (mem_mode)
+ || USE_STORE_PRE_DECREMENT (mem_mode))
{
addr = gen_rtx_PRE_DEC (address_mode, reg0);
has_predec[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
- if (HAVE_POST_DECREMENT)
+ if (USE_LOAD_POST_DECREMENT (mem_mode)
+ || USE_STORE_POST_DECREMENT (mem_mode))
{
addr = gen_rtx_POST_DEC (address_mode, reg0);
has_postdec[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
- if (HAVE_PRE_INCREMENT)
+ if (USE_LOAD_PRE_INCREMENT (mem_mode)
+ || USE_STORE_PRE_DECREMENT (mem_mode))
{
addr = gen_rtx_PRE_INC (address_mode, reg0);
has_preinc[mem_mode]
= memory_address_addr_space_p (mem_mode, addr, as);
}
- if (HAVE_POST_INCREMENT)
+ if (USE_LOAD_POST_INCREMENT (mem_mode)
+ || USE_STORE_POST_INCREMENT (mem_mode))
{
addr = gen_rtx_POST_INC (address_mode, reg0);
has_postinc[mem_mode]
From: Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> Hi, While investigating cases where we generate excessive moves between VFP and integer registers I came across a situation where ivopts wasn't considering the mode in which auto-increment was allowed in this situation. Changing legitimate_address_p wasn't enough to catch all these situations as HAVE_PRE_INCREMENT or HAVE_POST_INCREMENT are not predicated on the modes. I did think about making this mode specific but these changes were enough for me to go ahead. I've tried a few options like disabling the auto-increment modes for the necessary floating point modes as well as the vector modes ( for neon for instance only etc. but that wasn't enough of a hammer to stop auto-inc from generating loops of this form. The motivating example for this is from the bug report here : https://bugs.launchpad.net/gcc-linaro/+bug/640518 With this patch now we are able to generate: oo: @ args = 0, pretend = 0, frame = 0 @ frame_needed = 0, uses_anonymous_args = 0 @ link register save eliminated. mov r3, #0 fconsts s14, #112 .L2: fldmias r1!, {s15} add r3, r3, #1 cmp r3, #100 fadds s15, s15, s14 fstmias r0!, {s15} bne .L2 bx lr rather than the kind of code as shown in the bug report. This hasn't yet run through a full test cycle but I'd like to push this out for some review comments and also because I promised Richard sometime last week that I'd have something out for him. Ok after regression tests and a round of benchmark runs ? One possible enhancement for architectures that care about whether something is a valid load vs store auto-increment ( I think SH) is to try and arrange things such that auto-inc would decide whether to use the load or the store depending on the kind of memory access. cheers Ramana 2011-07-21 Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> * config/arm/arm-protos.h (arm_autoinc_modes_ok_p): Declare. * config/arm/arm.c (arm_autoinc_modes_ok_p): Define. * config/arm/arm.h (ARM_AUTOINC_VALID_FOR_MODE_P): New. (USE_LOAD_POST_INCREMENT, USE_LOAD_PRE_INCREMENT) (USE_LOAD_POST_DECREMENT, USE_LOAD_PRE_DECREMENT): Define. (USE_STORE_PRE_DECREMENT, USE_STORE_PRE_INCREMENT): Likewise. (USE_STORE_POST_DECREMENT, USE_STORE_POST_INCREMENT): Likewise. * tree-ssa-loop-ivopts.c (rtl.h): Include. (add_autoinc_candidates): Use modes to decide whether we have auto-increment. (get_address_cost): Likewise. --- gcc/ChangeLog | 14 ++++++++++++++ gcc/Makefile.in | 2 +- gcc/config/arm/arm-protos.h | 1 + gcc/config/arm/arm.c | 35 +++++++++++++++++++++++++++++++++++ gcc/config/arm/arm.h | 16 ++++++++++++++++ gcc/tree-ssa-loop-ivopts.c | 29 +++++++++++++++++++++-------- 6 files changed, 88 insertions(+), 9 deletions(-)