Message ID | 87pofqs70q.fsf@linaro.org |
---|---|
State | New |
Headers | show |
Ping Richard Sandiford <richard.sandiford@linaro.org> writes: > This patch checks whether two data references x and y cannot > partially overlap and so are independent whenever &x != &y. > We can then use this in the vectoriser to optimise alias checks. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? > > Thanks, > Richard > > > gcc/ > 2016-05-03 Richard Sandiford <richard.sandiford@linaro.org> > > * hash-traits.h (pair_hash): New struct. > * tree-data-ref.h (data_dependence_relation): Add object_a and > object_b fields. > (DDR_OBJECT_A, DDR_OBJECT_B): New macros. > * tree-data-ref.c (initialize_data_dependence_relation): Initialize > DDR_OBJECT_A and DDR_OBJECT_B. > * tree-vectorizer.h (vec_object_pair): New type. > (_loop_vec_info): Add a check_unequal_addrs field. > (LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro. > (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an > entry in check_unequal_addrs. Check comp_alias_ddrs instead of > may_alias_ddrs. > * tree-vect-loop.c (destroy_loop_vec_info): Release > LOOP_VINFO_CHECK_UNEQUAL_ADDRS. > (vect_analyze_loop_2): Likewise, when restarting. > (vect_estimate_min_profitable_iters): Estimate the cost of > LOOP_VINFO_CHECK_UNEQUAL_ADDRS. > * tree-vect-data-refs.c: Include tree-hash-traits.h. > (vect_prune_runtime_alias_test_list): Try to handle conflicts > using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows. > Count such tests in the final summary. > * tree-vect-loop-manip.c (chain_cond_expr): New function. > (vect_create_cond_for_align_checks): Use it. > (vect_create_cond_for_alias_checks): Likewise. > (vect_create_cond_for_unequal_addrs): New function. > (vect_loop_versioning): Call it. > > gcc/testsuite/ > * gcc.dg/vect/vect-alias-check-6.c: New test. > > Index: gcc/hash-traits.h > =================================================================== > --- gcc/hash-traits.h 2017-02-23 19:54:15.000000000 +0000 > +++ gcc/hash-traits.h 2017-05-03 08:48:53.312035228 +0100 > @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash > > struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {}; > > +/* Traits for pairs of values, using the first to record empty and > + deleted slots. */ > + > +template <typename T1, typename T2> > +struct pair_hash > +{ > + typedef std::pair <typename T1::value_type, > + typename T2::value_type> value_type; > + typedef std::pair <typename T1::compare_type, > + typename T2::compare_type> compare_type; > + > + static inline hashval_t hash (const value_type &); > + static inline bool equal (const value_type &, const compare_type &); > + static inline void remove (value_type &); > + static inline void mark_deleted (value_type &); > + static inline void mark_empty (value_type &); > + static inline bool is_deleted (const value_type &); > + static inline bool is_empty (const value_type &); > +}; > + > +template <typename T1, typename T2> > +inline hashval_t > +pair_hash <T1, T2>::hash (const value_type &x) > +{ > + return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second)); > +} > + > +template <typename T1, typename T2> > +inline bool > +pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y) > +{ > + return T1::equal (x.first, y.first) && T2::equal (x.second, y.second); > +} > + > +template <typename T1, typename T2> > +inline void > +pair_hash <T1, T2>::remove (value_type &x) > +{ > + T1::remove (x.first); > + T2::remove (x.second); > +} > + > +template <typename T1, typename T2> > +inline void > +pair_hash <T1, T2>::mark_deleted (value_type &x) > +{ > + T1::mark_deleted (x.first); > +} > + > +template <typename T1, typename T2> > +inline void > +pair_hash <T1, T2>::mark_empty (value_type &x) > +{ > + T1::mark_empty (x.first); > +} > + > +template <typename T1, typename T2> > +inline bool > +pair_hash <T1, T2>::is_deleted (const value_type &x) > +{ > + return T1::is_deleted (x.first); > +} > + > +template <typename T1, typename T2> > +inline bool > +pair_hash <T1, T2>::is_empty (const value_type &x) > +{ > + return T1::is_empty (x.first); > +} > + > template <typename T> struct default_hash_traits : T {}; > > template <typename T> > Index: gcc/tree-data-ref.h > =================================================================== > --- gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100 > +++ gcc/tree-data-ref.h 2017-05-03 08:48:53.313041828 +0100 > @@ -240,6 +240,13 @@ struct data_dependence_relation > but the analyzer cannot be more specific. */ > tree are_dependent; > > + /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are > + independent when the runtime addresses of OBJECT_A and OBJECT_B > + are different. The addresses of both objects are invariant in the > + loop nest. */ > + tree object_a; > + tree object_b; > + > /* For each subscript in the dependence test, there is an element in > this array. This is the attribute that labels the edge A->B of > the data_dependence_relation. */ > @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a > #define DDR_B(DDR) (DDR)->b > #define DDR_AFFINE_P(DDR) (DDR)->affine_p > #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent > +#define DDR_OBJECT_A(DDR) (DDR)->object_a > +#define DDR_OBJECT_B(DDR) (DDR)->object_b > #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts > #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I] > #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length () > Index: gcc/tree-data-ref.c > =================================================================== > --- gcc/tree-data-ref.c 2017-05-03 08:48:48.737038502 +0100 > +++ gcc/tree-data-ref.c 2017-05-03 08:48:53.313041828 +0100 > @@ -1715,6 +1715,15 @@ initialize_data_dependence_relation (str > } > > DDR_COULD_BE_INDEPENDENT_P (res) = true; > + if (!loop_nest.exists () > + || (object_address_invariant_in_loop_p (loop_nest[0], > + struct_ref_a) > + && object_address_invariant_in_loop_p (loop_nest[0], > + struct_ref_b))) > + { > + DDR_OBJECT_A (res) = struct_ref_a; > + DDR_OBJECT_B (res) = struct_ref_b; > + } > } > > DDR_AFFINE_P (res) = true; > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h 2017-05-03 08:48:48.738045102 +0100 > +++ gcc/tree-vectorizer.h 2017-05-03 08:48:53.315055028 +0100 > @@ -174,6 +174,10 @@ struct dr_with_seg_len_pair_t > > > > +/* Describes two objects whose addresses must be unequal for the vectorized > + loop to be valid. */ > +typedef std::pair<tree, tree> vec_object_pair; > + > /* Vectorizer state common between loop and basic-block vectorization. */ > struct vec_info { > enum { bb, loop } kind; > @@ -270,6 +274,9 @@ typedef struct _loop_vec_info : public v > lengths from which the run-time aliasing check is built. */ > vec<dr_with_seg_len_pair_t> comp_alias_ddrs; > > + /* Check that the addresses of each pair of objects is unequal. */ > + vec<vec_object_pair> check_unequal_addrs; > + > /* Statements in the loop that have data references that are candidates for a > runtime (loop versioning) misalignment check. */ > vec<gimple *> may_misalign_stmts; > @@ -364,6 +371,7 @@ #define LOOP_VINFO_UNALIGNED_DR(L) > #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts > #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs > #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs > +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs > #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores > #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances > #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor > @@ -383,7 +391,8 @@ #define LOOP_VINFO_ORIG_LOOP_INFO(L) > #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ > ((L)->may_misalign_stmts.length () > 0) > #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ > - ((L)->comp_alias_ddrs.length () > 0) > + ((L)->comp_alias_ddrs.length () > 0 \ > + || (L)->check_unequal_addrs.length () > 0) > #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ > (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) > #define LOOP_REQUIRES_VERSIONING(L) \ > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c 2017-04-18 19:52:35.059158859 +0100 > +++ gcc/tree-vect-loop.c 2017-05-03 08:48:53.315055028 +0100 > @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loo > destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); > loop_vinfo->scalar_cost_vec.release (); > > + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); > + > free (loop_vinfo); > loop->aux = NULL; > } > @@ -2299,6 +2301,7 @@ vect_analyze_loop_2 (loop_vec_info loop_ > } > /* Free optimized alias test DDRS. */ > LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); > + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); > /* Reset target cost data. */ > destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); > LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) > @@ -3261,6 +3264,11 @@ vect_estimate_min_profitable_iters (loop > unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); > (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0, > vect_prologue); > + len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); > + if (len) > + /* Count LEN - 1 ANDs and LEN comparisons. */ > + (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, > + NULL, 0, vect_prologue); > dump_printf (MSG_NOTE, > "cost model: Adding cost of checks for loop " > "versioning aliasing.\n"); > Index: gcc/tree-vect-data-refs.c > =================================================================== > --- gcc/tree-vect-data-refs.c 2017-05-03 08:48:48.738045102 +0100 > +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:53.314048428 +0100 > @@ -50,6 +50,7 @@ Software Foundation; either version 3, o > #include "expr.h" > #include "builtins.h" > #include "params.h" > +#include "tree-hash-traits.h" > > /* Return true if load- or store-lanes optab OPTAB is implemented for > COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ > @@ -3085,10 +3086,14 @@ dependence_distance_ge_vf (data_dependen > bool > vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) > { > - vec<ddr_p> may_alias_ddrs = > - LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); > - vec<dr_with_seg_len_pair_t>& comp_alias_ddrs = > - LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); > + typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash; > + hash_set <tree_pair_hash> compared_objects; > + > + vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); > + vec<dr_with_seg_len_pair_t> &comp_alias_ddrs > + = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); > + vec<vec_object_pair> &check_unequal_addrs > + = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); > int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); > tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); > > @@ -3151,6 +3156,24 @@ vect_prune_runtime_alias_test_list (loop > if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor)) > continue; > > + if (DDR_OBJECT_A (ddr)) > + { > + vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr)); > + if (!compared_objects.add (new_pair)) > + { > + if (dump_enabled_p ()) > + { > + dump_printf_loc (MSG_NOTE, vect_location, "checking that "); > + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first); > + dump_printf (MSG_NOTE, " and "); > + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second); > + dump_printf (MSG_NOTE, " have different addresses\n"); > + } > + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair); > + } > + continue; > + } > + > dr_a = DDR_A (ddr); > stmt_a = DR_STMT (DDR_A (ddr)); > dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a)); > @@ -3346,11 +3369,12 @@ vect_prune_runtime_alias_test_list (loop > } > } > > + unsigned int count = (comp_alias_ddrs.length () > + + check_unequal_addrs.length ()); > dump_printf_loc (MSG_NOTE, vect_location, > "improved number of alias checks from %d to %d\n", > - may_alias_ddrs.length (), comp_alias_ddrs.length ()); > - if ((int) comp_alias_ddrs.length () > > - PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) > + may_alias_ddrs.length (), count); > + if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > Index: gcc/tree-vect-loop-manip.c > =================================================================== > --- gcc/tree-vect-loop-manip.c 2017-04-18 19:52:34.027608884 +0100 > +++ gcc/tree-vect-loop-manip.c 2017-05-03 08:48:53.314048428 +0100 > @@ -1926,6 +1926,20 @@ vect_create_cond_for_niters_checks (loop > *cond_expr = part_cond_expr; > } > > +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR > + and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */ > + > +static void > +chain_cond_expr (tree *cond_expr, tree part_cond_expr) > +{ > + if (*cond_expr) > + *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, > + *cond_expr, part_cond_expr); > + else > + *cond_expr = part_cond_expr; > +} > + > + > /* Function vect_create_cond_for_align_checks. > > Create a conditional expression that represents the alignment checks for > @@ -2037,13 +2051,32 @@ vect_create_cond_for_align_checks (loop_ > ptrsize_zero = build_int_cst (int_ptrsize_type, 0); > part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, > and_tmp_name, ptrsize_zero); > - if (*cond_expr) > - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, > - *cond_expr, part_cond_expr); > - else > - *cond_expr = part_cond_expr; > + chain_cond_expr (cond_expr, part_cond_expr); > } > > + > +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>, > + create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn). > + Set *COND_EXPR to a tree that is true when both the original *COND_EXPR > + and this new condition are true. Treat a null *COND_EXPR as "true". */ > + > +static void > +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) > +{ > + vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); > + unsigned int i; > + vec_object_pair *pair; > + FOR_EACH_VEC_ELT (pairs, i, pair) > + { > + tree addr1 = build_fold_addr_expr (pair->first); > + tree addr2 = build_fold_addr_expr (pair->second); > + tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node, > + addr1, addr2); > + chain_cond_expr (cond_expr, part_cond_expr); > + } > +} > + > + > /* Given two data references and segment lengths described by DR_A and DR_B, > create expression checking if the two addresses ranges intersect with > each other based on index of the two addresses. This can only be done > @@ -2280,11 +2313,7 @@ vect_create_cond_for_alias_checks (loop_ > > /* Create condition expression for each pair data references. */ > create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b); > - if (*cond_expr) > - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, > - *cond_expr, part_cond_expr); > - else > - *cond_expr = part_cond_expr; > + chain_cond_expr (cond_expr, part_cond_expr); > } > > if (dump_enabled_p ()) > @@ -2353,7 +2382,10 @@ vect_loop_versioning (loop_vec_info loop > &cond_expr_stmt_list); > > if (version_alias) > - vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); > + { > + vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr); > + vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); > + } > > cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list, > is_gimple_condexpr, NULL_TREE); > Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c > =================================================================== > --- /dev/null 2017-05-03 08:16:26.972699664 +0100 > +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c 2017-05-03 08:48:53.312035228 +0100 > @@ -0,0 +1,23 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target vect_int } */ > + > +#define N 16 > + > +struct s { int x[N]; }; > + > +void > +f1 (struct s *a, struct s *b) > +{ > + for (int i = 0; i < N - 1; ++i) > + a->x[i + 1] += b->x[i]; > +} > + > +void > +f2 (struct s *a, struct s *b) > +{ > + for (int i = 0; i < N; ++i) > + a->x[i] += b->x[N - i - 1]; > +} > + > +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */ > +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
On Wed, May 31, 2017 at 8:56 AM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > Ping > > Richard Sandiford <richard.sandiford@linaro.org> writes: >> This patch checks whether two data references x and y cannot >> partially overlap and so are independent whenever &x != &y. >> We can then use this in the vectoriser to optimise alias checks. >> >> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Looks good to me. Probably needs refactoring now if Bin was faster with factoring out the machinery to elsewhere. Thanks, Richard. >> Thanks, >> Richard >> >> >> gcc/ >> 2016-05-03 Richard Sandiford <richard.sandiford@linaro.org> >> >> * hash-traits.h (pair_hash): New struct. >> * tree-data-ref.h (data_dependence_relation): Add object_a and >> object_b fields. >> (DDR_OBJECT_A, DDR_OBJECT_B): New macros. >> * tree-data-ref.c (initialize_data_dependence_relation): Initialize >> DDR_OBJECT_A and DDR_OBJECT_B. >> * tree-vectorizer.h (vec_object_pair): New type. >> (_loop_vec_info): Add a check_unequal_addrs field. >> (LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro. >> (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an >> entry in check_unequal_addrs. Check comp_alias_ddrs instead of >> may_alias_ddrs. >> * tree-vect-loop.c (destroy_loop_vec_info): Release >> LOOP_VINFO_CHECK_UNEQUAL_ADDRS. >> (vect_analyze_loop_2): Likewise, when restarting. >> (vect_estimate_min_profitable_iters): Estimate the cost of >> LOOP_VINFO_CHECK_UNEQUAL_ADDRS. >> * tree-vect-data-refs.c: Include tree-hash-traits.h. >> (vect_prune_runtime_alias_test_list): Try to handle conflicts >> using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows. >> Count such tests in the final summary. >> * tree-vect-loop-manip.c (chain_cond_expr): New function. >> (vect_create_cond_for_align_checks): Use it. >> (vect_create_cond_for_alias_checks): Likewise. >> (vect_create_cond_for_unequal_addrs): New function. >> (vect_loop_versioning): Call it. >> >> gcc/testsuite/ >> * gcc.dg/vect/vect-alias-check-6.c: New test. >> >> Index: gcc/hash-traits.h >> =================================================================== >> --- gcc/hash-traits.h 2017-02-23 19:54:15.000000000 +0000 >> +++ gcc/hash-traits.h 2017-05-03 08:48:53.312035228 +0100 >> @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash >> >> struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {}; >> >> +/* Traits for pairs of values, using the first to record empty and >> + deleted slots. */ >> + >> +template <typename T1, typename T2> >> +struct pair_hash >> +{ >> + typedef std::pair <typename T1::value_type, >> + typename T2::value_type> value_type; >> + typedef std::pair <typename T1::compare_type, >> + typename T2::compare_type> compare_type; >> + >> + static inline hashval_t hash (const value_type &); >> + static inline bool equal (const value_type &, const compare_type &); >> + static inline void remove (value_type &); >> + static inline void mark_deleted (value_type &); >> + static inline void mark_empty (value_type &); >> + static inline bool is_deleted (const value_type &); >> + static inline bool is_empty (const value_type &); >> +}; >> + >> +template <typename T1, typename T2> >> +inline hashval_t >> +pair_hash <T1, T2>::hash (const value_type &x) >> +{ >> + return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second)); >> +} >> + >> +template <typename T1, typename T2> >> +inline bool >> +pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y) >> +{ >> + return T1::equal (x.first, y.first) && T2::equal (x.second, y.second); >> +} >> + >> +template <typename T1, typename T2> >> +inline void >> +pair_hash <T1, T2>::remove (value_type &x) >> +{ >> + T1::remove (x.first); >> + T2::remove (x.second); >> +} >> + >> +template <typename T1, typename T2> >> +inline void >> +pair_hash <T1, T2>::mark_deleted (value_type &x) >> +{ >> + T1::mark_deleted (x.first); >> +} >> + >> +template <typename T1, typename T2> >> +inline void >> +pair_hash <T1, T2>::mark_empty (value_type &x) >> +{ >> + T1::mark_empty (x.first); >> +} >> + >> +template <typename T1, typename T2> >> +inline bool >> +pair_hash <T1, T2>::is_deleted (const value_type &x) >> +{ >> + return T1::is_deleted (x.first); >> +} >> + >> +template <typename T1, typename T2> >> +inline bool >> +pair_hash <T1, T2>::is_empty (const value_type &x) >> +{ >> + return T1::is_empty (x.first); >> +} >> + >> template <typename T> struct default_hash_traits : T {}; >> >> template <typename T> >> Index: gcc/tree-data-ref.h >> =================================================================== >> --- gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100 >> +++ gcc/tree-data-ref.h 2017-05-03 08:48:53.313041828 +0100 >> @@ -240,6 +240,13 @@ struct data_dependence_relation >> but the analyzer cannot be more specific. */ >> tree are_dependent; >> >> + /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are >> + independent when the runtime addresses of OBJECT_A and OBJECT_B >> + are different. The addresses of both objects are invariant in the >> + loop nest. */ >> + tree object_a; >> + tree object_b; >> + >> /* For each subscript in the dependence test, there is an element in >> this array. This is the attribute that labels the edge A->B of >> the data_dependence_relation. */ >> @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a >> #define DDR_B(DDR) (DDR)->b >> #define DDR_AFFINE_P(DDR) (DDR)->affine_p >> #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent >> +#define DDR_OBJECT_A(DDR) (DDR)->object_a >> +#define DDR_OBJECT_B(DDR) (DDR)->object_b >> #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts >> #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I] >> #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length () >> Index: gcc/tree-data-ref.c >> =================================================================== >> --- gcc/tree-data-ref.c 2017-05-03 08:48:48.737038502 +0100 >> +++ gcc/tree-data-ref.c 2017-05-03 08:48:53.313041828 +0100 >> @@ -1715,6 +1715,15 @@ initialize_data_dependence_relation (str >> } >> >> DDR_COULD_BE_INDEPENDENT_P (res) = true; >> + if (!loop_nest.exists () >> + || (object_address_invariant_in_loop_p (loop_nest[0], >> + struct_ref_a) >> + && object_address_invariant_in_loop_p (loop_nest[0], >> + struct_ref_b))) >> + { >> + DDR_OBJECT_A (res) = struct_ref_a; >> + DDR_OBJECT_B (res) = struct_ref_b; >> + } >> } >> >> DDR_AFFINE_P (res) = true; >> Index: gcc/tree-vectorizer.h >> =================================================================== >> --- gcc/tree-vectorizer.h 2017-05-03 08:48:48.738045102 +0100 >> +++ gcc/tree-vectorizer.h 2017-05-03 08:48:53.315055028 +0100 >> @@ -174,6 +174,10 @@ struct dr_with_seg_len_pair_t >> >> >> >> +/* Describes two objects whose addresses must be unequal for the vectorized >> + loop to be valid. */ >> +typedef std::pair<tree, tree> vec_object_pair; >> + >> /* Vectorizer state common between loop and basic-block vectorization. */ >> struct vec_info { >> enum { bb, loop } kind; >> @@ -270,6 +274,9 @@ typedef struct _loop_vec_info : public v >> lengths from which the run-time aliasing check is built. */ >> vec<dr_with_seg_len_pair_t> comp_alias_ddrs; >> >> + /* Check that the addresses of each pair of objects is unequal. */ >> + vec<vec_object_pair> check_unequal_addrs; >> + >> /* Statements in the loop that have data references that are candidates for a >> runtime (loop versioning) misalignment check. */ >> vec<gimple *> may_misalign_stmts; >> @@ -364,6 +371,7 @@ #define LOOP_VINFO_UNALIGNED_DR(L) >> #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts >> #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs >> #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs >> +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs >> #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores >> #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances >> #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor >> @@ -383,7 +391,8 @@ #define LOOP_VINFO_ORIG_LOOP_INFO(L) >> #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ >> ((L)->may_misalign_stmts.length () > 0) >> #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ >> - ((L)->comp_alias_ddrs.length () > 0) >> + ((L)->comp_alias_ddrs.length () > 0 \ >> + || (L)->check_unequal_addrs.length () > 0) >> #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ >> (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) >> #define LOOP_REQUIRES_VERSIONING(L) \ >> Index: gcc/tree-vect-loop.c >> =================================================================== >> --- gcc/tree-vect-loop.c 2017-04-18 19:52:35.059158859 +0100 >> +++ gcc/tree-vect-loop.c 2017-05-03 08:48:53.315055028 +0100 >> @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loo >> destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); >> loop_vinfo->scalar_cost_vec.release (); >> >> + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); >> + >> free (loop_vinfo); >> loop->aux = NULL; >> } >> @@ -2299,6 +2301,7 @@ vect_analyze_loop_2 (loop_vec_info loop_ >> } >> /* Free optimized alias test DDRS. */ >> LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); >> + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); >> /* Reset target cost data. */ >> destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); >> LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) >> @@ -3261,6 +3264,11 @@ vect_estimate_min_profitable_iters (loop >> unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); >> (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0, >> vect_prologue); >> + len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); >> + if (len) >> + /* Count LEN - 1 ANDs and LEN comparisons. */ >> + (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, >> + NULL, 0, vect_prologue); >> dump_printf (MSG_NOTE, >> "cost model: Adding cost of checks for loop " >> "versioning aliasing.\n"); >> Index: gcc/tree-vect-data-refs.c >> =================================================================== >> --- gcc/tree-vect-data-refs.c 2017-05-03 08:48:48.738045102 +0100 >> +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:53.314048428 +0100 >> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o >> #include "expr.h" >> #include "builtins.h" >> #include "params.h" >> +#include "tree-hash-traits.h" >> >> /* Return true if load- or store-lanes optab OPTAB is implemented for >> COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ >> @@ -3085,10 +3086,14 @@ dependence_distance_ge_vf (data_dependen >> bool >> vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) >> { >> - vec<ddr_p> may_alias_ddrs = >> - LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); >> - vec<dr_with_seg_len_pair_t>& comp_alias_ddrs = >> - LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); >> + typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash; >> + hash_set <tree_pair_hash> compared_objects; >> + >> + vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); >> + vec<dr_with_seg_len_pair_t> &comp_alias_ddrs >> + = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); >> + vec<vec_object_pair> &check_unequal_addrs >> + = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); >> int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); >> tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); >> >> @@ -3151,6 +3156,24 @@ vect_prune_runtime_alias_test_list (loop >> if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor)) >> continue; >> >> + if (DDR_OBJECT_A (ddr)) >> + { >> + vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr)); >> + if (!compared_objects.add (new_pair)) >> + { >> + if (dump_enabled_p ()) >> + { >> + dump_printf_loc (MSG_NOTE, vect_location, "checking that "); >> + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first); >> + dump_printf (MSG_NOTE, " and "); >> + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second); >> + dump_printf (MSG_NOTE, " have different addresses\n"); >> + } >> + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair); >> + } >> + continue; >> + } >> + >> dr_a = DDR_A (ddr); >> stmt_a = DR_STMT (DDR_A (ddr)); >> dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a)); >> @@ -3346,11 +3369,12 @@ vect_prune_runtime_alias_test_list (loop >> } >> } >> >> + unsigned int count = (comp_alias_ddrs.length () >> + + check_unequal_addrs.length ()); >> dump_printf_loc (MSG_NOTE, vect_location, >> "improved number of alias checks from %d to %d\n", >> - may_alias_ddrs.length (), comp_alias_ddrs.length ()); >> - if ((int) comp_alias_ddrs.length () > >> - PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) >> + may_alias_ddrs.length (), count); >> + if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) >> { >> if (dump_enabled_p ()) >> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> Index: gcc/tree-vect-loop-manip.c >> =================================================================== >> --- gcc/tree-vect-loop-manip.c 2017-04-18 19:52:34.027608884 +0100 >> +++ gcc/tree-vect-loop-manip.c 2017-05-03 08:48:53.314048428 +0100 >> @@ -1926,6 +1926,20 @@ vect_create_cond_for_niters_checks (loop >> *cond_expr = part_cond_expr; >> } >> >> +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR >> + and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */ >> + >> +static void >> +chain_cond_expr (tree *cond_expr, tree part_cond_expr) >> +{ >> + if (*cond_expr) >> + *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, >> + *cond_expr, part_cond_expr); >> + else >> + *cond_expr = part_cond_expr; >> +} >> + >> + >> /* Function vect_create_cond_for_align_checks. >> >> Create a conditional expression that represents the alignment checks for >> @@ -2037,13 +2051,32 @@ vect_create_cond_for_align_checks (loop_ >> ptrsize_zero = build_int_cst (int_ptrsize_type, 0); >> part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, >> and_tmp_name, ptrsize_zero); >> - if (*cond_expr) >> - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, >> - *cond_expr, part_cond_expr); >> - else >> - *cond_expr = part_cond_expr; >> + chain_cond_expr (cond_expr, part_cond_expr); >> } >> >> + >> +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>, >> + create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn). >> + Set *COND_EXPR to a tree that is true when both the original *COND_EXPR >> + and this new condition are true. Treat a null *COND_EXPR as "true". */ >> + >> +static void >> +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) >> +{ >> + vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); >> + unsigned int i; >> + vec_object_pair *pair; >> + FOR_EACH_VEC_ELT (pairs, i, pair) >> + { >> + tree addr1 = build_fold_addr_expr (pair->first); >> + tree addr2 = build_fold_addr_expr (pair->second); >> + tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node, >> + addr1, addr2); >> + chain_cond_expr (cond_expr, part_cond_expr); >> + } >> +} >> + >> + >> /* Given two data references and segment lengths described by DR_A and DR_B, >> create expression checking if the two addresses ranges intersect with >> each other based on index of the two addresses. This can only be done >> @@ -2280,11 +2313,7 @@ vect_create_cond_for_alias_checks (loop_ >> >> /* Create condition expression for each pair data references. */ >> create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b); >> - if (*cond_expr) >> - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, >> - *cond_expr, part_cond_expr); >> - else >> - *cond_expr = part_cond_expr; >> + chain_cond_expr (cond_expr, part_cond_expr); >> } >> >> if (dump_enabled_p ()) >> @@ -2353,7 +2382,10 @@ vect_loop_versioning (loop_vec_info loop >> &cond_expr_stmt_list); >> >> if (version_alias) >> - vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); >> + { >> + vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr); >> + vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); >> + } >> >> cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list, >> is_gimple_condexpr, NULL_TREE); >> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c >> =================================================================== >> --- /dev/null 2017-05-03 08:16:26.972699664 +0100 >> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c 2017-05-03 08:48:53.312035228 +0100 >> @@ -0,0 +1,23 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target vect_int } */ >> + >> +#define N 16 >> + >> +struct s { int x[N]; }; >> + >> +void >> +f1 (struct s *a, struct s *b) >> +{ >> + for (int i = 0; i < N - 1; ++i) >> + a->x[i + 1] += b->x[i]; >> +} >> + >> +void >> +f2 (struct s *a, struct s *b) >> +{ >> + for (int i = 0; i < N; ++i) >> + a->x[i] += b->x[N - i - 1]; >> +} >> + >> +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */ >> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
Index: gcc/hash-traits.h =================================================================== --- gcc/hash-traits.h 2017-02-23 19:54:15.000000000 +0000 +++ gcc/hash-traits.h 2017-05-03 08:48:53.312035228 +0100 @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {}; +/* Traits for pairs of values, using the first to record empty and + deleted slots. */ + +template <typename T1, typename T2> +struct pair_hash +{ + typedef std::pair <typename T1::value_type, + typename T2::value_type> value_type; + typedef std::pair <typename T1::compare_type, + typename T2::compare_type> compare_type; + + static inline hashval_t hash (const value_type &); + static inline bool equal (const value_type &, const compare_type &); + static inline void remove (value_type &); + static inline void mark_deleted (value_type &); + static inline void mark_empty (value_type &); + static inline bool is_deleted (const value_type &); + static inline bool is_empty (const value_type &); +}; + +template <typename T1, typename T2> +inline hashval_t +pair_hash <T1, T2>::hash (const value_type &x) +{ + return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second)); +} + +template <typename T1, typename T2> +inline bool +pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y) +{ + return T1::equal (x.first, y.first) && T2::equal (x.second, y.second); +} + +template <typename T1, typename T2> +inline void +pair_hash <T1, T2>::remove (value_type &x) +{ + T1::remove (x.first); + T2::remove (x.second); +} + +template <typename T1, typename T2> +inline void +pair_hash <T1, T2>::mark_deleted (value_type &x) +{ + T1::mark_deleted (x.first); +} + +template <typename T1, typename T2> +inline void +pair_hash <T1, T2>::mark_empty (value_type &x) +{ + T1::mark_empty (x.first); +} + +template <typename T1, typename T2> +inline bool +pair_hash <T1, T2>::is_deleted (const value_type &x) +{ + return T1::is_deleted (x.first); +} + +template <typename T1, typename T2> +inline bool +pair_hash <T1, T2>::is_empty (const value_type &x) +{ + return T1::is_empty (x.first); +} + template <typename T> struct default_hash_traits : T {}; template <typename T> Index: gcc/tree-data-ref.h =================================================================== --- gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100 +++ gcc/tree-data-ref.h 2017-05-03 08:48:53.313041828 +0100 @@ -240,6 +240,13 @@ struct data_dependence_relation but the analyzer cannot be more specific. */ tree are_dependent; + /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are + independent when the runtime addresses of OBJECT_A and OBJECT_B + are different. The addresses of both objects are invariant in the + loop nest. */ + tree object_a; + tree object_b; + /* For each subscript in the dependence test, there is an element in this array. This is the attribute that labels the edge A->B of the data_dependence_relation. */ @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a #define DDR_B(DDR) (DDR)->b #define DDR_AFFINE_P(DDR) (DDR)->affine_p #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent +#define DDR_OBJECT_A(DDR) (DDR)->object_a +#define DDR_OBJECT_B(DDR) (DDR)->object_b #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I] #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length () Index: gcc/tree-data-ref.c =================================================================== --- gcc/tree-data-ref.c 2017-05-03 08:48:48.737038502 +0100 +++ gcc/tree-data-ref.c 2017-05-03 08:48:53.313041828 +0100 @@ -1715,6 +1715,15 @@ initialize_data_dependence_relation (str } DDR_COULD_BE_INDEPENDENT_P (res) = true; + if (!loop_nest.exists () + || (object_address_invariant_in_loop_p (loop_nest[0], + struct_ref_a) + && object_address_invariant_in_loop_p (loop_nest[0], + struct_ref_b))) + { + DDR_OBJECT_A (res) = struct_ref_a; + DDR_OBJECT_B (res) = struct_ref_b; + } } DDR_AFFINE_P (res) = true; Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2017-05-03 08:48:48.738045102 +0100 +++ gcc/tree-vectorizer.h 2017-05-03 08:48:53.315055028 +0100 @@ -174,6 +174,10 @@ struct dr_with_seg_len_pair_t +/* Describes two objects whose addresses must be unequal for the vectorized + loop to be valid. */ +typedef std::pair<tree, tree> vec_object_pair; + /* Vectorizer state common between loop and basic-block vectorization. */ struct vec_info { enum { bb, loop } kind; @@ -270,6 +274,9 @@ typedef struct _loop_vec_info : public v lengths from which the run-time aliasing check is built. */ vec<dr_with_seg_len_pair_t> comp_alias_ddrs; + /* Check that the addresses of each pair of objects is unequal. */ + vec<vec_object_pair> check_unequal_addrs; + /* Statements in the loop that have data references that are candidates for a runtime (loop versioning) misalignment check. */ vec<gimple *> may_misalign_stmts; @@ -364,6 +371,7 @@ #define LOOP_VINFO_UNALIGNED_DR(L) #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor @@ -383,7 +391,8 @@ #define LOOP_VINFO_ORIG_LOOP_INFO(L) #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ((L)->may_misalign_stmts.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ - ((L)->comp_alias_ddrs.length () > 0) + ((L)->comp_alias_ddrs.length () > 0 \ + || (L)->check_unequal_addrs.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) #define LOOP_REQUIRES_VERSIONING(L) \ Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2017-04-18 19:52:35.059158859 +0100 +++ gcc/tree-vect-loop.c 2017-05-03 08:48:53.315055028 +0100 @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loo destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); loop_vinfo->scalar_cost_vec.release (); + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); + free (loop_vinfo); loop->aux = NULL; } @@ -2299,6 +2301,7 @@ vect_analyze_loop_2 (loop_vec_info loop_ } /* Free optimized alias test DDRS. */ LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); /* Reset target cost data. */ destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) @@ -3261,6 +3264,11 @@ vect_estimate_min_profitable_iters (loop unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0, vect_prologue); + len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); + if (len) + /* Count LEN - 1 ANDs and LEN comparisons. */ + (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, + NULL, 0, vect_prologue); dump_printf (MSG_NOTE, "cost model: Adding cost of checks for loop " "versioning aliasing.\n"); Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c 2017-05-03 08:48:48.738045102 +0100 +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:53.314048428 +0100 @@ -50,6 +50,7 @@ Software Foundation; either version 3, o #include "expr.h" #include "builtins.h" #include "params.h" +#include "tree-hash-traits.h" /* Return true if load- or store-lanes optab OPTAB is implemented for COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ @@ -3085,10 +3086,14 @@ dependence_distance_ge_vf (data_dependen bool vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) { - vec<ddr_p> may_alias_ddrs = - LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); - vec<dr_with_seg_len_pair_t>& comp_alias_ddrs = - LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); + typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash; + hash_set <tree_pair_hash> compared_objects; + + vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); + vec<dr_with_seg_len_pair_t> &comp_alias_ddrs + = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); + vec<vec_object_pair> &check_unequal_addrs + = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); @@ -3151,6 +3156,24 @@ vect_prune_runtime_alias_test_list (loop if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor)) continue; + if (DDR_OBJECT_A (ddr)) + { + vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr)); + if (!compared_objects.add (new_pair)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "checking that "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second); + dump_printf (MSG_NOTE, " have different addresses\n"); + } + LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair); + } + continue; + } + dr_a = DDR_A (ddr); stmt_a = DR_STMT (DDR_A (ddr)); dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a)); @@ -3346,11 +3369,12 @@ vect_prune_runtime_alias_test_list (loop } } + unsigned int count = (comp_alias_ddrs.length () + + check_unequal_addrs.length ()); dump_printf_loc (MSG_NOTE, vect_location, "improved number of alias checks from %d to %d\n", - may_alias_ddrs.length (), comp_alias_ddrs.length ()); - if ((int) comp_alias_ddrs.length () > - PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) + may_alias_ddrs.length (), count); + if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, Index: gcc/tree-vect-loop-manip.c =================================================================== --- gcc/tree-vect-loop-manip.c 2017-04-18 19:52:34.027608884 +0100 +++ gcc/tree-vect-loop-manip.c 2017-05-03 08:48:53.314048428 +0100 @@ -1926,6 +1926,20 @@ vect_create_cond_for_niters_checks (loop *cond_expr = part_cond_expr; } +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR + and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */ + +static void +chain_cond_expr (tree *cond_expr, tree part_cond_expr) +{ + if (*cond_expr) + *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + *cond_expr, part_cond_expr); + else + *cond_expr = part_cond_expr; +} + + /* Function vect_create_cond_for_align_checks. Create a conditional expression that represents the alignment checks for @@ -2037,13 +2051,32 @@ vect_create_cond_for_align_checks (loop_ ptrsize_zero = build_int_cst (int_ptrsize_type, 0); part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, and_tmp_name, ptrsize_zero); - if (*cond_expr) - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, - *cond_expr, part_cond_expr); - else - *cond_expr = part_cond_expr; + chain_cond_expr (cond_expr, part_cond_expr); } + +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>, + create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn). + Set *COND_EXPR to a tree that is true when both the original *COND_EXPR + and this new condition are true. Treat a null *COND_EXPR as "true". */ + +static void +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) +{ + vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); + unsigned int i; + vec_object_pair *pair; + FOR_EACH_VEC_ELT (pairs, i, pair) + { + tree addr1 = build_fold_addr_expr (pair->first); + tree addr2 = build_fold_addr_expr (pair->second); + tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node, + addr1, addr2); + chain_cond_expr (cond_expr, part_cond_expr); + } +} + + /* Given two data references and segment lengths described by DR_A and DR_B, create expression checking if the two addresses ranges intersect with each other based on index of the two addresses. This can only be done @@ -2280,11 +2313,7 @@ vect_create_cond_for_alias_checks (loop_ /* Create condition expression for each pair data references. */ create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b); - if (*cond_expr) - *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, - *cond_expr, part_cond_expr); - else - *cond_expr = part_cond_expr; + chain_cond_expr (cond_expr, part_cond_expr); } if (dump_enabled_p ()) @@ -2353,7 +2382,10 @@ vect_loop_versioning (loop_vec_info loop &cond_expr_stmt_list); if (version_alias) - vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); + { + vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr); + vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); + } cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list, is_gimple_condexpr, NULL_TREE); Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c =================================================================== --- /dev/null 2017-05-03 08:16:26.972699664 +0100 +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c 2017-05-03 08:48:53.312035228 +0100 @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +#define N 16 + +struct s { int x[N]; }; + +void +f1 (struct s *a, struct s *b) +{ + for (int i = 0; i < N - 1; ++i) + a->x[i + 1] += b->x[i]; +} + +void +f2 (struct s *a, struct s *b) +{ + for (int i = 0; i < N; ++i) + a->x[i] += b->x[N - i - 1]; +} + +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */