diff mbox series

[060/nnn] poly_int: loop versioning threshold

Message ID 87o9oxkc9d.fsf@linaro.org
State New
Headers show
Series [060/nnn] poly_int: loop versioning threshold | expand

Commit Message

Richard Sandiford Oct. 23, 2017, 5:25 p.m. UTC
This patch splits the loop versioning threshold out from the
cost model threshold so that the former can become a poly_uint64.
We still use a single test to enforce both limits where possible.


2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vectorizer.h (_loop_vec_info): Add a versioning_threshold
	field.
	(LOOP_VINFO_VERSIONING_THRESHOLD): New macro.
	(vect_loop_versioning): Take the loop versioning threshold as a
	separate parameter.
	* tree-vect-loop-manip.c (vect_loop_versioning): Likewise.
	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
	versioning_threshold.
	(vect_analyze_loop_2): Compute the loop versioning threshold
	whenever loop versioning is needed, and store it in the new
	field rather than combining it with the cost model threshold.
	(vect_transform_loop): Update call to vect_loop_versioning.
	Try to combine the loop versioning and cost thresholds here.

Comments

Jeff Law Dec. 5, 2017, 5:31 p.m. UTC | #1
On 10/23/2017 11:25 AM, Richard Sandiford wrote:
> This patch splits the loop versioning threshold out from the
> cost model threshold so that the former can become a poly_uint64.
> We still use a single test to enforce both limits where possible.
> 
> 
> 2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
> 	    Alan Hayward  <alan.hayward@arm.com>
> 	    David Sherwood  <david.sherwood@arm.com>
> 
> gcc/
> 	* tree-vectorizer.h (_loop_vec_info): Add a versioning_threshold
> 	field.
> 	(LOOP_VINFO_VERSIONING_THRESHOLD): New macro
> 	(vect_loop_versioning): Take the loop versioning threshold as a
> 	separate parameter.
> 	* tree-vect-loop-manip.c (vect_loop_versioning): Likewise.
> 	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
> 	versioning_threshold.
> 	(vect_analyze_loop_2): Compute the loop versioning threshold
> 	whenever loop versioning is needed, and store it in the new
> 	field rather than combining it with the cost model threshold.
> 	(vect_transform_loop): Update call to vect_loop_versioning.
> 	Try to combine the loop versioning and cost thresholds here.

So you dropped the tests for PEELING_FOR_GAPS and PEELING_FOR_NITER in
vect_analyze_loop_2.  Was that intentional?

Otherwise it looks fine.  If the drop was intentional, then OK as-is.

jeff
diff mbox series

Patch

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2017-10-23 17:11:39.817127625 +0100
+++ gcc/tree-vectorizer.h	2017-10-23 17:22:23.377858186 +0100
@@ -238,6 +238,12 @@  typedef struct _loop_vec_info : public v
      PARAM_MIN_VECT_LOOP_BOUND.  */
   unsigned int th;
 
+  /* When applying loop versioning, the vector form should only be used
+     if the number of scalar iterations is >= this value, on top of all
+     the other requirements.  Ignored when loop versioning is not being
+     used.  */
+  poly_uint64 versioning_threshold;
+
   /* Unrolling factor  */
   int vectorization_factor;
 
@@ -357,6 +363,7 @@  #define LOOP_VINFO_NITERS(L)
 #define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L)   (L)->num_iters_assumptions
 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
+#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
@@ -1143,7 +1150,8 @@  extern void slpeel_make_loop_iterate_nti
 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
 						     struct loop *, edge);
-extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
+extern void vect_loop_versioning (loop_vec_info, unsigned int, bool,
+				  poly_uint64);
 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
 				     tree *, tree *, tree *, int, bool, bool);
 extern source_location find_loop_location (struct loop *);
Index: gcc/tree-vect-loop-manip.c
===================================================================
--- gcc/tree-vect-loop-manip.c	2017-10-23 17:11:39.816125711 +0100
+++ gcc/tree-vect-loop-manip.c	2017-10-23 17:22:23.376857985 +0100
@@ -2295,7 +2295,8 @@  vect_create_cond_for_alias_checks (loop_
 
 void
 vect_loop_versioning (loop_vec_info loop_vinfo,
-		      unsigned int th, bool check_profitability)
+		      unsigned int th, bool check_profitability,
+		      poly_uint64 versioning_threshold)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
@@ -2320,6 +2321,17 @@  vect_loop_versioning (loop_vec_info loop
     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
 			     build_int_cst (TREE_TYPE (scalar_loop_iters),
 					    th - 1));
+  if (maybe_nonzero (versioning_threshold))
+    {
+      tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
+			       build_int_cst (TREE_TYPE (scalar_loop_iters),
+					      versioning_threshold - 1));
+      if (cond_expr)
+	cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
+				 expr, cond_expr);
+      else
+	cond_expr = expr;
+    }
 
   if (version_niter)
     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2017-10-23 17:11:39.816125711 +0100
+++ gcc/tree-vect-loop.c	2017-10-23 17:22:23.377858186 +0100
@@ -1110,6 +1110,7 @@  _loop_vec_info::_loop_vec_info (struct l
     num_iters_unchanged (NULL_TREE),
     num_iters_assumptions (NULL_TREE),
     th (0),
+    versioning_threshold (0),
     vectorization_factor (0),
     max_vectorization_factor (0),
     unaligned_dr (NULL),
@@ -2174,11 +2175,9 @@  vect_analyze_loop_2 (loop_vec_info loop_
      enough for both peeled prolog loop and vector loop.  This check
      can be merged along with threshold check of loop versioning, so
      increase threshold for this case if necessary.  */
-  if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
-      && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-	  || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+  if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     {
-      unsigned niters_th;
+      poly_uint64 niters_th;
 
       /* Niters for peeled prolog loop.  */
       if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
@@ -2195,9 +2194,8 @@  vect_analyze_loop_2 (loop_vec_info loop_
       niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
       /* One additional iteration because of peeling for gap.  */
       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
-	niters_th++;
-      if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th)
-	LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th;
+	niters_th += 1;
+      LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
     }
 
   gcc_assert (vectorization_factor
@@ -2300,6 +2298,7 @@  vect_analyze_loop_2 (loop_vec_info loop_
   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
+  LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
 
   goto start_over;
 }
@@ -7320,7 +7319,17 @@  vect_transform_loop (loop_vec_info loop_
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     {
-      vect_loop_versioning (loop_vinfo, th, check_profitability);
+      poly_uint64 versioning_threshold
+	= LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
+      if (check_profitability
+	  && ordered_p (poly_uint64 (th), versioning_threshold))
+	{
+	  versioning_threshold = ordered_max (poly_uint64 (th),
+					      versioning_threshold);
+	  check_profitability = false;
+	}
+      vect_loop_versioning (loop_vinfo, th, check_profitability,
+			    versioning_threshold);
       check_profitability = false;
     }