diff mbox series

Use tree_vector_builder::new_binary_operation for folding

Message ID 87zi6v98we.fsf_-_@linaro.org
State New
Headers show
Series Use tree_vector_builder::new_binary_operation for folding | expand

Commit Message

Richard Sandiford Dec. 6, 2017, 3:24 p.m. UTC
This patch makes fold-const.c operate directly on the VECTOR_CST
encoding when folding an operation that has two VECTOR_CST inputs.

Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.
Also spot-checked on sparc64-linux-gnu.  OK to install?

Thanks,
Richard


2017-12-06  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-vector-builder.h
	(tree_vector_builder::new_binary_operation): Declare.
	* tree-vector-builder.c
	(tree_vector_builder::new_binary_operation): New function.
	* fold-const.c (fold_relational_const): Use it.
	(const_binop): Likewise.  Check that both input vectors have
	the same number of elements, thus excluding things like WIDEN_SUM.
	Check whether it is possible to operate directly on the encodings
	of stepped inputs.

Comments

Richard Biener Dec. 7, 2017, 11:07 a.m. UTC | #1
On Wed, Dec 6, 2017 at 4:24 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch makes fold-const.c operate directly on the VECTOR_CST

> encoding when folding an operation that has two VECTOR_CST inputs.

>

> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu.

> Also spot-checked on sparc64-linux-gnu.  OK to install?


Ok.

Richard.

> Thanks,

> Richard

>

>

> 2017-12-06  Richard Sandiford  <richard.sandiford@linaro.org>

>

> gcc/

>         * tree-vector-builder.h

>         (tree_vector_builder::new_binary_operation): Declare.

>         * tree-vector-builder.c

>         (tree_vector_builder::new_binary_operation): New function.

>         * fold-const.c (fold_relational_const): Use it.

>         (const_binop): Likewise.  Check that both input vectors have

>         the same number of elements, thus excluding things like WIDEN_SUM.

>         Check whether it is possible to operate directly on the encodings

>         of stepped inputs.

>

> Index: gcc/tree-vector-builder.h

> ===================================================================

> --- gcc/tree-vector-builder.h   2017-12-06 14:46:14.131599903 +0000

> +++ gcc/tree-vector-builder.h   2017-12-06 14:49:00.386854068 +0000

> @@ -38,6 +38,7 @@ #define GCC_TREE_VECTOR_BUILDER_H

>

>    void new_vector (tree, unsigned int, unsigned int);

>    bool new_unary_operation (tree, tree, bool);

> +  bool new_binary_operation (tree, tree, tree, bool);

>

>  private:

>    bool equal_p (const_tree, const_tree) const;

> Index: gcc/tree-vector-builder.c

> ===================================================================

> --- gcc/tree-vector-builder.c   2017-12-06 14:46:14.131599903 +0000

> +++ gcc/tree-vector-builder.c   2017-12-06 14:49:00.386854068 +0000

> @@ -49,6 +49,53 @@ tree_vector_builder::new_unary_operation

>    return true;

>  }

>

> +/* Try to start building a new vector of type TYPE that holds the result of

> +   a binary operation on VECTOR_CSTs T1 and T2.  ALLOW_STEPPED_P is true if

> +   the operation can handle stepped encodings directly, without having to

> +   expand the full sequence.

> +

> +   Return true if the operation is possible.  Leave the builder unchanged

> +   otherwise.  */

> +

> +bool

> +tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,

> +                                          bool allow_stepped_p)

> +{

> +  unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type);

> +  gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))

> +             && full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)));

> +  /* Conceptually we split the patterns in T1 and T2 until we have

> +     an equal number for both.  Each split pattern requires the same

> +     number of elements per pattern as the original.  E.g. splitting:

> +

> +       { 1, 2, 3, ... }

> +

> +     into two gives:

> +

> +       { 1, 3, 5, ... }

> +       { 2, 4, 6, ... }

> +

> +     while splitting:

> +

> +       { 1, 0, ... }

> +

> +     into two gives:

> +

> +       { 1, 0, ... }

> +       { 0, 0, ... }.  */

> +  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),

> +                                                 VECTOR_CST_NPATTERNS (t2));

> +  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),

> +                                       VECTOR_CST_NELTS_PER_PATTERN (t2));

> +  if (!allow_stepped_p && nelts_per_pattern > 2)

> +    {

> +      npatterns = full_nelts;

> +      nelts_per_pattern = 1;

> +    }

> +  new_vector (type, npatterns, nelts_per_pattern);

> +  return true;

> +}

> +

>  /* Return a VECTOR_CST for the current constant.  */

>

>  tree

> Index: gcc/fold-const.c

> ===================================================================

> --- gcc/fold-const.c    2017-12-06 14:48:56.997993407 +0000

> +++ gcc/fold-const.c    2017-12-06 14:49:00.386854068 +0000

> @@ -1435,13 +1435,40 @@ const_binop (enum tree_code code, tree a

>      }

>

>    if (TREE_CODE (arg1) == VECTOR_CST

> -      && TREE_CODE (arg2) == VECTOR_CST)

> +      && TREE_CODE (arg2) == VECTOR_CST

> +      && (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1))

> +         == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))

>      {

>        tree type = TREE_TYPE (arg1);

> -      int count = VECTOR_CST_NELTS (arg1), i;

> +      bool step_ok_p;

> +      if (VECTOR_CST_STEPPED_P (arg1)

> +         && VECTOR_CST_STEPPED_P (arg2))

> +       /* We can operate directly on the encoding if:

> +

> +             a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1

> +           implies

> +             (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)

> +

> +          Addition and subtraction are the supported operators

> +          for which this is true.  */

> +       step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);

> +      else if (VECTOR_CST_STEPPED_P (arg1))

> +       /* We can operate directly on stepped encodings if:

> +

> +            a3 - a2 == a2 - a1

> +          implies:

> +            (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)

>

> -      auto_vec<tree, 32> elts (count);

> -      for (i = 0; i < count; i++)

> +          which is true if (x -> x op c) distributes over addition.  */

> +       step_ok_p = distributes_over_addition_p (code, 1);

> +      else

> +       /* Similarly in reverse.  */

> +       step_ok_p = distributes_over_addition_p (code, 2);

> +      tree_vector_builder elts;

> +      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))

> +       return NULL_TREE;

> +      unsigned int count = elts.encoded_nelts ();

> +      for (unsigned int i = 0; i < count; ++i)

>         {

>           tree elem1 = VECTOR_CST_ELT (arg1, i);

>           tree elem2 = VECTOR_CST_ELT (arg2, i);

> @@ -1455,7 +1482,7 @@ const_binop (enum tree_code code, tree a

>           elts.quick_push (elt);

>         }

>

> -      return build_vector (type, elts);

> +      return elts.build ();

>      }

>

>    /* Shifts allow a scalar offset for a vector.  */

> @@ -13770,11 +13797,10 @@ fold_relational_const (enum tree_code co

>             }

>           return constant_boolean_node (true, type);

>         }

> -      unsigned count = VECTOR_CST_NELTS (op0);

> -      gcc_assert (VECTOR_CST_NELTS (op1) == count

> -                 && TYPE_VECTOR_SUBPARTS (type) == count);

> -

> -      auto_vec<tree, 32> elts (count);

> +      tree_vector_builder elts;

> +      if (!elts.new_binary_operation (type, op0, op1, false))

> +       return NULL_TREE;

> +      unsigned int count = elts.encoded_nelts ();

>        for (unsigned i = 0; i < count; i++)

>         {

>           tree elem_type = TREE_TYPE (type);

> @@ -13791,7 +13817,7 @@ fold_relational_const (enum tree_code co

>                                           integer_zerop (tem) ? 0 : -1));

>         }

>

> -      return build_vector (type, elts);

> +      return elts.build ();

>      }

>

>    /* From here on we only handle LT, LE, GT, GE, EQ and NE.
diff mbox series

Patch

Index: gcc/tree-vector-builder.h
===================================================================
--- gcc/tree-vector-builder.h	2017-12-06 14:46:14.131599903 +0000
+++ gcc/tree-vector-builder.h	2017-12-06 14:49:00.386854068 +0000
@@ -38,6 +38,7 @@  #define GCC_TREE_VECTOR_BUILDER_H
 
   void new_vector (tree, unsigned int, unsigned int);
   bool new_unary_operation (tree, tree, bool);
+  bool new_binary_operation (tree, tree, tree, bool);
 
 private:
   bool equal_p (const_tree, const_tree) const;
Index: gcc/tree-vector-builder.c
===================================================================
--- gcc/tree-vector-builder.c	2017-12-06 14:46:14.131599903 +0000
+++ gcc/tree-vector-builder.c	2017-12-06 14:49:00.386854068 +0000
@@ -49,6 +49,53 @@  tree_vector_builder::new_unary_operation
   return true;
 }
 
+/* Try to start building a new vector of type TYPE that holds the result of
+   a binary operation on VECTOR_CSTs T1 and T2.  ALLOW_STEPPED_P is true if
+   the operation can handle stepped encodings directly, without having to
+   expand the full sequence.
+
+   Return true if the operation is possible.  Leave the builder unchanged
+   otherwise.  */
+
+bool
+tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
+					   bool allow_stepped_p)
+{
+  unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type);
+  gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))
+	      && full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)));
+  /* Conceptually we split the patterns in T1 and T2 until we have
+     an equal number for both.  Each split pattern requires the same
+     number of elements per pattern as the original.  E.g. splitting:
+
+       { 1, 2, 3, ... }
+
+     into two gives:
+
+       { 1, 3, 5, ... }
+       { 2, 4, 6, ... }
+
+     while splitting:
+
+       { 1, 0, ... }
+
+     into two gives:
+
+       { 1, 0, ... }
+       { 0, 0, ... }.  */
+  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
+						  VECTOR_CST_NPATTERNS (t2));
+  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
+					VECTOR_CST_NELTS_PER_PATTERN (t2));
+  if (!allow_stepped_p && nelts_per_pattern > 2)
+    {
+      npatterns = full_nelts;
+      nelts_per_pattern = 1;
+    }
+  new_vector (type, npatterns, nelts_per_pattern);
+  return true;
+}
+
 /* Return a VECTOR_CST for the current constant.  */
 
 tree
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	2017-12-06 14:48:56.997993407 +0000
+++ gcc/fold-const.c	2017-12-06 14:49:00.386854068 +0000
@@ -1435,13 +1435,40 @@  const_binop (enum tree_code code, tree a
     }
 
   if (TREE_CODE (arg1) == VECTOR_CST
-      && TREE_CODE (arg2) == VECTOR_CST)
+      && TREE_CODE (arg2) == VECTOR_CST
+      && (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1))
+	  == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2))))
     {
       tree type = TREE_TYPE (arg1);
-      int count = VECTOR_CST_NELTS (arg1), i;
+      bool step_ok_p;
+      if (VECTOR_CST_STEPPED_P (arg1)
+	  && VECTOR_CST_STEPPED_P (arg2))
+	/* We can operate directly on the encoding if:
+
+	      a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+	    implies
+	      (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+	   Addition and subtraction are the supported operators
+	   for which this is true.  */
+	step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR);
+      else if (VECTOR_CST_STEPPED_P (arg1))
+	/* We can operate directly on stepped encodings if:
+
+	     a3 - a2 == a2 - a1
+	   implies:
+	     (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
 
-      auto_vec<tree, 32> elts (count);
-      for (i = 0; i < count; i++)
+	   which is true if (x -> x op c) distributes over addition.  */
+	step_ok_p = distributes_over_addition_p (code, 1);
+      else
+	/* Similarly in reverse.  */
+	step_ok_p = distributes_over_addition_p (code, 2);
+      tree_vector_builder elts;
+      if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p))
+	return NULL_TREE;
+      unsigned int count = elts.encoded_nelts ();
+      for (unsigned int i = 0; i < count; ++i)
 	{
 	  tree elem1 = VECTOR_CST_ELT (arg1, i);
 	  tree elem2 = VECTOR_CST_ELT (arg2, i);
@@ -1455,7 +1482,7 @@  const_binop (enum tree_code code, tree a
 	  elts.quick_push (elt);
 	}
 
-      return build_vector (type, elts);
+      return elts.build ();
     }
 
   /* Shifts allow a scalar offset for a vector.  */
@@ -13770,11 +13797,10 @@  fold_relational_const (enum tree_code co
 	    }
 	  return constant_boolean_node (true, type);
 	}
-      unsigned count = VECTOR_CST_NELTS (op0);
-      gcc_assert (VECTOR_CST_NELTS (op1) == count
-		  && TYPE_VECTOR_SUBPARTS (type) == count);
-
-      auto_vec<tree, 32> elts (count);
+      tree_vector_builder elts;
+      if (!elts.new_binary_operation (type, op0, op1, false))
+	return NULL_TREE;
+      unsigned int count = elts.encoded_nelts ();
       for (unsigned i = 0; i < count; i++)
 	{
 	  tree elem_type = TREE_TYPE (type);
@@ -13791,7 +13817,7 @@  fold_relational_const (enum tree_code co
 					  integer_zerop (tem) ? 0 : -1));
 	}
 
-      return build_vector (type, elts);
+      return elts.build ();
     }
 
   /* From here on we only handle LT, LE, GT, GE, EQ and NE.