| Field | Value |
|---|---|
| Message ID | 87zi6v98we.fsf_-_@linaro.org |
| State | New |
| Headers | show |
| Series | Use tree_vector_builder::new_binary_operation for folding \| expand |
On Wed, Dec 6, 2017 at 4:24 PM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This patch makes fold-const.c operate directly on the VECTOR_CST > encoding when folding an operation that has two VECTOR_CST inputs. > > Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. > Also spot-checked on sparc64-linux-gnu. OK to install? Ok. Richard. > Thanks, > Richard > > > 2017-12-06 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * tree-vector-builder.h > (tree_vector_builder::new_binary_operation): Declare. > * tree-vector-builder.c > (tree_vector_builder::new_binary_operation): New function. > * fold-const.c (fold_relational_const): Use it. > (const_binop): Likewise. Check that both input vectors have > the same number of elements, thus excluding things like WIDEN_SUM. > Check whether it is possible to operate directly on the encodings > of stepped inputs. > > Index: gcc/tree-vector-builder.h > =================================================================== > --- gcc/tree-vector-builder.h 2017-12-06 14:46:14.131599903 +0000 > +++ gcc/tree-vector-builder.h 2017-12-06 14:49:00.386854068 +0000 > @@ -38,6 +38,7 @@ #define GCC_TREE_VECTOR_BUILDER_H > > void new_vector (tree, unsigned int, unsigned int); > bool new_unary_operation (tree, tree, bool); > + bool new_binary_operation (tree, tree, tree, bool); > > private: > bool equal_p (const_tree, const_tree) const; > Index: gcc/tree-vector-builder.c > =================================================================== > --- gcc/tree-vector-builder.c 2017-12-06 14:46:14.131599903 +0000 > +++ gcc/tree-vector-builder.c 2017-12-06 14:49:00.386854068 +0000 > @@ -49,6 +49,53 @@ tree_vector_builder::new_unary_operation > return true; > } > > +/* Try to start building a new vector of type TYPE that holds the result of > + a binary operation on VECTOR_CSTs T1 and T2. 
ALLOW_STEPPED_P is true if > + the operation can handle stepped encodings directly, without having to > + expand the full sequence. > + > + Return true if the operation is possible. Leave the builder unchanged > + otherwise. */ > + > +bool > +tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2, > + bool allow_stepped_p) > +{ > + unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type); > + gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)) > + && full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))); > + /* Conceptually we split the patterns in T1 and T2 until we have > + an equal number for both. Each split pattern requires the same > + number of elements per pattern as the original. E.g. splitting: > + > + { 1, 2, 3, ... } > + > + into two gives: > + > + { 1, 3, 5, ... } > + { 2, 4, 6, ... } > + > + while splitting: > + > + { 1, 0, ... } > + > + into two gives: > + > + { 1, 0, ... } > + { 0, 0, ... }. */ > + unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), > + VECTOR_CST_NPATTERNS (t2)); > + unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), > + VECTOR_CST_NELTS_PER_PATTERN (t2)); > + if (!allow_stepped_p && nelts_per_pattern > 2) > + { > + npatterns = full_nelts; > + nelts_per_pattern = 1; > + } > + new_vector (type, npatterns, nelts_per_pattern); > + return true; > +} > + > /* Return a VECTOR_CST for the current constant. 
*/ > > tree > Index: gcc/fold-const.c > =================================================================== > --- gcc/fold-const.c 2017-12-06 14:48:56.997993407 +0000 > +++ gcc/fold-const.c 2017-12-06 14:49:00.386854068 +0000 > @@ -1435,13 +1435,40 @@ const_binop (enum tree_code code, tree a > } > > if (TREE_CODE (arg1) == VECTOR_CST > - && TREE_CODE (arg2) == VECTOR_CST) > + && TREE_CODE (arg2) == VECTOR_CST > + && (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) > + == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2)))) > { > tree type = TREE_TYPE (arg1); > - int count = VECTOR_CST_NELTS (arg1), i; > + bool step_ok_p; > + if (VECTOR_CST_STEPPED_P (arg1) > + && VECTOR_CST_STEPPED_P (arg2)) > + /* We can operate directly on the encoding if: > + > + a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1 > + implies > + (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1) > + > + Addition and subtraction are the supported operators > + for which this is true. */ > + step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR); > + else if (VECTOR_CST_STEPPED_P (arg1)) > + /* We can operate directly on stepped encodings if: > + > + a3 - a2 == a2 - a1 > + implies: > + (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c) > > - auto_vec<tree, 32> elts (count); > - for (i = 0; i < count; i++) > + which is true if (x -> x op c) distributes over addition. */ > + step_ok_p = distributes_over_addition_p (code, 1); > + else > + /* Similarly in reverse. */ > + step_ok_p = distributes_over_addition_p (code, 2); > + tree_vector_builder elts; > + if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p)) > + return NULL_TREE; > + unsigned int count = elts.encoded_nelts (); > + for (unsigned int i = 0; i < count; ++i) > { > tree elem1 = VECTOR_CST_ELT (arg1, i); > tree elem2 = VECTOR_CST_ELT (arg2, i); > @@ -1455,7 +1482,7 @@ const_binop (enum tree_code code, tree a > elts.quick_push (elt); > } > > - return build_vector (type, elts); > + return elts.build (); > } > > /* Shifts allow a scalar offset for a vector. 
*/ > @@ -13770,11 +13797,10 @@ fold_relational_const (enum tree_code co > } > return constant_boolean_node (true, type); > } > - unsigned count = VECTOR_CST_NELTS (op0); > - gcc_assert (VECTOR_CST_NELTS (op1) == count > - && TYPE_VECTOR_SUBPARTS (type) == count); > - > - auto_vec<tree, 32> elts (count); > + tree_vector_builder elts; > + if (!elts.new_binary_operation (type, op0, op1, false)) > + return NULL_TREE; > + unsigned int count = elts.encoded_nelts (); > for (unsigned i = 0; i < count; i++) > { > tree elem_type = TREE_TYPE (type); > @@ -13791,7 +13817,7 @@ fold_relational_const (enum tree_code co > integer_zerop (tem) ? 0 : -1)); > } > > - return build_vector (type, elts); > + return elts.build (); > } > > /* From here on we only handle LT, LE, GT, GE, EQ and NE.
Index: gcc/tree-vector-builder.h =================================================================== --- gcc/tree-vector-builder.h 2017-12-06 14:46:14.131599903 +0000 +++ gcc/tree-vector-builder.h 2017-12-06 14:49:00.386854068 +0000 @@ -38,6 +38,7 @@ #define GCC_TREE_VECTOR_BUILDER_H void new_vector (tree, unsigned int, unsigned int); bool new_unary_operation (tree, tree, bool); + bool new_binary_operation (tree, tree, tree, bool); private: bool equal_p (const_tree, const_tree) const; Index: gcc/tree-vector-builder.c =================================================================== --- gcc/tree-vector-builder.c 2017-12-06 14:46:14.131599903 +0000 +++ gcc/tree-vector-builder.c 2017-12-06 14:49:00.386854068 +0000 @@ -49,6 +49,53 @@ tree_vector_builder::new_unary_operation return true; } +/* Try to start building a new vector of type TYPE that holds the result of + a binary operation on VECTOR_CSTs T1 and T2. ALLOW_STEPPED_P is true if + the operation can handle stepped encodings directly, without having to + expand the full sequence. + + Return true if the operation is possible. Leave the builder unchanged + otherwise. */ + +bool +tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2, + bool allow_stepped_p) +{ + unsigned int full_nelts = TYPE_VECTOR_SUBPARTS (type); + gcc_assert (full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)) + && full_nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))); + /* Conceptually we split the patterns in T1 and T2 until we have + an equal number for both. Each split pattern requires the same + number of elements per pattern as the original. E.g. splitting: + + { 1, 2, 3, ... } + + into two gives: + + { 1, 3, 5, ... } + { 2, 4, 6, ... } + + while splitting: + + { 1, 0, ... } + + into two gives: + + { 1, 0, ... } + { 0, 0, ... }. 
*/ + unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), + VECTOR_CST_NPATTERNS (t2)); + unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), + VECTOR_CST_NELTS_PER_PATTERN (t2)); + if (!allow_stepped_p && nelts_per_pattern > 2) + { + npatterns = full_nelts; + nelts_per_pattern = 1; + } + new_vector (type, npatterns, nelts_per_pattern); + return true; +} + /* Return a VECTOR_CST for the current constant. */ tree Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c 2017-12-06 14:48:56.997993407 +0000 +++ gcc/fold-const.c 2017-12-06 14:49:00.386854068 +0000 @@ -1435,13 +1435,40 @@ const_binop (enum tree_code code, tree a } if (TREE_CODE (arg1) == VECTOR_CST - && TREE_CODE (arg2) == VECTOR_CST) + && TREE_CODE (arg2) == VECTOR_CST + && (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) + == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2)))) { tree type = TREE_TYPE (arg1); - int count = VECTOR_CST_NELTS (arg1), i; + bool step_ok_p; + if (VECTOR_CST_STEPPED_P (arg1) + && VECTOR_CST_STEPPED_P (arg2)) + /* We can operate directly on the encoding if: + + a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1 + implies + (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1) + + Addition and subtraction are the supported operators + for which this is true. */ + step_ok_p = (code == PLUS_EXPR || code == MINUS_EXPR); + else if (VECTOR_CST_STEPPED_P (arg1)) + /* We can operate directly on stepped encodings if: + + a3 - a2 == a2 - a1 + implies: + (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c) - auto_vec<tree, 32> elts (count); - for (i = 0; i < count; i++) + which is true if (x -> x op c) distributes over addition. */ + step_ok_p = distributes_over_addition_p (code, 1); + else + /* Similarly in reverse. 
*/ + step_ok_p = distributes_over_addition_p (code, 2); + tree_vector_builder elts; + if (!elts.new_binary_operation (type, arg1, arg2, step_ok_p)) + return NULL_TREE; + unsigned int count = elts.encoded_nelts (); + for (unsigned int i = 0; i < count; ++i) { tree elem1 = VECTOR_CST_ELT (arg1, i); tree elem2 = VECTOR_CST_ELT (arg2, i); @@ -1455,7 +1482,7 @@ const_binop (enum tree_code code, tree a elts.quick_push (elt); } - return build_vector (type, elts); + return elts.build (); } /* Shifts allow a scalar offset for a vector. */ @@ -13770,11 +13797,10 @@ fold_relational_const (enum tree_code co } return constant_boolean_node (true, type); } - unsigned count = VECTOR_CST_NELTS (op0); - gcc_assert (VECTOR_CST_NELTS (op1) == count - && TYPE_VECTOR_SUBPARTS (type) == count); - - auto_vec<tree, 32> elts (count); + tree_vector_builder elts; + if (!elts.new_binary_operation (type, op0, op1, false)) + return NULL_TREE; + unsigned int count = elts.encoded_nelts (); for (unsigned i = 0; i < count; i++) { tree elem_type = TREE_TYPE (type); @@ -13791,7 +13817,7 @@ fold_relational_const (enum tree_code co integer_zerop (tem) ? 0 : -1)); } - return build_vector (type, elts); + return elts.build (); } /* From here on we only handle LT, LE, GT, GE, EQ and NE.