===================================================================
@@ -1154,6 +1154,16 @@ vect_vf_for_cost (loop_vec_info loop_vin
return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
}
+/* Estimate the number of elements in VEC_TYPE for costing purposes.
+ Pick a reasonable estimate if the exact number isn't known at
+ compile time. */
+
+static inline unsigned int
+vect_nunits_for_cost (tree vec_type)
+{
+ return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
+}
+
/* Return the size of the value accessed by unvectorized data reference DR.
This is only valid once STMT_VINFO_VECTYPE has been calculated for the
associated gimple statement, since that guarantees that DR accesses
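
For context on what the new helper is estimating: once vector lengths are represented as poly_ints (as for SVE), TYPE_VECTOR_SUBPARTS is a value of the form A + B*X rather than a plain constant, and estimated_poly_value substitutes a target-provided guess for X so the cost code has a concrete count to multiply with. The snippet below is a standalone toy sketch of that idea, not GCC code: the poly_val struct, the estimate_value function and the assumed X of 1 are all hypothetical and only illustrate the arithmetic.

/* Toy model (not GCC code): estimate a "2 + 2*X" element count by
   substituting an assumed value for X, which is roughly what
   vect_nunits_for_cost gets back from estimated_poly_value.  */
#include <stdio.h>

struct poly_val { unsigned coeff0, coeff1; };  /* value = coeff0 + coeff1 * X */

/* Hypothetical stand-in for estimated_poly_value.  */
static unsigned
estimate_value (struct poly_val v, unsigned assumed_x)
{
  return v.coeff0 + v.coeff1 * assumed_x;
}

int
main (void)
{
  struct poly_val nunits = { 2, 2 };  /* e.g. doubles per vector on a length-agnostic target */
  printf ("assumed element count: %u\n", estimate_value (nunits, 1));  /* prints 4 */
  return 0;
}
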
===================================================================
@@ -3844,13 +3844,15 @@ vect_model_reduction_cost (stmt_vec_info
}
else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
{
- unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned estimated_nunits = vect_nunits_for_cost (vectype);
/* Extraction of scalar elements. */
- epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits,
+ epilogue_cost += add_stmt_cost (target_cost_data,
+ 2 * estimated_nunits,
vec_to_scalar, stmt_info, 0,
vect_epilogue);
/* Scalar max reductions via COND_EXPR / MAX_EXPR. */
- epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3,
+ epilogue_cost += add_stmt_cost (target_cost_data,
+ 2 * estimated_nunits - 3,
scalar_stmt, stmt_info, 0,
vect_epilogue);
}
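
A quick worked example of what the hunk above changes for the COND_REDUCTION epilogue: if vect_nunits_for_cost returns, say, 4 for vectype, the code charges 2 * 4 = 8 vec_to_scalar extractions plus 2 * 4 - 3 = 5 scalar COND_EXPR/MAX_EXPR statements. The formula itself is unchanged; the only difference is that it is now evaluated on an assumed element count instead of on TYPE_VECTOR_SUBPARTS, which stops being a compile-time constant for variable-length vectors.
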
===================================================================
@@ -1718,8 +1718,8 @@ vect_analyze_slp_cost_1 (slp_instance in
&n_perms);
record_stmt_cost (body_cost_vec, n_perms, vec_perm,
stmt_info, 0, vect_body);
- unsigned nunits
- = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ unsigned assumed_nunits
+ = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
/* And adjust the number of loads performed. This handles
redundancies as well as loads that are later dead. */
auto_sbitmap perm (GROUP_SIZE (stmt_info));
@@ -1730,7 +1730,7 @@ vect_analyze_slp_cost_1 (slp_instance in
bool load_seen = false;
for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
{
- if (i % nunits == 0)
+ if (i % assumed_nunits == 0)
{
if (load_seen)
ncopies_for_cost++;
@@ -1743,7 +1743,7 @@ vect_analyze_slp_cost_1 (slp_instance in
ncopies_for_cost++;
gcc_assert (ncopies_for_cost
<= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
- + nunits - 1) / nunits);
+ + assumed_nunits - 1) / assumed_nunits);
poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
ncopies_for_cost *= estimated_poly_value (uf);
}
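
The window-scanning logic above charges one vector load per assumed_nunits-sized chunk of the group that still contains a needed element, and the assert bounds the result by the ceiling division (GROUP_SIZE - GROUP_GAP + assumed_nunits - 1) / assumed_nunits. Below is a standalone sketch of that counting loop, not GCC code; the count_vector_loads function and the needed[] array are made up here and stand in for the real sbitmap of load-permutation indices.

/* Standalone sketch (not GCC code): scan GROUP_SIZE slots in windows
   of ASSUMED_NUNITS and count the windows that contain at least one
   element the permutation still needs.  */
#include <stdbool.h>
#include <stdio.h>

static unsigned
count_vector_loads (const bool *needed, unsigned group_size,
                    unsigned assumed_nunits)
{
  unsigned loads = 0;
  bool load_seen = false;
  for (unsigned i = 0; i < group_size; ++i)
    {
      if (i % assumed_nunits == 0)
        {
          if (load_seen)
            loads++;
          load_seen = false;
        }
      if (needed[i])
        load_seen = true;
    }
  if (load_seen)
    loads++;
  return loads;
}

int
main (void)
{
  /* 8-element group where only elements 1 and 2 are used: with an
     assumed element count of 4 the second window is dead, so only
     one vector load is charged.  */
  bool needed[8] = { false, true, true, false, false, false, false, false };
  printf ("%u\n", count_vector_loads (needed, 8, 4));  /* prints 1 */
  return 0;
}
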
@@ -1856,9 +1856,9 @@ vect_analyze_slp_cost (slp_instance inst
assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
else
assumed_vf = 1;
- unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
/* For reductions look at a reduction operand in case the reduction
operation is widening like DOT_PROD or SAD. */
+ tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1866,14 +1866,16 @@ vect_analyze_slp_cost (slp_instance inst
{
case DOT_PROD_EXPR:
case SAD_EXPR:
- nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
- (TREE_TYPE (gimple_assign_rhs1 (stmt))));
+ vectype_for_cost = get_vectype_for_scalar_type
+ (TREE_TYPE (gimple_assign_rhs1 (stmt)));
break;
default:;
}
}
- ncopies_for_cost = least_common_multiple (nunits,
- group_size * assumed_vf) / nunits;
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
+ ncopies_for_cost = (least_common_multiple (assumed_nunits,
+ group_size * assumed_vf)
+ / assumed_nunits);
prologue_cost_vec.create (10);
body_cost_vec.create (10);
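
To make the copy-count formula above concrete: ncopies_for_cost is the number of vector statements needed to cover group_size * assumed_vf scalar results when each vector is assumed to hold assumed_nunits of them, i.e. lcm (assumed_nunits, group_size * assumed_vf) / assumed_nunits. The DOT_PROD/SAD special case only changes where assumed_nunits comes from: those reductions are widening, so the element count is taken from the narrower-element input vector (which holds more elements) rather than from the result type. The following is a standalone check of the arithmetic, not GCC code; GCC uses its own least_common_multiple helper.

/* Standalone check of the copy-count arithmetic (not GCC code).  */
#include <stdio.h>

static unsigned gcd (unsigned a, unsigned b) { return b ? gcd (b, a % b) : a; }
static unsigned lcm (unsigned a, unsigned b) { return a / gcd (a, b) * b; }

int
main (void)
{
  unsigned assumed_nunits = 4;            /* assumed elements per vector */
  unsigned group_size = 3, assumed_vf = 2;
  unsigned ncopies_for_cost
    = lcm (assumed_nunits, group_size * assumed_vf) / assumed_nunits;
  printf ("%u\n", ncopies_for_cost);      /* lcm (4, 6) / 4 = 3 */
  return 0;
}
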
===================================================================
@@ -950,18 +950,25 @@ vect_model_store_cost (stmt_vec_info stm
/* Costs of the stores. */
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_GATHER_SCATTER)
- /* N scalar stores plus extracting the elements. */
- inside_cost += record_stmt_cost (body_cost_vec,
- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
- scalar_store, stmt_info, 0, vect_body);
+ {
+ /* N scalar stores plus extracting the elements. */
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+ inside_cost += record_stmt_cost (body_cost_vec,
+ ncopies * assumed_nunits,
+ scalar_store, stmt_info, 0, vect_body);
+ }
else
vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_STRIDED_SLP)
- inside_cost += record_stmt_cost (body_cost_vec,
- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
- vec_to_scalar, stmt_info, 0, vect_body);
+ {
+ /* N scalar stores plus extracting the elements. */
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+ inside_cost += record_stmt_cost (body_cost_vec,
+ ncopies * assumed_nunits,
+ vec_to_scalar, stmt_info, 0, vect_body);
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1081,8 +1088,9 @@ vect_model_load_cost (stmt_vec_info stmt
{
/* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
inside_cost += record_stmt_cost (body_cost_vec,
- ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+ ncopies * assumed_nunits,
scalar_load, stmt_info, 0, vect_body);
}
else
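
Finally, the store and load hunks follow the same pattern as the rest of the patch: for the element-by-element paths (VMAT_ELEMENTWISE, VMAT_GATHER_SCATTER, VMAT_STRIDED_SLP) the number of scalar operations is ncopies times the element count, so with ncopies = 2 and an assumed count of 4 the store path is charged 2 * 4 = 8 scalar_store operations plus 8 vec_to_scalar extractions, and the load path 8 scalar_load operations. As elsewhere, only the source of the element count changes, from TYPE_VECTOR_SUBPARTS to vect_nunits_for_cost, so the counts handed to record_stmt_cost stay plain integers even when the real vector length is unknown at compile time.
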