===================================================================
@@ -41,6 +41,8 @@ Software Foundation; either version 3, o
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
+#include "gimple-fold.h"
+#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
@@ -206,24 +208,69 @@ vect_get_place_in_interleaving_chain (gi
return -1;
}
+/* Check whether it is possible to load COUNT elements of mode ELT_MODE
+ using the method implemented by duplicate_and_interleave. Return true
+ if so, returning the number of intermediate vectors in *NVECTORS_OUT
+ (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
+ (if nonnull). */
+
+static bool
+can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
+ unsigned int *nvectors_out = NULL,
+ tree *vector_type_out = NULL)
+{
+ /* Start with one integer element big enough to hold all COUNT elements
+ and a single vector. Each failed attempt below halves the element
+ size and doubles the number of vectors. */
+ poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
+ poly_int64 nelts;
+ unsigned int nvectors = 1;
+ for (;;)
+ {
+ scalar_int_mode int_mode;
+ poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
+ /* The current vector size must be a multiple of ELT_BYTES (the
+ quotient becomes NELTS) and an integer mode of ELT_BITS bits
+ must exist. */
+ if (multiple_p (current_vector_size, elt_bytes, &nelts)
+ && int_mode_for_size (elt_bits, 0).exists (&int_mode))
+ {
+ tree int_type = build_nonstandard_integer_type
+ (GET_MODE_BITSIZE (int_mode), 1);
+ tree vector_type = build_vector_type (int_type, nelts);
+ /* The candidate is only usable if it has a vector mode and both
+ interleave internal functions are supported for it. */
+ if (VECTOR_MODE_P (TYPE_MODE (vector_type))
+ && direct_internal_fn_supported_p (IFN_VEC_INTERLEAVE_LO,
+ vector_type,
+ OPTIMIZE_FOR_SPEED)
+ && direct_internal_fn_supported_p (IFN_VEC_INTERLEAVE_HI,
+ vector_type,
+ OPTIMIZE_FOR_SPEED))
+ {
+ if (nvectors_out)
+ *nvectors_out = nvectors;
+ if (vector_type_out)
+ *vector_type_out = vector_type;
+ return true;
+ }
+ }
+ /* Retry with elements of half the size and twice as many vectors.
+ Give up once ELT_BYTES can no longer be halved exactly. */
+ if (!multiple_p (elt_bytes, 2, &elt_bytes))
+ return false;
+ nvectors *= 2;
+ }
+}
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
they are of a valid type and that they match the defs of the first stmt of
the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
- by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
- is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
- and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
- and code of comparison needs to be inverted. If there is any operand swap
- in this function, *SWAP is set to non-zero value.
+ by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
+ indicates swap is required for cond_expr stmts. Specifically, *SWAP
+ is 1 if STMT is cond and operands of comparison need to be swapped;
+ *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
+ If there is any operand swap in this function, *SWAP is set to non-zero
+ value.
If there was a fatal error return -1; if the error could be corrected by
swapping operands of father node of this one, return 1; if everything is
ok return 0. */
-
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
- gimple *stmt, unsigned stmt_num,
+ vec<gimple *> stmts, unsigned stmt_num,
vec<slp_oprnd_info> *oprnds_info)
{
+ gimple *stmt = stmts[stmt_num];
tree oprnd;
unsigned int i, number_of_oprnds;
gimple *def_stmt;
@@ -371,15 +418,15 @@ vect_get_and_check_slp_defs (vec_info *v
types for reduction chains: the first stmt must be a
vect_reduction_def (a phi node), and the rest
vect_internal_def. */
- if (((oprnd_info->first_dt != dt
- && !(oprnd_info->first_dt == vect_reduction_def
- && dt == vect_internal_def)
- && !((oprnd_info->first_dt == vect_external_def
- || oprnd_info->first_dt == vect_constant_def)
- && (dt == vect_external_def
- || dt == vect_constant_def)))
- || !types_compatible_p (oprnd_info->first_op_type,
- TREE_TYPE (oprnd))))
+ tree type = TREE_TYPE (oprnd);
+ if ((oprnd_info->first_dt != dt
+ && !(oprnd_info->first_dt == vect_reduction_def
+ && dt == vect_internal_def)
+ && !((oprnd_info->first_dt == vect_external_def
+ || oprnd_info->first_dt == vect_constant_def)
+ && (dt == vect_external_def
+ || dt == vect_constant_def)))
+ || !types_compatible_p (oprnd_info->first_op_type, type))
{
/* Try swapping operands if we got a mismatch. */
if (i == 0
@@ -396,16 +443,12 @@ vect_get_and_check_slp_defs (vec_info *v
return 1;
}
- }
-
- /* Check the types of the definitions. */
- switch (dt)
- {
- case vect_constant_def:
- case vect_external_def:
- /* We must already have set a vector size by now. */
- gcc_checking_assert (may_ne (current_vector_size, 0U));
- if (!current_vector_size.is_constant ())
+ if ((dt == vect_constant_def
+ || dt == vect_external_def)
+ && !current_vector_size.is_constant ()
+ && (TREE_CODE (type) == BOOLEAN_TYPE
+ || !can_duplicate_and_interleave_p (stmts.length (),
+ TYPE_MODE (type))))
{
if (dump_enabled_p ())
{
@@ -417,6 +460,13 @@ vect_get_and_check_slp_defs (vec_info *v
}
return -1;
}
+ }
+
+ /* Check the types of the definitions. */
+ switch (dt)
+ {
+ case vect_constant_def:
+ case vect_external_def:
break;
case vect_reduction_def:
@@ -1115,7 +1165,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
FOR_EACH_VEC_ELT (stmts, i, stmt)
{
int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
- stmt, i, &oprnds_info);
+ stmts, i, &oprnds_info);
if (res != 0)
matches[(res == -1) ? 0 : i] = false;
if (!matches[0])
@@ -3205,6 +3255,118 @@ vect_mask_constant_operand_p (gimple *st
return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
+/* Build a variable-length vector in which the elements in ELTS are repeated
+ to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
+ RESULTS and add any new instructions to SEQ.
+
+ The approach we use is:
+
+ (1) Find a vector mode VM with integer elements of mode IM.
+
+ (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
+ ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
+ from small vectors to IM.
+
+ (3) Duplicate each ELTS'[I] into a vector of mode VM.
+
+ (4) Use a tree of VEC_INTERLEAVE_LO/HIs to create VMs with the
+ correct byte contents.
+
+ (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
+
+ We try to find the largest IM for which this sequence works, in order
+ to cut down on the number of interleaves. */
+
+static void
+duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
+ unsigned int nresults, vec<tree> &results)
+{
+ unsigned int nelts = elts.length ();
+ tree element_type = TREE_TYPE (vector_type);
+
+ /* (1) Find a vector mode VM with integer elements of mode IM. */
+ unsigned int nvectors = 1;
+ tree new_vector_type;
+ /* NOTE(review): failure here is treated as impossible; presumably callers
+ have already checked can_duplicate_and_interleave_p for the same
+ element count and mode -- confirm. */
+ if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
+ &nvectors, &new_vector_type))
+ gcc_unreachable ();
+
+ /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
+ unsigned int partial_nelts = nelts / nvectors;
+ tree partial_vector_type = build_vector_type (element_type, partial_nelts);
+
+ auto_vec<tree, 32> partial_elts (partial_nelts);
+ partial_elts.quick_grow (partial_nelts);
+ /* PIECES holds two banks of NVECTORS vectors each. Every interleave
+ round below reads one bank and writes the other. */
+ auto_vec<tree, 32> pieces (nvectors * 2);
+ pieces.quick_grow (nvectors * 2);
+ for (unsigned int i = 0; i < nvectors; ++i)
+ {
+ /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
+ ELTS' has mode IM. */
+ for (unsigned int j = 0; j < partial_nelts; ++j)
+ partial_elts[j] = elts[i * partial_nelts + j];
+ tree t = gimple_build_vector (seq, partial_vector_type, partial_elts);
+ t = gimple_build (seq, VIEW_CONVERT_EXPR,
+ TREE_TYPE (new_vector_type), t);
+
+ /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
+ pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
+ }
+
+ /* (4) Use a tree of VEC_INTERLEAVE_LO/HIs to create VMs with the
+ correct byte contents.
+
+ We need to repeat the following operation log2(nvectors) times:
+
+ out[i * 2] = VEC_INTERLEAVE_LO (in[i], in[i + hi_start]);
+ out[i * 2 + 1] = VEC_INTERLEAVE_HI (in[i], in[i + hi_start]);
+
+ However, if each input repeats every N elements and the VF is
+ a multiple of N * 2, the HI result is the same as the LO. */
+ unsigned int in_start = 0;
+ unsigned int out_start = nvectors;
+ unsigned int hi_start = nvectors / 2;
+ /* A bound on the number of outputs needed to produce NRESULTS results
+ in the final iteration. */
+ unsigned int noutputs_bound = nvectors * nresults;
+ for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
+ {
+ noutputs_bound /= 2;
+ unsigned int limit = MIN (noutputs_bound, nvectors);
+ for (unsigned int i = 0; i < limit; ++i)
+ {
+ /* Odd outputs are the HI interleaves. When the HI result would
+ equal the LO result just stored (see above), reuse that value
+ instead of emitting another call. */
+ if ((i & 1) != 0
+ && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
+ 2 * in_repeat))
+ {
+ pieces[out_start + i] = pieces[out_start + i - 1];
+ continue;
+ }
+
+ tree output = make_ssa_name (new_vector_type);
+ tree input1 = pieces[in_start + (i / 2)];
+ tree input2 = pieces[in_start + (i / 2) + hi_start];
+ internal_fn fn = ((i & 1) != 0
+ ? IFN_VEC_INTERLEAVE_HI
+ : IFN_VEC_INTERLEAVE_LO);
+ gcall *call = gimple_build_call_internal (fn, 2, input1, input2);
+ gimple_call_set_lhs (call, output);
+ gimple_seq_add_stmt (seq, call);
+ pieces[out_start + i] = output;
+ }
+ /* This round's outputs become the next round's inputs. */
+ std::swap (in_start, out_start);
+ }
+
+ /* (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
+ Only the first NVECTORS results are converted; any further results
+ reuse them cyclically. */
+ results.reserve (nresults);
+ for (unsigned int i = 0; i < nresults; ++i)
+ if (i < nvectors)
+ results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
+ pieces[in_start + i]));
+ else
+ results.quick_push (results[i - nvectors]);
+}
+
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
@@ -3221,7 +3383,7 @@ vect_get_constant_vectors (tree op, slp_
vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
gimple *stmt = stmts[0];
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
- unsigned nunits;
+ unsigned HOST_WIDE_INT nunits;
tree vec_cst;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
@@ -3231,10 +3393,11 @@ vect_get_constant_vectors (tree op, slp_
unsigned number_of_copies = 1;
vec<tree> voprnds;
voprnds.create (number_of_vectors);
- bool constant_p, is_store;
+ bool is_store;
tree neutral_op = NULL;
enum tree_code code = gimple_expr_code (stmt);
gimple_seq ctor_seq = NULL;
+ auto_vec<tree, 16> permute_results;
/* Check if vector type is a boolean vector. */
if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
@@ -3243,8 +3406,6 @@ vect_get_constant_vectors (tree op, slp_
= build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
else
vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
- /* Enforced by vect_get_and_check_slp_defs. */
- nunits = TYPE_VECTOR_SUBPARTS (vector_type).to_constant ();
if (STMT_VINFO_DATA_REF (stmt_vinfo))
{
@@ -3272,10 +3433,14 @@ vect_get_constant_vectors (tree op, slp_
(s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
{s5, s6, s7, s8}. */
+ /* When using duplicate_and_interleave, we just need one element for
+ each scalar statement. */
+ if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
+ nunits = group_size;
+
number_of_copies = nunits * number_of_vectors / group_size;
number_of_places_left_in_vector = nunits;
- constant_p = true;
auto_vec<tree, 32> elts (nunits);
elts.quick_grow (nunits);
bool place_after_defs = false;
@@ -3382,8 +3547,6 @@ vect_get_constant_vectors (tree op, slp_
}
}
elts[number_of_places_left_in_vector] = op;
- if (!CONSTANT_CLASS_P (op))
- constant_p = false;
if (TREE_CODE (orig_op) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (orig_op)
&& STMT_VINFO_BB_VINFO (stmt_vinfo)
@@ -3393,16 +3556,16 @@ vect_get_constant_vectors (tree op, slp_
if (number_of_places_left_in_vector == 0)
{
- if (constant_p)
- vec_cst = build_vector (vector_type, elts);
+ if (must_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+ /* Build the vector directly from ELTS. */
+ vec_cst = gimple_build_vector (&ctor_seq, vector_type, elts);
else
{
- vec<constructor_elt, va_gc> *v;
- unsigned k;
- vec_alloc (v, nunits);
- for (k = 0; k < nunits; ++k)
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
- vec_cst = build_constructor (vector_type, v);
+ if (vec_oprnds->is_empty ())
+ duplicate_and_interleave (&ctor_seq, vector_type, elts,
+ number_of_vectors,
+ permute_results);
+ vec_cst = permute_results[number_of_vectors - j - 1];
}
tree init;
gimple_stmt_iterator gsi;
@@ -3417,14 +3580,12 @@ vect_get_constant_vectors (tree op, slp_
if (ctor_seq != NULL)
{
gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
- gsi_insert_seq_before_without_update (&gsi, ctor_seq,
- GSI_SAME_STMT);
+ gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
ctor_seq = NULL;
}
voprnds.quick_push (init);
place_after_defs = false;
number_of_places_left_in_vector = nunits;
- constant_p = true;
}
}
}
===================================================================
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
===================================================================
@@ -118,5 +118,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
===================================================================
@@ -107,7 +107,7 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult }} } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */
===================================================================
@@ -46,6 +46,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */
===================================================================
@@ -48,5 +48,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
===================================================================
@@ -51,5 +51,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
===================================================================
@@ -53,5 +53,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided4 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */
===================================================================
@@ -110,5 +110,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
===================================================================
@@ -201,6 +201,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided4 } } } } } */
===================================================================
@@ -129,5 +129,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */
===================================================================
@@ -91,4 +91,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || { vect_variable_length && vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
===================================================================
@@ -77,4 +77,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || { vect_variable_length && vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */
===================================================================
@@ -89,5 +89,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
===================================================================
@@ -21,4 +21,4 @@ void bar (double w)
}
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
===================================================================
@@ -116,6 +116,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target vect_int_mult} } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { ! { vect_int_mult } } } } } */
===================================================================
@@ -122,6 +122,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_short_mult } } }*/
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_short_mult } } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! { vect_short_mult } } } } } */
===================================================================
@@ -122,4 +122,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
===================================================================
@@ -125,4 +125,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
===================================================================
@@ -125,4 +125,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
===================================================================
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
===================================================================
@@ -40,5 +40,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
===================================================================
@@ -40,5 +40,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
===================================================================
@@ -46,5 +46,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
===================================================================
@@ -62,5 +62,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
===================================================================
@@ -104,7 +104,7 @@ int main (int argc, const char* argv[])
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
===================================================================
@@ -46,7 +46,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
===================================================================
@@ -68,5 +68,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 4 "vect" } } */
===================================================================
@@ -62,5 +62,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 2 "vect" } } */
===================================================================
@@ -29,6 +29,6 @@ void blockmove_NtoN_blend_noremap32 (con
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || vect_variable_length } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
===================================================================
@@ -75,5 +75,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */
===================================================================
@@ -111,5 +111,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
===================================================================
@@ -112,6 +112,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! { vect_int_mult } } } } } */
===================================================================
@@ -77,5 +77,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
===================================================================
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
===================================================================
@@ -52,5 +52,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
===================================================================
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+/* Define vec_slp_<TYPE>, which adds B to each even-indexed element and C
+   to each odd-indexed element among the first 2 * N elements of A.  The
+   body has no return statement, so the function is declared void rather
+   than as returning TYPE.  */
+#define VEC_PERM(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \
+{ \
+  for (int i = 0; i < n; ++i) \
+    { \
+      a[i * 2] += b; \
+      a[i * 2 + 1] += c; \
+    } \
+}
+
+/* Invoke macro T once for each element type covered by the test.  */
+#define TEST_ALL(T) \
+  T (int8_t) \
+  T (uint8_t) \
+  T (int16_t) \
+  T (uint16_t) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t) \
+  T (_Float16) \
+  T (float) \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* We should use one DUP for each of the 8-, 16- and 32-bit types,
+   although we currently use LD1RW for _Float16.  We should use two
+   DUPs for each of the three 64-bit types.  */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-not {\tzip2\t} } } */
===================================================================
@@ -0,0 +1,39 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_slp_1.c"
+
+/* Number of array elements: 103 groups of 2.  */
+#define N (103 * 2)
+
+/* Fill an N-element array of TYPE with a fixed pattern, run vec_slp_<TYPE>
+   over it and abort unless every element has grown by the addend for its
+   position within the group.  The empty asm with a memory clobber after
+   each initialising store is presumably there to stop the compiler from
+   optimising the set-up loop -- TODO confirm.  */
+#define HARNESS(TYPE) \
+  { \
+    TYPE data[N]; \
+    TYPE addend[2] = { 3, 11 }; \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        data[idx] = idx * 2 + idx % 5; \
+        asm volatile ("" ::: "memory"); \
+      } \
+    vec_slp_##TYPE (data, addend[0], addend[1], N / 2); \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        TYPE before = idx * 2 + idx % 5; \
+        TYPE want = before + addend[idx % 2]; \
+        if (data[idx] != want) \
+          __builtin_abort (); \
+      } \
+  }
+
+/* Run the harness for every type in TEST_ALL.  main itself is compiled
+   with the optimize (1) attribute rather than the -O2 given above.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
===================================================================
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+/* Define vec_slp_<TYPE>, which adds 10 to each even-indexed element and
+   17 to each odd-indexed element among the first 2 * N elements of A.
+   The body has no return statement, so the function is declared void.  */
+#define VEC_PERM(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+  for (int i = 0; i < n; ++i) \
+    { \
+      a[i * 2] += 10; \
+      a[i * 2 + 1] += 17; \
+    } \
+}
+
+/* Invoke macro T once for each element type covered by the test.  */
+#define TEST_ALL(T) \
+  T (int8_t) \
+  T (uint8_t) \
+  T (int16_t) \
+  T (uint16_t) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t) \
+  T (_Float16) \
+  T (float) \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 5 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #10\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-not {\tzip2\t} } } */
===================================================================
@@ -0,0 +1,39 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_slp_2.c"
+
+/* Number of array elements: 103 groups of 2.  */
+#define N (103 * 2)
+
+/* Fill an N-element array of TYPE with a fixed pattern, run vec_slp_<TYPE>
+   over it and abort unless every element has grown by the addend for its
+   position within the group.  The empty asm with a memory clobber after
+   each initialising store is presumably there to stop the compiler from
+   optimising the set-up loop -- TODO confirm.  */
+#define HARNESS(TYPE) \
+  { \
+    TYPE data[N]; \
+    TYPE addend[2] = { 10, 17 }; \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        data[idx] = idx * 2 + idx % 5; \
+        asm volatile ("" ::: "memory"); \
+      } \
+    vec_slp_##TYPE (data, N / 2); \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        TYPE before = idx * 2 + idx % 5; \
+        TYPE want = before + addend[idx % 2]; \
+        if (data[idx] != want) \
+          __builtin_abort (); \
+      } \
+  }
+
+/* Run the harness for every type in TEST_ALL.  main itself is compiled
+   with the optimize (1) attribute rather than the -O2 given above.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
===================================================================
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+/* Define vec_slp_<TYPE>, which adds 41, 25, 31 and 62 in turn to each
+   group of four consecutive elements among the first 4 * N elements of A.
+   The body has no return statement, so the function is declared void.  */
+#define VEC_PERM(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+  for (int i = 0; i < n; ++i) \
+    { \
+      a[i * 4] += 41; \
+      a[i * 4 + 1] += 25; \
+      a[i * 4 + 2] += 31; \
+      a[i * 4 + 3] += 62; \
+    } \
+}
+
+/* Invoke macro T once for each element type covered by the test.  */
+#define TEST_ALL(T) \
+  T (int8_t) \
+  T (uint8_t) \
+  T (int16_t) \
+  T (uint16_t) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t) \
+  T (_Float16) \
+  T (float) \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* 1 for each 8-bit type.  */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */
+/* 1 for each 16-bit type, 2 for each 32-bit type, and 4 for double.  */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 13 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */
+/* The 32-bit types need 1 ZIP1 each.  The 64-bit types need:
+
+      ZIP1 ZIP1 (2 ZIP2s optimized away)
+      ZIP1 ZIP2.  */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
===================================================================
@@ -0,0 +1,39 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_slp_3.c"
+
+/* Number of array elements: 77 groups of 4.  */
+#define N (77 * 4)
+
+/* Fill an N-element array of TYPE with a fixed pattern, run vec_slp_<TYPE>
+   over it and abort unless every element has grown by the addend for its
+   position within the group.  The empty asm with a memory clobber after
+   each initialising store is presumably there to stop the compiler from
+   optimising the set-up loop -- TODO confirm.  */
+#define HARNESS(TYPE) \
+  { \
+    TYPE data[N]; \
+    TYPE addend[4] = { 41, 25, 31, 62 }; \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        data[idx] = idx * 2 + idx % 5; \
+        asm volatile ("" ::: "memory"); \
+      } \
+    vec_slp_##TYPE (data, N / 4); \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        TYPE before = idx * 2 + idx % 5; \
+        TYPE want = before + addend[idx % 4]; \
+        if (data[idx] != want) \
+          __builtin_abort (); \
+      } \
+  }
+
+/* Run the harness for every type in TEST_ALL.  main itself is compiled
+   with the optimize (1) attribute rather than the -O2 given above.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}
===================================================================
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+/* Define vec_slp_<TYPE>, which adds 99, 11, 17, 80, 63, 37, 24 and 81 in
+   turn to each group of eight consecutive elements among the first 8 * N
+   elements of A.  The body has no return statement, so it returns void.  */
+#define VEC_PERM(TYPE) \
+void __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+  for (int i = 0; i < n; ++i) \
+    { \
+      a[i * 8] += 99; \
+      a[i * 8 + 1] += 11; \
+      a[i * 8 + 2] += 17; \
+      a[i * 8 + 3] += 80; \
+      a[i * 8 + 4] += 63; \
+      a[i * 8 + 5] += 37; \
+      a[i * 8 + 6] += 24; \
+      a[i * 8 + 7] += 81; \
+    } \
+}
+
+/* Invoke macro T once for each element type covered by the test.  */
+#define TEST_ALL(T) \
+  T (int8_t) \
+  T (uint8_t) \
+  T (int16_t) \
+  T (uint16_t) \
+  T (int32_t) \
+  T (uint32_t) \
+  T (int64_t) \
+  T (uint64_t) \
+  T (_Float16) \
+  T (float) \
+  T (double)
+
+TEST_ALL (VEC_PERM)
+
+/* 1 for each 8-bit type, 2 for each 16-bit type, 4 for each 32-bit type
+   and 8 for double.  */
+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 28 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #80\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #63\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */
+/* The 16-bit types need 1 ZIP1 each.  The 32-bit types need:
+
+      ZIP1 ZIP1 (2 ZIP2s optimized away)
+      ZIP1 ZIP2
+
+   and the 64-bit types need:
+
+      ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away)
+      ZIP1 ZIP2 ZIP1 ZIP2
+      ZIP1 ZIP2 ZIP1 ZIP2.  */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */
===================================================================
@@ -0,0 +1,39 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_slp_4.c"
+
+/* Number of array elements: 59 groups of 8.  */
+#define N (59 * 8)
+
+/* Fill an N-element array of TYPE with a fixed pattern, run vec_slp_<TYPE>
+   over it and abort unless every element has grown by the addend for its
+   position within the group.  The empty asm with a memory clobber after
+   each initialising store is presumably there to stop the compiler from
+   optimising the set-up loop -- TODO confirm.  */
+#define HARNESS(TYPE) \
+  { \
+    TYPE data[N]; \
+    TYPE addend[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        data[idx] = idx * 2 + idx % 5; \
+        asm volatile ("" ::: "memory"); \
+      } \
+    vec_slp_##TYPE (data, N / 8); \
+    for (unsigned int idx = 0; idx < N; ++idx) \
+      { \
+        TYPE before = idx * 2 + idx % 5; \
+        TYPE want = before + addend[idx % 8]; \
+        if (data[idx] != want) \
+          __builtin_abort (); \
+      } \
+  }
+
+/* Run the harness for every type in TEST_ALL.  main itself is compiled
+   with the optimize (1) attribute rather than the -O2 given above.  */
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  TEST_ALL (HARNESS)
+}