Handle more SLP constant and extern definitions for variable VF

Message ID	87o9obwn2x.fsf@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of gcc-patches-return-466411-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:mime-version:content-type; q=dns; s= default; b=UJPc/jFrOXyRdXQhPp8mu93Q+JzC6sZciieE72c/0b/mBL8zJdv+d RZ9QiUA7Nh7rGiUBZfMOub2cdaTx2efN4toC1IAlzlYwAWrmbLks7Rwv3cOPUVD2 bfAiDtt8ecNLu0K9EAEBlDZ3s7l4d2LNUtQl4o1iFsvCjoc9INg/Tk= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org From: Richard Sandiford <richard.sandiford@linaro.org> To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, richard.sandiford@linaro.org Subject: Handle more SLP constant and extern definitions for variable VF Date: Thu, 09 Nov 2017 14:20:22 +0000 Message-ID: <87o9obwn2x.fsf@linaro.org> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain
Series	Handle more SLP constant and extern definitions for variable VF

Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/tree-vect-slp.c 2017-11-09 14:16:43.816866116 +0000 @@ -41,6 +41,8 @@ Software Foundation; either version 3, o #include "langhooks.h" #include "gimple-walk.h" #include "dbgcnt.h" +#include "gimple-fold.h" +#include "internal-fn.h" /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ @@ -206,24 +208,69 @@ vect_get_place_in_interleaving_chain (gi return -1; } +/* Check whether it is possible to load COUNT elements of type ELT_MODE + using the method implemented by duplicate_and_interleave. Return true + if so, returning the number of intermediate vectors in *NVECTORS_OUT + (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT + (if nonnull). */ + +static bool +can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode, + unsigned int *nvectors_out = NULL, + tree *vector_type_out = NULL) +{ + poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); + poly_int64 nelts; + unsigned int nvectors = 1; + for (;;) + { + scalar_int_mode int_mode; + poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; + if (multiple_p (current_vector_size, elt_bytes, &nelts) + && int_mode_for_size (elt_bits, 0).exists (&int_mode)) + { + tree int_type = build_nonstandard_integer_type + (GET_MODE_BITSIZE (int_mode), 1); + tree vector_type = build_vector_type (int_type, nelts); + if (VECTOR_MODE_P (TYPE_MODE (vector_type)) + && direct_internal_fn_supported_p (IFN_VEC_INTERLEAVE_LO, + vector_type, + OPTIMIZE_FOR_SPEED) + && direct_internal_fn_supported_p (IFN_VEC_INTERLEAVE_HI, + vector_type, + OPTIMIZE_FOR_SPEED)) + { + if (nvectors_out) + *nvectors_out = nvectors; + if (vector_type_out) + *vector_type_out = vector_type; + return true; + } + } + if (!multiple_p (elt_bytes, 2, &elt_bytes)) + return false; + nvectors *= 2; + } +} /* Get the defs for the rhs of STMT (collect them in 
OPRNDS_INFO), check that they are of a valid type and that they match the defs of the first stmt of the SLP group (stored in OPRNDS_INFO). This function tries to match stmts - by swapping operands of STMT when possible. Non-zero *SWAP indicates swap - is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond - and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond - and code of comparison needs to be inverted. If there is any operand swap - in this function, *SWAP is set to non-zero value. + by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP + indicates swap is required for cond_expr stmts. Specifically, *SWAP + is 1 if STMT is cond and operands of comparison need to be swapped; + *SWAP is 2 if STMT is cond and code of comparison needs to be inverted. + If there is any operand swap in this function, *SWAP is set to non-zero + value. If there was a fatal error return -1; if the error could be corrected by swapping operands of father node of this one, return 1; if everything is ok return 0. */ - static int vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap, - gimple *stmt, unsigned stmt_num, + vec<gimple *> stmts, unsigned stmt_num, vec<slp_oprnd_info> *oprnds_info) { + gimple *stmt = stmts[stmt_num]; tree oprnd; unsigned int i, number_of_oprnds; gimple *def_stmt; @@ -371,15 +418,15 @@ vect_get_and_check_slp_defs (vec_info *v types for reduction chains: the first stmt must be a vect_reduction_def (a phi node), and the rest vect_internal_def. 
*/ - if (((oprnd_info->first_dt != dt - && !(oprnd_info->first_dt == vect_reduction_def - && dt == vect_internal_def) - && !((oprnd_info->first_dt == vect_external_def - || oprnd_info->first_dt == vect_constant_def) - && (dt == vect_external_def - || dt == vect_constant_def))) - || !types_compatible_p (oprnd_info->first_op_type, - TREE_TYPE (oprnd)))) + tree type = TREE_TYPE (oprnd); + if ((oprnd_info->first_dt != dt + && !(oprnd_info->first_dt == vect_reduction_def + && dt == vect_internal_def) + && !((oprnd_info->first_dt == vect_external_def + || oprnd_info->first_dt == vect_constant_def) + && (dt == vect_external_def + || dt == vect_constant_def))) + || !types_compatible_p (oprnd_info->first_op_type, type)) { /* Try swapping operands if we got a mismatch. */ if (i == 0 @@ -396,16 +443,12 @@ vect_get_and_check_slp_defs (vec_info *v return 1; } - } - - /* Check the types of the definitions. */ - switch (dt) - { - case vect_constant_def: - case vect_external_def: - /* We must already have set a vector size by now. */ - gcc_checking_assert (may_ne (current_vector_size, 0U)); - if (!current_vector_size.is_constant ()) + if ((dt == vect_constant_def + || dt == vect_external_def) + && !current_vector_size.is_constant () + && (TREE_CODE (type) == BOOLEAN_TYPE + || !can_duplicate_and_interleave_p (stmts.length (), + TYPE_MODE (type)))) { if (dump_enabled_p ()) { @@ -417,6 +460,13 @@ vect_get_and_check_slp_defs (vec_info *v } return -1; } + } + + /* Check the types of the definitions. */ + switch (dt) + { + case vect_constant_def: + case vect_external_def: break; case vect_reduction_def: @@ -1115,7 +1165,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, FOR_EACH_VEC_ELT (stmts, i, stmt) { int res = vect_get_and_check_slp_defs (vinfo, &swap[i], - stmt, i, &oprnds_info); + stmts, i, &oprnds_info); if (res != 0) matches[(res == -1) ? 
0 : i] = false; if (!matches[0]) @@ -3205,6 +3255,118 @@ vect_mask_constant_operand_p (gimple *st return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)); } +/* Build a variable-length vector in which the elements in ELTS are repeated + to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in + RESULTS and add any new instructions to SEQ. + + The approach we use is: + + (1) Find a vector mode VM with integer elements of mode IM. + + (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of + ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs + from small vectors to IM. + + (3) Duplicate each ELTS'[I] into a vector of mode VM. + + (4) Use a tree of VEC_INTERLEAVE_LO/HIs to create VMs with the + correct byte contents. + + (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type. + + We try to find the largest IM for which this sequence works, in order + to cut down on the number of interleaves. */ + +static void +duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts, + unsigned int nresults, vec<tree> &results) +{ + unsigned int nelts = elts.length (); + tree element_type = TREE_TYPE (vector_type); + + /* (1) Find a vector mode VM with integer elements of mode IM. */ + unsigned int nvectors = 1; + tree new_vector_type; + if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type), + &nvectors, &new_vector_type)) + gcc_unreachable (); + + /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */ + unsigned int partial_nelts = nelts / nvectors; + tree partial_vector_type = build_vector_type (element_type, partial_nelts); + + auto_vec<tree, 32> partial_elts (partial_nelts); + partial_elts.quick_grow (partial_nelts); + auto_vec<tree, 32> pieces (nvectors * 2); + pieces.quick_grow (nvectors * 2); + for (unsigned int i = 0; i < nvectors; ++i) + { + /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of + ELTS' has mode IM. 
*/ + for (unsigned int j = 0; j < partial_nelts; ++j) + partial_elts[j] = elts[i * partial_nelts + j]; + tree t = gimple_build_vector (seq, partial_vector_type, partial_elts); + t = gimple_build (seq, VIEW_CONVERT_EXPR, + TREE_TYPE (new_vector_type), t); + + /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */ + pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t); + } + + /* (4) Use a tree of VEC_INTERLEAVE_LO/HIs to create a single VM with the + correct byte contents. + + We need to repeat the following operation log2(nvectors) times: + + out[i * 2] = VEC_INTERLEAVE_LO (in[i], in[i + hi_start]); + out[i * 2 + 1] = VEC_INTERLEAVE_HI (in[i], in[i + hi_start]); + + However, if each input repeats every N elements and the VF is + a multiple of N * 2, the HI result is the same as the LO. */ + unsigned int in_start = 0; + unsigned int out_start = nvectors; + unsigned int hi_start = nvectors / 2; + /* A bound on the number of outputs needed to produce NRESULTS results + in the final iteration. */ + unsigned int noutputs_bound = nvectors * nresults; + for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2) + { + noutputs_bound /= 2; + unsigned int limit = MIN (noutputs_bound, nvectors); + for (unsigned int i = 0; i < limit; ++i) + { + if ((i & 1) != 0 + && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type), + 2 * in_repeat)) + { + pieces[out_start + i] = pieces[out_start + i - 1]; + continue; + } + + tree output = make_ssa_name (new_vector_type); + tree input1 = pieces[in_start + (i / 2)]; + tree input2 = pieces[in_start + (i / 2) + hi_start]; + internal_fn fn = ((i & 1) != 0 + ? IFN_VEC_INTERLEAVE_HI + : IFN_VEC_INTERLEAVE_LO); + gcall *call = gimple_build_call_internal (fn, 2, input1, input2); + gimple_call_set_lhs (call, output); + gimple_seq_add_stmt (seq, call); + pieces[out_start + i] = output; + } + std::swap (in_start, out_start); + } + + /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. 
*/ + results.reserve (nresults); + for (unsigned int i = 0; i < nresults; ++i) + if (i < nvectors) + results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type, + pieces[in_start + i])); + else + results.quick_push (results[i - nvectors]); +} + /* For constant and loop invariant defs of SLP_NODE this function returns (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. @@ -3221,7 +3383,7 @@ vect_get_constant_vectors (tree op, slp_ vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node); gimple *stmt = stmts[0]; stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); - unsigned nunits; + unsigned HOST_WIDE_INT nunits; tree vec_cst; unsigned j, number_of_places_left_in_vector; tree vector_type; @@ -3231,10 +3393,11 @@ vect_get_constant_vectors (tree op, slp_ unsigned number_of_copies = 1; vec<tree> voprnds; voprnds.create (number_of_vectors); - bool constant_p, is_store; + bool is_store; tree neutral_op = NULL; enum tree_code code = gimple_expr_code (stmt); gimple_seq ctor_seq = NULL; + auto_vec<tree, 16> permute_results; /* Check if vector type is a boolean vector. */ if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) @@ -3243,8 +3406,6 @@ vect_get_constant_vectors (tree op, slp_ = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo)); else vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); - /* Enforced by vect_get_and_check_slp_defs. */ - nunits = TYPE_VECTOR_SUBPARTS (vector_type).to_constant (); if (STMT_VINFO_DATA_REF (stmt_vinfo)) { @@ -3272,10 +3433,14 @@ vect_get_constant_vectors (tree op, slp_ (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and {s5, s6, s7, s8}. */ + /* When using duplicate_and_interleave, we just need one element for + each scalar statement. 
*/ + if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) + nunits = group_size; + number_of_copies = nunits * number_of_vectors / group_size; number_of_places_left_in_vector = nunits; - constant_p = true; auto_vec<tree, 32> elts (nunits); elts.quick_grow (nunits); bool place_after_defs = false; @@ -3382,8 +3547,6 @@ vect_get_constant_vectors (tree op, slp_ } } elts[number_of_places_left_in_vector] = op; - if (!CONSTANT_CLASS_P (op)) - constant_p = false; if (TREE_CODE (orig_op) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (orig_op) && STMT_VINFO_BB_VINFO (stmt_vinfo) @@ -3393,16 +3556,16 @@ vect_get_constant_vectors (tree op, slp_ if (number_of_places_left_in_vector == 0) { - if (constant_p) - vec_cst = build_vector (vector_type, elts); + if (must_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) + /* Build the vector directly from ELTS. */ + vec_cst = gimple_build_vector (&ctor_seq, vector_type, elts); else { - vec<constructor_elt, va_gc> *v; - unsigned k; - vec_alloc (v, nunits); - for (k = 0; k < nunits; ++k) - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]); - vec_cst = build_constructor (vector_type, v); + if (vec_oprnds->is_empty ()) + duplicate_and_interleave (&ctor_seq, vector_type, elts, + number_of_vectors, + permute_results); + vec_cst = permute_results[number_of_vectors - j - 1]; } tree init; gimple_stmt_iterator gsi; @@ -3417,14 +3580,12 @@ vect_get_constant_vectors (tree op, slp_ if (ctor_seq != NULL) { gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init)); - gsi_insert_seq_before_without_update (&gsi, ctor_seq, - GSI_SAME_STMT); + gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT); ctor_seq = NULL; } voprnds.quick_push (init); place_after_defs = false; number_of_places_left_in_vector = nunits; - constant_p = true; } } } Index: gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c =================================================================== --- gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c 2017-11-09 14:16:43.320866086 +0000 +++ 
gcc/testsuite/gcc.dg/vect/no-scevccp-slp-30.c 2017-11-09 14:16:43.811866116 +0000 @@ -52,5 +52,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-1.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-1.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-1.c 2017-11-09 14:16:43.811866116 +0000 @@ -118,5 +118,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-10.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-10.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-10.c 2017-11-09 14:16:43.811866116 +0000 @@ -107,7 +107,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target {vect_uintfloat_cvt && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target {{! { vect_uintfloat_cvt}} && { ! 
{vect_int_mult}}} } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target { vect_uintfloat_cvt && vect_int_mult }} } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target {{! { vect_uintfloat_cvt}} && vect_int_mult} } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target {{! { vect_uintfloat_cvt}} && { ! {vect_int_mult}}} } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-12b.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-12b.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-12b.c 2017-11-09 14:16:43.812866116 +0000 @@ -46,6 +46,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided2 && vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided2 && vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
{ vect_strided2 && vect_int_mult } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-12c.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-12c.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-12c.c 2017-11-09 14:16:43.812866116 +0000 @@ -48,5 +48,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_int_mult } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-17.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-17.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-17.c 2017-11-09 14:16:43.812866116 +0000 @@ -51,5 +51,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-19b.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-19b.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-19b.c 2017-11-09 14:16:43.812866116 +0000 @@ -53,5 +53,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided4 } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { 
! vect_strided4 } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided4 } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided4 } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-20.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-20.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-20.c 2017-11-09 14:16:43.812866116 +0000 @@ -110,5 +110,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-21.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-21.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-21.c 2017-11-09 14:16:43.812866116 +0000 @@ -201,6 +201,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { vect_strided4 || vect_extract_even_odd } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! { vect_strided4 || vect_extract_even_odd } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_strided4 } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! 
{ vect_strided4 } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-22.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-22.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-22.c 2017-11-09 14:16:43.813866116 +0000 @@ -129,5 +129,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-24-big-array.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-24-big-array.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-24-big-array.c 2017-11-09 14:16:43.813866116 +0000 @@ -91,4 +91,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || { vect_variable_length && vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_no_align && ilp32 } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-24.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-24.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-24.c 2017-11-09 14:16:43.813866116 +0000 @@ -77,4 +77,4 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && ilp32 } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { { vect_no_align && ilp32 } || { vect_variable_length && vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 
2 "vect" { xfail { vect_no_align && ilp32 } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-28.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-28.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-28.c 2017-11-09 14:16:43.813866116 +0000 @@ -89,5 +89,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-39.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-39.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-39.c 2017-11-09 14:16:43.813866116 +0000 @@ -21,4 +21,4 @@ void bar (double w) } } -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-6.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-6.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-6.c 2017-11-09 14:16:43.813866116 +0000 @@ -116,6 +116,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" {target vect_int_mult} } } */ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" {target { ! 
{ vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { ! { vect_int_mult } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-7.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-7.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-7.c 2017-11-09 14:16:43.813866116 +0000 @@ -122,6 +122,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target vect_short_mult } } }*/ /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_short_mult } } } } }*/ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { target vect_short_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { ! 
{ vect_short_mult } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-cond-1.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-cond-1.c 2017-11-09 14:16:43.813866116 +0000 @@ -122,4 +122,4 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-cond-2-big-array.c 2017-11-09 14:16:43.813866116 +0000 @@ -125,4 +125,4 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-cond-2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-cond-2.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-cond-2.c 2017-11-09 14:16:43.814866116 +0000 @@ -125,4 +125,4 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c 2017-11-09 14:16:43.814866116 +0000 @@ -52,5 +52,5 @@ int main (void) } /* { 
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c 2017-11-09 14:16:43.814866116 +0000 @@ -40,5 +40,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c 2017-11-09 14:16:43.814866116 +0000 @@ -40,5 +40,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c 2017-11-09 14:16:43.814866116 +0000 @@ -46,5 +46,5 @@ int main 
(void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c 2017-11-09 14:16:43.814866116 +0000 @@ -62,5 +62,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-perm-6.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-perm-6.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-perm-6.c 2017-11-09 14:16:43.814866116 +0000 @@ -104,7 +104,7 @@ int main (int argc, const char* argv[]) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { ! 
vect_load_lanes } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */ /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c 2017-11-09 14:16:43.814866116 +0000 @@ -46,7 +46,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_widen_mult_hi_to_si } } } */ /* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ /* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */ Index: gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c 2017-11-09 14:16:43.814866116 +0000 @@ -68,5 +68,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } 
} */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" } } */ /* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 4 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c 2017-11-09 14:16:43.814866116 +0000 @@ -62,5 +62,5 @@ main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail { vect_variable_length && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ /* { dg-final { scan-tree-dump-times "vec_stmt_relevant_p: stmt live but not relevant" 2 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/pr33953.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr33953.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/pr33953.c 2017-11-09 14:16:43.811866116 +0000 @@ -29,6 +29,6 @@ void blockmove_NtoN_blend_noremap32 (con } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { { vect_no_align && { ! vect_hw_misalign } } || vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_align && { ! 
vect_hw_misalign } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-12a.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-12a.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-12a.c 2017-11-09 14:16:43.812866116 +0000 @@ -75,5 +75,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided8 && vect_int_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided8 && vect_int_mult } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-14.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-14.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-14.c 2017-11-09 14:16:43.812866116 +0000 @@ -111,5 +111,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-15.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-15.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-15.c 2017-11-09 14:16:43.812866116 +0000 @@ -112,6 +112,6 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" 
{target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_int_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target { ! { vect_int_mult } } } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c 2017-11-09 14:16:43.814866116 +0000 @@ -77,5 +77,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c 2017-11-09 14:16:43.320866086 +0000 +++ gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c 2017-11-09 14:16:43.814866116 +0000 @@ -52,5 +52,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */ Index: gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c =================================================================== --- gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c 2017-11-09 14:16:43.320866086 +0000 +++ 
gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c 2017-11-09 14:16:43.814866116 +0000 @@ -52,5 +52,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */ Index: gcc/testsuite/gcc.target/aarch64/sve_slp_1.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_1.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += b; \ + a[i * 2 + 1] += c; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* We should use one DUP for each of the 8-, 16- and 32-bit types, + although we currently use LD1RW for _Float16. We should use two + DUPs for each of the three 64-bit types. 
*/ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-not {\tzip2\t} } } */ Index: gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_1_run.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_1.c" + +#define N (103 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[2] = { 3, 11 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, b[0], b[1], N / 2); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 2]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (HARNESS) +} Index: gcc/testsuite/gcc.target/aarch64/sve_slp_2.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_2.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 2] += 10; \ + a[i * 2 + 1] += 17; \ + } \ +} + +#define 
TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 5 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #10\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ +/* { dg-final { scan-assembler-not {\tzip2\t} } } */ Index: gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_2_run.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_2.c" + +#define N (103 * 2) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[2] = { 10, 17 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, N / 2); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 2]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (HARNESS) +} Index: gcc/testsuite/gcc.target/aarch64/sve_slp_3.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_3.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve 
-msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 4] += 41; \ + a[i * 4 + 1] += 25; \ + a[i * 4 + 2] += 31; \ + a[i * 4 + 3] += 62; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T (int64_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* 1 for each 8-bit type. */ +/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */ +/* 1 for each 16-bit type, 2 for each 32-bit type, and 4 for double. */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 13 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */ +/* The 32-bit types need 1 ZIP1 each. The 64-bit types need: + + ZIP1 ZIP1 (2 ZIP2s optimized away) + ZIP1 ZIP2. 
*/ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_3_run.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_3.c" + +#define N (77 * 4) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[4] = { 41, 25, 31, 62 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, N / 4); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 4]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (HARNESS) +} Index: gcc/testsuite/gcc.target/aarch64/sve_slp_4.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_4.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */ + +#include <stdint.h> + +#define VEC_PERM(TYPE) \ +TYPE __attribute__ ((noinline, noclone)) \ +vec_slp_##TYPE (TYPE *restrict a, int n) \ +{ \ + for (int i = 0; i < n; ++i) \ + { \ + a[i * 8] += 99; \ + a[i * 8 + 1] += 11; \ + a[i * 8 + 2] += 17; \ + a[i * 8 + 3] += 80; \ + a[i * 8 + 4] += 63; \ + a[i * 8 + 5] += 37; \ + a[i * 8 + 6] += 24; \ + a[i * 8 + 7] += 81; \ + } \ +} + +#define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ + T 
(int64_t) \ + T (uint64_t) \ + T (_Float16) \ + T (float) \ + T (double) + +TEST_ALL (VEC_PERM) + +/* 1 for each 8-bit type, 2 for each 16-bit type, 4 for each 32-bit type + and 8 for double. */ +/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 28 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #80\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #63\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */ +/* The 16-bit types need 1 ZIP1 each. The 32-bit types need: + + ZIP1 ZIP1 (2 ZIP2s optimized away) + ZIP1 ZIP2 + + and the 64-bit types need: + + ZIP1 ZIP1 ZIP1 ZIP1 (4 ZIP2s optimized away) + ZIP1 ZIP2 ZIP1 ZIP2 + ZIP1 ZIP2 ZIP1 ZIP2. 
*/ +/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */ +/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 15 } } */ Index: gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c =================================================================== --- /dev/null 2017-11-09 12:47:20.377612760 +0000 +++ gcc/testsuite/gcc.target/aarch64/sve_slp_4_run.c 2017-11-09 14:16:43.815866116 +0000 @@ -0,0 +1,30 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */ + +#include "sve_slp_4.c" + +#define N (59 * 8) + +#define HARNESS(TYPE) \ + { \ + TYPE a[N], b[8] = { 99, 11, 17, 80, 63, 37, 24, 81 }; \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + a[i] = i * 2 + i % 5; \ + asm volatile ("" ::: "memory"); \ + } \ + vec_slp_##TYPE (a, N / 8); \ + for (unsigned int i = 0; i < N; ++i) \ + { \ + TYPE orig = i * 2 + i % 5; \ + TYPE expected = orig + b[i % 8]; \ + if (a[i] != expected) \ + __builtin_abort (); \ + } \ + } + +int __attribute__ ((optimize (1))) +main (void) +{ + TEST_ALL (HARNESS) +}

Handle more SLP constant and extern definitions for variable VF

Commit Message

Comments

Patch