===================================================================
@@ -75,3 +75,5 @@ check_vect (void)
#endif
signal (SIGILL, SIG_DFL);
}
+
+#define VECTOR_BITS 128
===================================================================
@@ -2,10 +2,18 @@
/* { dg-additional-options "-O3" } */
/* { dg-require-effective-target vect_unpack } */
+#include "tree-vect.h"
+
+#if VECTOR_BITS > 512
+#define N (VECTOR_BITS * 10 / 16)
+#else
+#define N 320
+#endif
+
void foo(unsigned *p1, unsigned short *p2)
{
int n;
- for (n = 0; n < 320; n++)
+ for (n = 0; n < N; n++)
p1[n] = p2[n * 2];
}
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 256
+#define N (VECTOR_BITS / 16)
+#else
#define N 16
+#endif
unsigned short in[N];
unsigned short coef[N];
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 256
+#define N (VECTOR_BITS / 16 + 10)
+#else
#define N 26
+#endif
__attribute__ ((noinline))
unsigned int main1 ()
===================================================================
@@ -4,7 +4,12 @@
#include <stdarg.h>
#include "tree-vect.h"
+/* N / 2 bytes has to be worth vectorizing even with peeling. */
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 4 / 8)
+#else
#define N 64
+#endif
struct t{
int k[N];
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 8)
+#else
#define N 32
+#endif
struct {
char ca[N];
===================================================================
@@ -3,7 +3,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 512
+#define N (VECTOR_BITS * 6 / 16)
+#else
#define N 200
+#endif
void __attribute__((noinline))
foo (unsigned short *__restrict__ pInput, unsigned short *__restrict__ pOutput)
===================================================================
@@ -3,8 +3,12 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS / 8)
+#else
#define N 16
-
+#endif
+
__attribute__ ((noinline))
int main1 ()
{
===================================================================
@@ -3,8 +3,13 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 32)
+#define OFF (VECTOR_BITS / 32)
+#else
#define N 8
#define OFF 8
+#endif
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
===================================================================
@@ -3,8 +3,13 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 32)
+#define OFF (VECTOR_BITS / 32)
+#else
#define N 8
#define OFF 8
+#endif
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
===================================================================
@@ -3,8 +3,13 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 32)
+#define OFF (VECTOR_BITS / 32)
+#else
#define N 8
#define OFF 8
+#endif
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
===================================================================
@@ -3,7 +3,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 256
+#define N (VECTOR_BITS * 2 / 32)
+#else
#define N 16
+#endif
struct tmp_struct
{
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 256
+#define N (VECTOR_BITS * 2 / 32)
+#else
#define N 16
+#endif
struct tmp
{
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 8)
+#else
#define N 32
+#endif
int ib[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) =
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 8)
+#else
#define N 32
+#endif
unsigned int ic[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) =
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 8 / 16)
+#else
#define N 64
+#endif
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) void
===================================================================
@@ -4,7 +4,11 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 8 / 16)
+#else
#define N 64
+#endif
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) int
===================================================================
@@ -3,7 +3,12 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 16)
+#else
#define N 16
+#endif
+
unsigned short udata_sh[N] =
{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28 };
#define SUM 210
===================================================================
@@ -3,7 +3,12 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 16)
+#else
#define N 16
+#endif
+
unsigned char udata_ch[N] =
{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28 };
#define SUM 210
===================================================================
@@ -3,7 +3,12 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 16)
+#else
#define N 16
+#endif
+
signed short data_sh[N] =
{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28 };
#define SUM 210
===================================================================
@@ -4,12 +4,18 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 16
+#if VECTOR_BITS > 128
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 4
+#endif
+
+#define N (NINTS * 4)
int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-int ia[N][4][N+1];
-int ic[N][N][3][2*N+2];
-int id[N][N][N+4];
+int ia[N][NINTS][N + 1];
+int ic[N][N][NINTS - 1][2 * N + 2];
+int id[N][N][N + NINTS];
__attribute__ ((noinline))
int main1 ()
===================================================================
@@ -5,7 +5,13 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 24
+#if VECTOR_BITS > 128
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 4
+#endif
+
+#define N (NINTS * 6)
struct s{
int m;
@@ -19,8 +25,7 @@ struct s2{
struct test1{
struct s a; /* array a.n is unaligned */
- int b;
- int c;
+ int pad[NINTS - 2];
struct s e; /* array e.n is aligned */
};
@@ -54,13 +59,13 @@ int main1 ()
}
/* 2. aligned */
- for (i = 3; i < N-1; i++)
+ for (i = NINTS - 1; i < N - 1; i++)
{
tmp1[2].a.n[1][2][i] = 6;
}
/* check results: */
- for (i = 3; i < N-1; i++)
+ for (i = NINTS - 1; i < N - 1; i++)
{
if (tmp1[2].a.n[1][2][i] != 6)
abort ();
@@ -86,18 +91,18 @@ int main1 ()
}
/* 4. unaligned (unknown misalignment) */
- for (i = 0; i < N-4; i++)
+ for (i = 0; i < N - NINTS; i++)
{
- for (j = 0; j < N-4; j++)
+ for (j = 0; j < N - NINTS; j++)
{
tmp2[2].e.n[1][i][j] = 8;
}
}
/* check results: */
- for (i = 0; i < N-4; i++)
+ for (i = 0; i < N - NINTS; i++)
{
- for (j = 0; j < N-4; j++)
+ for (j = 0; j < N - NINTS; j++)
{
if (tmp2[2].e.n[1][i][j] != 8)
abort ();
===================================================================
@@ -4,7 +4,13 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 32
+#if VECTOR_BITS > 128
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 4
+#endif
+
+#define N (NINTS * 8)
struct s{
int m;
@@ -18,16 +24,15 @@ struct s2{
struct test1{
struct s a; /* array a.n is unaligned */
- int b;
- int c;
+ int pad[NINTS - 2];
struct s e; /* array e.n is aligned */
};
struct test2{
- struct s2 a; /* array a.n is unaligned */
+ struct s2 a;
int b;
int c;
- struct s2 e; /* array e.n is aligned */
+ struct s2 e;
};
@@ -52,13 +57,13 @@ int main1 ()
}
/* 2. aligned */
- for (i = 3; i < N-1; i++)
+ for (i = NINTS - 1; i < N - 1; i++)
{
tmp1[2].a.n[1][2][i] = 6;
}
/* check results: */
- for (i = 3; i < N-1; i++)
+ for (i = NINTS - 1; i < N - 1; i++)
{
if (tmp1[2].a.n[1][2][i] != 6)
abort ();
@@ -84,18 +89,18 @@ int main1 ()
}
/* 4. unaligned */
- for (i = 0; i < N-4; i++)
+ for (i = 0; i < N - NINTS; i++)
{
- for (j = 0; j < N-4; j++)
+ for (j = 0; j < N - NINTS; j++)
{
tmp2[2].e.n[1][i][j] = 8;
}
}
/* check results: */
- for (i = 0; i < N-4; i++)
+ for (i = 0; i < N - NINTS; i++)
{
- for (j = 0; j < N-4; j++)
+ for (j = 0; j < N - NINTS; j++)
{
if (tmp2[2].e.n[1][i][j] != 8)
abort ();
===================================================================
@@ -35,7 +35,11 @@ #define M24 115
#define M34 7716
#define M44 16
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 5 / 32)
+#else
#define N 20
+#endif
void foo (unsigned int *__restrict__ pInput,
unsigned int *__restrict__ pOutput,
@@ -77,12 +81,6 @@ void foo (unsigned int *__restrict__ pIn
int main (int argc, const char* argv[])
{
unsigned int input[N], output[N], i, input2[N], output2[N];
- unsigned int check_results[N]
- = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
- 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619 };
- unsigned int check_results2[N]
- = {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243,
- 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463 };
check_vect ();
@@ -95,6 +93,57 @@ int main (int argc, const char* argv[])
__asm__ volatile ("");
}
+#if N == 20
+ unsigned int check_results[N]
+ = { 3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
+ 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404,
+ 31619 };
+ unsigned int check_results2[N]
+ = { 7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243,
+ 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634,
+ 35463 };
+#else
+ volatile unsigned int check_results[N];
+ volatile unsigned int check_results2[N];
+
+ for (i = 0; i < N / 5; i++)
+ {
+ unsigned int a = input[i * 5];
+ unsigned int b = input[i * 5 + 1];
+ unsigned int c = input[i * 5 + 2];
+ unsigned int d = input[i * 5 + 3];
+ unsigned int e = input[i * 5 + 4];
+
+ check_results[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
+ check_results[i * 5 + 1] = (M10 * a + M11 * b + M12 * c
+ + M13 * d + M14 * e);
+ check_results[i * 5 + 2] = (M20 * a + M21 * b + M22 * c
+ + M23 * d + M24 * e);
+ check_results[i * 5 + 3] = (M30 * a + M31 * b + M32 * c
+ + M33 * d + M34 * e);
+ check_results[i * 5 + 4] = (M40 * a + M41 * b + M42 * c
+ + M43 * d + M44 * e);
+
+ a = input2[i * 5];
+ b = input2[i * 5 + 1];
+ c = input2[i * 5 + 2];
+ d = input2[i * 5 + 3];
+ e = input2[i * 5 + 4];
+
+ check_results2[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
+ check_results2[i * 5 + 1] = (M10 * a + M11 * b + M12 * c
+ + M13 * d + M14 * e);
+ check_results2[i * 5 + 2] = (M20 * a + M21 * b + M22 * c
+ + M23 * d + M24 * e);
+ check_results2[i * 5 + 3] = (M30 * a + M31 * b + M32 * c
+ + M33 * d + M34 * e);
+ check_results2[i * 5 + 4] = (M40 * a + M41 * b + M42 * c
+ + M43 * d + M44 * e);
+
+ asm volatile ("" ::: "memory");
+ }
+#endif
+
foo (input, output, input2, output2);
for (i = 0; i < N; i++)
===================================================================
@@ -13,7 +13,11 @@ #define M02 74
#define M12 191
#define M22 500
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 3 / 32 + 4)
+#else
#define N 16
+#endif
void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput)
{
@@ -34,7 +38,6 @@ void foo (unsigned int *__restrict__ pIn
int main (int argc, const char* argv[])
{
unsigned int input[N], output[N], i;
- unsigned int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
check_vect ();
@@ -45,6 +48,25 @@ int main (int argc, const char* argv[])
__asm__ volatile ("");
}
+#if N == 16
+ unsigned int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
+#else
+ volatile unsigned int check_results[N] = {};
+
+ for (unsigned int i = 0; i < N / 3; i++)
+ {
+ unsigned int a = input[i * 3];
+ unsigned int b = input[i * 3 + 1];
+ unsigned int c = input[i * 3 + 2];
+
+ check_results[i * 3] = M00 * a + M01 * b + M02 * c;
+ check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c;
+ check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c;
+
+ asm volatile ("" ::: "memory");
+ }
+#endif
+
foo (input, output);
for (i = 0; i < N; i++)
===================================================================
@@ -34,7 +34,11 @@ #define M24 115
#define M34 7716
#define M44 16
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 5 / 32)
+#else
#define N 20
+#endif
void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput)
{
@@ -59,9 +63,6 @@ void foo (unsigned int *__restrict__ pIn
int main (int argc, const char* argv[])
{
unsigned int input[N], output[N], i;
- unsigned int check_results[N]
- = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
- 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619};
check_vect ();
@@ -72,6 +73,34 @@ int main (int argc, const char* argv[])
asm volatile ("" ::: "memory");
}
+#if N == 20
+ unsigned int check_results[N]
+ = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
+ 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619};
+#else
+ volatile unsigned int check_results[N];
+
+ for (i = 0; i < N / 5; i++)
+ {
+ unsigned int a = input[i * 5];
+ unsigned int b = input[i * 5 + 1];
+ unsigned int c = input[i * 5 + 2];
+ unsigned int d = input[i * 5 + 3];
+ unsigned int e = input[i * 5 + 4];
+
+ check_results[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
+ check_results[i * 5 + 1] = (M10 * a + M11 * b + M12 * c
+ + M13 * d + M14 * e);
+ check_results[i * 5 + 2] = (M20 * a + M21 * b + M22 * c
+ + M23 * d + M24 * e);
+ check_results[i * 5 + 3] = (M30 * a + M31 * b + M32 * c
+ + M33 * d + M34 * e);
+ check_results[i * 5 + 4] = (M40 * a + M41 * b + M42 * c
+ + M43 * d + M44 * e);
+ asm volatile ("");
+ }
+#endif
+
foo (input, output);
for (i = 0; i < N; i++)
===================================================================
@@ -18,7 +18,11 @@ #define K10 112
#define K01 4322
#define K11 135
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 3 / 32 + 4)
+#else
#define N 16
+#endif
void foo (int *__restrict__ pInput, int *__restrict__ pOutput,
int *__restrict__ pInput2, int *__restrict__ pOutput2)
@@ -46,9 +50,7 @@ void foo (int *__restrict__ pInput, int
int main (int argc, const char* argv[])
{
int input[N], output[N], i;
- int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
int input2[N], output2[N];
- int check_results2[N] = {4322, 135, 13776, 629, 23230, 1123, 32684, 1617, 42138, 2111, 0, 0, 0, 0, 0, 0};
check_vect ();
@@ -63,6 +65,35 @@ int main (int argc, const char* argv[])
__asm__ volatile ("");
}
+#if N == 16
+ int check_results[N] = { 1470, 395, 28271, 5958, 1655, 111653, 10446, 2915,
+ 195035, 14934, 4175, 278417, 19422, 5435, 361799,
+ 0 };
+ int check_results2[N] = { 4322, 135, 13776, 629, 23230, 1123, 32684, 1617,
+ 42138, 2111, 0, 0, 0, 0, 0, 0 };
+#else
+ volatile int check_results[N] = {};
+ volatile int check_results2[N] = {};
+
+ for (int i = 0; i < N / 3; i++)
+ {
+ int a = input[i * 3];
+ int b = input[i * 3 + 1];
+ int c = input[i * 3 + 2];
+ int d = input2[i * 2];
+ int e = input2[i * 2 + 1];
+
+ check_results[i * 3] = M00 * a + M01 * b + M02 * c;
+ check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c;
+ check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c;
+
+ check_results2[i * 2] = K00 * d + K01 * e;
+ check_results2[i * 2 + 1] = K10 * d + K11 * e;
+
+ asm volatile ("" ::: "memory");
+ }
+#endif
+
foo (input, output, input2, output2);
for (i = 0; i < N; i++)
===================================================================
@@ -18,7 +18,11 @@ #define K10 112
#define K01 4322
#define K11 135
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 3 / 32 + 4)
+#else
#define N 16
+#endif
void foo (int *__restrict__ pInput, int *__restrict__ pOutput,
int *__restrict__ pInput2, int *__restrict__ pOutput2)
@@ -47,9 +51,7 @@ void foo (int *__restrict__ pInput, int
int main (int argc, const char* argv[])
{
int input[N], output[N], i;
- int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
int input2[N], output2[N];
- int check_results2[N] = {0, 112, 810, 336, 1620, 560, 2430, 784, 3240, 1008, 0, 0, 0, 0, 0, 0};
check_vect ();
@@ -62,6 +64,35 @@ int main (int argc, const char* argv[])
__asm__ volatile ("");
}
+#if N == 16
+ int check_results[N] = { 1470, 395, 28271, 5958, 1655, 111653, 10446, 2915,
+ 195035, 14934, 4175, 278417, 19422, 5435, 361799,
+ 0 };
+ int check_results2[N] = { 0, 112, 810, 336, 1620, 560, 2430, 784, 3240, 1008,
+ 0, 0, 0, 0, 0, 0 };
+#else
+ volatile int check_results[N] = {};
+ volatile int check_results2[N] = {};
+
+ for (int i = 0; i < N / 3; i++)
+ {
+ int a = input[i * 3];
+ int b = input[i * 3 + 1];
+ int c = input[i * 3 + 2];
+ int d = input2[i * 2];
+ int e = input2[i * 2 + 1];
+
+ check_results[i * 3] = M00 * a + M01 * b + M02 * c;
+ check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c;
+ check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c;
+
+ check_results2[i * 2] = K00 * d;
+ check_results2[i * 2 + 1] = K10 * e;
+
+ asm volatile ("" ::: "memory");
+ }
+#endif
+
foo (input, output, input2, output2);
for (i = 0; i < N; i++)
===================================================================
@@ -18,7 +18,11 @@ #define K10 112
#define K01 4322
#define K11 135
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 3 / 32 + 4)
+#else
#define N 16
+#endif
/* SLP with load permutation and loop-based vectorization. */
void foo (int *__restrict__ pInput, int *__restrict__ pOutput,
@@ -45,9 +49,7 @@ void foo (int *__restrict__ pInput, int
int main (int argc, const char* argv[])
{
int input[N], output[N], i;
- int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
int input2[N], output2[N];
- int check_results2[N] = {0, 405, 810, 1215, 1620, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
check_vect ();
@@ -60,6 +62,30 @@ int main (int argc, const char* argv[])
asm volatile ("" ::: "memory");
}
+#if N == 16
+ int check_results[N] = {1470, 395, 28271, 5958, 1655, 111653, 10446, 2915, 195035, 14934, 4175, 278417, 19422, 5435, 361799, 0};
+ int check_results2[N] = {0, 405, 810, 1215, 1620, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+#else
+ volatile int check_results[N] = {};
+ volatile int check_results2[N] = {};
+
+ for (int i = 0; i < N / 3; i++)
+ {
+ int a = input[i * 3];
+ int b = input[i * 3 + 1];
+ int c = input[i * 3 + 2];
+ int d = input2[i];
+
+ check_results[i * 3] = M00 * a + M01 * b + M02 * c;
+ check_results[i * 3 + 1] = M10 * a + M11 * b + M12 * c;
+ check_results[i * 3 + 2] = M20 * a + M21 * b + M22 * c;
+
+ check_results2[i] = K00 * d;
+
+ asm volatile ("" ::: "memory");
+ }
+#endif
+
foo (input, output, input2, output2);
for (i = 0; i < N; i++)
===================================================================
@@ -1,10 +1,19 @@
#include "tree-vect.h"
+#if VECTOR_BITS > 256
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 8
+#endif
+
+#define N (NINTS * 2)
+#define RESULT (NINTS * (NINTS - 1) / 2 * N + NINTS)
+
extern void abort (void);
typedef struct giga
{
- unsigned int g[16];
+ unsigned int g[N];
} giga;
unsigned long __attribute__((noinline,noclone))
@@ -19,17 +28,17 @@ addfst(giga const *gptr, int num)
int main ()
{
- struct giga g[8];
+ struct giga g[NINTS];
unsigned int n = 1;
int i, j;
check_vect ();
- for (i = 0; i < 8; ++i)
- for (j = 0; j < 16; ++j)
+ for (i = 0; i < NINTS; ++i)
+ for (j = 0; j < N; ++j)
{
g[i].g[j] = n++;
__asm__ volatile ("");
}
- if (addfst (g, 8) != 456)
+ if (addfst (g, NINTS) != RESULT)
abort ();
return 0;
}
===================================================================
@@ -4,7 +4,11 @@
extern void abort (void) __attribute__ ((noreturn));
+#if VECTOR_BITS > 256
+#define N (VECTOR_BITS / 8)
+#else
#define N 32
+#endif
/* Condition reduction where loop size is not known at compile time. Will fail
to vectorize. Version inlined into main loop will vectorize. */
@@ -30,6 +34,11 @@ main (void)
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32
};
+ for (int i = 32; i < N; ++i)
+ {
+ a[i] = 70 + (i & 3);
+ asm volatile ("" ::: "memory");
+ }
check_vect ();
===================================================================
@@ -1,6 +1,8 @@
/* { dg-do compile } */
-struct __attribute__((aligned (32)))
+#include "tree-vect.h"
+
+struct __attribute__((aligned (VECTOR_BITS / 8)))
{
char misaligner;
int foo[100];
===================================================================
@@ -3,14 +3,27 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 2 / 32)
+#else
#define N 16
+#endif
int
main1 ()
{
unsigned int i;
unsigned int out[N*8];
+#if N == 16
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+#else
+ unsigned int in[N * 8];
+ for (unsigned int i = 0; i < N * 8; ++i)
+ {
+ in[i] = i & 63;
+ asm volatile ("" ::: "memory");
+ }
+#endif
unsigned int ia[N*2], a0, a1, a2, a3;
/* The last stmt requires interleaving of not power of 2 size - not
===================================================================
@@ -3,19 +3,27 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 32
-
-unsigned short in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
-unsigned short in2[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
-unsigned short in3[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
-unsigned short check[N] = {0,1,2,3,5,6,7,8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,32,33,35,36,37,38};
-unsigned short check3[N] = {0,1,2,3,4,5,6,7,8,9,10,11,5,6,7,8,9,10,11,12,13,14,15,16,10,11,12,13,14,15,16,17};
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS * 4 / 16)
+#else
+#define N 32
+#endif
+
+unsigned short in[N] = {};
+unsigned short in2[N] = {};
+unsigned short in3[N] = {};
int
main1 ()
{
int i;
+ for (i = 0; i < N; i++)
+ {
+ in[i] = in2[i] = in3[i] = i;
+ asm volatile ("" ::: "memory");
+ }
+
for (i = 0; i < N/4; i++)
{
in[i*4] = in[i*4] + 5;
@@ -43,9 +51,9 @@ main1 ()
}
/* check results: */
- for (i = 4; i < N; i++)
+ for (i = 0; i < N; i++)
{
- if (in2[i] != check[i])
+ if (in2[i] != (i % 4) + (i / 4) * 5)
abort ();
}
@@ -61,9 +69,9 @@ main1 ()
}
/* check results: */
- for (i = 12; i < N; i++)
+ for (i = 0; i < N; i++)
{
- if (in3[i] != check3[i])
+ if (in3[i] != (i % 12) + (i / 12) * 5)
abort ();
}
===================================================================
@@ -3,12 +3,17 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 512
+#define N (VECTOR_BITS * 6 / 16)
+#else
#define N 200
+#endif
void __attribute__((noinline))
foo (unsigned char *__restrict__ pInput, unsigned char *__restrict__ pOutput)
{
- unsigned char i, a, b, c;
+ unsigned char a, b, c;
+ unsigned int i;
for (i = 0; i < N / 3; i++)
{
@@ -24,8 +29,9 @@ foo (unsigned char *__restrict__ pInput,
int main (int argc, const char* argv[])
{
- unsigned char input[N], output[N], i;
+ unsigned char input[N], output[N];
unsigned char check_results[N];
+ unsigned int i;
check_vect ();
===================================================================
@@ -4,7 +4,13 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 9
+#if VECTOR_BITS > 256
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 8
+#endif
+
+#define N (NINTS + 1)
struct extraction
{
@@ -14,7 +20,6 @@ struct extraction
static int a[N] = {1,2,3,4,5,6,7,8,9};
static int b[N] = {17,24,7,0,2,3,4,31,82};
-static int c[N] = {9,17,24,7,0,2,3,4,31};
__attribute__ ((noinline))
int main1 (int x, int y) {
@@ -31,16 +36,15 @@ int main1 (int x, int y) {
/* Vectorizable: distance > VF. */
for (i = 0; i < N; i++)
- {
- *((int *)p + x + i) = *((int *)p + x + i + 8);
- }
+ *((int *)p + x + i) = *((int *)p + x + i + NINTS);
/* check results: */
- for (i = 0; i < N; i++)
- {
- if (p->a[i] != c[i])
- abort();
- }
+ if (p->a[0] != a[N - 1])
+ abort ();
+ for (i = 1; i < N; i++)
+ if (p->a[i] != b[i - 1])
+ abort ();
+
return 0;
}
===================================================================
@@ -3,13 +3,19 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 16
+#if VECTOR_BITS > 256
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 8
+#endif
+
+#define N (NINTS * 2)
__attribute__ ((noinline))
int main1 (int a, int b)
{
int i, j;
- int ia[N][4][N+8];
+ int ia[N][4][N + NINTS];
/* Multidimensional array. Aligned. The "inner" dimensions
are invariant in the inner loop. Store.
@@ -18,7 +24,7 @@ int main1 (int a, int b)
{
for (j = 0; j < N; j++)
{
- ia[i][1][j+8] = (a == b);
+ ia[i][1][j + NINTS] = (a == b);
}
}
@@ -27,7 +33,7 @@ int main1 (int a, int b)
{
for (j = 0; j < N; j++)
{
- if (ia[i][1][j+8] != (a == b))
+ if (ia[i][1][j + NINTS] != (a == b))
abort();
}
}
===================================================================
@@ -4,11 +4,24 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 24
+#if VECTOR_BITS > 128
+#define NINTS (VECTOR_BITS / 32)
+#else
+#define NINTS 4
+#endif
+
+#define N (NINTS * 6)
+
+/* Keep execution time down. */
+#if N <= 24
+#define OUTERN N
+#else
+#define OUTERN NINTS
+#endif
struct s{
int m;
- int n[N/6][N/6][N];
+ int n[4][4][N];
};
struct test1{
@@ -18,36 +31,43 @@ struct test1{
struct s e[N]; /* array e.n is aligned */
};
+/* Avoid big local temporaries. */
+#if NINTS > 8
+struct test1 tmp1;
+#endif
+
__attribute__ ((noinline))
int main1 ()
{
int i,j;
+#if NINTS <= 8
struct test1 tmp1;
+#endif
- for (i = 0; i < N; i++)
- for (j = 3; j < N-3; j++)
+ for (i = 0; i < OUTERN; i++)
+ for (j = NINTS - 1; j < N - NINTS + 1; j++)
{
tmp1.e[i].n[1][2][j] = 8;
}
/* check results: */
- for (i = 0; i < N; i++)
- for (j = 3; j < N-3; j++)
+ for (i = 0; i < OUTERN; i++)
+ for (j = NINTS - 1; j < N - NINTS + 1; j++)
{
if (tmp1.e[i].n[1][2][j] != 8)
abort ();
}
/* not consecutive, will use strided stores */
- for (i = 0; i < N; i++)
- for (j = 3; j < N-3; j++)
+ for (i = 0; i < OUTERN; i++)
+ for (j = NINTS - 1; j < N - NINTS + 1; j++)
{
tmp1.e[j].n[1][2][j] = 8;
}
/* check results: */
- for (i = 0; i < N; i++)
- for (j = 3; j < N-3; j++)
+ for (i = 0; i < OUTERN; i++)
+ for (j = NINTS - 1; j < N - NINTS + 1; j++)
{
if (tmp1.e[j].n[1][2][j] != 8)
abort ();
===================================================================
@@ -7,7 +7,14 @@
#define N 256
-extern int a[N+20];
+/* Pick a value greater than the vector length. */
+#if VECTOR_BITS > 128
+#define OFF (VECTOR_BITS * 5 / 32)
+#else
+#define OFF 20
+#endif
+
+extern int a[N + OFF];
/* The alignment of 'pa' is unknown.
Yet we do know that both the read access and write access have
@@ -52,7 +59,7 @@ main3 ()
for (i = 0; i < N; i++)
{
- a[i] = a[i+20];
+ a[i] = a[i + OFF];
}
return 0;
===================================================================
@@ -17,12 +17,18 @@ float pc[N] __attribute__ ((__aligned__(
can use this information (generate prolog and epilog loops
with known number of iterations, and only if needed). */
+#if VECTOR_BITS > 128
+#define NITER (VECTOR_BITS * 3 / 32)
+#else
+#define NITER 12
+#endif
+
__attribute__ ((noinline)) int
main1 ()
{
int i;
- for (i = 0; i < 10; i++)
+ for (i = 0; i < NITER - 2; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
@@ -42,7 +48,7 @@ main2 ()
{
int i;
- for (i = 0; i < 12; i++)
+ for (i = 0; i < NITER; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
===================================================================
@@ -4,33 +4,38 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 3001
+#define N1 3001
+#if VECTOR_BITS > 256
+#define N2 (VECTOR_BITS / 32 + 2)
+#else
+#define N2 10
+#endif
__attribute__ ((noinline)) int
main1 (float *pa)
{
int i;
- for (i = 0; i < 3001; i++)
+ for (i = 0; i < N1; i++)
{
pa[i] = 2.0;
}
/* check results: */
- for (i = 0; i < 3001; i++)
+ for (i = 0; i < N1; i++)
{
if (pa[i] != 2.0)
abort ();
}
- for (i = 1; i <= 10; i++)
+ for (i = 1; i <= N2; i++)
{
pa[i] = 3.0;
}
/* check results: */
- for (i = 1; i <= 10; i++)
+ for (i = 1; i <= N2; i++)
{
if (pa[i] != 3.0)
abort ();
@@ -42,13 +47,14 @@ main1 (float *pa)
int main (void)
{
int i;
- float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
- float b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+ float a[N1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+ float b[N1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
check_vect ();
/* from bzip2: */
- for (i=0; i<N; i++) b[i] = i;
+ for (i = 0; i < N1; i++)
+ b[i] = i;
a[0] = 0;
for (i = 1; i <= 256; i++) a[i] = b[i-1];
===================================================================
@@ -4,7 +4,14 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 32
+#if VECTOR_BITS > 128
+#define NSHORTS (VECTOR_BITS / 16)
+#else
+#define NSHORTS 8
+#endif
+
+#define NINTS (NSHORTS / 2)
+#define N (NSHORTS * 4)
short sa[N];
short sb[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
@@ -27,14 +34,14 @@ __attribute__ ((noinline)) int main1 (in
copmutations. Vectorizable. */
for (i = 0; i < n; i++)
{
- sa[i+7] = sb[i];
- ia[i+3] = ib[i+1];
+ sa[i + NSHORTS - 1] = sb[i];
+ ia[i + NINTS - 1] = ib[i + 1];
}
/* check results: */
for (i = 0; i < n; i++)
{
- if (sa[i+7] != sb[i] || ia[i+3] != ib[i+1])
+ if (sa[i + NSHORTS - 1] != sb[i] || ia[i + NINTS - 1] != ib[i + 1])
abort ();
}
@@ -57,14 +64,14 @@ __attribute__ ((noinline)) int main2 (in
copmutations. */
for (i = 0; i < n; i++)
{
- ia[i+3] = ib[i];
- sa[i+3] = sb[i+1];
+ ia[i + NINTS - 1] = ib[i];
+ sa[i + NINTS - 1] = sb[i + 1];
}
/* check results: */
for (i = 0; i < n; i++)
{
- if (sa[i+3] != sb[i+1] || ia[i+3] != ib[i])
+ if (sa[i + NINTS - 1] != sb[i + 1] || ia[i + NINTS - 1] != ib[i])
abort ();
}
@@ -75,8 +82,8 @@ int main (void)
{
check_vect ();
- main1 (N-7);
- main2 (N-3);
+ main1 (N - NSHORTS + 1);
+ main2 (N - NINTS + 1);
return 0;
}
===================================================================
@@ -4,7 +4,7 @@
#include <stdarg.h>
#include "tree-vect.h"
-#define N 128
+#define N VECTOR_BITS
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) void
@@ -32,7 +32,9 @@ foo (unsigned char *src, unsigned char *
const int g = *s++;
const int r = *s++;
const int a = *s++;
- if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
+ unsigned short expected
+ = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
+ if (*d != expected)
abort ();
d++;
}
===================================================================
@@ -4,12 +4,24 @@
#include <stdarg.h>
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define NINTS (VECTOR_BITS / 32)
+#define EXTRA (NINTS * 2)
+#else
+#define NINTS 4
+#define EXTRA 10
+#endif
+
#define N 128
-#define RES 21640
-int ib[N+10];
-int ia[N+10];
-int ic[N+10];
+#define RES_A (N * N / 4)
+#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1))
+#define RES_C (N * (N + 1) / 2 + (N + 1))
+#define RES (RES_A + RES_B + RES_C)
+
+int ib[N + EXTRA];
+int ia[N + EXTRA];
+int ic[N + EXTRA];
__attribute__ ((noinline))
int main1 ()
@@ -20,8 +32,8 @@ int main1 ()
for (i = 0; i <= N; i++)
{
suma += ia[i];
- sumb += ib[i+5];
- sumc += ic[i+1];
+ sumb += ib[i + NINTS + 1];
+ sumc += ic[i + 1];
}
/* check results: */
@@ -37,7 +49,7 @@ int main (void)
check_vect ();
- for (i = 0; i < N+10; i++)
+ for (i = 0; i < N + EXTRA; i++)
{
asm volatile ("" : "+r" (i));
ib[i] = i;
===================================================================
@@ -4,28 +4,41 @@
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS / 8)
+#else
+#define N 16
+#endif
+
extern void abort(void);
-unsigned char in[16] __attribute__((__aligned__(16)));
+unsigned char in[N] __attribute__((__aligned__(16)));
int
main (unsigned char argc, char **argv)
{
unsigned char i = 0;
unsigned char sum = 1;
+ unsigned char expected = 1;
check_vect ();
- for (i = 0; i < 16; i++)
+ for (i = 0; i < N; i++)
in[i] = (i + i + 1) & 0xfd;
+ for (i = 0; i < N; i++)
+ {
+ expected |= in[i];
+ asm volatile ("");
+ }
+
/* Prevent constant propagation of the entire loop below. */
asm volatile ("" : : : "memory");
- for (i = 0; i < 16; i++)
+ for (i = 0; i < N; i++)
sum |= in[i];
- if (sum != 29)
+ if (sum != expected)
{
__builtin_printf("Failed %d\n", sum);
abort();
===================================================================
@@ -4,24 +4,37 @@
#include "tree-vect.h"
+#if VECTOR_BITS > 128
+#define N (VECTOR_BITS / 8)
+#else
+#define N 16
+#endif
+
extern void abort(void);
int
main (unsigned char argc, char **argv)
{
- unsigned char in[16] __attribute__((aligned(16)));
+ unsigned char in[N] __attribute__((aligned(16)));
unsigned char i = 0;
unsigned char sum = 1;
+ unsigned char expected = 1;
check_vect ();
- for (i = 0; i < 16; i++)
+ for (i = 0; i < N; i++)
in[i] = (i + i + 1) & 0xfd;
- for (i = 0; i < 16; i++)
+ for (i = 0; i < N; i++)
+ {
+ expected |= in[i];
+ asm volatile ("");
+ }
+
+ for (i = 0; i < N; i++)
sum |= in[i];
- if (sum != 29)
+ if (sum != expected)
{
__builtin_printf("Failed %d\n", sum);
abort();