2017-02-19 Jim Wilson <jim.wilson@linaro.org>
sim/aarch64/
* simulator.c (do_vec_ADDV): Move val declaration inside each case,
with type set to input type size.
(do_vec_xtl): Change bias from 3 to 4 for byte case.
sim/testsuite/sim/aarch64/
* bit.s: Change cmp immediates to account for addv bug fix.
* cmtst.s, ldn_single.s, stn_single.s: Likewise.
* xtl.s: New.
@@ -3433,7 +3433,6 @@ do_vec_ADDV (sim_cpu *cpu)
unsigned vm = INSTR (9, 5);
unsigned rd = INSTR (4, 0);
unsigned i;
- uint64_t val = 0;
int full = INSTR (30, 30);
NYI_assert (29, 24, 0x0E);
@@ -3443,24 +3442,33 @@ do_vec_ADDV (sim_cpu *cpu)
switch (INSTR (23, 22))
{
case 0:
- for (i = 0; i < (full ? 16 : 8); i++)
- val += aarch64_get_vec_u8 (cpu, vm, i);
- aarch64_set_vec_u64 (cpu, rd, 0, val);
- return;
+ {
+ uint8_t val = 0;
+ for (i = 0; i < (full ? 16 : 8); i++)
+ val += aarch64_get_vec_u8 (cpu, vm, i);
+ aarch64_set_vec_u64 (cpu, rd, 0, val);
+ return;
+ }
case 1:
- for (i = 0; i < (full ? 8 : 4); i++)
- val += aarch64_get_vec_u16 (cpu, vm, i);
- aarch64_set_vec_u64 (cpu, rd, 0, val);
- return;
+ {
+ uint16_t val = 0;
+ for (i = 0; i < (full ? 8 : 4); i++)
+ val += aarch64_get_vec_u16 (cpu, vm, i);
+ aarch64_set_vec_u64 (cpu, rd, 0, val);
+ return;
+ }
case 2:
- if (! full)
- HALT_UNALLOC;
- for (i = 0; i < 4; i++)
- val += aarch64_get_vec_u32 (cpu, vm, i);
- aarch64_set_vec_u64 (cpu, rd, 0, val);
- return;
+ {
+ uint32_t val = 0;
+ if (! full)
+ HALT_UNALLOC;
+ for (i = 0; i < 4; i++)
+ val += aarch64_get_vec_u32 (cpu, vm, i);
+ aarch64_set_vec_u64 (cpu, rd, 0, val);
+ return;
+ }
case 3:
HALT_UNALLOC;
@@ -5694,7 +5702,7 @@ do_vec_xtl (sim_cpu *cpu)
NYI_assert (19, 19, 1);
shift = INSTR (18, 16);
- bias *= 3;
+ bias *= 4;
for (i = 0; i < 8; i++)
v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
for (i = 0; i < 8; i++)
@@ -5730,7 +5738,7 @@ do_vec_xtl (sim_cpu *cpu)
NYI_assert (19, 19, 1);
shift = INSTR (18, 16);
- bias *= 3;
+ bias *= 4;
for (i = 0; i < 8; i++)
v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
for (i = 0; i < 8; i++)
@@ -34,56 +34,56 @@ mask:
bif v3.8b, v1.8b, v2.8b
addv b4, v3.8b
mov x1, v4.d[0]
- cmp x1, #306
+ cmp x1, #50
bne .Lfailure
mov v3.16b, v0.16b
bif v3.16b, v1.16b, v2.16b
addv b4, v3.16b
mov x1, v4.d[0]
- cmp x1, #1020
+ cmp x1, #252
bne .Lfailure
mov v3.8b, v0.8b
bit v3.8b, v1.8b, v2.8b
addv b4, v3.8b
mov x1, v4.d[0]
- cmp x1, #306
+ cmp x1, #50
bne .Lfailure
mov v3.16b, v0.16b
bit v3.16b, v1.16b, v2.16b
addv b4, v3.16b
mov x1, v4.d[0]
- cmp x1, #1037
+ cmp x1, #13
bne .Lfailure
mov v3.8b, v2.8b
bsl v3.8b, v0.8b, v1.8b
addv b4, v3.8b
mov x1, v4.d[0]
- cmp x1, #306
+ cmp x1, #50
bne .Lfailure
mov v3.16b, v2.16b
bsl v3.16b, v0.16b, v1.16b
addv b4, v3.16b
mov x1, v4.d[0]
- cmp x1, #1020
+ cmp x1, #252
bne .Lfailure
mov v3.8b, v0.8b
eor v3.8b, v1.8b, v2.8b
addv b4, v3.8b
mov x1, v4.d[0]
- cmp x1, #1020
+ cmp x1, #252
bne .Lfailure
mov v3.16b, v0.16b
eor v3.16b, v1.16b, v2.16b
addv b4, v3.16b
mov x1, v4.d[0]
- cmp x1, #2039
+ cmp x1, #247
bne .Lfailure
pass
@@ -40,13 +40,13 @@ inputd2:
cmtst v2.8b, v0.8b, v1.8b
addv b3, v2.8b
mov x1, v3.d[0]
- cmp x1, #0x5fa
+ cmp x1, #0xfa
bne .Lfailure
cmtst v2.16b, v0.16b, v1.16b
addv b3, v2.16b
mov x1, v3.d[0]
- cmp x1, #0xbf4
+ cmp x1, #0xf4
bne .Lfailure
adrp x0, inputh
@@ -56,14 +56,14 @@ inputd2:
cmtst v2.4h, v0.4h, v1.4h
addv h3, v2.4h
mov x1, v3.d[0]
- mov x2, #0x1fffe
+ mov x2, #0xfffe
cmp x1, x2
bne .Lfailure
cmtst v2.8h, v0.8h, v1.8h
addv h3, v2.8h
mov x1, v3.d[0]
- mov x2, #0x3fffc
+ mov x2, #0xfffc
cmp x1, x2
bne .Lfailure
@@ -82,7 +82,7 @@ inputd2:
cmtst v2.4s, v0.4s, v1.4s
addv s3, v2.4s
mov x1, v3.d[0]
- mov x2, #0x1fffffffe
+ mov x2, #0xfffffffe
cmp x1, x2
bne .Lfailure
@@ -48,7 +48,7 @@ input:
mov x6, v3.d[0]
cmp x5, #221
bne .Lfailure
- cmp x6, #307
+ cmp x6, #51
bne .Lfailure
mov x2, x0
@@ -68,7 +68,7 @@ input:
bne .Lfailure
cmp x5, #200
bne .Lfailure
- cmp x6, #264
+ cmp x6, #8
bne .Lfailure
mov x2, x0
@@ -90,9 +90,9 @@ input:
bne .Lfailure
cmp x5, #232
bne .Lfailure
- cmp x6, #296
+ cmp x6, #40
bne .Lfailure
- cmp x7, #360
+ cmp x7, #104
bne .Lfailure
pass
@@ -63,7 +63,7 @@ output:
mov x6, v5.d[0]
cmp x5, #136
bne .Lfailure
- cmp x6, #264
+ cmp x6, #8
bne .Lfailure
mov x2, x1
@@ -114,7 +114,7 @@ output:
bne .Lfailure
cmp x6, #232
bne .Lfailure
- cmp x7, #296
+ cmp x7, #40
bne .Lfailure
pass
new file mode 100644
@@ -0,0 +1,102 @@
+#mach: aarch64
+
+# Check the extend long instructions: sxtl, sxtl2, uxtl, uxtl2.
+
+.include "testutils.inc"
+
+ .data
+ .align 4
+input:
+ .word 0x04030201
+ .word 0x08070605
+ .word 0xfcfdfeff
+ .word 0xf8f9fafb
+
+ start
+ adrp x0, input
+ ldr q0, [x0, #:lo12:input]
+
+ uxtl v1.8h, v0.8b
+ uxtl2 v2.8h, v0.16b
+ addv h3, v1.8h
+ addv h4, v2.8h
+ mov x1, v3.d[0]
+ mov x2, v4.d[0]
+ cmp x1, #36 // 1+2+...+8
+ bne .Lfailure
+ cmp x2, #2012 // 0xf8+0xf9+...+0xff
+ bne .Lfailure
+
+ uxtl v1.4s, v0.4h
+ uxtl2 v2.4s, v0.8h
+ addv s3, v1.4s
+ addv s4, v2.4s
+ mov x1, v3.d[0]
+ mov x2, v4.d[0]
+ mov x3, #5136 // 0x0201+0x0403+0x0605+0x0807
+ cmp x1, x3
+ bne .Lfailure
+ mov x4, #0xeff0
+ movk x4, #0x3, lsl #16 // 0x3eff0 = 0xfeff+0xfcfd+0xfafb+0xf8f9
+ cmp x2, x4
+ bne .Lfailure
+
+ uxtl v1.2d, v0.2s
+ uxtl2 v2.2d, v0.4s
+ addv s3, v1.4s
+ addv s4, v2.4s
+ mov x1, v3.d[0]
+ mov x2, v4.d[0]
+ mov x3, #0x0806
+ movk x3, #0x0c0a, lsl #16 // 0x04030201+0x08070605
+ cmp x1, x3
+ bne .Lfailure
+ mov x4, #0xf9fa
+ movk x4, #0xf5f7, lsl #16 // low 32 bits of 0xfcfdfeff+0xf8f9fafb
+ cmp x2, x4
+ bne .Lfailure
+
+ sxtl v1.8h, v0.8b
+ sxtl2 v2.8h, v0.16b
+ addv h3, v1.8h
+ addv h4, v2.8h
+ mov x1, v3.d[0]
+ mov x2, v4.d[0]
+ cmp x1, #36 // 1+2+...+8
+ bne .Lfailure
+ mov x3, #0xffdc // -36 as u16: (-1)+(-2)+...+(-8)
+ cmp x2, x3
+ bne .Lfailure
+
+ sxtl v1.4s, v0.4h
+ sxtl2 v2.4s, v0.8h
+ addv s3, v1.4s
+ addv s4, v2.4s
+ mov x1, v3.d[0]
+ mov x2, v4.d[0]
+ mov x3, #5136 // 0x0201+0x0403+0x0605+0x0807
+ cmp x1, x3
+ bne .Lfailure
+ mov x4, #0xeff0
+ movk x4, #0xffff, lsl #16 // -4112 as u32: signed sum of upper halfwords
+ cmp x2, x4
+ bne .Lfailure
+
+ sxtl v1.2d, v0.2s
+ sxtl2 v2.2d, v0.4s
+ addv s3, v1.4s
+ addv s4, v2.4s
+ mov x1, v3.d[0]
+ mov x2, v4.d[0]
+ mov x3, #0x0806
+ movk x3, #0x0c0a, lsl #16 // 0x04030201+0x08070605
+ cmp x1, x3
+ bne .Lfailure
+ mov x4, #0xf9f8
+ movk x4, #0xf5f7, lsl #16 // low 32 bits of signed sum of upper words
+ cmp x2, x4
+ bne .Lfailure
+
+ pass
+.Lfailure:
+ fail