1 ; Test various target-specific DAG combiner patterns.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Check that an extraction followed by a truncation is effectively treated
; as a bitcast.
;
; Truncating an i32 lane to i8 keeps only the lane's least significant byte.
; On this big-endian target that is byte element 4*lane+3 of the vector, so
; the store can be done directly from the vector register with VSTEB:
; lane 0 -> byte element 3, lane 3 -> byte element 15.
define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %elem1 = extractelement <4 x i32> %add, i32 0
  %elem2 = extractelement <4 x i32> %add, i32 3
  %trunc1 = trunc i32 %elem1 to i8
  %trunc2 = trunc i32 %elem2 to i8
  store i8 %trunc1, i8 *%ptr1
  store i8 %trunc2, i8 *%ptr2
  ret void
}
; Test a case where a pack-type shuffle can be eliminated.
;
; The mask <1, 3, 5, 7> selects the odd i32 lanes of %add1 followed by the
; odd i32 lanes of %add2. Viewed as <8 x i16>:
;   element 1 = low halfword of %add1 lane 1  -> halfword 3 of %add1
;   element 7 = low halfword of %add2 lane 3  -> halfword 7 of %add2
; so both extractions can read the add results directly and no pack
; instruction should be emitted.
define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: f2:
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
; CHECK: br %r14
  %add1 = add <4 x i32> %v1, %v2
  %add2 = add <4 x i32> %v2, %v3
  %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2,
                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 1
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}
; ...and again in a case where there's also a splat and a bitcast.
;
; %splat replicates i64 element 0 of %v3; as <4 x i32> its lanes 1 and 3 both
; hold the low word of that element. After the <1, 3, 5, 7> shuffle, viewed
; as <8 x i16>:
;   element 2 = high halfword of %add lane 3           -> halfword 6 of %add
;   element 7 = low halfword of %v3's i64 element 0    -> halfword 3 of %v3
; so neither the replicate nor the pack should be emitted.
define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f3:
; CHECK-NOT: vrep
; CHECK-NOT: vpk
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 2
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}
; ...and again with a merge low instead of a pack.
;
; The mask <2, 6, 3, 7> interleaves the two low i32 lanes of %add with the
; two low i32 lanes of the splat. Viewed as <8 x i16>:
;   element 4 = high halfword of %add lane 3           -> halfword 6 of %add
;   element 7 = low halfword of %v3's i64 element 0    -> halfword 3 of %v3
; so neither the replicate nor the merge should be emitted.
define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f4:
; CHECK-NOT: vrep
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 4
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}
; ...and again with a merge high.
;
; The mask <0, 4, 1, 5> interleaves the two high i32 lanes of %add with the
; two high i32 lanes of the splat. Viewed as <8 x i16>:
;   element 4 = high halfword of %add lane 1           -> halfword 2 of %add
;   element 7 = low halfword of %v3's i64 element 0    -> halfword 3 of %v3
; so neither the replicate nor the merge should be emitted.
define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
; CHECK-LABEL: f5:
; CHECK-NOT: vrep
; CHECK-NOT: vmr
; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
; CHECK: br %r14
  %add = add <4 x i32> %v1, %v2
  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
                         <2 x i32> <i32 0, i32 0>
  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
                           <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
  %elem1 = extractelement <8 x i16> %bitcast, i32 4
  %elem2 = extractelement <8 x i16> %bitcast, i32 7
  %res = add i16 %elem1, %elem2
  ret i16 %res
}