; RUN: opt < %s -instcombine -S | FileCheck %s
; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
; The immediate operand is the function argument %b, so the call is expected
; to be left in place unchanged.
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT: ret <4 x double>
}
; In the following 3 tests, both zero mask bits of the immediate are set.
; Immediate 136 (0x88, bits 3 and 7 set) on the <4 x double> variant.
; Expected output: the call folds away and a zero vector is returned.
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT: ret <4 x double> zeroinitializer
}
; Immediate 136 (0x88, bits 3 and 7 set) on the <8 x float> variant.
; Expected output: the call folds away and a zero vector is returned.
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT: ret <8 x float> zeroinitializer
}
; Immediate 136 (0x88, bits 3 and 7 set) on the <8 x i32> variant.
; Expected output: the call folds away and a zero vector is returned.
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %res

; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT: ret <8 x i32> zeroinitializer
}
; The other control bits are ignored when both zero mask bits of the immediate are set.
; Immediate 255 (0xff) sets every bit; the expected output is still a zero vector.
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT: ret <4 x double> zeroinitializer
}
; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
; Immediate 0 (0x00): both result halves are expected to be the lower half of
; %a0, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 1 (0x01): expected result is the upper half of %a0 followed by the
; lower half of %a0, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 2 (0x02): expected result is the lower half of %a1 followed by the
; lower half of %a0.
define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 3 (0x03): expected result is the upper half of %a1 followed by the
; lower half of %a0.
define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 16 (0x10): one of the two cases where a source vector is returned
; directly; the expected output is %a0 with no shuffle.
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT: ret <4 x double> %a0
}
; Immediate 17 (0x11): both result halves are expected to be the upper half of
; %a0, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 18 (0x12): expected result is the lower half of %a1 followed by the
; upper half of %a0.
define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 19 (0x13): expected result is the upper half of %a1 followed by the
; upper half of %a0.
define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 32 (0x20): expected result is the lower half of %a0 followed by the
; lower half of %a1.
define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 33 (0x21): expected result is the upper half of %a0 followed by the
; lower half of %a1.
define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 34 (0x22): both result halves are expected to be the lower half of
; %a1, so %a1 becomes the first shuffle operand and the second is undef.
define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 35 (0x23): expected result is the upper half of %a1 followed by the
; lower half of %a1, so %a1 is the first shuffle operand and the second is undef.
define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 48 (0x30): expected result is the lower half of %a0 followed by the
; upper half of %a1.
define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 49 (0x31): expected result is the upper half of %a0 followed by the
; upper half of %a1.
define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
; Immediate 50 (0x32): one of the two cases where a source vector is returned
; directly; the expected output is %a1 with no shuffle.
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT: ret <4 x double> %a1
}
; Immediate 51 (0x33): both result halves are expected to be the upper half of
; %a1, so %a1 becomes the first shuffle operand and the second is undef.
define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> %1
}
; Confirm that a mask for 32-bit elements is also correct.
; Immediate 49 (0x31) on <8 x float>: expected result is elements 4-7 of %a0
; followed by elements 4-7 of %a1 (shuffle indices 12-15).
define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT: %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> %1
}
; Confirm that when a single zero mask bit is set, we do nothing.
; Immediate 131 (0x83) has bit 7 set but not bit 3. The call is expected to
; remain, with the immediate printed as the signed i8 value -125 (131 - 256).
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 -125)
; CHECK-NEXT: ret <4 x double>
}
; Confirm that when the other zero mask bit is set, we do nothing. Also confirm that an ignored bit has no effect.
; Immediate 72 (0x48) has bit 3 set but not bit 7, and additionally sets bit 6.
; The call is expected to remain with the immediate unchanged.
define <4 x double> @perm2pd_0x48(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x48
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 72)
; CHECK-NEXT: ret <4 x double>
}
; Declarations of the three AVX vperm2f128 intrinsic variants called by the
; tests in this file: <4 x double>, <8 x float> and <8 x i32>. Each takes two
; source vectors and an i8 control immediate.
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone