1 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
2 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64
4 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c
; test_mm_abs_epi8: bitcasts %a0 from <2 x i64> to <16 x i8>, calls
; @llvm.x86.ssse3.pabs.b.128 on it, and bitcasts the <16 x i8> result back to
; <2 x i64> (%res). Both the X32 and X64 check sets expect a single
; `pabsb %xmm0, %xmm0`.
6 define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
7 ; X32-LABEL: test_mm_abs_epi8:
9 ; X32-NEXT: pabsb %xmm0, %xmm0
12 ; X64-LABEL: test_mm_abs_epi8:
14 ; X64-NEXT: pabsb %xmm0, %xmm0
16 %arg = bitcast <2 x i64> %a0 to <16 x i8>
17 %call = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %arg)
18 %res = bitcast <16 x i8> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_abs_epi8.
21 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
; test_mm_abs_epi16: bitcasts %a0 from <2 x i64> to <8 x i16>, calls
; @llvm.x86.ssse3.pabs.w.128 on it, and bitcasts the <8 x i16> result back to
; <2 x i64> (%res). Both the X32 and X64 check sets expect a single
; `pabsw %xmm0, %xmm0`.
23 define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
24 ; X32-LABEL: test_mm_abs_epi16:
26 ; X32-NEXT: pabsw %xmm0, %xmm0
29 ; X64-LABEL: test_mm_abs_epi16:
31 ; X64-NEXT: pabsw %xmm0, %xmm0
33 %arg = bitcast <2 x i64> %a0 to <8 x i16>
34 %call = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %arg)
35 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_abs_epi16.
38 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
; test_mm_abs_epi32: bitcasts %a0 from <2 x i64> to <4 x i32>, calls
; @llvm.x86.ssse3.pabs.d.128 on it, and bitcasts the <4 x i32> result back to
; <2 x i64> (%res). Both the X32 and X64 check sets expect a single
; `pabsd %xmm0, %xmm0`.
40 define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
41 ; X32-LABEL: test_mm_abs_epi32:
43 ; X32-NEXT: pabsd %xmm0, %xmm0
46 ; X64-LABEL: test_mm_abs_epi32:
48 ; X64-NEXT: pabsd %xmm0, %xmm0
50 %arg = bitcast <2 x i64> %a0 to <4 x i32>
51 %call = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %arg)
52 %res = bitcast <4 x i32> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_abs_epi32.
55 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
; test_mm_alignr_epi8: unlike the other tests in this file, no intrinsic is
; called. %a0 and %a1 are bitcast to <16 x i8> and combined with a
; shufflevector whose mask is 2..17, i.e. elements 2-15 of %arg0 followed by
; elements 0-1 of %arg1; the result is bitcast back to <2 x i64> (%res).
; Both the X32 and X64 check sets expect a palignr whose decoded shuffle
; comment reads xmm1 = xmm0[2..15],xmm1[0,1], followed by a movdqa copying
; xmm1 into xmm0. The regex in the palignr check matches any text up to a '#'
; so only the shuffle description that follows it is compared literally.
57 define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
58 ; X32-LABEL: test_mm_alignr_epi8:
60 ; X32-NEXT: palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
61 ; X32-NEXT: movdqa %xmm1, %xmm0
64 ; X64-LABEL: test_mm_alignr_epi8:
66 ; X64-NEXT: palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
67 ; X64-NEXT: movdqa %xmm1, %xmm0
69 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
70 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
; Mask indices 0-15 select from %arg0 and 16-31 select from %arg1.
71 %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
72 %res = bitcast <16 x i8> %shuf to <2 x i64>
; test_mm_hadd_epi16: bitcasts %a0 and %a1 from <2 x i64> to <8 x i16>, passes
; them in that order to @llvm.x86.ssse3.phadd.w.128, and bitcasts the
; <8 x i16> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `phaddw %xmm1, %xmm0`.
76 define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
77 ; X32-LABEL: test_mm_hadd_epi16:
79 ; X32-NEXT: phaddw %xmm1, %xmm0
82 ; X64-LABEL: test_mm_hadd_epi16:
84 ; X64-NEXT: phaddw %xmm1, %xmm0
86 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
87 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
88 %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
89 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_hadd_epi16.
92 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; test_mm_hadd_epi32: bitcasts %a0 and %a1 from <2 x i64> to <4 x i32>, passes
; them in that order to @llvm.x86.ssse3.phadd.d.128, and bitcasts the
; <4 x i32> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `phaddd %xmm1, %xmm0`.
94 define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
95 ; X32-LABEL: test_mm_hadd_epi32:
97 ; X32-NEXT: phaddd %xmm1, %xmm0
100 ; X64-LABEL: test_mm_hadd_epi32:
102 ; X64-NEXT: phaddd %xmm1, %xmm0
104 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
105 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
106 %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
107 %res = bitcast <4 x i32> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_hadd_epi32.
110 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; test_mm_hadds_epi16: bitcasts %a0 and %a1 from <2 x i64> to <8 x i16>,
; passes them in that order to @llvm.x86.ssse3.phadd.sw.128, and bitcasts the
; <8 x i16> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `phaddsw %xmm1, %xmm0`.
112 define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
113 ; X32-LABEL: test_mm_hadds_epi16:
115 ; X32-NEXT: phaddsw %xmm1, %xmm0
118 ; X64-LABEL: test_mm_hadds_epi16:
120 ; X64-NEXT: phaddsw %xmm1, %xmm0
122 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
123 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
124 %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
125 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_hadds_epi16.
128 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; test_mm_hsub_epi16: bitcasts %a0 and %a1 from <2 x i64> to <8 x i16>, passes
; them in that order to @llvm.x86.ssse3.phsub.w.128, and bitcasts the
; <8 x i16> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `phsubw %xmm1, %xmm0`.
130 define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
131 ; X32-LABEL: test_mm_hsub_epi16:
133 ; X32-NEXT: phsubw %xmm1, %xmm0
136 ; X64-LABEL: test_mm_hsub_epi16:
138 ; X64-NEXT: phsubw %xmm1, %xmm0
140 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
141 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
142 %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
143 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_hsub_epi16.
146 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; test_mm_hsub_epi32: bitcasts %a0 and %a1 from <2 x i64> to <4 x i32>, passes
; them in that order to @llvm.x86.ssse3.phsub.d.128, and bitcasts the
; <4 x i32> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `phsubd %xmm1, %xmm0`.
148 define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
149 ; X32-LABEL: test_mm_hsub_epi32:
151 ; X32-NEXT: phsubd %xmm1, %xmm0
154 ; X64-LABEL: test_mm_hsub_epi32:
156 ; X64-NEXT: phsubd %xmm1, %xmm0
158 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
159 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
160 %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
161 %res = bitcast <4 x i32> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_hsub_epi32.
164 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
; test_mm_hsubs_epi16: bitcasts %a0 and %a1 from <2 x i64> to <8 x i16>,
; passes them in that order to @llvm.x86.ssse3.phsub.sw.128, and bitcasts the
; <8 x i16> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `phsubsw %xmm1, %xmm0`.
166 define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
167 ; X32-LABEL: test_mm_hsubs_epi16:
169 ; X32-NEXT: phsubsw %xmm1, %xmm0
172 ; X64-LABEL: test_mm_hsubs_epi16:
174 ; X64-NEXT: phsubsw %xmm1, %xmm0
176 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
177 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
178 %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
179 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_hsubs_epi16.
182 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; test_mm_maddubs_epi16: bitcasts %a0 and %a1 from <2 x i64> to <16 x i8>,
; passes them in that order to @llvm.x86.ssse3.pmadd.ub.sw.128 (which takes
; two <16 x i8> operands and yields <8 x i16>), and bitcasts that result back
; to <2 x i64> (%res). Both the X32 and X64 check sets expect
; `pmaddubsw %xmm1, %xmm0`.
184 define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
185 ; X32-LABEL: test_mm_maddubs_epi16:
187 ; X32-NEXT: pmaddubsw %xmm1, %xmm0
190 ; X64-LABEL: test_mm_maddubs_epi16:
192 ; X64-NEXT: pmaddubsw %xmm1, %xmm0
194 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
195 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
196 %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
197 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_maddubs_epi16.
200 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
; test_mm_mulhrs_epi16: bitcasts %a0 and %a1 from <2 x i64> to <8 x i16>,
; passes them in that order to @llvm.x86.ssse3.pmul.hr.sw.128, and bitcasts
; the <8 x i16> result back to <2 x i64> (%res). Both the X32 and X64 check
; sets expect `pmulhrsw %xmm1, %xmm0`.
202 define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
203 ; X32-LABEL: test_mm_mulhrs_epi16:
205 ; X32-NEXT: pmulhrsw %xmm1, %xmm0
208 ; X64-LABEL: test_mm_mulhrs_epi16:
210 ; X64-NEXT: pmulhrsw %xmm1, %xmm0
212 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
213 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
214 %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
215 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_mulhrs_epi16.
218 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
; test_mm_shuffle_epi8: bitcasts %a0 and %a1 from <2 x i64> to <16 x i8>,
; passes them in that order to @llvm.x86.ssse3.pshuf.b.128, and bitcasts the
; <16 x i8> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `pshufb %xmm1, %xmm0`.
220 define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
221 ; X32-LABEL: test_mm_shuffle_epi8:
223 ; X32-NEXT: pshufb %xmm1, %xmm0
226 ; X64-LABEL: test_mm_shuffle_epi8:
228 ; X64-NEXT: pshufb %xmm1, %xmm0
230 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
231 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
232 %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
233 %res = bitcast <16 x i8> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_shuffle_epi8.
236 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; test_mm_sign_epi8: bitcasts %a0 and %a1 from <2 x i64> to <16 x i8>, passes
; them in that order to @llvm.x86.ssse3.psign.b.128, and bitcasts the
; <16 x i8> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `psignb %xmm1, %xmm0`.
238 define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
239 ; X32-LABEL: test_mm_sign_epi8:
241 ; X32-NEXT: psignb %xmm1, %xmm0
244 ; X64-LABEL: test_mm_sign_epi8:
246 ; X64-NEXT: psignb %xmm1, %xmm0
248 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
249 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
250 %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
251 %res = bitcast <16 x i8> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_sign_epi8.
254 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
; test_mm_sign_epi16: bitcasts %a0 and %a1 from <2 x i64> to <8 x i16>, passes
; them in that order to @llvm.x86.ssse3.psign.w.128, and bitcasts the
; <8 x i16> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `psignw %xmm1, %xmm0`.
256 define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
257 ; X32-LABEL: test_mm_sign_epi16:
259 ; X32-NEXT: psignw %xmm1, %xmm0
262 ; X64-LABEL: test_mm_sign_epi16:
264 ; X64-NEXT: psignw %xmm1, %xmm0
266 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
267 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
268 %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
269 %res = bitcast <8 x i16> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_sign_epi16.
272 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
; test_mm_sign_epi32: bitcasts %a0 and %a1 from <2 x i64> to <4 x i32>, passes
; them in that order to @llvm.x86.ssse3.psign.d.128, and bitcasts the
; <4 x i32> result back to <2 x i64> (%res). Both the X32 and X64 check sets
; expect `psignd %xmm1, %xmm0`.
274 define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
275 ; X32-LABEL: test_mm_sign_epi32:
277 ; X32-NEXT: psignd %xmm1, %xmm0
280 ; X64-LABEL: test_mm_sign_epi32:
282 ; X64-NEXT: psignd %xmm1, %xmm0
284 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
285 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
286 %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
287 %res = bitcast <4 x i32> %call to <2 x i64>
; Declaration of the intrinsic called by test_mm_sign_epi32.
290 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone