1 ; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41
2 ; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE
3 ; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX
; test1 - unsigned division of every lane of a <4 x i32> vector by the
; constant splat 7.
5 define <4 x i32> @test1(<4 x i32> %a) {
6 %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
; test2 - same operation as a udiv by splat 7, but on the wider <8 x i32>
; vector type.
30 define <8 x i32> @test2(<8 x i32> %a) {
31 %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
; test3 - unsigned division of every lane of a <8 x i16> vector by the
; constant splat 7.
46 define <8 x i16> @test3(<8 x i16> %a) {
47 %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; test4 - unsigned division by splat 7 on the <16 x i16> vector type.
65 define <16 x i16> @test4(<16 x i16> %a) {
66 %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
; test5 - signed division of every lane of a <8 x i16> vector by the
; constant splat 7.
78 define <8 x i16> @test5(<8 x i16> %a) {
79 %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
; test6 - signed division by splat 7 on the <16 x i16> vector type.
95 define <16 x i16> @test6(<16 x i16> %a) {
96 %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
; test7 - signed division of every lane of a <16 x i8> vector by the
; constant splat 7. The label check below anchors the expectations for the
; SSE4.1 run of this function.
107 define <16 x i8> @test7(<16 x i8> %a) {
108 %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
112 ; SSE41-LABEL: test7:
; test8 - signed division of every lane of a <4 x i32> vector by the
; constant splat 7. The label check below anchors the expectations for the
; SSE4.1 run of this function.
118 define <4 x i32> @test8(<4 x i32> %a) {
119 %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
122 ; SSE41-LABEL: test8:
; test9 - signed division by splat 7 on the <8 x i32> vector type.
160 define <8 x i32> @test9(<8 x i32> %a) {
161 %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
; test10 - unsigned remainder of every lane of a <8 x i32> vector with the
; constant splat 7.
178 define <8 x i32> @test10(<8 x i32> %a) {
179 %rem = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
; test11 - signed remainder of every lane of a <8 x i32> vector with the
; constant splat 7.
195 define <8 x i32> @test11(<8 x i32> %a) {
196 %rem = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
; test12 - takes no arguments. Builds a <2 x i16> vector with both lanes set
; to -1 via two insertelement instructions, then computes the unsigned
; remainder of that vector with itself (dividend and divisor are the same
; value).
214 define <2 x i16> @test12() {
215 %I8 = insertelement <2 x i16> zeroinitializer, i16 -1, i32 0
216 %I9 = insertelement <2 x i16> %I8, i16 -1, i32 1
217 %B9 = urem <2 x i16> %I9, %I9
; PR20355 - signed division of every lane of a <4 x i32> vector by the
; constant splat 3 (the sdiv appears after the check lines). The checks pin
; the exact instruction sequence for two of the RUN configurations; the
; [[Xn]] FileCheck variables capture whichever xmm register is allocated so
; the sequence is matched independently of register assignment.
224 define <4 x i32> @PR20355(<4 x i32> %a) {
; Baseline SSE run: the expected sequence multiplies with pmuludq and
; contains an extra psrad/pand/paddd/psubd group around it.
; NOTE(review): that group is presumably the sign correction needed when a
; signed high-half multiply is emulated with the unsigned pmuludq - confirm.
225 ; SSE-LABEL: PR20355:
226 ; SSE: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
227 ; SSE-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
228 ; SSE-NEXT: psrad $31, %[[X2]]
229 ; SSE-NEXT: pand %xmm0, %[[X2]]
230 ; SSE-NEXT: movdqa %xmm0, %[[X3:xmm[0-9]+]]
231 ; SSE-NEXT: psrad $31, %[[X3]]
232 ; SSE-NEXT: pand %[[X1]], %[[X3]]
233 ; SSE-NEXT: paddd %[[X2]], %[[X3]]
234 ; SSE-NEXT: pshufd {{.*}} # [[X4:xmm[0-9]+]] = xmm0[1,0,3,0]
235 ; SSE-NEXT: pmuludq %[[X1]], %xmm0
236 ; SSE-NEXT: pshufd {{.*}} # [[X1]] = [[X1]][1,0,3,0]
237 ; SSE-NEXT: pmuludq %[[X4]], %[[X1]]
238 ; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[1,3],[[X1]][1,3]
239 ; SSE-NEXT: pshufd {{.*}} # [[X5:xmm[0-9]+]] = xmm0[0,2,1,3]
240 ; SSE-NEXT: psubd %[[X3]], %[[X5]]
241 ; SSE-NEXT: movdqa %[[X5]], %xmm0
242 ; SSE-NEXT: psrld $31, %xmm0
243 ; SSE-NEXT: paddd %[[X5]], %xmm0
; SSE4.1 run: the expected sequence uses pmuldq instead of pmuludq and has
; no psrad/pand/psubd group; the shuffle pattern and the final
; psrld $31 + paddd steps are the same as in the sequence above.
246 ; SSE41-LABEL: PR20355:
247 ; SSE41: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
248 ; SSE41-NEXT: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,0,3,0]
249 ; SSE41-NEXT: pmuldq %[[X1]], %xmm0
250 ; SSE41-NEXT: pshufd {{.*}} # [[X1]] = [[X1]][1,0,3,0]
251 ; SSE41-NEXT: pmuldq %[[X2]], %[[X1]]
252 ; SSE41-NEXT: shufps {{.*}} # xmm0 = xmm0[1,3],[[X1]][1,3]
253 ; SSE41-NEXT: pshufd {{.*}} # [[X3:xmm[0-9]+]] = xmm0[0,2,1,3]
254 ; SSE41-NEXT: movdqa %[[X3]], %xmm0
255 ; SSE41-NEXT: psrld $31, %xmm0
256 ; SSE41-NEXT: paddd %[[X3]], %xmm0
259 %sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>