; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; AVX2 Logical Shift Left
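; A shift left by zero should fold away entirely, and a uniform shift left by
; one is expected to lower to a vector add (vpaddw/vpaddd/vpaddq) rather than
; a shift instruction.
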
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_1:
; CHECK-NOT: vpsllw $0, %ymm0, %ymm0

define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_2:
; CHECK: vpaddw %ymm0, %ymm0, %ymm0

define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_3:
; CHECK: vpsllw $15, %ymm0, %ymm0

define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_1:
; CHECK-NOT: vpslld $0, %ymm0, %ymm0

define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_2:
; CHECK: vpaddd %ymm0, %ymm0, %ymm0

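; A non-constant shift amount inserted into element 0 of the vector should
; select the register form of vpslld, which reads the count from an xmm operand.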
define <8 x i32> @test_vpslld_var(i32 %shift) {
  %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
  %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
  ret <8 x i32> %tmp
}

; CHECK-LABEL: test_vpslld_var:
; CHECK: vpslld %xmm0, %ymm1, %ymm0

define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_3:
; CHECK: vpslld $31, %ymm0, %ymm0

define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_1:
; CHECK-NOT: vpsllq $0, %ymm0, %ymm0

define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_2:
; CHECK: vpaddq %ymm0, %ymm0, %ymm0

define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_3:
; CHECK: vpsllq $63, %ymm0, %ymm0

; AVX2 Arithmetic Shift
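; A shift by zero should again fold away; non-zero uniform amounts should use
; the immediate forms vpsraw and vpsrad. There is no i64 variant here because
; AVX2 has no 256-bit arithmetic right shift for 64-bit elements (vpsraq was
; introduced with AVX-512).
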
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_1:
; CHECK-NOT: vpsraw $0, %ymm0, %ymm0

define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_2:
; CHECK: vpsraw $1, %ymm0, %ymm0

define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_3:
; CHECK: vpsraw $15, %ymm0, %ymm0

define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_1:
; CHECK-NOT: vpsrad $0, %ymm0, %ymm0

define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_2:
; CHECK: vpsrad $1, %ymm0, %ymm0

define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_3:
; CHECK: vpsrad $31, %ymm0, %ymm0

; AVX2 Logical Shift Right
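; A logical shift right by zero should fold away; non-zero uniform amounts
; should use the immediate forms vpsrlw, vpsrld and vpsrlq.
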
define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_1:
; CHECK-NOT: vpsrlw $0, %ymm0, %ymm0

define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_2:
; CHECK: vpsrlw $1, %ymm0, %ymm0

define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_3:
; CHECK: vpsrlw $15, %ymm0, %ymm0

define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_1:
; CHECK-NOT: vpsrld $0, %ymm0, %ymm0

define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_2:
; CHECK: vpsrld $1, %ymm0, %ymm0

define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_3:
; CHECK: vpsrld $31, %ymm0, %ymm0

define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_1:
; CHECK-NOT: vpsrlq $0, %ymm0, %ymm0

define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_2:
; CHECK: vpsrlq $1, %ymm0, %ymm0

define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_3:
; CHECK: vpsrlq $63, %ymm0, %ymm0

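; The 64-bit shift amounts are masked with 8, which already fits in 32 bits, so
; the truncated amounts can feed a 32-bit variable shift directly: expect the
; mask (vpand) followed by vpsrlvd, with no extra shuffle to narrow the amounts.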
; CHECK-LABEL: @srl_trunc_and_v4i64
; CHECK: vpand
; CHECK-NEXT: vpsrlvd
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
  %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
  %trunc = trunc <4 x i64> %and to <4 x i32>
  %sra = lshr <4 x i32> %x, %trunc
  ret <4 x i32> %sra
}