1 ; RUN: opt < %s -instcombine -S | FileCheck %s
3 ; We should optimize these two redundant insertqi into one
4 define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
5 ; CHECK-LABEL: @testInsertTwice
6 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
7 ; CHECK-NEXT: ret <2 x i64> %1
8 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
9 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
13 ; The result of this insert is the second arg, since the top 64 bits of
14 ; the result are undefined, and we copy the bottom 64 bits from the
16 define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
17 ; CHECK-LABEL: @testInsert64Bits
18 ; CHECK-NEXT: ret <2 x i64> %i
19 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
23 ; Test the several types of ranges and ordering that exist for two insertqi
24 define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
25 ; CHECK-LABEL: @testInsertContainedRange
26 ; CHECK: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
27 ; CHECK: ret <2 x i64> %1
28 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
29 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
33 define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
34 ; CHECK-LABEL: @testInsertContainedRange_2
35 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
36 ; CHECK-NEXT: ret <2 x i64> %1
37 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
38 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
42 define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
43 ; CHECK-LABEL: @testInsertOverlappingRange
44 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
45 ; CHECK-NEXT: ret <2 x i64> %1
46 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
47 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
51 define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
52 ; CHECK-LABEL: @testInsertOverlappingRange_2
53 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
54 ; CHECK-NEXT: ret <2 x i64> %1
55 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
56 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
60 define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
61 ; CHECK-LABEL: @testInsertAdjacentRange
62 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
63 ; CHECK-NEXT: ret <2 x i64> %1
64 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
65 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
69 define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
70 ; CHECK-LABEL: @testInsertAdjacentRange_2
71 ; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
72 ; CHECK-NEXT: ret <2 x i64> %1
73 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
74 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
78 define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
79 ; CHECK-LABEL: @testInsertDisjointRange
80 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
81 ; CHECK-NEXT: %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
82 ; CHECK-NEXT: ret <2 x i64> %2
83 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
84 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
88 define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
89 ; CHECK-LABEL: @testInsertDisjointRange_2
90 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
91 ; CHECK-NEXT: %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
92 ; CHECK-NEXT: ret <2 x i64> %2
93 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
94 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
98 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
99 ; CHECK-LABEL: @testZeroLength
100 ; CHECK-NEXT: ret <2 x i64> %i
101 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
105 define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
106 ; CHECK-LABEL: @testUndefinedInsertq_1
107 ; CHECK-NEXT: ret <2 x i64> undef
108 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
112 define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
113 ; CHECK-LABEL: @testUndefinedInsertq_2
114 ; CHECK-NEXT: ret <2 x i64> undef
115 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
119 define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
120 ; CHECK-LABEL: @testUndefinedInsertq_3
121 ; CHECK-NEXT: ret <2 x i64> undef
122 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
127 ; Vector Demanded Bits
130 define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) nounwind uwtable ssp {
131 ; CHECK-LABEL: @test_extrq_arg0
132 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
133 ; CHECK-NEXT: ret <2 x i64> %1
134 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
135 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
139 define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) nounwind uwtable ssp {
140 ; CHECK-LABEL: @test_extrq_arg1
141 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
142 ; CHECK-NEXT: ret <2 x i64> %1
143 %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
144 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
148 define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) nounwind uwtable ssp {
149 ; CHECK-LABEL: @test_extrq_args01
150 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
151 ; CHECK-NEXT: ret <2 x i64> %1
152 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
153 %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
154 %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
158 define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) nounwind uwtable ssp {
159 ; CHECK-LABEL: @test_extrq_ret
160 ; CHECK-NEXT: ret <2 x i64> undef
161 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
162 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
166 define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) nounwind uwtable ssp {
167 ; CHECK-LABEL: @test_extrqi_arg0
168 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
169 ; CHECK-NEXT: ret <2 x i64> %1
170 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
171 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
175 define <2 x i64> @test_extrqi_ret(<2 x i64> %x) nounwind uwtable ssp {
176 ; CHECK-LABEL: @test_extrqi_ret
177 ; CHECK-NEXT: ret <2 x i64> undef
178 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
179 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
183 define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
184 ; CHECK-LABEL: @test_insertq_arg0
185 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
186 ; CHECK-NEXT: ret <2 x i64> %1
187 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
188 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
192 define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
193 ; CHECK-LABEL: @test_insertq_ret
194 ; CHECK-NEXT: ret <2 x i64> undef
195 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
196 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
200 define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
201 ; CHECK-LABEL: @test_insertqi_arg0
202 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
203 ; CHECK-NEXT: ret <2 x i64> %1
204 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
205 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
209 define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
210 ; CHECK-LABEL: @test_insertqi_arg1
211 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
212 ; CHECK-NEXT: ret <2 x i64> %1
213 %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
214 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
218 define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
219 ; CHECK-LABEL: @test_insertqi_args01
220 ; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
221 ; CHECK-NEXT: ret <2 x i64> %1
222 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
223 %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
224 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
228 define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) nounwind uwtable ssp {
229 ; CHECK-LABEL: @test_insertqi_ret
230 ; CHECK-NEXT: ret <2 x i64> undef
231 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
232 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
236 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
237 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
239 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
240 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
242 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
243 declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
245 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
246 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind