1 ; Test memcmp using CLC, with i32 results.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5 declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
7 ; Zero-length comparisons should be optimized away.
8 define i32 @f1(i8 *%src1, i8 *%src2) {
; The size operand is the constant 0, so no bytes are actually compared.
12 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
16 ; Check a case where the result is used as an integer.
17 define i32 @f2(i8 *%src1, i8 *%src2) {
; Expected output: one CLC covering both bytes, then an IPM/SRL/RLL sequence
; that leaves an integer in %r2. The scratch register for IPM and SRL may be
; any of %r0-%r5.
19 ; CHECK: clc 0(2,%r2), 0(%r3)
20 ; CHECK: ipm [[REG:%r[0-5]]]
21 ; CHECK: srl [[REG]], 28
22 ; CHECK: rll %r2, [[REG]], 31
; Constant 2-byte comparison of %src1 against %src2.
24 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
28 ; Check a case where the result is tested for equality.
29 define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; Expected output: a 3-byte CLC with JE on the very next line, targeting a
; label that starts with '.'. No integer result is built in between.
31 ; CHECK: clc 0(3,%r2), 0(%r3)
32 ; CHECK-NEXT: je {{\..*}}
; Constant 3-byte comparison.
34 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
; Branch to %exit when the result is zero, otherwise to %store.
35 %cmp = icmp eq i32 %res, 0
36 br i1 %cmp, label %exit, label %store
; Stores the constant 0 through %dest.
39 store i32 0, i32 *%dest
46 ; Check a case where the result is tested for inequality.
47 define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
; Expected output: a 4-byte CLC with JLH on the very next line, targeting a
; label that starts with '.'.
49 ; CHECK: clc 0(4,%r2), 0(%r3)
50 ; CHECK-NEXT: jlh {{\..*}}
; Constant 4-byte comparison.
53 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
; Branch to %exit when the result is non-zero, otherwise to %store.
54 %cmp = icmp ne i32 %res, 0
55 br i1 %cmp, label %exit, label %store
; Stores the constant 0 through %dest.
58 store i32 0, i32 *%dest
65 ; Check a case where the result is tested via slt.
66 define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
; Expected output: a 5-byte CLC with JL on the very next line, targeting a
; label that starts with '.'.
68 ; CHECK: clc 0(5,%r2), 0(%r3)
69 ; CHECK-NEXT: jl {{\..*}}
; Constant 5-byte comparison.
72 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
; Branch to %exit when the result is negative (signed), otherwise to %store.
73 %cmp = icmp slt i32 %res, 0
74 br i1 %cmp, label %exit, label %store
; Stores the constant 0 through %dest.
77 store i32 0, i32 *%dest
84 ; Check a case where the result is tested for sgt.
85 define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
; Expected output: a 6-byte CLC with JH on the very next line, targeting a
; label that starts with '.'.
87 ; CHECK: clc 0(6,%r2), 0(%r3)
88 ; CHECK-NEXT: jh {{\..*}}
; Constant 6-byte comparison.
91 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
; Branch to %exit when the result is positive (signed), otherwise to %store.
92 %cmp = icmp sgt i32 %res, 0
93 br i1 %cmp, label %exit, label %store
; Stores the constant 0 through %dest.
96 store i32 0, i32 *%dest
103 ; Check the upper end of the CLC range. Here the result is used both as
104 ; an integer and for branching.
105 define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
; Expected output: a single CLC of length 256, followed by the IPM/SRL/RLL
; sequence that leaves an integer in %r2. The scratch register may be any of
; %r0-%r5.
107 ; CHECK: clc 0(256,%r2), 0(%r3)
108 ; CHECK: ipm [[REG:%r[0-5]]]
109 ; CHECK: srl [[REG]], 28
110 ; CHECK: rll %r2, [[REG]], 31
; Constant 256-byte comparison.
114 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
; The same result also drives a branch: %exit when negative, else %store.
115 %cmp = icmp slt i32 %res, 0
116 br i1 %cmp, label %exit, label %store
; Stores the constant 0 through %dest.
119 store i32 0, i32 *%dest
126 ; 257 bytes needs two CLCs.
127 define i32 @f8(i8 *%src1, i8 *%src2) {
; Expected output: a 256-byte CLC at offset 0, a JLH to a label captured as
; LABEL, then a 1-byte CLC at offset 256 for the remaining byte. An IPM
; follows, using any of %r0-%r5.
129 ; CHECK: clc 0(256,%r2), 0(%r3)
130 ; CHECK: jlh [[LABEL:\..*]]
131 ; CHECK: clc 256(1,%r2), 256(%r3)
133 ; CHECK: ipm [[REG:%r[0-5]]]
; Constant 257-byte comparison (256 + 1).
135 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
139 ; Test a comparison of 257 bytes in which the CC result can be used directly.
140 define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) {
; Expected output: a 256-byte CLC at offset 0, a JLH to a label captured as
; LABEL, then a 1-byte CLC at offset 256 for the remaining byte.
142 ; CHECK: clc 0(256,%r2), 0(%r3)
143 ; CHECK: jlh [[LABEL:\..*]]
144 ; CHECK: clc 256(1,%r2), 256(%r3)
; Constant 257-byte comparison (256 + 1).
149 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
; Branch to %exit when the result is negative (signed), otherwise to %store.
150 %cmp = icmp slt i32 %res, 0
151 br i1 %cmp, label %exit, label %store
; Stores the constant 0 through %dest.
154 store i32 0, i32 *%dest
161 ; Test the largest size that can use two CLCs.
162 define i32 @f10(i8 *%src1, i8 *%src2) {
; Expected output: two full 256-byte CLCs, at offsets 0 and 256, separated by
; a JLH to a label captured as LABEL. An IPM follows, using any of %r0-%r5.
164 ; CHECK: clc 0(256,%r2), 0(%r3)
165 ; CHECK: jlh [[LABEL:\..*]]
166 ; CHECK: clc 256(256,%r2), 256(%r3)
168 ; CHECK: ipm [[REG:%r[0-5]]]
; Constant 512-byte comparison (2 * 256).
170 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512)
174 ; Test the smallest size that needs 3 CLCs.
175 define i32 @f11(i8 *%src1, i8 *%src2) {
; Expected output: 256-byte CLCs at offsets 0 and 256, then a 1-byte CLC at
; offset 512. Both JLH branches must target the same label (the second one
; reuses the LABEL capture). An IPM follows, using any of %r0-%r5.
177 ; CHECK: clc 0(256,%r2), 0(%r3)
178 ; CHECK: jlh [[LABEL:\..*]]
179 ; CHECK: clc 256(256,%r2), 256(%r3)
180 ; CHECK: jlh [[LABEL]]
181 ; CHECK: clc 512(1,%r2), 512(%r3)
183 ; CHECK: ipm [[REG:%r[0-5]]]
; Constant 513-byte comparison (2 * 256 + 1).
185 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513)
189 ; Test the largest size that can use 3 CLCs.
190 define i32 @f12(i8 *%src1, i8 *%src2) {
; Expected output: three full 256-byte CLCs at offsets 0, 256 and 512. Both
; JLH branches must target the same label (the second one reuses the LABEL
; capture). An IPM follows, using any of %r0-%r5.
192 ; CHECK: clc 0(256,%r2), 0(%r3)
193 ; CHECK: jlh [[LABEL:\..*]]
194 ; CHECK: clc 256(256,%r2), 256(%r3)
195 ; CHECK: jlh [[LABEL]]
196 ; CHECK: clc 512(256,%r2), 512(%r3)
198 ; CHECK: ipm [[REG:%r[0-5]]]
; Constant 768-byte comparison (3 * 256).
200 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768)
204 ; The next size up uses a loop instead. We leave the more complicated
205 ; loop tests to memcpy-01.ll, which shares the same form.
206 define i32 @f13(i8 *%src1, i8 *%src2) {
; Expected output: a counted loop rather than straight-line CLCs.
;  - LGHI loads 3 into a count register (any of %r0-%r5).
;  - The loop body compares 256 bytes at offset 0, with a JLH to a label
;    captured as LABEL.
;  - Both pointers are advanced by 256 with LA; the two LA instructions may
;    appear in either order (DAG match).
;  - BRCTG on the count register branches back to the loop label.
;  - After the loop, a 1-byte CLC at offset 0 of the advanced pointers
;    handles the final byte, followed by an IPM.
208 ; CHECK: lghi [[COUNT:%r[0-5]]], 3
209 ; CHECK: [[LOOP:.L[^:]*]]:
210 ; CHECK: clc 0(256,%r2), 0(%r3)
211 ; CHECK: jlh [[LABEL:\..*]]
212 ; CHECK-DAG: la %r2, 256(%r2)
213 ; CHECK-DAG: la %r3, 256(%r3)
214 ; CHECK: brctg [[COUNT]], [[LOOP]]
215 ; CHECK: clc 0(1,%r2), 0(%r3)
217 ; CHECK: ipm [[REG:%r[0-5]]]
; Constant 769-byte comparison (3 * 256 + 1).
219 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)