1 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
2 ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
4 ; This file checks that atomic (non-seq_cst) stores of immediate values are
5 ; done in one mov instruction and not 2. More precisely, it makes sure that the
6 ; immediate is not first copied uselessly into a register.
; Similarly, it checks that a binary operation of an immediate with an atomic
9 ; variable that is stored back in that variable is done as a single instruction.
10 ; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
11 ; should be just an add instruction, instead of loading x into a register, doing
12 ; an add and storing the result back.
13 ; The binary operations supported are currently add, and, or, xor.
; sub is not supported because it is translated by an addition of the
; negated immediate.
16 ; Finally, we also check the same kind of pattern for inc/dec
18 ; seq_cst stores are left as (lock) xchgl, but we try to check every other
19 ; attribute at least once.
21 ; Please note that these operations do not require the lock prefix: only
22 ; sequentially consistent stores require this kind of protection on X86.
23 ; And even for seq_cst operations, llvm uses the xchg instruction which has
24 ; an implicit lock prefix, so making it explicit is not required.
; A non-seq_cst atomic i8 store of an immediate should be a single movb
; with an immediate operand (no intermediate register copy).
define void @store_atomic_imm_8(i8* %p) {
; X64-LABEL: store_atomic_imm_8
; X64: movb
; X64-NOT: movb
; X32-LABEL: store_atomic_imm_8
; X32: movb
; X32-NOT: movb
  store atomic i8 42, i8* %p release, align 1
  ret void
}
; Same as store_atomic_imm_8, but for a 16-bit monotonic store.
define void @store_atomic_imm_16(i16* %p) {
; X64-LABEL: store_atomic_imm_16
; X64: movw
; X64-NOT: movw
; X32-LABEL: store_atomic_imm_16
; X32: movw
; X32-NOT: movw
  store atomic i16 42, i16* %p monotonic, align 2
  ret void
}
; Same pattern for a 32-bit release store.
define void @store_atomic_imm_32(i32* %p) {
; X64-LABEL: store_atomic_imm_32
; X64: movl
; X64-NOT: movl
; On 32 bits, there is an extra movl for each of those functions
; (probably for alignment reasons).
; X32-LABEL: store_atomic_imm_32
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
  store atomic i32 42, i32* %p release, align 4
  ret void
}
; 64-bit atomic store: a single movq on x86-64.
define void @store_atomic_imm_64(i64* %p) {
; X64-LABEL: store_atomic_imm_64
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32 bit architectures, and thus
; cannot be optimized in the same way as the others.
; X32-LABEL: store_atomic_imm_64
; X32: cmpxchg8b
  store atomic i64 42, i64* %p release, align 8
  ret void
}
; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov,
; even on X64; one must use movabsq, which can only target a register.
define void @store_atomic_imm_64_big(i64* %p) {
; X64-LABEL: store_atomic_imm_64_big
; X64: movabsq
; X64: movq
  store atomic i64 100000000000, i64* %p monotonic, align 8
  ret void
}
; It would be incorrect to replace a lock xchgl by a movl
define void @store_atomic_imm_32_seq_cst(i32* %p) {
; X64-LABEL: store_atomic_imm_32_seq_cst
; X64: xchgl
; X32-LABEL: store_atomic_imm_32_seq_cst
; X32: xchgl
  store atomic i32 42, i32* %p seq_cst, align 4
  ret void
}
; load; add imm; store should fold to a single memory-operand addb.
define void @add_8(i8* %p) {
; X64-LABEL: add_8
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
; X32-LABEL: add_8
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = add i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}
define void @add_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treats 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: add_16
; X64-NOT: addw
; X32-LABEL: add_16
; X32-NOT: addw
  %1 = load atomic i16* %p acquire, align 2
  %2 = add i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}
define void @add_32(i32* %p) {
; X64-LABEL: add_32
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
; X32-LABEL: add_32
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}
define void @add_64(i64* %p) {
; X64-LABEL: add_64
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = add i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}
; seq_cst store: the lock-implying xchgl must be kept; no folding.
define void @add_32_seq_cst(i32* %p) {
; X64-LABEL: add_32_seq_cst
; X64: xchgl
; X32-LABEL: add_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = add i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}
define void @and_8(i8* %p) {
; X64-LABEL: and_8
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
; X32-LABEL: and_8
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
  %1 = load atomic i8* %p monotonic, align 1
  %2 = and i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}
define void @and_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treats 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: and_16
; X64-NOT: andw
; X32-LABEL: and_16
; X32-NOT: andw
  %1 = load atomic i16* %p acquire, align 2
  %2 = and i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}
define void @and_32(i32* %p) {
; X64-LABEL: and_32
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
; X32-LABEL: and_32
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}
define void @and_64(i64* %p) {
; X64-LABEL: and_64
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = and i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}
; seq_cst store: keep the xchgl; folding into andl would be incorrect.
define void @and_32_seq_cst(i32* %p) {
; X64-LABEL: and_32_seq_cst
; X64: xchgl
; X32-LABEL: and_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = and i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}
define void @or_8(i8* %p) {
; X64-LABEL: or_8
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
; X32-LABEL: or_8
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
  %1 = load atomic i8* %p acquire, align 1
  %2 = or i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}
define void @or_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treats 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: or_16
; X64-NOT: orw
; X32-LABEL: or_16
; X32-NOT: orw
  %1 = load atomic i16* %p acquire, align 2
  %2 = or i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}
define void @or_32(i32* %p) {
; X64-LABEL: or_32
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
; X32-LABEL: or_32
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}
define void @or_64(i64* %p) {
; X64-LABEL: or_64
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = or i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}
; seq_cst store: keep the xchgl; no folding into orl.
define void @or_32_seq_cst(i32* %p) {
; X64-LABEL: or_32_seq_cst
; X64: xchgl
; X32-LABEL: or_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = or i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}
define void @xor_8(i8* %p) {
; X64-LABEL: xor_8
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
; X32-LABEL: xor_8
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
  %1 = load atomic i8* %p acquire, align 1
  %2 = xor i8 %1, 2
  store atomic i8 %2, i8* %p release, align 1
  ret void
}
define void @xor_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treats 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: xor_16
; X64-NOT: xorw
; X32-LABEL: xor_16
; X32-NOT: xorw
  %1 = load atomic i16* %p acquire, align 2
  %2 = xor i16 %1, 2
  store atomic i16 %2, i16* %p release, align 2
  ret void
}
define void @xor_32(i32* %p) {
; X64-LABEL: xor_32
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
; X32-LABEL: xor_32
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p release, align 4
  ret void
}
define void @xor_64(i64* %p) {
; X64-LABEL: xor_64
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = xor i64 %1, 2
  store atomic i64 %2, i64* %p release, align 8
  ret void
}
; seq_cst store: keep the xchgl; no folding into xorl.
define void @xor_32_seq_cst(i32* %p) {
; X64-LABEL: xor_32_seq_cst
; X64: xchgl
; X32-LABEL: xor_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = xor i32 %1, 2
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}
; Increment-by-one pattern should lower to a memory-operand incb.
define void @inc_8(i8* %p) {
; X64-LABEL: inc_8
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
; X32-LABEL: inc_8
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = add i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}
define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treats 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: inc_16
; X64-NOT: incw
; X32-LABEL: inc_16
; X32-NOT: incw
  %1 = load atomic i16* %p acquire, align 2
  %2 = add i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}
define void @inc_32(i32* %p) {
; X64-LABEL: inc_32
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
; X32-LABEL: inc_32
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}
define void @inc_64(i64* %p) {
; X64-LABEL: inc_64
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
; X32-LABEL: inc_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = add i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}
; seq_cst store: keep the xchgl; no folding into incl.
define void @inc_32_seq_cst(i32* %p) {
; X64-LABEL: inc_32_seq_cst
; X64: xchgl
; X32-LABEL: inc_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = add i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}
; Decrement-by-one (sub 1) should lower to a memory-operand decb.
define void @dec_8(i8* %p) {
; X64-LABEL: dec_8
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
; X32-LABEL: dec_8
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
  %1 = load atomic i8* %p seq_cst, align 1
  %2 = sub i8 %1, 1
  store atomic i8 %2, i8* %p release, align 1
  ret void
}
define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treats 16 bit arithmetic as expensive on X86/X86_64.
; X64-LABEL: dec_16
; X64-NOT: decw
; X32-LABEL: dec_16
; X32-NOT: decw
  %1 = load atomic i16* %p acquire, align 2
  %2 = sub i16 %1, 1
  store atomic i16 %2, i16* %p release, align 2
  ret void
}
define void @dec_32(i32* %p) {
; X64-LABEL: dec_32
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
; X32-LABEL: dec_32
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
  %1 = load atomic i32* %p acquire, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p monotonic, align 4
  ret void
}
define void @dec_64(i64* %p) {
; X64-LABEL: dec_64
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
; X32-LABEL: dec_64
  %1 = load atomic i64* %p acquire, align 8
  %2 = sub i64 %1, 1
  store atomic i64 %2, i64* %p release, align 8
  ret void
}
; seq_cst store: keep the xchgl; no folding into decl.
define void @dec_32_seq_cst(i32* %p) {
; X64-LABEL: dec_32_seq_cst
; X64: xchgl
; X32-LABEL: dec_32_seq_cst
; X32: xchgl
  %1 = load atomic i32* %p monotonic, align 4
  %2 = sub i32 %1, 1
  store atomic i32 %2, i32* %p seq_cst, align 4
  ret void
}