test/CodeGen/X86/atomic_mi.ll

   1 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
   2 ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
   3
   4 ; This file checks that atomic (non-seq_cst) stores of immediate values are
   5 ; done in one mov instruction and not 2. More precisely, it makes sure that the
   6 ; immediate is not first copied uselessly into a register.
   7
   8 ; Similarly, it checks that a binary operation of an immediate with an atomic
   9 ; variable that is stored back in that variable is done as a single instruction.
  10 ; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
  11 ; should be just an add instruction, instead of loading x into a register, doing
  12 ; an add and storing the result back.
  13 ; The binary operations supported are currently add, and, or, xor.
  14 ; sub is not handled separately because it is translated into an addition of
  15 ; the negated immediate.
  16 ; Finally, we also check the same kind of pattern for inc/dec.
  17
  18 ; seq_cst stores are left as (lock) xchgl, but we try to check every other
  19 ; attribute at least once.
  20
  21 ; Please note that these operations do not require the lock prefix: only
  22 ; sequentially consistent stores require this kind of protection on X86.
  23 ; And even for seq_cst operations, llvm uses the xchg instruction which has
  24 ; an implicit lock prefix, so making it explicit is not required.
  25
  26 define void @store_atomic_imm_8(i8* %p) { ; release store of an i8 immediate
  27 ; X64-LABEL: store_atomic_imm_8
  28 ; X64: movb
  29 ; X64-NOT: movb
  30 ; X32-LABEL: store_atomic_imm_8
  31 ; X32: movb
  32 ; X32-NOT: movb
  33   store atomic i8 42, i8* %p release, align 1 ; checks allow exactly one movb per target
  34   ret void
  35 }
  36
  37 define void @store_atomic_imm_16(i16* %p) { ; monotonic store of an i16 immediate
  38 ; X64-LABEL: store_atomic_imm_16
  39 ; X64: movw
  40 ; X64-NOT: movw
  41 ; X32-LABEL: store_atomic_imm_16
  42 ; X32: movw
  43 ; X32-NOT: movw
  44   store atomic i16 42, i16* %p monotonic, align 2 ; checks allow exactly one movw per target
  45   ret void
  46 }
  47
  48 define void @store_atomic_imm_32(i32* %p) { ; release store of an i32 immediate
  49 ; X64-LABEL: store_atomic_imm_32
  50 ; X64: movl
  51 ; X64-NOT: movl
  52 ;   On 32 bits, there is an extra movl for each of those functions: a load
  53 ;   from 4(%esp), presumably fetching the pointer argument (to be confirmed).
  54 ; X32-LABEL: store_atomic_imm_32
  55 ; X32: movl 4(%esp), %eax
  56 ; X32: movl
  57 ; X32-NOT: movl
  58   store atomic i32 42, i32* %p release, align 4
  59   ret void
  60 }
  61
  62 define void @store_atomic_imm_64(i64* %p) { ; release store of an i64 immediate
  63 ; X64-LABEL: store_atomic_imm_64
  64 ; X64: movq
  65 ; X64-NOT: movq
  66 ;   On 32 bit architectures these are implemented with a CAS loop, and thus
  67 ;   cannot be optimized in the same way as the others.
  68 ; X32-LABEL: store_atomic_imm_64
  69 ; X32: cmpxchg8b
  70   store atomic i64 42, i64* %p release, align 8
  71   ret void
  72 }
  73
  74 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov,
  75 ; even on x86-64: one must use movabsq, which can only target a register.
  76 define void @store_atomic_imm_64_big(i64* %p) {
  77 ; X64-LABEL: store_atomic_imm_64_big
  78 ; X64: movabsq
  79 ; X64: movq
     ;   Only the label is checked on 32 bits; it keeps the cmpxchg8b check of
     ;   store_atomic_imm_64 from being satisfied by this function's code.
     ; X32-LABEL: store_atomic_imm_64_big
  80   store atomic i64 100000000000, i64* %p monotonic, align 8 ; 100000000000 does not fit in 32 bits
  81   ret void
  82 }
  83
  84 ; A seq_cst store must stay an xchgl: replacing it by a plain movl would be incorrect.
  85 define void @store_atomic_imm_32_seq_cst(i32* %p) {
  86 ; X64-LABEL: store_atomic_imm_32_seq_cst
  87 ; X64: xchgl
  88 ; X32-LABEL: store_atomic_imm_32_seq_cst
  89 ; X32: xchgl
  90   store atomic i32 42, i32* %p seq_cst, align 4 ; seq_cst, unlike the release/monotonic stores above
  91   ret void
  92 }
  93
  94 ; ----- ADD -----
  95
  96 define void @add_8(i8* %p) { ; i8: seq_cst load, add 2, release store to the same address
  97 ; X64-LABEL: add_8
  98 ; X64-NOT: lock
  99 ; X64: addb
 100 ; X64-NOT: movb
 101 ; X32-LABEL: add_8
 102 ; X32-NOT: lock
 103 ; X32: addb
 104 ; X32-NOT: movb
 105   %cur = load atomic i8* %p seq_cst, align 1
 106   %sum = add i8 %cur, 2
 107   store atomic i8 %sum, i8* %p release, align 1 ; expected to fold into one addb with no lock before it
 108   ret void
 109 }
 110
 111 define void @add_16(i16* %p) { ; i16: acquire load, add 2, release store
 112 ;   The fold is currently not applied to 16 bit accesses, because the backend
 113 ;   treats 16 bit arithmetic as expensive on X86/X86_64.
 114 ; X64-LABEL: add_16
 115 ; X64-NOT: addw
 116 ; X32-LABEL: add_16
 117 ; X32-NOT: addw
 118   %cur = load atomic i16* %p acquire, align 2
 119   %sum = add i16 %cur, 2
 120   store atomic i16 %sum, i16* %p release, align 2
 121   ret void
 122 }
 123
 124 define void @add_32(i32* %p) { ; i32: acquire load, add 2, monotonic store to the same address
 125 ; X64-LABEL: add_32
 126 ; X64-NOT: lock
 127 ; X64: addl
 128 ; X64-NOT: movl
 129 ; X32-LABEL: add_32
 130 ; X32-NOT: lock
 131 ; X32: addl
 132 ; X32-NOT: movl
 133   %cur = load atomic i32* %p acquire, align 4
 134   %sum = add i32 %cur, 2
 135   store atomic i32 %sum, i32* %p monotonic, align 4 ; expected to fold into one addl with no lock before it
 136   ret void
 137 }
 138
 139 define void @add_64(i64* %p) { ; i64: acquire load, add 2, release store to the same address
 140 ; X64-LABEL: add_64
 141 ; X64-NOT: lock
 142 ; X64: addq
 143 ; X64-NOT: movq
 144 ;   X86-32 only gets a label here, as it cannot do 'addq'.
 145 ; X32-LABEL: add_64
 146   %cur = load atomic i64* %p acquire, align 8
 147   %sum = add i64 %cur, 2
 148   store atomic i64 %sum, i64* %p release, align 8
 149   ret void
 150 }
 151
 152 define void @add_32_seq_cst(i32* %p) { ; i32: monotonic load, add 2, seq_cst store
 153 ; X64-LABEL: add_32_seq_cst
 154 ; X64: xchgl
 155 ; X32-LABEL: add_32_seq_cst
 156 ; X32: xchgl
 157   %cur = load atomic i32* %p monotonic, align 4
 158   %sum = add i32 %cur, 2
 159   store atomic i32 %sum, i32* %p seq_cst, align 4 ; seq_cst store: checks expect an xchgl
 160   ret void
 161 }
 162
 163 ; ----- AND -----
 164
 165 define void @and_8(i8* %p) { ; i8: monotonic load, and 2, release store to the same address
 166 ; X64-LABEL: and_8
 167 ; X64-NOT: lock
 168 ; X64: andb
 169 ; X64-NOT: movb
 170 ; X32-LABEL: and_8
 171 ; X32-NOT: lock
 172 ; X32: andb
 173 ; X32-NOT: movb
 174   %cur = load atomic i8* %p monotonic, align 1
 175   %masked = and i8 %cur, 2
 176   store atomic i8 %masked, i8* %p release, align 1 ; expected to fold into one andb with no lock before it
 177   ret void
 178 }
 179
 180 define void @and_16(i16* %p) { ; i16: acquire load, and 2, release store
 181 ;   The fold is currently not applied to 16 bit accesses, because the backend
 182 ;   treats 16 bit arithmetic as expensive on X86/X86_64.
 183 ; X64-LABEL: and_16
 184 ; X64-NOT: andw
 185 ; X32-LABEL: and_16
 186 ; X32-NOT: andw
 187   %cur = load atomic i16* %p acquire, align 2
 188   %masked = and i16 %cur, 2
 189   store atomic i16 %masked, i16* %p release, align 2
 190   ret void
 191 }
 192
 193 define void @and_32(i32* %p) { ; i32: acquire load, and 2, release store to the same address
 194 ; X64-LABEL: and_32
 195 ; X64-NOT: lock
 196 ; X64: andl
 197 ; X64-NOT: movl
 198 ; X32-LABEL: and_32
 199 ; X32-NOT: lock
 200 ; X32: andl
 201 ; X32-NOT: movl
 202   %cur = load atomic i32* %p acquire, align 4
 203   %masked = and i32 %cur, 2
 204   store atomic i32 %masked, i32* %p release, align 4 ; expected to fold into one andl with no lock before it
 205   ret void
 206 }
 207
 208 define void @and_64(i64* %p) { ; i64: acquire load, and 2, release store to the same address
 209 ; X64-LABEL: and_64
 210 ; X64-NOT: lock
 211 ; X64: andq
 212 ; X64-NOT: movq
 213 ;   X86-32 only gets a label here, as it cannot do 'andq'.
 214 ; X32-LABEL: and_64
 215   %cur = load atomic i64* %p acquire, align 8
 216   %masked = and i64 %cur, 2
 217   store atomic i64 %masked, i64* %p release, align 8
 218   ret void
 219 }
 220
 221 define void @and_32_seq_cst(i32* %p) { ; i32: monotonic load, and 2, seq_cst store
 222 ; X64-LABEL: and_32_seq_cst
 223 ; X64: xchgl
 224 ; X32-LABEL: and_32_seq_cst
 225 ; X32: xchgl
 226   %cur = load atomic i32* %p monotonic, align 4
 227   %masked = and i32 %cur, 2
 228   store atomic i32 %masked, i32* %p seq_cst, align 4 ; seq_cst store: checks expect an xchgl
 229   ret void
 230 }
 231
 232 ; ----- OR -----
 233
 234 define void @or_8(i8* %p) { ; i8: acquire load, or 2, release store to the same address
 235 ; X64-LABEL: or_8
 236 ; X64-NOT: lock
 237 ; X64: orb
 238 ; X64-NOT: movb
 239 ; X32-LABEL: or_8
 240 ; X32-NOT: lock
 241 ; X32: orb
 242 ; X32-NOT: movb
 243   %cur = load atomic i8* %p acquire, align 1
 244   %merged = or i8 %cur, 2
 245   store atomic i8 %merged, i8* %p release, align 1 ; expected to fold into one orb with no lock before it
 246   ret void
 247 }
 248
 249 define void @or_16(i16* %p) { ; i16: acquire load, or 2, release store; checks expect no orw
 250 ; X64-LABEL: or_16
 251 ; X64-NOT: orw
 252 ; X32-LABEL: or_16
 253 ; X32-NOT: orw
 254   %cur = load atomic i16* %p acquire, align 2
 255   %merged = or i16 %cur, 2
 256   store atomic i16 %merged, i16* %p release, align 2
 257   ret void
 258 }
 259
 260 define void @or_32(i32* %p) { ; i32: acquire load, or 2, release store to the same address
 261 ; X64-LABEL: or_32
 262 ; X64-NOT: lock
 263 ; X64: orl
 264 ; X64-NOT: movl
 265 ; X32-LABEL: or_32
 266 ; X32-NOT: lock
 267 ; X32: orl
 268 ; X32-NOT: movl
 269   %cur = load atomic i32* %p acquire, align 4
 270   %merged = or i32 %cur, 2
 271   store atomic i32 %merged, i32* %p release, align 4 ; expected to fold into one orl with no lock before it
 272   ret void
 273 }
 274
 275 define void @or_64(i64* %p) { ; i64: acquire load, or 2, release store to the same address
 276 ; X64-LABEL: or_64
 277 ; X64-NOT: lock
 278 ; X64: orq
 279 ; X64-NOT: movq
 280 ;   X86-32 only gets a label here, as it cannot do 'orq'.
 281 ; X32-LABEL: or_64
 282   %cur = load atomic i64* %p acquire, align 8
 283   %merged = or i64 %cur, 2
 284   store atomic i64 %merged, i64* %p release, align 8
 285   ret void
 286 }
 287
 288 define void @or_32_seq_cst(i32* %p) { ; i32: monotonic load, or 2, seq_cst store
 289 ; X64-LABEL: or_32_seq_cst
 290 ; X64: xchgl
 291 ; X32-LABEL: or_32_seq_cst
 292 ; X32: xchgl
 293   %cur = load atomic i32* %p monotonic, align 4
 294   %merged = or i32 %cur, 2
 295   store atomic i32 %merged, i32* %p seq_cst, align 4 ; seq_cst store: checks expect an xchgl
 296   ret void
 297 }
 298
 299 ; ----- XOR -----
 300
 301 define void @xor_8(i8* %p) { ; i8: acquire load, xor 2, release store to the same address
 302 ; X64-LABEL: xor_8
 303 ; X64-NOT: lock
 304 ; X64: xorb
 305 ; X64-NOT: movb
 306 ; X32-LABEL: xor_8
 307 ; X32-NOT: lock
 308 ; X32: xorb
 309 ; X32-NOT: movb
 310   %cur = load atomic i8* %p acquire, align 1
 311   %toggled = xor i8 %cur, 2
 312   store atomic i8 %toggled, i8* %p release, align 1 ; expected to fold into one xorb with no lock before it
 313   ret void
 314 }
 315
 316 define void @xor_16(i16* %p) { ; i16: acquire load, xor 2, release store; checks expect no xorw
 317 ; X64-LABEL: xor_16
 318 ; X64-NOT: xorw
 319 ; X32-LABEL: xor_16
 320 ; X32-NOT: xorw
 321   %cur = load atomic i16* %p acquire, align 2
 322   %toggled = xor i16 %cur, 2
 323   store atomic i16 %toggled, i16* %p release, align 2
 324   ret void
 325 }
 326
 327 define void @xor_32(i32* %p) { ; i32: acquire load, xor 2, release store to the same address
 328 ; X64-LABEL: xor_32
 329 ; X64-NOT: lock
 330 ; X64: xorl
 331 ; X64-NOT: movl
 332 ; X32-LABEL: xor_32
 333 ; X32-NOT: lock
 334 ; X32: xorl
 335 ; X32-NOT: movl
 336   %cur = load atomic i32* %p acquire, align 4
 337   %toggled = xor i32 %cur, 2
 338   store atomic i32 %toggled, i32* %p release, align 4 ; expected to fold into one xorl with no lock before it
 339   ret void
 340 }
 341
 342 define void @xor_64(i64* %p) { ; i64: acquire load, xor 2, release store to the same address
 343 ; X64-LABEL: xor_64
 344 ; X64-NOT: lock
 345 ; X64: xorq
 346 ; X64-NOT: movq
 347 ;   X86-32 only gets a label here, as it cannot do 'xorq'.
 348 ; X32-LABEL: xor_64
 349   %cur = load atomic i64* %p acquire, align 8
 350   %toggled = xor i64 %cur, 2
 351   store atomic i64 %toggled, i64* %p release, align 8
 352   ret void
 353 }
 354
 355 define void @xor_32_seq_cst(i32* %p) { ; i32: monotonic load, xor 2, seq_cst store
 356 ; X64-LABEL: xor_32_seq_cst
 357 ; X64: xchgl
 358 ; X32-LABEL: xor_32_seq_cst
 359 ; X32: xchgl
 360   %cur = load atomic i32* %p monotonic, align 4
 361   %toggled = xor i32 %cur, 2
 362   store atomic i32 %toggled, i32* %p seq_cst, align 4 ; seq_cst store: checks expect an xchgl
 363   ret void
 364 }
 365
 366 ; ----- INC -----
 367
 368 define void @inc_8(i8* %p) { ; i8: seq_cst load, add 1, release store to the same address
 369 ; X64-LABEL: inc_8
 370 ; X64-NOT: lock
 371 ; X64: incb
 372 ; X64-NOT: movb
 373 ; X32-LABEL: inc_8
 374 ; X32-NOT: lock
 375 ; X32: incb
 376 ; X32-NOT: movb
 377   %cur = load atomic i8* %p seq_cst, align 1
 378   %incremented = add i8 %cur, 1
 379   store atomic i8 %incremented, i8* %p release, align 1 ; expected to fold into one incb with no lock before it
 380   ret void
 381 }
 382
 383 define void @inc_16(i16* %p) { ; i16: acquire load, add 1, release store
 384 ;   The fold is currently not applied to 16 bit accesses, because the backend
 385 ;   treats 16 bit arithmetic as expensive on X86/X86_64.
 386 ; X64-LABEL: inc_16
 387 ; X64-NOT: incw
 388 ; X32-LABEL: inc_16
 389 ; X32-NOT: incw
 390   %cur = load atomic i16* %p acquire, align 2
 391   %incremented = add i16 %cur, 1
 392   store atomic i16 %incremented, i16* %p release, align 2
 393   ret void
 394 }
 395
 396 define void @inc_32(i32* %p) { ; i32: acquire load, add 1, monotonic store to the same address
 397 ; X64-LABEL: inc_32
 398 ; X64-NOT: lock
 399 ; X64: incl
 400 ; X64-NOT: movl
 401 ; X32-LABEL: inc_32
 402 ; X32-NOT: lock
 403 ; X32: incl
 404 ; X32-NOT: movl
 405   %cur = load atomic i32* %p acquire, align 4
 406   %incremented = add i32 %cur, 1
 407   store atomic i32 %incremented, i32* %p monotonic, align 4 ; expected to fold into one incl with no lock before it
 408   ret void
 409 }
 410
 411 define void @inc_64(i64* %p) { ; i64: acquire load, add 1, release store to the same address
 412 ; X64-LABEL: inc_64
 413 ; X64-NOT: lock
 414 ; X64: incq
 415 ; X64-NOT: movq
 416 ;   X86-32 only gets a label here, as it cannot do 'incq'.
 417 ; X32-LABEL: inc_64
 418   %cur = load atomic i64* %p acquire, align 8
 419   %incremented = add i64 %cur, 1
 420   store atomic i64 %incremented, i64* %p release, align 8
 421   ret void
 422 }
 423
 424 define void @inc_32_seq_cst(i32* %p) { ; i32: monotonic load, add 1, seq_cst store
 425 ; X64-LABEL: inc_32_seq_cst
 426 ; X64: xchgl
 427 ; X32-LABEL: inc_32_seq_cst
 428 ; X32: xchgl
 429   %cur = load atomic i32* %p monotonic, align 4
 430   %incremented = add i32 %cur, 1
 431   store atomic i32 %incremented, i32* %p seq_cst, align 4 ; seq_cst store: checks expect an xchgl
 432   ret void
 433 }
 434
 435 ; ----- DEC -----
 436
 437 define void @dec_8(i8* %p) { ; i8: seq_cst load, sub 1, release store to the same address
 438 ; X64-LABEL: dec_8
 439 ; X64-NOT: lock
 440 ; X64: decb
 441 ; X64-NOT: movb
 442 ; X32-LABEL: dec_8
 443 ; X32-NOT: lock
 444 ; X32: decb
 445 ; X32-NOT: movb
 446   %cur = load atomic i8* %p seq_cst, align 1
 447   %decremented = sub i8 %cur, 1
 448   store atomic i8 %decremented, i8* %p release, align 1 ; expected to fold into one decb with no lock before it
 449   ret void
 450 }
 451
 452 define void @dec_16(i16* %p) { ; i16: acquire load, sub 1, release store
 453 ;   The fold is currently not applied to 16 bit accesses, because the backend
 454 ;   treats 16 bit arithmetic as expensive on X86/X86_64.
 455 ; X64-LABEL: dec_16
 456 ; X64-NOT: decw
 457 ; X32-LABEL: dec_16
 458 ; X32-NOT: decw
 459   %cur = load atomic i16* %p acquire, align 2
 460   %decremented = sub i16 %cur, 1
 461   store atomic i16 %decremented, i16* %p release, align 2
 462   ret void
 463 }
 464
 465 define void @dec_32(i32* %p) { ; i32: acquire load, sub 1, monotonic store to the same address
 466 ; X64-LABEL: dec_32
 467 ; X64-NOT: lock
 468 ; X64: decl
 469 ; X64-NOT: movl
 470 ; X32-LABEL: dec_32
 471 ; X32-NOT: lock
 472 ; X32: decl
 473 ; X32-NOT: movl
 474   %cur = load atomic i32* %p acquire, align 4
 475   %decremented = sub i32 %cur, 1
 476   store atomic i32 %decremented, i32* %p monotonic, align 4 ; expected to fold into one decl with no lock before it
 477   ret void
 478 }
 479
 480 define void @dec_64(i64* %p) { ; i64: acquire load, sub 1, release store to the same address
 481 ; X64-LABEL: dec_64
 482 ; X64-NOT: lock
 483 ; X64: decq
 484 ; X64-NOT: movq
 485 ;   X86-32 only gets a label here, as it cannot do 'decq'.
 486 ; X32-LABEL: dec_64
 487   %cur = load atomic i64* %p acquire, align 8
 488   %decremented = sub i64 %cur, 1
 489   store atomic i64 %decremented, i64* %p release, align 8
 490   ret void
 491 }
 492
 493 define void @dec_32_seq_cst(i32* %p) { ; i32: monotonic load, sub 1, seq_cst store
 494 ; X64-LABEL: dec_32_seq_cst
 495 ; X64: xchgl
 496 ; X32-LABEL: dec_32_seq_cst
 497 ; X32: xchgl
 498   %cur = load atomic i32* %p monotonic, align 4
 499   %decremented = sub i32 %cur, 1
 500   store atomic i32 %decremented, i32* %p seq_cst, align 4 ; seq_cst store: checks expect an xchgl
 501   ret void
 502 }