[IR] Add support for floating pointer atomic loads and stores

author Philip Reames <listmail@philipreames.com>

Wed, 16 Dec 2015 00:49:36 +0000 (00:49 +0000)

committer Philip Reames <listmail@philipreames.com>

Wed, 16 Dec 2015 00:49:36 +0000 (00:49 +0000)
author Philip Reames <listmail@philipreames.com>
Wed, 16 Dec 2015 00:49:36 +0000 (00:49 +0000)
committer Philip Reames <listmail@philipreames.com>
Wed, 16 Dec 2015 00:49:36 +0000 (00:49 +0000)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst

index 72c0a319dfab26546f6ea55987aa405bbb772ccd..f8d87da7061b21e72d6d606404cca98872ee45a6 100644 (file)
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -6844,12 +6844,12 @@ If the ``load`` is marked as ``atomic``, it takes an extra
  ``release`` and ``acq_rel`` orderings are not valid on ``load``
  instructions. Atomic loads produce :ref:`defined <memmodel>` results
  when they may see multiple atomic stores. The type of the pointee must
-be an integer type whose bit width is a power of two greater than or
-equal to eight and less than or equal to a target-specific size limit.
-``align`` must be explicitly specified on atomic loads, and the load has
-undefined behavior if the alignment is not set to a value which is at
-least the size in bytes of the pointee. ``!nontemporal`` does not have
-any defined semantics for atomic loads.
+be an integer or floating point type whose bit width is a power of two,
+greater than or equal to eight, and less than or equal to a 
+target-specific size limit. ``align`` must be explicitly specified on
+atomic loads, and the load has undefined behavior if the alignment is 
+not set to a value which is at least the size in bytes of the pointee.
+``!nontemporal`` does not have any defined semantics for atomic loads.
  
  The optional constant ``align`` argument specifies the alignment of the
  operation (that is, the alignment of the memory address). A value of 0
@@ -6969,12 +6969,13 @@ If the ``store`` is marked as ``atomic``, it takes an extra
  ``acquire`` and ``acq_rel`` orderings aren't valid on ``store``
  instructions. Atomic loads produce :ref:`defined <memmodel>` results
  when they may see multiple atomic stores. The type of the pointee must
-be an integer type whose bit width is a power of two greater than or
-equal to eight and less than or equal to a target-specific size limit.
-``align`` must be explicitly specified on atomic stores, and the store
-has undefined behavior if the alignment is not set to a value which is
-at least the size in bytes of the pointee. ``!nontemporal`` does not
-have any defined semantics for atomic stores.
+be an integer or floating point type whose bit width is a power of two,
+greater than or equal to eight, and less than or equal to a 
+target-specific size limit.  ``align`` must be explicitly specified 
+on atomic stores, and the store has undefined behavior if the alignment
+is not set to a value which is at least the size in bytes of the 
+pointee. ``!nontemporal`` does not have any defined semantics for 
+atomic stores.
  
  The optional constant ``align`` argument specifies the alignment of the
  operation (that is, the alignment of the memory address). A value of 0
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp

index e4b7c5a62780ce919d07f98c3b07fa1e714438ae..d0615aeca143a1c2ea9425d7862ab8bb4b2a5371 100644 (file)
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,8 +8,10 @@
  //===----------------------------------------------------------------------===//
  //
  // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or
-// AtomicCmpXchg.
+// target specific instruction which implement the same semantics in a way
+// which better fits the target backend.  This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
  //
  //===----------------------------------------------------------------------===//
  
@@ -46,9 +48,12 @@ namespace {
    private:
      bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                 bool IsStore, bool IsLoad);
+    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
      bool tryExpandAtomicLoad(LoadInst *LI);
      bool expandAtomicLoadToLL(LoadInst *LI);
      bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
      bool expandAtomicStore(StoreInst *SI);
      bool tryExpandAtomicRMW(AtomicRMWInst *AI);
      bool expandAtomicOpToLLSC(
@@ -130,9 +135,27 @@ bool AtomicExpand::runOnFunction(Function &F) {
      }
  
      if (LI) {
+      if (LI->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        LI = convertAtomicLoadToIntegerType(LI);
+        assert(LI->getType()->isIntegerTy() && "invariant broken");
+        MadeChange = true;
+      }
+      
        MadeChange |= tryExpandAtomicLoad(LI);
-    } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
-      MadeChange |= expandAtomicStore(SI);
+    } else if (SI) {
+      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        SI = convertAtomicStoreToIntegerType(SI);
+        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+               "invariant broken");
+        MadeChange = true;
+      }
+
+      if (TLI->shouldExpandAtomicStoreInIR(SI))
+        MadeChange |= expandAtomicStore(SI);
      } else if (RMWI) {
        // There are two different ways of expanding RMW instructions:
        // - into a load if it is idempotent
@@ -172,6 +195,42 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
    return (LeadingFence || TrailingFence);
  }
  
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+                                                       const DataLayout &DL) {
+  EVT VT = TLI->getValueType(DL, T);
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+  return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivelent bitwidth.  See the function comment on
+/// convertAtomicStoreToIntegerType for background.  
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+  auto *M = LI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+                                            M->getDataLayout());
+
+  IRBuilder<> Builder(LI);
+  
+  Value *Addr = LI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+  
+  auto *NewLI = Builder.CreateLoad(NewAddr);
+  NewLI->setAlignment(LI->getAlignment());
+  NewLI->setVolatile(LI->isVolatile());
+  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+  
+  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  LI->replaceAllUsesWith(NewVal);
+  LI->eraseFromParent();
+  return NewLI;
+}
+
  bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
    switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
    case TargetLoweringBase::AtomicExpansionKind::None:
@@ -222,6 +281,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
    return true;
  }
  
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivelent bitwidth.  We used to not support floating point or vector
+/// atomics in the IR at all.  The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of a atomic
+/// float or vector store.  The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+  IRBuilder<> Builder(SI);
+  auto *M = SI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+                                            M->getDataLayout());
+  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+  
+  Value *Addr = SI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+  NewSI->setAlignment(SI->getAlignment());
+  NewSI->setVolatile(SI->isVolatile());
+  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+  SI->eraseFromParent();
+  return NewSI;
+}
+
  bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
    // This function is only called on atomic stores that are too large to be
    // atomic if implemented as a native store. So we replace them by an
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp

index 32646cf79996f9ed7178e7e8d039ab841d4a8d03..53d00377469e34506cd8ac1363ae052114ad5b0a 100644 (file)
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -2732,7 +2732,8 @@ void Verifier::visitLoadInst(LoadInst &LI) {
      Assert(LI.getAlignment() != 0,
             "Atomic load must specify explicit alignment", &LI);
      if (!ElTy->isPointerTy()) {
-      Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!",
+      Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(),
+             "atomic load operand must have integer or floating point type!",
               &LI, ElTy);
        unsigned Size = ElTy->getPrimitiveSizeInBits();
        Assert(Size >= 8 && !(Size & (Size - 1)),
@@ -2761,8 +2762,9 @@ void Verifier::visitStoreInst(StoreInst &SI) {
      Assert(SI.getAlignment() != 0,
             "Atomic store must specify explicit alignment", &SI);
      if (!ElTy->isPointerTy()) {
-      Assert(ElTy->isIntegerTy(),
-             "atomic store operand must have integer type!", &SI, ElTy);
+      Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(),
+             "atomic store operand must have integer or floating point type!",
+             &SI, ElTy);
        unsigned Size = ElTy->getPrimitiveSizeInBits();
        Assert(Size >= 8 && !(Size & (Size - 1)),
               "atomic store operand must be power-of-two byte-sized integer",
diff --git a/test/CodeGen/X86/atomic-non-integer.ll b/test/CodeGen/X86/atomic-non-integer.ll

new file mode 100644 (file)

index 0000000..98fcd96
--- /dev/null
+++ b/test/CodeGen/X86/atomic-non-integer.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s
+
+; Note: This test is testing that the lowering for atomics matches what we
+; currently emit for non-atomics + the atomic restriction.  The presence of
+; particular lowering detail in these tests should not be read as requiring
+; that detail for correctness unless it's related to the atomicity itself.
+; (Specifically, there were reviewer questions about the lowering for halfs
+;  and their calling convention which remain unresolved.)
+
+define void @store_half(half* %fptr, half %v) {
+; CHECK-LABEL: @store_half
+; CHECK: movq  %rdi, %rbx
+; CHECK: callq __gnu_f2h_ieee
+; CHECK: movw  %ax, (%rbx)
+  store atomic half %v, half* %fptr unordered, align 2
+  ret void
+}
+
+define void @store_float(float* %fptr, float %v) {
+; CHECK-LABEL: @store_float
+; CHECK: movd  %xmm0, %eax
+; CHECK: movl  %eax, (%rdi)
+  store atomic float %v, float* %fptr unordered, align 4
+  ret void
+}
+
+define void @store_double(double* %fptr, double %v) {
+; CHECK-LABEL: @store_double
+; CHECK: movd  %xmm0, %rax
+; CHECK: movq  %rax, (%rdi)
+  store atomic double %v, double* %fptr unordered, align 8
+  ret void
+}
+
+define void @store_fp128(fp128* %fptr, fp128 %v) {
+; CHECK-LABEL: @store_fp128
+; CHECK: callq __sync_lock_test_and_set_16
+  store atomic fp128 %v, fp128* %fptr unordered, align 16
+  ret void
+}
+
+define half @load_half(half* %fptr) {
+; CHECK-LABEL: @load_half
+; CHECK: movw  (%rdi), %ax
+; CHECK: movzwl        %ax, %edi
+; CHECK: jmp   __gnu_h2f_ieee
+  %v = load atomic half, half* %fptr unordered, align 2
+  ret half %v
+}
+
+define float @load_float(float* %fptr) {
+; CHECK-LABEL: @load_float
+; CHECK: movl  (%rdi), %eax
+; CHECK: movd  %eax, %xmm0
+  %v = load atomic float, float* %fptr unordered, align 4
+  ret float %v
+}
+
+define double @load_double(double* %fptr) {
+; CHECK-LABEL: @load_double
+; CHECK: movq  (%rdi), %rax
+; CHECK: movd  %rax, %xmm0
+  %v = load atomic double, double* %fptr unordered, align 8
+  ret double %v
+}
+
+define fp128 @load_fp128(fp128* %fptr) {
+; CHECK-LABEL: @load_fp128
+; CHECK: callq __sync_val_compare_and_swap_16
+  %v = load atomic fp128, fp128* %fptr unordered, align 16
+  ret fp128 %v
+}
+
+
+; sanity check the seq_cst lowering since that's the 
+; interesting one from an ordering perspective on x86.
+
+define void @store_float_seq_cst(float* %fptr, float %v) {
+; CHECK-LABEL: @store_float_seq_cst
+; CHECK: movd  %xmm0, %eax
+; CHECK: xchgl %eax, (%rdi)
+  store atomic float %v, float* %fptr seq_cst, align 4
+  ret void
+}
+
+define void @store_double_seq_cst(double* %fptr, double %v) {
+; CHECK-LABEL: @store_double_seq_cst
+; CHECK: movd  %xmm0, %rax
+; CHECK: xchgq %rax, (%rdi)
+  store atomic double %v, double* %fptr seq_cst, align 8
+  ret void
+}
+
+define float @load_float_seq_cst(float* %fptr) {
+; CHECK-LABEL: @load_float_seq_cst
+; CHECK: movl  (%rdi), %eax
+; CHECK: movd  %eax, %xmm0
+  %v = load atomic float, float* %fptr seq_cst, align 4
+  ret float %v
+}
+
+define double @load_double_seq_cst(double* %fptr) {
+; CHECK-LABEL: @load_double_seq_cst
+; CHECK: movq  (%rdi), %rax
+; CHECK: movd  %rax, %xmm0
+  %v = load atomic double, double* %fptr seq_cst, align 8
+  ret double %v
+}
diff --git a/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

new file mode 100644 (file)

index 0000000..792fb1e
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
+; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this 
+; functionality, please move this test to a target which still is.
+
+define float @float_load_expand(float* %ptr) {
+; CHECK-LABEL: @float_load_expand
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic float, float* %ptr unordered, align 4
+  ret float %res
+}
+
+define float @float_load_expand_seq_cst(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_seq_cst
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 seq_cst, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic float, float* %ptr seq_cst, align 4
+  ret float %res
+}
+
+define float @float_load_expand_vol(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_vol
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic volatile i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic volatile float, float* %ptr unordered, align 4
+  ret float %res
+}
+
+define float @float_load_expand_addr1(float addrspace(1)* %ptr) {
+; CHECK-LABEL: @float_load_expand_addr1
+; CHECK: %1 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: %2 = load atomic i32, i32 addrspace(1)* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+  %res = load atomic float, float addrspace(1)* %ptr unordered, align 4
+  ret float %res
+}
+
+define void @float_store_expand(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 unordered, align 4
+  store atomic float %v, float* %ptr unordered, align 4
+  ret void
+}
+
+define void @float_store_expand_seq_cst(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_seq_cst
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 seq_cst, align 4
+  store atomic float %v, float* %ptr seq_cst, align 4
+  ret void
+}
+
+define void @float_store_expand_vol(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_vol
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic volatile i32 %1, i32* %2 unordered, align 4
+  store atomic volatile float %v, float* %ptr unordered, align 4
+  ret void
+}
+
+define void @float_store_expand_addr1(float addrspace(1)* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_addr1
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: store atomic i32 %1, i32 addrspace(1)* %2 unordered, align 4
+  store atomic float %v, float addrspace(1)* %ptr unordered, align 4
+  ret void
+}
+
diff --git a/test/Verifier/atomics.ll b/test/Verifier/atomics.ll

new file mode 100644 (file)

index 0000000..ee7bf4b
--- /dev/null
+++ b/test/Verifier/atomics.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; CHECK: atomic store operand must have integer or floating point type!
+; CHECK: atomic load operand must have integer or floating point type!
+
+define void @foo(x86_mmx* %P, x86_mmx %v) {
+  store atomic x86_mmx %v, x86_mmx* %P unordered, align 8
+  ret void
+}
+
+define x86_mmx @bar(x86_mmx* %P) {
+  %v = load atomic x86_mmx, x86_mmx* %P unordered, align 8
+  ret x86_mmx %v
+}
author	Philip Reames <listmail@philipreames.com>
	Wed, 16 Dec 2015 00:49:36 +0000 (00:49 +0000)
committer	Philip Reames <listmail@philipreames.com>
	Wed, 16 Dec 2015 00:49:36 +0000 (00:49 +0000)
docs/LangRef.rst		patch \| blob \| history
lib/CodeGen/AtomicExpandPass.cpp		patch \| blob \| history
lib/IR/Verifier.cpp		patch \| blob \| history
test/CodeGen/X86/atomic-non-integer.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll	[new file with mode: 0644]	patch \| blob
test/Verifier/atomics.ll	[new file with mode: 0644]	patch \| blob