set(sources
X86AsmPrinter.cpp
+ X86AtomicExpandPass.cpp
X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
class JITCodeEmitter;
class X86TargetMachine;
+/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
+/// be handled natively in terms of a loop using cmpxchg.
+FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
+
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
--- /dev/null
+//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions --0---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions which
+// cannot be implemented as a single instruction with cmpxchg-based loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-atomic-expand"
+
+namespace {
+ class X86AtomicExpandPass : public FunctionPass {
+ const X86TargetMachine *TM;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit X86AtomicExpandPass(const X86TargetMachine *TM)
+ : FunctionPass(ID), TM(TM) {}
+
+ bool runOnFunction(Function &F) override;
+ bool expandAtomicInsts(Function &F);
+
+ bool needsCmpXchgNb(Type *MemType);
+
+ /// There are four kinds of atomic operations. Two never need expanding:
+ /// cmpxchg is what we expand the others *to*, and loads are easily handled
+ /// by ISelLowering. Atomicrmw and store can need expanding in some
+ /// circumstances.
+ bool shouldExpand(Instruction *Inst);
+
+ /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
+ /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
+ bool shouldExpandStore(StoreInst *SI);
+
+ /// Only some atomicrmw instructions need expanding -- some operations
+ /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
+ /// limited support but can't return the previous value; some (e.g. add)
+ /// have complete support in the instruction set.
+ ///
+ /// Also, naturally, 128-bit operations always need to be expanded.
+ bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
+
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicStore(StoreInst *SI);
+ };
+}
+
+char X86AtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
+ return new X86AtomicExpandPass(TM);
+}
+
+bool X86AtomicExpandPass::runOnFunction(Function &F) {
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (BasicBlock &BB : F)
+ for (Instruction &Inst : BB) {
+ if (isa<AtomicRMWInst>(&Inst) ||
+ (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+ AtomicInsts.push_back(&Inst);
+ }
+
+ bool MadeChange = false;
+ for (Instruction *Inst : AtomicInsts) {
+ if (!shouldExpand(Inst))
+ continue;
+
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ MadeChange |= expandAtomicRMW(AI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ MadeChange |= expandAtomicStore(SI);
+ }
+
+ return MadeChange;
+}
+
+/// Returns true if operations on the given type will need to use either
+/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
+/// native width, and the instructions are available (otherwise we leave them
+/// alone to become __sync_fetch_and_... calls).
+bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
+ const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+ if (!Subtarget.hasCmpxchg16b())
+ return false;
+
+ unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;
+
+ unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+ if (OpWidth == CmpXchgNbWidth)
+ return true;
+
+ return false;
+}
+
+
+bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
+ const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+
+ if (needsCmpXchgNb(AI->getType()))
+ return true;
+
+ if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
+ return false;
+
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ switch (Op) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Xchg:
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ // It's better to use xadd, xsub or xchg for these in all cases.
+ return false;
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Xor:
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ return !AI->use_empty();
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // These always require a non-trivial set of data operations on x86. We must
+ // use a cmpxchg loop.
+ return true;
+ }
+}
+
+bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
+ if (needsCmpXchgNb(SI->getValueOperand()->getType()))
+ return true;
+
+ return false;
+}
+
+bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ return shouldExpandAtomicRMW(AI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return shouldExpandStore(SI);
+ return false;
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+ Value *Loaded, Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown atomic op");
+}
+
+bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+ AtomicOrdering Order =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ AI->replaceAllUsesWith(NewLoaded);
+ AI->eraseFromParent();
+
+ return true;
+}
+
+bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+ // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
+ // this in terms of the usual expansion to "atomicrmw xchg".
+ IRBuilder<> Builder(SI);
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), SI->getOrdering());
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ if (shouldExpandAtomicRMW(AI))
+ return expandAtomicRMW(AI);
+
+ return AI;
+}
return getGlobalBaseReg();
- case X86ISD::ATOMOR64_DAG:
- case X86ISD::ATOMXOR64_DAG:
- case X86ISD::ATOMADD64_DAG:
- case X86ISD::ATOMSUB64_DAG:
- case X86ISD::ATOMNAND64_DAG:
- case X86ISD::ATOMAND64_DAG:
- case X86ISD::ATOMMAX64_DAG:
- case X86ISD::ATOMMIN64_DAG:
- case X86ISD::ATOMUMAX64_DAG:
- case X86ISD::ATOMUMIN64_DAG:
- case X86ISD::ATOMSWAP64_DAG: {
- unsigned Opc;
- switch (Opcode) {
- default: llvm_unreachable("Impossible opcode");
- case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
- case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
- case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
- case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
- case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
- case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
- case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
- case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
- case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
- case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
- case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
- }
- SDNode *RetVal = SelectAtomic64(Node, Opc);
- if (RetVal)
- return RetVal;
- break;
- }
-
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
- if (!Subtarget->is64Bit()) {
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
- }
-
if (Subtarget->hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
Results.push_back(Swap.getValue(2));
}
-static void
-ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) {
- SDLoc dl(Node);
- assert (Node->getValueType(0) == MVT::i64 &&
- "Only know how to expand i64 atomics");
-
- SDValue Chain = Node->getOperand(0);
- SDValue In1 = Node->getOperand(1);
- SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0));
- SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1));
- SDValue Ops[] = { Chain, In1, In2L, In2H };
- SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
- SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
- Results.push_back(Result.getValue(2));
-}
-
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(EFLAGS.getValue(1));
return;
}
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_SWAP: {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode");
- case ISD::ATOMIC_LOAD_ADD:
- Opc = X86ISD::ATOMADD64_DAG;
- break;
- case ISD::ATOMIC_LOAD_AND:
- Opc = X86ISD::ATOMAND64_DAG;
- break;
- case ISD::ATOMIC_LOAD_NAND:
- Opc = X86ISD::ATOMNAND64_DAG;
- break;
- case ISD::ATOMIC_LOAD_OR:
- Opc = X86ISD::ATOMOR64_DAG;
- break;
- case ISD::ATOMIC_LOAD_SUB:
- Opc = X86ISD::ATOMSUB64_DAG;
- break;
- case ISD::ATOMIC_LOAD_XOR:
- Opc = X86ISD::ATOMXOR64_DAG;
- break;
- case ISD::ATOMIC_LOAD_MAX:
- Opc = X86ISD::ATOMMAX64_DAG;
- break;
- case ISD::ATOMIC_LOAD_MIN:
- Opc = X86ISD::ATOMMIN64_DAG;
- break;
- case ISD::ATOMIC_LOAD_UMAX:
- Opc = X86ISD::ATOMUMAX64_DAG;
- break;
- case ISD::ATOMIC_LOAD_UMIN:
- Opc = X86ISD::ATOMUMIN64_DAG;
- break;
- case ISD::ATOMIC_SWAP:
- Opc = X86ISD::ATOMSWAP64_DAG;
- break;
- }
- ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
- return;
- }
case ISD::ATOMIC_LOAD: {
ReplaceATOMIC_LOAD(N, Results, DAG);
return;
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
- case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
- case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
- case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
- case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
- case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
- case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
return sinkMBB;
}
-// Get CMPXCHG opcode for the specified data type.
-static unsigned getCmpXChgOpcode(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::LCMPXCHG8;
- case MVT::i16: return X86::LCMPXCHG16;
- case MVT::i32: return X86::LCMPXCHG32;
- case MVT::i64: return X86::LCMPXCHG64;
- default:
- break;
- }
- llvm_unreachable("Invalid operand size!");
-}
-
-// Get LOAD opcode for the specified data type.
-static unsigned getLoadOpcode(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::MOV8rm;
- case MVT::i16: return X86::MOV16rm;
- case MVT::i32: return X86::MOV32rm;
- case MVT::i64: return X86::MOV64rm;
- default:
- break;
- }
- llvm_unreachable("Invalid operand size!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction.
-static unsigned getNonAtomicOpcode(unsigned Opc) {
- switch (Opc) {
- case X86::ATOMAND8: return X86::AND8rr;
- case X86::ATOMAND16: return X86::AND16rr;
- case X86::ATOMAND32: return X86::AND32rr;
- case X86::ATOMAND64: return X86::AND64rr;
- case X86::ATOMOR8: return X86::OR8rr;
- case X86::ATOMOR16: return X86::OR16rr;
- case X86::ATOMOR32: return X86::OR32rr;
- case X86::ATOMOR64: return X86::OR64rr;
- case X86::ATOMXOR8: return X86::XOR8rr;
- case X86::ATOMXOR16: return X86::XOR16rr;
- case X86::ATOMXOR32: return X86::XOR32rr;
- case X86::ATOMXOR64: return X86::XOR64rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction with
-// extra opcode.
-static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
- unsigned &ExtraOpc) {
- switch (Opc) {
- case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
- case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
- case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
- case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
- case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
- case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
- case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
- case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
- case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
- case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
- case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
- case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
- case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
- case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
- case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
- case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
- case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
- case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
- case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
- case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction for
-// 64-bit data type on 32-bit target.
-static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
- switch (Opc) {
- case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
- case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
- case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
- case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
- case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
- case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
- case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
- case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
- case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
- case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction for
-// 64-bit data type on 32-bit target with extra opcode.
-static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
- unsigned &HiOpc,
- unsigned &ExtraOpc) {
- switch (Opc) {
- case X86::ATOMNAND6432:
- ExtraOpc = X86::NOT32r;
- HiOpc = X86::AND32rr;
- return X86::AND32rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get pseudo CMOV opcode from the specified data type.
-static unsigned getPseudoCMOVOpc(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::CMOV_GR8;
- case MVT::i16: return X86::CMOV_GR16;
- case MVT::i32: return X86::CMOV_GR32;
- default:
- break;
- }
- llvm_unreachable("Unknown CMOV opcode!");
-}
-
-// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
-// They will be translated into a spin-loop or compare-exchange loop from
-//
-// ...
-// dst = atomic-fetch-op MI.addr, MI.val
-// ...
-//
-// to
-//
-// ...
-// t1 = LOAD MI.addr
-// loop:
-// t4 = phi(t1, t3 / loop)
-// t2 = OP MI.val, t4
-// EAX = t4
-// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
-// t3 = EAX
-// JNE loop
-// sink:
-// dst = t3
-// ...
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
-
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
- "Unexpected number of operands");
-
- assert(MI->hasOneMemOperand() &&
- "Expected atomic-load-op to have one memoperand");
-
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
-
- unsigned DstReg, SrcReg;
- unsigned MemOpndSlot;
-
- unsigned CurOp = 0;
-
- DstReg = MI->getOperand(CurOp++).getReg();
- MemOpndSlot = CurOp;
- CurOp += X86::AddrNumOperands;
- SrcReg = MI->getOperand(CurOp++).getReg();
-
- const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- MVT::SimpleValueType VT = *RC->vt_begin();
- unsigned t1 = MRI.createVirtualRegister(RC);
- unsigned t2 = MRI.createVirtualRegister(RC);
- unsigned t3 = MRI.createVirtualRegister(RC);
- unsigned t4 = MRI.createVirtualRegister(RC);
- unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
-
- unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
- unsigned LOADOpc = getLoadOpcode(VT);
-
- // For the atomic load-arith operator, we generate
- //
- // thisMBB:
- // t1 = LOAD [MI.addr]
- // mainMBB:
- // t4 = phi(t1 / thisMBB, t3 / mainMBB)
- // t1 = OP MI.val, EAX
- // EAX = t4
- // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
- // t3 = EAX
- // JNE mainMBB
- // sinkMBB:
- // dst = t3
-
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(I, mainMBB);
- MF->insert(I, sinkMBB);
-
- MachineInstrBuilder MIB;
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- // thisMBB:
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
- unsigned flags = (*MMOI)->getFlags();
- flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
- (*MMOI)->getSize(),
- (*MMOI)->getBaseAlignment(),
- (*MMOI)->getTBAAInfo(),
- (*MMOI)->getRanges());
- MIB.addMemOperand(MMO);
- }
-
- thisMBB->addSuccessor(mainMBB);
-
- // mainMBB:
- MachineBasicBlock *origMainMBB = mainMBB;
-
- // Add a PHI.
- MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic-load-op opcode!");
- case X86::ATOMAND8:
- case X86::ATOMAND16:
- case X86::ATOMAND32:
- case X86::ATOMAND64:
- case X86::ATOMOR8:
- case X86::ATOMOR16:
- case X86::ATOMOR32:
- case X86::ATOMOR64:
- case X86::ATOMXOR8:
- case X86::ATOMXOR16:
- case X86::ATOMXOR32:
- case X86::ATOMXOR64: {
- unsigned ARITHOpc = getNonAtomicOpcode(Opc);
- BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
- .addReg(t4);
- break;
- }
- case X86::ATOMNAND8:
- case X86::ATOMNAND16:
- case X86::ATOMNAND32:
- case X86::ATOMNAND64: {
- unsigned Tmp = MRI.createVirtualRegister(RC);
- unsigned NOTOpc;
- unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
- BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
- .addReg(t4);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
- break;
- }
- case X86::ATOMMAX8:
- case X86::ATOMMAX16:
- case X86::ATOMMAX32:
- case X86::ATOMMAX64:
- case X86::ATOMMIN8:
- case X86::ATOMMIN16:
- case X86::ATOMMIN32:
- case X86::ATOMMIN64:
- case X86::ATOMUMAX8:
- case X86::ATOMUMAX16:
- case X86::ATOMUMAX32:
- case X86::ATOMUMAX64:
- case X86::ATOMUMIN8:
- case X86::ATOMUMIN16:
- case X86::ATOMUMIN32:
- case X86::ATOMUMIN64: {
- unsigned CMPOpc;
- unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
-
- BuildMI(mainMBB, DL, TII->get(CMPOpc))
- .addReg(SrcReg)
- .addReg(t4);
-
- if (Subtarget->hasCMov()) {
- if (VT != MVT::i8) {
- // Native support
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
- .addReg(SrcReg)
- .addReg(t4);
- } else {
- // Promote i8 to i32 to use CMOV32
- const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
- const TargetRegisterClass *RC32 =
- TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
- unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
- unsigned AccReg32 = MRI.createVirtualRegister(RC32);
- unsigned Tmp = MRI.createVirtualRegister(RC32);
-
- unsigned Undef = MRI.createVirtualRegister(RC32);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
-
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
- .addReg(Undef)
- .addReg(SrcReg)
- .addImm(X86::sub_8bit);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
- .addReg(Undef)
- .addReg(t4)
- .addImm(X86::sub_8bit);
-
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
- .addReg(SrcReg32)
- .addReg(AccReg32);
-
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
- .addReg(Tmp, 0, X86::sub_8bit);
- }
- } else {
- // Use pseudo select and lower them.
- assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
- "Invalid atomic-load-op transformation!");
- unsigned SelOpc = getPseudoCMOVOpc(VT);
- X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
- assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
- MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
- .addReg(SrcReg).addReg(t4)
- .addImm(CC);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // Replace the original PHI node as mainMBB is changed after CMOV
- // lowering.
- BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
- Phi->eraseFromParent();
- }
- break;
- }
- }
-
- // Copy PhyReg back from virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
- .addReg(t4);
-
- MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- MIB.addReg(t2);
- MIB.setMemRefs(MMOBegin, MMOEnd);
-
- // Copy PhyReg back to virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
- .addReg(PhyReg);
-
- BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
-
- mainMBB->addSuccessor(origMainMBB);
- mainMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstReg)
- .addReg(t3);
-
- MI->eraseFromParent();
- return sinkMBB;
-}
-
-// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
-// instructions. They will be translated into a spin-loop or compare-exchange
-// loop from
-//
-// ...
-// dst = atomic-fetch-op MI.addr, MI.val
-// ...
-//
-// to
-//
-// ...
-// t1L = LOAD [MI.addr + 0]
-// t1H = LOAD [MI.addr + 4]
-// loop:
-// t4L = phi(t1L, t3L / loop)
-// t4H = phi(t1H, t3H / loop)
-// t2L = OP MI.val.lo, t4L
-// t2H = OP MI.val.hi, t4H
-// EAX = t4L
-// EDX = t4H
-// EBX = t2L
-// ECX = t2H
-// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
-// t3L = EAX
-// t3H = EDX
-// JNE loop
-// sink:
-// dstL = t3L
-// dstH = t3H
-// ...
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- MachineFunction *MF = MBB->getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
-
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
- "Unexpected number of operands");
-
- assert(MI->hasOneMemOperand() &&
- "Expected atomic-load-op32 to have one memoperand");
-
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
-
- unsigned DstLoReg, DstHiReg;
- unsigned SrcLoReg, SrcHiReg;
- unsigned MemOpndSlot;
-
- unsigned CurOp = 0;
-
- DstLoReg = MI->getOperand(CurOp++).getReg();
- DstHiReg = MI->getOperand(CurOp++).getReg();
- MemOpndSlot = CurOp;
- CurOp += X86::AddrNumOperands;
- SrcLoReg = MI->getOperand(CurOp++).getReg();
- SrcHiReg = MI->getOperand(CurOp++).getReg();
-
- const TargetRegisterClass *RC = &X86::GR32RegClass;
- const TargetRegisterClass *RC8 = &X86::GR8RegClass;
-
- unsigned t1L = MRI.createVirtualRegister(RC);
- unsigned t1H = MRI.createVirtualRegister(RC);
- unsigned t2L = MRI.createVirtualRegister(RC);
- unsigned t2H = MRI.createVirtualRegister(RC);
- unsigned t3L = MRI.createVirtualRegister(RC);
- unsigned t3H = MRI.createVirtualRegister(RC);
- unsigned t4L = MRI.createVirtualRegister(RC);
- unsigned t4H = MRI.createVirtualRegister(RC);
-
- unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
- unsigned LOADOpc = X86::MOV32rm;
-
- // For the atomic load-arith operator, we generate
- //
- // thisMBB:
- // t1L = LOAD [MI.addr + 0]
- // t1H = LOAD [MI.addr + 4]
- // mainMBB:
- // t4L = phi(t1L / thisMBB, t3L / mainMBB)
- // t4H = phi(t1H / thisMBB, t3H / mainMBB)
- // t2L = OP MI.val.lo, t4L
- // t2H = OP MI.val.hi, t4H
- // EBX = t2L
- // ECX = t2H
- // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
- // t3L = EAX
- // t3H = EDX
- // JNE loop
- // sinkMBB:
- // dstL = t3L
- // dstH = t3H
-
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(I, mainMBB);
- MF->insert(I, sinkMBB);
-
- MachineInstrBuilder MIB;
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- // thisMBB:
- // Lo
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
- unsigned flags = (*MMOI)->getFlags();
- flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
- (*MMOI)->getSize(),
- (*MMOI)->getBaseAlignment(),
- (*MMOI)->getTBAAInfo(),
- (*MMOI)->getRanges());
- MIB.addMemOperand(MMO);
- };
- MachineInstr *LowMI = MIB;
-
- // Hi
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
- MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- } else {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- }
- MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
-
- thisMBB->addSuccessor(mainMBB);
-
- // mainMBB:
- MachineBasicBlock *origMainMBB = mainMBB;
-
- // Add PHIs.
- MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
- .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
- MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
- .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
- case X86::ATOMAND6432:
- case X86::ATOMOR6432:
- case X86::ATOMXOR6432:
- case X86::ATOMADD6432:
- case X86::ATOMSUB6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
- .addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
- .addReg(SrcHiReg);
- break;
- }
- case X86::ATOMNAND6432: {
- unsigned HiOpc, NOTOpc;
- unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
- unsigned TmpL = MRI.createVirtualRegister(RC);
- unsigned TmpH = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
- .addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
- .addReg(t4H);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
- break;
- }
- case X86::ATOMMAX6432:
- case X86::ATOMMIN6432:
- case X86::ATOMUMAX6432:
- case X86::ATOMUMIN6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- unsigned cL = MRI.createVirtualRegister(RC8);
- unsigned cH = MRI.createVirtualRegister(RC8);
- unsigned cL32 = MRI.createVirtualRegister(RC);
- unsigned cH32 = MRI.createVirtualRegister(RC);
- unsigned cc = MRI.createVirtualRegister(RC);
- // cl := cmp src_lo, lo
- BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcLoReg).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
- BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
- // ch := cmp src_hi, hi
- BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcHiReg).addReg(t4H);
- BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
- BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
- // cc := if (src_hi == hi) ? cl : ch;
- if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
- .addReg(cH32).addReg(cL32);
- } else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
- .addReg(cH32).addReg(cL32)
- .addImm(X86::COND_E);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- }
- BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
- if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
- .addReg(SrcLoReg).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
- .addReg(SrcHiReg).addReg(t4H);
- } else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
- .addReg(SrcLoReg).addReg(t4L)
- .addImm(X86::COND_NE);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
- // 2nd CMOV lowering.
- mainMBB->addLiveIn(X86::EFLAGS);
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
- .addReg(SrcHiReg).addReg(t4H)
- .addImm(X86::COND_NE);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // Replace the original PHI node as mainMBB is changed after CMOV
- // lowering.
- BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
- .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
- BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
- .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
- PhiL->eraseFromParent();
- PhiH->eraseFromParent();
- }
- break;
- }
- case X86::ATOMSWAP6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
- break;
- }
- }
-
- // Copy EDX:EAX back from HiReg:LoReg
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
- // Copy ECX:EBX from t1H:t1L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
-
- MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- MIB.setMemRefs(MMOBegin, MMOEnd);
-
- // Copy EDX:EAX back to t3H:t3L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
-
- BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
-
- mainMBB->addSuccessor(origMainMBB);
- mainMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstLoReg)
- .addReg(t3L);
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstHiReg)
- .addReg(t3H);
-
- MI->eraseFromParent();
- return sinkMBB;
-}
-
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
case X86::XBEGIN:
return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
- // Atomic Lowering.
- case X86::ATOMAND8:
- case X86::ATOMAND16:
- case X86::ATOMAND32:
- case X86::ATOMAND64:
- // Fall through
- case X86::ATOMOR8:
- case X86::ATOMOR16:
- case X86::ATOMOR32:
- case X86::ATOMOR64:
- // Fall through
- case X86::ATOMXOR16:
- case X86::ATOMXOR8:
- case X86::ATOMXOR32:
- case X86::ATOMXOR64:
- // Fall through
- case X86::ATOMNAND8:
- case X86::ATOMNAND16:
- case X86::ATOMNAND32:
- case X86::ATOMNAND64:
- // Fall through
- case X86::ATOMMAX8:
- case X86::ATOMMAX16:
- case X86::ATOMMAX32:
- case X86::ATOMMAX64:
- // Fall through
- case X86::ATOMMIN8:
- case X86::ATOMMIN16:
- case X86::ATOMMIN32:
- case X86::ATOMMIN64:
- // Fall through
- case X86::ATOMUMAX8:
- case X86::ATOMUMAX16:
- case X86::ATOMUMAX32:
- case X86::ATOMUMAX64:
- // Fall through
- case X86::ATOMUMIN8:
- case X86::ATOMUMIN16:
- case X86::ATOMUMIN32:
- case X86::ATOMUMIN64:
- return EmitAtomicLoadArith(MI, BB);
-
- // This group does 64-bit operations on a 32-bit host.
- case X86::ATOMAND6432:
- case X86::ATOMOR6432:
- case X86::ATOMXOR6432:
- case X86::ATOMNAND6432:
- case X86::ATOMADD6432:
- case X86::ATOMSUB6432:
- case X86::ATOMMAX6432:
- case X86::ATOMMIN6432:
- case X86::ATOMUMAX6432:
- case X86::ATOMUMIN6432:
- case X86::ATOMSWAP6432:
- return EmitAtomicLoadArith6432(MI, BB);
-
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
// XTEST - Test if in transactional execution.
XTEST,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
- // Atomic 64-bit binary operations.
- ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMMAX64_DAG,
- ATOMMIN64_DAG,
- ATOMUMAX64_DAG,
- ATOMUMIN64_DAG,
- ATOMSWAP64_DAG,
-
// LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
- LCMPXCHG_DAG,
+ LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
} // UsesCustomInserter = 1, Uses = [EFLAGS]
-//===----------------------------------------------------------------------===//
-// Atomic Instruction Pseudo Instructions
-//===----------------------------------------------------------------------===//
-
-// Pseudo atomic instructions
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
- let Defs = [EFLAGS, AL] in
- def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
- (ins i8mem:$ptr, GR8:$val),
- !strconcat(mnemonic, "8 PSEUDO!"), []>;
- let Defs = [EFLAGS, AX] in
- def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
- (ins i16mem:$ptr, GR16:$val),
- !strconcat(mnemonic, "16 PSEUDO!"), []>;
- let Defs = [EFLAGS, EAX] in
- def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
- (ins i32mem:$ptr, GR32:$val),
- !strconcat(mnemonic, "32 PSEUDO!"), []>;
- let Defs = [EFLAGS, RAX] in
- def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
- (ins i64mem:$ptr, GR64:$val),
- !strconcat(mnemonic, "64 PSEUDO!"), []>;
- }
-}
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
- def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
- (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
- (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
- (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
- (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
-}
-
-// Atomic exchange, and, or, xor
-defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
-defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
-defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
-defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
-defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
-defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
-defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
-defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
-
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
- mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
- def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- !strconcat(mnemonic, "6432 PSEUDO!"), []>;
-}
-
-defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
-defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
-defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
-defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
-defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
-defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
-defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
-defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
-defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
-defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
-defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
-
//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
return *getX86TargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
bool addPreRegAlloc() override;
return new X86PassConfig(this, PM);
}
+void X86PassConfig::addIRPasses() {
+ addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
+
+ TargetPassConfig::addIRPasses();
+}
+
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
; CHECK: movl 4([[REG]]), %edx
; CHECK: LBB0_1:
; CHECK: movl %eax, %ebx
-; CHECK: addl {{%[a-z]+}}, %ebx
+; CHECK: addl $1, %ebx
; CHECK: movl %edx, %ecx
-; CHECK: adcl {{%[a-z]+}}, %ecx
+; CHECK: adcl $0, %ecx
; CHECK: lock
; CHECK-NEXT: cmpxchg8b ([[REG]])
; CHECK-NEXT: jne
; RUN: llc < %s -march=x86-64 > %t.x86-64
-; RUN: llc < %s -march=x86 > %t.x86
+; RUN: llc < %s -march=x86 -mattr=cx16 > %t.x86
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
-; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=corei7 -march=x86 -verify-machineinstrs | FileCheck %s
; 64-bit load/store on x86-32
; FIXME: The generated code can be substantially improved.
-; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
-; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV
-; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
+; RUN: llc -march=x86 -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
@sc64 = external global i64
%1 = atomicrmw max i64* @sc64, i64 5 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setl
-; LINUX: cmpl
-; LINUX: setl
+; LINUX: seta
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: setl
-; NOCMOV: cmpl
-; NOCMOV: setl
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
%2 = atomicrmw min i64* @sc64, i64 6 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setg
-; LINUX: cmpl
-; LINUX: setg
+; LINUX: setb
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: setg
-; NOCMOV: cmpl
-; NOCMOV: setg
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
%3 = atomicrmw umax i64* @sc64, i64 7 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setb
-; LINUX: cmpl
-; LINUX: setb
+; LINUX: seta
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: setb
-; NOCMOV: cmpl
-; NOCMOV: setb
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
%4 = atomicrmw umin i64* @sc64, i64 8 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: seta
-; LINUX: cmpl
-; LINUX: seta
+; LINUX: setb
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock
; LINUX-NEXT: cmpxchg8b
; LINUX: jne [[LABEL]]
-; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
-; NOCMOV: cmpl
-; NOCMOV: seta
-; NOCMOV: cmpl
-; NOCMOV: seta
-; NOCMOV: jne
-; NOCMOV: jne
-; NOCMOV: lock
-; NOCMOV-NEXT: cmpxchg8b
-; NOCMOV: jne [[LABEL]]
ret void
}
define void @tf_bug(i8* %ptr) nounwind {
; PIC-LABEL: tf_bug:
-; PIC: movl _id-L1$pb(
-; PIC: movl (_id-L1$pb)+4(
+; PIC-DAG: movl _id-L1$pb(
+; PIC-DAG: movl (_id-L1$pb)+4(
%tmp1 = atomicrmw add i64* @id, i64 1 seq_cst
%tmp2 = add i64 %tmp1, 1
%tmp3 = bitcast i8* %ptr to i64*
--- /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
+
+@var = global i128 0
+
+define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
+; CHECK-LABEL: val_compare_and_swap:
+; CHECK: movq %rsi, %rax
+; CHECK: movq %rcx, %rbx
+; CHECK: movq %r8, %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+
+ %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
+ %val = extractvalue { i128, i1 } %pair, 0
+ ret i128 %val
+}
+
+define void @fetch_and_nand(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_nand:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: movq %rdx, %rcx
+; CHECK: andq [[INCHI]], %rcx
+; CHECK: movq %rax, %rbx
+ ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK: andq %rsi, %rbx
+; CHECK: notq %rbx
+; CHECK: notq %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+ %val = atomicrmw nand i128* %p, i128 %bits release
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_or(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_or:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: movq %rax, %rbx
+ ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK: orq %rsi, %rbx
+; CHECK: movq %rdx, %rcx
+; CHECK: orq [[INCHI]], %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw or i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_add(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_add:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: movq %rax, %rbx
+ ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK: addq %rsi, %rbx
+; CHECK: movq %rdx, %rcx
+; CHECK: adcq [[INCHI]], %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw add i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_sub(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_sub:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: movq %rax, %rbx
+ ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
+; CHECK: subq %rsi, %rbx
+; CHECK: movq %rdx, %rcx
+; CHECK: sbbq [[INCHI]], %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw sub i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_min(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_min:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpq %rsi, %rax
+; CHECK: setbe [[CMP:%[a-z0-9]+]]
+; CHECK: cmpq [[INCHI]], %rdx
+; CHECK: setle [[HICMP:%[a-z0-9]+]]
+; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK: movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK: testb [[CMP]], [[CMP]]
+; CHECK: movq %rsi, %rbx
+; CHECK: cmovneq %rax, %rbx
+; CHECK: movq [[INCHI]], %rcx
+; CHECK: cmovneq %rdx, %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw min i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_max(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_max:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpq %rsi, %rax
+; CHECK: setae [[CMP:%[a-z0-9]+]]
+; CHECK: cmpq [[INCHI]], %rdx
+; CHECK: setge [[HICMP:%[a-z0-9]+]]
+; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK: movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK: testb [[CMP]], [[CMP]]
+; CHECK: movq %rsi, %rbx
+; CHECK: cmovneq %rax, %rbx
+; CHECK: movq [[INCHI]], %rcx
+; CHECK: cmovneq %rdx, %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw max i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_umin(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_umin:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpq %rsi, %rax
+; CHECK: setbe [[CMP:%[a-z0-9]+]]
+; CHECK: cmpq [[INCHI]], %rdx
+; CHECK: setbe [[HICMP:%[a-z0-9]+]]
+; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK: movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK: testb [[CMP]], [[CMP]]
+; CHECK: movq %rsi, %rbx
+; CHECK: cmovneq %rax, %rbx
+; CHECK: movq [[INCHI]], %rcx
+; CHECK: cmovneq %rdx, %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw umin i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define void @fetch_and_umax(i128* %p, i128 %bits) {
+; CHECK-LABEL: fetch_and_umax:
+; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
+; CHECK-DAG: movq (%rdi), %rax
+; CHECK-DAG: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: cmpq %rax, %rsi
+; CHECK: setb [[CMP:%[a-z0-9]+]]
+; CHECK: cmpq [[INCHI]], %rdx
+; CHECK: seta [[HICMP:%[a-z0-9]+]]
+; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK: movb [[HICMP]], [[CMP]]
+; CHECK: [[USE_LO]]:
+; CHECK: testb [[CMP]], [[CMP]]
+; CHECK: movq %rsi, %rbx
+; CHECK: cmovneq %rax, %rbx
+; CHECK: movq [[INCHI]], %rcx
+; CHECK: cmovneq %rdx, %rcx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+; CHECK: movq %rax, _var
+; CHECK: movq %rdx, _var+8
+
+ %val = atomicrmw umax i128* %p, i128 %bits seq_cst
+ store i128 %val, i128* @var, align 16
+ ret void
+}
+
+define i128 @atomic_load_seq_cst(i128* %p) {
+; CHECK-LABEL: atomic_load_seq_cst:
+; CHECK: xorl %eax, %eax
+; CHECK: xorl %edx, %edx
+; CHECK: xorl %ebx, %ebx
+; CHECK: xorl %ecx, %ecx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+
+ %r = load atomic i128* %p seq_cst, align 16
+ ret i128 %r
+}
+
+define i128 @atomic_load_relaxed(i128* %p) {
+; CHECK: atomic_load_relaxed:
+; CHECK: xorl %eax, %eax
+; CHECK: xorl %edx, %edx
+; CHECK: xorl %ebx, %ebx
+; CHECK: xorl %ecx, %ecx
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+
+ %r = load atomic i128* %p monotonic, align 16
+ ret i128 %r
+}
+
+define void @atomic_store_seq_cst(i128* %p, i128 %in) {
+; CHECK-LABEL: atomic_store_seq_cst:
+; CHECK: movq %rdx, %rcx
+; CHECK: movq %rsi, %rbx
+; CHECK: movq (%rdi), %rax
+; CHECK: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+ store atomic i128 %in, i128* %p seq_cst, align 16
+ ret void
+}
+
+define void @atomic_store_release(i128* %p, i128 %in) {
+; CHECK-LABEL: atomic_store_release:
+; CHECK: movq %rdx, %rcx
+; CHECK: movq %rsi, %rbx
+; CHECK: movq (%rdi), %rax
+; CHECK: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+ store atomic i128 %in, i128* %p release, align 16
+ ret void
+}
+
+define void @atomic_store_relaxed(i128* %p, i128 %in) {
+; CHECK-LABEL: atomic_store_relaxed:
+; CHECK: movq %rdx, %rcx
+; CHECK: movq %rsi, %rbx
+; CHECK: movq (%rdi), %rax
+; CHECK: movq 8(%rdi), %rdx
+
+; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: lock
+; CHECK: cmpxchg16b (%rdi)
+; CHECK: jne [[LOOP]]
+
+ store atomic i128 %in, i128* %p unordered, align 16
+ ret void
+}
@sc16 = external global i16
define void @atomic_fetch_add16() nounwind {
-; X64: atomic_fetch_add16
-; X32: atomic_fetch_add16
+; X64-LABEL: atomic_fetch_add16
+; X32-LABEL: atomic_fetch_add16
entry:
; 32-bit
%t1 = atomicrmw add i16* @sc16, i16 1 acquire
}
define void @atomic_fetch_sub16() nounwind {
-; X64: atomic_fetch_sub16
-; X32: atomic_fetch_sub16
+; X64-LABEL: atomic_fetch_sub16
+; X32-LABEL: atomic_fetch_sub16
%t1 = atomicrmw sub i16* @sc16, i16 1 acquire
; X64: lock
; X64: decw
}
define void @atomic_fetch_and16() nounwind {
-; X64: atomic_fetch_and16
-; X32: atomic_fetch_and16
+; X64-LABEL: atomic_fetch_and16
+; X32-LABEL: atomic_fetch_and16
%t1 = atomicrmw and i16* @sc16, i16 3 acquire
; X64: lock
; X64: andw $3, {{.*}} # encoding: [0xf0,0x66
; X32: lock
; X32: andw $3
%t2 = atomicrmw and i16* @sc16, i16 5 acquire
-; X64: andw
+; X64: andl
; X64: lock
; X64: cmpxchgw
-; X32: andw
+; X32: andl
; X32: lock
; X32: cmpxchgw
%t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
}
define void @atomic_fetch_or16() nounwind {
-; X64: atomic_fetch_or16
-; X32: atomic_fetch_or16
+; X64-LABEL: atomic_fetch_or16
+; X32-LABEL: atomic_fetch_or16
%t1 = atomicrmw or i16* @sc16, i16 3 acquire
; X64: lock
; X64: orw $3, {{.*}} # encoding: [0xf0,0x66
; X32: lock
; X32: orw $3
%t2 = atomicrmw or i16* @sc16, i16 5 acquire
-; X64: orw
+; X64: orl
; X64: lock
; X64: cmpxchgw
-; X32: orw
+; X32: orl
; X32: lock
; X32: cmpxchgw
%t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
}
define void @atomic_fetch_xor16() nounwind {
-; X64: atomic_fetch_xor16
-; X32: atomic_fetch_xor16
+; X64-LABEL: atomic_fetch_xor16
+; X32-LABEL: atomic_fetch_xor16
%t1 = atomicrmw xor i16* @sc16, i16 3 acquire
; X64: lock
; X64: xorw $3, {{.*}} # encoding: [0xf0,0x66
; X32: lock
; X32: xorw $3
%t2 = atomicrmw xor i16* @sc16, i16 5 acquire
-; X64: xorw
+; X64: xorl
; X64: lock
; X64: cmpxchgw
-; X32: xorw
+; X32: xorl
; X32: lock
; X32: cmpxchgw
%t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
}
define void @atomic_fetch_nand16(i16 %x) nounwind {
-; X64: atomic_fetch_nand16
-; X32: atomic_fetch_nand16
+; X64-LABEL: atomic_fetch_nand16
+; X32-LABEL: atomic_fetch_nand16
%t1 = atomicrmw nand i16* @sc16, i16 %x acquire
-; X64: andw
-; X64: notw
+; X64: andl
+; X64: notl
; X64: lock
; X64: cmpxchgw
-; X32: andw
-; X32: notw
+; X32: andl
+; X32: notl
; X32: lock
; X32: cmpxchgw
ret void
define void @atomic_fetch_max16(i16 %x) nounwind {
%t1 = atomicrmw max i16* @sc16, i16 %x acquire
-; X64: cmpw
+; X64: movswl
+; X64: movswl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
-; X32: cmpw
+; X32: movswl
+; X32: movswl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
define void @atomic_fetch_min16(i16 %x) nounwind {
%t1 = atomicrmw min i16* @sc16, i16 %x acquire
-; X64: cmpw
+; X64: movswl
+; X64: movswl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
-; X32: cmpw
+; X32: movswl
+; X32: movswl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
define void @atomic_fetch_umax16(i16 %x) nounwind {
%t1 = atomicrmw umax i16* @sc16, i16 %x acquire
-; X64: cmpw
+; X64: movzwl
+; X64: movzwl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
-; X32: cmpw
+; X32: movzwl
+; X32: movzwl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
define void @atomic_fetch_umin16(i16 %x) nounwind {
%t1 = atomicrmw umin i16* @sc16, i16 %x acquire
-; X64: cmpw
+; X64: movzwl
+; X64: movzwl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgw
-; X32: cmpw
+
+; X32: movzwl
+; X32: movzwl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgw
@sc32 = external global i32
define void @atomic_fetch_add32() nounwind {
-; X64: atomic_fetch_add32
-; X32: atomic_fetch_add32
+; X64-LABEL: atomic_fetch_add32:
+; X32-LABEL: atomic_fetch_add32:
entry:
; 32-bit
%t1 = atomicrmw add i32* @sc32, i32 1 acquire
}
define void @atomic_fetch_sub32() nounwind {
-; X64: atomic_fetch_sub32
-; X32: atomic_fetch_sub32
+; X64-LABEL: atomic_fetch_sub32:
+; X32-LABEL: atomic_fetch_sub32:
%t1 = atomicrmw sub i32* @sc32, i32 1 acquire
; X64: lock
; X64: decl
}
define void @atomic_fetch_and32() nounwind {
-; X64: atomic_fetch_and32
-; X32: atomic_fetch_and32
+; X64-LABEL: atomic_fetch_and32:
+; X32-LABEL: atomic_fetch_and32:
%t1 = atomicrmw and i32* @sc32, i32 3 acquire
; X64: lock
; X64: andl $3
}
define void @atomic_fetch_or32() nounwind {
-; X64: atomic_fetch_or32
-; X32: atomic_fetch_or32
+; X64-LABEL: atomic_fetch_or32:
+; X32-LABEL: atomic_fetch_or32:
%t1 = atomicrmw or i32* @sc32, i32 3 acquire
; X64: lock
; X64: orl $3
}
define void @atomic_fetch_xor32() nounwind {
-; X64: atomic_fetch_xor32
-; X32: atomic_fetch_xor32
+; X64-LABEL: atomic_fetch_xor32:
+; X32-LABEL: atomic_fetch_xor32:
%t1 = atomicrmw xor i32* @sc32, i32 3 acquire
; X64: lock
; X64: xorl $3
}
define void @atomic_fetch_nand32(i32 %x) nounwind {
-; X64: atomic_fetch_nand32
-; X32: atomic_fetch_nand32
+; X64-LABEL: atomic_fetch_nand32:
+; X32-LABEL: atomic_fetch_nand32:
%t1 = atomicrmw nand i32* @sc32, i32 %x acquire
; X64: andl
; X64: notl
}
define void @atomic_fetch_max32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_max32:
+; X32-LABEL: atomic_fetch_max32:
+
%t1 = atomicrmw max i32* @sc32, i32 %x acquire
-; X64: cmpl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
-; X32: cmpl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
-; NOCMOV: cmpl
-; NOCMOV: jl
+; NOCMOV: subl
+; NOCMOV: jge
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
}
define void @atomic_fetch_min32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_min32:
+; X32-LABEL: atomic_fetch_min32:
+; NOCMOV-LABEL: atomic_fetch_min32:
+
%t1 = atomicrmw min i32* @sc32, i32 %x acquire
-; X64: cmpl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
-; X32: cmpl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
-; NOCMOV: cmpl
-; NOCMOV: jg
+; NOCMOV: subl
+; NOCMOV: jle
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
}
define void @atomic_fetch_umax32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_umax32:
+; X32-LABEL: atomic_fetch_umax32:
+; NOCMOV-LABEL: atomic_fetch_umax32:
+
%t1 = atomicrmw umax i32* @sc32, i32 %x acquire
-; X64: cmpl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
-; X32: cmpl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
-; NOCMOV: cmpl
-; NOCMOV: jb
+; NOCMOV: subl
+; NOCMOV: ja
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
}
define void @atomic_fetch_umin32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_umin32:
+; X32-LABEL: atomic_fetch_umin32:
+; NOCMOV-LABEL: atomic_fetch_umin32:
+
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
-; X64: cmpl
+; X64: subl
; X64: cmov
; X64: lock
; X64: cmpxchgl
-; X32: cmpl
+; X32: subl
; X32: cmov
; X32: lock
; X32: cmpxchgl
-; NOCMOV: cmpl
-; NOCMOV: ja
+; NOCMOV: subl
+; NOCMOV: jb
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
}
define void @atomic_fetch_cmpxchg32() nounwind {
+; X64-LABEL: atomic_fetch_cmpxchg32:
+; X32-LABEL: atomic_fetch_cmpxchg32:
+
%t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
; X64: lock
; X64: cmpxchgl
}
define void @atomic_fetch_store32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_store32:
+; X32-LABEL: atomic_fetch_store32:
+
store atomic i32 %x, i32* @sc32 release, align 4
; X64-NOT: lock
; X64: movl
}
define void @atomic_fetch_swap32(i32 %x) nounwind {
+; X64-LABEL: atomic_fetch_swap32:
+; X32-LABEL: atomic_fetch_swap32:
+
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
; X64-NOT: lock
; X64: xchgl
@sc64 = external global i64
define void @atomic_fetch_add64() nounwind {
-; X64: atomic_fetch_add64
+; X64-LABEL: atomic_fetch_add64:
+; X32-LABEL: atomic_fetch_add64:
entry:
%t1 = atomicrmw add i64* @sc64, i64 1 acquire
; X64: lock
}
define void @atomic_fetch_sub64() nounwind {
-; X64: atomic_fetch_sub64
+; X64-LABEL: atomic_fetch_sub64:
+; X32-LABEL: atomic_fetch_sub64:
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
; X64: lock
; X64: decq
}
define void @atomic_fetch_and64() nounwind {
-; X64: atomic_fetch_and64
+; X64-LABEL: atomic_fetch_and64:
+; X32-LABEL: atomic_fetch_and64:
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
; X64: lock
; X64: andq $3
}
define void @atomic_fetch_or64() nounwind {
-; X64: atomic_fetch_or64
+; X64-LABEL: atomic_fetch_or64:
+; X32-LABEL: atomic_fetch_or64:
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
; X64: lock
; X64: orq $3
}
define void @atomic_fetch_xor64() nounwind {
-; X64: atomic_fetch_xor64
+; X64-LABEL: atomic_fetch_xor64:
+; X32-LABEL: atomic_fetch_xor64:
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
; X64: lock
; X64: xorq $3
}
define void @atomic_fetch_nand64(i64 %x) nounwind {
-; X64: atomic_fetch_nand64
-; X32: atomic_fetch_nand64
+; X64-LABEL: atomic_fetch_nand64:
+; X32-LABEL: atomic_fetch_nand64:
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
; X64: andq
; X64: notq
}
define void @atomic_fetch_max64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_max64:
+; X32-LABEL: atomic_fetch_max64:
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
-; X64: cmpq
+; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
}
define void @atomic_fetch_min64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_min64:
+; X32-LABEL: atomic_fetch_min64:
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
-; X64: cmpq
+; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
}
define void @atomic_fetch_umax64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_umax64:
+; X32-LABEL: atomic_fetch_umax64:
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
-; X64: cmpq
+; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
}
define void @atomic_fetch_umin64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_umin64:
+; X32-LABEL: atomic_fetch_umin64:
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
-; X64: cmpq
+; X64: subq
; X64: cmov
; X64: lock
; X64: cmpxchgq
}
define void @atomic_fetch_cmpxchg64() nounwind {
+; X64-LABEL: atomic_fetch_cmpxchg64:
+; X32-LABEL: atomic_fetch_cmpxchg64:
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
; X64: lock
; X64: cmpxchgq
}
define void @atomic_fetch_store64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_store64:
+; X32-LABEL: atomic_fetch_store64:
store atomic i64 %x, i64* @sc64 release, align 8
; X64-NOT: lock
; X64: movq
}
define void @atomic_fetch_swap64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_swap64:
+; X32-LABEL: atomic_fetch_swap64:
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
; X64-NOT: lock
; X64: xchgq
@sc64 = external global i64
define void @atomic_fetch_add64() nounwind {
-; X32: atomic_fetch_add64
+; X64-LABEL: atomic_fetch_add64:
+; X32-LABEL: atomic_fetch_add64:
entry:
%t1 = atomicrmw add i64* @sc64, i64 1 acquire
; X32: addl
}
define void @atomic_fetch_sub64() nounwind {
-; X32: atomic_fetch_sub64
+; X64-LABEL: atomic_fetch_sub64:
+; X32-LABEL: atomic_fetch_sub64:
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
-; X32: subl
-; X32: sbbl
+; X32: addl $-1
+; X32: adcl $-1
; X32: lock
; X32: cmpxchg8b
%t2 = atomicrmw sub i64* @sc64, i64 3 acquire
-; X32: subl
-; X32: sbbl
+; X32: addl $-3
+; X32: adcl $-1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw sub i64* @sc64, i64 5 acquire
-; X32: subl
-; X32: sbbl
+; X32: addl $-5
+; X32: adcl $-1
; X32: lock
; X32: cmpxchg8b
%t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
}
define void @atomic_fetch_and64() nounwind {
-; X32: atomic_fetch_and64
+; X64-LABEL: atomic_fetch_and:64
+; X32-LABEL: atomic_fetch_and64:
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
-; X32: andl
-; X32: andl
+; X32: andl $3
+; X32-NOT: andl
; X32: lock
; X32: cmpxchg8b
- %t2 = atomicrmw and i64* @sc64, i64 5 acquire
-; X32: andl
-; X32: andl
+ %t2 = atomicrmw and i64* @sc64, i64 4294967297 acquire
+; X32: andl $1
+; X32: andl $1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
}
define void @atomic_fetch_or64() nounwind {
-; X32: atomic_fetch_or64
+; X64-LABEL: atomic_fetch_or64:
+; X32-LABEL: atomic_fetch_or64:
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
-; X32: orl
-; X32: orl
+; X32: orl $3
+; X32-NOT: orl
; X32: lock
; X32: cmpxchg8b
- %t2 = atomicrmw or i64* @sc64, i64 5 acquire
-; X32: orl
-; X32: orl
+ %t2 = atomicrmw or i64* @sc64, i64 4294967297 acquire
+; X32: orl $1
+; X32: orl $1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
}
define void @atomic_fetch_xor64() nounwind {
-; X32: atomic_fetch_xor64
+; X64-LABEL: atomic_fetch_xor:64
+; X32-LABEL: atomic_fetch_xor64:
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
; X32: xorl
-; X32: xorl
+; X32-NOT: xorl
; X32: lock
; X32: cmpxchg8b
- %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
-; X32: xorl
-; X32: xorl
+ %t2 = atomicrmw xor i64* @sc64, i64 4294967297 acquire
+; X32: xorl $1
+; X32: xorl $1
; X32: lock
; X32: cmpxchg8b
%t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
}
define void @atomic_fetch_nand64(i64 %x) nounwind {
-; X32: atomic_fetch_nand64
+; X64-LABEL: atomic_fetch_nand64:
+; X32-LABEL: atomic_fetch_nand64:
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
; X32: andl
; X32: andl
}
define void @atomic_fetch_max64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_max:64
+; X32-LABEL: atomic_fetch_max64:
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
-; X32: cmpl
-; X32: cmpl
-; X32: cmov
+; X32: subl
+; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
}
define void @atomic_fetch_min64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_min64:
+; X32-LABEL: atomic_fetch_min64:
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
-; X32: cmpl
-; X32: cmpl
-; X32: cmov
+; X32: subl
+; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
}
define void @atomic_fetch_umax64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_umax:64
+; X32-LABEL: atomic_fetch_umax64:
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
-; X32: cmpl
-; X32: cmpl
-; X32: cmov
+; X32: subl
+; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
}
define void @atomic_fetch_umin64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_umin64:
+; X32-LABEL: atomic_fetch_umin64:
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
-; X32: cmpl
-; X32: cmpl
-; X32: cmov
+; X32: subl
+; X32: subl
; X32: cmov
; X32: cmov
; X32: lock
}
define void @atomic_fetch_cmpxchg64() nounwind {
+; X64-LABEL: atomic_fetch_cmpxchg:64
+; X32-LABEL: atomic_fetch_cmpxchg64:
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
; X32: lock
; X32: cmpxchg8b
}
define void @atomic_fetch_store64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_store64:
+; X32-LABEL: atomic_fetch_store64:
store atomic i64 %x, i64* @sc64 release, align 8
; X32: lock
; X32: cmpxchg8b
}
define void @atomic_fetch_swap64(i64 %x) nounwind {
+; X64-LABEL: atomic_fetch_swap64:
+; X32-LABEL: atomic_fetch_swap64:
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
; X32: lock
; X32: xchg8b
@sc8 = external global i8
define void @atomic_fetch_add8() nounwind {
-; X64: atomic_fetch_add8
-; X32: atomic_fetch_add8
+; X64-LABEL: atomic_fetch_add8:
+; X32-LABEL: atomic_fetch_add8:
entry:
; 32-bit
%t1 = atomicrmw add i8* @sc8, i8 1 acquire
}
define void @atomic_fetch_sub8() nounwind {
-; X64: atomic_fetch_sub8
-; X32: atomic_fetch_sub8
+; X64-LABEL: atomic_fetch_sub8:
+; X32-LABEL: atomic_fetch_sub8:
%t1 = atomicrmw sub i8* @sc8, i8 1 acquire
; X64: lock
; X64: decb
}
define void @atomic_fetch_and8() nounwind {
-; X64: atomic_fetch_and8
-; X32: atomic_fetch_and8
+; X64-LABEL: atomic_fetch_and8:
+; X32-LABEL: atomic_fetch_and8:
%t1 = atomicrmw and i8* @sc8, i8 3 acquire
; X64: lock
; X64: andb $3
}
define void @atomic_fetch_or8() nounwind {
-; X64: atomic_fetch_or8
-; X32: atomic_fetch_or8
+; X64-LABEL: atomic_fetch_or8:
+; X32-LABEL: atomic_fetch_or8:
%t1 = atomicrmw or i8* @sc8, i8 3 acquire
; X64: lock
; X64: orb $3
}
define void @atomic_fetch_xor8() nounwind {
-; X64: atomic_fetch_xor8
-; X32: atomic_fetch_xor8
+; X64-LABEL: atomic_fetch_xor8:
+; X32-LABEL: atomic_fetch_xor8:
%t1 = atomicrmw xor i8* @sc8, i8 3 acquire
; X64: lock
; X64: xorb $3
}
define void @atomic_fetch_nand8(i8 %x) nounwind {
-; X64: atomic_fetch_nand8
-; X32: atomic_fetch_nand8
+; X64-LABEL: atomic_fetch_nand8:
+; X32-LABEL: atomic_fetch_nand8:
%t1 = atomicrmw nand i8* @sc8, i8 %x acquire
; X64: andb
; X64: notb
}
define void @atomic_fetch_max8(i8 %x) nounwind {
+; X64-LABEL: atomic_fetch_max8:
+; X32-LABEL: atomic_fetch_max8:
%t1 = atomicrmw max i8* @sc8, i8 %x acquire
-; X64: cmpb
-; X64: cmov
+; X64: movsbl
+; X64: movsbl
+; X64: subl
; X64: lock
; X64: cmpxchgb
-; X32: cmpb
-; X32: cmov
+; X32: movsbl
+; X32: movsbl
+; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
}
define void @atomic_fetch_min8(i8 %x) nounwind {
+; X64-LABEL: atomic_fetch_min8:
+; X32-LABEL: atomic_fetch_min8:
%t1 = atomicrmw min i8* @sc8, i8 %x acquire
-; X64: cmpb
-; X64: cmov
+; X64: movsbl
+; X64: movsbl
+; X64: subl
; X64: lock
; X64: cmpxchgb
-; X32: cmpb
-; X32: cmov
+; X32: movsbl
+; X32: movsbl
+; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
}
define void @atomic_fetch_umax8(i8 %x) nounwind {
+; X64-LABEL: atomic_fetch_umax8:
+; X32-LABEL: atomic_fetch_umax8:
%t1 = atomicrmw umax i8* @sc8, i8 %x acquire
-; X64: cmpb
-; X64: cmov
+; X64: movzbl
+; X64: movzbl
+; X64: subl
; X64: lock
; X64: cmpxchgb
-; X32: cmpb
-; X32: cmov
+; X32: movzbl
+; X32: movzbl
+; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
}
define void @atomic_fetch_umin8(i8 %x) nounwind {
+; X64-LABEL: atomic_fetch_umin8:
+; X32-LABEL: atomic_fetch_umin8:
%t1 = atomicrmw umin i8* @sc8, i8 %x acquire
-; X64: cmpb
-; X64: cmov
+; X64: movzbl
+; X64: movzbl
+; X64: subl
; X64: lock
; X64: cmpxchgb
-; X32: cmpb
-; X32: cmov
+
+; X32: movzbl
+; X32: movzbl
+; X32: subl
; X32: lock
; X32: cmpxchgb
ret void
}
define void @atomic_fetch_cmpxchg8() nounwind {
+; X64-LABEL: atomic_fetch_cmpxchg8:
+; X32-LABEL: atomic_fetch_cmpxchg8:
%t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire acquire
; X64: lock
; X64: cmpxchgb
}
define void @atomic_fetch_store8(i8 %x) nounwind {
+; X64-LABEL: atomic_fetch_store8:
+; X32-LABEL: atomic_fetch_store8:
store atomic i8 %x, i8* @sc8 release, align 4
; X64-NOT: lock
; X64: movb
}
define void @atomic_fetch_swap8(i8 %x) nounwind {
+; X64-LABEL: atomic_fetch_swap8:
+; X32-LABEL: atomic_fetch_swap8:
%t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
; X64-NOT: lock
; X64: xchgb
-; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov,cx16 -verify-machineinstrs | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
%17 = extractvalue { i32, i1 } %pair17, 0
store i32 %17, i32* %old
; CHECK: movl [[R17atomic:.*]], %eax
- ; CHECK: movl $1401, %[[R17mask:[a-z]*]]
- ; CHECK: andl %eax, %[[R17mask]]
- ; CHECK: notl %[[R17mask]]
+ ; CHECK: movl %eax, %[[R17mask:[a-z]*]]
+ ; CHECK: notl %[[R17mask]]
+ ; CHECK: orl $-1402, %[[R17mask]]
; CHECK: lock
; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
; CHECK: jne
; CHECK: movl %eax,
%18 = atomicrmw nand i32* %val2, i32 1401 monotonic
store i32 %18, i32* %old
- ; CHECK: andl
- ; CHECK: andl
; CHECK: notl
; CHECK: notl
+ ; CHECK: orl $252645135
+ ; CHECK: orl $252645135
; CHECK: lock
; CHECK: cmpxchg8b
%19 = atomicrmw nand i64* %temp64, i64 17361641481138401520 monotonic
; CHECK: atomic_maxmin_i8
%1 = atomicrmw max i8* @sc8, i8 5 acquire
; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmovl
+; CHECK: movsbl
+; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL1]]
%2 = atomicrmw min i8* @sc8, i8 6 acquire
; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmovg
+; CHECK: movsbl
+; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL3]]
%3 = atomicrmw umax i8* @sc8, i8 7 acquire
; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmovb
+; CHECK: movzbl
+; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL5]]
%4 = atomicrmw umin i8* @sc8, i8 8 acquire
; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: cmova
+; CHECK: movzbl
+; CHECK: cmpl
; CHECK: lock
; CHECK-NEXT: cmpxchgb
; CHECK: jne [[LABEL7]]