X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
- TD = getTargetData();
+ TD = getDataLayout();
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// Bypass i32 with i8 on Atom when compiling with O2
if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
- addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()), Type::getInt8Ty(getGlobalContext()));
+ addBypassSlowDiv(32, 8);
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
// Setup Windows compiler runtime calls.
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f32 , Expand);
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f32 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
addLegalFPImmediate(APFloat(+0.0)); // FLD0
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
+
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
- !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
- bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->getFnAttributes().
+ hasAttribute(Attributes::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
- MF.getFunction()->hasStructRetAttr(),
+ MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() &&
isa<Function>(GV) &&
- cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ cast<Function>(GV)->getFnAttributes().
+ hasAttribute(Attributes::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
+ Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
+
+ // If the function return type is x86_fp80 and the callee return type is not,
+ // then the FP_EXTEND of the call result is not a nop. It's not safe to
+ // perform a tailcall optimization here.
+ if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
+ return false;
+
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
if (!MatchEvenMask && !MatchOddMask)
return SDValue();
-
+
SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
SDValue Op0 = SVOp->getOperand(0);
return SDValue();
}
-// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
-// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
-// constraint of matching input/output vector elements.
-SDValue
-X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- SDNode *N = Op.getNode();
- EVT VT = Op.getValueType();
- unsigned NumElts = Op.getNumOperands();
-
- // Check supported types and sub-targets.
- //
- // Only v2f32 -> v2f64 needs special handling.
- if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
- return SDValue();
-
- SDValue VecIn;
- EVT VecInVT;
- SmallVector<int, 8> Mask;
- EVT SrcVT = MVT::Other;
-
- // Check the patterns could be translated into X86vfpext.
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue In = N->getOperand(i);
- unsigned Opcode = In.getOpcode();
-
- // Skip if the element is undefined.
- if (Opcode == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
-
- // Quit if one of the elements is not defined from 'fpext'.
- if (Opcode != ISD::FP_EXTEND)
- return SDValue();
-
- // Check how the source of 'fpext' is defined.
- SDValue L2In = In.getOperand(0);
- EVT L2InVT = L2In.getValueType();
-
- // Check the original type
- if (SrcVT == MVT::Other)
- SrcVT = L2InVT;
- else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
- return SDValue();
-
- // Check whether the value being 'fpext'ed is extracted from the same
- // source.
- Opcode = L2In.getOpcode();
-
- // Quit if it's not extracted with a constant index.
- if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(L2In.getOperand(1)))
- return SDValue();
-
- SDValue ExtractedFromVec = L2In.getOperand(0);
-
- if (VecIn.getNode() == 0) {
- VecIn = ExtractedFromVec;
- VecInVT = ExtractedFromVec.getValueType();
- } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
- return SDValue();
-
- Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
- }
-
- // Quit if all operands of BUILD_VECTOR are undefined.
- if (!VecIn.getNode())
- return SDValue();
-
- // Fill the remaining mask as undef.
- for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
- Mask.push_back(-1);
-
- return DAG.getNode(X86ISD::VFPEXT, DL, VT,
- DAG.getVectorShuffle(VecInVT, DL,
- VecIn, DAG.getUNDEF(VecInVT),
- &Mask[0]));
-}
-
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (Broadcast.getNode())
return Broadcast;
- SDValue FpExt = LowerVectorFpExtend(Op, DAG);
- if (FpExt.getNode())
- return FpExt;
-
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
bool V1IsAllZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsAllZero = ISD::isBuildVectorAllZeros(V2.getNode());
- // VPSHUFB may be generated if
+ // VPSHUFB may be generated if
// (1) one of input vector is undefined or zeroinitializer.
// The mask value 0x80 puts 0 in the corresponding slot of the vector.
// And (2) the mask indexes don't cross the 128-bit lane.
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptForSize = MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
return FIST;
}
+// lowerFP_EXTEND - Custom lower ISD::FP_EXTEND whose source is v2f32: widen
+// the v2f32 input to v4f32 by concatenating with undef, then emit
+// X86ISD::VFPEXT, which yields the node's original result type (VT).
+// NOTE(review): only the v2f32 source is handled here (see the assert);
+// other types presumably never reach this hook — confirm against the
+// setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom) registration.
+SDValue X86TargetLowering::lowerFP_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue In = Op.getOperand(0);
+ EVT SVT = In.getValueType();
+
+ assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
+ In, DAG.getUNDEF(SVT)));
+}
+
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
DebugLoc dl = Op.getDebugLoc();
}
assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Not extracted from 128-bit vector.");
+ "Not extracted from 128-/256-bit vector.");
unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
SmallVector<SDValue, 8> VecIns;
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
// Sanity Check: Make sure using fp_offset makes sense.
assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
- .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ .getFunction()->getFnAttributes()
+ .hasAttribute(Attributes::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
DebugLoc dl = Op.getDebugLoc();
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode.
const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.
- const unsigned char N86R10 = X86_MC::getX86RegNum(X86::R10);
- const unsigned char N86R11 = X86_MC::getX86RegNum(X86::R11);
+ const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
+ const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
// This is storing the opcode for MOV32ri.
const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
- const unsigned char N86Reg = X86_MC::getX86RegNum(NestReg);
+ const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
}
return;
}
+ case ISD::FP_ROUND: {
+ SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ Results.push_back(V);
+ return;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue TheChain = N->getOperand(0);
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_SWAP: {
unsigned Opc;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_XOR:
Opc = X86ISD::ATOMXOR64_DAG;
break;
+ case ISD::ATOMIC_LOAD_MAX:
+ Opc = X86ISD::ATOMMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_MIN:
+ Opc = X86ISD::ATOMMIN64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMAX:
+ Opc = X86ISD::ATOMUMAX64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_UMIN:
+ Opc = X86ISD::ATOMUMIN64_DAG;
+ break;
case ISD::ATOMIC_SWAP:
Opc = X86ISD::ATOMSWAP64_DAG;
break;
case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
//===----------------------------------------------------------------------===//
// private utility function
+
+// Get CMPXCHG opcode for the specified data type.
+// Maps an integer value type (i8/i16/i32/i64) to the matching locked
+// compare-and-exchange instruction; any other type is a fatal error
+// (llvm_unreachable).
+static unsigned getCmpXChgOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::LCMPXCHG8;
+ case MVT::i16: return X86::LCMPXCHG16;
+ case MVT::i32: return X86::LCMPXCHG32;
+ case MVT::i64: return X86::LCMPXCHG64;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
+
+// Get LOAD opcode for the specified data type.
+// Maps an integer value type to the plain register-from-memory MOV used to
+// fetch the initial accumulator value before entering the cmpxchg loop;
+// asserts (llvm_unreachable) on unsupported types.
+static unsigned getLoadOpcode(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::MOV8rm;
+ case MVT::i16: return X86::MOV16rm;
+ case MVT::i32: return X86::MOV32rm;
+ case MVT::i64: return X86::MOV64rm;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid operand size!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction.
+// Translates the ATOMAND/ATOMOR/ATOMXOR pseudos (8/16/32/64-bit) into the
+// plain reg-reg ALU opcode that computes the new value inside the
+// compare-exchange loop. NAND and min/max variants are handled separately
+// by getNonAtomicOpcodeWithExtraOpc.
+static unsigned getNonAtomicOpcode(unsigned Opc) {
+ switch (Opc) {
+ case X86::ATOMAND8: return X86::AND8rr;
+ case X86::ATOMAND16: return X86::AND16rr;
+ case X86::ATOMAND32: return X86::AND32rr;
+ case X86::ATOMAND64: return X86::AND64rr;
+ case X86::ATOMOR8: return X86::OR8rr;
+ case X86::ATOMOR16: return X86::OR16rr;
+ case X86::ATOMOR32: return X86::OR32rr;
+ case X86::ATOMOR64: return X86::OR64rr;
+ case X86::ATOMXOR8: return X86::XOR8rr;
+ case X86::ATOMXOR16: return X86::XOR16rr;
+ case X86::ATOMXOR32: return X86::XOR32rr;
+ case X86::ATOMXOR64: return X86::XOR64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction with
+// extra opcode.
+// For NAND pseudos, ExtraOpc is the NOT applied after the AND (returned).
+// For MIN/MAX/UMIN/UMAX pseudos, ExtraOpc is the CMP that sets EFLAGS and the
+// returned opcode is the CMOVcc that selects the winner. The 8-bit min/max
+// cases deliberately return a 32-bit CMOV: there is no 8-bit CMOV, so the
+// caller promotes the i8 operands to i32 first (see the "Promote i8 to i32
+// to use CMOV32" path in EmitAtomicLoadArith).
+static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
+ case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
+ case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
+ case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
+ case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
+ case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
+ case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
+ case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
+ case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
+ case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
+ case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
+ case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
+ case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
+ case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
+ case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
+ case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target.
+// The returned opcode operates on the low 32-bit half and HiOpc on the high
+// half (cf. the "EBX = OP MI.val.lo, EAX / ECX = OP MI.val.hi, EDX" loop in
+// EmitAtomicLoadArith6432). For ADD/SUB the high half uses ADC/SBB so the
+// carry/borrow from the low half propagates. For the min/max variants both
+// halves return a SETcc; presumably the caller builds the 64-bit comparison
+// from those condition bytes — confirm against EmitAtomicLoadArith6432.
+static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
+ switch (Opc) {
+ case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
+ case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
+ case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
+ case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
+ case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
+ case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
+ case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
+ case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
+ case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
+ case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get opcode of the non-atomic one from the specified atomic instruction for
+// 64-bit data type on 32-bit target with extra opcode.
+// Only NAND needs the extra opcode here: both halves are ANDed (return value
+// for the low half, HiOpc for the high half) and ExtraOpc is the NOT32r
+// applied afterwards to complete the NAND.
+static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
+ unsigned &HiOpc,
+ unsigned &ExtraOpc) {
+ switch (Opc) {
+ case X86::ATOMNAND6432:
+ ExtraOpc = X86::NOT32r;
+ HiOpc = X86::AND32rr;
+ return X86::AND32rr;
+ }
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+}
+
+// Get pseudo CMOV opcode from the specified data type.
+// Returns the CMOV_GRxx pseudo-select used on subtargets without native CMOV
+// (see the !Subtarget->hasCMov() path in EmitAtomicLoadArith, where the
+// pseudo is later expanded by EmitLoweredSelect). Only i8/i16/i32 are
+// supported; no i64 case since such targets are 32-bit.
+static unsigned getPseudoCMOVOpc(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8: return X86::CMOV_GR8;
+ case MVT::i16: return X86::CMOV_GR16;
+ case MVT::i32: return X86::CMOV_GR32;
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown CMOV opcode!");
+}
+
+// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
+// They will be translated into a spin-loop or compare-exchange loop from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// EAX = LOAD MI.addr
+// loop:
+// t1 = OP MI.val, EAX
+// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+// JNE loop
+// sink:
+// dst = EAX
+// ...
MachineBasicBlock *
-X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
- MachineBasicBlock *MBB,
- unsigned regOpc,
- unsigned immOpc,
- unsigned LoadOpc,
- unsigned CXchgOpc,
- unsigned notOpc,
- unsigned EAXreg,
- const TargetRegisterClass *RC,
- bool Invert) const {
- // For the atomic bitwise operator, we generate
- // thisMBB:
- // newMBB:
- // ld t1 = [bitinstr.addr]
- // op t2 = t1, [bitinstr.val]
- // not t3 = t2 (if Invert)
- // mov EAX = t1
- // lcs dest = [bitinstr.addr], t3 [EAX is implicit]
- // bz newMBB
- // fallthrough -->nextMBB
+X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 2 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op to have one memoperand");
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg, SrcReg;
+ unsigned MemOpndSlot;
+
+ unsigned CurOp = 0;
+
+ DstReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcReg = MI->getOperand(CurOp++).getReg();
+
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ MVT::SimpleValueType VT = *RC->vt_begin();
+ unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT);
+
+ unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
+ unsigned LOADOpc = getLoadOpcode(VT);
+
+ // For the atomic load-arith operator, we generate
+ //
+ // thisMBB:
+ // EAX = LOAD [MI.addr]
+ // mainMBB:
+ // t1 = OP MI.val, EAX
+ // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
+ // JNE mainMBB
+ // sinkMBB:
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(bInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to itself and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- // Insert instructions into newMBB based on incoming instruction
- assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
- "unexpected number of operands");
- DebugLoc dl = bInstr->getDebugLoc();
- MachineOperand& destOper = bInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- int numArgs = bInstr->getNumOperands() - 1;
- for (int i=0; i < numArgs; ++i)
- argOpers[i] = &bInstr->getOperand(i+1);
-
- // x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
- int valArgIndx = lastAddrIndx + 1;
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
-
- unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
- MIB.addReg(t1);
- (*MIB).addOperand(*argOpers[valArgIndx]);
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+ mainMBB->addLiveIn(AccPhyReg);
+
+ // Copy AccPhyReg as it is used more than once.
+ unsigned AccReg = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg)
+ .addReg(AccPhyReg);
- unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
- if (Invert) {
- MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2);
+ unsigned t1 = MRI.createVirtualRegister(RC);
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op opcode!");
+ case X86::ATOMAND8:
+ case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ case X86::ATOMOR8:
+ case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ case X86::ATOMXOR8:
+ case X86::ATOMXOR16:
+ case X86::ATOMXOR32:
+ case X86::ATOMXOR64: {
+ unsigned ARITHOpc = getNonAtomicOpcode(Opc);
+ BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg)
+ .addReg(AccReg);
+ break;
+ }
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64: {
+ unsigned t2 = MRI.createVirtualRegister(RC);
+ unsigned NOTOpc;
+ unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
+ BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg)
+ .addReg(AccReg);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2);
+ break;
+ }
+ case X86::ATOMMAX8:
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ case X86::ATOMMIN8:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ case X86::ATOMUMAX8:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ case X86::ATOMUMIN8:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64: {
+ unsigned CMPOpc;
+ unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
+
+ BuildMI(mainMBB, DL, TII->get(CMPOpc))
+ .addReg(SrcReg)
+ .addReg(AccReg);
+
+ if (Subtarget->hasCMov()) {
+ if (VT != MVT::i8) {
+ // Native support
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1)
+ .addReg(SrcReg)
+ .addReg(AccReg);
+ } else {
+ // Promote i8 to i32 to use CMOV32
+ const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32);
+ unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
+ unsigned AccReg32 = MRI.createVirtualRegister(RC32);
+ unsigned t2 = MRI.createVirtualRegister(RC32);
+
+ unsigned Undef = MRI.createVirtualRegister(RC32);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
+
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
+ .addReg(Undef)
+ .addReg(SrcReg)
+ .addImm(X86::sub_8bit);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
+ .addReg(Undef)
+ .addReg(AccReg)
+ .addImm(X86::sub_8bit);
+
+ BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
+ .addReg(SrcReg32)
+ .addReg(AccReg32);
+
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1)
+ .addReg(t2, 0, X86::sub_8bit);
+ }
+ } else {
+ // Use pseudo select and lower them.
+ assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
+ "Invalid atomic-load-op transformation!");
+ unsigned SelOpc = getPseudoCMOVOpc(VT);
+ X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
+ assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
+ MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1)
+ .addReg(SrcReg).addReg(AccReg)
+ .addImm(CC);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
}
- else
- t3 = t2;
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
+ // Copy AccPhyReg back from virtual register.
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg)
+ .addReg(AccReg);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
MIB.addReg(t1);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
- MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t3);
- assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(bInstr->memoperands_begin(),
- bInstr->memoperands_end());
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
- MIB.addReg(EAXreg);
+ // sinkMBB:
+ sinkMBB->addLiveIn(AccPhyReg);
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(AccPhyReg);
- bInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+ MI->eraseFromParent();
+ return sinkMBB;
}
-// private utility function: 64 bit atomics on 32 bit host.
+// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
+// instructions. They will be translated into a spin-loop or compare-exchange
+// loop from
+//
+// ...
+// dst = atomic-fetch-op MI.addr, MI.val
+// ...
+//
+// to
+//
+// ...
+// EAX = LOAD [MI.addr + 0]
+// EDX = LOAD [MI.addr + 4]
+// loop:
+// EBX = OP MI.val.lo, EAX
+// ECX = OP MI.val.hi, EDX
+// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+// JNE loop
+// sink:
+// dst = EDX:EAX
+// ...
MachineBasicBlock *
-X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
- MachineBasicBlock *MBB,
- unsigned regOpcL,
- unsigned regOpcH,
- unsigned immOpcL,
- unsigned immOpcH,
- bool Invert) const {
- // For the atomic bitwise operator, we generate
- // thisMBB (instructions are in pairs, except cmpxchg8b)
- // ld t1,t2 = [bitinstr.addr]
- // newMBB:
- // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
- // op t5, t6 <- out1, out2, [bitinstr.val]
- // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
- // neg t7, t8 < t5, t6 (if Invert)
- // mov ECX, EBX <- t5, t6
- // mov EAX, EDX <- t1, t2
- // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
- // mov t3, t4 <- EAX, EDX
- // bz newMBB
- // result in out1, out2
- // fallthrough -->nextMBB
-
- const TargetRegisterClass *RC = &X86::GR32RegClass;
- const unsigned LoadOpc = X86::MOV32rm;
- const unsigned NotOpc = X86::NOT32r;
+X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ DebugLoc DL = MI->getDebugLoc();
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(bInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to itself and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- DebugLoc dl = bInstr->getDebugLoc();
- // Insert instructions into newMBB based on incoming instruction
- // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
- assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
- "unexpected number of operands");
- MachineOperand& dest1Oper = bInstr->getOperand(0);
- MachineOperand& dest2Oper = bInstr->getOperand(1);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
- argOpers[i] = &bInstr->getOperand(i+2);
-
- // We use some of the operands multiple times, so conservatively just
- // clear any kill flags that might be present.
- if (argOpers[i]->isReg() && argOpers[i]->isUse())
- argOpers[i]->setIsKill(false);
- }
-
- // x86 address has 5 operands: base, index, scale, displacement, and segment.
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
- // add 4 to displacement.
- for (int i=0; i <= lastAddrIndx-2; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MachineOperand newOp3 = *(argOpers[3]);
- if (newOp3.isImm())
- newOp3.setImm(newOp3.getImm()+4);
- else
- newOp3.setOffset(newOp3.getOffset()+4);
- (*MIB).addOperand(newOp3);
- (*MIB).addOperand(*argOpers[lastAddrIndx]);
-
- // t3/4 are defined later, at the bottom of the loop
- unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
- unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
- BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
- BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
- .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
-
- // The subsequent operations should be using the destination registers of
- // the PHI instructions.
- t1 = dest1Oper.getReg();
- t2 = dest2Oper.getReg();
-
- int valArgIndx = lastAddrIndx + 1;
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
- unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
- unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
- if (regOpcL != X86::MOV32rr)
- MIB.addReg(t1);
- (*MIB).addOperand(*argOpers[valArgIndx]);
- assert(argOpers[valArgIndx + 1]->isReg() ==
- argOpers[valArgIndx]->isReg());
- assert(argOpers[valArgIndx + 1]->isImm() ==
- argOpers[valArgIndx]->isImm());
- if (argOpers[valArgIndx + 1]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
- else
- MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
- if (regOpcH != X86::MOV32rr)
- MIB.addReg(t2);
- (*MIB).addOperand(*argOpers[valArgIndx + 1]);
-
- unsigned t7, t8;
- if (Invert) {
- t7 = F->getRegInfo().createVirtualRegister(RC);
- t8 = F->getRegInfo().createVirtualRegister(RC);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5);
- MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6);
- } else {
- t7 = t5;
- t8 = t6;
- }
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
- MIB.addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
- MIB.addReg(t2);
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
+ "Unexpected number of operands");
+
+ assert(MI->hasOneMemOperand() &&
+ "Expected atomic-load-op32 to have one memoperand");
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
- MIB.addReg(t7);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
- MIB.addReg(t8);
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
+ unsigned DstLoReg, DstHiReg;
+ unsigned SrcLoReg, SrcHiReg;
+ unsigned MemOpndSlot;
- assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(bInstr->memoperands_begin(),
- bInstr->memoperands_end());
+ unsigned CurOp = 0;
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
- MIB.addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
- MIB.addReg(X86::EDX);
+ DstLoReg = MI->getOperand(CurOp++).getReg();
+ DstHiReg = MI->getOperand(CurOp++).getReg();
+ MemOpndSlot = CurOp;
+ CurOp += X86::AddrNumOperands;
+ SrcLoReg = MI->getOperand(CurOp++).getReg();
+ SrcHiReg = MI->getOperand(CurOp++).getReg();
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
+ const TargetRegisterClass *RC8 = &X86::GR8RegClass;
- bInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
-}
+ unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
+ unsigned LOADOpc = X86::MOV32rm;
-// private utility function
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
- MachineBasicBlock *MBB,
- unsigned cmovOpc) const {
- // For the atomic min/max operator, we generate
- // thisMBB:
- // newMBB:
- // ld t1 = [min/max.addr]
- // mov t2 = [min/max.val]
- // cmp t1, t2
- // cmov[cond] t2 = t1
- // mov EAX = t1
- // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
- // bz newMBB
- // fallthrough -->nextMBB
+ // For the atomic load-arith operator, we generate
//
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ // thisMBB:
+ // EAX = LOAD [MI.addr + 0]
+ // EDX = LOAD [MI.addr + 4]
+ // mainMBB:
+ // EBX = OP MI.vallo, EAX
+ // ECX = OP MI.valhi, EDX
+ // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
+ // JNE mainMBB
+ // sinkMBB:
- /// First build the CFG
- MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(MBBIter, newMBB);
- F->insert(MBBIter, nextMBB);
-
- // Transfer the remainder of thisMBB and its successor edges to nextMBB.
- nextMBB->splice(nextMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(mInstr)),
- thisMBB->end());
- nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- // Update thisMBB to fall through to newMBB
- thisMBB->addSuccessor(newMBB);
-
- // newMBB jumps to newMBB and fall through to nextMBB
- newMBB->addSuccessor(nextMBB);
- newMBB->addSuccessor(newMBB);
-
- DebugLoc dl = mInstr->getDebugLoc();
- // Insert instructions into newMBB based on incoming instruction
- assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
- "unexpected number of operands");
- MachineOperand& destOper = mInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86::AddrNumOperands];
- int numArgs = mInstr->getNumOperands() - 1;
- for (int i=0; i < numArgs; ++i)
- argOpers[i] = &mInstr->getOperand(i+1);
-
- // x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
- int valArgIndx = lastAddrIndx + 1;
-
- unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
-
- // We only support register and immediate values
- assert((argOpers[valArgIndx]->isReg() ||
- argOpers[valArgIndx]->isImm()) &&
- "invalid operand");
-
- unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
- else
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
- (*MIB).addOperand(*argOpers[valArgIndx]);
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
- MIB.addReg(t1);
+ MachineInstrBuilder MIB;
- MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
- MIB.addReg(t1);
- MIB.addReg(t2);
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // thisMBB:
+ // Lo
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ // Hi
+ MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX);
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp) {
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
+ // Don't forget to transfer the target flag.
+ MachineOperand &MO = MIB->getOperand(MIB->getNumOperands()-1);
+ MO.setTargetFlags(MI->getOperand(MemOpndSlot + i).getTargetFlags());
+ } else
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ thisMBB->addSuccessor(mainMBB);
+
+ // mainMBB:
+ MachineBasicBlock *origMainMBB = mainMBB;
+ mainMBB->addLiveIn(X86::EAX);
+ mainMBB->addLiveIn(X86::EDX);
+
+ // Copy EDX:EAX as they are used more than once.
+ unsigned LoReg = MRI.createVirtualRegister(RC);
+ unsigned HiReg = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX);
+
+ unsigned t1L = MRI.createVirtualRegister(RC);
+ unsigned t1H = MRI.createVirtualRegister(RC);
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
+ case X86::ATOMAND6432:
+ case X86::ATOMOR6432:
+ case X86::ATOMXOR6432:
+ case X86::ATOMADD6432:
+ case X86::ATOMSUB6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg).addReg(HiReg);
+ break;
+ }
+ case X86::ATOMNAND6432: {
+ unsigned HiOpc, NOTOpc;
+ unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
+ unsigned t2L = MRI.createVirtualRegister(RC);
+ unsigned t2H = MRI.createVirtualRegister(RC);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L);
+ BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H);
+ break;
+ }
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ unsigned cL = MRI.createVirtualRegister(RC8);
+ unsigned cH = MRI.createVirtualRegister(RC8);
+ unsigned cL32 = MRI.createVirtualRegister(RC);
+ unsigned cH32 = MRI.createVirtualRegister(RC);
+ unsigned cc = MRI.createVirtualRegister(RC);
+ // cl := cmp src_lo, lo
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
+ // ch := cmp src_hi, hi
+ BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
+ .addReg(SrcHiReg).addReg(HiReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
+ BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
+ // cc := if (src_hi == hi) ? cl : ch;
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
+ .addReg(cH32).addReg(cL32);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
+ .addReg(cH32).addReg(cL32)
+ .addImm(X86::COND_E);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
+ if (Subtarget->hasCMov()) {
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L)
+ .addReg(SrcLoReg).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H)
+ .addReg(SrcHiReg).addReg(HiReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L)
+ .addReg(SrcLoReg).addReg(LoReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H)
+ .addReg(SrcHiReg).addReg(HiReg)
+ .addImm(X86::COND_NE);
+ mainMBB = EmitLoweredSelect(MIB, mainMBB);
+ }
+ break;
+ }
+ case X86::ATOMSWAP6432: {
+ unsigned HiOpc;
+ unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
+ BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg);
+ BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg);
+ break;
+ }
+ }
- // Generate movc
- unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
- MIB.addReg(t2);
- MIB.addReg(t1);
+ // Copy EDX:EAX back from HiReg:LoReg
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg);
+ // Copy ECX:EBX from t1H:t1L
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L);
+ BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H);
+
+ MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
+ for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MI->getOperand(MemOpndSlot + i));
+ MIB.setMemRefs(MMOBegin, MMOEnd);
- // Cmp and exchange if none has modified the memory location
- MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
- for (int i=0; i <= lastAddrIndx; ++i)
- (*MIB).addOperand(*argOpers[i]);
- MIB.addReg(t3);
- assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
- (*MIB).setMemRefs(mInstr->memoperands_begin(),
- mInstr->memoperands_end());
+ BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
- MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
- MIB.addReg(X86::EAX);
+ mainMBB->addSuccessor(origMainMBB);
+ mainMBB->addSuccessor(sinkMBB);
- // insert branch
- BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
+ // sinkMBB:
+ sinkMBB->addLiveIn(X86::EAX);
+ sinkMBB->addLiveIn(X86::EDX);
- mInstr->eraseFromParent(); // The pseudo instruction is gone now.
- return nextMBB;
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstLoReg)
+ .addReg(X86::EAX);
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(TargetOpcode::COPY), DstHiReg)
+ .addReg(X86::EDX);
+
+ MI->eraseFromParent();
+ return sinkMBB;
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
return EmitMonitor(MI, BB);
// Atomic Lowering.
- case X86::ATOMMIN32:
- case X86::ATOMMAX32:
- case X86::ATOMUMIN32:
- case X86::ATOMUMAX32:
- case X86::ATOMMIN16:
- case X86::ATOMMAX16:
- case X86::ATOMUMIN16:
- case X86::ATOMUMAX16:
- case X86::ATOMMIN64:
- case X86::ATOMMAX64:
- case X86::ATOMUMIN64:
- case X86::ATOMUMAX64: {
- unsigned Opc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
- case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
- case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
- case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
- case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
- case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
- case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
- case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
- case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
- case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
- case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
- case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
- // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
- }
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
- }
-
- case X86::ATOMAND32:
- case X86::ATOMOR32:
- case X86::ATOMXOR32:
- case X86::ATOMNAND32: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND32:
- RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
- case X86::ATOMOR32:
- RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
- case X86::ATOMXOR32:
- RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
- case X86::ATOMNAND32:
- RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV32rm, X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass, Invert);
- }
-
+ case X86::ATOMAND8:
case X86::ATOMAND16:
+ case X86::ATOMAND32:
+ case X86::ATOMAND64:
+ // Fall through
+ case X86::ATOMOR8:
case X86::ATOMOR16:
+ case X86::ATOMOR32:
+ case X86::ATOMOR64:
+ // Fall through
case X86::ATOMXOR16:
- case X86::ATOMNAND16: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND16:
- RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
- case X86::ATOMOR16:
- RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
- case X86::ATOMXOR16:
- RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
- case X86::ATOMNAND16:
- RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV16rm, X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass, Invert);
- }
-
- case X86::ATOMAND8:
- case X86::ATOMOR8:
case X86::ATOMXOR8:
- case X86::ATOMNAND8: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND8:
- RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
- case X86::ATOMOR8:
- RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
- case X86::ATOMXOR8:
- RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
- case X86::ATOMNAND8:
- RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV8rm, X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass, Invert);
- }
-
- // This group is for 64-bit host.
- case X86::ATOMAND64:
- case X86::ATOMOR64:
+ case X86::ATOMXOR32:
case X86::ATOMXOR64:
- case X86::ATOMNAND64: {
- bool Invert = false;
- unsigned RegOpc, ImmOpc;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND64:
- RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
- case X86::ATOMOR64:
- RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
- case X86::ATOMXOR64:
- RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
- case X86::ATOMNAND64:
- RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
- }
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
- X86::MOV64rm, X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass, Invert);
- }
+ // Fall through
+ case X86::ATOMNAND8:
+ case X86::ATOMNAND16:
+ case X86::ATOMNAND32:
+ case X86::ATOMNAND64:
+ // Fall through
+ case X86::ATOMMAX8:
+ case X86::ATOMMAX16:
+ case X86::ATOMMAX32:
+ case X86::ATOMMAX64:
+ // Fall through
+ case X86::ATOMMIN8:
+ case X86::ATOMMIN16:
+ case X86::ATOMMIN32:
+ case X86::ATOMMIN64:
+ // Fall through
+ case X86::ATOMUMAX8:
+ case X86::ATOMUMAX16:
+ case X86::ATOMUMAX32:
+ case X86::ATOMUMAX64:
+ // Fall through
+ case X86::ATOMUMIN8:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMIN32:
+ case X86::ATOMUMIN64:
+ return EmitAtomicLoadArith(MI, BB);
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
case X86::ATOMNAND6432:
case X86::ATOMADD6432:
case X86::ATOMSUB6432:
- case X86::ATOMSWAP6432: {
- bool Invert = false;
- unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("illegal opcode!");
- case X86::ATOMAND6432:
- RegOpcL = RegOpcH = X86::AND32rr;
- ImmOpcL = ImmOpcH = X86::AND32ri;
- break;
- case X86::ATOMOR6432:
- RegOpcL = RegOpcH = X86::OR32rr;
- ImmOpcL = ImmOpcH = X86::OR32ri;
- break;
- case X86::ATOMXOR6432:
- RegOpcL = RegOpcH = X86::XOR32rr;
- ImmOpcL = ImmOpcH = X86::XOR32ri;
- break;
- case X86::ATOMNAND6432:
- RegOpcL = RegOpcH = X86::AND32rr;
- ImmOpcL = ImmOpcH = X86::AND32ri;
- Invert = true;
- break;
- case X86::ATOMADD6432:
- RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
- ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
- break;
- case X86::ATOMSUB6432:
- RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
- ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
- break;
- case X86::ATOMSWAP6432:
- RegOpcL = RegOpcH = X86::MOV32rr;
- ImmOpcL = ImmOpcH = X86::MOV32ri;
- break;
- }
- return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
- ImmOpcL, ImmOpcH, Invert);
- }
+ case X86::ATOMMAX6432:
+ case X86::ATOMMIN6432:
+ case X86::ATOMUMAX6432:
+ case X86::ATOMUMIN6432:
+ case X86::ATOMSWAP6432:
+ return EmitAtomicLoadArith6432(MI, BB);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getTargetData()->
+ unsigned NewAlign = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->getFnAttributes().
+ hasAttribute(Attributes::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||