diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index eef773e2338..634c08e54de 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -62,21 +62,19 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
 
 static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-  switch (TM.getSubtarget<X86Subtarget>().TargetType) {
-  default: llvm_unreachable("unknown subtarget type");
-  case X86Subtarget::isDarwin:
-    if (TM.getSubtarget<X86Subtarget>().is64Bit())
-      return new X8664_MachoTargetObjectFile();
+
+  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+  if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
+    if (is64Bit) return new X8664_MachoTargetObjectFile();
     return new TargetLoweringObjectFileMachO();
-  case X86Subtarget::isELF:
-    if (TM.getSubtarget<X86Subtarget>().is64Bit())
-      return new X8664_ELFTargetObjectFile(TM);
+  } else if (TM.getSubtarget<X86Subtarget>().isTargetELF()) {
+    if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
     return new X8632_ELFTargetObjectFile(TM);
-  case X86Subtarget::isMingw:
-  case X86Subtarget::isCygwin:
-  case X86Subtarget::isWindows:
+  } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
     return new TargetLoweringObjectFileCOFF();
-  }
+  }
+  llvm_unreachable("unknown subtarget type");
 }
 
 X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
@@ -345,8 +343,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   if (Subtarget->hasSSE1())
     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
 
-  if (!Subtarget->hasSSE2())
-    setOperationAction(ISD::MEMBARRIER    , MVT::Other, Expand);
+  // We may not have a libcall for MEMBARRIER so we should lower this.
+  setOperationAction(ISD::MEMBARRIER    , MVT::Other, Custom);
+
+  // On X86 and X86-64, atomic operations are lowered to locked instructions.
+  // Locked instructions, in turn, have implicit fence semantics (all memory
+  // operations are flushed before issuing the locked instruction, and they
+  // are not buffered), so we can fold away the common pattern of
+  // fence-atomic-fence.
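
The setShouldFoldAtomicFences(true) call that follows enables exactly the fold the comment above describes. A minimal standalone C++ sketch (not LLVM code) of why the fold is sound on x86:

    #include <atomic>

    std::atomic<int> counter{0};

    int bump() {
      // A sequentially consistent RMW. On x86 this becomes a single
      // "lock xadd": the LOCK prefix already orders all earlier and later
      // memory operations, so explicit fences around it are redundant.
      return counter.fetch_add(1, std::memory_order_seq_cst);

      // Conceptually, the pattern the target may now fold is:
      //   std::atomic_thread_fence(std::memory_order_seq_cst); // mfence
      //   counter.fetch_add(1, std::memory_order_relaxed);     // lock xadd
      //   std::atomic_thread_fence(std::memory_order_seq_cst); // mfence
      // into just the lock'd instruction.
    }
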
+ setShouldFoldAtomicFences(true); // Expand certain atomics setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom); @@ -611,7 +616,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); @@ -657,14 +662,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v2i32, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2f32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v1i64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); @@ -672,7 +674,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); @@ -691,7 +692,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); } } @@ -792,9 +792,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) EVT VT = SVT; // Do not attempt to promote non-128-bit vectors - if (!VT.is128BitVector()) { + if (!VT.is128BitVector()) continue; - } setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); @@ -1012,7 +1011,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -1192,6 +1190,27 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 
0 : 4; } +bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, + unsigned &Offset) const { + if (!Subtarget->isTargetLinux()) + return false; + + if (Subtarget->is64Bit()) { + // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: + Offset = 0x28; + if (getTargetMachine().getCodeModel() == CodeModel::Kernel) + AddressSpace = 256; + else + AddressSpace = 257; + } else { + // %gs:0x14 on i386 + Offset = 0x14; + AddressSpace = 256; + } + return true; +} + + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1200,19 +1219,19 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - SelectionDAG &DAG) const { + const SmallVectorImpl &Outs, + LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), - RVLocs, *DAG.getContext()); - return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_X86); } SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); @@ -1240,7 +1259,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue ValToCopy = Outs[i].Val; + SDValue ValToCopy = OutVals[i]; // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1262,7 +1281,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) - ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); + ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, + ValToCopy); } } @@ -1328,17 +1348,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, report_fatal_error("SSE register return with SSE disabled"); } + SDValue Val; + // If this is a call to a function that returns an fp value on the floating - // point stack, but where we prefer to use the value in xmm registers, copy - // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. - if ((VA.getLocReg() == X86::ST0 || - VA.getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(VA.getValVT())) { - CopyVT = MVT::f80; - } + // point stack, we must guarantee the the value is popped from the stack, so + // a CopyFromReg is not good enough - the copy instruction may be eliminated + // if the return value is not used. We use the FpGET_ST0 instructions + // instead. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) { + // If we prefer to use the value in xmm registers, copy it out as f80 and + // use a truncate to move it from fp stack reg to xmm reg. + if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; + bool isST0 = VA.getLocReg() == X86::ST0; + unsigned Opc = 0; + if (CopyVT == MVT::f32) Opc = isST0 ? 
X86::FpGET_ST0_32:X86::FpGET_ST1_32; + if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; + if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80; + SDValue Ops[] = { Chain, InFlag }; + Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag, + Ops, 2), 1); + Val = Chain.getValue(0); - SDValue Val; - if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { + // Round the f80 to the right size, which also moves it to the appropriate + // xmm register. + if (CopyVT != VA.getValVT()) + Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, + // This truncation won't change the value. + DAG.getIntPtrConstant(1)); + } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64. if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), @@ -1358,15 +1395,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Val = Chain.getValue(0); } InFlag = Chain.getValue(2); - - if (CopyVT != VA.getValVT()) { - // Round the F80 the right size, which also moves to the appropriate xmm - // register. - Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, - // This truncation won't change the value. - DAG.getIntPtrConstant(1)); - } - InVals.push_back(Val); } @@ -1480,11 +1508,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); return DAG.getFrameIndex(FI, getPointerTy()); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1612,8 +1640,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { if (Is64Bit || (CallConv != CallingConv::X86_FastCall && CallConv != CallingConv::X86_ThisCall)) { - FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, - true, false)); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1785,7 +1812,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1799,6 +1826,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1811,7 +1839,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Check if it's really possible to do a tail call. 
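
The FpGET_ST0/FpGET_ST1 pseudo-instructions above exist because an x87 return value physically occupies a register-stack slot. A standalone illustration (not the compiler code) of why the pop cannot simply be dead-code-eliminated:

    // The x87 register stack has only eight slots. A callee returns an
    // f80 value by leaving it in ST(0); the caller must pop it even when
    // the result is ignored, otherwise repeated ignored calls overflow
    // the stack and later FP operations produce garbage.
    long double make_pi() { return 3.14159265358979323846L; }

    int main() {
      for (int i = 0; i < 100; ++i)
        make_pi(); // result unused, but ST(0) must still be popped each time
      return 0;
    }
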
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, Ins, DAG); + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -1871,7 +1899,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; bool isByVal = Flags.isByVal(); @@ -1964,7 +1992,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } - if (Is64Bit && isVarArg) { + if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -1973,7 +2001,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // the number of registers, but must be an ubound on the number of SSE // registers used and is in the range 0 - 8 inclusive. - // FIXME: Verify this on Win64 // Count the number of XMM registers allocated. static const unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, @@ -2010,12 +2037,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) continue; assert(VA.isMemLoc()); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -2056,7 +2083,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - bool WasGlobalOrExternal = false; if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2064,7 +2090,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // pc-relative offset may not be large enough to hold the whole // address. } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - WasGlobalOrExternal = true; // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack // it. @@ -2092,11 +2117,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, OpFlags = X86II::MO_DARWIN_STUB; } - Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -2150,17 +2174,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(InFlag); if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. 
- if (MF.getRegInfo().liveout_empty()) { - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, - *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // We used to do: + //// If this is the first return lowered for this function, add the regs + //// to the liveout set for the function. + // This isn't right, although it's probably harmless on x86; liveouts + // should be computed from returns not tail calls. Consider a void + // function making a tail call to a function returning int. return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } @@ -2311,6 +2330,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { if (!IsTailCallConvention(CalleeCC) && @@ -2337,8 +2357,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (RegInfo->needsStackRealignment(MF)) return false; - // Do not sibcall optimize vararg calls unless the call site is not passing any - // arguments. + // Do not sibcall optimize vararg calls unless the call site is not passing + // any arguments. if (isVarArg && !Outs.empty()) return false; @@ -2424,8 +2444,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; @@ -2440,17 +2459,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If the tailcall address may be in a register, then make sure it's // possible to register allocate for it. In 32-bit, the call address can // only target EAX, EDX, or ECX since the tail call must be scheduled after - // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, - // RDI, R8, R9, R11. - if (!isa(Callee) && + // callee-saved registers are restored. These happen to be the same + // registers used to pass 'inreg' arguments so watch out for those. + if (!Subtarget->is64Bit() && + !isa(Callee) && !isa(Callee)) { - unsigned Limit = Subtarget->is64Bit() ? 8 : 3; unsigned NumInRegs = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - if (VA.isRegLoc()) { - if (++NumInRegs == Limit) + if (!VA.isRegLoc()) + continue; + unsigned Reg = VA.getLocReg(); + switch (Reg) { + default: break; + case X86::EAX: case X86::EDX: case X86::ECX: + if (++NumInRegs == 3) return false; + break; } } } @@ -2460,20 +2485,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &pn -#ifndef NDEBUG - , SmallSet &cil -#endif - ) const { - return X86::createFastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return X86::createFastISel(funcInfo); } @@ -2491,7 +2504,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { // Set up a frame object for the return address. 
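
The liveout remark above can be made concrete with a small sketch, assuming the usual x86 convention that integer results come back in EAX:

    int produce() { return 42; }  // result returned in EAX on x86

    void consume() {
      produce();  // eligible to become "jmp produce": a tail call
      // consume returns void, so EAX is *not* live out of consume even
      // though the tail-callee defines it; liveouts must be derived from
      // the function's own return type, not from the call it tails into.
    }

    int main() { consume(); return 0; }
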
uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - false, false); + false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3190,7 +3203,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { return ((isa(Elt) && - cast(Elt)->getZExtValue() == 0) || + cast(Elt)->isNullValue()) || (isa(Elt) && cast(Elt)->getValueAPF().isPosZero())); } @@ -4448,7 +4461,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide -/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be +/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> @@ -4462,7 +4475,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); - EVT MaskEltVT = MaskVT.getVectorElementType(); EVT NewVT = MaskVT; switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); @@ -5074,13 +5086,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - if (Op.getValueType() == MVT::v2f32) - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, - Op.getOperand(0)))); - - if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) + + if (Op.getValueType() == MVT::v1i64 && + Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); @@ -5245,10 +5253,10 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. 
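
The fixed object created above sits at offset -SlotSize because the CALL instruction pushes the return address immediately below the caller's frame. A small sketch using the GCC/Clang builtin that reads that same slot:

    #include <cstdio>

    __attribute__((noinline)) void show_return_address() {
      void *ra = __builtin_return_address(0); // reads the pushed slot
      std::printf("returning to %p\n", ra);
    }

    int main() {
      show_return_address();
      return 0;
    }
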
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && @@ -5293,7 +5301,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); @@ -5366,7 +5374,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); @@ -5423,12 +5432,10 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { OpFlag = X86II::MO_TLVP_PIC_BASE; else OpFlag = X86II::MO_TLVP; - - SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), + DebugLoc DL = Op.getDebugLoc(); + SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, getPointerTy(), GA->getOffset(), OpFlag); - - DebugLoc DL = Op.getDebugLoc(); SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); // With PIC32, the address is actually $g + Offset. @@ -5777,7 +5784,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(), FudgePtr, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. @@ -6026,6 +6033,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, bool NeedCF = false; bool NeedOF = false; switch (X86CC) { + default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: NeedCF = true; @@ -6035,120 +6043,129 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, case X86::COND_O: case X86::COND_NO: NeedOF = true; break; - default: break; } // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. - if (Op.getResNo() == 0 && !NeedOF && !NeedCF) { - unsigned Opcode = 0; - unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { - case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is - // likely to be selected as part of a load-modify-store - // instruction. When the root node in a match is a store, isel - // doesn't know how to remap non-chain non-flag uses of other - // nodes in the match, such as the ADD in this case. This leads - // to the ADD being left around and reselected, with the result - // being two adds in the output. Alas, even if none our users - // are stores, that doesn't prove we're O.K. 
Ergo, if we have - // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. - // A better fix seems to require climbing the DAG back to the - // root, and it doesn't seem to be worth the effort. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) - goto default_case; - if (ConstantSDNode *C = - dyn_cast(Op.getNode()->getOperand(1))) { - // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue()) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; - } + if (Op.getResNo() != 0 || NeedOF || NeedCF) + // Emit a CMP with 0, which is the TEST pattern. + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + unsigned Opcode = 0; + unsigned NumOperands = 0; + switch (Op.getNode()->getOpcode()) { + case ISD::ADD: + // Due to an isel shortcoming, be conservative if this add is likely to be + // selected as part of a load-modify-store instruction. When the root node + // in a match is a store, isel doesn't know how to remap non-chain non-flag + // uses of other nodes in the match, such as the ADD in this case. This + // leads to the ADD being left around and reselected, with the result being + // two adds in the output. Alas, even if none our users are stores, that + // doesn't prove we're O.K. Ergo, if we have any parents that aren't + // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require + // climbing the DAG back to the root, and it doesn't seem to be worth the + // effort. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + goto default_case; + + if (ConstantSDNode *C = + dyn_cast(Op.getNode()->getOperand(1))) { + // An add of one will be selected as an INC. + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::INC; + NumOperands = 1; + break; } - // Otherwise use a regular EFLAGS-setting add. - Opcode = X86ISD::ADD; - NumOperands = 2; - break; - case ISD::AND: { - // If the primary and result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. - bool NonFlagUse = false; - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - unsigned UOpNo = UI.getOperandNo(); - if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { - // Look pass truncate. - UOpNo = User->use_begin().getOperandNo(); - User = *User->use_begin(); - } - if (User->getOpcode() != ISD::BRCOND && - User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { - NonFlagUse = true; - break; - } + + // An add of negative one (subtract of one) will be selected as a DEC. + if (C->getAPIntValue().isAllOnesValue()) { + Opcode = X86ISD::DEC; + NumOperands = 1; + break; } - if (!NonFlagUse) + } + + // Otherwise use a regular EFLAGS-setting add. + Opcode = X86ISD::ADD; + NumOperands = 2; + break; + case ISD::AND: { + // If the primary and result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. 
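
A standalone illustration of the INC/DEC special case handled above: when the compared value is "x + 1" or "x - 1", the instruction that computes it already sets ZF/SF, so no separate TEST or CMP is needed.

    bool wraps_to_zero(unsigned x) {
      // May lower to "inc %edi; sete %al": the increment and the flag
      // test are one instruction. (INC, unlike ADD, leaves CF untouched,
      // which is fine here because equality only consults ZF.)
      return x + 1 == 0;
    }

    bool hits_zero(unsigned x) {
      return x - 1 == 0;  // likewise a candidate for "dec; sete"
    }
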
+ bool NonFlagUse = false; + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look pass truncate. + UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + NonFlagUse = true; break; + } } + + if (!NonFlagUse) + break; + } // FALL THROUGH - case ISD::SUB: - case ISD::OR: - case ISD::XOR: - // Due to the ISEL shortcoming noted above, be conservative if this op is - // likely to be selected as part of a load-modify-store instruction. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + case ISD::SUB: + case ISD::OR: + case ISD::XOR: + // Due to the ISEL shortcoming noted above, be conservative if this op is + // likely to be selected as part of a load-modify-store instruction. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) - goto default_case; - // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { - case ISD::SUB: Opcode = X86ISD::SUB; break; - case ISD::OR: Opcode = X86ISD::OR; break; - case ISD::XOR: Opcode = X86ISD::XOR; break; - case ISD::AND: Opcode = X86ISD::AND; break; - default: llvm_unreachable("unexpected operator!"); - } - NumOperands = 2; - break; - case X86ISD::ADD: - case X86ISD::SUB: - case X86ISD::INC: - case X86ISD::DEC: - case X86ISD::OR: - case X86ISD::XOR: - case X86ISD::AND: - return SDValue(Op.getNode(), 1); - default: - default_case: - break; - } - if (Opcode != 0) { - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - SmallVector Ops; - for (unsigned i = 0; i != NumOperands; ++i) - Ops.push_back(Op.getOperand(i)); - SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); - DAG.ReplaceAllUsesWith(Op, New); - return SDValue(New.getNode(), 1); + if (UI->getOpcode() == ISD::STORE) + goto default_case; + + // Otherwise use a regular EFLAGS-setting instruction. + switch (Op.getNode()->getOpcode()) { + default: llvm_unreachable("unexpected operator!"); + case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::XOR: Opcode = X86ISD::XOR; break; + case ISD::AND: Opcode = X86ISD::AND; break; } + + NumOperands = 2; + break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::INC: + case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + return SDValue(Op.getNode(), 1); + default: + default_case: + break; } - // Otherwise just emit a CMP with 0, which is the TEST pattern. - return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, - DAG.getConstant(0, Op.getValueType())); + if (Opcode == 0) + // Emit a CMP with 0, which is the TEST pattern. 
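
A companion sketch for the AND case scanned above: when the AND result only feeds a branch or setcc, a TEST (which sets flags without writing a register) is preferable to an EFLAGS-setting AND.

    bool bit3_set(unsigned x) {
      return (x & 8) != 0;     // may lower to "test $8, %dil; setne %al"
    }

    unsigned bit3_and_keep(unsigned x) {
      unsigned masked = x & 8; // the value is used, so a real AND is emitted
      return masked;
    }
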
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SmallVector Ops; + for (unsigned i = 0; i != NumOperands; ++i) + Ops.push_back(Op.getOperand(i)); + + SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); + DAG.ReplaceAllUsesWith(Op, New); + return SDValue(New.getNode(), 1); } /// Emit nodes that will be selected as "cmp Op0,Op1", or something @@ -6175,15 +6192,21 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, Op1 = Op1.getOperand(0); SDValue LHS, RHS; - if (Op1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And10C = dyn_cast(Op1.getOperand(0))) - if (And10C->getZExtValue() == 1) { - LHS = Op0; - RHS = Op1.getOperand(1); - } - } else if (Op0.getOpcode() == ISD::SHL) { + if (Op1.getOpcode() == ISD::SHL) + std::swap(Op0, Op1); + if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *And00C = dyn_cast(Op0.getOperand(0))) if (And00C->getZExtValue() == 1) { + // If we looked past a truncate, check that it's only truncating away + // known zeros. + unsigned BitWidth = Op0.getValueSizeInBits(); + unsigned AndBitWidth = And.getValueSizeInBits(); + if (BitWidth > AndBitWidth) { + APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) + return SDValue(); + } LHS = Op1; RHS = Op0.getOperand(1); } @@ -6234,7 +6257,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && - cast(Op1)->getZExtValue() == 0 && + cast(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); if (NewSetCC.getNode()) @@ -6614,15 +6637,16 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, MVT::i8); - SDValue User = SDValue(*Op.getNode()->use_begin(), 0); + SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. - if (User.getOpcode() == ISD::BR) { - SDValue FalseBB = User.getOperand(1); - SDValue NewBR = - DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); + (void)NewBR; Dest = FalseBB; Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), @@ -6694,7 +6718,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; - EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); @@ -6747,7 +6770,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32), - FIN, SV, 0, false, false, 0); + FIN, SV, 4, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area @@ -6755,7 +6778,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8, false, false, 0); MemOps.push_back(Store); @@ -6764,7 +6787,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16, false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -6774,9 +6797,6 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); - SDValue Chain = Op.getOperand(0); - SDValue SrcPtr = Op.getOperand(1); - SDValue SrcSV = Op.getOperand(2); report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); @@ -7199,7 +7219,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - report_fatal_error("Nest register in use - reduce number of inreg parameters!"); + report_fatal_error("Nest register in use - reduce number of inreg" + " parameters!"); } } break; @@ -7489,6 +7510,36 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return Sum; } +SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ + DebugLoc dl = Op.getDebugLoc(); + + if (!Subtarget->hasSSE2()) + return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + + unsigned isDev = cast(Op.getOperand(5))->getZExtValue(); + if(!isDev) + return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + else { + unsigned Op1 = cast(Op.getOperand(1))->getZExtValue(); + unsigned Op2 = cast(Op.getOperand(2))->getZExtValue(); + unsigned Op3 = cast(Op.getOperand(3))->getZExtValue(); + unsigned Op4 = cast(Op.getOperand(4))->getZExtValue(); + + // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; + if (!Op1 && !Op2 && !Op3 && Op4) + return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); + + // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; + if (Op1 && !Op2 && !Op3 && !Op4) + return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); + + // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), + // (MFENCE)>; + return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); + } +} + SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -7578,6 +7629,7 @@ SDValue 
X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -7980,7 +8032,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned immOpc, unsigned LoadOpc, unsigned CXchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, @@ -8007,8 +8058,11 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8018,17 +8072,17 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, newMBB->addSuccessor(newMBB); // Insert instructions into newMBB based on incoming instruction - assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); DebugLoc dl = bInstr->getDebugLoc(); MachineOperand& destOper = bInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = bInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &bInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -8054,7 +8108,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(tt); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); @@ -8065,13 +8119,13 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(EAXreg); // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. 
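
The block structure assembled above (load, operate, LCMPXCHG, JNE back to newMBB) is the classic compare-exchange retry loop. A standalone C++ equivalent for atomic NAND, an operation with no single x86 instruction:

    #include <atomic>

    unsigned atomic_nand(std::atomic<unsigned> &a, unsigned v) {
      unsigned old = a.load();
      // newMBB loops to itself until the CMPXCHG succeeds; likewise here,
      // 'old' is refreshed with the current value on each failure.
      while (!a.compare_exchange_weak(old, ~(old & v)))
        ;
      return old;
    }
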
return nextMBB; } @@ -8101,7 +8155,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, const TargetRegisterClass *RC = X86::GR32RegisterClass; const unsigned LoadOpc = X86::MOV32rm; - const unsigned copyOpc = X86::MOV32rr; const unsigned NotOpc = X86::NOT32r; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); @@ -8116,8 +8169,11 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8129,12 +8185,12 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, DebugLoc dl = bInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction // There are 8 "real" operands plus 9 implicit def/uses, ignored here. - assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 && "unexpected number of operands"); MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); - MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) { + MachineOperand* argOpers[2 + X86::AddrNumOperands]; + for (int i=0; i < 2 + X86::AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); // We use some of the operands multiple times, so conservatively just @@ -8144,7 +8200,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, } // x86 address has 5 operands: base, index, scale, displacement, and segment. 
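
The five-part memory operand named in the comment above decomposes as base + index*scale + displacement within a segment. A purely illustrative struct (not an LLVM type) showing the effective-address computation:

    #include <cstdint>

    struct X86Address {
      uint64_t base;    // base register value
      uint64_t index;   // index register value
      uint8_t  scale;   // 1, 2, 4, or 8
      int32_t  disp;    // signed displacement
      uint64_t segBase; // segment base, usually 0 outside %fs/%gs

      uint64_t effective() const {
        return segBase + base + index * scale + disp;
      }
    };
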
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); @@ -8208,14 +8264,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); MIB.addReg(t5); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); MIB.addReg(t6); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); @@ -8226,15 +8282,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3); MIB.addReg(X86::EAX); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4); MIB.addReg(X86::EDX); // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8268,8 +8324,11 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors of thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
+ nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(mInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8280,16 +8339,16 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, DebugLoc dl = mInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction - assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); MachineOperand& destOper = mInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = mInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &mInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); @@ -8304,12 +8363,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); else MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr)); @@ -8331,13 +8390,13 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, (*MIB).setMemRefs(mInstr->memoperands_begin(), mInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(X86::EAX); // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. + mInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8347,7 +8406,6 @@ MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - MachineFunction *F = BB->getParent(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -8369,7 +8427,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); - F->DeleteMachineInstr(MI); + MI->eraseFromParent(); return BB; } @@ -8398,9 +8456,12 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( F->insert(MBBIter, XMMSaveMBB); F->insert(MBBIter, EndMBB); - // Set up the CFG. - // Move any original successors of MBB to the end block. - EndMBB->transferSuccessors(MBB); + // Transfer the remainder of MBB and its successor edges to EndMBB. + EndMBB->splice(EndMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndMBB->transferSuccessorsAndUpdatePHIs(MBB); + // The original block will now fall through to the XMM save block. MBB->addSuccessor(XMMSaveMBB); // The XMMSaveMBB will fall through to the end block. 
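
What the XMM save block supports, seen from C++: a variadic function must be able to recover floating-point arguments through va_arg, so the prologue spills the XMM argument registers into the register save area (guarded by the count the caller passes in %al). A self-contained example:

    #include <cstdarg>

    double sum(int n, ...) {
      va_list ap;
      va_start(ap, n);               // relies on the saved register area
      double total = 0;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, double); // doubles arrive in XMM0-XMM7
      va_end(ap);
      return total;
    }

    int main() { return sum(3, 1.0, 2.0, 3.0) == 6.0 ? 0 : 1; }
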
@@ -8439,7 +8500,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( .addMemOperand(MMO); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return EndMBB; } @@ -8468,24 +8529,39 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned Opc = - X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); - BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + const MachineFunction *MF = BB->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + BitVector ReservedRegs = TRI->getReservedRegs(*MF); + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue; + unsigned Reg = MO.getReg(); + if (Reg != X86::EFLAGS) continue; + copy0MBB->addLiveIn(Reg); + sinkMBB->addLiveIn(Reg); + } + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + // Create the conditional branch instruction. + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -8494,11 +8570,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; } @@ -8507,21 +8584,20 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - MachineFunction *F = BB->getParent(); // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. // FIXME: The code should be tweaked as soon as we'll try to do codegen for // mingw-w64. 
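
Context for the _alloca call emitted just below: on Windows targets the stack must be grown page by page so each new page is touched in order (the guard-page mechanism), so large dynamic allocations route through a probing helper with the size in EAX rather than a bare "sub %esp". An illustrative use from C++, assuming the GCC/Clang __builtin_alloca spelling:

    void use_dynamic_buffer(unsigned n) {
      // On mingw a large n makes the compiler emit the probing call
      // that this lowering models.
      char *buf = static_cast<char *>(__builtin_alloca(n));
      for (unsigned i = 0; i < n; ++i)
        buf[i] = 0;
    }
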
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) .addExternalSymbol("_alloca") .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) .addReg(X86::ESP, RegState::Define | RegState::Implicit); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8532,23 +8608,46 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // our load from the relocation, sticking it in either RDI (x86-64) // or EAX and doing an indirect call. The return value will then // be in the normal return register. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const X86InstrInfo *TII + = static_cast(getTargetMachine().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); + assert(MI->getOperand(3).isGlobal() && "This should be a global"); + if (Subtarget->is64Bit()) { - MachineInstrBuilder MIB = BuildMI(BB, DL, TII->get(X86::MOV64rr), X86::RDI) - .addReg(MI->getOperand(0).getReg()); - MIB = BuildMI(BB, DL, TII->get(X86::CALL64m)); - addDirectMem(MIB, X86::RDI).addReg(0); + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV64rm), X86::RDI) + .addReg(X86::RIP) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); + addDirectMem(MIB, X86::RDI); + } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(0) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); } else { - MachineInstrBuilder MIB = BuildMI(BB, DL, TII->get(X86::MOV32rr), X86::EAX) - .addReg(MI->getOperand(0).getReg()); - MIB = BuildMI(BB, DL, TII->get(X86::CALL32m)); - addDirectMem(MIB, X86::EAX).addReg(0); + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(TII->getGlobalBaseReg(F)) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8592,23 +8691,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); - addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW), + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... 
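
The FNSTCW/FLDCW sequence being assembled here is, at the C++ level, a save/flip/restore of the rounding mode: C-style float-to-int casts must truncate, but the x87 default is round-to-nearest. A portable sketch of the same save-change-restore pattern (std::nearbyint honors the current mode):

    #include <cfenv>
    #include <cmath>

    int truncate_via_rounding_mode(double d) {
      const int old = std::fegetround();  // like FNSTCW: save the mode
      std::fesetround(FE_TOWARDZERO);     // like FLDCW: force truncation
      int r = static_cast<int>(std::nearbyint(d)); // rounds under current mode
      std::fesetround(old);               // like FLDCW: restore the mode
      return r;
    }
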
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... - addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. @@ -8647,13 +8748,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } else { AM.Disp = Op.getImm(); } - addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM) - .addReg(MI->getOperand(X86AddrNumOperands).getReg()); + addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM) + .addReg(MI->getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } // String/text processing lowering. @@ -8670,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMXOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMNAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass, true); case X86::ATOMMIN32: @@ -8703,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMXOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMNAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass, true); case X86::ATOMMIN16: @@ -8736,25 +8838,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, 
@@ -8670,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                                X86::AND32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass);
   case X86::ATOMOR32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                                X86::OR32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass);
   case X86::ATOMXOR32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                                X86::XOR32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass);
   case X86::ATOMNAND32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                                X86::AND32ri, X86::MOV32rm,
-                                               X86::LCMPXCHG32, X86::MOV32rr,
+                                               X86::LCMPXCHG32,
                                                X86::NOT32r, X86::EAX,
                                                X86::GR32RegisterClass, true);
   case X86::ATOMMIN32:
@@ -8703,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                                X86::AND16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass);
   case X86::ATOMOR16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                                X86::OR16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass);
   case X86::ATOMXOR16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                                X86::XOR16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass);
   case X86::ATOMNAND16:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                                X86::AND16ri, X86::MOV16rm,
-                                               X86::LCMPXCHG16, X86::MOV16rr,
+                                               X86::LCMPXCHG16,
                                                X86::NOT16r, X86::AX,
                                                X86::GR16RegisterClass, true);
   case X86::ATOMMIN16:
@@ -8736,25 +8838,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                                X86::AND8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass);
   case X86::ATOMOR8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                                X86::OR8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass);
   case X86::ATOMXOR8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                                X86::XOR8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass);
   case X86::ATOMNAND8:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                                X86::AND8ri, X86::MOV8rm,
-                                               X86::LCMPXCHG8, X86::MOV8rr,
+                                               X86::LCMPXCHG8,
                                                X86::NOT8r, X86::AL,
                                                X86::GR8RegisterClass, true);
   // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
@@ -8762,25 +8864,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::ATOMAND64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                                X86::AND64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass);
   case X86::ATOMOR64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                                X86::OR64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass);
   case X86::ATOMXOR64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                                X86::XOR64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass);
   case X86::ATOMNAND64:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                                X86::AND64ri32, X86::MOV64rm,
-                                               X86::LCMPXCHG64, X86::MOV64rr,
+                                               X86::LCMPXCHG64,
                                                X86::NOT64r, X86::RAX,
                                                X86::GR64RegisterClass, true);
   case X86::ATOMMIN64:
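Two things happen in these operand lists: the explicit copy opcode (X86::MOV32rr and friends) drops out, presumably because register copies are now emitted through the generic copy machinery, while the not opcode stays, since the NAND pseudos invert the result of the AND inside the compare-exchange retry loop that EmitAtomicBitwiseWithCustomInserter builds. The shape of that loop, as a C++ sketch with a GCC builtin standing in for the LCMPXCHG instruction:

    #include <cstdint>

    // Sketch of the expansion for ATOMNAND32: load, AND, NOT, then retry
    // with LCMPXCHG32 until no other thread has raced the update.
    uint32_t atomic_nand32(volatile uint32_t *addr, uint32_t val) {
      uint32_t oldval, newval;
      do {
        oldval = *addr;         // MOV32rm: current value, kept in EAX
        newval = oldval & val;  // AND32rr / AND32ri: the bitwise op
        newval = ~newval;       // NOT32r: the inversion NAND needs
      } while (!__sync_bool_compare_and_swap(addr, oldval, newval)); // LCMPXCHG32
      return oldval;
    }
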
@@ -8961,8 +9063,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
 
   // Store the value to a temporary stack slot.
   SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
-                            false, false, 0);
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
+                            0, false, false, 0);
 
   // Replace each use (extract) with a load of the appropriate element.
   for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -8976,11 +9078,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
     uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
     SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
 
-    SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr);
+    SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+                                     OffsetVal, StackPtr);
 
     // Load the scalar.
-    SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
-                                     NULL, 0, false, false, 0);
+    SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+                                     ScalarAddr, NULL, 0, false, false, 0);
 
     // Replace the exact with the load.
     DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -9018,8 +9121,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       // Converting this to a min would handle NaNs incorrectly, and swapping
       // the operands would cause it to handle comparisons between positive
      // and negative zero incorrectly.
-      if (!FiniteOnlyFPMath() &&
-          (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+      if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
         if (!UnsafeFPMath &&
             !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
           break;
@@ -9057,8 +9159,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       // Converting this to a max would handle NaNs incorrectly, and swapping
       // the operands would cause it to handle comparisons between positive
       // and negative zero incorrectly.
-      if (!FiniteOnlyFPMath() &&
-          (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+      if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
         if (!UnsafeFPMath &&
             !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
           break;
@@ -9087,8 +9188,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       // cause it to handle NaNs incorrectly.
       if (!UnsafeFPMath &&
           !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
-        if (!FiniteOnlyFPMath() &&
-            (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
           break;
         std::swap(LHS, RHS);
       }
@@ -9113,8 +9213,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
 
     case ISD::SETULT:
       // Converting this to a max would handle NaNs incorrectly.
-      if (!FiniteOnlyFPMath() &&
-          (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+      if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
        break;
       Opcode = X86ISD::FMAX;
       break;
@@ -9124,8 +9223,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       // cause it to handle NaNs incorrectly.
       if (!UnsafeFPMath &&
           !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
-        if (!FiniteOnlyFPMath() &&
-            (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
          break;
         std::swap(LHS, RHS);
       }
@@ -9606,8 +9704,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
   if (ShAmt1.getOpcode() == ISD::SUB) {
     SDValue Sum = ShAmt1.getOperand(0);
     if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
-      if (SumC->getSExtValue() == Bits &&
-          ShAmt1.getOperand(1) == ShAmt0)
+      SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+      if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+        ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+      if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
         return DAG.getNode(Opc, DL, VT,
                            Op0, Op1,
                            DAG.getNode(ISD::TRUNCATE, DL,
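The new look-through in PerformOrCombine rescues the double-shift match when type legalization has truncated the (bits - n) computation to a narrower shift-amount type: previously the ShAmt1.getOperand(1) == ShAmt0 test failed on the intervening TRUNCATE node. The scalar pattern being folded, as a sketch (well-defined for 0 < n < 32):

    #include <cstdint>

    // (x << n) | (y >> (32 - n)) is a funnel shift; x86 selects it as SHLD.
    uint32_t shld32(uint32_t x, uint32_t y, unsigned n) {
      return (x << n) | (y >> (32 - n));
    }
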
@@ -9803,58 +9903,6 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-// On X86 and X86-64, atomic operations are lowered to locked instructions.
-// Locked instructions, in turn, have implicit fence semantics (all memory
-// operations are flushed before issuing the locked instruction, and the
-// are not buffered), so we can fold away the common pattern of
-// fence-atomic-fence.
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
-  SDValue atomic = N->getOperand(0);
-  switch (atomic.getOpcode()) {
-    case ISD::ATOMIC_CMP_SWAP:
-    case ISD::ATOMIC_SWAP:
-    case ISD::ATOMIC_LOAD_ADD:
-    case ISD::ATOMIC_LOAD_SUB:
-    case ISD::ATOMIC_LOAD_AND:
-    case ISD::ATOMIC_LOAD_OR:
-    case ISD::ATOMIC_LOAD_XOR:
-    case ISD::ATOMIC_LOAD_NAND:
-    case ISD::ATOMIC_LOAD_MIN:
-    case ISD::ATOMIC_LOAD_MAX:
-    case ISD::ATOMIC_LOAD_UMIN:
-    case ISD::ATOMIC_LOAD_UMAX:
-      break;
-    default:
-      return SDValue();
-  }
-
-  SDValue fence = atomic.getOperand(0);
-  if (fence.getOpcode() != ISD::MEMBARRIER)
-    return SDValue();
-
-  switch (atomic.getOpcode()) {
-    case ISD::ATOMIC_CMP_SWAP:
-      return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
-                                    atomic.getOperand(1), atomic.getOperand(2),
-                                    atomic.getOperand(3));
-    case ISD::ATOMIC_SWAP:
-    case ISD::ATOMIC_LOAD_ADD:
-    case ISD::ATOMIC_LOAD_SUB:
-    case ISD::ATOMIC_LOAD_AND:
-    case ISD::ATOMIC_LOAD_OR:
-    case ISD::ATOMIC_LOAD_XOR:
-    case ISD::ATOMIC_LOAD_NAND:
-    case ISD::ATOMIC_LOAD_MIN:
-    case ISD::ATOMIC_LOAD_MAX:
-    case ISD::ATOMIC_LOAD_UMIN:
-    case ISD::ATOMIC_LOAD_UMAX:
-      return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
-                                    atomic.getOperand(1), atomic.getOperand(2));
-    default:
-      return SDValue();
-  }
-}
-
 static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
   // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
   //           (and (i32 x86isd::setcc_carry), 1)
@@ -9902,7 +9950,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
-  case ISD::MEMBARRIER:     return PerformMEMBARRIERCombine(N, DAG);
   case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
   }
 
@@ -10025,8 +10072,8 @@ static bool LowerToBSwap(CallInst *CI) {
   // so don't worry about this.
 
   // Verify this is a simple bswap.
-  if (CI->getNumOperands() != 2 ||
-      CI->getType() != CI->getOperand(1)->getType() ||
+  if (CI->getNumArgOperands() != 1 ||
+      CI->getType() != CI->getArgOperand(0)->getType() ||
       !CI->getType()->isIntegerTy())
     return false;
 
@@ -10039,7 +10086,7 @@ static bool LowerToBSwap(CallInst *CI) {
   Module *M = CI->getParent()->getParent()->getParent();
   Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
 
-  Value *Op = CI->getOperand(1);
+  Value *Op = CI->getArgOperand(0);
 
   Op = CallInst::Create(Int, Op, CI->getName(), CI);
   CI->replaceAllUsesWith(Op);
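LowerToBSwap backs the inline-asm fast path: when the caller (ExpandInlineAsm) recognizes a bare byte-swap asm, the call is rewritten into the target-independent llvm.bswap intrinsic so the optimizer can see through it. The index shuffle, getNumOperands() != 2 becoming getNumArgOperands() != 1 and getOperand(1) becoming getArgOperand(0), tracks the CallInst API change that stopped counting the callee among a call's arguments. The sort of source this matches, as a sketch using GNU-style inline asm:

    // Inline asm of this shape can be replaced by llvm.bswap.i32; the exact
    // asm-string and constraint checks happen in the caller, not here.
    static inline unsigned swap32(unsigned x) {
      __asm__("bswap %0" : "+r"(x)); // one integer operand, same type in and out
      return x;
    }
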
@@ -10172,7 +10219,6 @@ LowerXConstraint(EVT ConstraintVT) const {
 /// vector.  If it is invalid, don't add anything to Ops.
 void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                      char Constraint,
-                                                     bool hasMemory,
                                                      std::vector<SDValue>&Ops,
                                                      SelectionDAG &DAG) const {
   SDValue Result(0, 0);
@@ -10214,9 +10260,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'e': {
     // 32-bit signed value
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-      const ConstantInt *CI = C->getConstantIntValue();
-      if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
-                                  C->getSExtValue())) {
+      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+                                           C->getSExtValue())) {
         // Widen to 64 bits here to get it sign extended.
         Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
         break;
@@ -10229,9 +10274,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'Z': {
     // 32-bit unsigned value
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-      const ConstantInt *CI = C->getConstantIntValue();
-      if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
-                                  C->getZExtValue())) {
+      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+                                           C->getZExtValue())) {
         Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
         break;
       }
@@ -10248,6 +10292,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
     break;
   }
 
+  // In any sort of PIC mode addresses need to be computed at runtime by
+  // adding in a register or some sort of table lookup.  These can't
+  // be used as immediates.
+  if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+    return;
+
   // If we are in non-pic codegen mode, we allow the address of a global (with
   // an optional displacement) to be used with 'i'.
   GlobalAddressSDNode *GA = 0;
@@ -10283,11 +10333,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                          getTargetMachine())))
       return;
 
-    if (hasMemory)
-      Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
-    else
-      Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
-    Result = Op;
+    Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+                                        GA->getValueType(0), Offset);
     break;
   }
 
@@ -10296,8 +10343,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
     Ops.push_back(Result);
     return;
   }
-  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
-                                                      Ops, DAG);
+  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
 std::vector<unsigned> X86TargetLowering::