X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86FastISel.cpp;h=eec19a8030d2bbf57f903bd3ffb78b90c4de3edc;hb=de0129ac0821e693b08df7269f956f5418b2b5f7;hp=15a4948e843fcb988ffe83b8789f0d893cf31f7e;hpb=da5e5688e9b94dd5b372359b4f8102ccca000b14;p=oota-llvm.git diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 15a4948e843..eec19a8030d 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -83,13 +84,13 @@ public: private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL); - bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO, - unsigned &ResultReg); + bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO, + unsigned &ResultReg, unsigned Alignment = 1); - bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, + bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM, MachineMemOperand *MMO = nullptr, bool Aligned = false); bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, - const X86AddressMode &AM, + X86AddressMode &AM, MachineMemOperand *MMO = nullptr, bool Aligned = false); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, @@ -165,6 +166,9 @@ private: bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, const Value *Cond); + + const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, + X86AddressMode &AM); }; } // end anonymous namespace. @@ -242,6 +246,20 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) { return std::make_pair(CC, NeedSwap); } +/// \brief Adds a complex addressing mode to the given machine instr builder. +/// Note, this will constrain the index register. If its not possible to +/// constrain the given index register, then a new one will be created. The +/// IndexReg field of the addressing mode will be updated to match in this case. +const MachineInstrBuilder & +X86FastISel::addFullAddress(const MachineInstrBuilder &MIB, + X86AddressMode &AM) { + // First constrain the index register. It needs to be a GR64_NOSP. + AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg, + MIB->getNumOperands() + + X86::AddrIndexReg); + return ::addFullAddress(MIB, AM); +} + /// \brief Check if it is possible to fold the condition from the XALU intrinsic /// into the user. The condition code will only be updated on success. bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, @@ -299,7 +317,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, } bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { - EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); + EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -326,8 +344,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. 
-bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, - MachineMemOperand *MMO, unsigned &ResultReg) { +bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, + MachineMemOperand *MMO, unsigned &ResultReg, + unsigned Alignment) { // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; @@ -372,6 +391,30 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, case MVT::f80: // No f80 support yet. return false; + case MVT::v4f32: + if (Alignment >= 16) + Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm; + else + Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm; + RC = &X86::VR128RegClass; + break; + case MVT::v2f64: + if (Alignment >= 16) + Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm; + else + Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm; + RC = &X86::VR128RegClass; + break; + case MVT::v4i32: + case MVT::v2i64: + case MVT::v8i16: + case MVT::v16i8: + if (Alignment >= 16) + Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm; + else + Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm; + RC = &X86::VR128RegClass; + break; } ResultReg = createResultReg(RC); @@ -388,7 +431,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, - const X86AddressMode &AM, + X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; @@ -449,7 +492,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, } bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM, + X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { // Handle 'null' like i32/i64 0. if (isa(Val)) @@ -565,7 +608,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // Prepare for inserting code in the local-value area. SavePoint SaveInsertPt = enterLocalValueArea(); - if (TLI.getPointerTy() == MVT::i64) { + if (TLI.getPointerTy(DL) == MVT::i64) { Opc = X86::MOV64rm; RC = &X86::GR64RegClass; @@ -647,13 +690,14 @@ redo_gep: case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; @@ -823,14 +867,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. 
- if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; } @@ -957,7 +1001,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; @@ -988,7 +1032,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT SrcVT = TLI.getValueType(RV->getType()); + EVT SrcVT = TLI.getValueType(DL, RV->getType()); EVT DstVT = VA.getValVT(); // Special handling for extended integers. if (SrcVT != DstVT) { @@ -1068,8 +1112,14 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { if (!X86SelectAddress(Ptr, AM)) return false; + unsigned Alignment = LI->getAlignment(); + unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType()); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = ABIAlignment; + unsigned ResultReg = 0; - if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg)) + if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg, + Alignment)) return false; updateValueMap(I, ResultReg); @@ -1094,20 +1144,30 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { } } -/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS -/// of the comparison, return an opcode that works for the compare (e.g. -/// CMP32ri) otherwise return 0. +/// If we have a comparison with RHS as the RHS of the comparison, return an +/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0. static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { + int64_t Val = RHSC->getSExtValue(); switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. - default: return 0; - case MVT::i8: return X86::CMP8ri; - case MVT::i16: return X86::CMP16ri; - case MVT::i32: return X86::CMP32ri; + default: + return 0; + case MVT::i8: + return X86::CMP8ri; + case MVT::i16: + if (isInt<8>(Val)) + return X86::CMP16ri8; + return X86::CMP16ri; + case MVT::i32: + if (isInt<8>(Val)) + return X86::CMP32ri8; + return X86::CMP32ri; case MVT::i64: + if (isInt<8>(Val)) + return X86::CMP64ri8; // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext // field. - if ((int)RHSC->getSExtValue() == RHSC->getSExtValue()) + if (isInt<32>(Val)) return X86::CMP64ri32; return 0; } @@ -1241,7 +1301,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { } bool X86FastISel::X86SelectZExt(const Instruction *I) { - EVT DstVT = TLI.getValueType(I->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (!TLI.isTypeLegal(DstVT)) return false; @@ -1250,7 +1310,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { return false; // Handle zero-extension from i1 to i8, which is common. - MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType()); + MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); if (SrcVT.SimpleTy == MVT::i1) { // Set the high bits to zero. 
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); @@ -1303,7 +1363,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { X86::CondCode CC; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { - EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType()); // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); @@ -1743,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; @@ -1810,11 +1870,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { return true; } -/// \brief Emit SSE instructions to lower the select. +/// \brief Emit SSE or AVX instructions to lower the select. /// /// Try to use SSE1/SSE2 instructions to simulate a select without branches. /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary -/// SSE instructions are available. +/// SSE instructions are available. If AVX is available, try to use a VBLENDV. bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { // Optimize conditions coming from a compare if both instructions are in the // same basic block (values defined in other basic blocks may not have @@ -1850,19 +1910,17 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - static unsigned OpcTable[2][2][4] = { - { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, - { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } }, - { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }, - { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } } + // Choose the SSE instruction sequence based on data type (float or double). + static unsigned OpcTable[2][4] = { + { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, + { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr } }; - bool HasAVX = Subtarget->hasAVX(); unsigned *Opc = nullptr; switch (RetVT.SimpleTy) { default: return false; - case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break; - case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break; + case MVT::f32: Opc = &OpcTable[0][0]; break; + case MVT::f64: Opc = &OpcTable[1][0]; break; } const Value *LHS = I->getOperand(1); @@ -1884,14 +1942,33 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { return false; const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); - unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, - CmpRHSReg, CmpRHSIsKill, CC); - unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, - LHSReg, LHSIsKill); - unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, - RHSReg, RHSIsKill); - unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, - AndReg, /*IsKill=*/true); + unsigned ResultReg; + + if (Subtarget->hasAVX()) { + // If we have AVX, create 1 blendv instead of 3 logic instructions. + // Blendv was introduced with SSE 4.1, but the 2 register form implicitly + // uses XMM0 as the selection register. 
That may need just as many + // instructions as the AND/ANDN/OR sequence due to register moves, so + // don't bother. + unsigned CmpOpcode = + (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr; + unsigned BlendOpcode = + (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; + + unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CmpReg, true); + } else { + unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, + LHSReg, LHSIsKill); + unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, + RHSReg, RHSIsKill); + ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, + AndReg, /*IsKill=*/true); + } updateValueMap(I, ResultReg); return true; } @@ -1928,7 +2005,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; } else { @@ -2007,6 +2084,12 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { } bool X86FastISel::X86SelectSIToFP(const Instruction *I) { + // The target-independent selection algorithm in FastISel already knows how + // to select a SINT_TO_FP if the target is SSE but not AVX. + // Early exit if the subtarget doesn't have AVX. + if (!Subtarget->hasAVX()) + return false; + if (!I->getOperand(0)->getType()->isIntegerTy(32)) return false; @@ -2029,11 +2112,6 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) { } else return false; - // The target-independent selection algorithm in FastISel already knows how - // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only - // reachable if the subtarget has AVX. - assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!"); - unsigned ImplicitDefReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); @@ -2089,8 +2167,8 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { } bool X86FastISel::X86SelectTrunc(const Instruction *I) { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); // This code only handles truncation to byte. if (DstVT != MVT::i8 && DstVT != MVT::i1) @@ -2109,6 +2187,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { return true; } + bool KillInputReg = false; if (!Subtarget->is64Bit()) { // If we're on x86-32; we can't extract an i8 from a general register. // First issue a copy to GR16_ABCD or GR32_ABCD. @@ -2118,11 +2197,12 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg); InputReg = CopyReg; + KillInputReg = true; } // Issue an extract_subreg. 
unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, - InputReg, /*Kill=*/true, + InputReg, KillInputReg, X86::sub_8bit); if (!ResultReg) return false; @@ -2176,7 +2256,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { default: return false; case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: { - if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) + if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) return false; const Value *Op = II->getArgOperand(0); @@ -2337,7 +2417,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } case Intrinsic::stackprotector: { // Emit code to store the stack guard onto the stack. - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DL); const Value *Op1 = II->getArgOperand(0); // The guard's value. const AllocaInst *Slot = cast(II->getArgOperand(1)); @@ -2359,6 +2439,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM) .addImm(0) .addMetadata(DI->getVariable()) @@ -2654,7 +2736,7 @@ bool X86FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { default: return false; @@ -2691,7 +2773,7 @@ bool X86FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned SrcReg; switch (VT.SimpleTy) { @@ -2741,7 +2823,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool &IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - const char *SymName = CLI.SymName; + MCSymbol *Symbol = CLI.Symbol; bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); @@ -2938,8 +3020,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore, - ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (Flags.isByVal()) { X86AddressMode SrcAM; SrcAM.Base.Reg = ArgReg; @@ -3027,7 +3109,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && + !GV->isStrongDefinitionForLinker() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, @@ -3037,15 +3119,15 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { } MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 
TII.get(CallOpc)); - if (SymName) - MIB.addExternalSymbol(SymName, OpFlags); + if (Symbol) + MIB.addSym(Symbol, OpFlags); else MIB.addGlobalAddress(GV, 0, OpFlags); } // Add a register mask operand representing the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CC)); + MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Add an implicit use GOT pointer in EBX. if (Subtarget->isPICStyleGOT()) @@ -3159,8 +3241,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { return X86SelectSIToFP(I); case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (DstVT.bitsGT(SrcVT)) return X86SelectZExt(I); if (DstVT.bitsLT(SrcVT)) @@ -3302,8 +3384,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { TII.get(Opc), ResultReg); addDirectMem(MIB, AddrReg); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - TM.getDataLayout()->getPointerSize(), Align); + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, DL.getPointerSize(), Align); MIB->addMemOperand(*FuncInfo.MF, MMO); return ResultReg; } @@ -3330,17 +3412,17 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (TM.getRelocationModel() == Reloc::Static && - TLI.getPointerTy() == MVT::i64) { + TLI.getPointerTy(DL) == MVT::i64) { // The displacement code could be more than 32 bits away so we need to use // an instruction with a 64 bit immediate BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), ResultReg) .addGlobalAddress(GV); } else { - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; + unsigned Opc = + TLI.getPointerTy(DL) == MVT::i32 + ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) + : X86::LEA64r; addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); } @@ -3350,7 +3432,7 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { } unsigned X86FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -3382,11 +3464,11 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) { X86AddressMode AM; if (!X86SelectAddress(C, AM)) return 0; - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; - const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned Opc = + TLI.getPointerTy(DL) == MVT::i32 + ? (Subtarget->isTarget64BitILP32() ? 
X86::LEA64_32r : X86::LEA32r) + : X86::LEA64r; + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); @@ -3450,14 +3532,32 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, SmallVector AddrOps; AM.getFullAddress(AddrOps); - MachineInstr *Result = - XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, - Size, Alignment, /*AllowCommute=*/true); + MachineInstr *Result = XII.foldMemoryOperandImpl( + *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment, + /*AllowCommute=*/true); if (!Result) return false; + // The index register could be in the wrong register class. Unfortunately, + // foldMemoryOperandImpl could have commuted the instruction so its not enough + // to just look at OpNo + the offset to the index reg. We actually need to + // scan the instruction to find the index reg and see if its the correct reg + // class. + unsigned OperandNo = 0; + for (MachineInstr::mop_iterator I = Result->operands_begin(), + E = Result->operands_end(); I != E; ++I, ++OperandNo) { + MachineOperand &MO = *I; + if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg) + continue; + // Found the index reg, now try to rewrite it. + unsigned IndexReg = constrainOperandRegClass(Result->getDesc(), + MO.getReg(), OperandNo); + if (IndexReg == MO.getReg()) + continue; + MO.setReg(IndexReg); + } + Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); - FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; }
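
The X86FastEmitLoad change above threads the load's alignment down so that 128-bit vector loads can use an aligned move only when at least 16-byte alignment is known, and fall back to the unaligned form otherwise. A minimal standalone sketch of that decision, not part of the patch; pickV4F32LoadOpc is an illustrative helper name, not an LLVM API:

    #include <cstdio>

    // Pick the 128-bit float load opcode the same way the hunk above does:
    // aligned move only when the known alignment reaches 16 bytes.
    static const char *pickV4F32LoadOpc(unsigned Alignment, bool HasAVX) {
      if (Alignment >= 16)
        return HasAVX ? "VMOVAPSrm" : "MOVAPSrm";
      return HasAVX ? "VMOVUPSrm" : "MOVUPSrm";
    }

    int main() {
      // An alignment of 0 would first be replaced by the ABI alignment,
      // as X86SelectLoad does above.
      printf("%s\n", pickV4F32LoadOpc(16, /*HasAVX=*/false)); // MOVAPSrm
      printf("%s\n", pickV4F32LoadOpc(4,  /*HasAVX=*/true));  // VMOVUPSrm
      return 0;
    }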
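
The X86ChooseCmpImmediateOpcode change above prefers the CMP16ri8/CMP32ri8/CMP64ri8 forms whenever the immediate fits in a signed byte, which yields a shorter encoding than the full-width immediate forms. A minimal standalone sketch of the signed-range check behind that choice, not part of the patch; fitsInSigned and chooseCmp32Imm are illustrative names:

    #include <cstdint>
    #include <cstdio>

    // True if V fits in a Bits-wide signed immediate (the check isInt<N>
    // performs in the hunk above).
    template <unsigned Bits> static bool fitsInSigned(int64_t V) {
      return V >= -(INT64_C(1) << (Bits - 1)) && V < (INT64_C(1) << (Bits - 1));
    }

    // Mirrors the CMP32ri8 / CMP32ri decision for a 32-bit compare.
    static const char *chooseCmp32Imm(int64_t Imm) {
      return fitsInSigned<8>(Imm) ? "CMP32ri8" : "CMP32ri";
    }

    int main() {
      printf("%s\n", chooseCmp32Imm(100));   // CMP32ri8: fits in a signed byte
      printf("%s\n", chooseCmp32Imm(1000));  // CMP32ri: needs the 32-bit form
      return 0;
    }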
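
The X86FastEmitSSESelect change above keeps the CMP/AND/ANDN/OR sequence on plain SSE but emits a compare plus a single register-form VBLENDV under AVX. A minimal standalone sketch of what the bitwise sequence computes for one scalar lane, not part of the patch; bitwiseSelect is an illustrative name:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // CMPSS leaves either all ones or all zeros in the destination lane; the
    // AND/ANDN/OR triple then merges the two select operands bitwise.  With an
    // all-ones/all-zeros mask, VBLENDVPS produces the same merge in one
    // instruction, keyed off the compare result.
    static float bitwiseSelect(bool CondTrue, float TrueVal, float FalseVal) {
      uint32_t Mask = CondTrue ? 0xFFFFFFFFu : 0u;   // compare-result lane
      uint32_t T, F;
      std::memcpy(&T, &TrueVal, sizeof(T));
      std::memcpy(&F, &FalseVal, sizeof(F));
      uint32_t Bits = (Mask & T) | (~Mask & F);      // ANDPS / ANDNPS / ORPS
      float Out;
      std::memcpy(&Out, &Bits, sizeof(Out));
      return Out;
    }

    int main() {
      printf("%g\n", bitwiseSelect(true, 1.5f, 2.5f));   // 1.5
      printf("%g\n", bitwiseSelect(false, 1.5f, 2.5f));  // 2.5
      return 0;
    }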