#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
- bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
- unsigned &ResultReg);
+ bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
+ unsigned &ResultReg, unsigned Alignment = 1);
- bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
const Value *Cond);
+
+ const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ X86AddressMode &AM);
};
} // end anonymous namespace.
return std::make_pair(CC, NeedSwap);
}
+/// \brief Adds a complex addressing mode to the given machine instr builder.
+/// Note, this will constrain the index register. If it's not possible to
+/// constrain the given index register, then a new one will be created. The
+/// IndexReg field of the addressing mode will be updated to match in this case.
+const MachineInstrBuilder &
+X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
+ X86AddressMode &AM) {
+ // First constrain the index register. It needs to be a GR64_NOSP.
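+  // The memory reference is appended as the standard 5-operand group (base,
+  // scale, index, disp, segment); X86::AddrIndexReg is the index register's
+  // offset within that group, hence the operand number below.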
+ AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
+ MIB->getNumOperands() +
+ X86::AddrIndexReg);
+ return ::addFullAddress(MIB, AM);
+}
+
/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user. The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
}
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
- EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- MachineMemOperand *MMO, unsigned &ResultReg) {
+bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
+ MachineMemOperand *MMO, unsigned &ResultReg,
+ unsigned Alignment) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
case MVT::f80:
// No f80 support yet.
return false;
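+  // 128-bit vector loads: the aligned opcodes (MOVAPS/MOVAPD/MOVDQA) fault on
+  // under-aligned addresses, so only use them when the load is known to be at
+  // least 16-byte aligned; otherwise fall back to the unaligned forms.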
+ case MVT::v4f32:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v2f64:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ RC = &X86::VR128RegClass;
+ break;
}
ResultReg = createResultReg(RC);
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasSSE4A = Subtarget->hasSSE4A();
+ bool HasAVX = Subtarget->hasAVX();
+ bool IsNonTemporal = MMO && MMO->isNonTemporal();
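+  // Non-temporal (streaming) stores write around the caches to avoid
+  // polluting them with data that won't be re-read soon; select them below
+  // only when the memory operand is marked non-temporal and the subtarget
+  // actually supports the instruction.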
+
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
- case MVT::i32: Opc = X86::MOV32mr; break;
- case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::i32:
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
+ break;
case MVT::f32:
- Opc = X86ScalarSSEf32 ?
- (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSS;
+ else
+ Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ } else
+ Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = X86ScalarSSEf64 ?
- (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+  if (X86ScalarSSEf64) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSD;
+ else
+ Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ } else
+ Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
- else
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ else
+ Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ } else
Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM,
+ X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
// Prepare for inserting code in the local-value area.
SavePoint SaveInsertPt = enterLocalValueArea();
- if (TLI.getPointerTy() == MVT::i64) {
+ if (TLI.getPointerTy(DL) == MVT::i64) {
Opc = X86::MOV64rm;
RC = &X86::GR64RegClass;
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::IntToPtr:
// Look past no-op inttoptrs if the operand is in the same BB.
if (InMBB &&
- TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if the operand is in the same BB.
- if (InMBB &&
- TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT SrcVT = TLI.getValueType(DL, RV->getType());
EVT DstVT = VA.getValVT();
// Special handling for extended integers.
if (SrcVT != DstVT) {
if (!X86SelectAddress(Ptr, AM))
return false;
+ unsigned Alignment = LI->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+
unsigned ResultReg = 0;
- if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
+ Alignment))
return false;
updateValueMap(I, ResultReg);
}
}
-/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
-/// of the comparison, return an opcode that works for the compare (e.g.
-/// CMP32ri) otherwise return 0.
+/// If we have a comparison with RHS as the RHS of the comparison, return an
+/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ int64_t Val = RHSC->getSExtValue();
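+  // Prefer the sign-extended 8-bit immediate forms (CMP*ri8) when the value
+  // fits in an i8; they have a smaller encoding.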
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
- default: return 0;
- case MVT::i8: return X86::CMP8ri;
- case MVT::i16: return X86::CMP16ri;
- case MVT::i32: return X86::CMP32ri;
+ default:
+ return 0;
+ case MVT::i8:
+ return X86::CMP8ri;
+ case MVT::i16:
+ if (isInt<8>(Val))
+ return X86::CMP16ri8;
+ return X86::CMP16ri;
+ case MVT::i32:
+ if (isInt<8>(Val))
+ return X86::CMP32ri8;
+ return X86::CMP32ri;
case MVT::i64:
+ if (isInt<8>(Val))
+ return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
- if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ if (isInt<32>(Val))
return X86::CMP64ri32;
return 0;
}
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
return false;
// Handle zero-extension from i1 to i8, which is common.
- MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
+ MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
X86::CondCode CC;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
- EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
.addMBB(TrueMBB);
}
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
-
- // Emits an unconditional branch to the FalseBB, obtains the branch
- // weight, and adds it to the successor list.
- fastEmitBranch(FalseMBB, DbgLoc);
-
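+      // finishCondBranch adds both successors with the right branch
+      // probabilities and emits the fall-through branch to FalseMBB,
+      // replacing the manual branch-weight bookkeeping.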
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
.addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
unsigned ResultReg;
if (Subtarget->hasAVX()) {
+ const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
+
// If we have AVX, create 1 blendv instead of 3 logic instructions.
// Blendv was introduced with SSE 4.1, but the 2 register form implicitly
// uses XMM0 as the selection register. That may need just as many
unsigned BlendOpcode =
(RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
- LHSReg, LHSIsKill, CmpReg, true);
+ unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
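+    // The blend produces a VR128 value; copy it into the scalar register
+    // class (RC) that the surrounding selection code expects.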
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
} else {
}
bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+ // The target-independent selection algorithm in FastISel already knows how
+ // to select a SINT_TO_FP if the target is SSE but not AVX.
+ // Early exit if the subtarget doesn't have AVX.
+ if (!Subtarget->hasAVX())
+ return false;
+
if (!I->getOperand(0)->getType()->isIntegerTy(32))
return false;
} else
return false;
- // The target-independent selection algorithm in FastISel already knows how
- // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only
- // reachable if the subtarget has AVX.
- assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!");
-
unsigned ImplicitDefReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
// This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
return true;
}
+ bool KillInputReg = false;
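+  // Only mark the input register as killed if we create a local copy below;
+  // the original virtual register may still have other uses.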
if (!Subtarget->is64Bit()) {
// If we're on x86-32, we can't extract an i8 from a general register.
// First issue a copy to GR16_ABCD or GR32_ABCD.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
InputReg = CopyReg;
+ KillInputReg = true;
}
// Issue an extract_subreg.
unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
- InputReg, /*Kill=*/true,
+ InputReg, KillInputReg,
X86::sub_8bit);
if (!ResultReg)
return false;
default: return false;
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16: {
- if (TM.Options.UseSoftFloat || !Subtarget->hasF16C())
+ if (Subtarget->useSoftFloat() || !Subtarget->hasF16C())
return false;
const Value *Op = II->getArgOperand(0);
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
- EVT PtrTy = TLI.getPointerTy();
+ EVT PtrTy = TLI.getPointerTy(DL);
const Value *Op1 = II->getArgOperand(0); // The guard's value.
const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
.addImm(0)
.addMetadata(DI->getVariable())
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
- EVT ArgVT = TLI.getValueType(ArgTy);
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
default: return false;
unsigned GPRIdx = 0;
unsigned FPRIdx = 0;
for (auto const &Arg : F->args()) {
- MVT VT = TLI.getSimpleValueType(Arg.getType());
+ MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned SrcReg;
switch (VT.SimpleTy) {
bool &IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
- const char *SymName = CLI.SymName;
+ MCSymbol *Symbol = CLI.Symbol;
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CC);
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
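+  // Unlike getNextStackOffset, this rounds the byte count up to the alignment
+  // the outgoing-argument area requires, so the adjustment covers any padding.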
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
- ArgVT.getStoreSize(), Alignment);
+ MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
if (Flags.isByVal()) {
X86AddressMode SrcAM;
SrcAM.Base.Reg = ArgReg;
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ !GV->isStrongDefinitionForLinker() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
}
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
- if (SymName)
- MIB.addExternalSymbol(SymName, OpFlags);
+ if (Symbol)
+ MIB.addSym(Symbol, OpFlags);
else
MIB.addGlobalAddress(GV, 0, OpFlags);
}
return X86SelectSIToFP(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (DstVT.bitsGT(SrcVT))
return X86SelectZExt(I);
if (DstVT.bitsLT(SrcVT))
updateValueMap(I, Reg);
return true;
}
+ case Instruction::BitCast: {
+  // Select SSE2/AVX bitcasts between 128/256-bit vector types.
+ if (!Subtarget->hasSSE2())
+ return false;
+
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (!SrcVT.isSimple() || !DstVT.isSimple())
+ return false;
+
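+  // A same-width bitcast is a no-op at the register level: all 128-bit vector
+  // types share VR128 (and 256-bit types share VR256), so the source register
+  // can be reused for the destination.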
+ if (!SrcVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ return false;
+
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+
+  // No instruction is needed for the conversion. Reuse the register used by
+  // the first operand.
+ updateValueMap(I, Reg);
+ return true;
+ }
}
return false;
TII.get(Opc), ResultReg);
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- TM.getDataLayout()->getPointerSize(), Align);
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (TM.getRelocationModel() == Reloc::Static &&
- TLI.getPointerTy() == MVT::i64) {
+ TLI.getPointerTy(DL) == MVT::i64) {
// The displacement code could be more than 32 bits away so we need to use
// an instruction with a 64 bit immediate
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
ResultReg)
.addGlobalAddress(GV);
} else {
- unsigned Opc = TLI.getPointerTy() == MVT::i32
- ? (Subtarget->isTarget64BitILP32()
- ? X86::LEA64_32r : X86::LEA32r)
- : X86::LEA64r;
+ unsigned Opc =
+ TLI.getPointerTy(DL) == MVT::i32
+ ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
}
}
unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = TLI.getPointerTy() == MVT::i32
- ? (Subtarget->isTarget64BitILP32()
- ? X86::LEA64_32r : X86::LEA32r)
- : X86::LEA64r;
- const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned Opc =
+ TLI.getPointerTy(DL) == MVT::i32
+ ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
- MachineInstr *Result =
- XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
- Size, Alignment, /*AllowCommute=*/true);
+ MachineInstr *Result = XII.foldMemoryOperandImpl(
+ *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+ /*AllowCommute=*/true);
if (!Result)
return false;
+  // The index register could be in the wrong register class. Unfortunately,
+  // foldMemoryOperandImpl could have commuted the instruction, so it's not
+  // enough to just look at OpNo + the offset to the index reg. We actually
+  // need to scan the instruction to find the index reg and check whether it's
+  // in the correct reg class.
+ unsigned OperandNo = 0;
+ for (MachineInstr::mop_iterator I = Result->operands_begin(),
+ E = Result->operands_end(); I != E; ++I, ++OperandNo) {
+ MachineOperand &MO = *I;
+ if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
+ continue;
+ // Found the index reg, now try to rewrite it.
+ unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
+ MO.getReg(), OperandNo);
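+    // constrainOperandRegClass may insert a COPY into a register of the
+    // required class and return the new vreg; rewrite the operand if so.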
+ if (IndexReg == MO.getReg())
+ continue;
+ MO.setReg(IndexReg);
+ }
+
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
- FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
}