/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// RegInfo - X86 register info.
- ///
- const X86RegisterInfo *RegInfo;
-
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
/// When SSE2 is available, use it for f64 operations.
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
}
virtual bool TargetSelectInstruction(const Instruction *I);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
- bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ bool Aligned = false);
+ bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
+ bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
+ const X86AddressMode &AM, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::i1: {
// Mask out all but the lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
- Val = AndResult;
+ TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+ ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::f64:
Opc = X86ScalarSSEf64 ?
(Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
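+ // For the vector cases below, the aligned MOVAPS/MOVAPD/MOVDQA forms may
+ // only be used when the store satisfies the type's ABI alignment; otherwise
+ // fall back to the unaligned MOVUPS/MOVUPD/MOVDQU variants.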
case MVT::v4f32:
- Opc = X86::MOVAPSmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- Opc = X86::MOVAPDmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- Opc = X86::MOVDQAmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(Opc)), AM).addReg(Val);
+ DL, TII.get(Opc)), AM).addReg(ValReg);
return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM) {
+ const X86AddressMode &AM, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
unsigned ValReg = getRegForValue(Val);
if (ValReg == 0)
return false;
- return X86FastEmitStore(VT, ValReg, AM);
+ return X86FastEmitStore(VT, ValReg, AM, Aligned);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the extension opcode Opc (e.g. sign/zero
/// extend), putting the result in ResultReg.
if (S->isAtomic())
return false;
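+ // Treat the store as aligned if it has no explicit alignment (meaning it
+ // gets the ABI alignment) or its explicit alignment is at least the ABI
+ // alignment of the stored type.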
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
X86AddressMode AM;
if (!X86SelectAddress(I->getOperand(1), AM))
return false;
- return X86FastEmitStore(VT, I->getOperand(0), AM);
+ return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
}
/// X86SelectRet - Select and emit code to implement ret instructions.
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
- CC != CallingConv::X86_FastCall)
+ CC != CallingConv::X86_FastCall &&
+ CC != CallingConv::X86_64_SysV)
return false;
- if (Subtarget->isTargetWin64())
+ if (Subtarget->isCallingConvWin64(CC))
return false;
// Don't handle popping bytes on return for now.
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
- // Handle zero-extension from i1 to i8, which is common.
- if (!I->getOperand(0)->getType()->isIntegerTy(1))
- return false;
-
EVT DstVT = TLI.getValueType(I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0)
return false;
- // Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
- if (ResultReg == 0)
- return false;
+ // Handle zero-extension from i1 to i8, which is common.
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT();
+ if (SrcVT.SimpleTy == MVT::i1) {
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ SrcVT = MVT::i8;
+
+ if (ResultReg == 0)
+ return false;
+ }
- if (DstVT != MVT::i8) {
+ if (DstVT == MVT::i64) {
+ // Handle extension to 64-bits via sub-register shenanigans.
+ unsigned MovInst;
+
+ switch (SrcVT.SimpleTy) {
+ case MVT::i8: MovInst = X86::MOVZX32rr8; break;
+ case MVT::i16: MovInst = X86::MOVZX32rr16; break;
+ case MVT::i32: MovInst = X86::MOV32rr; break;
+ default: llvm_unreachable("Unexpected zext to i64 source type");
+ }
+
+ unsigned Result32 = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovInst), Result32)
+ .addReg(ResultReg);
+
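+ // Writing a 32-bit register on x86-64 implicitly zeroes bits 63:32 of the
+ // containing 64-bit register, so the 32-bit result can be placed into a
+ // GR64 with SUBREG_TO_REG rather than a real zero-extending instruction.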
+ ResultReg = createResultReg(&X86::GR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
+ ResultReg)
+ .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
+ } else if (DstVT != MVT::i8) {
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true);
if (ResultReg == 0)
{ &X86::GR16RegClass, X86::AX, X86::DX, {
{ X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
{ X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
- { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv
- { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv
+ { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem
}
}, // i16
{ &X86::GR32RegClass, X86::EAX, X86::EDX, {
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
- { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
- { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
+ { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
}
}, // i64
};
if (OpEntry.IsOpSigned)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpEntry.OpSignExtend));
- else
+ else {
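+ // For unsigned division, zero the high input register with MOV32r0 (a
+ // 32-bit xor) for every width: a 32-bit write implicitly clears the upper
+ // bits, avoiding separate 16- and 64-bit zeroing instructions.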
+ unsigned Zero32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+ TII.get(X86::MOV32r0), Zero32);
+
+ // Copy the zero into the appropriate sub/super/identical physical
+ // register. Unfortunately the operations needed are not uniform enough to
+ // fit neatly into the table above.
+ if (VT.SimpleTy == MVT::i16) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), TypeEntry.HighInReg)
+ .addReg(Zero32, 0, X86::sub_16bit);
+ } else if (VT.SimpleTy == MVT::i32) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), TypeEntry.HighInReg)
+ .addReg(Zero32);
+ } else if (VT.SimpleTy == MVT::i64) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
+ .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
+ }
+ }
}
// Generate the DIV/IDIV instruction.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
- // Copy output register into result register.
- unsigned ResultReg = createResultReg(TypeEntry.RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+ // For i8 remainder, we can't reference AH directly, as we'll end
+ // up with bogus copies like %R9B = COPY %AH. Reference AX
+ // instead to prevent AH references in a REX instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GPR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ unsigned ResultReg = 0;
+ if ((I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+ unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+ unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri),
+ ResultSuperReg).addReg(SourceSuperReg).addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
+ ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+ /*Kill=*/true, X86::sub_8bit);
+ }
+ // Copy the result out of the physreg if we haven't already.
+ if (!ResultReg) {
+ ResultReg = createResultReg(TypeEntry.RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
UpdateValueMap(I, ResultReg);
return true;
if (!FuncInfo.CanLowerReturn)
return false;
- if (Subtarget->isTargetWin64())
- return false;
-
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
CallingConv::ID CC = F->getCallingConv();
if (CC != CallingConv::C)
return false;
-
+
+ if (Subtarget->isCallingConvWin64(CC))
+ return false;
+
if (!Subtarget->is64Bit())
return false;
const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++Idx) {
- if (I->use_empty())
- continue;
bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
// Handle only the C, fastcc, X86-fastcall, Win64, and 64-bit SysV calling
// conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
+ bool isWin64 = Subtarget->isCallingConvWin64(CC);
if (CC != CallingConv::C && CC != CallingConv::Fast &&
- CC != CallingConv::X86_FastCall)
+ CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
+ CC != CallingConv::X86_64_SysV)
return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
// Don't know how to handle Win64 varargs yet. Nothing special needed for
// x86-32. Special handling for x86-64 is implemented.
- if (isVarArg && Subtarget->isTargetWin64())
+ if (isVarArg && isWin64)
return false;
// Fast-isel doesn't know about callee-pop yet.
I->getParent()->getContext());
// Allocate shadow area for Win64
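// (the Win64 ABI requires callers to reserve 32 bytes of home space for the
// four register parameters, even when they are passed in registers).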
- if (Subtarget->isTargetWin64())
+ if (isWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
X86AddressMode AM;
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(
+ getTargetMachine()->getRegisterInfo());
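// Arguments passed in memory live at a fixed offset from the stack pointer.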
AM.Base.Reg = RegInfo->getStackRegister();
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
X86::EBX).addReg(Base);
}
- if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ if (Subtarget->is64Bit() && isVarArg && !isWin64) {
// Count the number of XMM registers allocated.
static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX, RegState::Implicit);
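// The x86-64 SysV ABI requires varargs calls to pass the number of XMM
// registers used in AL.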
- if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ if (Subtarget->is64Bit() && isVarArg && !isWin64)
MIB.addReg(X86::AL, RegState::Implicit);
// Add implicit physical register uses to the call.