bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
- bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+ bool Aligned = false);
+ bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
+ bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
+ const X86AddressMode &AM, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
- Val = AndResult;
+ TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+ ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
(Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
case MVT::v4f32:
- Opc = X86::MOVAPSmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- Opc = X86::MOVAPDmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- Opc = X86::MOVDQAmr;
+ if (Aligned)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(Opc)), AM).addReg(Val);
+ DL, TII.get(Opc)), AM).addReg(ValReg);
return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM) {
+ const X86AddressMode &AM, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
if (ValReg == 0)
return false;
- return X86FastEmitStore(VT, ValReg, AM);
+ return X86FastEmitStore(VT, ValReg, AM, Aligned);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
if (S->isAtomic())
return false;
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
if (!X86SelectAddress(I->getOperand(1), AM))
return false;
- return X86FastEmitStore(VT, I->getOperand(0), AM);
+ return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
}
/// X86SelectRet - Select and emit code to implement ret instructions.
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
- CC != CallingConv::X86_FastCall)
+ CC != CallingConv::X86_FastCall &&
+ CC != CallingConv::X86_64_SysV)
return false;
- if (Subtarget->isTargetWin64())
+ if (Subtarget->isCallingConvWin64(CC))
return false;
// Don't handle popping bytes on return for now.
// Generate the DIV/IDIV instruction.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
- // Copy output register into result register.
- unsigned ResultReg = createResultReg(TypeEntry.RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+ // For i8 remainder, we can't reference AH directly, as we'll end
+ // up with bogus copies like %R9B = COPY %AH. Reference AX
+ // instead to prevent AH references in a REX instruction.
+ //
+ // The current assumption of the fast register allocator is that isel
+ // won't generate explicit references to the GPR8_NOREX registers. If
+ // the allocator and/or the backend get enhanced to be more robust in
+ // that regard, this can be, and should be, removed.
+ unsigned ResultReg = 0;
+ if ((I->getOpcode() == Instruction::SRem ||
+ I->getOpcode() == Instruction::URem) &&
+ OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+ unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+ unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+
+ // Shift AX right by 8 bits instead of using AH.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri),
+ ResultSuperReg).addReg(SourceSuperReg).addImm(8);
+
+ // Now reference the 8-bit subreg of the result.
+ ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+ /*Kill=*/true, X86::sub_8bit);
+ }
+ // Copy the result out of the physreg if we haven't already.
+ if (!ResultReg) {
+ ResultReg = createResultReg(TypeEntry.RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg)
+ .addReg(OpEntry.DivRemResultReg);
+ }
UpdateValueMap(I, ResultReg);
return true;
if (!FuncInfo.CanLowerReturn)
return false;
- if (Subtarget->isTargetWin64())
- return false;
-
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
CallingConv::ID CC = F->getCallingConv();
if (CC != CallingConv::C)
return false;
-
+
+ if (Subtarget->isCallingConvWin64(CC))
+ return false;
+
if (!Subtarget->is64Bit())
return false;
// Handle only C and fastcc calling conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
+ bool isWin64 = Subtarget->isCallingConvWin64(CC);
if (CC != CallingConv::C && CC != CallingConv::Fast &&
- CC != CallingConv::X86_FastCall)
+ CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
+ CC != CallingConv::X86_64_SysV)
return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
// Don't know how to handle Win64 varargs yet. Nothing special needed for
// x86-32. Special handling for x86-64 is implemented.
- if (isVarArg && Subtarget->isTargetWin64())
+ if (isVarArg && isWin64)
return false;
// Fast-isel doesn't know about callee-pop yet.
I->getParent()->getContext());
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64())
+ if (isWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
X86::EBX).addReg(Base);
}
- if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ if (Subtarget->is64Bit() && isVarArg && !isWin64) {
// Count the number of XMM registers allocated.
static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX, RegState::Implicit);
- if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ if (Subtarget->is64Bit() && isVarArg && !isWin64)
MIB.addReg(X86::AL, RegState::Implicit);
// Add implicit physical register uses to the call.