cl::desc("Print instructions that the allocator wants to"
" fuse, but the X86 backend currently can't"),
cl::Hidden);
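+ // Escape hatch for the PIC-load rematerialization logic below; hidden and
+ // enabled by default.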
+ cl::opt<bool>
+ ReMatPICLoad("remat-pic-load",
+ cl::desc("Allow rematerializing pic load"),
+ cl::init(true), cl::Hidden);
}
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SBB64ri32, X86::SBB64mi32 },
{ X86::SBB64ri8, X86::SBB64mi8 },
{ X86::SBB64rr, X86::SBB64mr },
- { X86::SHL16r1, X86::SHL16m1 },
{ X86::SHL16rCL, X86::SHL16mCL },
{ X86::SHL16ri, X86::SHL16mi },
- { X86::SHL32r1, X86::SHL32m1 },
{ X86::SHL32rCL, X86::SHL32mCL },
{ X86::SHL32ri, X86::SHL32mi },
- { X86::SHL64r1, X86::SHL64m1 },
{ X86::SHL64rCL, X86::SHL64mCL },
{ X86::SHL64ri, X86::SHL64mi },
- { X86::SHL8r1, X86::SHL8m1 },
{ X86::SHL8rCL, X86::SHL8mCL },
{ X86::SHL8ri, X86::SHL8mi },
{ X86::SHLD16rrCL, X86::SHLD16mrCL },
{ X86::TEST16ri, X86::TEST16mi, 1 },
{ X86::TEST32ri, X86::TEST32mi, 1 },
{ X86::TEST64ri32, X86::TEST64mi32, 1 },
- { X86::TEST8ri, X86::TEST8mi, 1 },
- { X86::XCHG16rr, X86::XCHG16mr, 0 },
- { X86::XCHG32rr, X86::XCHG32mr, 0 },
- { X86::XCHG64rr, X86::XCHG64mr, 0 },
- { X86::XCHG8rr, X86::XCHG8mr, 0 }
+ { X86::TEST8ri, X86::TEST8mi, 1 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
{ X86::PSHUFDri, X86::PSHUFDmi },
{ X86::PSHUFHWri, X86::PSHUFHWmi },
{ X86::PSHUFLWri, X86::PSHUFLWmi },
- { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 },
{ X86::RCPPSr, X86::RCPPSm },
{ X86::RCPPSr_Int, X86::RCPPSm_Int },
{ X86::RSQRTPSr, X86::RSQRTPSm },
{ X86::TEST8rr, X86::TEST8rm },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
{ X86::UCOMISDrr, X86::UCOMISDrm },
- { X86::UCOMISSrr, X86::UCOMISSrm },
- { X86::XCHG16rr, X86::XCHG16rm },
- { X86::XCHG32rr, X86::XCHG32rm },
- { X86::XCHG64rr, X86::XCHG64rm },
- { X86::XCHG8rr, X86::XCHG8rm }
+ { X86::UCOMISSrr, X86::UCOMISSrm }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
{ X86::DIVPSrr, X86::DIVPSrm },
{ X86::DIVSDrr, X86::DIVSDrm },
{ X86::DIVSSrr, X86::DIVSSrm },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm },
+ { X86::FsANDPDrr, X86::FsANDPDrm },
+ { X86::FsANDPSrr, X86::FsANDPSrm },
+ { X86::FsORPDrr, X86::FsORPDrm },
+ { X86::FsORPSrr, X86::FsORPSrm },
+ { X86::FsXORPDrr, X86::FsXORPDrm },
+ { X86::FsXORPSrr, X86::FsXORPSrm },
{ X86::HADDPDrr, X86::HADDPDrm },
{ X86::HADDPSrr, X86::HADDPSrm },
{ X86::HSUBPDrr, X86::HSUBPDrm },
bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
unsigned& sourceReg,
unsigned& destReg) const {
- unsigned oc = MI.getOpcode();
- if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
- oc == X86::MOV32rr || oc == X86::MOV64rr ||
- oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
- oc == X86::MOV_Fp3232 || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
- oc == X86::MOV_Fp3264 || oc == X86::MOV_Fp6432 || oc == X86::MOV_Fp6464 ||
- oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
- oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
- oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
- oc == X86::MOVPS2SSrr || oc == X86::MOVPD2SDrr ||
- oc == X86::MMX_MOVD64rr || oc == X86::MMX_MOVQ64rr) {
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isRegister() &&
- MI.getOperand(1).isRegister() &&
- "invalid register-register move instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case X86::MOV8rr:
+ case X86::MOV16rr:
+ case X86::MOV32rr:
+ case X86::MOV64rr:
+ case X86::MOV16to16_:
+ case X86::MOV32to32_:
+ case X86::MOVSSrr:
+ case X86::MOVSDrr:
+
+ // FP Stack register class copies
+ case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
+ case X86::MOV_Fp3264: case X86::MOV_Fp3280:
+ case X86::MOV_Fp6432: case X86::MOV_Fp8032:
+
+ case X86::FsMOVAPSrr:
+ case X86::FsMOVAPDrr:
+ case X86::MOVAPSrr:
+ case X86::MOVAPDrr:
+ case X86::MOVSS2PSrr:
+ case X86::MOVSD2PDrr:
+ case X86::MOVPS2SSrr:
+ case X86::MOVPD2SDrr:
+ case X86::MMX_MOVD64rr:
+ case X86::MMX_MOVQ64rr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
+ "invalid register-register move instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
}
- return false;
}
unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI,
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
MI->getOperand(3).isReg() && MI->getOperand(4).isCPI() &&
- MI->getOperand(1).getReg() == 0 &&
MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0)
- return true;
-
- // If this is a load from a fixed argument slot, we know the value is
- // invariant across the whole function, because we don't redefine argument
- // values.
-#if 0
- // FIXME: This is disabled due to a remat bug. rdar://5671644
- MachineFunction *MF = MI->getParent()->getParent();
- if (MI->getOperand(1).isFI() &&
- MF->getFrameInfo()->isFixedObjectIndex(MI->getOperand(1).getIndex()))
- return true;
-#endif
+ MI->getOperand(3).getReg() == 0) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ if (!ReMatPICLoad)
+ return false;
+ // Allow re-materialization of PIC load.
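+ // The PIC base is materialized exactly once per function by MOVPC32r and
+ // never redefined, so a constant-pool load based on it produces the same
+ // value wherever it is re-executed.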
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+ }
return false;
}
return true;
}
-/// isReallySideEffectFree - If the M_MAY_HAVE_SIDE_EFFECTS flag is set, this
-/// method is called to determine if the specific instance of this instruction
-/// has side effects. This is useful in cases of instructions, like loads, which
-/// generally always have side effects. A load from a constant pool doesn't have
-/// side effects, though. So we need to differentiate it from the general case.
-bool X86InstrInfo::isReallySideEffectFree(MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: break;
- case X86::MOV32rm:
- // Loads from stubs of global addresses are side effect free.
- if (MI->getOperand(1).isReg() &&
- MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
- MI->getOperand(4).isGlobal() &&
- TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad
- (MI->getOperand(4).getGlobal(), TM, false) &&
- MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0)
- return true;
- // FALLTHROUGH
- case X86::MOV8rm:
- case X86::MOV16rm:
- case X86::MOV16_rm:
- case X86::MOV32_rm:
- case X86::MOV64rm:
- case X86::LD_Fp64m:
- case X86::MOVSSrm:
- case X86::MOVSDrm:
- case X86::MOVAPSrm:
- case X86::MOVAPDrm:
- case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm:
- // Loads from constant pools are trivially rematerializable.
- if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(4).isCPI() &&
- MI->getOperand(1).getReg() == 0 &&
- MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0)
- return true;
-
- // If this is a load from a fixed argument slot, we know the value is
- // invariant across the whole function, because we don't redefine argument
- // values.
- MachineFunction *MF = MI->getParent()->getParent();
- if (MI->getOperand(1).isFI() &&
- MF->getFrameInfo()->isFixedObjectIndex(MI->getOperand(1).getIndex()))
+/// isInvariantLoad - Return true if the specified instruction (which is
+/// marked mayLoad) is loading from a location whose value is invariant across
+/// the function. For example, loading a value from the constant pool or from
+/// the argument area of a function that does not change. This should only
+/// return true if *all* loads the instruction does are invariant (if it does
+/// multiple loads).
+bool X86InstrInfo::isInvariantLoad(MachineInstr *MI) const {
+ // This code cares about loads from three cases: constant pool entries,
+ // invariant argument slots, and global stubs. In order to handle these
+ // cases for the myriad of X86 instructions, we just scan for a CP/FI/GV
+ // operand and base our analysis on it. This is safe because the address of
+ // none of these three cases is ever used as anything other than a load base,
+ // and X86 doesn't have any instructions that load from multiple places.
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Loads from constant pools are trivially invariant.
+ if (MO.isCPI())
return true;
-
- return false;
- }
+
+ if (MO.isGlobal()) {
+ if (TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(MO.getGlobal(),
+ TM, false))
+ return true;
+ return false;
+ }
- // All other instances of these instructions are presumed to have side
- // effects.
+ // If this is a load from an invariant stack slot, the load is a constant.
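+ // Fixed, immutable objects are incoming argument slots; argument values are
+ // never redefined, so loads from them are invariant.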
+ if (MO.isFI()) {
+ const MachineFrameInfo &MFI =
+ *MI->getParent()->getParent()->getFrameInfo();
+ int Idx = MO.getIndex();
+ return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
+ }
+ }
+
+ // Otherwise, conservatively assume the load is not invariant.
return false;
}
unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- MachineInstr *Ins =
- BuildMI(get(X86::INSERT_SUBREG), leaInReg).addReg(Src).addImm(2);
- Ins->copyKillDeadInfo(MI);
+ // Build an implicit UNDEF value to insert into. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ MachineInstr *Undef = BuildMI(get(X86::IMPLICIT_DEF), leaInReg);
+
+ MachineInstr *Ins =
+ BuildMI(get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg).addReg(Src).addImm(X86::SUBREG_16BIT);
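+ // leaInReg now holds Src in its low 16 bits; the upper bits are undef, but
+ // that is fine since only the low 16 bits survive the extract below.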
NewMI = BuildMI(get(Opc), leaOutReg)
.addReg(0).addImm(1 << ShAmt).addReg(leaInReg).addImm(0);
MachineInstr *Ext =
- BuildMI(get(X86::EXTRACT_SUBREG), Dest).addReg(leaOutReg).addImm(2);
+ BuildMI(get(X86::EXTRACT_SUBREG), Dest)
+ .addReg(leaOutReg).addImm(X86::SUBREG_16BIT);
Ext->copyKillDeadInfo(MI);
+ MFI->insert(MBBI, Undef);
MFI->insert(MBBI, Ins); // Insert the insert_subreg
LV.instructionChanged(MI, NewMI); // Update live variables
LV.addVirtualRegisterKilled(leaInReg, NewMI);
}
}
+ if (!NewMI) return 0;
+
NewMI->copyKillDeadInfo(MI);
LV.instructionChanged(MI, NewMI); // Update live variables
MFI->insert(MBBI, NewMI); // Insert the new inst
unsigned C = MI->getOperand(2).getReg();
bool BisKill = MI->getOperand(1).isKill();
bool CisKill = MI->getOperand(2).isKill();
+ // If the destination is tied to the first source (A == B), commuting the
+ // sources means the destination register must be updated to C as well.
+ if (A == B) {
+ // Must be two address instruction!
assert(MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) == 0 &&
+ "Expecting a two-address instruction!");
+ A = C;
+ CisKill = false;
+ }
return BuildMI(get(Opc), A).addReg(C, false, false, CisKill)
.addReg(B, false, false, BisKill).addImm(Size-Amt);
}
case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
}
- MI->setInstrDescriptor(get(Opc));
+ MI->setDesc(get(Opc));
// Fallthrough intended.
}
default:
}
void X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- if (DestRC != SrcRC) {
- // Moving EFLAGS to / from another register requires a push and a pop.
- if (SrcRC == &X86::CCRRegClass) {
- assert(SrcReg == X86::EFLAGS);
- if (DestRC == &X86::GR64RegClass) {
- BuildMI(MBB, MI, get(X86::PUSHFQ));
- BuildMI(MBB, MI, get(X86::POP64r), DestReg);
- return;
- } else if (DestRC == &X86::GR32RegClass) {
- BuildMI(MBB, MI, get(X86::PUSHFD));
- BuildMI(MBB, MI, get(X86::POP32r), DestReg);
- return;
- }
- } else if (DestRC == &X86::CCRRegClass) {
- assert(DestReg == X86::EFLAGS);
- if (SrcRC == &X86::GR64RegClass) {
- BuildMI(MBB, MI, get(X86::PUSH64r)).addReg(SrcReg);
- BuildMI(MBB, MI, get(X86::POPFQ));
- return;
- } else if (SrcRC == &X86::GR32RegClass) {
- BuildMI(MBB, MI, get(X86::PUSH32r)).addReg(SrcReg);
- BuildMI(MBB, MI, get(X86::POPFD));
- return;
- }
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC == SrcRC) {
+ unsigned Opc;
+ if (DestRC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rr;
+ } else if (DestRC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rr;
+ } else if (DestRC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rr;
+ } else if (DestRC == &X86::GR8RegClass) {
+ Opc = X86::MOV8rr;
+ } else if (DestRC == &X86::GR32_RegClass) {
+ Opc = X86::MOV32_rr;
+ } else if (DestRC == &X86::GR16_RegClass) {
+ Opc = X86::MOV16_rr;
+ } else if (DestRC == &X86::RFP32RegClass) {
+ Opc = X86::MOV_Fp3232;
+ } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) {
+ Opc = X86::MOV_Fp6464;
+ } else if (DestRC == &X86::RFP80RegClass) {
+ Opc = X86::MOV_Fp8080;
+ } else if (DestRC == &X86::FR32RegClass) {
+ Opc = X86::FsMOVAPSrr;
+ } else if (DestRC == &X86::FR64RegClass) {
+ Opc = X86::FsMOVAPDrr;
+ } else if (DestRC == &X86::VR128RegClass) {
+ Opc = X86::MOVAPSrr;
+ } else if (DestRC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rr;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
}
- cerr << "Not yet supported!";
- abort();
+ BuildMI(MBB, MI, get(Opc), DestReg).addReg(SrcReg);
+ return;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ if (SrcRC == &X86::CCRRegClass) {
+ assert(SrcReg == X86::EFLAGS);
+ if (DestRC == &X86::GR64RegClass) {
+ BuildMI(MBB, MI, get(X86::PUSHFQ));
+ BuildMI(MBB, MI, get(X86::POP64r), DestReg);
+ return;
+ } else if (DestRC == &X86::GR32RegClass) {
+ BuildMI(MBB, MI, get(X86::PUSHFD));
+ BuildMI(MBB, MI, get(X86::POP32r), DestReg);
+ return;
+ }
+ } else if (DestRC == &X86::CCRRegClass) {
+ assert(DestReg == X86::EFLAGS);
+ if (SrcRC == &X86::GR64RegClass) {
+ BuildMI(MBB, MI, get(X86::PUSH64r)).addReg(SrcReg);
+ BuildMI(MBB, MI, get(X86::POPFQ));
+ return;
+ } else if (SrcRC == &X86::GR32RegClass) {
+ BuildMI(MBB, MI, get(X86::PUSH32r)).addReg(SrcReg);
+ BuildMI(MBB, MI, get(X86::POPFD));
+ return;
+ }
+ }
+
+ // Moving from ST(0) turns into FpGET_ST0_32 etc.
+ if (SrcRC == &X86::RSTRegClass) {
+ // Copying from ST(0)/ST(1).
+ assert((SrcReg == X86::ST0 || SrcReg == X86::ST1) &&
+ "Can only copy from ST(0)/ST(1) right now");
+ bool isST0 = SrcReg == X86::ST0;
+ unsigned Opc;
+ if (DestRC == &X86::RFP32RegClass)
+ Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
+ else if (DestRC == &X86::RFP64RegClass)
+ Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
+ else {
+ assert(DestRC == &X86::RFP80RegClass);
+ Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+ }
+ BuildMI(MBB, MI, get(Opc), DestReg);
+ return;
}
- unsigned Opc;
- if (DestRC == &X86::GR64RegClass) {
- Opc = X86::MOV64rr;
- } else if (DestRC == &X86::GR32RegClass) {
- Opc = X86::MOV32rr;
- } else if (DestRC == &X86::GR16RegClass) {
- Opc = X86::MOV16rr;
- } else if (DestRC == &X86::GR8RegClass) {
- Opc = X86::MOV8rr;
- } else if (DestRC == &X86::GR32_RegClass) {
- Opc = X86::MOV32_rr;
- } else if (DestRC == &X86::GR16_RegClass) {
- Opc = X86::MOV16_rr;
- } else if (DestRC == &X86::RFP32RegClass) {
- Opc = X86::MOV_Fp3232;
- } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) {
- Opc = X86::MOV_Fp6464;
- } else if (DestRC == &X86::RFP80RegClass) {
- Opc = X86::MOV_Fp8080;
- } else if (DestRC == &X86::FR32RegClass) {
- Opc = X86::FsMOVAPSrr;
- } else if (DestRC == &X86::FR64RegClass) {
- Opc = X86::FsMOVAPDrr;
- } else if (DestRC == &X86::VR128RegClass) {
- Opc = X86::MOVAPSrr;
- } else if (DestRC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rr;
- } else {
- assert(0 && "Unknown regclass");
- abort();
+ // Moving to ST(0) turns into FpSET_ST0_32 etc.
+ if (DestRC == &X86::RSTRegClass) {
+ // Copying to ST(0). FIXME: handle ST(1) also
+ assert(DestReg == X86::ST0 && "Can only copy to TOS right now");
+ unsigned Opc;
+ if (SrcRC == &X86::RFP32RegClass)
+ Opc = X86::FpSET_ST0_32;
+ else if (SrcRC == &X86::RFP64RegClass)
+ Opc = X86::FpSET_ST0_64;
+ else {
+ assert(SrcRC == &X86::RFP80RegClass);
+ Opc = X86::FpSET_ST0_80;
+ }
+ BuildMI(MBB, MI, get(Opc)).addReg(SrcReg);
+ return;
}
- BuildMI(MBB, MI, get(Opc), DestReg).addReg(SrcReg);
+
+ assert(0 && "Not yet supported!");
+ abort();
}
static unsigned getStoreRegOpcode(const TargetRegisterClass *RC,
MachineInstr*
X86InstrInfo::foldMemoryOperand(MachineInstr *MI, unsigned i,
- SmallVector<MachineOperand,4> &MOs) const {
+ SmallVector<MachineOperand,4> &MOs) const {
const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
// No fusion
if (PrintFailedFusing)
- cerr << "We failed to fuse ("
- << ((i == 1) ? "r" : "s") << "): " << *MI;
+ cerr << "We failed to fuse operand " << i << *MI;
return NULL;
}
-MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI,
+MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr *MI,
SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// Check switch flag
if (NoFusing) return NULL;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // FIXME: Move alignment requirement into tables?
+ if (Alignment < 16) {
+ switch (MI->getOpcode()) {
+ default: break;
// Not always safe to fold movsd into these instructions since their
// load-folding variants expect the address to be 16-byte aligned.
+ case X86::FsANDNPDrr:
+ case X86::FsANDNPSrr:
+ case X86::FsANDPDrr:
+ case X86::FsANDPSrr:
+ case X86::FsORPDrr:
+ case X86::FsORPSrr:
+ case X86::FsXORPDrr:
+ case X86::FsXORPSrr:
+ return NULL;
+ }
+ }
+
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI->getOpcode()) {
case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
}
// Change to CMPXXri r, 0 first.
- MI->setInstrDescriptor(get(NewOpc));
+ MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
} else if (Ops.size() != 1)
return NULL;
return foldMemoryOperand(MI, Ops[0], MOs);
}
-MachineInstr* X86InstrInfo::foldMemoryOperand(MachineInstr *MI,
- SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
+MachineInstr* X86InstrInfo::foldMemoryOperand(MachineFunction &MF,
+ MachineInstr *MI,
+ SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
// Check switch flag
if (NoFusing) return NULL;
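+ // Compute the alignment of the load as the maximum alignment over all of
+ // its memory operands.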
+ unsigned Alignment = 0;
+ for (unsigned i = 0, e = LoadMI->getNumMemOperands(); i != e; ++i) {
+ const MemOperand &MRO = LoadMI->getMemOperand(i);
+ unsigned Align = MRO.getAlignment();
+ if (Align > Alignment)
+ Alignment = Align;
+ }
+
+ // FIXME: Move alignment requirement into tables?
+ if (Alignment < 16) {
+ switch (MI->getOpcode()) {
+ default: break;
+ // Not always safe to fold movsd into these instructions since their
+ // load-folding variants expect the address to be 16-byte aligned.
+ case X86::FsANDNPDrr:
+ case X86::FsANDNPSrr:
+ case X86::FsANDPDrr:
+ case X86::FsANDPSrr:
+ case X86::FsORPDrr:
+ case X86::FsORPSrr:
+ case X86::FsXORPDrr:
+ case X86::FsXORPSrr:
+ return NULL;
+ }
+ }
+
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI->getOpcode()) {
case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
}
// Change to CMPXXri r, 0 first.
- MI->setInstrDescriptor(get(NewOpc));
+ MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
} else if (Ops.size() != 1)
return NULL;
bool X86InstrInfo::canFoldMemoryOperand(MachineInstr *MI,
- SmallVectorImpl<unsigned> &Ops) const {
+ SmallVectorImpl<unsigned> &Ops) const {
// Check switch flag
if (NoFusing) return 0;
case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
}
- DataMI->setInstrDescriptor(get(NewOpc));
+ DataMI->setDesc(get(NewOpc));
MO1.ChangeToRegister(MO0.getReg(), false);
}
}