#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
+
+#include <limits>
+
using namespace llvm;
static cl::opt<bool>
}
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+bool
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ // It's not always legal to reference the low 8-bit of the larger
+ // register in 32-bit mode.
+ return false;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32: {
+ if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+ // Be conservative.
+ return false;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::MOVSX16rr8:
+ case X86::MOVZX16rr8:
+ case X86::MOVSX32rr8:
+ case X86::MOVZX32rr8:
+ case X86::MOVSX64rr8:
+ case X86::MOVZX64rr8:
+ SubIdx = 1;
+ break;
+ case X86::MOVSX32rr16:
+ case X86::MOVZX32rr16:
+ case X86::MOVSX64rr16:
+ case X86::MOVZX64rr16:
+ SubIdx = 3;
+ break;
+ case X86::MOVSX64rr32:
+ case X86::MOVZX64rr32:
+ SubIdx = 4;
+ break;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and follow operands form a reference to the stack frame.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
- MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0 &&
- MI->getOperand(4).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+ return true;
break;
}
- return 0;
+ return false;
}
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8mr:
case X86::MOV16mr:
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
- if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
- MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
- MI->getOperand(1).getImm() == 1 &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86AddrNumOperands).getReg();
- }
- break;
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ return false;
+}
+
/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
/// X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
case X86::MOVSSrm:
case X86::MOVSDrm:
case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm: {
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1).isReg() &&
MI->getOperand(2).isImm() &&
/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
/// would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
-/// two instructions it assumes it's not safe.
+/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
// It's always safe to clobber EFLAGS at the end of a block.
return true;
// For compile time consideration, if we are not able to determine the
- // safety after visiting 2 instructions, we will assume it's not safe.
- for (unsigned i = 0; i < 2; ++i) {
+ // safety after visiting 4 instructions in each direction, we will assume
+ // it's not safe.
+ MachineBasicBlock::iterator Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
bool SeenDef = false;
- for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
- MachineOperand &MO = I->getOperand(j);
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
if (SeenDef)
// This instruction defines EFLAGS, no need to look any further.
return true;
- ++I;
+ ++Iter;
// If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (I == MBB.end())
+ if (Iter == MBB.end())
+ return true;
+ }
+
+ Iter = I;
+ for (unsigned i = 0; i < 4; ++i) {
+ // If we make it to the beginning of the block, it's safe to clobber
+ // EFLAGS iff EFLAGS is not live-in.
+ if (Iter == MBB.begin())
+ return !MBB.isLiveIn(X86::EFLAGS);
+
+ --Iter;
+ bool SawKill = false;
+ for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = Iter->getOperand(j);
+ if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+ if (MO.isDef()) return MO.isDead();
+ if (MO.isKill()) SawKill = true;
+ }
+ }
+
+ if (SawKill)
+ // This instruction kills EFLAGS and doesn't redefine it, so
+ // there's no need to look further.
return true;
}
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = RI.getSubReg(DestReg, SubIdx);
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
SubIdx = 0;
}
default: break;
case X86::MOV8r0:
case X86::MOV16r0:
- case X86::MOV32r0: {
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
if (!isSafeToClobberEFLAGS(MBB, I)) {
switch (Opc) {
default: break;
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri; break;
}
Clone = false;
}
return false;
}
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
+/// to a 32-bit superregister and then truncating back down to a 16-bit
+/// subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // well be shifting and then extracting the lower 16-bits.
+ // This has the potential to cause partial register stall. e.g.
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(X86::SUBREG_16BIT);
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028
+ // just a single insert_subreg.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // well be shifting and then extracting the lower 16-bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+ .addReg(leaInReg2)
+ .addReg(Src2, getKillRegState(isKill2))
+ .addImm(X86::SUBREG_16BIT);
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill)
+ .addImm(X86::SUBREG_16BIT);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
- X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- if (DisableLEA16) {
- // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
- MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::LEA64_32r : X86::LEA32r;
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
- // Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
- MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
-
- NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
- .addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill)
- .addImm(0);
-
- MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
-
- if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
- LV->replaceKillInstruction(Src, MI, InsMI);
- if (isDead)
- LV->replaceKillInstruction(Dest, MI, ExtMI);
- }
- return ExtMI;
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0);
- }
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
break;
}
default: {
if (hasLiveCondCodeDef(MI))
return 0;
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
switch (MIOpc) {
default: return 0;
case X86::INC64r:
}
case X86::INC16r:
case X86::INC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
}
case X86::DEC16r:
case X86::DEC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
break;
}
case X86::ADD16rr: {
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8:
+ case X86::ADD32ri8: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm()) {
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
- }
break;
+ }
case X86::ADD16ri:
case X86::ADD16ri8:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
- break;
- case X86::SHL16ri:
- if (DisableLEA16) return 0;
- case X86::SHL32ri:
- case X86::SHL64ri: {
- assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
- "Unknown shl instruction!");
- unsigned ShAmt = MI->getOperand(2).getImm();
- if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
- X86AddressMode AM;
- AM.Scale = 1 << ShAmt;
- AM.IndexReg = Src;
- unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
- : (MIOpc == X86::SHL32ri
- ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
- NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)), AM);
- if (isKill)
- NewMI->getOperand(3).setIsKill(true);
- }
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
}
- }
}
}
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
- // Working from the bottom, when we see a non-terminator
- // instruction, we're done.
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
if (!isBrAnalysisUnpredicatedTerminator(I, *this))
break;
- // A terminator that isn't a branch can't easily be handled
- // by this analysis.
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
if (!I->getDesc().isBranch())
return true;
+
// Handle unconditional branches.
if (I->getOpcode() == X86::JMP) {
if (!AllowModify) {
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
Cond.clear();
FBB = 0;
+
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
TBB = 0;
I = MBB.end();
continue;
}
+
// TBB is used to indicate the unconditinal destination.
TBB = I->getOperand(0).getMBB();
continue;
}
+
// Handle conditional branches.
X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
FBB = TBB;
Cond.push_back(MachineOperand::CreateImm(BranchCode));
continue;
}
- // Handle subsequent conditional branches. Only handle the case
- // where all conditional branches branch to the same destination
- // and their condition opcodes fit one of the special
- // multi-branch idioms.
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
assert(Cond.size() == 1);
assert(TBB);
- // Only handle the case where all conditional branches branch to
- // the same destination.
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
if (TBB != I->getOperand(0).getMBB())
return true;
- X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
// If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
if (OldBranchCode == BranchCode)
continue;
- // If they differ, see if they fit one of the known patterns.
- // Theoretically we could handle more patterns here, but
- // we shouldn't expect to see them if instruction selection
- // has done a reasonable job.
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
if ((OldBranchCode == X86::COND_NP &&
BranchCode == X86::COND_E) ||
(OldBranchCode == X86::COND_E &&
BranchCode = X86::COND_NE_OR_P;
else
return true;
+
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
}
if (SrcReg != X86::EFLAGS)
return false;
if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
+ BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return true;
} else if (DestRC == &X86::GR32RegClass ||
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC) const {
const MachineFunction &MF = *MBB.getParent();
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC) const{
const MachineFunction &MF = *MBB.getParent();
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
if (CSI.empty())
return false;
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (MI != MBB.end()) DL = MI->getDebugLoc();
+ DebugLoc DL = MBB.findDebugLoc(MI);
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
if (CSI.empty())
return false;
- DebugLoc DL = DebugLoc::getUnknownLoc();
- if (MI != MBB.end()) DL = MI->getDebugLoc();
+ DebugLoc DL = MBB.findDebugLoc(MI);
MachineFunction &MF = *MBB.getParent();
unsigned FPReg = RI.getFrameRegister(MF);
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
isTwoAddrFold = true;
} else if (i == 0) { // If operand 0
- if (MI->getOpcode() == X86::MOV16r0)
- NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+ if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
else if (MI->getOpcode() == X86::MOV32r0)
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
else if (MI->getOpcode() == X86::MOV8r0)
NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
if (NewMI)
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
// No fusion
if (PrintFailedFusing)
- errs() << "We failed to fuse operand " << i << " in " << *MI;
+ dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
// Check switch flag
if (NoFusing) return NULL;
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr_Int:
+ case X86::ROUNDSSr_Int:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
// Check switch flag
if (NoFusing) return NULL;
+ if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr_Int:
+ case X86::ROUNDSSr_Int:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
// Determine the alignment of the load.
unsigned Alignment = 0;
if (LoadMI->hasOneMemOperand())
case X86::MOV8r0:
case X86::MOV16r0:
case X86::MOV32r0:
+ case X86::MOV64r0:
return true;
default: break;
}
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return true;
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
- DebugLoc dl = MI->getDebugLoc();
unsigned Opc = I->second.first;
unsigned Index = I->second.second & 0xf;
bool FoldedLoad = I->second.second & (1 << 4);
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
NewNodes.push_back(Store);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
}
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
return 0;
return 0;
if (UnfoldStore && !FoldedStore)
return 0;
+ if (LoadRegIndex)
+ *LoadRegIndex = I->second.second & 0xf;
return I->second.first;
}
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case X86::TCRETURNri:
- case X86::TCRETURNdi:
- case X86::RET: // Return.
- case X86::RETI:
- case X86::TAILJMPd:
- case X86::TAILJMPr:
- case X86::TAILJMPm:
- case X86::JMP: // Uncond branch.
- case X86::JMP32r: // Indirect branch.
- case X86::JMP64r: // Indirect branch (64-bit).
- case X86::JMP32m: // Indirect branch through mem.
- case X86::JMP64m: // Indirect branch through mem (64-bit).
- return true;
- default: return false;
- }
-}
-
bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
break;
case TargetInstrInfo::IMPLICIT_DEF:
case TargetInstrInfo::KILL:
- case X86::DWARF_LOC:
case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {