move target-independent opcodes out of TargetInstrInfo

[oota-llvm.git] / lib / Target / X86 / X86InstrInfo.cpp
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 859ad57725db6ff3f4fe719ad89d5a56494995da..8d13c0f48fa607699b2c4ed69b401dc903585d26 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -18,7 +18,6 @@
  #include "X86MachineFunctionInfo.h"
  #include "X86Subtarget.h"
  #include "X86TargetMachine.h"
  #include "X86MachineFunctionInfo.h"
  #include "X86Subtarget.h"
  #include "X86TargetMachine.h"
-#include "llvm/GlobalVariable.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/ADT/STLExtras.h"
@@ -27,11 +26,16 @@
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/LiveVariables.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/MC/MCAsmInfo.h"
  #include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/MC/MCAsmInfo.h"
+
+#include <limits>
+
  using namespace llvm;
  
  static cl::opt<bool>
  using namespace llvm;
  
  static cl::opt<bool>
@@ -398,7 +402,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::MOVSX64rr32,     X86::MOVSX64rm32, 0 },
      { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
      { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
      { X86::MOVSX64rr32,     X86::MOVSX64rm32, 0 },
      { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
      { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
-    { X86::MOVUPSrr,        X86::MOVUPSrm, 16 },
+    { X86::MOVUPSrr,        X86::MOVUPSrm, 0 },
      { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
      { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
      { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
      { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
      { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
      { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
@@ -708,9 +712,79 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
    }
  }
  
    }
  }
  
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 
-                                           int &FrameIndex) const {
-  switch (MI->getOpcode()) {
+bool  // Matches the MOVSX*/MOVZX* rr opcodes listed below; on success sets SrcReg, DstReg and SubIdx.
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                    unsigned &SrcReg, unsigned &DstReg,
+                                    unsigned &SubIdx) const {
+  switch (MI.getOpcode()) {
+  default: break;  // any other opcode leaves the outputs untouched and returns false below
+  case X86::MOVSX16rr8:
+  case X86::MOVZX16rr8:
+  case X86::MOVSX32rr8:
+  case X86::MOVZX32rr8:
+  case X86::MOVSX64rr8:
+  case X86::MOVZX64rr8:
+    if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+      // It's not always legal to reference the low 8 bits of the larger
+      // register in 32-bit mode, so reject the 8-bit-source forms there.
+      return false;
+  case X86::MOVSX32rr16:  // the 8-bit-source cases above fall through to here on 64-bit subtargets
+  case X86::MOVZX32rr16:
+  case X86::MOVSX64rr16:
+  case X86::MOVZX64rr16:
+  case X86::MOVSX64rr32:
+  case X86::MOVZX64rr32: {
+    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
+      // Be conservative: give up if either operand already has a sub-register index.
+      return false;
+    SrcReg = MI.getOperand(1).getReg();  // operand 1 is reported as the source
+    DstReg = MI.getOperand(0).getReg();  // operand 0 is reported as the destination
+    switch (MI.getOpcode()) {
+    default:
+      llvm_unreachable(0);  // the outer switch only reaches here for the opcodes enumerated below
+      break;
+    case X86::MOVSX16rr8:
+    case X86::MOVZX16rr8:
+    case X86::MOVSX32rr8:
+    case X86::MOVZX32rr8:
+    case X86::MOVSX64rr8:
+    case X86::MOVZX64rr8:
+      SubIdx = 1;  // presumably the 8-bit sub-register index — TODO confirm against the X86 subreg enum
+      break;
+    case X86::MOVSX32rr16:
+    case X86::MOVZX32rr16:
+    case X86::MOVSX64rr16:
+    case X86::MOVZX64rr16:
+      SubIdx = 3;  // presumably the 16-bit sub-register index — TODO confirm
+      break;
+    case X86::MOVSX64rr32:
+    case X86::MOVZX64rr32:
+      SubIdx = 4;  // presumably the 32-bit sub-register index — TODO confirm
+      break;
+    }
+    return true;
+  }
+  }
+  return false;
+}
+
+/// isFrameOperand - Return true and set FrameIndex if operands Op..Op+3 of MI are
+/// a bare frame reference: a frame index, imm 1, register 0 and imm 0, in that order.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+                                  int &FrameIndex) const {
+  if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+      MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+      MI->getOperand(Op+1).getImm() == 1 &&  // presumably the scale — TODO confirm operand layout
+      MI->getOperand(Op+2).getReg() == 0 &&  // presumably "no index register" — TODO confirm
+      MI->getOperand(Op+3).getImm() == 0) {  // presumably a zero displacement — TODO confirm
+    FrameIndex = MI->getOperand(Op).getIndex();
+    return true;
+  }
+  return false;  // FrameIndex is left unmodified on this path
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+  switch (Opcode) {
    default: break;
    case X86::MOV8rm:
    case X86::MOV16rm:
    default: break;
    case X86::MOV8rm:
    case X86::MOV16rm:
@@ -724,22 +798,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
    case X86::MOVDQArm:
    case X86::MMX_MOVD64rm:
    case X86::MMX_MOVQ64rm:
    case X86::MOVDQArm:
    case X86::MMX_MOVD64rm:
    case X86::MMX_MOVQ64rm:
-    if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
-        MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
-        MI->getOperand(2).getImm() == 1 &&
-        MI->getOperand(3).getReg() == 0 &&
-        MI->getOperand(4).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
+    return true;
      break;
    }
      break;
    }
-  return 0;
+  return false;
  }
  
  }
  
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                          int &FrameIndex) const {
-  switch (MI->getOpcode()) {
+static bool isFrameStoreOpcode(int Opcode) {
+  switch (Opcode) {
    default: break;
    case X86::MOV8mr:
    case X86::MOV16mr:
    default: break;
    case X86::MOV8mr:
    case X86::MOV16mr:
@@ -754,19 +820,89 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
    case X86::MMX_MOVD64mr:
    case X86::MMX_MOVQ64mr:
    case X86::MMX_MOVNTQmr:
    case X86::MMX_MOVD64mr:
    case X86::MMX_MOVQ64mr:
    case X86::MMX_MOVNTQmr:
-    if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
-        MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
-        MI->getOperand(1).getImm() == 1 &&
-        MI->getOperand(2).getReg() == 0 &&
-        MI->getOperand(3).getImm() == 0) {
-      FrameIndex = MI->getOperand(0).getIndex();
+    return true;
+  }
+  return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 
+                                           int &FrameIndex) const {  // returns operand 0's register, or 0 if MI is not a plain frame load
+  if (isFrameLoadOpcode(MI->getOpcode()))  // opcode must be one of the load opcodes accepted by isFrameLoadOpcode
+    if (isFrameOperand(MI, 1, FrameIndex))  // address operands start at index 1; sets FrameIndex on a match
+      return MI->getOperand(0).getReg();
+  return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 
+                                                 int &FrameIndex) const {  // like isLoadFromStackSlot, with a memoperand-based fallback
+  if (isFrameLoadOpcode(MI->getOpcode())) {
+    unsigned Reg;
+    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))  // first try the explicit frame-index operand form
+      return Reg;
+    // Otherwise check the memory operands, for instructions seen after frame index elimination.
+    const MachineMemOperand *Dummy;  // receives the matching memoperand; the value is not used here
+    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);  // NOTE(review): bool converted to unsigned, so this yields 1 or 0, not a register number
+  }
+  return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,  // true if some load memoperand of MI refers to a fixed stack pseudo source value
+                                        const MachineMemOperand *&MMO,
+                                        int &FrameIndex) const {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end();
+       o != oe;
+       ++o) {
+    if ((*o)->isLoad() && (*o)->getValue())  // only load memoperands that carry a non-null value
+      if (const FixedStackPseudoSourceValue *Value =
+          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+        FrameIndex = Value->getFrameIndex();
+        MMO = *o;  // report the first matching memoperand to the caller
+        return true;
+      }
+  }
+  return false;  // MMO and FrameIndex are left unmodified when nothing matches
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                          int &FrameIndex) const {  // returns the register at operand X86AddrNumOperands, or 0 if MI is not a plain frame store
+  if (isFrameStoreOpcode(MI->getOpcode()))  // opcode must be one of the store opcodes accepted by isFrameStoreOpcode
+    if (isFrameOperand(MI, 0, FrameIndex))  // address operands start at index 0; sets FrameIndex on a match
        return MI->getOperand(X86AddrNumOperands).getReg();
        return MI->getOperand(X86AddrNumOperands).getReg();
-    }
-    break;
+  return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+                                                int &FrameIndex) const {  // like isStoreToStackSlot, with a memoperand-based fallback
+  if (isFrameStoreOpcode(MI->getOpcode())) {
+    unsigned Reg;
+    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))  // first try the explicit frame-index operand form
+      return Reg;
+    // Otherwise check the memory operands, for instructions seen after frame index elimination.
+    const MachineMemOperand *Dummy;  // receives the matching memoperand; the value is not used here
+    return hasStoreToStackSlot(MI, Dummy, FrameIndex);  // NOTE(review): bool converted to unsigned, so this yields 1 or 0, not a register number
    }
    return 0;
  }
  
    }
    return 0;
  }
  
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,  // true if some store memoperand of MI refers to a fixed stack pseudo source value
+                                       const MachineMemOperand *&MMO,
+                                       int &FrameIndex) const {
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end();
+       o != oe;
+       ++o) {
+    if ((*o)->isStore() && (*o)->getValue())  // only store memoperands that carry a non-null value
+      if (const FixedStackPseudoSourceValue *Value =
+          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+        FrameIndex = Value->getFrameIndex();
+        MMO = *o;  // report the first matching memoperand to the caller
+        return true;
+      }
+  }
+  return false;  // MMO and FrameIndex are left unmodified when nothing matches
+}
+
  /// regIsPICBase - Return true if register is PIC base (i.e.g defined by
  /// X86::MOVPC32r.
  static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  /// regIsPICBase - Return true if register is PIC base (i.e.g defined by
  /// X86::MOVPC32r.
  static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -782,31 +918,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
    return isPICBase;
  }
  
    return isPICBase;
  }
  
-/// CanRematLoadWithDispOperand - Return true if a load with the specified
-/// operand is a candidate for remat: for this to be true we need to know that
-/// the load will always return the same value, even if moved.
-static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
-                                        X86TargetMachine &TM) {
-  // Loads from constant pool entries can be remat'd.
-  if (MO.isCPI()) return true;
-  
-  // We can remat globals in some cases.
-  if (MO.isGlobal()) {
-    // If this is a load of a stub, not of the global, we can remat it.  This
-    // access will always return the address of the global.
-    if (isGlobalStubReference(MO.getTargetFlags()))
-      return true;
-    
-    // If the global itself is constant, we can remat the load.
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
-      if (GV->isConstant())
-        return true;
-  }
-  return false;
-}
- 
  bool
  bool
-X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
+                                                AliasAnalysis *AA) const {
    switch (MI->getOpcode()) {
    default: break;
      case X86::MOV8rm:
    switch (MI->getOpcode()) {
    default: break;
      case X86::MOV8rm:
@@ -817,15 +931,19 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
      case X86::MOVSSrm:
      case X86::MOVSDrm:
      case X86::MOVAPSrm:
      case X86::MOVSSrm:
      case X86::MOVSDrm:
      case X86::MOVAPSrm:
+    case X86::MOVUPSrm:
+    case X86::MOVUPSrm_Int:
      case X86::MOVAPDrm:
      case X86::MOVDQArm:
      case X86::MMX_MOVD64rm:
      case X86::MOVAPDrm:
      case X86::MOVDQArm:
      case X86::MMX_MOVD64rm:
-    case X86::MMX_MOVQ64rm: {
+    case X86::MMX_MOVQ64rm:
+    case X86::FsMOVAPSrm:
+    case X86::FsMOVAPDrm: {
        // Loads from constant pools are trivially rematerializable.
        if (MI->getOperand(1).isReg() &&
            MI->getOperand(2).isImm() &&
            MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        // Loads from constant pools are trivially rematerializable.
        if (MI->getOperand(1).isReg() &&
            MI->getOperand(2).isImm() &&
            MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
-          CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
+          MI->isInvariantLoad(AA)) {
          unsigned BaseReg = MI->getOperand(1).getReg();
          if (BaseReg == 0 || BaseReg == X86::RIP)
            return true;
          unsigned BaseReg = MI->getOperand(1).getReg();
          if (BaseReg == 0 || BaseReg == X86::RIP)
            return true;
@@ -876,7 +994,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
  /// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
  /// would clobber the EFLAGS condition register. Note the result may be
  /// conservative. If it cannot definitely determine the safety after visiting
  /// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
  /// would clobber the EFLAGS condition register. Note the result may be
  /// conservative. If it cannot definitely determine the safety after visiting
-/// two instructions it assumes it's not safe.
+/// a few instructions in each direction it assumes it's not safe.
  static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I) {
    // It's always safe to clobber EFLAGS at the end of a block.
  static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I) {
    // It's always safe to clobber EFLAGS at the end of a block.
@@ -884,11 +1002,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
      return true;
  
    // For compile time consideration, if we are not able to determine the
      return true;
  
    // For compile time consideration, if we are not able to determine the
-  // safety after visiting 2 instructions, we will assume it's not safe.
-  for (unsigned i = 0; i < 2; ++i) {
+  // safety after visiting 4 instructions in each direction, we will assume
+  // it's not safe.
+  MachineBasicBlock::iterator Iter = I;
+  for (unsigned i = 0; i < 4; ++i) {
      bool SeenDef = false;
      bool SeenDef = false;
-    for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
-      MachineOperand &MO = I->getOperand(j);
+    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+      MachineOperand &MO = Iter->getOperand(j);
        if (!MO.isReg())
          continue;
        if (MO.getReg() == X86::EFLAGS) {
        if (!MO.isReg())
          continue;
        if (MO.getReg() == X86::EFLAGS) {
@@ -901,10 +1021,33 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
      if (SeenDef)
        // This instruction defines EFLAGS, no need to look any further.
        return true;
      if (SeenDef)
        // This instruction defines EFLAGS, no need to look any further.
        return true;
-    ++I;
+    ++Iter;
  
      // If we make it to the end of the block, it's safe to clobber EFLAGS.
  
      // If we make it to the end of the block, it's safe to clobber EFLAGS.
-    if (I == MBB.end())
+    if (Iter == MBB.end())
+      return true;
+  }
+
+  Iter = I;
+  for (unsigned i = 0; i < 4; ++i) {
+    // If we make it to the beginning of the block, it's safe to clobber
+    // EFLAGS iff EFLAGS is not live-in.
+    if (Iter == MBB.begin())
+      return !MBB.isLiveIn(X86::EFLAGS);
+
+    --Iter;
+    bool SawKill = false;
+    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
+      MachineOperand &MO = Iter->getOperand(j);
+      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
+        if (MO.isDef()) return MO.isDead();
+        if (MO.isKill()) SawKill = true;
+      }
+    }
+
+    if (SawKill)
+      // This instruction kills EFLAGS and doesn't redefine it, so
+      // there's no need to look further.
        return true;
    }
  
        return true;
    }
  
@@ -915,12 +1058,12 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
  void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned DestReg, unsigned SubIdx,
  void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned DestReg, unsigned SubIdx,
-                                 const MachineInstr *Orig) const {
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (I != MBB.end()) DL = I->getDebugLoc();
+                                 const MachineInstr *Orig,
+                                 const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
  
    if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
  
    if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
-    DestReg = RI.getSubReg(DestReg, SubIdx);
+    DestReg = TRI->getSubReg(DestReg, SubIdx);
      SubIdx = 0;
    }
  
      SubIdx = 0;
    }
  
@@ -932,13 +1075,15 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
    default: break;
    case X86::MOV8r0:
    case X86::MOV16r0:
    default: break;
    case X86::MOV8r0:
    case X86::MOV16r0:
-  case X86::MOV32r0: {
+  case X86::MOV32r0:
+  case X86::MOV64r0: {
      if (!isSafeToClobberEFLAGS(MBB, I)) {
        switch (Opc) {
        default: break;
        case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
        case X86::MOV16r0: Opc = X86::MOV16ri; break;
        case X86::MOV32r0: Opc = X86::MOV32ri; break;
      if (!isSafeToClobberEFLAGS(MBB, I)) {
        switch (Opc) {
        default: break;
        case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
        case X86::MOV16r0: Opc = X86::MOV16ri; break;
        case X86::MOV32r0: Opc = X86::MOV32ri; break;
+      case X86::MOV64r0: Opc = X86::MOV64ri; break;
        }
        Clone = false;
      }
        }
        Clone = false;
      }
@@ -958,43 +1103,6 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
    NewMI->getOperand(0).setSubReg(SubIdx);
  }
  
    NewMI->getOperand(0).setSubReg(SubIdx);
  }
  
-/// isInvariantLoad - Return true if the specified instruction (which is marked
-/// mayLoad) is loading from a location whose value is invariant across the
-/// function.  For example, loading a value from the constant pool or from
-/// from the argument area of a function if it does not change.  This should
-/// only return true of *all* loads the instruction does are invariant (if it
-/// does multiple loads).
-bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
-  // This code cares about loads from three cases: constant pool entries,
-  // invariant argument slots, and global stubs.  In order to handle these cases
-  // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
-  // operand and base our analysis on it.  This is safe because the address of
-  // none of these three cases is ever used as anything other than a load base
-  // and X86 doesn't have any instructions that load from multiple places.
-  
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    // Loads from constant pools are trivially invariant.
-    if (MO.isCPI())
-      return true;
-
-    if (MO.isGlobal())
-      return isGlobalStubReference(MO.getTargetFlags());
-
-    // If this is a load from an invariant stack slot, the load is a constant.
-    if (MO.isFI()) {
-      const MachineFrameInfo &MFI =
-        *MI->getParent()->getParent()->getFrameInfo();
-      int Idx = MO.getIndex();
-      return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
-    }
-  }
-  
-  // All other instances of these instructions are presumed to have other
-  // issues.
-  return false;
-}
-
  /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
  /// is not marked dead.
  static bool hasLiveCondCodeDef(MachineInstr *MI) {
  /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
  /// is not marked dead.
  static bool hasLiveCondCodeDef(MachineInstr *MI) {
@@ -1008,6 +1116,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
    return false;
  }
  
    return false;
  }
  
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress, used when
+/// 16-bit LEA is disabled: form 3-address code with a 32-bit LEA by inserting
+/// the 16-bit source into a fresh 32-bit register and then extracting the 16-bit
+/// result. Returns the final EXTRACT_SUBREG; updates LV when it is non-null.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+                                           MachineFunction::iterator &MFI,
+                                           MachineBasicBlock::iterator &MBBI,
+                                           LiveVariables *LV) const {
+  MachineInstr *MI = MBBI;
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Src = MI->getOperand(1).getReg();
+  bool isDead = MI->getOperand(0).isDead();
+  bool isKill = MI->getOperand(1).isKill();
+
+  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+    ? X86::LEA64_32r : X86::LEA32r;
+  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);  // 32-bit input to the LEA
+  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);  // 32-bit result of the LEA
+            
+  // Build and insert into an implicit UNDEF value. This is OK because
+  // we'll be shifting and then extracting the lower 16 bits.
+  // This has the potential to cause partial register stall. e.g.
+  //   movw    (%rbp,%rcx,2), %dx
+  //   leal    -65(%rdx), %esi
+  // But testing has shown this *does* help performance in 64-bit mode (at
+  // least on modern x86 machines).
+  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+  MachineInstr *InsMI =
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+    .addReg(leaInReg)
+    .addReg(Src, getKillRegState(isKill))
+    .addImm(X86::SUBREG_16BIT);
+
+  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+                                    get(Opc), leaOutReg);  // the LEA; its address operands are appended per opcode below
+  switch (MIOpc) {
+  default:
+    llvm_unreachable(0);  // only the 16-bit opcodes handled below are supported
+    break;
+  case X86::SHL16ri: {
+    unsigned ShAmt = MI->getOperand(2).getImm();
+    MIB.addReg(0).addImm(1 << ShAmt)  // presumably no base and scale = 1 << ShAmt — TODO confirm operand order
+       .addReg(leaInReg, RegState::Kill).addImm(0);
+    break;
+  }
+  case X86::INC16r:
+  case X86::INC64_16r:
+    addLeaRegOffset(MIB, leaInReg, true, 1);
+    break;
+  case X86::DEC16r:
+  case X86::DEC64_16r:
+    addLeaRegOffset(MIB, leaInReg, true, -1);
+    break;
+  case X86::ADD16ri:
+  case X86::ADD16ri8:
+    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());    
+    break;
+  case X86::ADD16rr: {
+    unsigned Src2 = MI->getOperand(2).getReg();
+    bool isKill2 = MI->getOperand(2).isKill();
+    unsigned leaInReg2 = 0;
+    MachineInstr *InsMI2 = 0;  // stays null when both sources are the same register
+    if (Src == Src2) {
+      // Both sources are the same register, e.g. ADD16rr %reg1028<kill>, %reg1028:
+      // a single INSERT_SUBREG feeds both LEA inputs.
+      addRegReg(MIB, leaInReg, true, leaInReg, false);
+    } else {
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      // Build and insert into an implicit UNDEF value. This is OK because
+      // we'll be shifting and then extracting the lower 16 bits.
+      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);  // insertion point is the LEA (MIB), not MBBI
+      InsMI2 =
+        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+        .addReg(leaInReg2)
+        .addReg(Src2, getKillRegState(isKill2))
+        .addImm(X86::SUBREG_16BIT);
+      addRegReg(MIB, leaInReg, true, leaInReg2, true);
+    }
+    if (LV && isKill2 && InsMI2)
+      LV->replaceKillInstruction(Src2, MI, InsMI2);  // the second INSERT_SUBREG now kills Src2 instead of MI
+    break;
+  }
+  }
+
+  MachineInstr *NewMI = MIB;
+  MachineInstr *ExtMI =
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+    .addReg(leaOutReg, RegState::Kill)
+    .addImm(X86::SUBREG_16BIT);
+
+  if (LV) {
+    // Update live variables: record the new kills and move MI's kill/dead markers.
+    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+    if (isKill)
+      LV->replaceKillInstruction(Src, MI, InsMI);
+    if (isDead)
+      LV->replaceKillInstruction(Dest, MI, ExtMI);
+  }
+
+  return ExtMI;  // the 16-bit extract that defines Dest; MI itself is not erased here
+}
+
  /// convertToThreeAddress - This method must be implemented by targets that
  /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
  /// may be able to convert a two-address instruction into a true
  /// convertToThreeAddress - This method must be implemented by targets that
  /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
  /// may be able to convert a two-address instruction into a true
@@ -1033,7 +1247,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
    MachineInstr *NewMI = NULL;
    // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
    // we have better subtarget support, enable the 16-bit LEA generation here.
    MachineInstr *NewMI = NULL;
    // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
    // we have better subtarget support, enable the 16-bit LEA generation here.
+  // 16-bit LEA is also slow on Core2.
    bool DisableLEA16 = true;
    bool DisableLEA16 = true;
+  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
  
    unsigned MIOpc = MI->getOpcode();
    switch (MIOpc) {
  
    unsigned MIOpc = MI->getOpcode();
    switch (MIOpc) {
@@ -1072,8 +1288,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 0 || ShAmt >= 4) return 0;
  
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 0 || ShAmt >= 4) return 0;
  
-    unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
-      X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
      NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(0).addImm(1 << ShAmt)
      NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(0).addImm(1 << ShAmt)
@@ -1087,51 +1302,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 0 || ShAmt >= 4) return 0;
  
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 0 || ShAmt >= 4) return 0;
  
-    if (DisableLEA16) {
-      // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
-      MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-      unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
-        ? X86::LEA64_32r : X86::LEA32r;
-      unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-      unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-            
-      // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits. 
-      BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
-      MachineInstr *InsMI =
-        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
-        .addReg(leaInReg)
-        .addReg(Src, getKillRegState(isKill))
-        .addImm(X86::SUBREG_16BIT);
-      
-      NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
-        .addReg(0).addImm(1 << ShAmt)
-        .addReg(leaInReg, RegState::Kill)
-        .addImm(0);
-      
-      MachineInstr *ExtMI =
-        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
-        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-        .addReg(leaOutReg, RegState::Kill)
-        .addImm(X86::SUBREG_16BIT);
-
-      if (LV) {
-        // Update live variables
-        LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
-        LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
-        if (isKill)
-          LV->replaceKillInstruction(Src, MI, InsMI);
-        if (isDead)
-          LV->replaceKillInstruction(Dest, MI, ExtMI);
-      }
-      return ExtMI;
-    } else {
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
-        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-        .addReg(0).addImm(1 << ShAmt)
-        .addReg(Src, getKillRegState(isKill))
-        .addImm(0);
-    }
+    if (DisableLEA16)
+      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+      .addReg(0).addImm(1 << ShAmt)
+      .addReg(Src, getKillRegState(isKill))
+      .addImm(0);
      break;
    }
    default: {
      break;
    }
    default: {
@@ -1141,7 +1318,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      if (hasLiveCondCodeDef(MI))
        return 0;
  
      if (hasLiveCondCodeDef(MI))
        return 0;
  
-    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
      switch (MIOpc) {
      default: return 0;
      case X86::INC64r:
      switch (MIOpc) {
      default: return 0;
      case X86::INC64r:
@@ -1158,7 +1334,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      }
      case X86::INC16r:
      case X86::INC64_16r:
      }
      case X86::INC16r:
      case X86::INC64_16r:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
        assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
        assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
@@ -1179,7 +1356,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      }
      case X86::DEC16r:
      case X86::DEC64_16r:
      }
      case X86::DEC16r:
      case X86::DEC64_16r:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
        assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
        assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
@@ -1202,7 +1380,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
        break;
      }
      case X86::ADD16rr: {
        break;
      }
      case X86::ADD16rr: {
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
        unsigned Src2 = MI->getOperand(2).getReg();
        bool isKill2 = MI->getOperand(2).isKill();
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
        unsigned Src2 = MI->getOperand(2).getReg();
        bool isKill2 = MI->getOperand(2).isKill();
@@ -1217,56 +1396,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      case X86::ADD64ri32:
      case X86::ADD64ri8:
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      case X86::ADD64ri32:
      case X86::ADD64ri8:
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm())
-        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
-                                .addReg(Dest, RegState::Define |
-                                        getDeadRegState(isDead)),
-                                Src, isKill, MI->getOperand(2).getImm());
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, MI->getOperand(2).getImm());
        break;
      case X86::ADD32ri:
        break;
      case X86::ADD32ri:
-    case X86::ADD32ri8:
+    case X86::ADD32ri8: {
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm()) {
-        unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                                .addReg(Dest, RegState::Define |
-                                        getDeadRegState(isDead)),
+      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
                                  Src, isKill, MI->getOperand(2).getImm());
                                  Src, isKill, MI->getOperand(2).getImm());
-      }
        break;
        break;
+    }
      case X86::ADD16ri:
      case X86::ADD16ri8:
      case X86::ADD16ri:
      case X86::ADD16ri8:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm())
-        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
-                             .addReg(Dest, RegState::Define |
-                                     getDeadRegState(isDead)),
-                             Src, isKill, MI->getOperand(2).getImm());
-      break;
-    case X86::SHL16ri:
-      if (DisableLEA16) return 0;
-    case X86::SHL32ri:
-    case X86::SHL64ri: {
-      assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
-             "Unknown shl instruction!");
-      unsigned ShAmt = MI->getOperand(2).getImm();
-      if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
-        X86AddressMode AM;
-        AM.Scale = 1 << ShAmt;
-        AM.IndexReg = Src;
-        unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
-          : (MIOpc == X86::SHL32ri
-             ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
-        NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                               .addReg(Dest, RegState::Define |
-                                       getDeadRegState(isDead)), AM);
-        if (isKill)
-          NewMI->getOperand(3).setIsKill(true);
-      }
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, MI->getOperand(2).getImm());
        break;
      }
        break;
      }
-    }
    }
    }
  
    }
    }
  
@@ -1527,14 +1682,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I = MBB.end();
    while (I != MBB.begin()) {
      --I;
    MachineBasicBlock::iterator I = MBB.end();
    while (I != MBB.begin()) {
      --I;
-    // Working from the bottom, when we see a non-terminator
-    // instruction, we're done.
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
      if (!isBrAnalysisUnpredicatedTerminator(I, *this))
        break;
      if (!isBrAnalysisUnpredicatedTerminator(I, *this))
        break;
-    // A terminator that isn't a branch can't easily be handled
-    // by this analysis.
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
      if (!I->getDesc().isBranch())
        return true;
      if (!I->getDesc().isBranch())
        return true;
+
      // Handle unconditional branches.
      if (I->getOpcode() == X86::JMP) {
        if (!AllowModify) {
      // Handle unconditional branches.
      if (I->getOpcode() == X86::JMP) {
        if (!AllowModify) {
@@ -1543,10 +1701,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
        }
  
        // If the block has any instructions after a JMP, delete them.
        }
  
        // If the block has any instructions after a JMP, delete them.
-      while (next(I) != MBB.end())
-        next(I)->eraseFromParent();
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
        Cond.clear();
        FBB = 0;
        Cond.clear();
        FBB = 0;
+
        // Delete the JMP if it's equivalent to a fall-through.
        if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
          TBB = 0;
        // Delete the JMP if it's equivalent to a fall-through.
        if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
          TBB = 0;
@@ -1554,14 +1714,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
          I = MBB.end();
          continue;
        }
          I = MBB.end();
          continue;
        }
+
      // TBB is used to indicate the unconditional destination.
        TBB = I->getOperand(0).getMBB();
        continue;
      }
        // TBB is used to indicate the unconditional destination.
        TBB = I->getOperand(0).getMBB();
        continue;
      }
+
      // Handle conditional branches.
      X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
      if (BranchCode == X86::COND_INVALID)
        return true;  // Can't handle indirect branch.
      // Handle conditional branches.
      X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
      if (BranchCode == X86::COND_INVALID)
        return true;  // Can't handle indirect branch.
+
      // Working from the bottom, handle the first conditional branch.
      if (Cond.empty()) {
        FBB = TBB;
      // Working from the bottom, handle the first conditional branch.
      if (Cond.empty()) {
        FBB = TBB;
@@ -1569,24 +1732,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
        Cond.push_back(MachineOperand::CreateImm(BranchCode));
        continue;
      }
        Cond.push_back(MachineOperand::CreateImm(BranchCode));
        continue;
      }
-    // Handle subsequent conditional branches. Only handle the case
-    // where all conditional branches branch to the same destination
-    // and their condition opcodes fit one of the special
-    // multi-branch idioms.
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination and their condition
+    // opcodes fit one of the special multi-branch idioms.
      assert(Cond.size() == 1);
      assert(TBB);
      assert(Cond.size() == 1);
      assert(TBB);
-    // Only handle the case where all conditional branches branch to
-    // the same destination.
+
+    // Only handle the case where all conditional branches branch to the same
+    // destination.
      if (TBB != I->getOperand(0).getMBB())
        return true;
      if (TBB != I->getOperand(0).getMBB())
        return true;
-    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
      // If the conditions are the same, we can leave them alone.
      // If the conditions are the same, we can leave them alone.
+    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
      if (OldBranchCode == BranchCode)
        continue;
      if (OldBranchCode == BranchCode)
        continue;
-    // If they differ, see if they fit one of the known patterns.
-    // Theoretically we could handle more patterns here, but
-    // we shouldn't expect to see them if instruction selection
-    // has done a reasonable job.
+
+    // If they differ, see if they fit one of the known patterns. Theoretically,
+    // we could handle more patterns here, but we shouldn't expect to see them
+    // if instruction selection has done a reasonable job.
      if ((OldBranchCode == X86::COND_NP &&
           BranchCode == X86::COND_E) ||
          (OldBranchCode == X86::COND_E &&
      if ((OldBranchCode == X86::COND_NP &&
           BranchCode == X86::COND_E) ||
          (OldBranchCode == X86::COND_E &&
@@ -1599,6 +1764,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
        BranchCode = X86::COND_NE_OR_P;
      else
        return true;
        BranchCode = X86::COND_NE_OR_P;
      else
        return true;
+
      // Update the MachineOperand.
      Cond[0].setImm(BranchCode);
    }
      // Update the MachineOperand.
      Cond[0].setImm(BranchCode);
    }
@@ -1684,8 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                  unsigned DestReg, unsigned SrcReg,
                                  const TargetRegisterClass *DestRC,
                                  const TargetRegisterClass *SrcRC) const {
                                  unsigned DestReg, unsigned SrcReg,
                                  const TargetRegisterClass *DestRC,
                                  const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
+  DebugLoc DL = MBB.findDebugLoc(MI);
  
    // Determine if DestRC and SrcRC have a common superclass.
    const TargetRegisterClass *CommonRC = DestRC;
  
    // Determine if DestRC and SrcRC have a common superclass.
    const TargetRegisterClass *CommonRC = DestRC;
@@ -1772,7 +1937,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
      if (SrcReg != X86::EFLAGS)
        return false;
      if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
      if (SrcReg != X86::EFLAGS)
        return false;
      if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
+      BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
        BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
        return true;
      } else if (DestRC == &X86::GR32RegClass ||
        BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
        return true;
      } else if (DestRC == &X86::GR32RegClass ||
@@ -1910,11 +2075,9 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         unsigned SrcReg, bool isKill, int FrameIdx,
                                         const TargetRegisterClass *RC) const {
    const MachineFunction &MF = *MBB.getParent();
                                         unsigned SrcReg, bool isKill, int FrameIdx,
                                         const TargetRegisterClass *RC) const {
    const MachineFunction &MF = *MBB.getParent();
-  bool isAligned = (RI.getStackAlignment() >= 16) ||
-    RI.needsStackRealignment(MF);
+  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
+  DebugLoc DL = MBB.findDebugLoc(MI);
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
      .addReg(SrcReg, getKillRegState(isKill));
  }
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
      .addReg(SrcReg, getKillRegState(isKill));
  }
@@ -1923,15 +2086,17 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                    bool isKill,
                                    SmallVectorImpl<MachineOperand> &Addr,
                                    const TargetRegisterClass *RC,
                                    bool isKill,
                                    SmallVectorImpl<MachineOperand> &Addr,
                                    const TargetRegisterClass *RC,
+                                  MachineInstr::mmo_iterator MMOBegin,
+                                  MachineInstr::mmo_iterator MMOEnd,
                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (RI.getStackAlignment() >= 16) ||
-    RI.needsStackRealignment(MF);
+  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
    DebugLoc DL = DebugLoc::getUnknownLoc();
    MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
    for (unsigned i = 0, e = Addr.size(); i != e; ++i)
      MIB.addOperand(Addr[i]);
    MIB.addReg(SrcReg, getKillRegState(isKill));
    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
    DebugLoc DL = DebugLoc::getUnknownLoc();
    MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
    for (unsigned i = 0, e = Addr.size(); i != e; ++i)
      MIB.addOperand(Addr[i]);
    MIB.addReg(SrcReg, getKillRegState(isKill));
+  (*MIB).setMemRefs(MMOBegin, MMOEnd);
    NewMIs.push_back(MIB);
  }
  
    NewMIs.push_back(MIB);
  }
  
@@ -2003,25 +2168,25 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          unsigned DestReg, int FrameIdx,
                                          const TargetRegisterClass *RC) const{
    const MachineFunction &MF = *MBB.getParent();
                                          unsigned DestReg, int FrameIdx,
                                          const TargetRegisterClass *RC) const{
    const MachineFunction &MF = *MBB.getParent();
-  bool isAligned = (RI.getStackAlignment() >= 16) ||
-    RI.needsStackRealignment(MF);
+  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
+  DebugLoc DL = MBB.findDebugLoc(MI);
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
  }
  
  void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                   SmallVectorImpl<MachineOperand> &Addr,
                                   const TargetRegisterClass *RC,
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
  }
  
  void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                   SmallVectorImpl<MachineOperand> &Addr,
                                   const TargetRegisterClass *RC,
+                                 MachineInstr::mmo_iterator MMOBegin,
+                                 MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (RI.getStackAlignment() >= 16) ||
-    RI.needsStackRealignment(MF);
+  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
    DebugLoc DL = DebugLoc::getUnknownLoc();
    MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
    for (unsigned i = 0, e = Addr.size(); i != e; ++i)
      MIB.addOperand(Addr[i]);
    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
    DebugLoc DL = DebugLoc::getUnknownLoc();
    MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
    for (unsigned i = 0, e = Addr.size(); i != e; ++i)
      MIB.addOperand(Addr[i]);
+  (*MIB).setMemRefs(MMOBegin, MMOEnd);
    NewMIs.push_back(MIB);
  }
  
    NewMIs.push_back(MIB);
  }
  
@@ -2031,8 +2196,7 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
    if (CSI.empty())
      return false;
  
    if (CSI.empty())
      return false;
  
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
+  DebugLoc DL = MBB.findDebugLoc(MI);
  
    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
    bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
  
    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
    bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
@@ -2070,8 +2234,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
    if (CSI.empty())
      return false;
  
    if (CSI.empty())
      return false;
  
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
+  DebugLoc DL = MBB.findDebugLoc(MI);
  
    MachineFunction &MF = *MBB.getParent();
    unsigned FPReg = RI.getFrameRegister(MF);
  
    MachineFunction &MF = *MBB.getParent();
    unsigned FPReg = RI.getFrameRegister(MF);
@@ -2180,10 +2343,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      OpcodeTablePtr = &RegOp2MemOpTable2Addr;
      isTwoAddrFold = true;
    } else if (i == 0) { // If operand 0
      OpcodeTablePtr = &RegOp2MemOpTable2Addr;
      isTwoAddrFold = true;
    } else if (i == 0) { // If operand 0
-    if (MI->getOpcode() == X86::MOV16r0)
-      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+    if (MI->getOpcode() == X86::MOV64r0)
+      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
      else if (MI->getOpcode() == X86::MOV32r0)
        NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
      else if (MI->getOpcode() == X86::MOV32r0)
        NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+    else if (MI->getOpcode() == X86::MOV16r0)
+      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
      else if (MI->getOpcode() == X86::MOV8r0)
        NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
      if (NewMI)
      else if (MI->getOpcode() == X86::MOV8r0)
        NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
      if (NewMI)
@@ -2199,7 +2364,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    // If table selected...
    if (OpcodeTablePtr) {
      // Find the Opcode to fuse
    // If table selected...
    if (OpcodeTablePtr) {
      // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
        OpcodeTablePtr->find((unsigned*)MI->getOpcode());
      if (I != OpcodeTablePtr->end()) {
        unsigned Opcode = I->second.first;
        OpcodeTablePtr->find((unsigned*)MI->getOpcode());
      if (I != OpcodeTablePtr->end()) {
        unsigned Opcode = I->second.first;
@@ -2246,7 +2411,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    
    // No fusion 
    if (PrintFailedFusing)
    
    // No fusion 
    if (PrintFailedFusing)
-    errs() << "We failed to fuse operand " << i << " in " << *MI;
+    dbgs() << "We failed to fuse operand " << i << " in " << *MI;
    return NULL;
  }
  
    return NULL;
  }
  
@@ -2258,6 +2423,23 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    // Check switch flag 
    if (NoFusing) return NULL;
  
    // Check switch flag 
    if (NoFusing) return NULL;
  
+  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    switch (MI->getOpcode()) {
+    case X86::CVTSD2SSrr:
+    case X86::Int_CVTSD2SSrr:
+    case X86::CVTSS2SDrr:
+    case X86::Int_CVTSS2SDrr:
+    case X86::RCPSSr:
+    case X86::RCPSSr_Int:
+    case X86::ROUNDSDr_Int:
+    case X86::ROUNDSSr_Int:
+    case X86::RSQRTSSr:
+    case X86::RSQRTSSr_Int:
+    case X86::SQRTSSr:
+    case X86::SQRTSSr_Int:
+      return 0;
+    }
+
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    unsigned Size = MFI->getObjectSize(FrameIndex);
    unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    unsigned Size = MFI->getObjectSize(FrameIndex);
    unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
@@ -2293,13 +2475,42 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    // Check switch flag 
    if (NoFusing) return NULL;
  
    // Check switch flag 
    if (NoFusing) return NULL;
  
+  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    switch (MI->getOpcode()) {
+    case X86::CVTSD2SSrr:
+    case X86::Int_CVTSD2SSrr:
+    case X86::CVTSS2SDrr:
+    case X86::Int_CVTSS2SDrr:
+    case X86::RCPSSr:
+    case X86::RCPSSr_Int:
+    case X86::ROUNDSDr_Int:
+    case X86::ROUNDSSr_Int:
+    case X86::RSQRTSSr:
+    case X86::RSQRTSSr_Int:
+    case X86::SQRTSSr:
+    case X86::SQRTSSr_Int:
+      return 0;
+    }
+
    // Determine the alignment of the load.
    unsigned Alignment = 0;
    if (LoadMI->hasOneMemOperand())
    // Determine the alignment of the load.
    unsigned Alignment = 0;
    if (LoadMI->hasOneMemOperand())
-    Alignment = LoadMI->memoperands_begin()->getAlignment();
-  else if (LoadMI->getOpcode() == X86::V_SET0 ||
-           LoadMI->getOpcode() == X86::V_SETALLONES)
-    Alignment = 16;
+    Alignment = (*LoadMI->memoperands_begin())->getAlignment();
+  else
+    switch (LoadMI->getOpcode()) {
+    case X86::V_SET0:
+    case X86::V_SETALLONES:
+      Alignment = 16;
+      break;
+    case X86::FsFLD0SD:
+      Alignment = 8;
+      break;
+    case X86::FsFLD0SS:
+      Alignment = 4;
+      break;
+    default:
+      llvm_unreachable("Don't know how to fold this instruction!");
+    }
    if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
      unsigned NewOpc = 0;
      switch (MI->getOpcode()) {
    if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
      unsigned NewOpc = 0;
      switch (MI->getOpcode()) {
@@ -2316,8 +2527,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      return NULL;
  
    SmallVector<MachineOperand,X86AddrNumOperands> MOs;
      return NULL;
  
    SmallVector<MachineOperand,X86AddrNumOperands> MOs;
-  if (LoadMI->getOpcode() == X86::V_SET0 ||
-      LoadMI->getOpcode() == X86::V_SETALLONES) {
+  switch (LoadMI->getOpcode()) {
+  case X86::V_SET0:
+  case X86::V_SETALLONES:
+  case X86::FsFLD0SD:
+  case X86::FsFLD0SS: {
      // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
      // Create a constant-pool entry and operands to load from it.
  
      // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
      // Create a constant-pool entry and operands to load from it.
  
@@ -2331,17 +2545,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
          // This doesn't work for several reasons.
          // 1. GlobalBaseReg may have been spilled.
          // 2. It may not be live at MI.
          // This doesn't work for several reasons.
          // 1. GlobalBaseReg may have been spilled.
          // 2. It may not be live at MI.
-        return false;
+        return NULL;
      }
  
      }
  
-    // Create a v4i32 constant-pool entry.
+    // Create a constant-pool entry.
      MachineConstantPool &MCP = *MF.getConstantPool();
      MachineConstantPool &MCP = *MF.getConstantPool();
-    const VectorType *Ty =
-          VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
-    Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
-                    Constant::getNullValue(Ty) :
-                    Constant::getAllOnesValue(Ty);
-    unsigned CPI = MCP.getConstantPoolIndex(C, 16);
+    const Type *Ty;
+    if (LoadMI->getOpcode() == X86::FsFLD0SS)
+      Ty = Type::getFloatTy(MF.getFunction()->getContext());
+    else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+      Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+    else
+      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
+    Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+                    Constant::getAllOnesValue(Ty) :
+                    Constant::getNullValue(Ty);
+    unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
  
      // Create operands to load from the constant pool entry.
      MOs.push_back(MachineOperand::CreateReg(PICBase, false));
  
      // Create operands to load from the constant pool entry.
      MOs.push_back(MachineOperand::CreateReg(PICBase, false));
@@ -2349,11 +2568,15 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      MOs.push_back(MachineOperand::CreateReg(0, false));
      MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
      MOs.push_back(MachineOperand::CreateReg(0, false));
      MOs.push_back(MachineOperand::CreateReg(0, false));
      MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
      MOs.push_back(MachineOperand::CreateReg(0, false));
-  } else {
+    break;
+  }
+  default: {
      // Folding a normal load. Just copy the load's address operands.
      unsigned NumOps = LoadMI->getDesc().getNumOperands();
      for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
        MOs.push_back(LoadMI->getOperand(i));
      // Folding a normal load. Just copy the load's address operands.
      unsigned NumOps = LoadMI->getDesc().getNumOperands();
      for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
        MOs.push_back(LoadMI->getOperand(i));
+    break;
+  }
    }
    return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
  }
    }
    return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
  }
@@ -2395,6 +2618,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
      case X86::MOV8r0:
      case X86::MOV16r0:
      case X86::MOV32r0:
      case X86::MOV8r0:
      case X86::MOV16r0:
      case X86::MOV32r0:
+    case X86::MOV64r0:
        return true;
      default: break;
      }
        return true;
      default: break;
      }
@@ -2407,7 +2631,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
    
    if (OpcodeTablePtr) {
      // Find the Opcode to fuse
    
    if (OpcodeTablePtr) {
      // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
        OpcodeTablePtr->find((unsigned*)Opc);
      if (I != OpcodeTablePtr->end())
        return true;
        OpcodeTablePtr->find((unsigned*)Opc);
      if (I != OpcodeTablePtr->end())
        return true;
@@ -2418,11 +2642,10 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
  bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                  unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
  bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                  unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
    if (I == MemOp2RegOpTable.end())
      return false;
      MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
    if (I == MemOp2RegOpTable.end())
      return false;
-  DebugLoc dl = MI->getDebugLoc();
    unsigned Opc = I->second.first;
    unsigned Index = I->second.second & 0xf;
    bool FoldedLoad = I->second.second & (1 << 4);
    unsigned Opc = I->second.first;
    unsigned Index = I->second.second & 0xf;
    bool FoldedLoad = I->second.second & (1 << 4);
@@ -2455,7 +2678,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
  
    // Emit the load instruction.
    if (UnfoldLoad) {
  
    // Emit the load instruction.
    if (UnfoldLoad) {
-    loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractLoadMemRefs(MI->memoperands_begin(),
+                            MI->memoperands_end());
+    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
      if (UnfoldStore) {
        // Address operands cannot be marked isKill.
        for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
      if (UnfoldStore) {
        // Address operands cannot be marked isKill.
        for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
@@ -2515,7 +2742,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
    // Emit the store instruction.
    if (UnfoldStore) {
      const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
    // Emit the store instruction.
    if (UnfoldStore) {
      const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
-    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractStoreMemRefs(MI->memoperands_begin(),
+                             MI->memoperands_end());
+    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
    }
  
    return true;
    }
  
    return true;
@@ -2527,7 +2758,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
    if (!N->isMachineOpcode())
      return false;
  
    if (!N->isMachineOpcode())
      return false;
  
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
    if (I == MemOp2RegOpTable.end())
      return false;
      MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
    if (I == MemOp2RegOpTable.end())
      return false;
@@ -2557,14 +2788,20 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
  
    // Emit the load instruction.
    SDNode *Load = 0;
  
    // Emit the load instruction.
    SDNode *Load = 0;
-  const MachineFunction &MF = DAG.getMachineFunction();
+  MachineFunction &MF = DAG.getMachineFunction();
    if (FoldedLoad) {
      EVT VT = *RC->vt_begin();
    if (FoldedLoad) {
      EVT VT = *RC->vt_begin();
-    bool isAligned = (RI.getStackAlignment() >= 16) ||
-      RI.needsStackRealignment(MF);
-    Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
-                             VT, MVT::Other, &AddrOps[0], AddrOps.size());
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                            cast<MachineSDNode>(N)->memoperands_end());
+    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+    Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+                              VT, MVT::Other, &AddrOps[0], AddrOps.size());
      NewNodes.push_back(Load);
      NewNodes.push_back(Load);
+
+    // Preserve memory reference information.
+    cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
    }
  
    // Emit the data processing instruction.
    }
  
    // Emit the data processing instruction.
@@ -2582,8 +2819,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
    if (Load)
      BeforeOps.push_back(SDValue(Load, 0));
    std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
    if (Load)
      BeforeOps.push_back(SDValue(Load, 0));
    std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
-  SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
-                                     BeforeOps.size());
+  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
+                                      BeforeOps.size());
    NewNodes.push_back(NewNode);
  
    // Emit the store instruction.
    NewNodes.push_back(NewNode);
  
    // Emit the store instruction.
@@ -2591,21 +2828,28 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
      AddrOps.pop_back();
      AddrOps.push_back(SDValue(NewNode, 0));
      AddrOps.push_back(Chain);
      AddrOps.pop_back();
      AddrOps.push_back(SDValue(NewNode, 0));
      AddrOps.push_back(Chain);
-    bool isAligned = (RI.getStackAlignment() >= 16) ||
-      RI.needsStackRealignment(MF);
-    SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
-                                                        isAligned, TM),
-                                      dl, MVT::Other,
-                                      &AddrOps[0], AddrOps.size());
+    // Pull the store-side memory operands off the node being unfolded so they
+    // can be attached to the stand-alone store created below.
+    std::pair<MachineInstr::mmo_iterator,
+              MachineInstr::mmo_iterator> MMOs =
+      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+                             cast<MachineSDNode>(N)->memoperands_end());
+    // Only trust the recorded alignment when a memory operand was actually
+    // extracted; an empty range must not be dereferenced.  With no
+    // information available, conservatively select the unaligned opcode.
+    bool isAligned = MMOs.first != MMOs.second &&
+      (*MMOs.first)->getAlignment() >= 16;
+    SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
+                                                         isAligned, TM),
+                                       dl, MVT::Other,
+                                       &AddrOps[0], AddrOps.size());
      NewNodes.push_back(Store);
      NewNodes.push_back(Store);
+
+    // Preserve memory reference information.  These belong on the store that
+    // was just created; Load is null when only a store is being unfolded.
+    cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
    }
  
    return true;
  }
  
  unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
    }
  
    return true;
  }
  
  unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
-                                      bool UnfoldLoad, bool UnfoldStore) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+                                      bool UnfoldLoad, bool UnfoldStore,
+                                      unsigned *LoadRegIndex) const {
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      MemOp2RegOpTable.find((unsigned*)Opc);
    if (I == MemOp2RegOpTable.end())
      return 0;
      MemOp2RegOpTable.find((unsigned*)Opc);
    if (I == MemOp2RegOpTable.end())
      return 0;
@@ -2615,30 +2859,143 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
      return 0;
    if (UnfoldStore && !FoldedStore)
      return 0;
      return 0;
    if (UnfoldStore && !FoldedStore)
      return 0;
+  if (LoadRegIndex)
+    *LoadRegIndex = I->second.second & 0xf;
    return I->second.first;
  }
  
    return I->second.first;
  }
  
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-  
-  switch (MBB.back().getOpcode()) {
-  case X86::TCRETURNri:
-  case X86::TCRETURNdi:
-  case X86::RET:     // Return.
-  case X86::RETI:
-  case X86::TAILJMPd:
-  case X86::TAILJMPr:
-  case X86::TAILJMPm:
-  case X86::JMP:     // Uncond branch.
-  case X86::JMP32r:  // Indirect branch.
-  case X86::JMP64r:  // Indirect branch (64-bit).
-  case X86::JMP32m:  // Indirect branch through mem.
-  case X86::JMP64m:  // Indirect branch through mem (64-bit).
-    return true;
+/// areLoadsFromSameBasePtr - Decide whether two nodes are loads off the same
+/// base pointer that differ only in their constant displacement.
+///
+/// Returns true only when all of the following hold:
+///  - both nodes are machine nodes whose opcode is one of the plain load
+///    opcodes listed in the switches below;
+///  - operands 0, 4 and 5 of the two nodes are identical;
+///  - operands 1 and 2 are identical, operand 1 is the constant 1 and
+///    operand 2 is register 0;
+///  - operand 3 of each node is a constant.
+/// On success Offset1 and Offset2 receive the sign-extended operand-3 values
+/// of Load1 and Load2 respectively.  On failure they are left unmodified.
+bool
+X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                     int64_t &Offset1, int64_t &Offset2) const {
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+    return false;
+  unsigned Opc1 = Load1->getMachineOpcode();
+  unsigned Opc2 = Load2->getMachineOpcode();
+  // Each opcode is checked independently: the two loads need not use the same
+  // opcode, but both must come from this list.
+  switch (Opc1) {
+  default: return false;
+  case X86::MOV8rm:
+  case X86::MOV16rm:
+  case X86::MOV32rm:
+  case X86::MOV64rm:
+  case X86::LD_Fp32m:
+  case X86::LD_Fp64m:
+  case X86::LD_Fp80m:
+  case X86::MOVSSrm:
+  case X86::MOVSDrm:
+  case X86::MMX_MOVD64rm:
+  case X86::MMX_MOVQ64rm:
+  case X86::FsMOVAPSrm:
+  case X86::FsMOVAPDrm:
+  case X86::MOVAPSrm:
+  case X86::MOVUPSrm:
+  case X86::MOVUPSrm_Int:
+  case X86::MOVAPDrm:
+  case X86::MOVDQArm:
+  case X86::MOVDQUrm:
+  case X86::MOVDQUrm_Int:
+    break;
+  }
+  switch (Opc2) {
    default: return false;
    default: return false;
+  case X86::MOV8rm:
+  case X86::MOV16rm:
+  case X86::MOV32rm:
+  case X86::MOV64rm:
+  case X86::LD_Fp32m:
+  case X86::LD_Fp64m:
+  case X86::LD_Fp80m:
+  case X86::MOVSSrm:
+  case X86::MOVSDrm:
+  case X86::MMX_MOVD64rm:
+  case X86::MMX_MOVQ64rm:
+  case X86::FsMOVAPSrm:
+  case X86::FsMOVAPDrm:
+  case X86::MOVAPSrm:
+  case X86::MOVUPSrm:
+  case X86::MOVUPSrm_Int:
+  case X86::MOVAPDrm:
+  case X86::MOVDQArm:
+  case X86::MOVDQUrm:
+  case X86::MOVDQUrm_Int:
+    break;
+  }
+
+  // Check if chain operands and base addresses match.
+  if (Load1->getOperand(0) != Load2->getOperand(0) ||
+      Load1->getOperand(5) != Load2->getOperand(5))
+    return false;
+  // Segment operands should match as well.
+  if (Load1->getOperand(4) != Load2->getOperand(4))
+    return false;
+  // Scale should be 1, Index should be Reg0.
+  if (Load1->getOperand(1) == Load2->getOperand(1) &&
+      Load1->getOperand(2) == Load2->getOperand(2)) {
+    // Operands 1 and 2 are known equal here, so inspecting Load1 covers both.
+    if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
+      return false;
+    SDValue Op2 = Load1->getOperand(2);
+    if (!isa<RegisterSDNode>(Op2) ||
+        cast<RegisterSDNode>(Op2)->getReg() != 0)
+      return false;
+
+    // Now let's examine the displacements.
+    if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
+        isa<ConstantSDNode>(Load2->getOperand(3))) {
+      Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
+      Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
+      return true;
+    }
+  }
+  return false;
+}
+
+/// shouldScheduleLoadsNear - Heuristic deciding whether two loads should be
+/// scheduled next to each other.  Offset1/Offset2 are the displacements of
+/// Load1/Load2 and the caller must pass them with Offset2 > Offset1.
+/// NumLoads is presumably the number of loads already grouped with Load1 --
+/// confirm against the caller.
+///
+/// Returns false when:
+///  - (Offset2 - Offset1) / 8 exceeds 64;
+///  - the two loads use different machine opcodes;
+///  - the opcode is one of the x87 / MMX loads listed below;
+///  - the result type is i8/i16/i32/i64/f32/f64 and NumLoads is non-zero;
+///  - the result type is anything else and NumLoads is >= 3 on a 64-bit
+///    subtarget, or non-zero otherwise.
+/// Returns true in all remaining cases.
+bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                           int64_t Offset1, int64_t Offset2,
+                                           unsigned NumLoads) const {
+  assert(Offset2 > Offset1 && "Loads must be passed in increasing offset order");
+  if ((Offset2 - Offset1) / 8 > 64)
+    return false;
+
+  unsigned Opc1 = Load1->getMachineOpcode();
+  unsigned Opc2 = Load2->getMachineOpcode();
+  if (Opc1 != Opc2)
+    return false;  // FIXME: overly conservative?
+
+  // Never group these x87 / MMX loads.
+  switch (Opc1) {
+  default: break;
+  case X86::LD_Fp32m:
+  case X86::LD_Fp64m:
+  case X86::LD_Fp80m:
+  case X86::MMX_MOVD64rm:
+  case X86::MMX_MOVQ64rm:
+    return false;
    }
    }
+
+  EVT VT = Load1->getValueType(0);
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: {
+    // XMM registers. In 64-bit mode we can be a bit more aggressive since we
+    // have 16 of them to play with.
+    if (TM.getSubtargetImpl()->is64Bit()) {
+      if (NumLoads >= 3)
+        return false;
+    } else if (NumLoads)
+      return false;
+    break;
+  }
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+  case MVT::f32:
+  case MVT::f64:
+    // Scalar types: only allow grouping when NumLoads is zero.
+    if (NumLoads)
+      return false;
+    break;
+  }
+
+  return true;
  }
  
  }
  
+
  bool X86InstrInfo::
  ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
    assert(Cond.size() == 1 && "Invalid X86 branch condition!");
  bool X86InstrInfo::
  ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
    assert(Cond.size() == 1 && "Invalid X86 branch condition!");
@@ -2657,22 +3014,11 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
             RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
  }
  
             RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
  }
  
-unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
-  switch (Desc->TSFlags & X86II::ImmMask) {
-  case X86II::Imm8:   return 1;
-  case X86II::Imm16:  return 2;
-  case X86II::Imm32:  return 4;
-  case X86II::Imm64:  return 8;
-  default: llvm_unreachable("Immediate size not set!");
-    return 0;
-  }
-}
  
  
-/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
-/// e.g. r8, xmm8, etc.
-bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) {
-  if (!MO.isReg()) return false;
-  switch (MO.getReg()) {
+/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher)
+/// register?  e.g. r8, xmm8, xmm13, etc.
+bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
+  switch (RegNo) {
    default: break;
    case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
    case X86::R12:   case X86::R13:   case X86::R14:   case X86::R15:
    default: break;
    case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
    case X86::R12:   case X86::R13:   case X86::R14:   case X86::R15:
@@ -3026,24 +3372,24 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
      switch (Opcode) {
      default: 
        break;
      switch (Opcode) {
      default: 
        break;
-    case TargetInstrInfo::INLINEASM: {
+    case TargetOpcode::INLINEASM: {
        const MachineFunction *MF = MI.getParent()->getParent();
        const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
        FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                                            *MF->getTarget().getMCAsmInfo());
        break;
      }
        const MachineFunction *MF = MI.getParent()->getParent();
        const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
        FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                                            *MF->getTarget().getMCAsmInfo());
        break;
      }
-    case TargetInstrInfo::DBG_LABEL:
-    case TargetInstrInfo::EH_LABEL:
+    case TargetOpcode::DBG_LABEL:
+    case TargetOpcode::EH_LABEL:
        break;
        break;
-    case TargetInstrInfo::IMPLICIT_DEF:
-    case X86::DWARF_LOC:
+    case TargetOpcode::IMPLICIT_DEF:
+    case TargetOpcode::KILL:
      case X86::FP_REG_KILL:
        break;
      case X86::MOVPC32r: {
        // This emits the "call" portion of this pseudo instruction.
        ++FinalSize;
      case X86::FP_REG_KILL:
        break;
      case X86::MOVPC32r: {
        // This emits the "call" portion of this pseudo instruction.
        ++FinalSize;
-      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+      FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
        break;
      }
      }
        break;
      }
      }
@@ -3061,7 +3407,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
        } else if (MO.isSymbol()) {
          FinalSize += sizeExternalSymbolAddress(false);
        } else if (MO.isImm()) {
        } else if (MO.isSymbol()) {
          FinalSize += sizeExternalSymbolAddress(false);
        } else if (MO.isImm()) {
-        FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+        FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
        } else {
          llvm_unreachable("Unknown RawFrm operand!");
        }
        } else {
          llvm_unreachable("Unknown RawFrm operand!");
        }
@@ -3074,7 +3420,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
      
      if (CurOp != NumOps) {
        const MachineOperand &MO1 = MI.getOperand(CurOp++);
      
      if (CurOp != NumOps) {
        const MachineOperand &MO1 = MI.getOperand(CurOp++);
-      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+      unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
        if (MO1.isImm())
          FinalSize += sizeConstant(Size);
        else {
        if (MO1.isImm())
          FinalSize += sizeConstant(Size);
        else {
@@ -3099,7 +3445,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
      CurOp += 2;
      if (CurOp != NumOps) {
        ++CurOp;
      CurOp += 2;
      if (CurOp != NumOps) {
        ++CurOp;
-      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+      FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
      }
      break;
    }
      }
      break;
    }
@@ -3109,7 +3455,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
      CurOp +=  X86AddrNumOperands + 1;
      if (CurOp != NumOps) {
        ++CurOp;
      CurOp +=  X86AddrNumOperands + 1;
      if (CurOp != NumOps) {
        ++CurOp;
-      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+      FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
      }
      break;
    }
      }
      break;
    }
@@ -3120,7 +3466,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
      CurOp += 2;
      if (CurOp != NumOps) {
        ++CurOp;
      CurOp += 2;
      if (CurOp != NumOps) {
        ++CurOp;
-      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+      FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
      }
      break;
  
      }
      break;
  
@@ -3137,7 +3483,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
      CurOp += AddrOperands + 1;
      if (CurOp != NumOps) {
        ++CurOp;
      CurOp += AddrOperands + 1;
      if (CurOp != NumOps) {
        ++CurOp;
-      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+      FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
      }
      break;
    }
      }
      break;
    }
@@ -3162,7 +3508,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
  
      if (CurOp != NumOps) {
        const MachineOperand &MO1 = MI.getOperand(CurOp++);
  
      if (CurOp != NumOps) {
        const MachineOperand &MO1 = MI.getOperand(CurOp++);
-      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+      unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
        if (MO1.isImm())
          FinalSize += sizeConstant(Size);
        else {
        if (MO1.isImm())
          FinalSize += sizeConstant(Size);
        else {
@@ -3192,7 +3538,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
  
      if (CurOp != NumOps) {
        const MachineOperand &MO = MI.getOperand(CurOp++);
  
      if (CurOp != NumOps) {
        const MachineOperand &MO = MI.getOperand(CurOp++);
-      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+      unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
        if (MO.isImm())
          FinalSize += sizeConstant(Size);
        else {
        if (MO.isImm())
          FinalSize += sizeConstant(Size);
        else {
@@ -3258,8 +3604,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc();
+  DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
    
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);