Merge XXXGenRegisterNames.inc into XXXGenRegisterInfo.inc

[oota-llvm.git] / lib / Target / X86 / X86InstrInfo.cpp
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index dd4940b4ed24bacb411a7c8c0036464d83866fda..3112dc7382a4bdf15765845794244bafbe7db0bc 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -13,7 +13,6 @@
  
  #include "X86InstrInfo.h"
  #include "X86.h"
-#include "X86GenInstrInfo.inc"
  #include "X86InstrBuilder.h"
  #include "X86MachineFunctionInfo.h"
  #include "X86Subtarget.h"
@@ -34,9 +33,11 @@
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/MC/MCAsmInfo.h"
-
  #include <limits>
  
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
  using namespace llvm;
  
  static cl::opt<bool>
@@ -55,7 +56,11 @@ ReMatPICStubLoad("remat-pic-stub-load",
  X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
    : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
      TM(tm), RI(tm, *this) {
-  SmallVector<unsigned,16> AmbEntries;
+  enum {
+    TB_NOT_REVERSABLE = 1U << 31,
+    TB_FLAGS = TB_NOT_REVERSABLE
+  };
+
    static const unsigned OpTbl2Addr[][2] = {
      { X86::ADC32ri,     X86::ADC32mi },
      { X86::ADC32ri8,    X86::ADC32mi8 },
@@ -65,13 +70,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::ADC64rr,     X86::ADC64mr },
      { X86::ADD16ri,     X86::ADD16mi },
      { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16ri_DB,  X86::ADD16mi  | TB_NOT_REVERSABLE },
+    { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
      { X86::ADD16rr,     X86::ADD16mr },
+    { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
      { X86::ADD32ri,     X86::ADD32mi },
      { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32ri_DB,  X86::ADD32mi | TB_NOT_REVERSABLE },
+    { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
      { X86::ADD32rr,     X86::ADD32mr },
+    { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
      { X86::ADD64ri32,   X86::ADD64mi32 },
      { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+    { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
      { X86::ADD64rr,     X86::ADD64mr },
+    { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
      { X86::ADD8ri,      X86::ADD8mi },
      { X86::ADD8rr,      X86::ADD8mr },
      { X86::AND16ri,     X86::AND16mi },
@@ -216,16 +230,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  
    for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
      unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1];
-    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
-                                               std::make_pair(MemOp,0))).second)
-      assert(false && "Duplicated entries?");
+    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+    // If this operation is not reversible (because the mapping is many->one),
+    // don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
      // Index 0, folded load and store, no alignment requirement.
      unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                                std::make_pair(RegOp,
-                                                              AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+            "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
    }
  
    // If the third value is 1, then it's folding either a load or a store.
@@ -252,8 +271,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::DIV64r,      X86::DIV64m, 1, 0 },
      { X86::DIV8r,       X86::DIV8m, 1, 0 },
      { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
-    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
-    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
      { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
      { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
      { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
@@ -268,7 +287,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
      { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
      { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
-    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
      { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
      { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
      { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -312,19 +330,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
    };
  
    for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp = OpTbl0[i][0];
-    unsigned MemOp = OpTbl0[i][1];
-    unsigned Align = OpTbl0[i][3];
-    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    unsigned RegOp      = OpTbl0[i][0];
+    unsigned MemOp      = OpTbl0[i][1] & ~TB_FLAGS;
      unsigned FoldedLoad = OpTbl0[i][2];
+    unsigned Align      = OpTbl0[i][3];
+    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this operation is not reversible (because the mapping is many->one),
+    // don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
      // Index 0, folded load or store.
      unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
    }
  
    static const unsigned OpTbl1[][3] = {
@@ -342,16 +363,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
      { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
      { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
-    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
-    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
      { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
      { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
      { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
      { X86::IMUL32rri8,      X86::IMUL32rmi8, 0 },
      { X86::IMUL64rri32,     X86::IMUL64rmi32, 0 },
      { X86::IMUL64rri8,      X86::IMUL64rmi8, 0 },
-    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
-    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
      { X86::Int_COMISDrr,    X86::Int_COMISDrm, 0 },
      { X86::Int_COMISSrr,    X86::Int_COMISSrm, 0 },
      { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm, 16 },
@@ -380,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
      { X86::MOV16rr,         X86::MOV16rm, 0 },
      { X86::MOV32rr,         X86::MOV32rm, 0 },
-    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
      { X86::MOV64rr,         X86::MOV64rm, 0 },
      { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
      { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -439,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  
    for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
      unsigned RegOp = OpTbl1[i][0];
-    unsigned MemOp = OpTbl1[i][1];
+    unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
      unsigned Align = OpTbl1[i][2];
-    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this operation is not reversible (because the mapping is many->one),
+    // don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
      // Index 1, folded load
      unsigned AuxInfo = 1 | (1 << 4);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
    }
  
    static const unsigned OpTbl2[][3] = {
      { X86::ADC32rr,         X86::ADC32rm, 0 },
      { X86::ADC64rr,         X86::ADC64rm, 0 },
      { X86::ADD16rr,         X86::ADD16rm, 0 },
+    { X86::ADD16rr_DB,      X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
      { X86::ADD32rr,         X86::ADD32rm, 0 },
+    { X86::ADD32rr_DB,      X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
      { X86::ADD64rr,         X86::ADD64rm, 0 },
+    { X86::ADD64rr_DB,      X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
      { X86::ADD8rr,          X86::ADD8rm, 0 },
      { X86::ADDPDrr,         X86::ADDPDrm, 16 },
      { X86::ADDPSrr,         X86::ADDPSrm, 16 },
@@ -544,6 +568,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::IMUL16rr,        X86::IMUL16rm, 0 },
      { X86::IMUL32rr,        X86::IMUL32rm, 0 },
      { X86::IMUL64rr,        X86::IMUL64rm, 0 },
+    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
+    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
      { X86::MAXPDrr,         X86::MAXPDrm, 16 },
      { X86::MAXPDrr_Int,     X86::MAXPDrm_Int, 16 },
      { X86::MAXPSrr,         X86::MAXPSrm, 16 },
@@ -652,20 +678,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  
    for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
      unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1];
+    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
      unsigned Align = OpTbl2[i][2];
-    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+
+    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this operation is not reversible (because the mapping is many->one),
+    // don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
      // Index 2, folded load
      unsigned AuxInfo = 2 | (1 << 4);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                   std::make_pair(RegOp, AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+           "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
    }
-
-  // Remove ambiguous entries.
-  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
  }
  
  bool
@@ -745,9 +774,7 @@ static bool isFrameLoadOpcode(int Opcode) {
    case X86::MOV8rm:
    case X86::MOV16rm:
    case X86::MOV32rm:
-  case X86::MOV32rm_TC:
    case X86::MOV64rm:
-  case X86::MOV64rm_TC:
    case X86::LD_Fp64m:
    case X86::MOVSSrm:
    case X86::MOVSDrm:
@@ -768,9 +795,7 @@ static bool isFrameStoreOpcode(int Opcode) {
    case X86::MOV8mr:
    case X86::MOV16mr:
    case X86::MOV32mr:
-  case X86::MOV32mr_TC:
    case X86::MOV64mr:
-  case X86::MOV64mr_TC:
    case X86::ST_FpP64m:
    case X86::MOVSSmr:
    case X86::MOVSDmr:
@@ -785,7 +810,7 @@ static bool isFrameStoreOpcode(int Opcode) {
    return false;
  }
  
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                             int &FrameIndex) const {
    if (isFrameLoadOpcode(MI->getOpcode()))
      if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
@@ -793,7 +818,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
    return 0;
  }
  
-unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                   int &FrameIndex) const {
    if (isFrameLoadOpcode(MI->getOpcode())) {
      unsigned Reg;
@@ -893,7 +918,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
      case X86::MOVSDrm:
      case X86::MOVAPSrm:
      case X86::MOVUPSrm:
-    case X86::MOVUPSrm_Int:
      case X86::MOVAPDrm:
      case X86::MOVDQArm:
      case X86::MMX_MOVD64rm:
@@ -923,10 +947,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
            isPICBase = true;
          }
          return isPICBase;
-      } 
+      }
        return false;
      }
- 
+
       case X86::LEA32r:
       case X86::LEA64r: {
         if (MI->getOperand(2).isImm() &&
@@ -1099,11 +1123,11 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
    unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
      ? X86::LEA64_32r : X86::LEA32r;
    MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
    unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-            
+
    // Build and insert into an implicit UNDEF value. This is OK because
-  // well be shifting and then extracting the lower 16-bits. 
+  // we'll be shifting and then extracting the lower 16 bits.
    // This has the potential to cause partial register stall. e.g.
    //   movw    (%rbp,%rcx,2), %dx
    //   leal    -65(%rdx), %esi
@@ -1137,9 +1161,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
      break;
    case X86::ADD16ri:
    case X86::ADD16ri8:
-    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());    
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
+    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
      break;
-  case X86::ADD16rr: {
+  case X86::ADD16rr:
+  case X86::ADD16rr_DB: {
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      unsigned leaInReg2 = 0;
@@ -1149,9 +1176,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
        // just a single insert_subreg.
        addRegReg(MIB, leaInReg, true, leaInReg, false);
      } else {
-      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
        // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits. 
+      // we'll be shifting and then extracting the lower 16 bits.
        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
        InsMI2 =
          BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
@@ -1218,7 +1245,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
    case X86::SHUFPSrri: {
      assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
      if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
-    
+
      unsigned B = MI->getOperand(1).getReg();
      unsigned C = MI->getOperand(2).getReg();
      if (B != C) return 0;
@@ -1236,6 +1263,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 0 || ShAmt >= 4) return 0;
  
+    // LEA can't handle RSP.
+    if (TargetRegisterInfo::isVirtualRegister(Src) &&
+        !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass))
+      return 0;
+
      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(0).addImm(1 << ShAmt)
@@ -1250,6 +1282,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 0 || ShAmt >= 4) return 0;
  
+    // LEA can't handle ESP.
+    if (TargetRegisterInfo::isVirtualRegister(Src) &&
+        !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass))
+      return 0;
+
      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
      NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
@@ -1288,6 +1325,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
        assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
        unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
          : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src) &&
+          !MF.getRegInfo().constrainRegClass(Src,
+                            MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass :
+                                                   X86::GR32_NOSPRegisterClass))
+        return 0;
+
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                                .addReg(Dest, RegState::Define |
                                        getDeadRegState(isDead)),
@@ -1310,6 +1355,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
        assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
        unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
          : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src) &&
+          !MF.getRegInfo().constrainRegClass(Src,
+                            MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass :
+                                                   X86::GR32_NOSPRegisterClass))
+        return 0;
+
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                                .addReg(Dest, RegState::Define |
                                        getDeadRegState(isDead)),
@@ -1327,12 +1379,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                             Src, isKill, -1);
        break;
      case X86::ADD64rr:
-    case X86::ADD32rr: {
+    case X86::ADD64rr_DB:
+    case X86::ADD32rr:
+    case X86::ADD32rr_DB: {
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
-        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+      unsigned Opc;
+      TargetRegisterClass *RC;
+      if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
+        Opc = X86::LEA64r;
+        RC = X86::GR64_NOSPRegisterClass;
+      } else {
+        Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+        RC = X86::GR32_NOSPRegisterClass;
+      }
+
+
        unsigned Src2 = MI->getOperand(2).getReg();
        bool isKill2 = MI->getOperand(2).isKill();
+
+      // LEA can't handle RSP.
+      if (TargetRegisterInfo::isVirtualRegister(Src2) &&
+          !MF.getRegInfo().constrainRegClass(Src2, RC))
+        return 0;
+
        NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                          .addReg(Dest, RegState::Define |
                                  getDeadRegState(isDead)),
@@ -1341,7 +1410,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
          LV->replaceKillInstruction(Src2, MI, NewMI);
        break;
      }
-    case X86::ADD16rr: {
+    case X86::ADD16rr:
+    case X86::ADD16rr_DB: {
        if (DisableLEA16)
          return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1357,6 +1427,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      }
      case X86::ADD64ri32:
      case X86::ADD64ri8:
+    case X86::ADD64ri32_DB:
+    case X86::ADD64ri8_DB:
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                                .addReg(Dest, RegState::Define |
@@ -1364,7 +1436,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                Src, isKill, MI->getOperand(2).getImm());
        break;
      case X86::ADD32ri:
-    case X86::ADD32ri8: {
+    case X86::ADD32ri8:
+    case X86::ADD32ri_DB:
+    case X86::ADD32ri8_DB: {
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
        unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1375,6 +1449,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
      }
      case X86::ADD16ri:
      case X86::ADD16ri8:
+    case X86::ADD16ri_DB:
+    case X86::ADD16ri8_DB:
        if (DisableLEA16)
          return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
        assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -1396,7 +1472,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
        LV->replaceKillInstruction(Dest, MI, NewMI);
    }
  
-  MFI->insert(MBBI, NewMI);          // Insert the new inst    
+  MFI->insert(MBBI, NewMI);          // Insert the new inst
    return NewMI;
  }
  
@@ -1615,18 +1691,18 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
  }
  
  bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.isTerminator()) return false;
-  
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (!MCID.isTerminator()) return false;
+
    // Conditional branch is a special case.
-  if (TID.isBranch() && !TID.isBarrier())
+  if (MCID.isBranch() && !MCID.isBarrier())
      return true;
-  if (!TID.isPredicable())
+  if (!MCID.isPredicable())
      return true;
    return !isPredicated(MI);
  }
  
-bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
@@ -1715,7 +1791,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
            .addMBB(UnCondBrIter->getOperand(0).getMBB());
          BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
            .addMBB(TargetBB);
-        MBB.addSuccessor(TargetBB);
  
          OldInst->eraseFromParent();
          UnCondBrIter->eraseFromParent();
@@ -1787,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
      I = MBB.end();
      ++Count;
    }
-  
+
    return Count;
  }
  
@@ -1941,70 +2016,48 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
                                        bool isStackAligned,
                                        const TargetMachine &TM,
                                        bool load) {
-  switch (RC->getID()) {
+  switch (RC->getSize()) {
    default:
-    llvm_unreachable("Unknown regclass");
-  case X86::GR64RegClassID:
-  case X86::GR64_NOSPRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32RegClassID:
-  case X86::GR32_NOSPRegClassID:
-  case X86::GR32_ADRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16RegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
-  case X86::GR8RegClassID:
-    // Copying to or from a physical H register on x86-64 requires a NOREX
-    // move.  Otherwise use a normal move.
-    if (isHReg(Reg) &&
-        TM.getSubtarget<X86Subtarget>().is64Bit())
-      return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
-    else
-      return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_ABCDRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32_ABCDRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16_ABCDRegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
-  case X86::GR8_ABCD_LRegClassID:
-    return load ? X86::MOV8rm :X86::MOV8mr;
-  case X86::GR8_ABCD_HRegClassID:
+    llvm_unreachable("Unknown spill size");
+  case 1:
+    assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
-      return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
-    else
-      return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_NOREXRegClassID:
-  case X86::GR64_NOREX_NOSPRegClassID:
-    return load ? X86::MOV64rm : X86::MOV64mr;
-  case X86::GR32_NOREXRegClassID:
-    return load ? X86::MOV32rm : X86::MOV32mr;
-  case X86::GR16_NOREXRegClassID:
-    return load ? X86::MOV16rm : X86::MOV16mr;
-  case X86::GR8_NOREXRegClassID:
+      // Copying to or from a physical H register on x86-64 requires a NOREX
+      // move.  Otherwise use a normal move.
+      if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
+        return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
      return load ? X86::MOV8rm : X86::MOV8mr;
-  case X86::GR64_TCRegClassID:
-    return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
-  case X86::GR32_TCRegClassID:
-    return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
-  case X86::RFP80RegClassID:
+  case 2:
+    assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
+    return load ? X86::MOV16rm : X86::MOV16mr;
+  case 4:
+    if (X86::GR32RegClass.hasSubClassEq(RC))
+      return load ? X86::MOV32rm : X86::MOV32mr;
+    if (X86::FR32RegClass.hasSubClassEq(RC))
+      return load ? X86::MOVSSrm : X86::MOVSSmr;
+    if (X86::RFP32RegClass.hasSubClassEq(RC))
+      return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+    llvm_unreachable("Unknown 4-byte regclass");
+  case 8:
+    if (X86::GR64RegClass.hasSubClassEq(RC))
+      return load ? X86::MOV64rm : X86::MOV64mr;
+    if (X86::FR64RegClass.hasSubClassEq(RC))
+      return load ? X86::MOVSDrm : X86::MOVSDmr;
+    if (X86::VR64RegClass.hasSubClassEq(RC))
+      return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
+    if (X86::RFP64RegClass.hasSubClassEq(RC))
+      return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+    llvm_unreachable("Unknown 8-byte regclass");
+  case 10:
+    assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
      return load ? X86::LD_Fp80m : X86::ST_FpP80m;
-  case X86::RFP64RegClassID:
-    return load ? X86::LD_Fp64m : X86::ST_Fp64m;
-  case X86::RFP32RegClassID:
-    return load ? X86::LD_Fp32m : X86::ST_Fp32m;
-  case X86::FR32RegClassID:
-    return load ? X86::MOVSSrm : X86::MOVSSmr;
-  case X86::FR64RegClassID:
-    return load ? X86::MOVSDrm : X86::MOVSDmr;
-  case X86::VR128RegClassID:
+  case 16:
+    assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
      // If stack is realigned we can use aligned stores.
      if (isStackAligned)
        return load ? X86::MOVAPSrm : X86::MOVAPSmr;
      else
        return load ? X86::MOVUPSrm : X86::MOVUPSmr;
-  case X86::VR64RegClassID:
-    return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
    }
  }
  
@@ -2031,7 +2084,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
    const MachineFunction &MF = *MBB.getParent();
    assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
           "Stack slot too small for store");
-  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+    RI.canRealignStack(MF);
    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
    DebugLoc DL = MBB.findDebugLoc(MI);
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -2063,7 +2117,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI) const {
    const MachineFunction &MF = *MBB.getParent();
-  bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+    RI.canRealignStack(MF);
    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
    DebugLoc DL = MBB.findDebugLoc(MI);
    addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -2085,76 +2140,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
    NewMIs.push_back(MIB);
  }
  
-bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL = MBB.findDebugLoc(MI);
-
-  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
-  unsigned SlotSize = is64Bit ? 8 : 4;
-
-  MachineFunction &MF = *MBB.getParent();
-  unsigned FPReg = RI.getFrameRegister(MF);
-  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  unsigned CalleeFrameSize = 0;
-  
-  unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    if (Reg == FPReg)
-      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
-      continue;
-    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
-      CalleeFrameSize += SlotSize;
-      BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
-    } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
-                          RC, &RI);
-    }
-  }
-
-  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
-  return true;
-}
-
-bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                               MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL = MBB.findDebugLoc(MI);
-
-  MachineFunction &MF = *MBB.getParent();
-  unsigned FPReg = RI.getFrameRegister(MF);
-  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-  bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64();
-  unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned Reg = CSI[i].getReg();
-    if (Reg == FPReg)
-      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
-      continue;
-    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
-      BuildMI(MBB, MI, DL, get(Opc), Reg);
-    } else {
-      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
-                           RC, &RI);
-    }
-  }
-  return true;
-}
-
  MachineInstr*
  X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                         int FrameIx, uint64_t Offset,
@@ -2181,7 +2166,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
      MIB.addOperand(MOs[i]);
    if (NumAddrOps < 4)  // FrameIndex only
      addOffset(MIB, 0);
-  
+
    // Loop over the rest of the ri operands, converting them over.
    unsigned NumOps = MI->getDesc().getNumOperands()-2;
    for (unsigned i = 0; i != NumOps; ++i) {
@@ -2202,7 +2187,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
    MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                                MI->getDebugLoc(), true);
    MachineInstrBuilder MIB(NewMI);
-  
+
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (i == OpNo) {
@@ -2238,11 +2223,17 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI, unsigned i,
                                      const SmallVectorImpl<MachineOperand> &MOs,
                                      unsigned Size, unsigned Align) const {
-  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
+  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
    bool isTwoAddrFold = false;
    unsigned NumOps = MI->getDesc().getNumOperands();
    bool isTwoAddr = NumOps > 1 &&
-    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
+
+  // FIXME: AsmPrinter doesn't know how to handle
+  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+  if (MI->getOpcode() == X86::ADD32ri &&
+      MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+    return NULL;
  
    MachineInstr *NewMI = NULL;
    // Folding a memory location into the two-address part of a two-address
@@ -2251,7 +2242,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    if (isTwoAddr && NumOps >= 2 && i < 2 &&
        MI->getOperand(0).isReg() &&
        MI->getOperand(1).isReg() &&
-      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 
+      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
      OpcodeTablePtr = &RegOp2MemOpTable2Addr;
      isTwoAddrFold = true;
    } else if (i == 0) { // If operand 0
@@ -2265,19 +2256,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
      if (NewMI)
        return NewMI;
-    
+
      OpcodeTablePtr = &RegOp2MemOpTable0;
    } else if (i == 1) {
      OpcodeTablePtr = &RegOp2MemOpTable1;
    } else if (i == 2) {
      OpcodeTablePtr = &RegOp2MemOpTable2;
    }
-  
+
    // If table selected...
    if (OpcodeTablePtr) {
      // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+    DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+      OpcodeTablePtr->find(MI->getOpcode());
      if (I != OpcodeTablePtr->end()) {
        unsigned Opcode = I->second.first;
        unsigned MinAlign = I->second.second;
@@ -2285,7 +2276,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
          return NULL;
        bool NarrowToMOV32rm = false;
        if (Size) {
-        unsigned RCSize =  MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+        unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
          if (Size < RCSize) {
            // Check if it's safe to fold the load. If the size of the object is
            // narrower than the load width, then it's not.
@@ -2320,8 +2311,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        return NewMI;
      }
    }
-  
-  // No fusion 
+
+  // No fusion
    if (PrintFailedFusing && !MI->isCopy())
      dbgs() << "We failed to fuse operand " << i << " in " << *MI;
    return NULL;
@@ -2332,7 +2323,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                    MachineInstr *MI,
                                             const SmallVectorImpl<unsigned> &Ops,
                                                    int FrameIndex) const {
-  // Check switch flag 
+  // Check switch flag
    if (NoFusing) return NULL;
  
    if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2384,7 +2375,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                    MachineInstr *MI,
                                             const SmallVectorImpl<unsigned> &Ops,
                                                    MachineInstr *LoadMI) const {
-  // Check switch flag 
+  // Check switch flag
    if (NoFusing) return NULL;
  
    if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
@@ -2424,13 +2415,15 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        Alignment = 16;
        break;
      case X86::FsFLD0SD:
+    case X86::VFsFLD0SD:
        Alignment = 8;
        break;
      case X86::FsFLD0SS:
+    case X86::VFsFLD0SS:
        Alignment = 4;
        break;
      default:
-      llvm_unreachable("Don't know how to fold this instruction!");
+      return 0;
      }
    if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
      unsigned NewOpc = 0;
@@ -2490,9 +2483,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      MachineConstantPool &MCP = *MF.getConstantPool();
      const Type *Ty;
      unsigned Opc = LoadMI->getOpcode();
-    if (Opc == X86::FsFLD0SS)
+    if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
        Ty = Type::getFloatTy(MF.getFunction()->getContext());
-    else if (Opc == X86::FsFLD0SD)
+    else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
        Ty = Type::getDoubleTy(MF.getFunction()->getContext());
      else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
        Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -2525,17 +2518,23 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
  
  bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                    const SmallVectorImpl<unsigned> &Ops) const {
-  // Check switch flag 
+  // Check switch flag
    if (NoFusing) return 0;
  
    if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
      switch (MI->getOpcode()) {
      default: return false;
-    case X86::TEST8rr: 
+    case X86::TEST8rr:
      case X86::TEST16rr:
      case X86::TEST32rr:
      case X86::TEST64rr:
        return true;
+    case X86::ADD32ri:
+      // FIXME: AsmPrinter doesn't know how to handle
+      // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+      if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+        return false;
+      break;
      }
    }
  
@@ -2546,21 +2545,20 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
    unsigned Opc = MI->getOpcode();
    unsigned NumOps = MI->getDesc().getNumOperands();
    bool isTwoAddr = NumOps > 1 &&
-    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
  
    // Folding a memory location into the two-address part of a two-address
    // instruction is different than folding it other places.  It requires
    // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
-  if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 
+  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
      OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    } else if (OpNum == 0) { // If operand 0
      switch (Opc) {
      case X86::MOV8r0:
      case X86::MOV16r0:
      case X86::MOV32r0:
-    case X86::MOV64r0:
-      return true;
+    case X86::MOV64r0: return true;
      default: break;
      }
      OpcodeTablePtr = &RegOp2MemOpTable0;
@@ -2569,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
    } else if (OpNum == 2) {
      OpcodeTablePtr = &RegOp2MemOpTable2;
    }
-  
-  if (OpcodeTablePtr) {
-    // Find the Opcode to fuse
-    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-      OpcodeTablePtr->find((unsigned*)Opc);
-    if (I != OpcodeTablePtr->end())
-      return true;
-  }
+
+  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
+    return true;
    return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
  }
  
  bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                  unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(MI->getOpcode());
    if (I == MemOp2RegOpTable.end())
      return false;
    unsigned Opc = I->second.first;
@@ -2598,9 +2591,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
      return false;
    UnfoldStore &= FoldedStore;
  
-  const TargetInstrDesc &TID = get(Opc);
-  const TargetOperandInfo &TOI = TID.OpInfo[Index];
-  const TargetRegisterClass *RC = TOI.getRegClass(&RI);
+  const MCInstrDesc &MCID = get(Opc);
+  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
    if (!MI->hasOneMemOperand() &&
        RC == &X86::VR128RegClass &&
        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
@@ -2642,9 +2634,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
    }
  
    // Emit the data processing instruction.
-  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
+  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
    MachineInstrBuilder MIB(DataMI);
-  
+
    if (FoldedStore)
      MIB.addReg(Reg, RegState::Define);
    for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
@@ -2695,7 +2687,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
  
    // Emit the store instruction.
    if (UnfoldStore) {
-    const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
+    const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
      std::pair<MachineInstr::mmo_iterator,
                MachineInstr::mmo_iterator> MMOs =
        MF.extractStoreMemRefs(MI->memoperands_begin(),
@@ -2712,17 +2704,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
    if (!N->isMachineOpcode())
      return false;
  
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(N->getMachineOpcode());
    if (I == MemOp2RegOpTable.end())
      return false;
    unsigned Opc = I->second.first;
    unsigned Index = I->second.second & 0xf;
    bool FoldedLoad = I->second.second & (1 << 4);
    bool FoldedStore = I->second.second & (1 << 5);
-  const TargetInstrDesc &TID = get(Opc);
-  const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
-  unsigned NumDefs = TID.NumDefs;
+  const MCInstrDesc &MCID = get(Opc);
+  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+  unsigned NumDefs = MCID.NumDefs;
    std::vector<SDValue> AddrOps;
    std::vector<SDValue> BeforeOps;
    std::vector<SDValue> AfterOps;
@@ -2766,13 +2758,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
    // Emit the data processing instruction.
    std::vector<EVT> VTs;
    const TargetRegisterClass *DstRC = 0;
-  if (TID.getNumDefs() > 0) {
-    DstRC = TID.OpInfo[0].getRegClass(&RI);
+  if (MCID.getNumDefs() > 0) {
+    DstRC = getRegClass(MCID, 0, &RI);
      VTs.push_back(*DstRC->vt_begin());
    }
    for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
-    if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
+    if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
        VTs.push_back(VT);
    }
    if (Load)
@@ -2813,8 +2805,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
  unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                        bool UnfoldLoad, bool UnfoldStore,
                                        unsigned *LoadRegIndex) const {
-  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
-    MemOp2RegOpTable.find((unsigned*)Opc);
+  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
+    MemOp2RegOpTable.find(Opc);
    if (I == MemOp2RegOpTable.end())
      return 0;
    bool FoldedLoad = I->second.second & (1 << 4);
@@ -2852,11 +2844,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
    case X86::FsMOVAPDrm:
    case X86::MOVAPSrm:
    case X86::MOVUPSrm:
-  case X86::MOVUPSrm_Int:
    case X86::MOVAPDrm:
    case X86::MOVDQArm:
    case X86::MOVDQUrm:
-  case X86::MOVDQUrm_Int:
      break;
    }
    switch (Opc2) {
@@ -2876,11 +2866,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
    case X86::FsMOVAPDrm:
    case X86::MOVAPSrm:
    case X86::MOVUPSrm:
-  case X86::MOVUPSrm_Int:
    case X86::MOVAPDrm:
    case X86::MOVDQArm:
    case X86::MOVDQUrm:
-  case X86::MOVDQUrm_Int:
      break;
    }
  
@@ -3092,6 +3080,45 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
    NopInst.setOpcode(X86::NOOP);
  }
  
+bool X86InstrInfo::isHighLatencyDef(int opc) const {  // True iff opc is one of the opcodes enumerated below.
+  switch (opc) {
+  default: return false;  // Any opcode not listed here is not treated as high latency.
+  case X86::DIVSDrm:  // DIVSD / DIVSS opcodes (presumably the SSE scalar divides; the _Int names look like intrinsic forms -- confirm against the .td files).
+  case X86::DIVSDrm_Int:
+  case X86::DIVSDrr:
+  case X86::DIVSDrr_Int:
+  case X86::DIVSSrm:
+  case X86::DIVSSrm_Int:
+  case X86::DIVSSrr:
+  case X86::DIVSSrr_Int:
+  case X86::SQRTPDm:  // SQRTPD / SQRTPS / SQRTSD / SQRTSS opcodes (presumably the SSE square roots -- confirm against the .td files).
+  case X86::SQRTPDm_Int:
+  case X86::SQRTPDr:
+  case X86::SQRTPDr_Int:
+  case X86::SQRTPSm:
+  case X86::SQRTPSm_Int:
+  case X86::SQRTPSr:
+  case X86::SQRTPSr_Int:
+  case X86::SQRTSDm:
+  case X86::SQRTSDm_Int:
+  case X86::SQRTSDr:
+  case X86::SQRTSDr_Int:
+  case X86::SQRTSSm:
+  case X86::SQRTSSm_Int:
+  case X86::SQRTSSr:
+  case X86::SQRTSSr_Int:
+    return true;  // Every case label above falls through to this single return.
+  }
+}
+
+bool X86InstrInfo::  // Returns isHighLatencyDef() applied to the opcode of DefMI.
+hasHighOperandLatency(const InstrItineraryData *ItinData,  // Not read by this implementation.
+                      const MachineRegisterInfo *MRI,  // Not read by this implementation.
+                      const MachineInstr *DefMI, unsigned DefIdx,  // Only DefMI's opcode is consulted; DefIdx is not read.
+                      const MachineInstr *UseMI, unsigned UseIdx) const {  // Neither UseMI nor UseIdx is read.
+  return isHighLatencyDef(DefMI->getOpcode());  // The answer depends solely on the defining instruction's opcode.
+}
+
  namespace {
    /// CGBR - Create Global Base Reg pass. This initializes the PIC
    /// global base register for x86-32.
@@ -3129,11 +3156,11 @@ namespace {
          PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
        else
          PC = GlobalBaseReg;
-  
+
        // Operand of MovePCtoStack is completely ignored by asm printer. It's
        // only used in JIT code emission as displacement to pc.
        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-  
+
        // If we're using vanilla 'GOT' PIC style, we should use relative addressing
        // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
        if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {