Make DenseSet's erase pass on the return value rather than swallowing it.

[oota-llvm.git] / lib / Target / CellSPU / SPUISelLowering.cpp
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp

index 0596a59015231d5fd63ac9fa5ae6d52e5930d674..fe0f0196465bed2bdbc456c94093cb25f133ef26 100644 (file)
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1,5 +1,5 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
  //
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
  //                     The LLVM Compiler Infrastructure
  //
  // This file is distributed under the University of Illinois Open Source
@@ -15,8 +15,9 @@
  #include "SPUISelLowering.h"
  #include "SPUTargetMachine.h"
  #include "SPUFrameInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
  #include "llvm/CallingConv.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,13 +25,13 @@
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
  #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
-
+#include "llvm/Support/raw_ostream.h"
  #include <map>
  
  using namespace llvm;
@@ -39,10 +40,10 @@ using namespace llvm;
  namespace {
    std::map<unsigned, const char *> node_names;
  
-  //! MVT mapping to useful data for Cell SPU
+  //! EVT mapping to useful data for Cell SPU
    struct valtype_map_s {
-    const MVT   valtype;
-    const int   prefslot_byte;
+    EVT   valtype;
+    int   prefslot_byte;
    };
  
    const valtype_map_s valtype_map[] = {
@@ -58,7 +59,7 @@ namespace {
  
    const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  
-  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
      const valtype_map_s *retval = 0;
  
      for (size_t i = 0; i < n_valtype_map; ++i) {
@@ -70,10 +71,11 @@ namespace {
  
  #ifndef NDEBUG
      if (retval == 0) {
-      cerr << "getValueTypeMapEntry returns NULL for "
-           << VT.getMVTString()
-           << "\n";
-      abort();
+      std::string msg;
+      raw_string_ostream Msg(msg);
+      Msg << "getValueTypeMapEntry returns NULL for "
+           << VT.getEVTString();
+      llvm_report_error(Msg.str());
      }
  #endif
  
@@ -98,8 +100,8 @@ namespace {
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
-      MVT ArgVT = Op.getOperand(i).getValueType();
-      const Type *ArgTy = ArgVT.getTypeForMVT();
+      EVT ArgVT = Op.getOperand(i).getValueType();
+      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
        Entry.Node = Op.getOperand(i);
        Entry.Ty = ArgTy;
        Entry.isSExt = isSigned;
@@ -110,20 +112,22 @@ namespace {
                                             TLI.getPointerTy());
  
      // Splice the libcall in wherever FindInputOutputChains tells us to.
-    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+    const Type *RetTy =
+                Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
      std::pair<SDValue, SDValue> CallInfo =
              TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
-                            CallingConv::C, false, Callee, Args, DAG,
-                            Op.getNode()->getDebugLoc());
+                            0, TLI.getLibcallCallingConv(LC), false,
+                            /*isReturnValueUsed=*/true,
+                            Callee, Args, DAG, Op.getDebugLoc(),
+                            DAG.GetOrdering(InChain.getNode()));
  
      return CallInfo.first;
    }
  }
  
  SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
-  : TargetLowering(TM),
-    SPUTM(TM)
-{
+  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+    SPUTM(TM) {
    // Fold away setcc operations if possible.
    setPow2DivIsCheap();
  
@@ -151,6 +155,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
  
+  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+  setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
    // SPU constant load actions are custom lowered:
    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
    setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -158,7 +169,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // SPU's loads and stores have to be custom lowered:
    for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
         ++sctype) {
-    MVT VT = (MVT::SimpleValueType)sctype;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
  
      setOperationAction(ISD::LOAD,   VT, Custom);
      setOperationAction(ISD::STORE,  VT, Custom);
@@ -167,20 +178,20 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
      setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
  
      for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
-      MVT StoreVT = (MVT::SimpleValueType) stype;
+      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
        setTruncStoreAction(VT, StoreVT, Expand);
      }
    }
  
    for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
         ++sctype) {
-    MVT VT = (MVT::SimpleValueType) sctype;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
  
      setOperationAction(ISD::LOAD,   VT, Custom);
      setOperationAction(ISD::STORE,  VT, Custom);
  
      for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
-      MVT StoreVT = (MVT::SimpleValueType) stype;
+      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
        setTruncStoreAction(VT, StoreVT, Expand);
      }
    }
@@ -199,11 +210,37 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // SPU has no intrinsics for these particular operations:
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  
-  // SPU has no SREM/UREM instructions
-  setOperationAction(ISD::SREM, MVT::i32, Expand);
-  setOperationAction(ISD::UREM, MVT::i32, Expand);
-  setOperationAction(ISD::SREM, MVT::i64, Expand);
-  setOperationAction(ISD::UREM, MVT::i64, Expand);
+  // SPU has no division/remainder instructions
+  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
+  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
+  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
+  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
+  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
+  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
+  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
+  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
+  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
+  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
+  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
+  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
+  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
+  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
+  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
+  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
+  setOperationAction(ISD::SREM,    MVT::i128, Expand);
+  setOperationAction(ISD::UREM,    MVT::i128, Expand);
+  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
+  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
  
    // We don't support sin/cos/sqrt/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -249,6 +286,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    setOperationAction(ISD::MUL,  MVT::i32,    Legal);
    setOperationAction(ISD::MUL,  MVT::i64,    Legal);
  
+  // Expand double-width multiplication
+  // FIXME: It would probably be reasonable to support some of these operations
+  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
+  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
+  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
+  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
+  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
+  setOperationAction(ISD::MULHS,     MVT::i64, Expand);
+
    // Need to custom handle (some) common i8, i64 math ops
    setOperationAction(ISD::ADD,  MVT::i8,     Custom);
    setOperationAction(ISD::ADD,  MVT::i64,    Legal);
@@ -264,11 +320,19 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
    setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
    setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
+  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);
  
+  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
+  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
    setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
    setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
+  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);
  
+  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
+  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
    setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
+  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
+  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);
  
    // SPU has a version of select that implements (a&~c)|(b&c), just like
    // select ought to work:
@@ -286,10 +350,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // Custom lower i128 -> i64 truncates
    setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
  
+  // Custom lower i32/i64 -> i128 sign extend
+  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
    // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
    // to expand to a libcall, hence the custom lowering:
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
  
    // FDIV on SPU requires custom lowering
    setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall
@@ -312,24 +387,17 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // We cannot sextinreg(i1).  Expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  
-  // Support label based line numbers.
-  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
-  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
    // We want to legalize GlobalAddress and ConstantPool nodes into the
    // appropriate instructions to materialize the address.
    for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
         ++sctype) {
-    MVT VT = (MVT::SimpleValueType)sctype;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
  
      setOperationAction(ISD::GlobalAddress,  VT, Custom);
      setOperationAction(ISD::ConstantPool,   VT, Custom);
      setOperationAction(ISD::JumpTable,      VT, Custom);
    }
  
-  // RET must be custom lowered, to meet ABI requirements
-  setOperationAction(ISD::RET,           MVT::Other, Custom);
-
    // VASTART needs to be custom lowered to use the VarArgsFrameIndex
    setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  
@@ -366,7 +434,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
  
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD,     VT, Legal);
@@ -442,9 +510,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
      node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
      node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
      node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
-    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
-    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
-    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
      node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
      node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
      node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -462,13 +527,20 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
    return ((i != node_names.end()) ? i->second : 0);
  }
  
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
+  return 3;
+}
+
  //===----------------------------------------------------------------------===//
  // Return the Cell SPU's SETCC result type
  //===----------------------------------------------------------------------===//
  
-MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
    // i16 and i32 are valid SETCC result types
-  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
+  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
+    VT.getSimpleVT().SimpleTy :
+    MVT::i32);
  }
  
  //===----------------------------------------------------------------------===//
@@ -501,9 +573,9 @@ static SDValue
  LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    LoadSDNode *LN = cast<LoadSDNode>(Op);
    SDValue the_chain = LN->getChain();
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  MVT InVT = LN->getMemoryVT();
-  MVT OutVT = Op.getValueType();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT InVT = LN->getMemoryVT();
+  EVT OutVT = Op.getValueType();
    ISD::LoadExtType ExtType = LN->getExtensionType();
    unsigned alignment = LN->getAlignment();
    const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
@@ -534,7 +606,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
          // Simplify the base pointer for this case:
          basePtr = basePtr.getOperand(0);
          if ((offset & ~0xf) > 0) {
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & ~0xf), PtrVT));
          }
@@ -573,16 +645,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
            // Convert the (add <ptr>, <const>) to an indirect address contained
            // in a register. Note that this is done because we need to avoid
            // creating a 0(reg) d-form address due to the SPU's block loads.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
            the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
            basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
          } else {
            // Convert the (add <arg1>, <arg2>) to an indirect address, which
            // will likely be lowered as a reg(reg) x-form address.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          }
        } else {
-        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
        }
@@ -608,7 +680,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  
      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
-    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+    EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 
+                                 InVT, (128 / InVT.getSizeInBits()));
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                           DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
  
@@ -641,11 +714,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    case ISD::POST_INC:
    case ISD::POST_DEC:
    case ISD::LAST_INDEXED_MODE:
-    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+    {
+      std::string msg;
+      raw_string_ostream Msg(msg);
+      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
              "UNINDEXED\n";
-    cerr << (unsigned) LN->getAddressingMode() << "\n";
-    abort();
-    /*NOTREACHED*/
+      Msg << (unsigned) LN->getAddressingMode();
+      llvm_report_error(Msg.str());
+      /*NOTREACHED*/
+    }
    }
  
    return SDValue();
@@ -661,17 +738,17 @@ static SDValue
  LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    StoreSDNode *SN = cast<StoreSDNode>(Op);
    SDValue Value = SN->getValue();
-  MVT VT = Value.getValueType();
-  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT VT = Value.getValueType();
+  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    unsigned alignment = SN->getAlignment();
  
    switch (SN->getAddressingMode()) {
    case ISD::UNINDEXED: {
      // The vector type we really want to load from the 16-byte chunk.
-    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
-        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+                                 VT, (128 / VT.getSizeInBits()));
  
      SDValue alignLoadVec;
      SDValue basePtr = SN->getBasePtr();
@@ -690,18 +767,18 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  
          // Simplify the base pointer for this case:
          basePtr = basePtr.getOperand(0);
-        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                      basePtr,
                                      DAG.getConstant((offset & 0xf), PtrVT));
  
          if ((offset & ~0xf) > 0) {
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & ~0xf), PtrVT));
          }
        } else {
          // Otherwise, assume it's at byte 0 of basePtr
-        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                      basePtr,
                                      DAG.getConstant(0, PtrVT));
        }
@@ -720,16 +797,16 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
            // Convert the (add <ptr>, <const>) to an indirect address contained
            // in a register. Note that this is done because we need to avoid
            // creating a 0(reg) d-form address due to the SPU's block loads.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
            the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
            basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
          } else {
            // Convert the (add <arg1>, <arg2>) to an indirect address, which
            // will likely be lowered as a reg(reg) x-form address.
-          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          }
        } else {
-        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
        }
@@ -766,9 +843,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
      // to the stack pointer, which is always aligned.
  #if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-        cerr << "CellSPU LowerSTORE: basePtr = ";
+        errs() << "CellSPU LowerSTORE: basePtr = ";
          basePtr.getNode()->dump(&DAG);
-        cerr << "\n";
+        errs() << "\n";
        }
  #endif
  
@@ -779,7 +856,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  
      result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                           vectorizeOp, alignLoadVec,
-                         DAG.getNode(ISD::BIT_CONVERT, dl, 
+                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                       MVT::v4i32, insertEltOp));
  
      result = DAG.getStore(the_chain, dl, result, basePtr,
@@ -791,9 +868,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
        const SDValue &currentRoot = DAG.getRoot();
  
        DAG.setRoot(result);
-      cerr << "------- CellSPU:LowerStore result:\n";
+      errs() << "------- CellSPU:LowerStore result:\n";
        DAG.dump();
-      cerr << "-------\n";
+      errs() << "-------\n";
        DAG.setRoot(currentRoot);
      }
  #endif
@@ -806,40 +883,45 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    case ISD::POST_INC:
    case ISD::POST_DEC:
    case ISD::LAST_INDEXED_MODE:
-    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+    {
+      std::string msg;
+      raw_string_ostream Msg(msg);
+      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
              "UNINDEXED\n";
-    cerr << (unsigned) SN->getAddressingMode() << "\n";
-    abort();
-    /*NOTREACHED*/
+      Msg << (unsigned) SN->getAddressingMode();
+      llvm_report_error(Msg.str());
+      /*NOTREACHED*/
+    }
    }
  
    return SDValue();
  }
  
  //! Generate the address of a constant pool entry.
-SDValue
+static SDValue
  LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
    Constant *C = CP->getConstVal();
    SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
    SDValue Zero = DAG.getConstant(0, PtrVT);
    const TargetMachine &TM = DAG.getTarget();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
  
    if (TM.getRelocationModel() == Reloc::Static) {
      if (!ST->usingLargeMem()) {
        // Just return the SDValue with the constant pool address in it.
-      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
+      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
      } else {
-      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
-      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
-      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
+      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
      }
    }
  
-  assert(0 &&
-         "LowerConstantPool: Relocation model other than static"
-         " not supported.");
+  llvm_unreachable("LowerConstantPool: Relocation model other than static"
+                   " not supported.");
    return SDValue();
  }
  
@@ -851,48 +933,51 @@ SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM
  
  static SDValue
  LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
    JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
    SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    SDValue Zero = DAG.getConstant(0, PtrVT);
    const TargetMachine &TM = DAG.getTarget();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
  
    if (TM.getRelocationModel() == Reloc::Static) {
      if (!ST->usingLargeMem()) {
-      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
+      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
      } else {
-      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
-      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
-      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
+      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
      }
    }
  
-  assert(0 &&
-         "LowerJumpTable: Relocation model other than static not supported.");
+  llvm_unreachable("LowerJumpTable: Relocation model other than static"
+                   " not supported.");
    return SDValue();
  }
  
  static SDValue
  LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
    GlobalValue *GV = GSDN->getGlobal();
    SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
    const TargetMachine &TM = DAG.getTarget();
    SDValue Zero = DAG.getConstant(0, PtrVT);
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
  
    if (TM.getRelocationModel() == Reloc::Static) {
      if (!ST->usingLargeMem()) {
-      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
+      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
      } else {
-      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
-      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
-      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
+      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
      }
    } else {
-    cerr << "LowerGlobalAddress: Relocation model other than static not "
-         << "supported.\n";
-    abort();
+    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
+                      "not supported.");
      /*NOTREACHED*/
    }
  
@@ -902,7 +987,9 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  //! Custom lower double precision floating point constants
  static SDValue
  LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
  
    if (VT == MVT::f64) {
      ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
@@ -912,24 +999,25 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  
      uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
      SDValue T = DAG.getConstant(dbits, MVT::i64);
-    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
-    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
+    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
    }
  
    return SDValue();
  }
  
-static SDValue
-LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
-{
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+                                        CallingConv::ID CallConv, bool isVarArg,
+                                        const SmallVectorImpl<ISD::InputArg>
+                                          &Ins,
+                                        DebugLoc dl, SelectionDAG &DAG,
+                                        SmallVectorImpl<SDValue> &InVals) {
+
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  SmallVector<SDValue, 48> ArgValues;
-  SDValue Root = Op.getOperand(0);
-  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
-  DebugLoc dl = Op.getDebugLoc();
  
    const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
    const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
@@ -938,24 +1026,24 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
    unsigned ArgRegIdx = 0;
    unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  
    // Add DAG nodes to load the arguments or copy them out of registers.
-  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
-       ArgNo != e; ++ArgNo) {
-    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+    EVT ObjectVT = Ins[ArgNo].VT;
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      SDValue ArgVal;
  
      if (ArgRegIdx < NumArgRegs) {
        const TargetRegisterClass *ArgRegClass;
  
-      switch (ObjectVT.getSimpleVT()) {
+      switch (ObjectVT.getSimpleVT().SimpleTy) {
        default: {
-        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
-             << ObjectVT.getMVTString()
-             << "\n";
-        abort();
+        std::string msg;
+        raw_string_ostream Msg(msg);
+        Msg << "LowerFormalArguments Unhandled argument type: "
+             << ObjectVT.getEVTString();
+        llvm_report_error(Msg.str());
        }
        case MVT::i8:
          ArgRegClass = &SPU::R8CRegClass;
@@ -990,21 +1078,21 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
  
        unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
-      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        // We need to load the argument to a virtual register if we determined
        // above that we ran out of physical registers of the appropriate type
        // or we're forced to do vararg
-      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
        ArgOffset += StackSlotSize;
      }
  
-    ArgValues.push_back(ArgVal);
+    InVals.push_back(ArgVal);
      // Update the chain
-    Root = ArgVal.getOperand(0);
+    Chain = ArgVal.getOperand(0);
    }
  
    // vararg handling:
@@ -1016,26 +1104,23 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
      // Create the frame slot
  
      for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
-      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
+                                                 true, false);
        SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
        SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
-      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
-      Root = Store.getOperand(0);
+      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
+      Chain = Store.getOperand(0);
        MemOps.push_back(Store);
  
        // Increment address by stack slot size for the next stored argument
        ArgOffset += StackSlotSize;
      }
      if (!MemOps.empty())
-      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 
-                         &MemOps[0], MemOps.size());
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                          &MemOps[0], MemOps.size());
    }
  
-  ArgValues.push_back(Root);
-
-  // Return the new list of results.
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
-                     &ArgValues[0], ArgValues.size());
+  return Chain;
  }
  
  /// isLSAAddress - Return the immediate to use if the specified
@@ -1052,24 +1137,25 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
    return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
  }
  
-static SDValue
-LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
-  SDValue Chain = TheCall->getChain();
-  SDValue Callee    = TheCall->getCallee();
-  unsigned NumOps     = TheCall->getNumArgs();
+SDValue
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                             CallingConv::ID CallConv, bool isVarArg,
+                             bool &isTailCall,
+                             const SmallVectorImpl<ISD::OutputArg> &Outs,
+                             const SmallVectorImpl<ISD::InputArg> &Ins,
+                             DebugLoc dl, SelectionDAG &DAG,
+                             SmallVectorImpl<SDValue> &InVals) {
+  // CellSPU target does not yet support tail call optimization.
+  isTailCall = false;
+
+  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+  unsigned NumOps     = Outs.size();
    unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
    const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
    const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
-  DebugLoc dl = TheCall->getDebugLoc();
  
    // Handy pointer type
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
-  // Accumulate how many bytes are to be pushed on the stack, including the
-  // linkage area, and parameter passing area.  According to the SPU ABI,
-  // we minimally need space for [LR] and [SP]
-  unsigned NumStackBytes = SPUFrameInfo::minStackSize();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  
    // Set up a copy of the stack pointer for use loading and storing any
    // arguments that may not fit in the registers available for argument
@@ -1087,15 +1173,15 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    SmallVector<SDValue, 8> MemOpChains;
  
    for (unsigned i = 0; i != NumOps; ++i) {
-    SDValue Arg = TheCall->getArg(i);
+    SDValue Arg = Outs[i].Val;
  
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
      SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
  
-    switch (Arg.getValueType().getSimpleVT()) {
-    default: assert(0 && "Unexpected ValueType for argument!");
+    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unexpected ValueType for argument!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
@@ -1133,8 +1219,12 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
      }
    }
  
-  // Update number of stack bytes actually used, insert a call sequence start
-  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+  // Accumulate how many bytes are to be pushed on the stack, including the
+  // linkage area, and parameter passing area.  According to the SPU ABI,
+  // we minimally need space for [LR] and [SP].
+  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
+
+  // Insert a call sequence start
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                              true));
  
@@ -1148,7 +1238,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    // and flag operands which copy the outgoing args into the appropriate regs.
    SDValue InFlag;
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
@@ -1161,7 +1251,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    // node so that legalize doesn't hack it.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      GlobalValue *GV = G->getGlobal();
-    MVT CalleeVT = Callee.getValueType();
+    EVT CalleeVT = Callee.getValueType();
      SDValue Zero = DAG.getConstant(0, PtrVT);
      SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
  
@@ -1175,25 +1265,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
        // This may be an unsafe assumption for JIT and really large compilation
        // units.
        if (GV->isDeclaration()) {
-        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
+        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
        } else {
-        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
+        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
        }
      } else {
        // "Large memory" mode: Turn all calls into indirect calls with a X-form
        // address pairs:
-      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
+      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    MVT CalleeVT = Callee.getValueType();
+    EVT CalleeVT = Callee.getValueType();
      SDValue Zero = DAG.getConstant(0, PtrVT);
      SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
          Callee.getValueType());
  
      if (!ST->usingLargeMem()) {
-      Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
+      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
      } else {
-      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
+      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
      }
    } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
      // If this is an absolute destination address that appears to be a legal
@@ -1219,50 +1309,46 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                               DAG.getIntPtrConstant(0, true), InFlag);
-  if (TheCall->getValueType(0) != MVT::Other)
+  if (!Ins.empty())
      InFlag = Chain.getValue(1);
  
-  SDValue ResultVals[3];
-  unsigned NumResults = 0;
+  // If the function returns void, just return the chain.
+  if (Ins.empty())
+    return Chain;
  
    // If the call has results, copy the values out of the ret val registers.
-  switch (TheCall->getValueType(0).getSimpleVT()) {
-  default: assert(0 && "Unexpected ret value!");
+  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected ret value!");
    case MVT::Other: break;
    case MVT::i32:
-    if (TheCall->getValueType(1) == MVT::i32) {
-      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, 
+    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
+      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                   MVT::i32, InFlag).getValue(1);
-      ResultVals[0] = Chain.getValue(0);
+      InVals.push_back(Chain.getValue(0));
        Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                   Chain.getValue(2)).getValue(1);
-      ResultVals[1] = Chain.getValue(0);
-      NumResults = 2;
+      InVals.push_back(Chain.getValue(0));
      } else {
-      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, 
+      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                   InFlag).getValue(1);
-      ResultVals[0] = Chain.getValue(0);
-      NumResults = 1;
+      InVals.push_back(Chain.getValue(0));
      }
      break;
    case MVT::i64:
-    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, 
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                                 InFlag).getValue(1);
-    ResultVals[0] = Chain.getValue(0);
-    NumResults = 1;
+    InVals.push_back(Chain.getValue(0));
      break;
    case MVT::i128:
-    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, 
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                                 InFlag).getValue(1);
-    ResultVals[0] = Chain.getValue(0);
-    NumResults = 1;
+    InVals.push_back(Chain.getValue(0));
      break;
    case MVT::f32:
    case MVT::f64:
-    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                                 InFlag).getValue(1);
-    ResultVals[0] = Chain.getValue(0);
-    NumResults = 1;
+    InVals.push_back(Chain.getValue(0));
      break;
    case MVT::v2f64:
    case MVT::v2i64:
@@ -1270,30 +1356,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
-    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                                     InFlag).getValue(1);
-    ResultVals[0] = Chain.getValue(0);
-    NumResults = 1;
+    InVals.push_back(Chain.getValue(0));
      break;
    }
  
-  // If the function returns void, just return the chain.
-  if (NumResults == 0)
-    return Chain;
-
-  // Otherwise, merge everything together with a MERGE_VALUES node.
-  ResultVals[NumResults++] = Chain;
-  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
-  return Res.getValue(Op.getResNo());
+  return Chain;
  }
  
-static SDValue
-LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+                               CallingConv::ID CallConv, bool isVarArg,
+                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               DebugLoc dl, SelectionDAG &DAG) {
+
    SmallVector<CCValAssign, 16> RVLocs;
-  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
-  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
-  CCState CCInfo(CC, isVarArg, TM, RVLocs);
-  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
  
    // If this is the first return lowered for this function, add the regs to the
    // liveout set for the function.
@@ -1302,21 +1383,21 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
    }
  
-  SDValue Chain = Op.getOperand(0);
    SDValue Flag;
  
    // Copy the result values into the output registers.
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
-    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Outs[i].Val, Flag);
      Flag = Chain.getValue(1);
    }
  
    if (Flag.getNode())
-    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
+    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
    else
-    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
+    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
  }
  
  
@@ -1343,14 +1424,14 @@ getVecImm(SDNode *N) {
      }
    }
  
-  return 0; // All UNDEF: use implicit def.; not Constant node
+  return 0;
  }
  
  /// get_vec_i18imm - Test if this vector is a vector filled with the same value
  /// and the value fits into an unsigned 18-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
-                              MVT ValueType) {
+                              EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      uint64_t Value = CN->getZExtValue();
      if (ValueType == MVT::i64) {
@@ -1372,7 +1453,7 @@ SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
  /// and the value fits into a signed 16-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
-                              MVT ValueType) {
+                              EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      int64_t Value = CN->getSExtValue();
      if (ValueType == MVT::i64) {
@@ -1395,7 +1476,7 @@ SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
  /// and the value fits into a signed 10-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
-                              MVT ValueType) {
+                              EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      int64_t Value = CN->getSExtValue();
      if (ValueType == MVT::i64) {
@@ -1421,7 +1502,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
  /// constant vectors. Thus, we test to see if the upper and lower bytes are the
  /// same value.
  SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
-                             MVT ValueType) {
+                             EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      int Value = (int) CN->getZExtValue();
      if (ValueType == MVT::i16
@@ -1440,7 +1521,7 @@ SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
  /// and the value fits into a signed 16-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
-                               MVT ValueType) {
+                               EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      uint64_t Value = CN->getZExtValue();
      if ((ValueType == MVT::i32
@@ -1470,212 +1551,107 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
    return SDValue();
  }
  
-// If this is a vector of constants or undefs, get the bits.  A bit in
-// UndefBits is set if the corresponding element of the vector is an
-// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
-// zero.   Return true if this is not an array of constants, false if it is.
-//
-static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
-                                       uint64_t UndefBits[2]) {
-  // Start with zero'd results.
-  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
-
-  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
-  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
-    SDValue OpVal = BV->getOperand(i);
-
-    unsigned PartNo = i >= e/2;     // In the upper 128 bits?
-    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
-
-    uint64_t EltBits = 0;
-    if (OpVal.getOpcode() == ISD::UNDEF) {
-      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
-      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
-      continue;
-    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
-      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
-    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
-      const APFloat &apf = CN->getValueAPF();
-      EltBits = (CN->getValueType(0) == MVT::f32
-                 ? FloatToBits(apf.convertToFloat())
-                 : DoubleToBits(apf.convertToDouble()));
-    } else {
-      // Nonconstant element.
-      return true;
-    }
+//! Lower a BUILD_VECTOR instruction creatively:
+static SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+  unsigned minSplatBits = EltVT.getSizeInBits();
  
-    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
-  }
+  if (minSplatBits < 16)
+    minSplatBits = 16;
  
-  //printf("%llx %llx  %llx %llx\n",
-  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
-  return false;
-}
+  APInt APSplatBits, APSplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
  
-/// If this is a splat (repetition) of a value across the whole vector, return
-/// the smallest size that splats it.  For example, "0x01010101010101..." is a
-/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
-/// SplatSize = 1 byte.
-static bool isConstantSplat(const uint64_t Bits128[2],
-                            const uint64_t Undef128[2],
-                            int MinSplatBits,
-                            uint64_t &SplatBits, uint64_t &SplatUndef,
-                            int &SplatSize) {
-  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
-  // the same as the lower 64-bits, ignoring undefs.
-  uint64_t Bits64  = Bits128[0] | Bits128[1];
-  uint64_t Undef64 = Undef128[0] & Undef128[1];
-  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
-  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
-  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
-  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
-
-  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
-    if (MinSplatBits < 64) {
-
-      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
-      // undefs.
-      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
-        if (MinSplatBits < 32) {
-
-          // If the top 16-bits are different than the lower 16-bits, ignoring
-          // undefs, we have an i32 splat.
-          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
-            if (MinSplatBits < 16) {
-              // If the top 8-bits are different than the lower 8-bits, ignoring
-              // undefs, we have an i16 splat.
-              if ((Bits16 & (uint16_t(~Undef16) >> 8))
-                  == ((Bits16 >> 8) & ~Undef16)) {
-                // Otherwise, we have an 8-bit splat.
-                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
-                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
-                SplatSize = 1;
-                return true;
-              }
-            } else {
-              SplatBits = Bits16;
-              SplatUndef = Undef16;
-              SplatSize = 2;
-              return true;
-            }
-          }
-        } else {
-          SplatBits = Bits32;
-          SplatUndef = Undef32;
-          SplatSize = 4;
-          return true;
-        }
-      }
-    } else {
-      SplatBits = Bits128[0];
-      SplatUndef = Undef128[0];
-      SplatSize = 8;
-      return true;
-    }
-  }
+  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                            HasAnyUndefs, minSplatBits)
+      || minSplatBits < SplatBitSize)
+    return SDValue();   // Wasn't a constant vector or splat exceeded min
  
-  return false;  // Can't be a splat if two pieces don't match.
-}
+  uint64_t SplatBits = APSplatBits.getZExtValue();
  
-//! Lower a BUILD_VECTOR instruction creatively:
-SDValue
-LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
-  // If this is a vector of constants or undefs, get the bits.  A bit in
-  // UndefBits is set if the corresponding element of the vector is an
-  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
-  // zero.
-  uint64_t VectorBits[2];
-  uint64_t UndefBits[2];
-  uint64_t SplatBits, SplatUndef;
-  int SplatSize;
-  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
-      || !isConstantSplat(VectorBits, UndefBits,
-                          VT.getVectorElementType().getSizeInBits(),
-                          SplatBits, SplatUndef, SplatSize))
-    return SDValue();   // Not a constant vector, not a splat.
-
-  switch (VT.getSimpleVT()) {
-  default:
-    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
-         << VT.getMVTString()
-         << "\n";
-    abort();
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+         << VT.getEVTString();
+    llvm_report_error(Msg.str());
      /*NOTREACHED*/
+  }
    case MVT::v4f32: {
      uint32_t Value32 = uint32_t(SplatBits);
-    assert(SplatSize == 4
+    assert(SplatBitSize == 32
             && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
      // NOTE: pretend the constant is an integer. LLVM won't load FP constants
      SDValue T = DAG.getConstant(Value32, MVT::i32);
-    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
-                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
      break;
    }
    case MVT::v2f64: {
      uint64_t f64val = uint64_t(SplatBits);
-    assert(SplatSize == 8
+    assert(SplatBitSize == 64
             && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
      // NOTE: pretend the constant is an integer. LLVM won't load FP constants
      SDValue T = DAG.getConstant(f64val, MVT::i64);
-    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
-                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
+    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
      break;
    }
    case MVT::v16i8: {
     // 8-bit constants have to be expanded to 16-bits
-   unsigned short Value16 = SplatBits | (SplatBits << 8);
-   SDValue Ops[8];
-   for (int i = 0; i < 8; ++i)
-     Ops[i] = DAG.getConstant(Value16, MVT::i16);
-   return DAG.getNode(ISD::BIT_CONVERT, VT,
-                      DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
+   unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+   SmallVector<SDValue, 8> Ops;
+
+   Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+   return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
    }
    case MVT::v8i16: {
-    unsigned short Value16;
-    if (SplatSize == 2)
-      Value16 = (unsigned short) (SplatBits & 0xffff);
-    else
-      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
-    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
-    SDValue Ops[8];
-    for (int i = 0; i < 8; ++i) Ops[i] = T;
-    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
+    unsigned short Value16 = SplatBits;
+    SDValue T = DAG.getConstant(Value16, EltVT);
+    SmallVector<SDValue, 8> Ops;
+
+    Ops.assign(8, T);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
    }
    case MVT::v4i32: {
-    unsigned int Value = SplatBits;
-    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
-    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
+    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
    }
    case MVT::v2i32: {
-    unsigned int Value = SplatBits;
-    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
-    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T);
+    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
    }
    case MVT::v2i64: {
-    return SPU::LowerSplat_v2i64(VT, DAG, SplatBits);
+    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
    }
    }
  
    return SDValue();
  }
  
+/*!
+ */
  SDValue
-SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+                     DebugLoc dl) {
    uint32_t upper = uint32_t(SplatVal >> 32);
    uint32_t lower = uint32_t(SplatVal);
  
    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
-    return DAG.getNode(ISD::BIT_CONVERT, OpVT,
-                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     Val, Val, Val, Val));
    } else {
-    SDValue LO32;
-    SDValue HI32;
-    SmallVector<SDValue, 16> ShufBytes;
-    SDValue Result;
      bool upper_special, lower_special;
  
      // NOTE: This code creates common-case shuffle masks that can be easily
@@ -1686,19 +1662,31 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
  
+    // Both upper and lower are special, lower to a constant pool load:
+    if (lower_special && upper_special) {
+      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+                         SplatValCN, SplatValCN);
+    }
+
+    SDValue LO32;
+    SDValue HI32;
+    SmallVector<SDValue, 16> ShufBytes;
+    SDValue Result;
+
      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
-      LO32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
-                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }
  
      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
-      HI32 = DAG.getNode(ISD::BIT_CONVERT, OpVT,
-                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }
  
@@ -1708,13 +1696,6 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
-    if (lower_special && upper_special) {
-      // Unhappy situation... both upper and lower are special, so punt with
-      // a target constant:
-      SDValue Zero = DAG.getConstant(0, MVT::i32);
-      HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
-                                Zero, Zero);
-    }
  
      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
@@ -1742,8 +1723,8 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }
  
-    return DAG.getNode(SPUISD::SHUFB, OpVT, HI32, LO32,
-                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
@@ -1762,17 +1743,18 @@ SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal) {
  /// \note
  /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
  static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
    SDValue V1 = Op.getOperand(0);
    SDValue V2 = Op.getOperand(1);
-  SDValue PermMask = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
  
    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
  
    // If we have a single element being moved from V1 to V2, this can be handled
    // using the C*[DX] compute mask instructions, but the vector elements have
    // to be monotonically increasing with one exception element.
-  MVT VecVT = V1.getValueType();
-  MVT EltVT = VecVT.getVectorElementType();
+  EVT VecVT = V1.getValueType();
+  EVT EltVT = VecVT.getVectorElementType();
    unsigned EltsFromV2 = 0;
    unsigned V2Elt = 0;
    unsigned V2EltIdx0 = 0;
@@ -1792,41 +1774,42 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
    } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
      V2EltIdx0 = 2;
    } else
-    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
  
-  for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
-    if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
-      unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
+  for (unsigned i = 0; i != MaxElts; ++i) {
+    if (SVN->getMaskElt(i) < 0)
+      continue;
+    
+    unsigned SrcElt = SVN->getMaskElt(i);
  
-      if (monotonic) {
-        if (SrcElt >= V2EltIdx0) {
-          if (1 >= (++EltsFromV2)) {
-            V2Elt = (V2EltIdx0 - SrcElt) << 2;
-          }
-        } else if (CurrElt != SrcElt) {
-          monotonic = false;
+    if (monotonic) {
+      if (SrcElt >= V2EltIdx0) {
+        if (1 >= (++EltsFromV2)) {
+          V2Elt = (V2EltIdx0 - SrcElt) << 2;
          }
-
-        ++CurrElt;
+      } else if (CurrElt != SrcElt) {
+        monotonic = false;
        }
  
-      if (rotate) {
-        if (PrevElt > 0 && SrcElt < MaxElts) {
-          if ((PrevElt == SrcElt - 1)
-              || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
-            PrevElt = SrcElt;
-            if (SrcElt == 0)
-              V0Elt = i;
-          } else {
-            rotate = false;
-          }
-        } else if (PrevElt == 0) {
-          // First time through, need to keep track of previous element
+      ++CurrElt;
+    }
+
+    if (rotate) {
+      if (PrevElt > 0 && SrcElt < MaxElts) {
+        if ((PrevElt == SrcElt - 1)
+            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
            PrevElt = SrcElt;
+          if (SrcElt == 0)
+            V0Elt = i;
          } else {
-          // This isn't a rotation, takes elements from vector 2
            rotate = false;
          }
+      } else if (PrevElt == 0) {
+        // First time through, need to keep track of previous element
+        PrevElt = SrcElt;
+      } else {
+        // This isn't a rotation, takes elements from vector 2
+        rotate = false;
        }
      }
    }
@@ -1836,21 +1819,22 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
      // Initialize temporary register to 0
      SDValue InitTempReg =
-      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
+      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
      // Copy register's contents as index in SHUFFLE_MASK:
      SDValue ShufMaskOp =
-      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
+      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
                    DAG.getTargetConstant(V2Elt, MVT::i32),
-                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
+                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
      // Use shuffle mask in SHUFB synthetic instruction:
-    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
+    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+                       ShufMaskOp);
    } else if (rotate) {
      int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
  
-    return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
+    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                         V1, DAG.getConstant(rotamt, MVT::i16));
    } else {
     // Convert the SHUFFLE_VECTOR mask's input element units to the
@@ -1858,27 +1842,22 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
      unsigned BytesPerElement = EltVT.getSizeInBits()/8;
  
      SmallVector<SDValue, 16> ResultMask;
-    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
-      unsigned SrcElt;
-      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
-        SrcElt = 0;
-      else
-        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
-
-      for (unsigned j = 0; j < BytesPerElement; ++j) {
-        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
-                                             MVT::i8));
-      }
+    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+      for (unsigned j = 0; j < BytesPerElement; ++j)
+        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
      }
  
-    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
+    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                      &ResultMask[0], ResultMask.size());
-    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
+    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
    }
  }
  
  static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
    SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
+  DebugLoc dl = Op.getDebugLoc();
  
    if (Op0.getNode()->getOpcode() == ISD::Constant) {
      // For a constant, build the appropriate constant vector, which will
@@ -1886,13 +1865,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  
      ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
      SmallVector<SDValue, 16> ConstVecValues;
-    MVT VT;
+    EVT VT;
      size_t n_copies;
  
      // Create a constant vector:
-    switch (Op.getValueType().getSimpleVT()) {
-    default: assert(0 && "Unexpected constant value type in "
-                         "LowerSCALAR_TO_VECTOR");
+    switch (Op.getValueType().getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unexpected constant value type in "
+                              "LowerSCALAR_TO_VECTOR");
      case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
      case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
      case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
@@ -1905,19 +1884,19 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
      for (size_t j = 0; j < n_copies; ++j)
        ConstVecValues.push_back(CValue);
  
-    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                         &ConstVecValues[0], ConstVecValues.size());
    } else {
      // Otherwise, copy the value from one register to another:
-    switch (Op0.getValueType().getSimpleVT()) {
-    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
+    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64:
      case MVT::f32:
      case MVT::f64:
-      return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
+      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
      }
    }
  
@@ -1925,9 +1904,10 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  }
  
  static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
    SDValue N = Op.getOperand(0);
    SDValue Elt = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
    SDValue retval;
  
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
@@ -1936,24 +1916,24 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  
      // sanity checks:
      if (VT == MVT::i8 && EltNo >= 16)
-      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
      else if (VT == MVT::i16 && EltNo >= 8)
-      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
      else if (VT == MVT::i32 && EltNo >= 4)
-      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
      else if (VT == MVT::i64 && EltNo >= 2)
-      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
  
      if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
        // i32 and i64: Element 0 is the preferred slot
-      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
+      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
      }
  
      // Need to generate shuffle mask and extract:
      int prefslot_begin = -1, prefslot_end = -1;
      int elt_byte = EltNo * VT.getSizeInBits() / 8;
  
-    switch (VT.getSimpleVT()) {
+    switch (VT.getSimpleVT().SimpleTy) {
      default:
        assert(false && "Invalid value type!");
      case MVT::i8: {
@@ -1979,7 +1959,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
      assert(prefslot_begin != -1 && prefslot_end != -1 &&
             "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
  
-    unsigned int ShufBytes[16];
+    unsigned int ShufBytes[16] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
      for (int i = 0; i < 16; ++i) {
        // zero fill uppper part of preferred slot, don't care about the
        // other slots:
@@ -2005,25 +1987,25 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
        ShufMask[i] = DAG.getConstant(bits, MVT::i32);
      }
  
-    SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                                      &ShufMask[0],
-                                      sizeof(ShufMask) / sizeof(ShufMask[0]));
+    SDValue ShufMaskVec =
+      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
  
-    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(SPUISD::SHUFB, N.getValueType(),
+    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                       N, N, ShufMaskVec));
    } else {
      // Variable index: Rotate the requested element into slot 0, then replicate
      // slot 0 across the vector
-    MVT VecVT = N.getValueType();
+    EVT VecVT = N.getValueType();
      if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
-      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
-      abort();
+      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+                        "vector type!");
      }
  
      // Make life easier by making sure the index is zero-extended to i32
      if (Elt.getValueType() != MVT::i32)
-      Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
+      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
  
      // Scale the index to a bit/byte shift quantity
      APInt scaleFactor =
@@ -2033,52 +2015,52 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  
      if (scaleShift > 0) {
        // Scale the shift factor:
-      Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
+      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                          DAG.getConstant(scaleShift, MVT::i32));
      }
  
-    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
+    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
  
      // Replicate the bytes starting at byte 0 across the entire vector (for
      // consistency with the notion of a unified register set)
      SDValue replicate;
  
-    switch (VT.getSimpleVT()) {
+    switch (VT.getSimpleVT().SimpleTy) {
      default:
-      cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
-      abort();
+      llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+                        "type");
        /*NOTREACHED*/
      case MVT::i8: {
        SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
-      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
-                              factor, factor);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              factor, factor, factor, factor);
        break;
      }
      case MVT::i16: {
        SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
-      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
-                              factor, factor);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              factor, factor, factor, factor);
        break;
      }
      case MVT::i32:
      case MVT::f32: {
        SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
-      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
-                              factor, factor);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              factor, factor, factor, factor);
        break;
      }
      case MVT::i64:
      case MVT::f64: {
        SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
        SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
-      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
-                              loFactor, hiFactor);
+      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                              loFactor, hiFactor, loFactor, hiFactor);
        break;
      }
      }
  
-    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(SPUISD::SHUFB, VecVT,
+    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       vecShift, vecShift, replicate));
    }
  
@@ -2089,23 +2071,24 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
    SDValue VecOp = Op.getOperand(0);
    SDValue ValOp = Op.getOperand(1);
    SDValue IdxOp = Op.getOperand(2);
-  MVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
  
    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
  
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Use $sp ($1) because it's always 16-byte aligned and it's available:
-  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  DAG.getRegister(SPU::R1, PtrVT),
                                  DAG.getConstant(CN->getSExtValue(), PtrVT));
-  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
+  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
  
    SDValue result =
-    DAG.getNode(SPUISD::SHUFB, VT,
-                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
+    DAG.getNode(SPUISD::SHUFB, dl, VT,
+                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                  VecOp,
-                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
+                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
  
    return result;
  }
@@ -2114,22 +2097,23 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                             const TargetLowering &TLI)
  {
    SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
-  MVT ShiftVT = TLI.getShiftAmountTy();
+  DebugLoc dl = Op.getDebugLoc();
+  EVT ShiftVT = TLI.getShiftAmountTy();
  
    assert(Op.getValueType() == MVT::i8);
    switch (Opc) {
    default:
-    assert(0 && "Unhandled i8 math operator");
+    llvm_unreachable("Unhandled i8 math operator");
      /*NOTREACHED*/
      break;
    case ISD::ADD: {
      // 8-bit addition: Promote the arguments up to 16-bits and truncate
      // the result:
      SDValue N1 = Op.getOperand(1);
-    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
-    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
-                       DAG.getNode(Opc, MVT::i16, N0, N1));
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  
    }
  
@@ -2137,81 +2121,75 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
      // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
      // the result:
      SDValue N1 = Op.getOperand(1);
-    N0 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0);
-    N1 = DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1);
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
-                       DAG.getNode(Opc, MVT::i16, N0, N1));
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
    }
    case ISD::ROTR:
    case ISD::ROTL: {
      SDValue N1 = Op.getOperand(1);
-    unsigned N1Opc;
-    N0 = (N0.getOpcode() != ISD::Constant
-          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
-                            MVT::i16));
-    N1Opc = N1.getValueType().bitsLT(ShiftVT)
-            ? ISD::ZERO_EXTEND
-            : ISD::TRUNCATE;
-    N1 = (N1.getOpcode() != ISD::Constant
-          ? DAG.getNode(N1Opc, ShiftVT, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
-                            TLI.getShiftAmountTy()));
+    EVT N1VT = N1.getValueType();
+
+    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+    if (!N1VT.bitsEq(ShiftVT)) {
+      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+                       ? ISD::ZERO_EXTEND
+                       : ISD::TRUNCATE;
+      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+    }
+
+    // Replicate lower 8-bits into upper 8:
      SDValue ExpandArg =
-      DAG.getNode(ISD::OR, MVT::i16, N0,
-                  DAG.getNode(ISD::SHL, MVT::i16,
+      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                                N0, DAG.getConstant(8, MVT::i32)));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
-                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
+
+    // Truncate back down to i8
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
    }
    case ISD::SRL:
    case ISD::SHL: {
      SDValue N1 = Op.getOperand(1);
-    unsigned N1Opc;
-    N0 = (N0.getOpcode() != ISD::Constant
-          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
-                            MVT::i32));
-    N1Opc = N1.getValueType().bitsLT(ShiftVT)
-            ? ISD::ZERO_EXTEND
-            : ISD::TRUNCATE;
-    N1 = (N1.getOpcode() != ISD::Constant
-          ? DAG.getNode(N1Opc, ShiftVT, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
-                       DAG.getNode(Opc, MVT::i16, N0, N1));
+    EVT N1VT = N1.getValueType();
+
+    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+    if (!N1VT.bitsEq(ShiftVT)) {
+      unsigned N1Opc = ISD::ZERO_EXTEND;
+
+      if (N1.getValueType().bitsGT(ShiftVT))
+        N1Opc = ISD::TRUNCATE;
+
+      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+    }
+
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
    }
    case ISD::SRA: {
      SDValue N1 = Op.getOperand(1);
-    unsigned N1Opc;
-    N0 = (N0.getOpcode() != ISD::Constant
-          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
-                            MVT::i16));
-    N1Opc = N1.getValueType().bitsLT(ShiftVT)
-            ? ISD::SIGN_EXTEND
-            : ISD::TRUNCATE;
-    N1 = (N1.getOpcode() != ISD::Constant
-          ? DAG.getNode(N1Opc, ShiftVT, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
-                            ShiftVT));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
-                       DAG.getNode(Opc, MVT::i16, N0, N1));
+    EVT N1VT = N1.getValueType();
+
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    if (!N1VT.bitsEq(ShiftVT)) {
+      unsigned N1Opc = ISD::SIGN_EXTEND;
+
+      if (N1VT.bitsGT(ShiftVT))
+        N1Opc = ISD::TRUNCATE;
+      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+    }
+
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
    }
    case ISD::MUL: {
      SDValue N1 = Op.getOperand(1);
-    unsigned N1Opc;
-    N0 = (N0.getOpcode() != ISD::Constant
-          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
-                            MVT::i16));
-    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
-    N1 = (N1.getOpcode() != ISD::Constant
-          ? DAG.getNode(N1Opc, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
-                            MVT::i16));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
-                       DAG.getNode(Opc, MVT::i16, N0, N1));
+
+    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
      break;
    }
    }
@@ -2219,42 +2197,13 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
    return SDValue();
  }
  
-//! Generate the carry-generate shuffle mask.
-SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
-  SmallVector<SDValue, 16 > ShufBytes;
-
-  // Create the shuffle mask for "rotating" the borrow up one register slot
-  // once the borrow is generated.
-  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
-  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                     &ShufBytes[0], ShufBytes.size());
-}
-
-//! Generate the borrow-generate shuffle mask
-SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
-  SmallVector<SDValue, 16 > ShufBytes;
-
-  // Create the shuffle mask for "rotating" the borrow up one register slot
-  // once the borrow is generated.
-  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
-  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                     &ShufBytes[0], ShufBytes.size());
-}
-
  //! Lower byte immediate operations for v16i8 vectors:
  static SDValue
  LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
    SDValue ConstVec;
    SDValue Arg;
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
  
    ConstVec = Op.getOperand(0);
    Arg = Op.getOperand(1);
@@ -2271,25 +2220,24 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
    }
  
    if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
-    uint64_t VectorBits[2];
-    uint64_t UndefBits[2];
-    uint64_t SplatBits, SplatUndef;
-    int SplatSize;
-
-    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
-        && isConstantSplat(VectorBits, UndefBits,
-                           VT.getVectorElementType().getSizeInBits(),
-                           SplatBits, SplatUndef, SplatSize)) {
-      SDValue tcVec[16];
+    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+    APInt APSplatBits, APSplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+                              HasAnyUndefs, minSplatBits)
+        && minSplatBits <= SplatBitSize) {
+      uint64_t SplatBits = APSplatBits.getZExtValue();
        SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
-      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
-
-      // Turn the BUILD_VECTOR into a set of target constants:
-      for (size_t i = 0; i < tcVecSize; ++i)
-        tcVec[i] = tc;
  
-      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
-                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
+      SmallVector<SDValue, 16> tcVec;
+      tcVec.assign(16, tc);
+      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
      }
    }
  
@@ -2305,20 +2253,22 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
    ones per byte, which then have to be accumulated.
  */
  static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
-  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  EVT VT = Op.getValueType();
+  EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 
+                               VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
  
-  switch (VT.getSimpleVT()) {
+  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      SDValue N = Op.getOperand(0);
      SDValue Elt0 = DAG.getConstant(0, MVT::i32);
  
-    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
-    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
  
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
    }
  
    case MVT::i16: {
@@ -2332,22 +2282,22 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
      SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
      SDValue Shift1 = DAG.getConstant(8, MVT::i32);
  
-    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
-    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
  
      // CNTB_result becomes the chain to which all of the virtual registers
      // CNTB_reg, SUM1_reg become associated:
      SDValue CNTB_result =
-      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
  
      SDValue CNTB_rescopy =
-      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
  
-    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
+    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
  
-    return DAG.getNode(ISD::AND, MVT::i16,
-                       DAG.getNode(ISD::ADD, MVT::i16,
-                                   DAG.getNode(ISD::SRL, MVT::i16,
+    return DAG.getNode(ISD::AND, dl, MVT::i16,
+                       DAG.getNode(ISD::ADD, dl, MVT::i16,
+                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                                 Tmp1, Shift1),
                                     Tmp1),
                         Mask0);
@@ -2366,37 +2316,38 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
      SDValue Shift1 = DAG.getConstant(16, MVT::i32);
      SDValue Shift2 = DAG.getConstant(8, MVT::i32);
  
-    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
-    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
  
      // CNTB_result becomes the chain to which all of the virtual registers
      // CNTB_reg, SUM1_reg become associated:
      SDValue CNTB_result =
-      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
+      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
  
      SDValue CNTB_rescopy =
-      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
  
      SDValue Comp1 =
-      DAG.getNode(ISD::SRL, MVT::i32,
-                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+                  Shift1);
  
      SDValue Sum1 =
-      DAG.getNode(ISD::ADD, MVT::i32,
-                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
  
      SDValue Sum1_rescopy =
-      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
+      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
  
      SDValue Comp2 =
-      DAG.getNode(ISD::SRL, MVT::i32,
-                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
+      DAG.getNode(ISD::SRL, dl, MVT::i32,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                    Shift2);
      SDValue Sum2 =
-      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
-                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
+      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
  
-    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
+    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
    }
  
    case MVT::i64:
@@ -2413,9 +2364,9 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   */
  static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                SPUTargetLowering &TLI) {
-  MVT OpVT = Op.getValueType();
+  EVT OpVT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
+  EVT Op0VT = Op0.getValueType();
  
    if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
        || OpVT == MVT::i64) {
@@ -2429,7 +2380,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
      return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
    }
  
-  return Op;                    // return unmolested, legalized op
+  return Op;
  }
  
  //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
@@ -2439,9 +2390,9 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   */
  static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                                SPUTargetLowering &TLI) {
-  MVT OpVT = Op.getValueType();
+  EVT OpVT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
+  EVT Op0VT = Op0.getValueType();
  
    if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
        || Op0VT == MVT::i64) {
@@ -2455,7 +2406,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
      return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
    }
  
-  return Op;                    // return unmolested, legalized
+  return Op;
  }
  
  //! Lower ISD::SETCC
@@ -2465,17 +2416,17 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
  static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                            const TargetLowering &TLI) {
    CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
-  DebugLoc dl = Op.getNode()->getDebugLoc();
+  DebugLoc dl = Op.getDebugLoc();
    assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
  
    SDValue lhs = Op.getOperand(0);
    SDValue rhs = Op.getOperand(1);
-  MVT lhsVT = lhs.getValueType();
+  EVT lhsVT = lhs.getValueType();
    assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
  
-  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
    APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
-  MVT IntVT(MVT::i64);
+  EVT IntVT(MVT::i64);
  
    // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
    // selected to a NOP:
@@ -2513,7 +2464,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                                      ISD::SETGT));
    }
  
-  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, IntVT, rhs);
+  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
    SDValue rhsHi32 =
            DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                        DAG.getNode(ISD::SRL, dl, IntVT,
@@ -2559,13 +2510,11 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
    case ISD::SETONE:
      compareOp = ISD::SETNE; break;
    default:
-    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
-    abort();
-    break;
+    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
    }
  
    SDValue result =
-          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, 
+          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                         (ISD::CondCode) compareOp);
  
    if ((CC->get() & 0x8) == 0) {
@@ -2598,12 +2547,13 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
  
  static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                                const TargetLowering &TLI) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
    SDValue lhs = Op.getOperand(0);
    SDValue rhs = Op.getOperand(1);
    SDValue trueval = Op.getOperand(2);
    SDValue falseval = Op.getOperand(3);
    SDValue condition = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
  
    // NOTE: SELB's arguments: $rA, $rB, $mask
    //
@@ -2616,47 +2566,99 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
    // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
    // with another "cannot select select_cc" assert:
  
-  SDValue compare = DAG.getNode(ISD::SETCC,
+  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                  TLI.getSetCCResultType(Op.getValueType()),
                                  lhs, rhs, condition);
-  return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
+  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
  }
  
  //! Custom lower ISD::TRUNCATE
  static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
  {
-  MVT VT = Op.getValueType();
-  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
-  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  // Type to truncate to
+  EVT VT = Op.getValueType();
+  MVT simpleVT = VT.getSimpleVT();
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), 
+                               VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
  
+  // Type to truncate from
    SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
-  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+  EVT Op0VT = Op0.getValueType();
  
    if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
      // Create shuffle mask, least significant doubleword of quadword
      unsigned maskHigh = 0x08090a0b;
      unsigned maskLow = 0x0c0d0e0f;
      // Use a shuffle to perform the truncation
-    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     DAG.getConstant(maskHigh, MVT::i32),
                                     DAG.getConstant(maskLow, MVT::i32),
                                     DAG.getConstant(maskHigh, MVT::i32),
                                     DAG.getConstant(maskLow, MVT::i32));
  
+    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+                                       Op0, Op0, shufMask);
  
-    SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
-
-    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
-                                       PromoteScalar, PromoteScalar, shufMask);
-
-    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                       DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
+    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
    }
  
    return SDValue();             // Leave the truncate unmolested
  }
  
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to extend to
+  MVT OpVT = Op.getValueType().getSimpleVT();
+
+  // Type to extend from
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+  // The type to extend to needs to be a i128 and
+  // the type to extend from needs to be i64 or i32.
+  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+          "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+  // Create shuffle mask
+  unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
+  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask2, MVT::i32),
+                                 DAG.getConstant(mask3, MVT::i32));
+
+  // Word wise arithmetic right shift to generate at least one byte
+  // that contains sign bits.
+  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+  SDValue sraVal = DAG.getNode(ISD::SRA,
+                 dl,
+                 mvt,
+                 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+                 DAG.getConstant(31, MVT::i32));
+
+  // Shuffle bytes - Copy the sign bits into the upper 64 bits
+  // and the input value into the lower 64 bits.
+  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
+
  //! Custom (target-specific) lowering entry point
  /*!
    This is where LLVM's DAG selection process calls to do target-specific
@@ -2666,15 +2668,17 @@ SDValue
  SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  {
    unsigned Opc = (unsigned) Op.getOpcode();
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
  
    switch (Opc) {
    default: {
-    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
-    cerr << "Op.getOpcode() = " << Opc << "\n";
-    cerr << "*Op.getNode():\n";
+#ifndef NDEBUG
+    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+    errs() << "Op.getOpcode() = " << Opc << "\n";
+    errs() << "*Op.getNode():\n";
      Op.getNode()->dump();
-    abort();
+#endif
+    llvm_unreachable(0);
    }
    case ISD::LOAD:
    case ISD::EXTLOAD:
@@ -2691,12 +2695,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
      return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
    case ISD::ConstantFP:
      return LowerConstantFP(Op, DAG);
-  case ISD::FORMAL_ARGUMENTS:
-    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
-  case ISD::CALL:
-    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
-  case ISD::RET:
-    return LowerRET(Op, DAG, getTargetMachine());
  
    // i8, i64 math ops:
    case ISD::ADD:
@@ -2753,6 +2751,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  
    case ISD::TRUNCATE:
      return LowerTRUNCATE(Op, DAG);
+
+  case ISD::SIGN_EXTEND:
+    return LowerSIGN_EXTEND(Op, DAG);
    }
  
    return SDValue();
@@ -2764,13 +2765,13 @@ void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
  {
  #if 0
    unsigned Opc = (unsigned) N->getOpcode();
-  MVT OpVT = N->getValueType(0);
+  EVT OpVT = N->getValueType(0);
  
    switch (Opc) {
    default: {
-    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
-    cerr << "Op.getOpcode() = " << Opc << "\n";
-    cerr << "*Op.getNode():\n";
+    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+    errs() << "Op.getOpcode() = " << Opc << "\n";
+    errs() << "*Op.getNode():\n";
      N->dump();
      abort();
      /*NOTREACHED*/
@@ -2794,9 +2795,10 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
    const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
    SelectionDAG &DAG = DCI.DAG;
    SDValue Op0 = N->getOperand(0);       // everything has at least one operand
-  MVT NodeVT = N->getValueType(0);      // The node's value type
-  MVT Op0VT = Op0.getValueType();       // The first operand's result
+  EVT NodeVT = N->getValueType(0);      // The node's value type
+  EVT Op0VT = Op0.getValueType();       // The first operand's result
    SDValue Result;                       // Initially, empty result
+  DebugLoc dl = N->getDebugLoc();
  
    switch (N->getOpcode()) {
    default: break;
@@ -2823,7 +2825,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  #if !defined(NDEBUG)
            if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-            cerr << "\n"
+            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
            }
@@ -2839,7 +2841,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  #if !defined(NDEBUG)
            if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-            cerr << "\n"
+            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                   << "), " << CN0->getSExtValue() << ")\n"
                   << "With:    (SPUindirect <arg>, "
@@ -2847,7 +2849,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
            }
  #endif
  
-          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                               IndirectArg, combinedValue);
          }
        }
@@ -2863,11 +2865,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
        // Types must match, however...
  #if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-        cerr << "\nReplace: ";
+        errs() << "\nReplace: ";
          N->dump(&DAG);
-        cerr << "\nWith:    ";
+        errs() << "\nWith:    ";
          Op0.getNode()->dump(&DAG);
-        cerr << "\n";
+        errs() << "\n";
        }
  #endif
  
@@ -2882,11 +2884,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
          // (SPUindirect (SPUaform <addr>, 0), 0) ->
          // (SPUaform <addr>, 0)
  
-        DEBUG(cerr << "Replace: ");
+        DEBUG(errs() << "Replace: ");
          DEBUG(N->dump(&DAG));
-        DEBUG(cerr << "\nWith:    ");
+        DEBUG(errs() << "\nWith:    ");
          DEBUG(Op0.getNode()->dump(&DAG));
-        DEBUG(cerr << "\n");
+        DEBUG(errs() << "\n");
  
          return Op0;
        }
@@ -2899,13 +2901,13 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  #if !defined(NDEBUG)
            if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-            cerr << "\n"
+            errs() << "\n"
                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
            }
  #endif
  
-          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
+          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                               Op0.getOperand(0), Op0.getOperand(1));
          }
        }
@@ -2914,9 +2916,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
    }
    case SPUISD::SHLQUAD_L_BITS:
    case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
    case SPUISD::ROTBYTES_LEFT: {
      SDValue Op1 = N->getOperand(1);
  
@@ -2961,11 +2960,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
    // Otherwise, return unchanged.
  #ifndef NDEBUG
    if (Result.getNode()) {
-    DEBUG(cerr << "\nReplace.SPU: ");
+    DEBUG(errs() << "\nReplace.SPU: ");
      DEBUG(N->dump(&DAG));
-    DEBUG(cerr << "\nWith:        ");
+    DEBUG(errs() << "\nWith:        ");
      DEBUG(Result.getNode()->dump(&DAG));
-    DEBUG(cerr << "\n");
+    DEBUG(errs() << "\n");
    }
  #endif
  
@@ -2996,7 +2995,7 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
  
  std::pair<unsigned, const TargetRegisterClass*>
  SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                MVT VT) const
+                                                EVT VT) const
  {
    if (Constraint.size() == 1) {
      // GCC RS6000 Constraint Letters
@@ -3029,7 +3028,7 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                    const SelectionDAG &DAG,
                                                    unsigned Depth ) const {
  #if 0
-  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
+  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
  
    switch (Op.getOpcode()) {
    default:
@@ -3044,9 +3043,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
    case SPUISD::VEC2PREFSLOT:
    case SPUISD::SHLQUAD_L_BITS:
    case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
    case SPUISD::VEC_ROTL:
    case SPUISD::VEC_ROTR:
    case SPUISD::ROTBYTES_LEFT:
@@ -3064,7 +3060,7 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
      return 1;
  
    case ISD::SETCC: {
-    MVT VT = Op.getValueType();
+    EVT VT = Op.getValueType();
  
      if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
        VT = MVT::i32;