Reverting r56249. On further investigation, this functionality isn't needed.

[oota-llvm.git] / lib / Target / CellSPU / SPUISelLowering.cpp
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp

index 7893e677fbb0e4b414ad3c3fa56d9f4931c3dba8..384755d6657b239e7b895772029bf97e5ac5ade6 100644 (file)
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -14,8 +14,8 @@
  #include "SPURegisterNames.h"
  #include "SPUISelLowering.h"
  #include "SPUTargetMachine.h"
+#include "SPUFrameInfo.h"
  #include "llvm/ADT/VectorExtras.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
@@ -37,12 +37,12 @@ using namespace llvm;
  namespace {
    std::map<unsigned, const char *> node_names;
  
-  //! MVT::ValueType mapping to useful data for Cell SPU
+  //! MVT mapping to useful data for Cell SPU
    struct valtype_map_s {
-    const MVT::ValueType       valtype;
-    const int                  prefslot_byte;
+    const MVT        valtype;
+    const int                   prefslot_byte;
    };
-  
+
    const valtype_map_s valtype_map[] = {
      { MVT::i1,   3 },
      { MVT::i8,   3 },
@@ -56,21 +56,21 @@ namespace {
  
    const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  
-  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
+  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
      const valtype_map_s *retval = 0;
  
      for (size_t i = 0; i < n_valtype_map; ++i) {
        if (valtype_map[i].valtype == VT) {
-       retval = valtype_map + i;
-       break;
+        retval = valtype_map + i;
+        break;
        }
      }
  
  #ifndef NDEBUG
      if (retval == 0) {
        cerr << "getValueTypeMapEntry returns NULL for "
-          << MVT::getValueTypeString(VT)
-          << "\n";
+           << VT.getMVTString()
+           << "\n";
        abort();
      }
  #endif
@@ -82,25 +82,31 @@ namespace {
    /*!
      \arg Op Operand to test
      \return true if the operand is a memory target (i.e., global
-    address, external symbol, constant pool) or an existing D-Form
+    address, external symbol, constant pool) or an A-form
      address.
     */
-  bool isMemoryOperand(const SDOperand &Op)
+  bool isMemoryOperand(const SDValue &Op)
    {
      const unsigned Opc = Op.getOpcode();
      return (Opc == ISD::GlobalAddress
              || Opc == ISD::GlobalTLSAddress
-            || Opc ==  ISD::FrameIndex
              || Opc == ISD::JumpTable
              || Opc == ISD::ConstantPool
              || Opc == ISD::ExternalSymbol
              || Opc == ISD::TargetGlobalAddress
              || Opc == ISD::TargetGlobalTLSAddress
-            || Opc == ISD::TargetFrameIndex
              || Opc == ISD::TargetJumpTable
              || Opc == ISD::TargetConstantPool
              || Opc == ISD::TargetExternalSymbol
-           || Opc == SPUISD::DFormAddr);
+            || Opc == SPUISD::AFormAddr);
+  }
+
+  //! Predicate that returns true if the operand is an indirect target
+  bool isIndirectOperand(const SDValue &Op)
+  {
+    const unsigned Opc = Op.getOpcode();
+    return (Opc == ISD::Register
+            || Opc == SPUISD::LDRESULT);
    }
  }
  
@@ -114,11 +120,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // Use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
-    
+
    // Set up the SPU's register classes:
-  // NOTE: i8 register class is not registered because we cannot determine when
-  // we need to zero or sign extend for custom-lowered loads and stores.
-  // NOTE: Ignore the previous note. For now. :-)
    addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
    addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
    addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
@@ -126,17 +129,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
    addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
    addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
-  
+
    // SPU has no sign or zero extended loads for i1, i8, i16:
-  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Custom);
+  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
    setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
    setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
-  setStoreXAction(MVT::i1, Custom);
+  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
+  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
+  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
+  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
+  setTruncStoreAction(MVT::i128, MVT::i1, Custom);
  
    setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
    setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
    setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
-  setStoreXAction(MVT::i8, Custom);
+  setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
+  setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
+  setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
+  setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
+  setTruncStoreAction(MVT::i128, MVT::i8, Custom);
  
    setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
    setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
@@ -144,57 +155,70 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  
    // SPU constant load actions are custom lowered:
    setOperationAction(ISD::Constant,   MVT::i64, Custom);
-  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
    setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  
    // SPU's loads and stores have to be custom lowered:
    for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
         ++sctype) {
-    setOperationAction(ISD::LOAD, sctype, Custom);
-    setOperationAction(ISD::STORE, sctype, Custom);
+    MVT VT = (MVT::SimpleValueType)sctype;
+
+    setOperationAction(ISD::LOAD, VT, Custom);
+    setOperationAction(ISD::STORE, VT, Custom);
    }
  
-  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
-  // into BR_CCs. BR_CC instructions are custom selected in
-  // SPUDAGToDAGISel.
-  setOperationAction(ISD::BRCOND, MVT::Other, Legal);
+  // Custom lower BRCOND for i1, i8 to "promote" the result to
+  // i32 and i16, respectively.
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  
    // Expand the jumptable branches
    setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
    setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
-  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);  
+  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  
    // SPU has no intrinsics for these particular operations:
-  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
-  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
-  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
-  
+  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
    // PowerPC has no SREM/UREM instructions
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
-  
+
    // We don't support sin/cos/sqrt/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FLOG , MVT::f64, Expand);
+  setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+  setOperationAction(ISD::FLOG10,MVT::f64, Expand);
+  setOperationAction(ISD::FEXP , MVT::f64, Expand);
+  setOperationAction(ISD::FEXP2, MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);
-  
+  setOperationAction(ISD::FLOG , MVT::f32, Expand);
+  setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+  setOperationAction(ISD::FLOG10,MVT::f32, Expand);
+  setOperationAction(ISD::FEXP , MVT::f32, Expand);
+  setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+
    // If we're enabling GP optimizations, use hardware square root
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-  
+
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  
    // SPU can do rotate right and left, so legalize it... but customize for i8
    // because instructions don't exist.
-  setOperationAction(ISD::ROTR, MVT::i32,    Legal);
-  setOperationAction(ISD::ROTR, MVT::i16,    Legal);
-  setOperationAction(ISD::ROTR, MVT::i8,     Custom);
+
+  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+  //        .td files.
+  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
+  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
+  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
+
    setOperationAction(ISD::ROTL, MVT::i32,    Legal);
    setOperationAction(ISD::ROTL, MVT::i16,    Legal);
    setOperationAction(ISD::ROTL, MVT::i8,     Custom);
@@ -202,14 +226,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    setOperationAction(ISD::SHL,  MVT::i8,     Custom);
    setOperationAction(ISD::SRL,  MVT::i8,     Custom);
    setOperationAction(ISD::SRA,  MVT::i8,     Custom);
+  // And SPU needs custom lowering for shift left/right for i64
+  setOperationAction(ISD::SHL,  MVT::i64,    Custom);
+  setOperationAction(ISD::SRL,  MVT::i64,    Custom);
+  setOperationAction(ISD::SRA,  MVT::i64,    Custom);
  
-  // Custom lower i32 multiplications
+  // Custom lower i8, i32 and i64 multiplications
+  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
    setOperationAction(ISD::MUL,  MVT::i32,    Custom);
+  setOperationAction(ISD::MUL,  MVT::i64,    Custom);
  
-  // Need to custom handle (some) common i8 math ops
+  // Need to custom handle (some) common i8, i64 math ops
+  setOperationAction(ISD::ADD,  MVT::i64,    Custom);
    setOperationAction(ISD::SUB,  MVT::i8,     Custom);
-  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
-  
+  setOperationAction(ISD::SUB,  MVT::i64,    Custom);
+
    // SPU does not have BSWAP. It does have i32 support CTLZ.
    // CTPOP has to be custom lowered.
    setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
@@ -224,24 +255,27 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  
    setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
-  
-  // SPU does not have select or setcc
-  setOperationAction(ISD::SELECT, MVT::i1,   Expand);
-  setOperationAction(ISD::SELECT, MVT::i8,   Expand);
-  setOperationAction(ISD::SELECT, MVT::i16,  Expand);
-  setOperationAction(ISD::SELECT, MVT::i32,  Expand);
+
+  // SPU has a version of select that implements (a&~c)|(b&c), just like
+  // select ought to work:
+  setOperationAction(ISD::SELECT, MVT::i1,   Promote);
+  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
+  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
+  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
    setOperationAction(ISD::SELECT, MVT::i64,  Expand);
-  setOperationAction(ISD::SELECT, MVT::f32,  Expand);
-  setOperationAction(ISD::SELECT, MVT::f64,  Expand);
-
-  setOperationAction(ISD::SETCC, MVT::i1,   Expand);
-  setOperationAction(ISD::SETCC, MVT::i8,   Expand);
-  setOperationAction(ISD::SETCC, MVT::i16,  Expand);
-  setOperationAction(ISD::SETCC, MVT::i32,  Expand);
-  setOperationAction(ISD::SETCC, MVT::i64,  Expand);
-  setOperationAction(ISD::SETCC, MVT::f32,  Expand);
-  setOperationAction(ISD::SETCC, MVT::f64,  Expand);
-  
+
+  setOperationAction(ISD::SETCC, MVT::i1,    Promote);
+  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
+  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
+  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
+  setOperationAction(ISD::SETCC, MVT::i64,   Expand);
+
+  // Zero extension and sign extension for i64 have to be
+  // custom legalized
+  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
+  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
+  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
+
    // SPU has a legal FP -> signed INT instruction
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
@@ -269,33 +303,33 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  
    // We cannot sextinreg(i1).  Expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-  
+
    // Support label based line numbers.
-  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
    setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-  
-  // We want to legalize GlobalAddress and ConstantPool nodes into the 
+
+  // We want to legalize GlobalAddress and ConstantPool nodes into the
    // appropriate instructions to materialize the address.
-  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
-  setOperationAction(ISD::ConstantPool,  MVT::f32, Custom);
-  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
-  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
-  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
-  setOperationAction(ISD::ConstantPool,  MVT::f64, Custom);
-  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
+  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
+       ++sctype) {
+    MVT VT = (MVT::SimpleValueType)sctype;
+
+    setOperationAction(ISD::GlobalAddress, VT, Custom);
+    setOperationAction(ISD::ConstantPool,  VT, Custom);
+    setOperationAction(ISD::JumpTable,     VT, Custom);
+  }
  
    // RET must be custom lowered, to meet ABI requirements
    setOperationAction(ISD::RET,           MVT::Other, Custom);
-  
+
    // VASTART needs to be custom lowered to use the VarArgsFrameIndex
    setOperationAction(ISD::VASTART           , MVT::Other, Custom);
-  
+
    // Use the default implementation.
    setOperationAction(ISD::VAARG             , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
    setOperationAction(ISD::VAEND             , MVT::Other, Expand);
-  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand); 
+  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
    setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
@@ -303,7 +337,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // Cell SPU has instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
-    
+
    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  
@@ -319,36 +353,38 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
    addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  
-  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+    MVT VT = (MVT::SimpleValueType)i;
+
      // add/sub are legal for all supported vector VT's.
-    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
-    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
+    setOperationAction(ISD::ADD , VT, Legal);
+    setOperationAction(ISD::SUB , VT, Legal);
      // mul has to be custom lowered.
-    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
-
-    setOperationAction(ISD::AND   , (MVT::ValueType)VT, Legal);
-    setOperationAction(ISD::OR    , (MVT::ValueType)VT, Legal);
-    setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Legal);
-    setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Legal);
-    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
-    setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);
-    
+    setOperationAction(ISD::MUL , VT, Custom);
+
+    setOperationAction(ISD::AND   , VT, Legal);
+    setOperationAction(ISD::OR    , VT, Legal);
+    setOperationAction(ISD::XOR   , VT, Legal);
+    setOperationAction(ISD::LOAD  , VT, Legal);
+    setOperationAction(ISD::SELECT, VT, Legal);
+    setOperationAction(ISD::STORE,  VT, Legal);
+
      // These operations need to be expanded:
-    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
-    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
-    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
-    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
-    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
+    setOperationAction(ISD::SDIV, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::UDIV, VT, Expand);
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::FDIV, VT, Custom);
  
      // Custom lower build_vector, constant pool spills, insert and
      // extract vector elements:
-    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
-    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
-    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
-    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::ConstantPool, VT, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -356,16 +392,18 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    setOperationAction(ISD::OR,  MVT::v16i8, Custom);
    setOperationAction(ISD::XOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
-  
-  setSetCCResultType(MVT::i32);
+
    setShiftAmountType(MVT::i32);
    setSetCCResultContents(ZeroOrOneSetCCResult);
-  
+
    setStackPointerRegisterToSaveRestore(SPU::R1);
-  
+
    // We have target-specific dag combine patterns for the following nodes:
-  // e.g., setTargetDAGCombine(ISD::SUB);
-  
+  setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::ZERO_EXTEND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::ANY_EXTEND);
+
    computeRegisterProperties();
  }
  
@@ -377,8 +415,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
      node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
      node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
      node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
-    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
-    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
+    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
      node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
      node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
      node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
@@ -386,7 +424,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
      node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
      node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
      node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
-    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
+    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
+                                              = "SPUISD::EXTRACT_ELT0_CHAINED";
      node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
      node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
      node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
@@ -395,21 +434,30 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
      node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
      node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
      node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
+    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
+    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
      node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
      node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
      node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
      node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
      node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
-    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
-      "SPUISD::ROTBYTES_RIGHT_Z";
+    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
+      "SPUISD::ROTQUAD_RZ_BYTES";
+    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
+      "SPUISD::ROTQUAD_RZ_BITS";
      node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
        "SPUISD::ROTBYTES_RIGHT_S";
      node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
      node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
        "SPUISD::ROTBYTES_LEFT_CHAINED";
-    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
+    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+      "SPUISD::ROTBYTES_LEFT_BITS";
+    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
      node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
-    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
+    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
+    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
+    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
+    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
      node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
      node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
      node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
@@ -420,6 +468,14 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
    return ((i != node_names.end()) ? i->second : 0);
  }
  
+MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
+  MVT VT = Op.getValueType();
+  if (VT.isInteger())
+    return VT;
+  else
+    return MVT::i32;
+}
+
  //===----------------------------------------------------------------------===//
  // Calling convention code:
  //===----------------------------------------------------------------------===//
@@ -430,211 +486,197 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  //  LowerOperation implementation
  //===----------------------------------------------------------------------===//
  
-/// Custom lower loads for CellSPU
+/// Aligned load common code for CellSPU
  /*!
- All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
- within a 16-byte block, we have to rotate to extract the requested element.
+  \param[in] Op The SelectionDAG load or store operand
+  \param[in] DAG The selection DAG
+  \param[in] ST CellSPU subtarget information structure
+  \param[in,out] alignment Caller initializes this to the load or store node's
+  value from getAlignment(); may be updated while generating the aligned load
+  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
+  offset (divisible by 16, modulo 16 == 0)
+  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
+  offset of the preferred slot (modulo 16 != 0)
+  \param[in,out] VT Caller initializes this value type to the load or store
+  node's loaded or stored value type; may be updated if an i1-extended load or
+  store.
+  \param[out] was16aligned true if the base pointer had 16-byte alignment,
+  otherwise false. Can help to determine if the chunk needs to be rotated.
+
+ Both load and store lowering load a block of data aligned on a 16-byte
+ boundary. This is the common aligned load code shared between both.
   */
-static SDOperand
-LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  LoadSDNode *LN = cast<LoadSDNode>(Op);
-  SDOperand basep = LN->getBasePtr();
-  SDOperand the_chain = LN->getChain();
-  MVT::ValueType BasepOpc = basep.Val->getOpcode();
-  MVT::ValueType VT = LN->getLoadedVT();
-  MVT::ValueType OpVT = Op.Val->getValueType(0);
-  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  ISD::LoadExtType ExtType = LN->getExtensionType();
-  unsigned alignment = LN->getAlignment();
+static SDValue
+AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
+            LSBaseSDNode *LSN,
+            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
+            MVT &VT, bool &was16aligned)
+{
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    const valtype_map_s *vtm = getValueTypeMapEntry(VT);
-  SDOperand Ops[8];
-
-  if (BasepOpc == ISD::FrameIndex) {
-    // Loading from a frame index is always properly aligned. Always.
-    return SDOperand();
-  }
-
-  // For an extending load of an i1 variable, just call it i8 (or whatever we
-  // were passed) and make it zero-extended:
-  if (VT == MVT::i1) {
-    VT = OpVT;
-    ExtType = ISD::ZEXTLOAD;
-  }
+  SDValue basePtr = LSN->getBasePtr();
+  SDValue chain = LSN->getChain();
  
-  switch (LN->getAddressingMode()) {
-  case ISD::UNINDEXED: {
-    SDOperand result;
-    SDOperand rot_op, rotamt;
-    SDOperand ptrp;
-    int c_offset;
-    int c_rotamt;
-
-    // The vector type we really want to be when we load the 16-byte chunk
-    MVT::ValueType vecVT, opVecVT;
-    
-    vecVT = MVT::v16i8;
-    if (VT != MVT::i1)
-      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
-    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
+  if (basePtr.getOpcode() == ISD::ADD) {
+    SDValue Op1 = basePtr.getNode()->getOperand(1);
  
-    if (basep.getOpcode() == ISD::ADD) {
-      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
+    if (Op1.getOpcode() == ISD::Constant
+        || Op1.getOpcode() == ISD::TargetConstant) {
+      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
  
-      assert(CN != NULL
-             && "LowerLOAD: ISD::ADD operand 1 is not constant");
-
-      c_offset = (int) CN->getValue();
-      c_rotamt = (int) (c_offset & 0xf);
+      alignOffs = (int) CN->getZExtValue();
+      prefSlotOffs = (int) (alignOffs & 0xf);
  
        // Adjust the rotation amount to ensure that the final result ends up in
        // the preferred slot:
-      c_rotamt -= vtm->prefslot_byte;
-      ptrp = basep.getOperand(0);
+      prefSlotOffs -= vtm->prefslot_byte;
+      basePtr = basePtr.getOperand(0);
+
+      // Loading from memory, can we adjust alignment?
+      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
+        SDValue APtr = basePtr.getOperand(0);
+        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
+          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
+          alignment = GSDN->getGlobal()->getAlignment();
+        }
+      }
      } else {
-      c_offset = 0;
-      c_rotamt = -vtm->prefslot_byte;
-      ptrp = basep;
+      alignOffs = 0;
+      prefSlotOffs = -vtm->prefslot_byte;
      }
+  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
+    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
+    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
+    prefSlotOffs = (int) (alignOffs & 0xf);
+    prefSlotOffs -= vtm->prefslot_byte;
+    basePtr = DAG.getRegister(SPU::R1, VT);
+  } else {
+    alignOffs = 0;
+    prefSlotOffs = -vtm->prefslot_byte;
+  }
  
-    if (alignment == 16) {
-      // 16-byte aligned load into preferred slot, no rotation
-      if (c_rotamt == 0) {
-       if (isMemoryOperand(ptrp))
-         // Return unchanged
-         return SDOperand();
-       else {
-         // Return modified D-Form address for pointer:
-         ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
-                            ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
-         if (VT == OpVT)
-           return DAG.getLoad(VT, LN->getChain(), ptrp,
-                              LN->getSrcValue(), LN->getSrcValueOffset(),
-                              LN->isVolatile(), 16);
-         else
-           return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
-                                 LN->getSrcValueOffset(), OpVT,
-                                 LN->isVolatile(), 16);
-       }
-      } else {
-       // Need to rotate...
-       if (c_rotamt < 0)
-         c_rotamt += 16;
-       // Realign the base pointer, with a D-Form address
-       if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
-         basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
-                             ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
-       else
-         basep = ptrp;
-
-       // Rotate the load:
-       rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
-                            LN->getSrcValue(), LN->getSrcValueOffset(),
-                            LN->isVolatile(), 16);
-       the_chain = rot_op.getValue(1);
-       rotamt = DAG.getConstant(c_rotamt, MVT::i16);
-
-       SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
-       Ops[0] = the_chain;
-       Ops[1] = rot_op;
-       Ops[2] = rotamt;
-
-       result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
-       the_chain = result.getValue(1);
-
-       if (VT == OpVT || ExtType == ISD::EXTLOAD) {
-         SDVTList scalarvts;
-         Ops[0] = the_chain;
-         Ops[1] = result;
-         if (OpVT == VT) {
-           scalarvts = DAG.getVTList(VT, MVT::Other);
-         } else {
-           scalarvts = DAG.getVTList(OpVT, MVT::Other);
-         }
-
-         result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
-                              result);
-         Ops[0] = the_chain;
-         Ops[1] = result;
-         result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
-         the_chain = result.getValue(1);
-       } else {
-         // Handle the sign and zero-extending loads for i1 and i8:
-         unsigned NewOpC;
-
-         if (ExtType == ISD::SEXTLOAD) {
-           NewOpC = (OpVT == MVT::i1
-                     ? SPUISD::EXTRACT_I1_SEXT
-                     : SPUISD::EXTRACT_I8_SEXT);
-         } else {
-      assert(ExtType == ISD::ZEXTLOAD);
-           NewOpC = (OpVT == MVT::i1
-                     ? SPUISD::EXTRACT_I1_ZEXT
-                     : SPUISD::EXTRACT_I8_ZEXT);
-         }
-
-         result = DAG.getNode(NewOpC, OpVT, result);
-       }
-
-       SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
-       SDOperand retops[2] = { result, the_chain };
-
-       result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
-       return result;
-       /*UNREACHED*/
-      }
-    } else {
-      // Misaligned 16-byte load:
-      if (basep.getOpcode() == ISD::LOAD) {
-       LN = cast<LoadSDNode>(basep);
-       if (LN->getAlignment() == 16) {
-         // We can verify that we're really loading from a 16-byte aligned
-         // chunk. Encapsulate basep as a D-Form address and return a new
-         // load:
-         basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
-                             DAG.getConstant(0, PtrVT));
-         if (OpVT == VT)
-           return DAG.getLoad(VT, LN->getChain(), basep,
-                              LN->getSrcValue(), LN->getSrcValueOffset(),
-                              LN->isVolatile(), 16);
-         else
-           return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
-                                 LN->getSrcValue(), LN->getSrcValueOffset(),
-                                 OpVT, LN->isVolatile(), 16);
-       }
-      }
+  if (alignment == 16) {
+    // Realign the base pointer as a D-Form address:
+    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
+      basePtr = DAG.getNode(ISD::ADD, PtrVT,
+                            basePtr,
+                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
+    }
+
+    // Emit the vector load:
+    was16aligned = true;
+    return DAG.getLoad(MVT::v16i8, chain, basePtr,
+                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
+                       LSN->isVolatile(), 16);
+  }
+
+  // Unaligned load or we're using the "large memory" model, which means that
+  // we have to be very pessimistic:
+  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
+    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
+                          DAG.getConstant(0, PtrVT));
+  }
  
-      // Catch all other cases where we can't guarantee that we have a
-      // 16-byte aligned entity, which means resorting to an X-form
-      // address scheme:
+  // Add the offset
+  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
+                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
+  was16aligned = false;
+  return DAG.getLoad(MVT::v16i8, chain, basePtr,
+                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
+                     LSN->isVolatile(), 16);
+}
  
-      SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
-      SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
-      SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+ */
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  LoadSDNode *LN = cast<LoadSDNode>(Op);
+  SDValue the_chain = LN->getChain();
+  MVT VT = LN->getMemoryVT();
+  MVT OpVT = Op.getNode()->getValueType(0);
+  ISD::LoadExtType ExtType = LN->getExtensionType();
+  unsigned alignment = LN->getAlignment();
+  SDValue Ops[8];
  
-      ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
+  switch (LN->getAddressingMode()) {
+  case ISD::UNINDEXED: {
+    int offset, rotamt;
+    bool was16aligned;
+    SDValue result =
+      AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
  
-      SDOperand alignLoad =
-       DAG.getLoad(opVecVT, LN->getChain(), ptrp,
-                   LN->getSrcValue(), LN->getSrcValueOffset(),
-                   LN->isVolatile(), 16);
+    if (result.getNode() == 0)
+      return result;
  
-      SDOperand insertEltOp =
-       DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
+    the_chain = result.getValue(1);
+    // Rotate the chunk if necessary
+    if (rotamt < 0)
+      rotamt += 16;
+    if (rotamt != 0 || !was16aligned) {
+      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
+
+      Ops[0] = the_chain;
+      Ops[1] = result;
+      if (was16aligned) {
+        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
+      } else {
+        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+        LoadSDNode *LN1 = cast<LoadSDNode>(result);
+        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+                             DAG.getConstant(rotamt, PtrVT));
+      }
  
-      result = DAG.getNode(SPUISD::SHUFB, opVecVT,
-                          alignLoad,
-                          alignLoad,
-                          DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
+      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
+      the_chain = result.getValue(1);
+    }
  
-      result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
+    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
+      SDVTList scalarvts;
+      MVT vecVT = MVT::v16i8;
+
+      // Convert the loaded v16i8 vector to the appropriate vector type
+      // specified by the operand:
+      if (OpVT == VT) {
+        if (VT != MVT::i1)
+          vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+      } else
+        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+
+      Ops[0] = the_chain;
+      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
+      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
+      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
+      the_chain = result.getValue(1);
+    } else {
+      // Handle the sign and zero-extending loads for i1 and i8:
+      unsigned NewOpC;
  
-      SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
-      SDOperand retops[2] = { result, the_chain };
+      if (ExtType == ISD::SEXTLOAD) {
+        NewOpC = (OpVT == MVT::i1
+                  ? SPUISD::EXTRACT_I1_SEXT
+                  : SPUISD::EXTRACT_I8_SEXT);
+      } else {
+        assert(ExtType == ISD::ZEXTLOAD);
+        NewOpC = (OpVT == MVT::i1
+                  ? SPUISD::EXTRACT_I1_ZEXT
+                  : SPUISD::EXTRACT_I8_ZEXT);
+      }
  
-      result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
-      return result;
+      result = DAG.getNode(NewOpC, OpVT, result);
      }
-    break;
+
+    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
+    SDValue retops[2] = {
+      result,
+      the_chain
+    };
+
+    result = DAG.getNode(SPUISD::LDRESULT, retvts,
+                         retops, sizeof(retops) / sizeof(retops[0]));
+    return result;
    }
    case ISD::PRE_INC:
    case ISD::PRE_DEC:
@@ -648,7 +690,7 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
      /*NOTREACHED*/
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// Custom lower stores for CellSPU
@@ -657,88 +699,78 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   within a 16-byte block, we have to generate a shuffle to insert the
   requested element into its place, then store the resulting block.
   */
-static SDOperand
-LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    StoreSDNode *SN = cast<StoreSDNode>(Op);
-  SDOperand Value = SN->getValue();
-  MVT::ValueType VT = Value.getValueType();
-  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
-  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  SDOperand the_chain = SN->getChain();
-  //unsigned alignment = SN->getAlignment();
-  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+  SDValue Value = SN->getValue();
+  MVT VT = Value.getValueType();
+  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  unsigned alignment = SN->getAlignment();
  
    switch (SN->getAddressingMode()) {
    case ISD::UNINDEXED: {
-    SDOperand basep = SN->getBasePtr();
-    SDOperand ptrOp;
-    int offset;
-
-    if (basep.getOpcode() == ISD::FrameIndex) {
-      // FrameIndex nodes are always properly aligned. Really.
-      return SDOperand();
-    }
-
-    if (basep.getOpcode() == ISD::ADD) {
-      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
-      assert(CN != NULL
-             && "LowerSTORE: ISD::ADD operand 1 is not constant");
-      offset = unsigned(CN->getValue());
-      ptrOp = basep.getOperand(0);
-      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
-                << offset
-                << "\n");
-    } else {
-      ptrOp = basep;
-      offset = 0;
-    }
+    int chunk_offset, slot_offset;
+    bool was16aligned;
  
      // The vector type we really want to load from the 16-byte chunk, except
      // in the case of MVT::i1, which has to be v16i8.
-    unsigned vecVT, stVecVT;
+    MVT vecVT, stVecVT = MVT::v16i8;
  
      if (StVT != MVT::i1)
-      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
-    else
-      stVecVT = MVT::v16i8;
-    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
+      stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+    vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  
-    // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
-    // the actual dform addr offs($reg).
-    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
-                        DAG.getConstant((offset & ~0xf), PtrVT));
+    SDValue alignLoadVec =
+      AlignedLoad(Op, DAG, ST, SN, alignment,
+                  chunk_offset, slot_offset, VT, was16aligned);
  
-    // Create the 16-byte aligned vector load
-    SDOperand alignLoad =
-      DAG.getLoad(vecVT, the_chain, basep,
-                  SN->getSrcValue(), SN->getSrcValueOffset(),
-                  SN->isVolatile(), 16);
-    the_chain = alignLoad.getValue(1);
+    if (alignLoadVec.getNode() == 0)
+      return alignLoadVec;
  
-    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
-    SDOperand theValue = SN->getValue();
-    SDOperand result;
+    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+    SDValue basePtr = LN->getBasePtr();
+    SDValue the_chain = alignLoadVec.getValue(1);
+    SDValue theValue = SN->getValue();
+    SDValue result;
  
      if (StVT != VT
-       && (theValue.getOpcode() == ISD::AssertZext
-           || theValue.getOpcode() == ISD::AssertSext)) {
+        && (theValue.getOpcode() == ISD::AssertZext
+            || theValue.getOpcode() == ISD::AssertSext)) {
        // Drill down and get the value for zero- and sign-extended
        // quantities
-      theValue = theValue.getOperand(0); 
+      theValue = theValue.getOperand(0);
      }
  
-    SDOperand insertEltOp =
-      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
-                 DAG.getNode(SPUISD::DFormAddr, PtrVT,
-                             ptrOp,
-                             DAG.getConstant((offset & 0xf), PtrVT)));
+    chunk_offset &= 0xf;
+
+    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
+    SDValue insertEltPtr;
+    SDValue insertEltOp;
+
+    // If the base pointer is already a D-form address, then just create
+    // a new D-form address with a slot offset and the original base pointer.
+    // Otherwise generate a D-form address with the slot offset relative
+    // to the stack pointer, which is always aligned.
+    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
+    DEBUG(basePtr.getNode()->dump(&DAG));
+    DEBUG(cerr << "\n");
+
+    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
+        (basePtr.getOpcode() == ISD::ADD
+         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
+      insertEltPtr = basePtr;
+    } else {
+      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
+    }
  
+    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
      result = DAG.getNode(SPUISD::SHUFB, vecVT,
-                        DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
-                        alignLoad,
-                        DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
+                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
+                         alignLoadVec,
+                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
  
-    result = DAG.getStore(the_chain, result, basep,
+    result = DAG.getStore(the_chain, result, basePtr,
                            LN->getSrcValue(), LN->getSrcValueOffset(),
                            LN->isVolatile(), LN->getAlignment());
  
@@ -757,91 +789,84 @@ LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
      /*NOTREACHED*/
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// Generate the address of a constant pool entry.
-static SDOperand
-LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT::ValueType PtrVT = Op.getValueType();
+static SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT PtrVT = Op.getValueType();
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
    Constant *C = CP->getConstVal();
-  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+  SDValue Zero = DAG.getConstant(0, PtrVT);
    const TargetMachine &TM = DAG.getTarget();
-  SDOperand Zero = DAG.getConstant(0, PtrVT);
  
    if (TM.getRelocationModel() == Reloc::Static) {
      if (!ST->usingLargeMem()) {
-      // Just return the SDOperand with the constant pool address in it.
-      return CPI;
+      // Wrap the constant pool address in an SPUISD::AFormAddr node.
+      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
      } else {
-      // Generate hi/lo address pair
-      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
-      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
-
-      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
+      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
      }
    }
  
    assert(0 &&
-         "LowerConstantPool: Relocation model other than static not supported.");
-  return SDOperand();
+         "LowerConstantPool: Relocation model other than static"
+         " not supported.");
+  return SDValue();
  }
  
-static SDOperand
-LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT::ValueType PtrVT = Op.getValueType();
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT PtrVT = Op.getValueType();
    JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
-  SDOperand Zero = DAG.getConstant(0, PtrVT);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+  SDValue Zero = DAG.getConstant(0, PtrVT);
    const TargetMachine &TM = DAG.getTarget();
  
    if (TM.getRelocationModel() == Reloc::Static) {
      if (!ST->usingLargeMem()) {
-      // Just return the SDOperand with the jump table address in it.
-      return JTI;
+      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
      } else {
-      // Generate hi/lo address pair
-      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
-      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
-
-      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
+      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
      }
    }
  
    assert(0 &&
           "LowerJumpTable: Relocation model other than static not supported.");
-  return SDOperand();
+  return SDValue();
  }
  
-static SDOperand
-LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT::ValueType PtrVT = Op.getValueType();
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  MVT PtrVT = Op.getValueType();
    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
    GlobalValue *GV = GSDN->getGlobal();
-  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
-  SDOperand Zero = DAG.getConstant(0, PtrVT);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
    const TargetMachine &TM = DAG.getTarget();
-  
+  SDValue Zero = DAG.getConstant(0, PtrVT);
+
    if (TM.getRelocationModel() == Reloc::Static) {
      if (!ST->usingLargeMem()) {
-      // Generate a local store address
-      return GA;
+      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
      } else {
-      // Generate hi/lo address pair
-      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
-      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
-
-      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
+      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
+      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
+      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
      }
    } else {
      cerr << "LowerGlobalAddress: Relocation model other than static not "
-        << "supported.\n";
+         << "supported.\n";
      abort();
      /*NOTREACHED*/
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  //! Custom lower i64 integer constants
@@ -849,84 +874,93 @@ LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
   This code inserts all of the necessary juggling that needs to occur to load
   a 64-bit constant into a register.
   */
-static SDOperand
-LowerConstant(SDOperand Op, SelectionDAG &DAG) {
-  unsigned VT = Op.getValueType();
-  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
+static SDValue
+LowerConstant(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
  
    if (VT == MVT::i64) {
-    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
+    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
      return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
-                      DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
-
+                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    } else {
      cerr << "LowerConstant: unhandled constant type "
-        << MVT::getValueTypeString(VT)
-        << "\n";
+         << VT.getMVTString()
+         << "\n";
      abort();
      /*NOTREACHED*/
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
-//! Custom lower single precision floating point constants
-/*!
-  "float" immediates can be lowered as if they were unsigned 32-bit integers.
-  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
-  target description.
- */
-static SDOperand
-LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
-  unsigned VT = Op.getValueType();
-  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
+//! Custom lower double precision floating point constants
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
  
    assert((FP != 0) &&
-        "LowerConstantFP: Node is not ConstantFPSDNode");
+         "LowerConstantFP: Node is not ConstantFPSDNode");
  
-  if (VT == MVT::f32) {
-    float targetConst = FP->getValueAPF().convertToFloat();
-    return DAG.getNode(SPUISD::SFPConstant, VT,
-                      DAG.getTargetConstantFP(targetConst, VT));
-  } else if (VT == MVT::f64) {
+  if (VT == MVT::f64) {
      uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
      return DAG.getNode(ISD::BIT_CONVERT, VT,
-                      LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
+                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
    }
  
-  return SDOperand();
+  return SDValue();
+}
+
+//! Zero-extend an MVT::i1 / MVT::i8 brcond condition to MVT::i32 / MVT::i16
+static SDValue
+LowerBRCOND(SDValue Op, SelectionDAG &DAG)
+{
+  SDValue Cond = Op.getOperand(1);
+  MVT CondVT = Cond.getValueType();
+  MVT CondNVT;
+
+  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
+    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
+    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
+                      Op.getOperand(0),
+                      DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
+                      Op.getOperand(2));
+  } else
+    return SDValue();                // Unchanged
  }
  
-static SDOperand
-LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
+static SDValue
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
  {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  SmallVector<SDOperand, 8> ArgValues;
-  SDOperand Root = Op.getOperand(0);
-  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+  SmallVector<SDValue, 8> ArgValues;
+  SDValue Root = Op.getOperand(0);
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  
    const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
    const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
-  
+
    unsigned ArgOffset = SPUFrameInfo::minStackSize();
    unsigned ArgRegIdx = 0;
    unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
-  
-  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
    // Add DAG nodes to load the arguments or copy them out of registers.
-  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
-    SDOperand ArgVal;
+  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+       ArgNo != e; ++ArgNo) {
+    SDValue ArgVal;
      bool needsLoad = false;
-    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
-    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
+    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
  
-    switch (ObjectVT) {
+    switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
-          << MVT::getValueTypeString(ObjectVT)
+           << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
@@ -992,6 +1026,7 @@ LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
        break;
      case MVT::v2f64:
      case MVT::v4f32:
+    case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
@@ -1005,119 +1040,111 @@ LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
        }
        break;
      }
-    
+
      // We need to load the argument to a virtual register if we determined above
      // that we ran out of physical registers of the appropriate type
      if (needsLoad) {
-      // If the argument is actually used, emit a load from the right stack
-      // slot.
-      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
-        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
-        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
-        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
-      } else {
-        // Don't emit a dead load.
-        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
-      }
-
+      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
        ArgOffset += StackSlotSize;
      }
-    
+
      ArgValues.push_back(ArgVal);
    }
-  
+
    // If the function takes variable number of arguments, make a frame index for
    // the start of the first vararg value... for expansion of llvm.va_start.
    if (isVarArg) {
-    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
+    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                                 ArgOffset);
-    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      // If this function is vararg, store any remaining integer argument regs to
      // their spots on the stack so that they may be loaded by deferencing the
      // result of va_next.
-    SmallVector<SDOperand, 8> MemOps;
+    SmallVector<SDValue, 8> MemOps;
      for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
-      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
-      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+      SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
+      SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by four for the next argument to store
-      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
+      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
      if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
    }
-  
+
    ArgValues.push_back(Root);
- 
+
    // Return the new list of results.
-  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
-                                    Op.Val->value_end());
-  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
+                            ArgValues.size());
  }
  
  /// isLSAAddress - Return the immediate to use if the specified
  /// value is representable as a LSA address.
-static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C) return 0;
-  
-  int Addr = C->getValue();
+
+  int Addr = C->getZExtValue();
    if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
        (Addr << 14 >> 14) != Addr)
      return 0;  // Top 14 bits have to be sext of immediate.
-  
-  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
+
+  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
  }
  
  static
-SDOperand
-LowerCALL(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand Chain = Op.getOperand(0);
+SDValue
+LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain = TheCall->getChain();
  #if 0
-  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
-  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
+  bool isVarArg   = TheCall->isVarArg();
+  bool isTailCall = TheCall->isTailCall();
  #endif
-  SDOperand Callee    = Op.getOperand(4);
-  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
+  SDValue Callee    = TheCall->getCallee();
+  unsigned NumOps     = TheCall->getNumArgs();
    unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
    const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
    const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  
    // Handy pointer type
-  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
    // Accumulate how many bytes are to be pushed on the stack, including the
    // linkage area, and parameter passing area.  According to the SPU ABI,
    // we minimally need space for [LR] and [SP]
    unsigned NumStackBytes = SPUFrameInfo::minStackSize();
-  
+
    // Set up a copy of the stack pointer for use loading and storing any
    // arguments that may not fit in the registers available for argument
    // passing.
-  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
-  
+  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
    // Figure out which arguments are going to go in registers, and which in
    // memory.
    unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
    unsigned ArgRegIdx = 0;
  
    // Keep track of registers passing arguments
-  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
    // And the arguments passed on the stack
-  SmallVector<SDOperand, 8> MemOpChains;
+  SmallVector<SDValue, 8> MemOpChains;
  
    for (unsigned i = 0; i != NumOps; ++i) {
-    SDOperand Arg = Op.getOperand(5+2*i);
-    
+    SDValue Arg = TheCall->getArg(i);
+
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
-    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
  
-    switch (Arg.getValueType()) {
+    switch (Arg.getValueType().getSimpleVT()) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i32:
      case MVT::i64:
@@ -1126,7 +1153,7 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG) {
          RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        } else {
          MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
-       ArgOffset += StackSlotSize;
+        ArgOffset += StackSlotSize;
        }
        break;
      case MVT::f32:
@@ -1135,7 +1162,7 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG) {
          RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        } else {
          MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
-       ArgOffset += StackSlotSize;
+        ArgOffset += StackSlotSize;
        }
        break;
      case MVT::v4f32:
@@ -1146,7 +1173,7 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG) {
          RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        } else {
          MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
-       ArgOffset += StackSlotSize;
+        ArgOffset += StackSlotSize;
        }
        break;
      }
@@ -1161,137 +1188,141 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG) {
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains[0], MemOpChains.size());
    }
-  
+
    // Build a sequence of copy-to-reg nodes chained together with token chain
    // and flag operands which copy the outgoing args into the appropriate regs.
-  SDOperand InFlag;
+  SDValue InFlag;
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                               InFlag);
      InFlag = Chain.getValue(1);
    }
-  
-  std::vector<MVT::ValueType> NodeTys;
-  NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  
-  SmallVector<SDOperand, 8> Ops;
+  SmallVector<SDValue, 8> Ops;
    unsigned CallOpc = SPUISD::CALL;
-  
+
    // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
    // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
    // node so that legalize doesn't hack it.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      GlobalValue *GV = G->getGlobal();
-    unsigned CalleeVT = Callee.getValueType();
-
-    // Turn calls to targets that are defined (i.e., have bodies) into BRSL
-    // style calls, otherwise, external symbols are BRASL calls.
-    // NOTE:
-    // This may be an unsafe assumption for JIT and really large compilation
-    // units.
-    if (GV->isDeclaration()) {
-      Callee = DAG.getGlobalAddress(GV, CalleeVT);
+    MVT CalleeVT = Callee.getValueType();
+    SDValue Zero = DAG.getConstant(0, PtrVT);
+    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+
+    if (!ST->usingLargeMem()) {
+      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+      // style calls, otherwise, external symbols are BRASL calls. This assumes
+      // that declared/defined symbols are in the same compilation unit and can
+      // be reached through PC-relative jumps.
+      //
+      // NOTE:
+      // This may be an unsafe assumption for JIT and really large compilation
+      // units.
+      if (GV->isDeclaration()) {
+        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
+      } else {
+        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
+      }
      } else {
-      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
-                           DAG.getTargetGlobalAddress(GV, CalleeVT),
-                           DAG.getConstant(0, PtrVT));
+      // "Large memory" mode: Turn all calls into indirect calls with X-form
+      // address pairs:
+      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
      Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
-  else if (SDNode *Dest = isLSAAddress(Callee, DAG))
+  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
      // If this is an absolute destination address that appears to be a legal
      // local store address, use the munged value.
-    Callee = SDOperand(Dest, 0);
+    Callee = SDValue(Dest, 0);
+  }
  
    Ops.push_back(Chain);
    Ops.push_back(Callee);
-  
+
    // Add argument registers to the end of the list so that they are known live
    // into the call.
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
-    Ops.push_back(DAG.getRegister(RegsToPass[i].first, 
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                    RegsToPass[i].second.getValueType()));
-  
-  if (InFlag.Val)
+
+  if (InFlag.getNode())
      Ops.push_back(InFlag);
-  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
+  // Returns a chain and a flag for retval copy to use.
+  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
+                      &Ops[0], Ops.size());
    InFlag = Chain.getValue(1);
  
-  SDOperand ResultVals[3];
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getConstant(NumStackBytes, PtrVT),
+                             DAG.getConstant(0, PtrVT),
+                             InFlag);
+  if (TheCall->getValueType(0) != MVT::Other)
+    InFlag = Chain.getValue(1);
+
+  SDValue ResultVals[3];
    unsigned NumResults = 0;
-  NodeTys.clear();
-  
+
    // If the call has results, copy the values out of the ret val registers.
-  switch (Op.Val->getValueType(0)) {
+  switch (TheCall->getValueType(0).getSimpleVT()) {
    default: assert(0 && "Unexpected ret value!");
    case MVT::Other: break;
    case MVT::i32:
-    if (Op.Val->getValueType(1) == MVT::i32) {
+    if (TheCall->getValueType(1) == MVT::i32) {
        Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
        ResultVals[0] = Chain.getValue(0);
        Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                   Chain.getValue(2)).getValue(1);
        ResultVals[1] = Chain.getValue(0);
        NumResults = 2;
-      NodeTys.push_back(MVT::i32);
      } else {
        Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
        ResultVals[0] = Chain.getValue(0);
        NumResults = 1;
      }
-    NodeTys.push_back(MVT::i32);
      break;
    case MVT::i64:
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
-    NodeTys.push_back(MVT::i64);
      break;
    case MVT::f32:
    case MVT::f64:
-    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
+    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
-    NodeTys.push_back(Op.Val->getValueType(0));
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
-    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
+    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                                     InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
-    NodeTys.push_back(Op.Val->getValueType(0));
      break;
    }
-  
-  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
-                      DAG.getConstant(NumStackBytes, PtrVT));
-  NodeTys.push_back(MVT::Other);
-  
+
    // If the function returns void, just return the chain.
    if (NumResults == 0)
      return Chain;
-  
+
    // Otherwise, merge everything together with a MERGE_VALUES node.
    ResultVals[NumResults++] = Chain;
-  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
-                              ResultVals, NumResults);
-  return Res.getValue(Op.ResNo);
+  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
+  return Res.getValue(Op.getResNo());
  }
  
-static SDOperand
-LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
+static SDValue
+LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
    SmallVector<CCValAssign, 16> RVLocs;
    unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
    bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
    CCState CCInfo(CC, isVarArg, TM, RVLocs);
-  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
-  
+  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+
    // If this is the first return lowered for this function, add the regs to the
    // liveout set for the function.
    if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
@@ -1299,9 +1330,9 @@ LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
    }
  
-  SDOperand Chain = Op.getOperand(0);
-  SDOperand Flag;
-  
+  SDValue Chain = Op.getOperand(0);
+  SDValue Flag;
+
    // Copy the result values into the output registers.
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
@@ -1310,7 +1341,7 @@ LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
      Flag = Chain.getValue(1);
    }
  
-  if (Flag.Val)
+  if (Flag.getNode())
      return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
    else
      return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
@@ -1323,18 +1354,18 @@ LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  
  static ConstantSDNode *
  getVecImm(SDNode *N) {
-  SDOperand OpVal(0, 0);
-  
+  SDValue OpVal(0, 0);
+
    // Check to see if this buildvec has a single non-undef value in its elements.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
-    if (OpVal.Val == 0)
+    if (OpVal.getNode() == 0)
        OpVal = N->getOperand(i);
      else if (OpVal != N->getOperand(i))
        return 0;
    }
-  
-  if (OpVal.Val != 0) {
+
+  if (OpVal.getNode() != 0) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
        return CN;
      }
@@ -1346,60 +1377,68 @@ getVecImm(SDNode *N) {
  /// get_vec_u18imm - Test if this vector is a vector filled with the same value
  /// and the value fits into an unsigned 18-bit constant, and if so, return the
  /// constant
-SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
-                              MVT::ValueType ValueType) {
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+                              MVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    uint64_t Value = CN->getValue();
+    uint64_t Value = CN->getZExtValue();
+    if (ValueType == MVT::i64) {
+      uint64_t UValue = CN->getZExtValue();
+      uint32_t upper = uint32_t(UValue >> 32);
+      uint32_t lower = uint32_t(UValue);
+      if (upper != lower)
+        return SDValue();
+      Value = Value >> 32;
+    }
      if (Value <= 0x3ffff)
        return DAG.getConstant(Value, ValueType);
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// get_vec_i16imm - Test if this vector is a vector filled with the same value
  /// and the value fits into a signed 16-bit constant, and if so, return the
  /// constant
-SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
-                              MVT::ValueType ValueType) {
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+                              MVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    if (ValueType == MVT::i32) {
-      int Value = (int) CN->getValue();
-      int SExtValue = ((Value & 0xffff) << 16) >> 16;
-
-      if (Value == SExtValue)
-       return DAG.getConstant(Value, ValueType);
-    } else if (ValueType == MVT::i16) {
-      short Value = (short) CN->getValue();
-      int SExtValue = ((int) Value << 16) >> 16;
-
-      if (Value == (short) SExtValue)
-       return DAG.getConstant(Value, ValueType);
-    } else if (ValueType == MVT::i64) {
-      int64_t Value = CN->getValue();
-      int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
-
-      if (Value == SExtValue)
-       return DAG.getConstant(Value, ValueType);
+    int64_t Value = CN->getSignExtended();
+    if (ValueType == MVT::i64) {
+      uint64_t UValue = CN->getZExtValue();
+      uint32_t upper = uint32_t(UValue >> 32);
+      uint32_t lower = uint32_t(UValue);
+      if (upper != lower)
+        return SDValue();
+      Value = Value >> 32;
+    }
+    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+      return DAG.getConstant(Value, ValueType);
      }
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// get_vec_i10imm - Test if this vector is a vector filled with the same value
  /// and the value fits into a signed 10-bit constant, and if so, return the
  /// constant
-SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
-                              MVT::ValueType ValueType) {
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+                              MVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    int Value = (int) CN->getValue();
-    if ((ValueType == MVT::i32 && isS10Constant(Value))
-       || (ValueType == MVT::i16 && isS10Constant((short) Value)))
+    int64_t Value = CN->getSignExtended();
+    if (ValueType == MVT::i64) {
+      uint64_t UValue = CN->getZExtValue();
+      uint32_t upper = uint32_t(UValue >> 32);
+      uint32_t lower = uint32_t(UValue);
+      if (upper != lower)
+        return SDValue();
+      Value = Value >> 32;
+    }
+    if (isS10Constant(Value))
        return DAG.getConstant(Value, ValueType);
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// get_vec_i8imm - Test if this vector is a vector filled with the same value
@@ -1409,58 +1448,58 @@ SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
  /// @note: The incoming vector is v16i8 because that's the only way we can load
  /// constant vectors. Thus, we test to see if the upper and lower bytes are the
  /// same value.
-SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
-                             MVT::ValueType ValueType) {
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+                             MVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    int Value = (int) CN->getValue();
+    int Value = (int) CN->getZExtValue();
      if (ValueType == MVT::i16
-       && Value <= 0xffff                 /* truncated from uint64_t */
-       && ((short) Value >> 8) == ((short) Value & 0xff))
+        && Value <= 0xffff                 /* truncated from uint64_t */
+        && ((short) Value >> 8) == ((short) Value & 0xff))
        return DAG.getConstant(Value & 0xff, ValueType);
      else if (ValueType == MVT::i8
-            && (Value & 0xff) == Value)
+             && (Value & 0xff) == Value)
        return DAG.getConstant(Value, ValueType);
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
  /// and only bits 16-31 of that value are set (low 16 bits zero), and if so,
  /// return the value shifted right by 16 bits
-SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
-                               MVT::ValueType ValueType) {
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+                               MVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    uint64_t Value = CN->getValue();
+    uint64_t Value = CN->getZExtValue();
      if ((ValueType == MVT::i32
-         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
-       || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
        return DAG.getConstant(Value >> 16, ValueType);
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
-SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
+    return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
-SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
    if (ConstantSDNode *CN = getVecImm(N)) {
-    return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
+    return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i64);
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  // If this is a vector of constants or undefs, get the bits.  A bit in
-// UndefBits is set if the corresponding element of the vector is an 
+// UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.   Return true if this is not an array of constants, false if it is.
  //
@@ -1468,11 +1507,11 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                         uint64_t UndefBits[2]) {
    // Start with zero'd results.
    VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
-  
-  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
+
+  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
-    SDOperand OpVal = BV->getOperand(i);
-    
+    SDValue OpVal = BV->getOperand(i);
+
      unsigned PartNo = i >= e/2;     // In the upper 128 bits?
      unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
  
@@ -1482,32 +1521,32 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
        UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
        continue;
      } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
-      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
+      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
      } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
        const APFloat &apf = CN->getValueAPF();
        EltBits = (CN->getValueType(0) == MVT::f32
-                ? FloatToBits(apf.convertToFloat())
-                : DoubleToBits(apf.convertToDouble()));
+                 ? FloatToBits(apf.convertToFloat())
+                 : DoubleToBits(apf.convertToDouble()));
      } else {
        // Nonconstant element.
        return true;
      }
-    
+
      VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
    }
-  
-  //printf("%llx %llx  %llx %llx\n", 
+
+  //printf("%llx %llx  %llx %llx\n",
    //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
    return false;
  }
  
  /// If this is a splat (repetition) of a value across the whole vector, return
  /// the smallest size that splats it.  For example, "0x01010101010101..." is a
-/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
+/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
  /// SplatSize = 1 byte.
-static bool isConstantSplat(const uint64_t Bits128[2], 
+static bool isConstantSplat(const uint64_t Bits128[2],
                              const uint64_t Undef128[2],
-                           int MinSplatBits,
+                            int MinSplatBits,
                              uint64_t &SplatBits, uint64_t &SplatUndef,
                              int &SplatSize) {
    // Don't let undefs prevent splats from matching.  See if the top 64-bits are
@@ -1521,38 +1560,39 @@ static bool isConstantSplat(const uint64_t Bits128[2],
  
    if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
      if (MinSplatBits < 64) {
-  
+
        // Check that the top 32-bits are the same as the lower 32-bits, ignoring
        // undefs.
        if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
-       if (MinSplatBits < 32) {
-
-         // If the top 16-bits are different than the lower 16-bits, ignoring
-         // undefs, we have an i32 splat.
-         if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
-           if (MinSplatBits < 16) {
-             // If the top 8-bits are different than the lower 8-bits, ignoring
-             // undefs, we have an i16 splat.
-             if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
-               // Otherwise, we have an 8-bit splat.
-               SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
-               SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
-               SplatSize = 1;
-               return true;
-             }
-           } else {
-             SplatBits = Bits16;
-             SplatUndef = Undef16;
-             SplatSize = 2;
-             return true;
-           }
-         }
-       } else {
-         SplatBits = Bits32;
-         SplatUndef = Undef32;
-         SplatSize = 4;
-         return true;
-       }
+        if (MinSplatBits < 32) {
+
+          // If the top 16-bits are different than the lower 16-bits, ignoring
+          // undefs, we have an i32 splat.
+          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
+            if (MinSplatBits < 16) {
+              // If the top 8-bits are different than the lower 8-bits, ignoring
+              // undefs, we have an i16 splat.
+              if ((Bits16 & (uint16_t(~Undef16) >> 8))
+                  == ((Bits16 >> 8) & ~Undef16)) {
+                // Otherwise, we have an 8-bit splat.
+                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
+                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
+                SplatSize = 1;
+                return true;
+              }
+            } else {
+              SplatBits = Bits16;
+              SplatUndef = Undef16;
+              SplatSize = 2;
+              return true;
+            }
+          }
+        } else {
+          SplatBits = Bits32;
+          SplatUndef = Undef32;
+          SplatSize = 4;
+          return true;
+        }
        }
      } else {
        SplatBits = Bits128[0];
@@ -1570,48 +1610,48 @@ static bool isConstantSplat(const uint64_t Bits128[2],
  // selects to a single instruction, return Op.  Otherwise, if we can codegen
  // this case more efficiently than a constant pool load, lower it to the
  // sequence of ops that should be used.
-static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
-  MVT::ValueType VT = Op.getValueType();
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
    // If this is a vector of constants or undefs, get the bits.  A bit in
-  // UndefBits is set if the corresponding element of the vector is an 
+  // UndefBits is set if the corresponding element of the vector is an
    // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
-  // zero. 
+  // zero.
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;
-  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
+  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
        || !isConstantSplat(VectorBits, UndefBits,
-                         MVT::getSizeInBits(MVT::getVectorElementType(VT)),
+                          VT.getVectorElementType().getSizeInBits(),
                            SplatBits, SplatUndef, SplatSize))
-    return SDOperand();   // Not a constant vector, not a splat.
-  
-  switch (VT) {
+    return SDValue();   // Not a constant vector, not a splat.
+
+  switch (VT.getSimpleVT()) {
    default:
    case MVT::v4f32: {
      uint32_t Value32 = SplatBits;
      assert(SplatSize == 4
-          && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
      // NOTE: pretend the constant is an integer. LLVM won't load FP constants
-    SDOperand T = DAG.getConstant(Value32, MVT::i32);
+    SDValue T = DAG.getConstant(Value32, MVT::i32);
      return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
-                      DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
+                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
      break;
    }
    case MVT::v2f64: {
      uint64_t f64val = SplatBits;
      assert(SplatSize == 8
-          && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
+           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
      // NOTE: pretend the constant is an integer. LLVM won't load FP constants
-    SDOperand T = DAG.getConstant(f64val, MVT::i64);
+    SDValue T = DAG.getConstant(f64val, MVT::i64);
      return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
-                      DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
+                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
      break;
    }
    case MVT::v16i8: {
     // 8-bit constants have to be expanded to 16-bits
     unsigned short Value16 = SplatBits | (SplatBits << 8);
-   SDOperand Ops[8];
+   SDValue Ops[8];
     for (int i = 0; i < 8; ++i)
       Ops[i] = DAG.getConstant(Value16, MVT::i16);
     return DAG.getNode(ISD::BIT_CONVERT, VT,
@@ -1619,18 +1659,18 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
    }
    case MVT::v8i16: {
      unsigned short Value16;
-    if (SplatSize == 2) 
+    if (SplatSize == 2)
        Value16 = (unsigned short) (SplatBits & 0xffff);
      else
        Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
-    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
-    SDOperand Ops[8];
+    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
+    SDValue Ops[8];
      for (int i = 0; i < 8; ++i) Ops[i] = T;
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
    }
    case MVT::v4i32: {
      unsigned int Value = SplatBits;
-    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
+    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
      return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
    }
    case MVT::v2i64: {
@@ -1638,11 +1678,15 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
      uint32_t upper = uint32_t(val >> 32);
      uint32_t lower = uint32_t(val);
  
-    if (val != 0) {
-      SDOperand LO32;
-      SDOperand HI32;
-      SmallVector<SDOperand, 16> ShufBytes;
-      SDOperand Result;
+    if (upper == lower) {
+      // Magic constant that can be matched by IL, ILA, et al.
+      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
+      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
+    } else {
+      SDValue LO32;
+      SDValue HI32;
+      SmallVector<SDValue, 16> ShufBytes;
+      SDValue Result;
        bool upper_special, lower_special;
  
        // NOTE: This code creates common-case shuffle masks that can be easily
@@ -1655,74 +1699,68 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  
        // Create lower vector if not a special pattern
        if (!lower_special) {
-       SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
-       LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
-                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                                      LO32C, LO32C, LO32C, LO32C));
+        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
+                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                       LO32C, LO32C, LO32C, LO32C));
        }
  
        // Create upper vector if not a special pattern
        if (!upper_special) {
-       SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
-       HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
-                          DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                                      HI32C, HI32C, HI32C, HI32C));
+        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
+                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                       HI32C, HI32C, HI32C, HI32C));
        }
  
        // If either upper or lower are special, then the two input operands are
        // the same (basically, one of them is a "don't care")
        if (lower_special)
-       LO32 = HI32;
+        LO32 = HI32;
        if (upper_special)
-       HI32 = LO32;
+        HI32 = LO32;
        if (lower_special && upper_special) {
-       // Unhappy situation... both upper and lower are special, so punt with
-       // a target constant:
-        SDOperand Zero = DAG.getConstant(0, MVT::i32);
-       HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
+        // Unhappy situation... both upper and lower are special, so punt with
+        // a target constant:
+        SDValue Zero = DAG.getConstant(0, MVT::i32);
+        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                    Zero, Zero);
        }
  
        for (int i = 0; i < 4; ++i) {
-       for (int j = 0; j < 4; ++j) {
-         SDOperand V;
-         bool process_upper, process_lower;
-         uint64_t val = 0;
-
-         process_upper = (upper_special && (i & 1) == 0);
-         process_lower = (lower_special && (i & 1) == 1);
-
-         if (process_upper || process_lower) {
-           if ((process_upper && upper == 0)
-               || (process_lower && lower == 0))
-             val = 0x80;
-           else if ((process_upper && upper == 0xffffffff)
-                    || (process_lower && lower == 0xffffffff))
-             val = 0xc0;
-           else if ((process_upper && upper == 0x80000000)
-                    || (process_lower && lower == 0x80000000))
-             val = (j == 0 ? 0xe0 : 0x80);
-         } else
-           val = i * 4 + j + ((i & 1) * 16);
-
-         ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
-       }
+        uint64_t val = 0;
+        for (int j = 0; j < 4; ++j) {
+          SDValue V;
+          bool process_upper, process_lower;
+          val <<= 8;
+          process_upper = (upper_special && (i & 1) == 0);
+          process_lower = (lower_special && (i & 1) == 1);
+
+          if (process_upper || process_lower) {
+            if ((process_upper && upper == 0)
+                || (process_lower && lower == 0))
+              val |= 0x80;
+            else if ((process_upper && upper == 0xffffffff)
+                     || (process_lower && lower == 0xffffffff))
+              val |= 0xc0;
+            else if ((process_upper && upper == 0x80000000)
+                     || (process_lower && lower == 0x80000000))
+              val |= (j == 0 ? 0xe0 : 0x80);
+          } else
+            val |= i * 4 + j + ((i & 1) * 16);
+        }
+
+        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
        }
  
        return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
-                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
-                                    &ShufBytes[0], ShufBytes.size()));
-    } else {
-      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
-      SDOperand Zero = DAG.getConstant(0, MVT::i32);
-      return DAG.getNode(ISD::BIT_CONVERT, VT,
-                        DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                                    Zero, Zero, Zero, Zero));
+                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                     &ShufBytes[0], ShufBytes.size()));
      }
    }
    }
- 
-  return SDOperand();
+
+  return SDValue();
  }
  
  /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
@@ -1738,17 +1776,17 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  /// element move from V2 into V1.
  /// \note
  /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
-static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand V1 = Op.getOperand(0);
-  SDOperand V2 = Op.getOperand(1);
-  SDOperand PermMask = Op.getOperand(2);
-  
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  SDValue PermMask = Op.getOperand(2);
+
    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
-  
+
    // If we have a single element being moved from V1 to V2, this can be handled
    // using the C*[DX] compute mask instructions, but the vector elements have
    // to be monotonically increasing with one exception element.
-  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
+  MVT EltVT = V1.getValueType().getVectorElementType();
    unsigned EltsFromV2 = 0;
    unsigned V2Elt = 0;
    unsigned V2EltIdx0 = 0;
@@ -1769,8 +1807,8 @@ static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
-    else 
-      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
+    else
+      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
  
      if (SrcElt >= V2EltIdx0) {
        ++EltsFromV2;
@@ -1787,57 +1825,58 @@ static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
      // Initialize temporary register to 0
-    SDOperand InitTempReg =
+    SDValue InitTempReg =
        DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
      // Copy register's contents as index in INSERT_MASK:
-    SDOperand ShufMaskOp =
+    SDValue ShufMaskOp =
        DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
-                 DAG.getTargetConstant(V2Elt, MVT::i32),
-                 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
+                  DAG.getTargetConstant(V2Elt, MVT::i32),
+                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
      // Use shuffle mask in SHUFB synthetic instruction:
      return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
    } else {
-    // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
-    unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
-    
-    SmallVector<SDOperand, 16> ResultMask;
+    // Convert the SHUFFLE_VECTOR mask's input element units to the
+    // actual bytes.
+    unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+    SmallVector<SDValue, 16> ResultMask;
      for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
        unsigned SrcElt;
        if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
-       SrcElt = 0;
-      else 
-       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
-      
-      for (unsigned j = 0; j != BytesPerElement; ++j) {
-       ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
-                                            MVT::i8));
+        SrcElt = 0;
+      else
+        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
+
+      for (unsigned j = 0; j < BytesPerElement; ++j) {
+        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+                                             MVT::i8));
        }
      }
-    
-    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
-                                     &ResultMask[0], ResultMask.size());
+
+    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
+                                      &ResultMask[0], ResultMask.size());
      return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
    }
  }
  
-static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand Op0 = Op.getOperand(0);                    // Op0 = the scalar
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
  
-  if (Op0.Val->getOpcode() == ISD::Constant) {
+  if (Op0.getNode()->getOpcode() == ISD::Constant) {
      // For a constant, build the appropriate constant vector, which will
      // eventually simplify to a vector register load.
  
-    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
-    SmallVector<SDOperand, 16> ConstVecValues;
-    MVT::ValueType VT;
+    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+    SmallVector<SDValue, 16> ConstVecValues;
+    MVT VT;
      size_t n_copies;
  
      // Create a constant vector:
-    switch (Op.getValueType()) {
+    switch (Op.getValueType().getSimpleVT()) {
      default: assert(0 && "Unexpected constant value type in "
-                        "LowerSCALAR_TO_VECTOR");
+                         "LowerSCALAR_TO_VECTOR");
      case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
      case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
      case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
@@ -1846,15 +1885,15 @@ static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
      case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
      }
  
-    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
+    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
      for (size_t j = 0; j < n_copies; ++j)
        ConstVecValues.push_back(CValue);
  
      return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
-                      &ConstVecValues[0], ConstVecValues.size());
+                       &ConstVecValues[0], ConstVecValues.size());
    } else {
      // Otherwise, copy the value from one register to another:
-    switch (Op0.getValueType()) {
+    switch (Op0.getValueType().getSimpleVT()) {
      default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
      case MVT::i8:
      case MVT::i16:
@@ -1866,18 +1905,25 @@ static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
      }
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
-static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
-  switch (Op.getValueType()) {
+static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
+  switch (Op.getValueType().getSimpleVT()) {
+  default:
+    cerr << "CellSPU: Unknown vector multiplication, got "
+         << Op.getValueType().getMVTString()
+         << "\n";
+    abort();
+    /*NOTREACHED*/
+
    case MVT::v4i32: {
-    SDOperand rA = Op.getOperand(0);
-    SDOperand rB = Op.getOperand(1);
-    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
-    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
-    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
-    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
+    SDValue rA = Op.getOperand(0);
+    SDValue rB = Op.getOperand(1);
+    SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
+    SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
+    SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
+    SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
  
      return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
      break;
@@ -1888,38 +1934,38 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
    // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
    // c) Use SELB to select upper and lower halves from the intermediate results
    //
-  // NOTE: We really want to move the FSMBI to earlier to actually get the
+  // NOTE: We really want to move the SELECT_MASK earlier to actually get the
    // dual-issue. This code does manage to do this, even if it's a little on
    // the wacky side
    case MVT::v8i16: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
-    SDOperand Chain = Op.getOperand(0);
-    SDOperand rA = Op.getOperand(0);
-    SDOperand rB = Op.getOperand(1);
+    SDValue Chain = Op.getOperand(0);
+    SDValue rA = Op.getOperand(0);
+    SDValue rB = Op.getOperand(1);
      unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
      unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  
-    SDOperand FSMBOp =
+    SDValue FSMBOp =
        DAG.getCopyToReg(Chain, FSMBIreg,
-                      DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
-                                  DAG.getConstant(0xcccc, MVT::i32)));
+                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
+                                   DAG.getConstant(0xcccc, MVT::i16)));
  
-    SDOperand HHProd =
+    SDValue HHProd =
        DAG.getCopyToReg(FSMBOp, HiProdReg,
-                      DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
+                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
  
-    SDOperand HHProd_v4i32 =
+    SDValue HHProd_v4i32 =
        DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
-                 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
+                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
  
      return DAG.getNode(SPUISD::SELB, MVT::v8i16,
-                      DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
-                      DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
-                                  DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
-                                              HHProd_v4i32,
-                                              DAG.getConstant(16, MVT::i16))),
-                      DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
+                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
+                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
+                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
+                                               HHProd_v4i32,
+                                               DAG.getConstant(16, MVT::i16))),
+                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
    }
  
    // This M00sE is N@stI! (apologies to Monty Python)
@@ -1928,107 +1974,88 @@ static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
    // is to break it all apart, sign extend, and reassemble the various
    // intermediate products.
    case MVT::v16i8: {
-    MachineFunction &MF = DAG.getMachineFunction();
-    MachineRegisterInfo &RegInfo = MF.getRegInfo();
-    SDOperand Chain = Op.getOperand(0);
-    SDOperand rA = Op.getOperand(0);
-    SDOperand rB = Op.getOperand(1);
-    SDOperand c8 = DAG.getConstant(8, MVT::i8);
-    SDOperand c16 = DAG.getConstant(16, MVT::i8);
-
-    unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
-    unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
-    unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
+    SDValue rA = Op.getOperand(0);
+    SDValue rB = Op.getOperand(1);
+    SDValue c8 = DAG.getConstant(8, MVT::i32);
+    SDValue c16 = DAG.getConstant(16, MVT::i32);
  
-    SDOperand LLProd =
+    SDValue LLProd =
        DAG.getNode(SPUISD::MPY, MVT::v8i16,
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
  
-    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
+    SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
  
-    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
+    SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
  
-    SDOperand LHProd =
+    SDValue LHProd =
        DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
-                 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
+                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
  
-    SDOperand FSMBdef_2222 =
-      DAG.getCopyToReg(Chain, FSMBreg_2222,
-                      DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
-                                  DAG.getConstant(0x2222, MVT::i32)));
+    SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
+                                     DAG.getConstant(0x2222, MVT::i16));
  
-    SDOperand FSMBuse_2222 =
-      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
-
-    SDOperand LoProd_1 =
-      DAG.getCopyToReg(Chain, LoProd_reg,
-                      DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
-                                  FSMBuse_2222));
+    SDValue LoProdParts =
+      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
+                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
+                              LLProd, LHProd, FSMBmask));
  
-    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
+    SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
  
-    SDOperand LoProd = 
+    SDValue LoProd =
        DAG.getNode(ISD::AND, MVT::v4i32,
-                 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
-                 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             LoProdMask, LoProdMask,
-                             LoProdMask, LoProdMask));
+                  LoProdParts,
+                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                              LoProdMask, LoProdMask,
+                              LoProdMask, LoProdMask));
  
-    SDOperand rAH =
+    SDValue rAH =
        DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
  
-    SDOperand rBH =
+    SDValue rBH =
        DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
  
-    SDOperand HLProd =
+    SDValue HLProd =
        DAG.getNode(SPUISD::MPY, MVT::v8i16,
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
  
-    SDOperand HHProd_1 =
+    SDValue HHProd_1 =
        DAG.getNode(SPUISD::MPY, MVT::v8i16,
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
-                             DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
-                 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
-                             DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
-
-    SDOperand HHProd =
-      DAG.getCopyToReg(Chain, HiProd_reg,
-                      DAG.getNode(SPUISD::SELB, MVT::v8i16,
-                                  HLProd,
-                                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
-                                  FSMBuse_2222));
-
-    SDOperand HiProd =
-      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
-                 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
+                              DAG.getNode(SPUISD::VEC_SRA,
+                                          MVT::v4i32, rAH, c8)),
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
+                              DAG.getNode(SPUISD::VEC_SRA,
+                                          MVT::v4i32, rBH, c8)));
+
+    SDValue HHProd =
+      DAG.getNode(SPUISD::SELB, MVT::v8i16,
+                  HLProd,
+                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
+                  FSMBmask);
+
+    SDValue HiProd =
+      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
  
      return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
-                      DAG.getNode(ISD::OR, MVT::v4i32,
-                                  LoProd, HiProd));
+                       DAG.getNode(ISD::OR, MVT::v4i32,
+                                   LoProd, HiProd));
    }
-
-  default:
-    cerr << "CellSPU: Unknown vector multiplication, got "
-         << MVT::getValueTypeString(Op.getValueType())
-        << "\n";
-    abort();
-    /*NOTREACHED*/
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
-static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
+static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
  
-  SDOperand A = Op.getOperand(0);
-  SDOperand B = Op.getOperand(1);
-  unsigned VT = Op.getValueType();
+  SDValue A = Op.getOperand(0);
+  SDValue B = Op.getOperand(1);
+  MVT VT = Op.getValueType();
  
    unsigned VRegBR, VRegC;
  
@@ -2044,38 +2071,38 @@ static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  
    // Computes BRcpl =
    // (Floating Interpolate (FP Reciprocal Estimate B))
-  SDOperand BRcpl =
-      DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, 
-                      DAG.getNode(SPUISD::FPInterp, VT, B, 
-                               DAG.getNode(SPUISD::FPRecipEst, VT, B)));
-  
+  SDValue BRcpl =
+      DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
+                       DAG.getNode(SPUISD::FPInterp, VT, B,
+                                DAG.getNode(SPUISD::FPRecipEst, VT, B)));
+
    // Computes A * BRcpl and stores in a temporary register
-  SDOperand AxBRcpl =
+  SDValue AxBRcpl =
        DAG.getCopyToReg(BRcpl, VRegC,
-                DAG.getNode(ISD::FMUL, VT, A, 
-                       DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
+                 DAG.getNode(ISD::FMUL, VT, A,
+                        DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
    // What's the Chain variable do? It's magic!
    // TODO: set Chain = Op(0).getEntryNode()
-  
-  return DAG.getNode(ISD::FADD, VT, 
-               DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
-               DAG.getNode(ISD::FMUL, VT, 
-                       DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), 
-                       DAG.getNode(ISD::FSUB, VT, A,
-                           DAG.getNode(ISD::FMUL, VT, B, 
-                           DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
+
+  return DAG.getNode(ISD::FADD, VT,
+                DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
+                DAG.getNode(ISD::FMUL, VT,
+                        DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
+                        DAG.getNode(ISD::FSUB, VT, A,
+                            DAG.getNode(ISD::FMUL, VT, B,
+                            DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
  }
  
-static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
-  unsigned VT = Op.getValueType();
-  SDOperand N = Op.getOperand(0);
-  SDOperand Elt = Op.getOperand(1);
-  SDOperand ShufMask[16];
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  SDValue N = Op.getOperand(0);
+  SDValue Elt = Op.getOperand(1);
+  SDValue ShufMask[16];
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
  
    assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
  
-  int EltNo = (int) C->getValue();
+  int EltNo = (int) C->getZExtValue();
  
    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
@@ -2094,9 +2121,11 @@ static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  
    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
-  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
+  int elt_byte = EltNo * VT.getSizeInBits() / 8;
  
-  switch (VT) {
+  switch (VT.getSimpleVT()) {
+  default:
+    assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
@@ -2116,7 +2145,7 @@ static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
    }
  
    assert(prefslot_begin != -1 && prefslot_end != -1 &&
-        "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
  
    for (int i = 0; i < 16; ++i) {
      // zero fill uppper part of preferred slot, don't care about the
@@ -2125,54 +2154,55 @@ static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  
      if (i <= prefslot_end) {
        mask_val =
-       ((i < prefslot_begin)
-        ? 0x80
-        : elt_byte + (i - prefslot_begin));
+        ((i < prefslot_begin)
+         ? 0x80
+         : elt_byte + (i - prefslot_begin));
  
        ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
-    } else 
+    } else
        ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
    }
  
-  SDOperand ShufMaskVec =
+  SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
-               &ShufMask[0],
-               sizeof(ShufMask) / sizeof(ShufMask[0]));
+                &ShufMask[0],
+                sizeof(ShufMask) / sizeof(ShufMask[0]));
  
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
-                    DAG.getNode(SPUISD::SHUFB, N.getValueType(),
-                                N, N, ShufMaskVec));
-                                
+                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
+                                 N, N, ShufMaskVec));
+
  }
  
-static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand VecOp = Op.getOperand(0);
-  SDOperand ValOp = Op.getOperand(1);
-  SDOperand IdxOp = Op.getOperand(2);
-  MVT::ValueType VT = Op.getValueType();
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  SDValue VecOp = Op.getOperand(0);
+  SDValue ValOp = Op.getOperand(1);
+  SDValue IdxOp = Op.getOperand(2);
+  MVT VT = Op.getValueType();
  
    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
  
-  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Use $2 because it's always 16-byte aligned and it's available:
-  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
+  SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
  
-  SDOperand result =
+  SDValue result =
      DAG.getNode(SPUISD::SHUFB, VT,
                  DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                  VecOp,
                  DAG.getNode(SPUISD::INSERT_MASK, VT,
                              DAG.getNode(ISD::ADD, PtrVT,
                                          PtrBase,
-                                        DAG.getConstant(CN->getValue(),
-                                                       PtrVT))));
+                                        DAG.getConstant(CN->getZExtValue(),
+                                                        PtrVT))));
  
    return result;
  }
  
-static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
-  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
+{
+  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  
    assert(Op.getValueType() == MVT::i8);
    switch (Opc) {
@@ -2183,151 +2213,343 @@ static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
    case ISD::SUB: {
      // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
      // the result:
-    SDOperand N1 = Op.getOperand(1);
+    SDValue N1 = Op.getOperand(1);
      N0 = (N0.getOpcode() != ISD::Constant
            ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
+          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+                            MVT::i16));
      N1 = (N1.getOpcode() != ISD::Constant
            ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8, 
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+                            MVT::i16));
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                         DAG.getNode(Opc, MVT::i16, N0, N1));
-  } 
+  }
    case ISD::ROTR:
    case ISD::ROTL: {
-    SDOperand N1 = Op.getOperand(1);
+    SDValue N1 = Op.getOperand(1);
      unsigned N1Opc;
      N0 = (N0.getOpcode() != ISD::Constant
            ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
-    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
+          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+                            MVT::i16));
+    N1Opc = N1.getValueType().bitsLT(MVT::i16)
+            ? ISD::ZERO_EXTEND
+            : ISD::TRUNCATE;
      N1 = (N1.getOpcode() != ISD::Constant
            ? DAG.getNode(N1Opc, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
-    SDOperand ExpandArg =
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+                            MVT::i16));
+    SDValue ExpandArg =
        DAG.getNode(ISD::OR, MVT::i16, N0,
                    DAG.getNode(ISD::SHL, MVT::i16,
                                N0, DAG.getConstant(8, MVT::i16)));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8, 
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                         DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
    }
    case ISD::SRL:
    case ISD::SHL: {
-    SDOperand N1 = Op.getOperand(1);
+    SDValue N1 = Op.getOperand(1);
      unsigned N1Opc;
      N0 = (N0.getOpcode() != ISD::Constant
            ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
-    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
+          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+                            MVT::i16));
+    N1Opc = N1.getValueType().bitsLT(MVT::i16)
+            ? ISD::ZERO_EXTEND
+            : ISD::TRUNCATE;
      N1 = (N1.getOpcode() != ISD::Constant
            ? DAG.getNode(N1Opc, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8, 
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+                            MVT::i16));
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                         DAG.getNode(Opc, MVT::i16, N0, N1));
    }
    case ISD::SRA: {
-    SDOperand N1 = Op.getOperand(1);
+    SDValue N1 = Op.getOperand(1);
      unsigned N1Opc;
      N0 = (N0.getOpcode() != ISD::Constant
            ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
-    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
+          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+                            MVT::i16));
+    N1Opc = N1.getValueType().bitsLT(MVT::i16)
+            ? ISD::SIGN_EXTEND
+            : ISD::TRUNCATE;
      N1 = (N1.getOpcode() != ISD::Constant
            ? DAG.getNode(N1Opc, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8, 
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+                            MVT::i16));
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                         DAG.getNode(Opc, MVT::i16, N0, N1));
    }
    case ISD::MUL: {
-    SDOperand N1 = Op.getOperand(1);
+    SDValue N1 = Op.getOperand(1);
      unsigned N1Opc;
      N0 = (N0.getOpcode() != ISD::Constant
            ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
-          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
-    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
+          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+                            MVT::i16));
+    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
      N1 = (N1.getOpcode() != ISD::Constant
            ? DAG.getNode(N1Opc, MVT::i16, N1)
-          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
-    return DAG.getNode(ISD::TRUNCATE, MVT::i8, 
+          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+                            MVT::i16));
+    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                         DAG.getNode(Opc, MVT::i16, N0, N1));
      break;
    }
    }
  
-  return SDOperand();
+  return SDValue();
+}
+
+static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
+{
+  MVT VT = Op.getValueType();
+  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+
+  SDValue Op0 = Op.getOperand(0);
+
+  switch (Opc) {
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND: {
+    MVT Op0VT = Op0.getValueType();
+    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+
+    assert(Op0VT == MVT::i32
+           && "CellSPU: Zero/sign extending something other than i32");
+    DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
+
+    unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
+                      ? SPUISD::ROTBYTES_RIGHT_S
+                      : SPUISD::ROTQUAD_RZ_BYTES);
+    SDValue PromoteScalar =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+                       DAG.getNode(ISD::BIT_CONVERT, VecVT,
+                                   DAG.getNode(NewOpc, Op0VecVT,
+                                               PromoteScalar,
+                                               DAG.getConstant(4, MVT::i32))));
+  }
+
+  case ISD::ADD: {
+    // Turn operands into vectors to satisfy type checking (shufb works on
+    // vectors)
+    SDValue Op0 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+    SDValue Op1 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+    SmallVector<SDValue, 16> ShufBytes;
+
+    // Create the shuffle mask for "rotating" the carry up one register slot
+    // once the carry is generated.
+    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+    SDValue CarryGen =
+      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
+    SDValue ShiftedCarry =
+      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
+                  CarryGen, CarryGen,
+                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                              &ShufBytes[0], ShufBytes.size()));
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
+                                   Op0, Op1, ShiftedCarry));
+  }
+
+  case ISD::SUB: {
+    // Turn operands into vectors to satisfy type checking (shufb works on
+    // vectors)
+    SDValue Op0 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+    SDValue Op1 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+    SmallVector<SDValue, 16> ShufBytes;
+
+    // Create the shuffle mask for "rotating" the borrow up one register slot
+    // once the borrow is generated.
+    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+    SDValue BorrowGen =
+      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
+    SDValue ShiftedBorrow =
+      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
+                  BorrowGen, BorrowGen,
+                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                              &ShufBytes[0], ShufBytes.size()));
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
+                                   Op0, Op1, ShiftedBorrow));
+  }
+
+  case ISD::SHL: {
+    SDValue ShiftAmt = Op.getOperand(1);
+    MVT ShiftAmtVT = ShiftAmt.getValueType();
+    SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
+    SDValue MaskLower =
+      DAG.getNode(SPUISD::SELB, VecVT,
+                  Op0Vec,
+                  DAG.getConstant(0, VecVT),
+                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
+                              DAG.getConstant(0xff00ULL, MVT::i16)));
+    SDValue ShiftAmtBytes =
+      DAG.getNode(ISD::SRL, ShiftAmtVT,
+                  ShiftAmt,
+                  DAG.getConstant(3, ShiftAmtVT));
+    SDValue ShiftAmtBits =
+      DAG.getNode(ISD::AND, ShiftAmtVT,
+                  ShiftAmt,
+                  DAG.getConstant(7, ShiftAmtVT));
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
+                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
+                                               MaskLower, ShiftAmtBytes),
+                                   ShiftAmtBits));
+  }
+
+  case ISD::SRL: {
+    MVT VT = Op.getValueType();
+    SDValue ShiftAmt = Op.getOperand(1);
+    MVT ShiftAmtVT = ShiftAmt.getValueType();
+    SDValue ShiftAmtBytes =
+      DAG.getNode(ISD::SRL, ShiftAmtVT,
+                  ShiftAmt,
+                  DAG.getConstant(3, ShiftAmtVT));
+    SDValue ShiftAmtBits =
+      DAG.getNode(ISD::AND, ShiftAmtVT,
+                  ShiftAmt,
+                  DAG.getConstant(7, ShiftAmtVT));
+
+    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
+                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
+                                   Op0, ShiftAmtBytes),
+                       ShiftAmtBits);
+  }
+
+  case ISD::SRA: {
+    // Promote Op0 to vector
+    SDValue Op0 =
+      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+    SDValue ShiftAmt = Op.getOperand(1);
+    MVT ShiftVT = ShiftAmt.getValueType();
+
+    // Negate variable shift amounts
+    if (!isa<ConstantSDNode>(ShiftAmt)) {
+      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
+                             DAG.getConstant(0, ShiftVT), ShiftAmt);
+    }
+
+    SDValue UpperHalfSign =
+      DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
+                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
+                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
+                                          Op0, DAG.getConstant(31, MVT::i32))));
+    SDValue UpperHalfSignMask =
+      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
+    SDValue UpperLowerMask =
+      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
+                  DAG.getConstant(0xff00, MVT::i16));
+    SDValue UpperLowerSelect =
+      DAG.getNode(SPUISD::SELB, MVT::v2i64,
+                  UpperHalfSignMask, Op0, UpperLowerMask);
+    SDValue RotateLeftBytes =
+      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
+                  UpperLowerSelect, ShiftAmt);
+    SDValue RotateLeftBits =
+      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
+                  RotateLeftBytes, ShiftAmt);
+
+    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+                       RotateLeftBits);
+  }
+  }
+
+  return SDValue();
  }
  
  //! Lower byte immediate operations for v16i8 vectors:
-static SDOperand
-LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand ConstVec;
-  SDOperand Arg;
-  MVT::ValueType VT = Op.getValueType();
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+  SDValue ConstVec;
+  SDValue Arg;
+  MVT VT = Op.getValueType();
  
    ConstVec = Op.getOperand(0);
    Arg = Op.getOperand(1);
-  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
-    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
+  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      } else {
        ConstVec = Op.getOperand(1);
        Arg = Op.getOperand(0);
-      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
-       ConstVec = ConstVec.getOperand(0);
+      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+        ConstVec = ConstVec.getOperand(0);
        }
      }
    }
  
-  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
+  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
      uint64_t VectorBits[2];
      uint64_t UndefBits[2];
      uint64_t SplatBits, SplatUndef;
      int SplatSize;
  
-    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
-       && isConstantSplat(VectorBits, UndefBits,
-                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
-                          SplatBits, SplatUndef, SplatSize)) {
-      SDOperand tcVec[16];
-      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
+        && isConstantSplat(VectorBits, UndefBits,
+                           VT.getVectorElementType().getSizeInBits(),
+                           SplatBits, SplatUndef, SplatSize)) {
+      SDValue tcVec[16];
+      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
        const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
  
        // Turn the BUILD_VECTOR into a set of target constants:
        for (size_t i = 0; i < tcVecSize; ++i)
-       tcVec[i] = tc;
+        tcVec[i] = tc;
  
-      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
-                        DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
+      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
+                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
      }
    }
-
-  return SDOperand();
+  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
+  // lowered.  Return the operation, rather than a null SDValue.
+  return Op;
  }
  
  //! Lower i32 multiplication
-static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
                            unsigned Opc) {
-  switch (VT) {
+  switch (VT.getSimpleVT()) {
    default:
      cerr << "CellSPU: Unknown LowerMUL value type, got "
-         << MVT::getValueTypeString(Op.getValueType())
-        << "\n";
+         << Op.getValueType().getMVTString()
+         << "\n";
      abort();
      /*NOTREACHED*/
  
    case MVT::i32: {
-    SDOperand rA = Op.getOperand(0);
-    SDOperand rB = Op.getOperand(1);
+    SDValue rA = Op.getOperand(0);
+    SDValue rB = Op.getOperand(1);
  
      return DAG.getNode(ISD::ADD, MVT::i32,
-                      DAG.getNode(ISD::ADD, MVT::i32,
-                                  DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
-                                  DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
-                      DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
+                       DAG.getNode(ISD::ADD, MVT::i32,
+                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
+                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
+                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
    }
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  //! Custom lowering for CTPOP (count population)
@@ -2336,17 +2558,19 @@ static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
    operand. SPU has such an instruction, but it counts the number of
    ones per byte, which then have to be accumulated.
  */
-static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
-  unsigned VT = Op.getValueType();
-  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+  MVT VT = Op.getValueType();
+  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  
-  switch (VT) {
+  switch (VT.getSimpleVT()) {
+  default:
+    assert(false && "Invalid value type!");
    case MVT::i8: {
-    SDOperand N = Op.getOperand(0);
-    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
+    SDValue N = Op.getOperand(0);
+    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
  
-    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
-    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
  
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
    }
@@ -2357,30 +2581,30 @@ static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  
      unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
  
-    SDOperand N = Op.getOperand(0);
-    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
-    SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
-    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
+    SDValue N = Op.getOperand(0);
+    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+    SDValue Shift1 = DAG.getConstant(8, MVT::i16);
  
-    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
-    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
  
      // CNTB_result becomes the chain to which all of the virtual registers
      // CNTB_reg, SUM1_reg become associated:
-    SDOperand CNTB_result =
+    SDValue CNTB_result =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
-                 
-    SDOperand CNTB_rescopy =
+
+    SDValue CNTB_rescopy =
        DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
  
-    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
+    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
  
      return DAG.getNode(ISD::AND, MVT::i16,
-                      DAG.getNode(ISD::ADD, MVT::i16,
-                                  DAG.getNode(ISD::SRL, MVT::i16,
-                                              Tmp1, Shift1),
-                                  Tmp1),
-                      Mask0);
+                       DAG.getNode(ISD::ADD, MVT::i16,
+                                   DAG.getNode(ISD::SRL, MVT::i16,
+                                               Tmp1, Shift1),
+                                   Tmp1),
+                       Mask0);
    }
  
    case MVT::i32: {
@@ -2390,41 +2614,41 @@ static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
      unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
  
-    SDOperand N = Op.getOperand(0);
-    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
-    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
-    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
-    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
+    SDValue N = Op.getOperand(0);
+    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
  
-    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
-    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
  
      // CNTB_result becomes the chain to which all of the virtual registers
      // CNTB_reg, SUM1_reg become associated:
-    SDOperand CNTB_result =
+    SDValue CNTB_result =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
-                 
-    SDOperand CNTB_rescopy =
+
+    SDValue CNTB_rescopy =
        DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
  
-    SDOperand Comp1 =
+    SDValue Comp1 =
        DAG.getNode(ISD::SRL, MVT::i32,
-                 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
+                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
  
-    SDOperand Sum1 =
+    SDValue Sum1 =
        DAG.getNode(ISD::ADD, MVT::i32,
-                 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
+                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
  
-    SDOperand Sum1_rescopy =
+    SDValue Sum1_rescopy =
        DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
  
-    SDOperand Comp2 =
+    SDValue Comp2 =
        DAG.getNode(ISD::SRL, MVT::i32,
-                 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
-                 Shift2);
-    SDOperand Sum2 =
+                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
+                  Shift2);
+    SDValue Sum2 =
        DAG.getNode(ISD::ADD, MVT::i32, Comp2,
-                 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
+                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
  
      return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
    }
@@ -2433,20 +2657,23 @@ static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
      break;
    }
  
-  return SDOperand();
+  return SDValue();
  }
  
  /// LowerOperation - Provide custom lowering hooks for some operations.
  ///
-SDOperand
-SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  {
-  switch (Op.getOpcode()) {
+  unsigned Opc = (unsigned) Op.getOpcode();
+  MVT VT = Op.getValueType();
+
+  switch (Opc) {
    default: {
      cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
-    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
-    cerr << "*Op.Val:\n";
-    Op.Val->dump();
+    cerr << "Op.getOpcode() = " << Opc << "\n";
+    cerr << "*Op.getNode():\n";
+    Op.getNode()->dump();
      abort();
    }
    case ISD::LOAD:
@@ -2465,21 +2692,33 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
      return LowerConstant(Op, DAG);
    case ISD::ConstantFP:
      return LowerConstantFP(Op, DAG);
+  case ISD::BRCOND:
+    return LowerBRCOND(Op, DAG);
    case ISD::FORMAL_ARGUMENTS:
-      return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
+    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
    case ISD::CALL:
-    return LowerCALL(Op, DAG);
+    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
    case ISD::RET:
      return LowerRET(Op, DAG, getTargetMachine());
  
-  // i8 math ops:
+
+  // i8, i64 math ops:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::ADD:
    case ISD::SUB:
    case ISD::ROTR:
    case ISD::ROTL:
    case ISD::SRL:
    case ISD::SHL:
-  case ISD::SRA:
-    return LowerI8Math(Op, DAG, Op.getOpcode());
+  case ISD::SRA: {
+    if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc);
+    else if (VT == MVT::i64)
+      return LowerI64Math(Op, DAG, Opc);
+    break;
+  }
  
    // Vector-related lowering.
    case ISD::BUILD_VECTOR:
@@ -2501,15 +2740,15 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
  
    // Vector and i8 multiply:
    case ISD::MUL:
-    if (MVT::isVector(Op.getValueType()))
+    if (VT.isVector())
        return LowerVectorMUL(Op, DAG);
-    else if (Op.getValueType() == MVT::i8)
-      return LowerI8Math(Op, DAG, Op.getOpcode());
+    else if (VT == MVT::i8)
+      return LowerI8Math(Op, DAG, Opc);
      else
-      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
+      return LowerMUL(Op, DAG, VT, Opc);
  
    case ISD::FDIV:
-    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
+    if (VT == MVT::f32 || VT == MVT::v4f32)
        return LowerFDIVf32(Op, DAG);
  //    else if (Op.getValueType() == MVT::f64)
  //      return LowerFDIVf64(Op, DAG);
@@ -2520,73 +2759,168 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
      return LowerCTPOP(Op, DAG);
    }
  
-  return SDOperand();
-}
-
-//===----------------------------------------------------------------------===//
-//  Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-MachineBasicBlock *
-SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
-                                           MachineBasicBlock *BB)
-{
-  return BB;
+  return SDValue();
  }
  
  //===----------------------------------------------------------------------===//
  // Target Optimization Hooks
  //===----------------------------------------------------------------------===//
  
-SDOperand
+SDValue
  SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  {
  #if 0
    TargetMachine &TM = getTargetMachine();
-  SelectionDAG &DAG = DCI.DAG;
  #endif
-  SDOperand N0 = N->getOperand(0);     // everything has at least one operand
+  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
+  SDValue Result;                     // Initially, NULL result
  
    switch (N->getOpcode()) {
    default: break;
+  case ISD::ADD: {
+    SDValue Op1 = N->getOperand(1);
+
+    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
+      SDValue Op01 = Op0.getOperand(1);
+      if (Op01.getOpcode() == ISD::Constant
+          || Op01.getOpcode() == ISD::TargetConstant) {
+        // (add (SPUindirect <arg>, <const>), <const>) ->
+        // (SPUindirect <arg>, <const + const>)
+        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
+        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
+        SDValue combinedConst =
+          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
+                          Op0.getValueType());
+
+        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
+                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
+        DEBUG(cerr << "With:    (SPUindirect <arg>, "
+                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
+        return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
+                           Op0.getOperand(0), combinedConst);
+      }
+    } else if (isa<ConstantSDNode>(Op0)
+               && Op1.getOpcode() == SPUISD::IndirectAddr) {
+      SDValue Op11 = Op1.getOperand(1);
+      if (Op11.getOpcode() == ISD::Constant
+          || Op11.getOpcode() == ISD::TargetConstant) {
+        // (add <const>, (SPUindirect <arg>, <const>)) ->
+        // (SPUindirect <arg>, <const + const>)
+        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
+        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
+        SDValue combinedConst =
+          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
+                          Op0.getValueType());
+
+        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
+                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
+        DEBUG(cerr << "With:    (SPUindirect <arg>, "
+                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
+
+        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
+                           Op1.getOperand(0), combinedConst);
+      }
+    }
+    break;
+  }
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND: {
+    if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
+        N->getValueType(0) == Op0.getValueType()) {
+      // (any_extend (SPUextract_elt0 <arg>)) ->
+      // (SPUextract_elt0 <arg>)
+      // Types must match, however...
+      DEBUG(cerr << "Replace: ");
+      DEBUG(N->dump(&DAG));
+      DEBUG(cerr << "\nWith:    ");
+      DEBUG(Op0.getNode()->dump(&DAG));
+      DEBUG(cerr << "\n");
+
+      return Op0;
+    }
+    break;
+  }
+  case SPUISD::IndirectAddr: {
+    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
+      if (CN->getZExtValue() == 0) {
+        // (SPUindirect (SPUaform <addr>, 0), 0) ->
+        // (SPUaform <addr>, 0)
  
-  // Look for obvious optimizations for shift left:
-  // a) Replace 0 << V with 0
-  // b) Replace V << 0 with V
-  //
-  // N.B: llvm will generate an undef node if the shift amount is greater than
-  // 15 (e.g.: V << 16), which will naturally trigger an assert.
-  case SPU::SHLIr32:
-  case SPU::SHLHIr16:
-  case SPU::SHLQBIIvec:
-  case SPU::ROTHIr16:
-  case SPU::ROTHIr16_i32:
-  case SPU::ROTIr32:
-  case SPU::ROTIr32_i16:
-  case SPU::ROTQBYIvec:
-  case SPU::ROTQBYBIvec:
-  case SPU::ROTQBIIvec:
-  case SPU::ROTHMIr16:
-  case SPU::ROTMIr32:
-  case SPU::ROTQMBYIvec: {
-    if (N0.getOpcode() == ISD::Constant) {
-      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
-       if (C->getValue() == 0)         // 0 << V -> 0.
-         return N0;
+        DEBUG(cerr << "Replace: ");
+        DEBUG(N->dump(&DAG));
+        DEBUG(cerr << "\nWith:    ");
+        DEBUG(Op0.getNode()->dump(&DAG));
+        DEBUG(cerr << "\n");
+
+        return Op0;
        }
      }
-    SDOperand N1 = N->getOperand(1);
-    if (N1.getOpcode() == ISD::Constant) {
-      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
-       if (C->getValue() == 0)         // V << 0 -> V
-         return N1;
+    break;
+  }
+  case SPUISD::SHLQUAD_L_BITS:
+  case SPUISD::SHLQUAD_L_BYTES:
+  case SPUISD::VEC_SHL:
+  case SPUISD::VEC_SRL:
+  case SPUISD::VEC_SRA:
+  case SPUISD::ROTQUAD_RZ_BYTES:
+  case SPUISD::ROTQUAD_RZ_BITS: {
+    SDValue Op1 = N->getOperand(1);
+
+    if (isa<ConstantSDNode>(Op1)) {
+      // Kill degenerate vector shifts:
+      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+
+      if (CN->getZExtValue() == 0) {
+        Result = Op0;
        }
      }
      break;
    }
+  case SPUISD::PROMOTE_SCALAR: {
+    switch (Op0.getOpcode()) {
+    default:
+      break;
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::SIGN_EXTEND: {
+      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
+      // <arg>
+      // but only if the SPUpromote_scalar and <arg> types match.
+      SDValue Op00 = Op0.getOperand(0);
+      if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
+        SDValue Op000 = Op00.getOperand(0);
+        if (Op000.getValueType() == N->getValueType(0)) {
+          Result = Op000;
+        }
+      }
+      break;
+    }
+    case SPUISD::EXTRACT_ELT0: {
+      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
+      // <arg>
+      Result = Op0.getOperand(0);
+      break;
+    }
+    }
+    break;
+  }
+  }
+  // Otherwise, return unchanged.
+#if 1
+  if (Result.getNode()) {
+    DEBUG(cerr << "\nReplace.SPU: ");
+    DEBUG(N->dump(&DAG));
+    DEBUG(cerr << "\nWith:        ");
+    DEBUG(Result.getNode()->dump(&DAG));
+    DEBUG(cerr << "\n");
    }
-  
-  return SDOperand();
+#endif
+
+  return Result;
  }
  
  //===----------------------------------------------------------------------===//
@@ -2595,7 +2929,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
-SPUTargetLowering::ConstraintType 
+SPUTargetLowering::ConstraintType
  SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
    if (ConstraintLetter.size() == 1) {
      switch (ConstraintLetter[0]) {
@@ -2606,14 +2940,14 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
      case 'v':
      case 'y':
        return C_RegisterClass;
-    }  
+    }
    }
    return TargetLowering::getConstraintType(ConstraintLetter);
  }
  
-std::pair<unsigned, const TargetRegisterClass*> 
+std::pair<unsigned, const TargetRegisterClass*>
  SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                MVT::ValueType VT) const
+                                                MVT VT) const
  {
    if (Constraint.size() == 1) {
      // GCC RS6000 Constraint Letters
@@ -2629,42 +2963,107 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
        else if (VT == MVT::f64)
          return std::make_pair(0U, SPU::R64FPRegisterClass);
        break;
-    case 'v': 
+    case 'v':
        return std::make_pair(0U, SPU::GPRCRegisterClass);
      }
    }
-  
+
    return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
  }
  
+//! Compute used/known bits for a SPU operand
  void
-SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
-                                                 uint64_t Mask,
-                                                 uint64_t &KnownZero, 
-                                                 uint64_t &KnownOne,
-                                                 const SelectionDAG &DAG,
-                                                 unsigned Depth ) const {
-  KnownZero = 0;
-  KnownOne = 0;
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+                                                  const APInt &Mask,
+                                                  APInt &KnownZero,
+                                                  APInt &KnownOne,
+                                                  const SelectionDAG &DAG,
+                                                  unsigned Depth ) const {
+#if 0
+  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
+#endif
+
+  switch (Op.getOpcode()) {
+  default:
+    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+    break;
+
+#if 0
+  case CALL:
+  case SHUFB:
+  case INSERT_MASK:
+  case CNTB:
+#endif
+
+  case SPUISD::PROMOTE_SCALAR: {
+    SDValue Op0 = Op.getOperand(0);
+    MVT Op0VT = Op0.getValueType();
+    unsigned Op0VTBits = Op0VT.getSizeInBits();
+    uint64_t InMask = Op0VT.getIntegerVTBitMask();
+    KnownZero |= APInt(Op0VTBits, ~InMask, false);
+    KnownOne |= APInt(Op0VTBits, InMask, false);
+    break;
+  }
+
+  case SPUISD::LDRESULT:
+  case SPUISD::EXTRACT_ELT0:
+  case SPUISD::EXTRACT_ELT0_CHAINED: {
+    MVT OpVT = Op.getValueType();
+    unsigned OpVTBits = OpVT.getSizeInBits();
+    uint64_t InMask = OpVT.getIntegerVTBitMask();
+    KnownZero |= APInt(OpVTBits, ~InMask, false);
+    KnownOne |= APInt(OpVTBits, InMask, false);
+    break;
+  }
+
+#if 0
+  case EXTRACT_I1_ZEXT:
+  case EXTRACT_I1_SEXT:
+  case EXTRACT_I8_ZEXT:
+  case EXTRACT_I8_SEXT:
+  case MPY:
+  case MPYU:
+  case MPYH:
+  case MPYHH:
+  case SPUISD::SHLQUAD_L_BITS:
+  case SPUISD::SHLQUAD_L_BYTES:
+  case SPUISD::VEC_SHL:
+  case SPUISD::VEC_SRL:
+  case SPUISD::VEC_SRA:
+  case SPUISD::VEC_ROTL:
+  case SPUISD::VEC_ROTR:
+  case SPUISD::ROTQUAD_RZ_BYTES:
+  case SPUISD::ROTQUAD_RZ_BITS:
+  case SPUISD::ROTBYTES_RIGHT_S:
+  case SPUISD::ROTBYTES_LEFT:
+  case SPUISD::ROTBYTES_LEFT_CHAINED:
+  case SPUISD::SELECT_MASK:
+  case SPUISD::SELB:
+  case SPUISD::FPInterp:
+  case SPUISD::FPRecipEst:
+  case SPUISD::SEXT32TO64:
+#endif
+  }
  }
  
  // LowerAsmOperandForConstraint
  void
-SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  char ConstraintLetter,
-                                                std::vector<SDOperand> &Ops,
-                                                SelectionDAG &DAG) {
+                                                std::vector<SDValue> &Ops,
+                                                SelectionDAG &DAG) const {
    // Default, for the time being, to the base class handler
    TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
  }
  
  /// isLegalAddressImmediate - Return true if the integer value can be used
  /// as the offset of the target addressing mode.
-bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+                                                const Type *Ty) const {
    // SPU's addresses are 256K:
    return (V > -(1 << 18) && V < (1 << 18) - 1);
  }
  
  bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
-  return false; 
+  return false;
  }