X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FCellSPU%2FSPUISelLowering.cpp;h=2002f093866325c3ff8cfa628698121347381992;hb=b9505f6bed33c56e2e57f1f8782827cf9bf8613f;hp=c7d7f978216ff00128f135d3fe42012c92a094d6;hpb=b625f2f8960de32bc973092aaee8ac62863006fe;p=oota-llvm.git diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index c7d7f978216..2002f093866 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1,5 +1,4 @@ //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// -// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -14,21 +13,26 @@ #include "SPURegisterNames.h" #include "SPUISelLowering.h" #include "SPUTargetMachine.h" -#include "llvm/ADT/VectorExtras.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "SPUFrameLowering.h" +#include "SPUMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" - +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -37,94 +41,70 @@ using namespace llvm; namespace { std::map node_names; - //! MVT::ValueType mapping to useful data for Cell SPU - struct valtype_map_s { - const MVT::ValueType valtype; - const int prefslot_byte; - }; - - const valtype_map_s valtype_map[] = { - { MVT::i1, 3 }, - { MVT::i8, 3 }, - { MVT::i16, 2 }, - { MVT::i32, 0 }, - { MVT::f32, 0 }, - { MVT::i64, 0 }, - { MVT::f64, 0 }, - { MVT::i128, 0 } - }; - - const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); - - const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) { - const valtype_map_s *retval = 0; - - for (size_t i = 0; i < n_valtype_map; ++i) { - if (valtype_map[i].valtype == VT) { - retval = valtype_map + i; - break; - } - } - -#ifndef NDEBUG - if (retval == 0) { - cerr << "getValueTypeMapEntry returns NULL for " - << MVT::getValueTypeString(VT) - << "\n"; - abort(); - } -#endif + // Byte offset of the preferred slot (counted from the MSB) + int prefslotOffset(EVT VT) { + int retval=0; + if (VT==MVT::i1) retval=3; + if (VT==MVT::i8) retval=3; + if (VT==MVT::i16) retval=2; return retval; } - //! Predicate that returns true if operand is a memory target + //! Expand a library call into an actual call DAG node /*! - \arg Op Operand to test - \return true if the operand is a memory target (i.e., global - address, external symbol, constant pool) or an A-form - address. + \note + This code is taken from SelectionDAGLegalize, since it is not exposed as + part of the LLVM SelectionDAG API. 
*/ - bool isMemoryOperand(const SDOperand &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::GlobalAddress - || Opc == ISD::GlobalTLSAddress - || Opc == ISD::JumpTable - || Opc == ISD::ConstantPool - || Opc == ISD::ExternalSymbol - || Opc == ISD::TargetGlobalAddress - || Opc == ISD::TargetGlobalTLSAddress - || Opc == ISD::TargetJumpTable - || Opc == ISD::TargetConstantPool - || Opc == ISD::TargetExternalSymbol - || Opc == SPUISD::AFormAddr); - } - - //! Predicate that returns true if the operand is an indirect target - bool isIndirectOperand(const SDOperand &Op) - { - const unsigned Opc = Op.getOpcode(); - return (Opc == ISD::Register - || Opc == SPUISD::LDRESULT); + + SDValue + ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, + bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = + Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op.getDebugLoc()); + + return CallInfo.first; } } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM), - SPUTM(TM) -{ - // Fold away setcc operations if possible. - setPow2DivIsCheap(); + : TargetLowering(TM, new TargetLoweringObjectFileELF()), + SPUTM(TM) { // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); - + + // Set RTLIB libcall names as used by SPU: + setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); + // Set up the SPU's register classes: - // NOTE: i8 register class is not registered because we cannot determine when - // we need to zero or sign extend for custom-lowered loads and stores. - // NOTE: Ignore the previous note. For now. 
:-) addRegisterClass(MVT::i8, SPU::R8CRegisterClass); addRegisterClass(MVT::i16, SPU::R16CRegisterClass); addRegisterClass(MVT::i32, SPU::R32CRegisterClass); @@ -132,62 +112,102 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); - + // SPU has no sign or zero extended loads for i1, i8, i16: - setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setTruncStoreAction(MVT::i8, MVT::i1, Custom); - setTruncStoreAction(MVT::i16, MVT::i1, Custom); - setTruncStoreAction(MVT::i32, MVT::i1, Custom); - setTruncStoreAction(MVT::i64, MVT::i1, Custom); - setTruncStoreAction(MVT::i128, MVT::i1, Custom); - - setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom); - setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom); - setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom); - setTruncStoreAction(MVT::i8 , MVT::i8, Custom); - setTruncStoreAction(MVT::i16 , MVT::i8, Custom); - setTruncStoreAction(MVT::i32 , MVT::i8, Custom); - setTruncStoreAction(MVT::i64 , MVT::i8, Custom); - setTruncStoreAction(MVT::i128, MVT::i8, Custom); - - setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom); - setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom); - setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + + setTruncStoreAction(MVT::i128, MVT::i64, Expand); + setTruncStoreAction(MVT::i128, MVT::i32, Expand); + setTruncStoreAction(MVT::i128, MVT::i16, Expand); + setTruncStoreAction(MVT::i128, MVT::i8, Expand); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // SPU constant load actions are custom lowered: - setOperationAction(ISD::Constant, MVT::i64, Custom); - setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); // SPU's loads and stores have to be custom lowered: - for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128; + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; ++sctype) { - setOperationAction(ISD::LOAD, sctype, Custom); - setOperationAction(ISD::STORE, sctype, Custom); + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } } - // Custom lower BRCOND for i1, i8 to "promote" the result to - // i32 and i16, respectively. 
- setOperationAction(ISD::BRCOND, MVT::Other, Custom); + for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; + ++sctype) { + MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } + } // Expand the jumptable branches setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + // Custom lower SELECT_CC for most cases, but expand by default + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); // SPU has no intrinsics for these particular operations: - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); - - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + // SPU has no division/remainder instructions + setOperationAction(ISD::SREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i16, Expand); + setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::SDIVREM, MVT::i16, Expand); + setOperationAction(ISD::UDIVREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i128, Expand); + setOperationAction(ISD::UREM, MVT::i128, Expand); + setOperationAction(ISD::SDIV, MVT::i128, Expand); + setOperationAction(ISD::UDIV, MVT::i128, Expand); + setOperationAction(ISD::SDIVREM, MVT::i128, Expand); + setOperationAction(ISD::UDIVREM, MVT::i128, Expand); + // We don't support sin/cos/sqrt/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); @@ -195,34 +215,68 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); - - 
// If we're enabling GP optimizations, use hardware square root + + // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt + // for f32!) setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); - + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); // SPU can do rotate right and left, so legalize it... but customize for i8 // because instructions don't exist. - setOperationAction(ISD::ROTR, MVT::i32, Legal); - setOperationAction(ISD::ROTR, MVT::i16, Legal); - setOperationAction(ISD::ROTR, MVT::i8, Custom); + + // FIXME: Change from "expand" to appropriate type once ROTR is supported in + // .td files. + setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); + setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); + setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); + setOperationAction(ISD::ROTL, MVT::i32, Legal); setOperationAction(ISD::ROTL, MVT::i16, Legal); setOperationAction(ISD::ROTL, MVT::i8, Custom); + // SPU has no native version of shift left/right for i8 setOperationAction(ISD::SHL, MVT::i8, Custom); setOperationAction(ISD::SRL, MVT::i8, Custom); setOperationAction(ISD::SRA, MVT::i8, Custom); - // Custom lower i32 multiplications - setOperationAction(ISD::MUL, MVT::i32, Custom); + // Make these operations legal and handle them during instruction selection: + setOperationAction(ISD::SHL, MVT::i64, Legal); + setOperationAction(ISD::SRL, MVT::i64, Legal); + setOperationAction(ISD::SRA, MVT::i64, Legal); - // Need to custom handle (some) common i8 math ops - setOperationAction(ISD::SUB, MVT::i8, Custom); + // Custom lower i8, i32 and i64 multiplications setOperationAction(ISD::MUL, MVT::i8, Custom); - + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MUL, MVT::i64, Legal); + + // Expand double-width multiplication + // FIXME: It would probably be reasonable to support some of these operations + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + + // Need to custom handle (some) common i8, i64 math ops + setOperationAction(ISD::ADD, MVT::i8, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); + setOperationAction(ISD::SUB, MVT::i8, Custom); + setOperationAction(ISD::SUB, MVT::i64, Legal); + // SPU does not have BSWAP. It does have i32 support CTLZ. // CTPOP has to be custom lowered. 
setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -232,81 +286,92 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i16, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i128, Expand); + setOperationAction(ISD::CTTZ , MVT::i8, Expand); + setOperationAction(ISD::CTTZ , MVT::i16, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i64, Expand); + setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ , MVT::i8, Promote); + setOperationAction(ISD::CTLZ , MVT::i16, Promote); setOperationAction(ISD::CTLZ , MVT::i32, Legal); - - // SPU does not have select or setcc - setOperationAction(ISD::SELECT, MVT::i1, Expand); - setOperationAction(ISD::SELECT, MVT::i8, Expand); - setOperationAction(ISD::SELECT, MVT::i16, Expand); - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); - - setOperationAction(ISD::SETCC, MVT::i1, Expand); - setOperationAction(ISD::SETCC, MVT::i8, Expand); - setOperationAction(ISD::SETCC, MVT::i16, Expand); - setOperationAction(ISD::SETCC, MVT::i32, Expand); - setOperationAction(ISD::SETCC, MVT::i64, Expand); - setOperationAction(ISD::SETCC, MVT::f32, Expand); - setOperationAction(ISD::SETCC, MVT::f64, Expand); - - // SPU has a legal FP -> signed INT instruction - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); + setOperationAction(ISD::CTLZ , MVT::i128, Expand); + + // SPU has a version of select that implements (a&~c)|(b&c), just like + // select ought to work: + setOperationAction(ISD::SELECT, MVT::i8, Legal); + setOperationAction(ISD::SELECT, MVT::i16, Legal); + setOperationAction(ISD::SELECT, MVT::i32, Legal); + setOperationAction(ISD::SELECT, MVT::i64, Legal); + + setOperationAction(ISD::SETCC, MVT::i8, Legal); + setOperationAction(ISD::SETCC, MVT::i16, Legal); + setOperationAction(ISD::SETCC, MVT::i32, Legal); + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + // Custom lower i128 -> i64 truncates + setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); + + // Custom lower i32/i64 -> i128 sign extend + setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + // SPU has a legal FP -> signed INT instruction for f32, but for f64, need + // to expand to a libcall, hence the custom lowering: + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); // FDIV on SPU requires custom lowering - setOperationAction(ISD::FDIV, MVT::f32, Custom); - //setOperationAction(ISD::FDIV, MVT::f64, Custom); + 
setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall - // SPU has [U|S]INT_TO_FP - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal); - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal); - setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal); - setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal); + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::BITCAST, MVT::i64, Legal); + setOperationAction(ISD::BITCAST, MVT::f64, Legal); // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // Support label based line numbers. - setOperationAction(ISD::LOCATION, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - - // We want to legalize GlobalAddress and ConstantPool nodes into the + + // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. - for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128; + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; ++sctype) { - setOperationAction(ISD::GlobalAddress, sctype, Custom); - setOperationAction(ISD::ConstantPool, sctype, Custom); - setOperationAction(ISD::JumpTable, sctype, Custom); + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; + + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); } - // RET must be custom lowered, to meet ABI requirements - setOperationAction(ISD::RET, MVT::Other, Custom); - // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - + // Use the default implementation. setOperationAction(ISD::VAARG , MVT::Other, Expand); setOperationAction(ISD::VACOPY , MVT::Other, Expand); setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); @@ -314,7 +379,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Cell SPU has instructions for converting between i64 and fp. 
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - + // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); @@ -330,54 +395,63 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // add/sub are legal for all supported vector VT's. - setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal); - setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal); + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); // mul has to be custom lowered. - setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom); - - setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal); - setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal); - setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal); - setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal); - setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal); - setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal); - + setOperationAction(ISD::MUL, VT, Legal); + + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::SELECT, VT, Legal); + setOperationAction(ISD::STORE, VT, Custom); + // These operations need to be expanded: - setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); - setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); - setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); - setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); - setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); // Custom lower build_vector, constant pool spills, insert and // extract vector elements: - setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom); - setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } - setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::AND, MVT::v16i8, Custom); setOperationAction(ISD::OR, MVT::v16i8, Custom); setOperationAction(ISD::XOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); - setSetCCResultType(MVT::i32); + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setShiftAmountType(MVT::i32); - 
setSetCCResultContents(ZeroOrOneSetCCResult); - + setBooleanContents(ZeroOrNegativeOneBooleanContent); + setStackPointerRegisterToSaveRestore(SPU::R1); - + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::ADD); - + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + computeRegisterProperties(); + + // Set pre-RA register scheduler default to BURR, which produces slightly + // better code than the default (could also be TDRR, but TargetLowering.h + // needs a mod to support that model): + setSchedulingPreference(Sched::RegPressure); } const char * @@ -393,37 +467,22 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; - node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK"; + node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR"; - node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0"; - node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED"; - node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT"; - node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT"; - node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT"; - node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT"; - node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; - node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; - node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; - node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; - node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; - node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL"; - node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; + node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; + node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; + node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS"; + node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES"; node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; - node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] = - "SPUISD::ROTBYTES_RIGHT_Z"; - node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] = - "SPUISD::ROTBYTES_RIGHT_S"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; - node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] = - "SPUISD::ROTBYTES_LEFT_CHAINED"; - node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI"; + node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = + "SPUISD::ROTBYTES_LEFT_BITS"; + node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; - node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant"; - node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; - node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; - node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; + node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; + node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; + node_names[(unsigned) SPUISD::MUL64_MARKER] = 
"SPUISD::MUL64_MARKER"; } std::map::iterator i = node_names.find(Opcode); @@ -431,214 +490,262 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const return ((i != node_names.end()) ? i->second : 0); } -//===----------------------------------------------------------------------===// -// Calling convention code: -//===----------------------------------------------------------------------===// - -#include "SPUGenCallingConv.inc" +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { + return 3; +} //===----------------------------------------------------------------------===// -// LowerOperation implementation +// Return the Cell SPU's SETCC result type //===----------------------------------------------------------------------===// -/// Aligned load common code for CellSPU -/*! - \param[in] Op The SelectionDAG load or store operand - \param[in] DAG The selection DAG - \param[in] ST CellSPU subtarget information structure - \param[in,out] alignment Caller initializes this to the load or store node's - value from getAlignment(), may be updated while generating the aligned load - \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned - offset (divisible by 16, modulo 16 == 0) - \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the - offset of the preferred slot (modulo 16 != 0) - \param[in,out] VT Caller initializes this value type to the the load or store - node's loaded or stored value type; may be updated if an i1-extended load or - store. - \param[out] was16aligned true if the base pointer had 16-byte alignment, - otherwise false. Can help to determine if the chunk needs to be rotated. - - Both load and store lowering load a block of data aligned on a 16-byte - boundary. This is the common aligned load code shared between both. - */ -static SDOperand -AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST, - LSBaseSDNode *LSN, - unsigned &alignment, int &alignOffs, int &prefSlotOffs, - MVT::ValueType &VT, bool &was16aligned) -{ - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - const valtype_map_s *vtm = getValueTypeMapEntry(VT); - SDOperand basePtr = LSN->getBasePtr(); - SDOperand chain = LSN->getChain(); - - if (basePtr.getOpcode() == ISD::ADD) { - SDOperand Op1 = basePtr.Val->getOperand(1); - - if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { - const ConstantSDNode *CN = cast(basePtr.getOperand(1)); +MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { + // i8, i16 and i32 are valid SETCC result types + MVT::SimpleValueType retval; - alignOffs = (int) CN->getValue(); - prefSlotOffs = (int) (alignOffs & 0xf); - - // Adjust the rotation amount to ensure that the final result ends up in - // the preferred slot: - prefSlotOffs -= vtm->prefslot_byte; - basePtr = basePtr.getOperand(0); - - // Loading from memory, can we adjust alignment? 
- if (basePtr.getOpcode() == SPUISD::AFormAddr) { - SDOperand APtr = basePtr.getOperand(0); - if (APtr.getOpcode() == ISD::TargetGlobalAddress) { - GlobalAddressSDNode *GSDN = cast(APtr); - alignment = GSDN->getGlobal()->getAlignment(); - } - } - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; - } - } else { - alignOffs = 0; - prefSlotOffs = -vtm->prefslot_byte; + switch(VT.getSimpleVT().SimpleTy){ + case MVT::i1: + case MVT::i8: + retval = MVT::i8; break; + case MVT::i16: + retval = MVT::i16; break; + case MVT::i32: + default: + retval = MVT::i32; } + return retval; +} - if (alignment == 16) { - // Realign the base pointer as a D-Form address: - if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) { - basePtr = DAG.getNode(ISD::ADD, PtrVT, - basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - } - - // Emit the vector load: - was16aligned = true; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); - } +//===----------------------------------------------------------------------===// +// Calling convention code: +//===----------------------------------------------------------------------===// - // Unaligned load or we're using the "large memory" model, which means that - // we have to be very pessimistic: - if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); - } +#include "SPUGenCallingConv.inc" - // Add the offset - basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, - DAG.getConstant((alignOffs & ~0xf), PtrVT)); - was16aligned = false; - return DAG.getLoad(MVT::v16i8, chain, basePtr, - LSN->getSrcValue(), LSN->getSrcValueOffset(), - LSN->isVolatile(), 16); -} +//===----------------------------------------------------------------------===// +// LowerOperation implementation +//===----------------------------------------------------------------------===// /// Custom lower loads for CellSPU /*! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements within a 16-byte block, we have to rotate to extract the requested element. - */ -static SDOperand -LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + + For extending loads, we also want to ensure that the following sequence is + emitted, e.g. for MVT::f32 extending load to MVT::f64: + +\verbatim +%1 v16i8,ch = load +%2 v16i8,ch = rotate %1 +%3 v4f8, ch = bitconvert %2 +%4 f32 = vec2perfslot %3 +%5 f64 = fp_extend %4 +\endverbatim +*/ +static SDValue +LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast(Op); - SDOperand the_chain = LN->getChain(); - MVT::ValueType VT = LN->getMemoryVT(); - MVT::ValueType OpVT = Op.Val->getValueType(0); + SDValue the_chain = LN->getChain(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT InVT = LN->getMemoryVT(); + EVT OutVT = Op.getValueType(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - SDOperand Ops[8]; + int pso = prefslotOffset(InVT); + DebugLoc dl = Op.getDebugLoc(); + EVT vecVT = InVT.isVector()? 
InVT: EVT::getVectorVT(*DAG.getContext(), InVT, + (128 / InVT.getSizeInBits())); + + // two sanity checks + assert( LN->getAddressingMode() == ISD::UNINDEXED + && "we should get only UNINDEXED adresses"); + // clean aligned loads can be selected as-is + if (InVT.getSizeInBits() == 128 && alignment == 16) + return SDValue(); + + // Get pointerinfos to the memory chunk(s) that contain the data to load + uint64_t mpi_offset = LN->getPointerInfo().Offset; + mpi_offset -= mpi_offset%16; + MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); + MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); + + SDValue result; + SDValue basePtr = LN->getBasePtr(); + SDValue rotate; - switch (LN->getAddressingMode()) { - case ISD::UNINDEXED: { - int offset, rotamt; - bool was16aligned; - SDOperand result = - AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned); - - if (result.Val == 0) - return result; + if (alignment == 16) { + ConstantSDNode *CN; - the_chain = result.getValue(1); - // Rotate the chunk if necessary - if (rotamt < 0) - rotamt += 16; - if (rotamt != 0 || !was16aligned) { - SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other); - - Ops[0] = the_chain; - Ops[1] = result; - if (was16aligned) { - Ops[2] = DAG.getConstant(rotamt, MVT::i16); - } else { - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - LoadSDNode *LN1 = cast(result); - Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(), - DAG.getConstant(rotamt, PtrVT)); - } + // Special cases for a known aligned load to simplify the base pointer + // and the rotation amount: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast (basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + int64_t rotamt = int64_t((offset & 0xf) - pso); - result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3); - the_chain = result.getValue(1); - } + if (rotamt < 0) + rotamt += 16; - if (VT == OpVT || ExtType == ISD::EXTLOAD) { - SDVTList scalarvts; - MVT::ValueType vecVT = MVT::v16i8; - - // Convert the loaded v16i8 vector to the appropriate vector type - // specified by the operand: - if (OpVT == VT) { - if (VT != MVT::i1) - vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); - } else - vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT))); + rotate = DAG.getConstant(rotamt, MVT::i16); - Ops[0] = the_chain; - Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result); - scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other); - result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2); - the_chain = result.getValue(1); + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) + || (basePtr.getOpcode() == SPUISD::IndirectAddr + && basePtr.getOperand(0).getOpcode() == SPUISD::Hi + && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { + // Plain aligned a-form address: rotate into preferred slot + // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) + int64_t rotamt = -pso; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getConstant(rotamt, MVT::i16); } else { - // Handle the sign and zero-extending loads for i1 and i8: - unsigned NewOpC; - - if (ExtType == ISD::SEXTLOAD) { - NewOpC = (OpVT == MVT::i1 - ? 
SPUISD::EXTRACT_I1_SEXT - : SPUISD::EXTRACT_I8_SEXT); + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + int64_t rotamt = -pso; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(rotamt, PtrVT)); + } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa(Op1)) { + // Convert the (add , ) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); } else { - assert(ExtType == ISD::ZEXTLOAD); - NewOpC = (OpVT == MVT::i1 - ? SPUISD::EXTRACT_I1_ZEXT - : SPUISD::EXTRACT_I8_ZEXT); + // Convert the (add , ) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); } - - result = DAG.getNode(NewOpC, OpVT, result); + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(-pso, PtrVT)); + } + + // Do the load as a i128 to allow possible shifting + SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, + lowMemPtr, + LN->isVolatile(), LN->isNonTemporal(), 16); + + // When the size is not greater than alignment we get all data with just + // one load + if (alignment >= InVT.getSizeInBits()/8) { + // Update the chain + the_chain = low.getValue(1); + + // Rotate into the preferred slot: + result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128, + low.getValue(0), rotate); + + // Convert the loaded v16i8 vector to the appropriate vector type + // specified by the operand: + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + InVT, (128 / InVT.getSizeInBits())); + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, + DAG.getNode(ISD::BITCAST, dl, vecVT, result)); + } + // When alignment is less than the size, we might need (known only at + // run-time) two loads + // TODO: if the memory address is composed only from constants, we have + // extra kowledge, and might avoid the second load + else { + // storage position offset from lower 16 byte aligned memory chunk + SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, + basePtr, DAG.getConstant( 0xf, MVT::i32 ) ); + // 16 - offset + SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + offset ); + // get a registerfull of ones. 
(this implementation is a workaround: LLVM + // cannot handle 128 bit signed int constants) + SDValue ones = DAG.getConstant(-1, MVT::v4i32 ); + ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); + + SDValue high = DAG.getLoad(MVT::i128, dl, the_chain, + DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(16, PtrVT)), + highMemPtr, + LN->isVolatile(), LN->isNonTemporal(), 16); + + the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), + high.getValue(1)); + + // Shift the (possible) high part right to compensate the misalignemnt. + // if there is no highpart (i.e. value is i64 and offset is 4), this + // will zero out the high value. + high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, + DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + offset + )); + + // Shift the low similarily + // TODO: add SPUISD::SHL_BYTES + low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); + + // Merge the two parts + result = DAG.getNode(ISD::BITCAST, dl, vecVT, + DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); + + if (!InVT.isVector()) { + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); + } + + } + // Handle extending loads by extending the scalar result: + if (ExtType == ISD::SEXTLOAD) { + result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); + } else if (ExtType == ISD::ZEXTLOAD) { + result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); + } else if (ExtType == ISD::EXTLOAD) { + unsigned NewOpc = ISD::ANY_EXTEND; + + if (OutVT.isFloatingPoint()) + NewOpc = ISD::FP_EXTEND; + + result = DAG.getNode(NewOpc, dl, OutVT, result); } - SDVTList retvts = DAG.getVTList(OpVT, MVT::Other); - SDOperand retops[3] = { + SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); + SDValue retops[2] = { result, - the_chain, - DAG.getConstant(alignment, MVT::i32) + the_chain }; - result = DAG.getNode(SPUISD::LDRESULT, retvts, + result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, retops, sizeof(retops) / sizeof(retops[0])); return result; - } - case ISD::PRE_INC: - case ISD::PRE_DEC: - case ISD::POST_INC: - case ISD::POST_DEC: - case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - cerr << (unsigned) LN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ - } - - return SDOperand(); } /// Custom lower stores for CellSPU @@ -647,545 +754,625 @@ LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { within a 16-byte block, we have to generate a shuffle to insert the requested element into its place, then store the resulting block. */ -static SDOperand -LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { +static SDValue +LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { StoreSDNode *SN = cast(Op); - SDOperand Value = SN->getValue(); - MVT::ValueType VT = Value.getValueType(); - MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue Value = SN->getValue(); + EVT VT = Value.getValueType(); + EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); unsigned alignment = SN->getAlignment(); + SDValue result; + EVT vecVT = StVT.isVector()? 
StVT: EVT::getVectorVT(*DAG.getContext(), StVT, + (128 / StVT.getSizeInBits())); + // Get pointerinfos to the memory chunk(s) that contain the data to load + uint64_t mpi_offset = SN->getPointerInfo().Offset; + mpi_offset -= mpi_offset%16; + MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); + MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); + + + // two sanity checks + assert( SN->getAddressingMode() == ISD::UNINDEXED + && "we should get only UNINDEXED adresses"); + // clean aligned loads can be selected as-is + if (StVT.getSizeInBits() == 128 && alignment == 16) + return SDValue(); + + SDValue alignLoadVec; + SDValue basePtr = SN->getBasePtr(); + SDValue the_chain = SN->getChain(); + SDValue insertEltOffs; - switch (SN->getAddressingMode()) { - case ISD::UNINDEXED: { - int chunk_offset, slot_offset; - bool was16aligned; + if (alignment == 16) { + ConstantSDNode *CN; + // Special cases for a known aligned load to simplify the base pointer + // and insertion byte: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast(basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & 0xf), PtrVT)); + + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else { + // Otherwise, assume it's at byte 0 of basePtr + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa(Op1)) { + // Convert the (add , ) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add , ) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - // The vector type we really want to load from the 16-byte chunk, except - // in the case of MVT::i1, which has to be v16i8. 
- unsigned vecVT, stVecVT = MVT::v16i8; - - if (StVT != MVT::i1) - stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT))); - vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); + // Insertion point is solely determined by basePtr's contents + insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - SDOperand alignLoadVec = - AlignedLoad(Op, DAG, ST, SN, alignment, - chunk_offset, slot_offset, VT, was16aligned); + // Load the lower part of the memory to which to store. + SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, + lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16); - if (alignLoadVec.Val == 0) - return alignLoadVec; + // if we don't need to store over the 16 byte boundary, one store suffices + if (alignment >= StVT.getSizeInBits()/8) { + // Update the chain + the_chain = low.getValue(1); - LoadSDNode *LN = cast(alignLoadVec); - SDOperand basePtr = LN->getBasePtr(); - SDOperand the_chain = alignLoadVec.getValue(1); - SDOperand theValue = SN->getValue(); - SDOperand result; + LoadSDNode *LN = cast(low); + SDValue theValue = SN->getValue(); if (StVT != VT - && (theValue.getOpcode() == ISD::AssertZext - || theValue.getOpcode() == ISD::AssertSext)) { + && (theValue.getOpcode() == ISD::AssertZext + || theValue.getOpcode() == ISD::AssertSext)) { // Drill down and get the value for zero- and sign-extended // quantities - theValue = theValue.getOperand(0); + theValue = theValue.getOperand(0); } - chunk_offset &= 0xf; - - SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT); - SDOperand insertEltPtr; - SDOperand insertEltOp; - // If the base pointer is already a D-form address, then just create // a new D-form address with a slot offset and the orignal base pointer. // Otherwise generate a D-form address with the slot offset relative // to the stack pointer, which is always aligned. - DEBUG(cerr << "CellSPU LowerSTORE: basePtr = "); - DEBUG(basePtr.Val->dump(&DAG)); - DEBUG(cerr << "\n"); - - if (basePtr.getOpcode() == SPUISD::IndirectAddr || - (basePtr.getOpcode() == ISD::ADD - && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) { - insertEltPtr = basePtr; - } else { -#if 0 - // $sp is always aligned, so use it when necessary to avoid loading - // an address - SDOperand ptrP = - basePtr.Val->hasOneUse() ? DAG.getRegister(SPU::R1, PtrVT) : basePtr; - insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs); -#else - insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs); +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "CellSPU LowerSTORE: basePtr = "; + basePtr.getNode()->dump(&DAG); + errs() << "\n"; + } #endif - } - insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr); - result = DAG.getNode(SPUISD::SHUFB, vecVT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue), - alignLoadVec, - DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp)); + SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, + insertEltOffs); + SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, + theValue); + + result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, + vectorizeOp, low, + DAG.getNode(ISD::BITCAST, dl, + MVT::v4i32, insertEltOp)); + + result = DAG.getStore(the_chain, dl, result, basePtr, + lowMemPtr, + LN->isVolatile(), LN->isNonTemporal(), + 16); + + } + // do the store when it might cross the 16 byte memory access boundary. + else { + // TODO issue a warning if SN->isVolatile()== true? 
This is likely not + // what the user wanted. + + // address offset from nearest lower 16byte alinged address + SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, + SN->getBasePtr(), + DAG.getConstant(0xf, MVT::i32)); + // 16 - offset + SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + offset); + SDValue hi_shift = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( VT.getSizeInBits()/8, + MVT::i32), + offset_compl); + // 16 - sizeof(Value) + SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + DAG.getConstant( VT.getSizeInBits()/8, + MVT::i32)); + // get a registerfull of ones + SDValue ones = DAG.getConstant(-1, MVT::v4i32); + ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); + + // Create the 128 bit masks that have ones where the data to store is + // located. + SDValue lowmask, himask; + // if the value to store don't fill up the an entire 128 bits, zero + // out the last bits of the mask so that only the value we want to store + // is masked. + // this is e.g. in the case of store i32, align 2 + if (!VT.isVector()){ + Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value); + lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus); + lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, + surplus); + Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); + Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask); - result = DAG.getStore(the_chain, result, basePtr, - LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), LN->getAlignment()); - - return result; - /*UNREACHED*/ - } - case ISD::PRE_INC: - case ISD::PRE_DEC: - case ISD::POST_INC: - case ISD::POST_DEC: - case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - cerr << (unsigned) SN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ + } + else { + lowmask = ones; + Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); + } + // this will zero, if there are no data that goes to the high quad + himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, + offset_compl); + lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask, + offset); + + // Load in the old data and zero out the parts that will be overwritten with + // the new data to store. + SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain, + DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, + DAG.getConstant( 16, PtrVT)), + highMemPtr, + SN->isVolatile(), SN->isNonTemporal(), 16); + the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), + hi.getValue(1)); + + low = DAG.getNode(ISD::AND, dl, MVT::i128, + DAG.getNode( ISD::BITCAST, dl, MVT::i128, low), + DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones)); + hi = DAG.getNode(ISD::AND, dl, MVT::i128, + DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi), + DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones)); + + // Shift the Value to store into place. rlow contains the parts that go to + // the lower memory chunk, rhi has the parts that go to the upper one. 
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset); + rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask); + SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value, + offset_compl); + + // Merge the old data and the new data and store the results + // Need to convert vectors here to integer as 'OR'ing floats assert + rlow = DAG.getNode(ISD::OR, dl, MVT::i128, + DAG.getNode(ISD::BITCAST, dl, MVT::i128, low), + DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow)); + rhi = DAG.getNode(ISD::OR, dl, MVT::i128, + DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi), + DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi)); + + low = DAG.getStore(the_chain, dl, rlow, basePtr, + lowMemPtr, + SN->isVolatile(), SN->isNonTemporal(), 16); + hi = DAG.getStore(the_chain, dl, rhi, + DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, + DAG.getConstant( 16, PtrVT)), + highMemPtr, + SN->isVolatile(), SN->isNonTemporal(), 16); + result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0), + hi.getValue(0)); } - return SDOperand(); + return result; } -/// Generate the address of a constant pool entry. -static SDOperand -LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT::ValueType PtrVT = Op.getValueType(); +//! Generate the address of a constant pool entry. +static SDValue +LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); - Constant *C = CP->getConstVal(); - SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - SDOperand Zero = DAG.getConstant(0, PtrVT); + const Constant *C = CP->getConstVal(); + SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); + SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { - // Just return the SDOperand with the constant pool address in it. - return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero); + // Just return the SDValue with the constant pool address in it. + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); } else { -#if 1 - SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); - SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); - - return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi); -#else - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero); -#endif + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } - assert(0 && - "LowerConstantPool: Relocation model other than static not supported."); - return SDOperand(); + llvm_unreachable("LowerConstantPool: Relocation model other than static" + " not supported."); + return SDValue(); +} + +//! 
Alternate entry point for generating the address of a constant pool entry +SDValue +SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { + return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); } -static SDOperand -LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT::ValueType PtrVT = Op.getValueType(); +static SDValue +LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); - SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - SDOperand Zero = DAG.getConstant(0, PtrVT); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); if (TM.getRelocationModel() == Reloc::Static) { - SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero); - return (!ST->usingLargeMem() - ? JmpAForm - : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero)); + if (!ST->usingLargeMem()) { + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); + } else { + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); + } } - assert(0 && - "LowerJumpTable: Relocation model other than static not supported."); - return SDOperand(); + llvm_unreachable("LowerJumpTable: Relocation model other than static" + " not supported."); + return SDValue(); } -static SDOperand -LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT::ValueType PtrVT = Op.getValueType(); +static SDValue +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); - GlobalValue *GV = GSDN->getGlobal(); - SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); + const GlobalValue *GV = GSDN->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); - SDOperand Zero = DAG.getConstant(0, PtrVT); - + SDValue Zero = DAG.getConstant(0, PtrVT); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + if (TM.getRelocationModel() == Reloc::Static) { if (!ST->usingLargeMem()) { - return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); } else { - SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); - SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); - return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - cerr << "LowerGlobalAddress: Relocation model other than static not " - << "supported.\n"; - abort(); + report_fatal_error("LowerGlobalAddress: Relocation model other than static" + "not supported."); /*NOTREACHED*/ } - return SDOperand(); + return SDValue(); } -//! Custom lower i64 integer constants -/*! - This code inserts all of the necessary juggling that needs to occur to load - a 64-bit constant into a register. - */ -static SDOperand -LowerConstant(SDOperand Op, SelectionDAG &DAG) { - unsigned VT = Op.getValueType(); - ConstantSDNode *CN = cast(Op.Val); +//! 
Custom lower double precision floating point constants +static SDValue +LowerConstantFP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); - if (VT == MVT::i64) { - SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64); - return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); - - } else { - cerr << "LowerConstant: unhandled constant type " - << MVT::getValueTypeString(VT) - << "\n"; - abort(); - /*NOTREACHED*/ - } + if (VT == MVT::f64) { + ConstantFPSDNode *FP = cast(Op.getNode()); - return SDOperand(); -} + assert((FP != 0) && + "LowerConstantFP: Node is not ConstantFPSDNode"); -//! Custom lower single precision floating point constants -/*! - "float" immediates can be lowered as if they were unsigned 32-bit integers. - The SPUISD::SFPConstant pseudo-instruction handles this in the instruction - target description. - */ -static SDOperand -LowerConstantFP(SDOperand Op, SelectionDAG &DAG) { - unsigned VT = Op.getValueType(); - ConstantFPSDNode *FP = cast(Op.Val); - - assert((FP != 0) && - "LowerConstantFP: Node is not ConstantFPSDNode"); - - if (VT == MVT::f32) { - float targetConst = FP->getValueAPF().convertToFloat(); - return DAG.getNode(SPUISD::SFPConstant, VT, - DAG.getTargetConstantFP(targetConst, VT)); - } else if (VT == MVT::f64) { uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); - return DAG.getNode(ISD::BIT_CONVERT, VT, - LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG)); + SDValue T = DAG.getConstant(dbits, MVT::i64); + SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec)); } - return SDOperand(); + return SDValue(); } -//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16) -static SDOperand -LowerBRCOND(SDOperand Op, SelectionDAG &DAG) -{ - SDOperand Cond = Op.getOperand(1); - MVT::ValueType CondVT = Cond.getValueType(); - MVT::ValueType CondNVT; - - if (CondVT == MVT::i1 || CondVT == MVT::i8) { - CondNVT = (CondVT == MVT::i1 ? 
MVT::i32 : MVT::i16); - return DAG.getNode(ISD::BRCOND, Op.getValueType(), - Op.getOperand(0), - DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)), - Op.getOperand(2)); - } else - return SDOperand(); // Unchanged -} +SDValue +SPUTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) + const { -static SDOperand -LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex) -{ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SmallVector ArgValues; - SDOperand Root = Op.getOperand(0); - bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); - - unsigned ArgOffset = SPUFrameInfo::minStackSize(); + SPUFunctionInfo *FuncInfo = MF.getInfo(); + + unsigned ArgOffset = SPUFrameLowering::minStackSize(); unsigned ArgRegIdx = 0; - unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - + unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); + // Add DAG nodes to load the arguments or copy them out of registers. - for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { - SDOperand ArgVal; - bool needsLoad = false; - MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); - unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8; - - switch (ObjectVT) { - default: { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << MVT::getValueTypeString(ObjectVT) - << "\n"; - abort(); - } - case MVT::i8: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - case MVT::i16: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - case MVT::i32: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - case MVT::i64: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - case MVT::f32: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - case MVT::f64: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - 
unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - case MVT::v2f64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - if (!isVarArg && ArgRegIdx < NumArgRegs) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); - ++ArgRegIdx; - } else { - needsLoad = true; - } - break; - } - - // We need to load the argument to a virtual register if we determined above - // that we ran out of physical registers of the appropriate type - if (needsLoad) { - // If the argument is actually used, emit a load from the right stack - // slot. - if (!Op.Val->hasNUsesOfValue(0, ArgNo)) { - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); - } else { - // Don't emit a dead load. - ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + EVT ObjectVT = Ins[ArgNo].VT; + unsigned ObjSize = ObjectVT.getSizeInBits()/8; + SDValue ArgVal; + CCValAssign &VA = ArgLocs[ArgNo]; + + if (VA.isRegLoc()) { + const TargetRegisterClass *ArgRegClass; + + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("LowerFormalArguments Unhandled argument type: " + + Twine(ObjectVT.getEVTString())); + case MVT::i8: + ArgRegClass = &SPU::R8CRegClass; + break; + case MVT::i16: + ArgRegClass = &SPU::R16CRegClass; + break; + case MVT::i32: + ArgRegClass = &SPU::R32CRegClass; + break; + case MVT::i64: + ArgRegClass = &SPU::R64CRegClass; + break; + case MVT::i128: + ArgRegClass = &SPU::GPRCRegClass; + break; + case MVT::f32: + ArgRegClass = &SPU::R32FPRegClass; + break; + case MVT::f64: + ArgRegClass = &SPU::R64FPRegClass; + break; + case MVT::v2f64: + case MVT::v4f32: + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + ArgRegClass = &SPU::VECREGRegClass; + break; } + unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++ArgRegIdx; + } else { + // We need to load the argument to a virtual register if we determined + // above that we ran out of physical registers of the appropriate type + // or we're forced to do vararg + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), + false, false, 0); ArgOffset += StackSlotSize; } - - ArgValues.push_back(ArgVal); + + InVals.push_back(ArgVal); + // Update the chain + Chain = ArgVal.getOperand(0); } - - // If the function takes variable number of arguments, make a frame index for - // the start of the first vararg value... for expansion of llvm.va_start. + + // vararg handling: if (isVarArg) { - VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, - ArgOffset); - SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - // If this function is vararg, store any remaining integer argument regs to - // their spots on the stack so that they may be loaded by deferencing the - // result of va_next. - SmallVector MemOps; + // FIXME: we should be able to query the argument registers from + // tablegen generated code. 
+ static const unsigned ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 + }; + // size of ArgRegs array + unsigned NumArgRegs = 77; + + // We will spill (79-3)+1 registers to the stack + SmallVector MemOps; + + // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); - SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); + SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), + false, false, 0); + Chain = Store.getOperand(0); MemOps.push_back(Store); - // Increment the address by four for the next argument to store - SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + + // Increment address by stack slot size for the next stored argument + ArgOffset += StackSlotSize; } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); - } - - ArgValues.push_back(Root); - - // Return the new list of results. - std::vector RetVT(Op.Val->value_begin(), - Op.Val->value_end()); - return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } + + return Chain; } /// isLSAAddress - Return the immediate to use if the specified /// value is representable as a LSA address. -static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) { +static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); if (!C) return 0; - - int Addr = C->getValue(); + + int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. (Addr << 14 >> 14) != Addr) return 0; // Top 14 bits have to be sext of immediate. 
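// Illustrative sketch (invented helper name, not an LLVM API): a shift-free
// restatement of the test above.  The address must be word aligned and must
// fit in a signed 18-bit immediate; the value actually encoded is Addr >> 2.
// The range comparison below is equivalent to the (Addr << 14 >> 14) trick
// without relying on arithmetic right shift of a signed value.

#include <cstdint>

static bool fitsLSAImmediate(int32_t Addr) {
  return (Addr & 3) == 0 &&                       // low 2 bits implicitly zero
         Addr >= -(1 << 17) && Addr < (1 << 17);  // signed 18-bit range
}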
- - return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val; + + return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); } -static -SDOperand -LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - SDOperand Chain = Op.getOperand(0); -#if 0 - bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; -#endif - SDOperand Callee = Op.getOperand(4); - unsigned NumOps = (Op.getNumOperands() - 5) / 2; - unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); +SDValue +SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + // CellSPU target does not yet support tail call optimization. + isTailCall = false; + + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); + unsigned NumOps = Outs.size(); + unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); + + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); + + const unsigned NumArgRegs = ArgLocs.size(); + // Handy pointer type - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - - // Accumulate how many bytes are to be pushed on the stack, including the - // linkage area, and parameter passing area. According to the SPU ABI, - // we minimally need space for [LR] and [SP] - unsigned NumStackBytes = SPUFrameInfo::minStackSize(); - + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. - SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32); - + SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); + // Figure out which arguments are going to go in registers, and which in // memory. - unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] + unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR] unsigned ArgRegIdx = 0; // Keep track of registers passing arguments - std::vector > RegsToPass; + std::vector > RegsToPass; // And the arguments passed on the stack - SmallVector MemOpChains; + SmallVector MemOpChains; + + for (; ArgRegIdx != NumOps; ++ArgRegIdx) { + SDValue Arg = OutVals[ArgRegIdx]; + CCValAssign &VA = ArgLocs[ArgRegIdx]; - for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. 
- SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); + SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - switch (Arg.getValueType()) { - default: assert(0 && "Unexpected ValueType for argument!"); + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i8: + case MVT::i16: case MVT::i32: case MVT::i64: case MVT::i128: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::f32: case MVT::f64: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += StackSlotSize; - } - break; + case MVT::v2i64: + case MVT::v2f64: case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += StackSlotSize; + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); + ArgOffset += StackSlotSize; } break; } } - // Update number of stack bytes actually used, insert a call sequence start - NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize()); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT)); + // Accumulate how many bytes are to be pushed on the stack, including the + // linkage area, and parameter passing area. According to the SPU ABI, + // we minimally need space for [LR] and [SP]. + unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize(); + + // Insert a call sequence start + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, + true)); if (!MemOpChains.empty()) { // Adjust the stack pointer for the stack arguments. - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); } - + // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. - SDOperand InFlag; + SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - - std::vector NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - SmallVector Ops; + SmallVector Ops; unsigned CallOpc = SPUISD::CALL; - + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
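// Illustrative sketch (invented names, not LLVM API): the branches below pick
// the callee addressing form.  With small-memory code, a callee that is only
// a declaration gets an absolute A-form address, while a callee defined in
// this unit gets a PC-relative BRSL target; large-memory code always goes
// through an indirect (X-form) address pair.

enum CalleeAddrForm { AFormAbs, PCRelBRSL, IndirectXForm };

static CalleeAddrForm pickCalleeAddrForm(bool usingLargeMem,
                                         bool calleeIsDeclaration) {
  if (usingLargeMem)
    return IndirectXForm;                    // corresponds to SPUISD::IndirectAddr
  return calleeIsDeclaration ? AFormAbs      // corresponds to SPUISD::AFormAddr
                             : PCRelBRSL;    // corresponds to SPUISD::PCRelAddr
}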
if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - GlobalValue *GV = G->getGlobal(); - unsigned CalleeVT = Callee.getValueType(); - SDOperand Zero = DAG.getConstant(0, PtrVT); - SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT); + const GlobalValue *GV = G->getGlobal(); + EVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); if (!ST->usingLargeMem()) { // Turn calls to targets that are defined (i.e., have bodies) into BRSL @@ -1197,111 +1384,90 @@ LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // This may be an unsafe assumption for JIT and really large compilation // units. if (GV->isDeclaration()) { - Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero); + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); } else { - Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero); + Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); } } else { // "Large memory" mode: Turn all calls into indirect calls with a X-form // address pairs: - Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); + } + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + EVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + + if (!ST->usingLargeMem()) { + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); + } else { + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); } - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { + } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { // If this is an absolute destination address that appears to be a legal // local store address, use the munged value. - Callee = SDOperand(Dest, 0); + Callee = SDValue(Dest, 0); } Ops.push_back(Chain); Ops.push_back(Callee); - + // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, + Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - - if (InFlag.Val) + + if (InFlag.getNode()) Ops.push_back(InFlag); - Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); + // Returns a chain and a flag for retval copy to use. + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue), + &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - SDOperand ResultVals[3]; - unsigned NumResults = 0; - NodeTys.clear(); - - // If the call has results, copy the values out of the ret val registers. 
- switch (Op.Val->getValueType(0)) { - default: assert(0 && "Unexpected ret value!"); - case MVT::Other: break; - case MVT::i32: - if (Op.Val->getValueType(1) == MVT::i32) { - Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, - Chain.getValue(2)).getValue(1); - ResultVals[1] = Chain.getValue(0); - NumResults = 2; - NodeTys.push_back(MVT::i32); - } else { - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; - } - NodeTys.push_back(MVT::i32); - break; - case MVT::i64: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; - NodeTys.push_back(MVT::i64); - break; - case MVT::f32: - case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0), - InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; - NodeTys.push_back(Op.Val->getValueType(0)); - break; - case MVT::v2f64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0), - InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; - NodeTys.push_back(Op.Val->getValueType(0)); - break; - } - - Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, - DAG.getConstant(NumStackBytes, PtrVT)); - NodeTys.push_back(MVT::Other); - + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (!Ins.empty()) + InFlag = Chain.getValue(1); + // If the function returns void, just return the chain. - if (NumResults == 0) + if (Ins.empty()) return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. - ResultVals[NumResults++] = Chain; - SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, - ResultVals, NumResults); - return Res.getValue(Op.ResNo); + + // Now handle the return value(s) + SmallVector RVLocs; + CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); + + + // If the call has results, copy the values out of the ret val registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + InVals.push_back(Val); + } + + return Chain; } -static SDOperand -LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { +SDValue +SPUTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - CCState CCInfo(CC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU); - + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_SPU); + // If this is the first return lowered for this function, add the regs to the // liveout set for the function. 
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { @@ -1309,21 +1475,21 @@ LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - SDOperand Chain = Op.getOperand(0); - SDOperand Flag; - + SDValue Flag; + // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + OutVals[i], Flag); Flag = Chain.getValue(1); } - if (Flag.Val) - return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag); + if (Flag.getNode()) + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); else - return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain); + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); } @@ -1333,83 +1499,91 @@ LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { static ConstantSDNode * getVecImm(SDNode *N) { - SDOperand OpVal(0, 0); - + SDValue OpVal(0, 0); + // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.Val == 0) + if (OpVal.getNode() == 0) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return 0; } - - if (OpVal.Val != 0) { + + if (OpVal.getNode() != 0) { if (ConstantSDNode *CN = dyn_cast(OpVal)) { return CN; } } - return 0; // All UNDEF: use implicit def.; not Constant node + return 0; } /// get_vec_i18imm - Test if this vector is a vector filled with the same value /// and the value fits into an unsigned 18-bit constant, and if so, return the /// constant -SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - MVT::ValueType ValueType) { +SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { - uint64_t Value = CN->getValue(); + uint64_t Value = CN->getZExtValue(); + if (ValueType == MVT::i64) { + uint64_t UValue = CN->getZExtValue(); + uint32_t upper = uint32_t(UValue >> 32); + uint32_t lower = uint32_t(UValue); + if (upper != lower) + return SDValue(); + Value = Value >> 32; + } if (Value <= 0x3ffff) - return DAG.getConstant(Value, ValueType); + return DAG.getTargetConstant(Value, ValueType); } - return SDOperand(); + return SDValue(); } /// get_vec_i16imm - Test if this vector is a vector filled with the same value /// and the value fits into a signed 16-bit constant, and if so, return the /// constant -SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - MVT::ValueType ValueType) { +SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { - if (ValueType == MVT::i32) { - int Value = (int) CN->getValue(); - int SExtValue = ((Value & 0xffff) << 16) >> 16; - - if (Value == SExtValue) - return DAG.getConstant(Value, ValueType); - } else if (ValueType == MVT::i16) { - short Value = (short) CN->getValue(); - int SExtValue = ((int) Value << 16) >> 16; - - if (Value == (short) SExtValue) - return DAG.getConstant(Value, ValueType); - } else if (ValueType == MVT::i64) { - int64_t Value = CN->getValue(); - int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16); - - if (Value == SExtValue) - return DAG.getConstant(Value, ValueType); + int64_t Value = CN->getSExtValue(); + if (ValueType == 
MVT::i64) { + uint64_t UValue = CN->getZExtValue(); + uint32_t upper = uint32_t(UValue >> 32); + uint32_t lower = uint32_t(UValue); + if (upper != lower) + return SDValue(); + Value = Value >> 32; + } + if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { + return DAG.getTargetConstant(Value, ValueType); } } - return SDOperand(); + return SDValue(); } /// get_vec_i10imm - Test if this vector is a vector filled with the same value /// and the value fits into a signed 10-bit constant, and if so, return the /// constant -SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - MVT::ValueType ValueType) { +SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { - int Value = (int) CN->getValue(); - if ((ValueType == MVT::i32 && isS10Constant(Value)) - || (ValueType == MVT::i16 && isS10Constant((short) Value))) - return DAG.getConstant(Value, ValueType); + int64_t Value = CN->getSExtValue(); + if (ValueType == MVT::i64) { + uint64_t UValue = CN->getZExtValue(); + uint32_t upper = uint32_t(UValue >> 32); + uint32_t lower = uint32_t(UValue); + if (upper != lower) + return SDValue(); + Value = Value >> 32; + } + if (isInt<10>(Value)) + return DAG.getTargetConstant(Value, ValueType); } - return SDOperand(); + return SDValue(); } /// get_vec_i8imm - Test if this vector is a vector filled with the same value @@ -1419,435 +1593,376 @@ SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, /// @note: The incoming vector is v16i8 because that's the only way we can load /// constant vectors. Thus, we test to see if the upper and lower bytes are the /// same value. -SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - MVT::ValueType ValueType) { +SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { - int Value = (int) CN->getValue(); + int Value = (int) CN->getZExtValue(); if (ValueType == MVT::i16 - && Value <= 0xffff /* truncated from uint64_t */ - && ((short) Value >> 8) == ((short) Value & 0xff)) - return DAG.getConstant(Value & 0xff, ValueType); + && Value <= 0xffff /* truncated from uint64_t */ + && ((short) Value >> 8) == ((short) Value & 0xff)) + return DAG.getTargetConstant(Value & 0xff, ValueType); else if (ValueType == MVT::i8 - && (Value & 0xff) == Value) - return DAG.getConstant(Value, ValueType); + && (Value & 0xff) == Value) + return DAG.getTargetConstant(Value, ValueType); } - return SDOperand(); + return SDValue(); } /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value /// and the value fits into a signed 16-bit constant, and if so, return the /// constant -SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - MVT::ValueType ValueType) { +SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { - uint64_t Value = CN->getValue(); + uint64_t Value = CN->getZExtValue(); if ((ValueType == MVT::i32 - && ((unsigned) Value & 0xffff0000) == (unsigned) Value) - || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) - return DAG.getConstant(Value >> 16, ValueType); + && ((unsigned) Value & 0xffff0000) == (unsigned) Value) + || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) + return DAG.getTargetConstant(Value >> 16, ValueType); } - return SDOperand(); + return SDValue(); } /// get_v4i32_imm - Catch-all for general 32-bit constant vectors -SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { +SDValue SPU::get_v4i32_imm(SDNode *N, 
SelectionDAG &DAG) { if (ConstantSDNode *CN = getVecImm(N)) { - return DAG.getConstant((unsigned) CN->getValue(), MVT::i32); + return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); } - return SDOperand(); + return SDValue(); } /// get_v4i32_imm - Catch-all for general 64-bit constant vectors -SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { +SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { if (ConstantSDNode *CN = getVecImm(N)) { - return DAG.getConstant((unsigned) CN->getValue(), MVT::i64); + return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); } - return SDOperand(); + return SDValue(); } -// If this is a vector of constants or undefs, get the bits. A bit in -// UndefBits is set if the corresponding element of the vector is an -// ISD::UNDEF value. For undefs, the corresponding VectorBits values are -// zero. Return true if this is not an array of constants, false if it is. -// -static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], - uint64_t UndefBits[2]) { - // Start with zero'd results. - VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; - - unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType()); - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDOperand OpVal = BV->getOperand(i); - - unsigned PartNo = i >= e/2; // In the upper 128 bits? - unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. - - uint64_t EltBits = 0; - if (OpVal.getOpcode() == ISD::UNDEF) { - uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); - UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); - continue; - } else if (ConstantSDNode *CN = dyn_cast(OpVal)) { - EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize)); - } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { - const APFloat &apf = CN->getValueAPF(); - EltBits = (CN->getValueType(0) == MVT::f32 - ? FloatToBits(apf.convertToFloat()) - : DoubleToBits(apf.convertToDouble())); - } else { - // Nonconstant element. - return true; - } - - VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); - } - - //printf("%llx %llx %llx %llx\n", - // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); - return false; -} +//! Lower a BUILD_VECTOR instruction creatively: +static SDValue +LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); + unsigned minSplatBits = EltVT.getSizeInBits(); -/// If this is a splat (repetition) of a value across the whole vector, return -/// the smallest size that splats it. For example, "0x01010101010101..." is a -/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and -/// SplatSize = 1 byte. -static bool isConstantSplat(const uint64_t Bits128[2], - const uint64_t Undef128[2], - int MinSplatBits, - uint64_t &SplatBits, uint64_t &SplatUndef, - int &SplatSize) { - // Don't let undefs prevent splats from matching. See if the top 64-bits are - // the same as the lower 64-bits, ignoring undefs. 
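// Illustrative sketch (not part of the patch): both the hand-rolled splat
// detection below, which this patch deletes, and the
// BuildVectorSDNode::isConstantSplat() call that replaces it in
// LowerBUILD_VECTOR answer the same question: what is the smallest element
// whose repetition produces the 128-bit constant?  A minimal model that
// ignores undef lanes:

#include <cstdint>

static unsigned splatSizeInBytes(uint64_t hi64, uint64_t lo64) {
  if (hi64 != lo64)                            return 16; // no smaller splat
  if (uint32_t(lo64 >> 32) != uint32_t(lo64))  return 8;
  uint32_t v32 = uint32_t(lo64);
  if (uint16_t(v32 >> 16) != uint16_t(v32))    return 4;
  uint16_t v16 = uint16_t(v32);
  if (uint8_t(v16 >> 8) != uint8_t(v16))       return 2;
  return 1;  // e.g. 0x0101010101010101 in both halves is a splat of 0x01
}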
- uint64_t Bits64 = Bits128[0] | Bits128[1]; - uint64_t Undef64 = Undef128[0] & Undef128[1]; - uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); - uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); - uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); - uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); - - if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { - if (MinSplatBits < 64) { - - // Check that the top 32-bits are the same as the lower 32-bits, ignoring - // undefs. - if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { - if (MinSplatBits < 32) { - - // If the top 16-bits are different than the lower 16-bits, ignoring - // undefs, we have an i32 splat. - if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { - if (MinSplatBits < 16) { - // If the top 8-bits are different than the lower 8-bits, ignoring - // undefs, we have an i16 splat. - if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) { - // Otherwise, we have an 8-bit splat. - SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); - SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); - SplatSize = 1; - return true; - } - } else { - SplatBits = Bits16; - SplatUndef = Undef16; - SplatSize = 2; - return true; - } - } - } else { - SplatBits = Bits32; - SplatUndef = Undef32; - SplatSize = 4; - return true; - } - } - } else { - SplatBits = Bits128[0]; - SplatUndef = Undef128[0]; - SplatSize = 8; - return true; - } - } + if (minSplatBits < 16) + minSplatBits = 16; - return false; // Can't be a splat if two pieces don't match. -} + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. If we CAN select this case, and if it -// selects to a single instruction, return Op. Otherwise, if we can codegen -// this case more efficiently than a constant pool load, lower it to the -// sequence of ops that should be used. -static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType VT = Op.getValueType(); - // If this is a vector of constants or undefs, get the bits. A bit in - // UndefBits is set if the corresponding element of the vector is an - // ISD::UNDEF value. For undefs, the corresponding VectorBits values are - // zero. - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; - if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits) - || !isConstantSplat(VectorBits, UndefBits, - MVT::getSizeInBits(MVT::getVectorElementType(VT)), - SplatBits, SplatUndef, SplatSize)) - return SDOperand(); // Not a constant vector, not a splat. - - switch (VT) { + if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + || minSplatBits < SplatBitSize) + return SDValue(); // Wasn't a constant vector or splat exceeded min + + uint64_t SplatBits = APSplatBits.getZExtValue(); + + switch (VT.getSimpleVT().SimpleTy) { default: + report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + + Twine(VT.getEVTString())); + /*NOTREACHED*/ case MVT::v4f32: { - uint32_t Value32 = SplatBits; - assert(SplatSize == 4 - && "LowerBUILD_VECTOR: Unexpected floating point vector element."); + uint32_t Value32 = uint32_t(SplatBits); + assert(SplatBitSize == 32 + && "LowerBUILD_VECTOR: Unexpected floating point vector element."); // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants - SDOperand T = DAG.getConstant(Value32, MVT::i32); - return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T)); + SDValue T = DAG.getConstant(Value32, MVT::i32); + return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); break; } case MVT::v2f64: { - uint64_t f64val = SplatBits; - assert(SplatSize == 8 - && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size."); + uint64_t f64val = uint64_t(SplatBits); + assert(SplatBitSize == 64 + && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants - SDOperand T = DAG.getConstant(f64val, MVT::i64); - return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); + SDValue T = DAG.getConstant(f64val, MVT::i64); + return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); break; } case MVT::v16i8: { // 8-bit constants have to be expanded to 16-bits - unsigned short Value16 = SplatBits | (SplatBits << 8); - SDOperand Ops[8]; - for (int i = 0; i < 8; ++i) - Ops[i] = DAG.getConstant(Value16, MVT::i16); - return DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8)); + unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; + SmallVector Ops; + + Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); } case MVT::v8i16: { - unsigned short Value16; - if (SplatSize == 2) - Value16 = (unsigned short) (SplatBits & 0xffff); - else - Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); - SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT)); - SDOperand Ops[8]; - for (int i = 0; i < 8; ++i) Ops[i] = T; - return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8); + unsigned short Value16 = SplatBits; + SDValue T = DAG.getConstant(Value16, EltVT); + SmallVector Ops; + + Ops.assign(8, T); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); } case MVT::v4i32: { - unsigned int Value = SplatBits; - SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT)); - return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } case MVT::v2i64: { - uint64_t val = SplatBits; - uint32_t upper = uint32_t(val >> 32); - uint32_t lower = uint32_t(val); - - if (val != 0) { - SDOperand LO32; - SDOperand HI32; - SmallVector ShufBytes; - SDOperand Result; - bool upper_special, lower_special; - - // NOTE: This code creates common-case shuffle masks that can be easily - // detected as common expressions. It is not attempting to create highly - // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
- - // Detect if the upper or lower half is a special shuffle mask pattern: - upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); - lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); - - // Create lower vector if not a special pattern - if (!lower_special) { - SDOperand LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LO32C, LO32C, LO32C, LO32C)); - } + return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); + } + } - // Create upper vector if not a special pattern - if (!upper_special) { - SDOperand HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - HI32C, HI32C, HI32C, HI32C)); - } + return SDValue(); +} - // If either upper or lower are special, then the two input operands are - // the same (basically, one of them is a "don't care") - if (lower_special) - LO32 = HI32; - if (upper_special) - HI32 = LO32; - if (lower_special && upper_special) { - // Unhappy situation... both upper and lower are special, so punt with - // a target constant: - SDOperand Zero = DAG.getConstant(0, MVT::i32); - HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, - Zero, Zero); - } +/*! + */ +SDValue +SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, + DebugLoc dl) { + uint32_t upper = uint32_t(SplatVal >> 32); + uint32_t lower = uint32_t(SplatVal); + + if (upper == lower) { + // Magic constant that can be matched by IL, ILA, et. al. + SDValue Val = DAG.getTargetConstant(upper, MVT::i32); + return DAG.getNode(ISD::BITCAST, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Val, Val, Val, Val)); + } else { + bool upper_special, lower_special; + + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. + + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 4; ++j) { - SDOperand V; - bool process_upper, process_lower; - uint64_t val = 0; - - process_upper = (upper_special && (i & 1) == 0); - process_lower = (lower_special && (i & 1) == 1); - - if (process_upper || process_lower) { - if ((process_upper && upper == 0) - || (process_lower && lower == 0)) - val = 0x80; - else if ((process_upper && upper == 0xffffffff) - || (process_lower && lower == 0xffffffff)) - val = 0xc0; - else if ((process_upper && upper == 0x80000000) - || (process_lower && lower == 0x80000000)) - val = (j == 0 ? 
0xe0 : 0x80); - } else - val = i * 4 + j + ((i & 1) * 16); - - ShufBytes.push_back(DAG.getConstant(val, MVT::i8)); - } + // Both upper and lower are special, lower to a constant pool load: + if (lower_special && upper_special) { + SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + SplatValCN, SplatValCN); + } + + SDValue LO32; + SDValue HI32; + SmallVector ShufBytes; + SDValue Result; + + // Create lower vector if not a special pattern + if (!lower_special) { + SDValue LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } + + // Create upper vector if not a special pattern + if (!upper_special) { + SDValue HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } + + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + + for (int i = 0; i < 4; ++i) { + uint64_t val = 0; + for (int j = 0; j < 4; ++j) { + SDValue V; + bool process_upper, process_lower; + val <<= 8; + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val |= 0x80; + else if ((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val |= 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val |= (j == 0 ? 0xe0 : 0x80); + } else + val |= i * 4 + j + ((i & 1) * 16); } - return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, - &ShufBytes[0], ShufBytes.size())); - } else { - // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR - SDOperand Zero = DAG.getConstant(0, MVT::i32); - return DAG.getNode(ISD::BIT_CONVERT, VT, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - Zero, Zero, Zero, Zero)); + ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); } + + return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size())); } - } - - return SDOperand(); } /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on /// which the Cell can operate. The code inspects V3 to ascertain whether the /// permutation vector, V3, is monotonically increasing with one "exception" /// element, e.g., (0, 1, _, 3). If this is the case, then generate a -/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. +/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. /// In either case, the net result is going to eventually invoke SHUFB to /// permute/shuffle the bytes from V1 and V2. /// \note -/// INSERT_MASK is eventually selected as one of the C*D instructions, generate +/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate /// control word for byte/halfword/word insertion. This takes care of a single /// element move from V2 into V1. /// \note /// SPUISD::SHUFB is eventually selected as Cell's shufb instructions. 
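// Illustrative sketch (not part of the patch): a plain C++ model of the byte
// permute that SPUISD::SHUFB performs, as implied by the control bytes built
// in LowerV2I64Splat above (0x80 produces 0x00, 0xc0 produces 0xff, 0xe0
// produces 0x80, anything else selects a byte of A:B).  This is inferred from
// the constants used in this file, not quoted from the SPU ISA document.

#include <cstdint>

static void shufbModel(const uint8_t a[16], const uint8_t b[16],
                       const uint8_t ctl[16], uint8_t out[16]) {
  for (int i = 0; i < 16; ++i) {
    uint8_t c = ctl[i];
    if ((c & 0xC0) == 0x80)      out[i] = 0x00;             // "zero" pattern
    else if ((c & 0xE0) == 0xC0) out[i] = 0xFF;             // "ones" pattern
    else if ((c & 0xE0) == 0xE0) out[i] = 0x80;             // "0x80" pattern
    else                         out[i] = (c & 0x10) ? b[c & 0x0F]   // byte of B
                                                     : a[c & 0x0F];  // byte of A
  }
}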
-static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { - SDOperand V1 = Op.getOperand(0); - SDOperand V2 = Op.getOperand(1); - SDOperand PermMask = Op.getOperand(2); - +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; - + // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have - // to be monotonically increasing with one exception element. - MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType()); + // to be monotonically increasing with one exception element, and the source + // slot of the element to move must be the same as the destination. + EVT VecVT = V1.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; - unsigned V2Elt = 0; + unsigned V2EltOffset = 0; unsigned V2EltIdx0 = 0; unsigned CurrElt = 0; + unsigned MaxElts = VecVT.getVectorNumElements(); + unsigned PrevElt = 0; bool monotonic = true; - if (EltVT == MVT::i8) + bool rotate = true; + int rotamt=0; + EVT maskVT; // which of the c?d instructions to use + + if (EltVT == MVT::i8) { V2EltIdx0 = 16; - else if (EltVT == MVT::i16) + maskVT = MVT::v16i8; + } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; - else if (EltVT == MVT::i32) + maskVT = MVT::v8i16; + } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; - else - assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); - - for (unsigned i = 0, e = PermMask.getNumOperands(); - EltsFromV2 <= 1 && monotonic && i != e; - ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getValue(); - - if (SrcElt >= V2EltIdx0) { - ++EltsFromV2; - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } else if (CurrElt != SrcElt) { - monotonic = false; + maskVT = MVT::v4i32; + } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { + V2EltIdx0 = 2; + maskVT = MVT::v2i64; + } else + llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); + + for (unsigned i = 0; i != MaxElts; ++i) { + if (SVN->getMaskElt(i) < 0) + continue; + + unsigned SrcElt = SVN->getMaskElt(i); + + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + // TODO: optimize for the monotonic case when several consecutive + // elements are taken form V2. Do we ever get such a case? 
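// Illustrative sketch (invented helper, not used by the backend): the
// 'rotate' bookkeeping in the loop below is, roughly, checking that the
// shuffle is a rotate of V1 by r element positions, i.e. every defined mask
// entry satisfies mask[i] == (i + r) % n for one fixed r and never reads V2.
// A compact restatement of that property:

static bool isRotateMask(const int mask[], int n, int &rotamt) {
  int r = -1;
  for (int i = 0; i < n; ++i) {
    if (mask[i] < 0) continue;            // undef lane matches any rotate
    if (mask[i] >= n) return false;       // element comes from V2, not a rotate
    int ri = (mask[i] - i + n) % n;       // rotate amount implied by lane i
    if (r < 0) r = ri;
    else if (ri != r) return false;       // inconsistent with earlier lanes
  }
  rotamt = (r < 0) ? 0 : r;               // all-undef mask: treat as identity
  return true;
}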
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) + V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); + else + monotonic = false; + ++EltsFromV2; + } else if (CurrElt != SrcElt) { + monotonic = false; + } + + ++CurrElt; } - ++CurrElt; + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { + PrevElt = SrcElt; + } else { + rotate = false; + } + } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { + // First time or after a "wrap around" + rotamt = SrcElt-i; + PrevElt = SrcElt; + } else { + // This isn't a rotation, takes elements from vector 2 + rotate = false; + } + } } if (EltsFromV2 == 1 && monotonic) { // Compute mask and shuffle - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Initialize temporary register to 0 - SDOperand InitTempReg = - DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT)); - // Copy register's contents as index in INSERT_MASK: - SDOperand ShufMaskOp = - DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(), - DAG.getTargetConstant(V2Elt, MVT::i32), - DAG.getCopyFromReg(InitTempReg, VReg, PtrVT)); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // As SHUFFLE_MASK becomes a c?d instruction, feed it an address + // R1 ($sp) is used here only as it is guaranteed to have last bits zero + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(V2EltOffset, MVT::i32)); + SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, + maskVT, Pointer); + // Use shuffle mask in SHUFB synthetic instruction: - return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, + ShufMaskOp); + } else if (rotate) { + if (rotamt < 0) + rotamt +=MaxElts; + rotamt *= EltVT.getSizeInBits()/8; + return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), + V1, DAG.getConstant(rotamt, MVT::i16)); } else { - // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes. - unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; - - SmallVector ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getValue(); - - for (unsigned j = 0; j != BytesPerElement; ++j) { - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i8)); - } + // Convert the SHUFFLE_VECTOR mask's input element units to the + // actual bytes. + unsigned BytesPerElement = EltVT.getSizeInBits()/8; + + SmallVector ResultMask; + for (unsigned i = 0, e = MaxElts; i != e; ++i) { + unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 
0 : SVN->getMaskElt(i); + + for (unsigned j = 0; j < BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } - - SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, - &ResultMask[0], ResultMask.size()); - return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask); + SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, + &ResultMask[0], ResultMask.size()); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); } } -static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { - SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar +static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { + SDValue Op0 = Op.getOperand(0); // Op0 = the scalar + DebugLoc dl = Op.getDebugLoc(); - if (Op0.Val->getOpcode() == ISD::Constant) { + if (Op0.getNode()->getOpcode() == ISD::Constant) { // For a constant, build the appropriate constant vector, which will // eventually simplify to a vector register load. - ConstantSDNode *CN = cast(Op0.Val); - SmallVector ConstVecValues; - MVT::ValueType VT; + ConstantSDNode *CN = cast(Op0.getNode()); + SmallVector ConstVecValues; + EVT VT; size_t n_copies; // Create a constant vector: - switch (Op.getValueType()) { - default: assert(0 && "Unexpected constant value type in " - "LowerSCALAR_TO_VECTOR"); + switch (Op.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; @@ -1856,488 +1971,379 @@ static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; } - SDOperand CValue = DAG.getConstant(CN->getValue(), VT); + SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); for (size_t j = 0; j < n_copies; ++j) ConstVecValues.push_back(CValue); - return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(), - &ConstVecValues[0], ConstVecValues.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), + &ConstVecValues[0], ConstVecValues.size()); } else { // Otherwise, copy the value from one register to another: - switch (Op0.getValueType()) { - default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: case MVT::f32: case MVT::f64: - return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0); + return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); } } - return SDOperand(); + return SDValue(); } -static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) { - switch (Op.getValueType()) { - case MVT::v4i32: { - SDOperand rA = Op.getOperand(0); - SDOperand rB = Op.getOperand(1); - SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB); - SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA); - SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB); - SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1); - - return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2); - break; - } +static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + SDValue N = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + 
DebugLoc dl = Op.getDebugLoc(); + SDValue retval; + + if (ConstantSDNode *C = dyn_cast(Elt)) { + // Constant argument: + int EltNo = (int) C->getZExtValue(); + + // sanity checks: + if (VT == MVT::i8 && EltNo >= 16) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); + else if (VT == MVT::i16 && EltNo >= 8) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); + else if (VT == MVT::i32 && EltNo >= 4) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); + else if (VT == MVT::i64 && EltNo >= 2) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); + + if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { + // i32 and i64: Element 0 is the preferred slot + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); + } - // Multiply two v8i16 vectors (pipeline friendly version): - // a) multiply lower halves, mask off upper 16-bit of 32-bit product - // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) - // c) Use SELB to select upper and lower halves from the intermediate results - // - // NOTE: We really want to move the FSMBI to earlier to actually get the - // dual-issue. This code does manage to do this, even if it's a little on - // the wacky side - case MVT::v8i16: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SDOperand Chain = Op.getOperand(0); - SDOperand rA = Op.getOperand(0); - SDOperand rB = Op.getOperand(1); - unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - - SDOperand FSMBOp = - DAG.getCopyToReg(Chain, FSMBIreg, - DAG.getNode(SPUISD::FSMBI, MVT::v8i16, - DAG.getConstant(0xcccc, MVT::i32))); - - SDOperand HHProd = - DAG.getCopyToReg(FSMBOp, HiProdReg, - DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); - - SDOperand HHProd_v4i32 = - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, - DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); - - return DAG.getNode(SPUISD::SELB, MVT::v8i16, - DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), - DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), - DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, - HHProd_v4i32, - DAG.getConstant(16, MVT::i16))), - DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); - } - - // This M00sE is N@stI! (apologies to Monty Python) - // - // SPU doesn't know how to do any 8-bit multiplication, so the solution - // is to break it all apart, sign extend, and reassemble the various - // intermediate products. 
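// Illustrative sketch, not part of this patch: the scalar idea behind the
// removed v16i8 multiply lowering described above -- widen each byte pair,
// multiply in 16 bits, and keep only the low byte of each partial product.
// The helper is hypothetical; the removed code did this with packed even/odd
// half-word multiplies and SELB merges.
#if 0
#include <cstdint>

void mulV16i8(const int8_t A[16], const int8_t B[16], int8_t Out[16]) {
  for (int i = 0; i < 16; ++i) {
    int16_t Wide = int16_t(A[i]) * int16_t(B[i]);  // sign extend, 16-bit multiply
    Out[i] = int8_t(Wide);                         // truncate back to the i8 lane
  }
}
#endif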
- case MVT::v16i8: { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SDOperand Chain = Op.getOperand(0); - SDOperand rA = Op.getOperand(0); - SDOperand rB = Op.getOperand(1); - SDOperand c8 = DAG.getConstant(8, MVT::i8); - SDOperand c16 = DAG.getConstant(16, MVT::i8); - - unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - - SDOperand LLProd = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); - - SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); - - SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); - - SDOperand LHProd = - DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, - DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); - - SDOperand FSMBdef_2222 = - DAG.getCopyToReg(Chain, FSMBreg_2222, - DAG.getNode(SPUISD::FSMBI, MVT::v8i16, - DAG.getConstant(0x2222, MVT::i32))); - - SDOperand FSMBuse_2222 = - DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32); - - SDOperand LoProd_1 = - DAG.getCopyToReg(Chain, LoProd_reg, - DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd, - FSMBuse_2222)); - - SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32); - - SDOperand LoProd = - DAG.getNode(ISD::AND, MVT::v4i32, - DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32), - DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, - LoProdMask, LoProdMask, - LoProdMask, LoProdMask)); - - SDOperand rAH = - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); - - SDOperand rBH = - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, - DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); - - SDOperand HLProd = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); - - SDOperand HHProd_1 = - DAG.getNode(SPUISD::MPY, MVT::v8i16, - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)), - DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, - DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8))); - - SDOperand HHProd = - DAG.getCopyToReg(Chain, HiProd_reg, - DAG.getNode(SPUISD::SELB, MVT::v8i16, - HLProd, - DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), - FSMBuse_2222)); - - SDOperand HiProd = - DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, - DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16); - - return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, - DAG.getNode(ISD::OR, MVT::v4i32, - LoProd, HiProd)); - } + // Need to generate shuffle mask and extract: + int prefslot_begin = -1, prefslot_end = -1; + int elt_byte = EltNo * VT.getSizeInBits() / 8; - default: - cerr << "CellSPU: Unknown vector multiplication, got " - << MVT::getValueTypeString(Op.getValueType()) - << "\n"; - abort(); - /*NOTREACHED*/ - } + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(false && "Invalid value type!"); + case MVT::i8: { + prefslot_begin = prefslot_end = 3; + break; + } + case MVT::i16: { + prefslot_begin = 2; prefslot_end = 3; + break; + } + case MVT::i32: + case MVT::f32: { + prefslot_begin = 0; prefslot_end = 3; + break; + } + case MVT::i64: + case MVT::f64: { + prefslot_begin = 0; prefslot_end = 7; + break; + } + } - return SDOperand(); -} + assert(prefslot_begin != -1 && prefslot_end != -1 && + "LowerEXTRACT_VECTOR_ELT: 
preferred slots uninitialized"); -static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned int ShufBytes[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + for (int i = 0; i < 16; ++i) { + // zero fill uppper part of preferred slot, don't care about the + // other slots: + unsigned int mask_val; + if (i <= prefslot_end) { + mask_val = + ((i < prefslot_begin) + ? 0x80 + : elt_byte + (i - prefslot_begin)); + + ShufBytes[i] = mask_val; + } else + ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; + } - SDOperand A = Op.getOperand(0); - SDOperand B = Op.getOperand(1); - unsigned VT = Op.getValueType(); + SDValue ShufMask[4]; + for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { + unsigned bidx = i * 4; + unsigned int bits = ((ShufBytes[bidx] << 24) | + (ShufBytes[bidx+1] << 16) | + (ShufBytes[bidx+2] << 8) | + ShufBytes[bidx+3]); + ShufMask[i] = DAG.getConstant(bits, MVT::i32); + } - unsigned VRegBR, VRegC; + SDValue ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); - if (VT == MVT::f32) { - VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); - VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), + N, N, ShufMaskVec)); } else { - VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); - } - // TODO: make sure we're feeding FPInterp the right arguments - // Right now: fi B, frest(B) - - // Computes BRcpl = - // (Floating Interpolate (FP Reciprocal Estimate B)) - SDOperand BRcpl = - DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, - DAG.getNode(SPUISD::FPInterp, VT, B, - DAG.getNode(SPUISD::FPRecipEst, VT, B))); - - // Computes A * BRcpl and stores in a temporary register - SDOperand AxBRcpl = - DAG.getCopyToReg(BRcpl, VRegC, - DAG.getNode(ISD::FMUL, VT, A, - DAG.getCopyFromReg(BRcpl, VRegBR, VT))); - // What's the Chain variable do? It's magic! 
- // TODO: set Chain = Op(0).getEntryNode() - - return DAG.getNode(ISD::FADD, VT, - DAG.getCopyFromReg(AxBRcpl, VRegC, VT), - DAG.getNode(ISD::FMUL, VT, - DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), - DAG.getNode(ISD::FSUB, VT, A, - DAG.getNode(ISD::FMUL, VT, B, - DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); -} - -static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { - unsigned VT = Op.getValueType(); - SDOperand N = Op.getOperand(0); - SDOperand Elt = Op.getOperand(1); - SDOperand ShufMask[16]; - ConstantSDNode *C = dyn_cast(Elt); - - assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode"); - - int EltNo = (int) C->getValue(); - - // sanity checks: - if (VT == MVT::i8 && EltNo >= 16) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); - else if (VT == MVT::i16 && EltNo >= 8) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); - else if (VT == MVT::i32 && EltNo >= 4) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); - else if (VT == MVT::i64 && EltNo >= 2) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); - - if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { - // i32 and i64: Element 0 is the preferred slot - return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N); - } - - // Need to generate shuffle mask and extract: - int prefslot_begin = -1, prefslot_end = -1; - int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8; + // Variable index: Rotate the requested element into slot 0, then replicate + // slot 0 across the vector + EVT VecVT = N.getValueType(); + if (!VecVT.isSimple() || !VecVT.isVector()) { + report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + "vector type!"); + } - switch (VT) { - case MVT::i8: { - prefslot_begin = prefslot_end = 3; - break; - } - case MVT::i16: { - prefslot_begin = 2; prefslot_end = 3; - break; - } - case MVT::i32: { - prefslot_begin = 0; prefslot_end = 3; - break; - } - case MVT::i64: { - prefslot_begin = 0; prefslot_end = 7; - break; - } - } + // Make life easier by making sure the index is zero-extended to i32 + if (Elt.getValueType() != MVT::i32) + Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); - assert(prefslot_begin != -1 && prefslot_end != -1 && - "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); + // Scale the index to a bit/byte shift quantity + APInt scaleFactor = + APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); + unsigned scaleShift = scaleFactor.logBase2(); + SDValue vecShift; - for (int i = 0; i < 16; ++i) { - // zero fill uppper part of preferred slot, don't care about the - // other slots: - unsigned int mask_val; + if (scaleShift > 0) { + // Scale the shift factor: + Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, + DAG.getConstant(scaleShift, MVT::i32)); + } - if (i <= prefslot_end) { - mask_val = - ((i < prefslot_begin) - ? 
0x80 - : elt_byte + (i - prefslot_begin)); + vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); + + // Replicate the bytes starting at byte 0 across the entire vector (for + // consistency with the notion of a unified register set) + SDValue replicate; + + switch (VT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + "type"); + /*NOTREACHED*/ + case MVT::i8: { + SDValue factor = DAG.getConstant(0x00000000, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); + break; + } + case MVT::i16: { + SDValue factor = DAG.getConstant(0x00010001, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); + break; + } + case MVT::i32: + case MVT::f32: { + SDValue factor = DAG.getConstant(0x00010203, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); + break; + } + case MVT::i64: + case MVT::f64: { + SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); + SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + loFactor, hiFactor, loFactor, hiFactor); + break; + } + } - ShufMask[i] = DAG.getConstant(mask_val, MVT::i8); - } else - ShufMask[i] = ShufMask[i % (prefslot_end + 1)]; + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, VecVT, + vecShift, vecShift, replicate)); } - SDOperand ShufMaskVec = - DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, - &ShufMask[0], - sizeof(ShufMask) / sizeof(ShufMask[0])); - - return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, - DAG.getNode(SPUISD::SHUFB, N.getValueType(), - N, N, ShufMaskVec)); - + return retval; } -static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { - SDOperand VecOp = Op.getOperand(0); - SDOperand ValOp = Op.getOperand(1); - SDOperand IdxOp = Op.getOperand(2); - MVT::ValueType VT = Op.getValueType(); - - ConstantSDNode *CN = cast(IdxOp); - assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Use $2 because it's always 16-byte aligned and it's available: - SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT); - - SDOperand result = - DAG.getNode(SPUISD::SHUFB, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), +static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + SDValue VecOp = Op.getOperand(0); + SDValue ValOp = Op.getOperand(1); + SDValue IdxOp = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + EVT eltVT = ValOp.getValueType(); + + // use 0 when the lane to insert to is 'undef' + int64_t Offset=0; + if (IdxOp.getOpcode() != ISD::UNDEF) { + ConstantSDNode *CN = cast(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; + } + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Use $sp ($1) because it's always 16-byte aligned and it's available: + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(Offset, PtrVT)); + // widen the mask when dealing with half vectors + EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), + 128/ VT.getVectorElementType().getSizeInBits()); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); + + SDValue result = + 
DAG.getNode(SPUISD::SHUFB, dl, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), VecOp, - DAG.getNode(SPUISD::INSERT_MASK, VT, - DAG.getNode(ISD::ADD, PtrVT, - PtrBase, - DAG.getConstant(CN->getValue(), - PtrVT)))); + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); return result; } -static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) { - SDOperand N0 = Op.getOperand(0); // Everything has at least one operand +static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, + const TargetLowering &TLI) +{ + SDValue N0 = Op.getOperand(0); // Everything has at least one operand + DebugLoc dl = Op.getDebugLoc(); + EVT ShiftVT = TLI.getShiftAmountTy(); assert(Op.getValueType() == MVT::i8); switch (Opc) { default: - assert(0 && "Unhandled i8 math operator"); + llvm_unreachable("Unhandled i8 math operator"); /*NOTREACHED*/ break; + case ISD::ADD: { + // 8-bit addition: Promote the arguments up to 16-bits and truncate + // the result: + SDValue N1 = Op.getOperand(1); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + + } + case ISD::SUB: { // 8-bit subtraction: Promote the arguments up to 16-bits and truncate // the result: - SDOperand N1 = Op.getOperand(1); - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); - } + SDValue N1 = Op.getOperand(1); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + } case ISD::ROTR: case ISD::ROTL: { - SDOperand N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); - N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); - SDOperand ExpandArg = - DAG.getNode(ISD::OR, MVT::i16, N0, - DAG.getNode(ISD::SHL, MVT::i16, - N0, DAG.getConstant(8, MVT::i16))); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, ExpandArg, N1)); + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) + ? ISD::ZERO_EXTEND + : ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + // Replicate lower 8-bits into upper 8: + SDValue ExpandArg = + DAG.getNode(ISD::OR, dl, MVT::i16, N0, + DAG.getNode(ISD::SHL, dl, MVT::i16, + N0, DAG.getConstant(8, MVT::i32))); + + // Truncate back down to i8 + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); } case ISD::SRL: case ISD::SHL: { - SDOperand N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); - N1Opc = (N1.getValueType() < MVT::i16 ? 
ISD::ZERO_EXTEND : ISD::TRUNCATE); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::ZERO_EXTEND; + + if (N1.getValueType().bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } case ISD::SRA: { - SDOperand N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); - N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::SIGN_EXTEND; + + if (N1VT.bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); } case ISD::MUL: { - SDOperand N1 = Op.getOperand(1); - unsigned N1Opc; - N0 = (N0.getOpcode() != ISD::Constant - ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) - : DAG.getConstant(cast(N0)->getValue(), MVT::i16)); - N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE); - N1 = (N1.getOpcode() != ISD::Constant - ? DAG.getNode(N1Opc, MVT::i16, N1) - : DAG.getConstant(cast(N1)->getValue(), MVT::i16)); - return DAG.getNode(ISD::TRUNCATE, MVT::i8, - DAG.getNode(Opc, MVT::i16, N0, N1)); + SDValue N1 = Op.getOperand(1); + + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); break; } } - return SDOperand(); + return SDValue(); } //! 
Lower byte immediate operations for v16i8 vectors: -static SDOperand -LowerByteImmed(SDOperand Op, SelectionDAG &DAG) { - SDOperand ConstVec; - SDOperand Arg; - MVT::ValueType VT = Op.getValueType(); +static SDValue +LowerByteImmed(SDValue Op, SelectionDAG &DAG) { + SDValue ConstVec; + SDValue Arg; + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); ConstVec = Op.getOperand(0); Arg = Op.getOperand(1); - if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) { - if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) { + if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { + if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { ConstVec = ConstVec.getOperand(0); } else { ConstVec = Op.getOperand(1); Arg = Op.getOperand(0); - if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) { - ConstVec = ConstVec.getOperand(0); + if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { + ConstVec = ConstVec.getOperand(0); } } } - if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) { - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - uint64_t SplatBits, SplatUndef; - int SplatSize; + if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { + BuildVectorSDNode *BCN = dyn_cast(ConstVec.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); - if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits) - && isConstantSplat(VectorBits, UndefBits, - MVT::getSizeInBits(MVT::getVectorElementType(VT)), - SplatBits, SplatUndef, SplatSize)) { - SDOperand tcVec[16]; - SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]); + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); - // Turn the BUILD_VECTOR into a set of target constants: - for (size_t i = 0; i < tcVecSize; ++i) - tcVec[i] = tc; + if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + && minSplatBits <= SplatBitSize) { + uint64_t SplatBits = APSplatBits.getZExtValue(); + SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); - return DAG.getNode(Op.Val->getOpcode(), VT, Arg, - DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize)); + SmallVector tcVec; + tcVec.assign(16, tc); + return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); } } - return SDOperand(); -} - -//! Lower i32 multiplication -static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT, - unsigned Opc) { - switch (VT) { - default: - cerr << "CellSPU: Unknown LowerMUL value type, got " - << MVT::getValueTypeString(Op.getValueType()) - << "\n"; - abort(); - /*NOTREACHED*/ - - case MVT::i32: { - SDOperand rA = Op.getOperand(0); - SDOperand rB = Op.getOperand(1); - - return DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(ISD::ADD, MVT::i32, - DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB), - DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)), - DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB)); - } - } - - return SDOperand(); + // These operations (AND, OR, XOR) are legal, they just couldn't be custom + // lowered. Return the operation, rather than a null SDValue. + return Op; } //! Custom lowering for CTPOP (count population) @@ -2346,19 +2352,23 @@ static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT, operand. SPU has such an instruction, but it counts the number of ones per byte, which then have to be accumulated. 
*/ -static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) { - unsigned VT = Op.getValueType(); - unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT))); +static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())); + DebugLoc dl = Op.getDebugLoc(); - switch (VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(false && "Invalid value type!"); case MVT::i8: { - SDOperand N = Op.getOperand(0); - SDOperand Elt0 = DAG.getConstant(0, MVT::i32); + SDValue N = Op.getOperand(0); + SDValue Elt0 = DAG.getConstant(0, MVT::i32); - SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); - SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); } case MVT::i16: { @@ -2367,30 +2377,30 @@ static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) { unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); - SDOperand N = Op.getOperand(0); - SDOperand Elt0 = DAG.getConstant(0, MVT::i16); - SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16); - SDOperand Shift1 = DAG.getConstant(8, MVT::i16); + SDValue N = Op.getOperand(0); + SDValue Elt0 = DAG.getConstant(0, MVT::i16); + SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); + SDValue Shift1 = DAG.getConstant(8, MVT::i32); - SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); - SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); // CNTB_result becomes the chain to which all of the virtual registers // CNTB_reg, SUM1_reg become associated: - SDOperand CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0); - - SDOperand CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + SDValue CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); + + SDValue CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16); + SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); - return DAG.getNode(ISD::AND, MVT::i16, - DAG.getNode(ISD::ADD, MVT::i16, - DAG.getNode(ISD::SRL, MVT::i16, - Tmp1, Shift1), - Tmp1), - Mask0); + return DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::ADD, dl, MVT::i16, + DAG.getNode(ISD::SRL, dl, MVT::i16, + Tmp1, Shift1), + Tmp1), + Mask0); } case MVT::i32: { @@ -2400,66 +2410,383 @@ static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) { unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDOperand N = Op.getOperand(0); - SDOperand Elt0 = DAG.getConstant(0, MVT::i32); - SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32); - SDOperand Shift1 = DAG.getConstant(16, MVT::i32); - SDOperand Shift2 = DAG.getConstant(8, MVT::i32); + SDValue N = Op.getOperand(0); + SDValue Elt0 = DAG.getConstant(0, MVT::i32); + SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); + SDValue Shift1 = DAG.getConstant(16, MVT::i32); + SDValue Shift2 = DAG.getConstant(8, MVT::i32); - SDOperand Promote = 
DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N); - SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); // CNTB_result becomes the chain to which all of the virtual registers // CNTB_reg, SUM1_reg become associated: - SDOperand CNTB_result = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0); - - SDOperand CNTB_rescopy = - DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); + SDValue CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); - SDOperand Comp1 = - DAG.getNode(ISD::SRL, MVT::i32, - DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1); + SDValue CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); - SDOperand Sum1 = - DAG.getNode(ISD::ADD, MVT::i32, - Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32)); + SDValue Comp1 = + DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), + Shift1); - SDOperand Sum1_rescopy = - DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1); + SDValue Sum1 = + DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, + DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); - SDOperand Comp2 = - DAG.getNode(ISD::SRL, MVT::i32, - DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32), - Shift2); - SDOperand Sum2 = - DAG.getNode(ISD::ADD, MVT::i32, Comp2, - DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32)); + SDValue Sum1_rescopy = + DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); - return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0); + SDValue Comp2 = + DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), + Shift2); + SDValue Sum2 = + DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, + DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); + + return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); } case MVT::i64: break; } - return SDOperand(); + return SDValue(); } -/// LowerOperation - Provide custom lowering hooks for some operations. -/// -SDOperand -SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) +//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 +/*! + f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. + All conversions to i64 are expanded to a libcall. + */ +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + const SPUTargetLowering &TLI) { + EVT OpVT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); + + if ((OpVT == MVT::i32 && Op0VT == MVT::f64) + || OpVT == MVT::i64) { + // Convert f32 / f64 to i32 / i64 via libcall. + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::FP_TO_SINT) + ? RTLIB::getFPTOSINT(Op0VT, OpVT) + : RTLIB::getFPTOUINT(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } + + return Op; +} + +//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 +/*! + i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. + All conversions from i64 are expanded to a libcall. 
+ */ +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + const SPUTargetLowering &TLI) { + EVT OpVT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); + + if ((OpVT == MVT::f64 && Op0VT == MVT::i32) + || Op0VT == MVT::i64) { + // Convert i32, i64 to f64 via libcall: + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::SINT_TO_FP) + ? RTLIB::getSINTTOFP(Op0VT, OpVT) + : RTLIB::getUINTTOFP(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } + + return Op; +} + +//! Lower ISD::SETCC +/*! + This handles MVT::f64 (double floating point) condition lowering + */ +static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + CondCodeSDNode *CC = dyn_cast(Op.getOperand(2)); + DebugLoc dl = Op.getDebugLoc(); + assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); + + SDValue lhs = Op.getOperand(0); + SDValue rhs = Op.getOperand(1); + EVT lhsVT = lhs.getValueType(); + assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); + + EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); + APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + EVT IntVT(MVT::i64); + + // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently + // selected to a NOP: + SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); + SDValue lhsHi32 = + DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::SRL, dl, IntVT, + i64lhs, DAG.getConstant(32, MVT::i32))); + SDValue lhsHi32abs = + DAG.getNode(ISD::AND, dl, MVT::i32, + lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); + SDValue lhsLo32 = + DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); + + // SETO and SETUO only use the lhs operand: + if (CC->get() == ISD::SETO) { + // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of + // SETUO + APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, ccResultVT, + DAG.getSetCC(dl, ccResultVT, + lhs, DAG.getConstantFP(0.0, lhsVT), + ISD::SETUO), + DAG.getConstant(ccResultAllOnes, ccResultVT)); + } else if (CC->get() == ISD::SETUO) { + // Evaluates to true if Op0 is [SQ]NaN + return DAG.getNode(ISD::AND, dl, ccResultVT, + DAG.getSetCC(dl, ccResultVT, + lhsHi32abs, + DAG.getConstant(0x7ff00000, MVT::i32), + ISD::SETGE), + DAG.getSetCC(dl, ccResultVT, + lhsLo32, + DAG.getConstant(0, MVT::i32), + ISD::SETGT)); + } + + SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); + SDValue rhsHi32 = + DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::SRL, dl, IntVT, + i64rhs, DAG.getConstant(32, MVT::i32))); + + // If a value is negative, subtract from the sign magnitude constant: + SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); + + // Convert the sign-magnitude representation into 2's complement: + SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, + lhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); + SDValue lhsSelect = + DAG.getNode(ISD::SELECT, dl, IntVT, + lhsSelectMask, lhsSignMag2TC, i64lhs); + + SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, + rhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); + SDValue rhsSelect = + DAG.getNode(ISD::SELECT, dl, IntVT, + rhsSelectMask, rhsSignMag2TC, i64rhs); + + 
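// Illustrative sketch, not part of this patch: the scalar analogue of the
// sign-magnitude to two's complement conversion above, which lets an ordered
// f64 comparison be done as a plain signed 64-bit integer comparison (NaNs
// are screened out separately, as in the SETO/SETUO handling). Hypothetical
// helper; assumes IEEE-754 doubles.
#if 0
#include <cstdint>
#include <cstring>

int64_t orderKey(double D) {
  int64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));   // same idea as the BITCAST to i64 above
  if (Bits < 0)                           // sign bit set: value is negative
    Bits = INT64_MIN - Bits;              // INT64_MIN plays the signMag2TC role
  return Bits;
}
// For ordered, NaN-free inputs:  a < b  <=>  orderKey(a) < orderKey(b)
#endif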
unsigned compareOp; + + switch (CC->get()) { + case ISD::SETOEQ: + case ISD::SETUEQ: + compareOp = ISD::SETEQ; break; + case ISD::SETOGT: + case ISD::SETUGT: + compareOp = ISD::SETGT; break; + case ISD::SETOGE: + case ISD::SETUGE: + compareOp = ISD::SETGE; break; + case ISD::SETOLT: + case ISD::SETULT: + compareOp = ISD::SETLT; break; + case ISD::SETOLE: + case ISD::SETULE: + compareOp = ISD::SETLE; break; + case ISD::SETUNE: + case ISD::SETONE: + compareOp = ISD::SETNE; break; + default: + report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); + } + + SDValue result = + DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, + (ISD::CondCode) compareOp); + + if ((CC->get() & 0x8) == 0) { + // Ordered comparison: + SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, + lhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, + rhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); + + result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); + } + + return result; +} + +//! Lower ISD::SELECT_CC +/*! + ISD::SELECT_CC can (generally) be implemented directly on the SPU using the + SELB instruction. + + \note Need to revisit this in the future: if the code path through the true + and false value computations is longer than the latency of a branch (6 + cycles), then it would be more advantageous to branch and insert a new basic + block and branch on the condition. However, this code does not make that + assumption, given the simplisitc uses so far. + */ + +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = Op.getValueType(); + SDValue lhs = Op.getOperand(0); + SDValue rhs = Op.getOperand(1); + SDValue trueval = Op.getOperand(2); + SDValue falseval = Op.getOperand(3); + SDValue condition = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + // NOTE: SELB's arguments: $rA, $rB, $mask + // + // SELB selects bits from $rA where bits in $mask are 0, bits from $rB + // where bits in $mask are 1. CCond will be inverted, having 1s where the + // condition was true and 0s where the condition was false. Hence, the + // arguments to SELB get reversed. + + // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's + // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up + // with another "cannot select select_cc" assert: + + SDValue compare = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(Op.getValueType()), + lhs, rhs, condition); + return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); +} + +//! 
Custom lower ISD::TRUNCATE +static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) { - switch (Op.getOpcode()) { + // Type to truncate to + EVT VT = Op.getValueType(); + MVT simpleVT = VT.getSimpleVT(); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())); + DebugLoc dl = Op.getDebugLoc(); + + // Type to truncate from + SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); + + if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { + // Create shuffle mask, least significant doubleword of quadword + unsigned maskHigh = 0x08090a0b; + unsigned maskLow = 0x0c0d0e0f; + // Use a shuffle to perform the truncation + SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + DAG.getConstant(maskHigh, MVT::i32), + DAG.getConstant(maskLow, MVT::i32), + DAG.getConstant(maskHigh, MVT::i32), + DAG.getConstant(maskLow, MVT::i32)); + + SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, + Op0, Op0, shufMask); + + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); + } + + return SDValue(); // Leave the truncate unmolested +} + +/*! + * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic + * algorithm is to duplicate the sign bit using rotmai to generate at + * least one byte full of sign bits. Then propagate the "sign-byte" into + * the leftmost words and the i64/i32 into the rightmost words using shufb. + * + * @param Op The sext operand + * @param DAG The current DAG + * @return The SDValue with the entire instruction sequence + */ +static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) +{ + DebugLoc dl = Op.getDebugLoc(); + + // Type to extend to + MVT OpVT = Op.getValueType().getSimpleVT(); + + // Type to extend from + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType().getSimpleVT(); + + // The type to extend to needs to be a i128 and + // the type to extend from needs to be i64 or i32. + assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && + "LowerSIGN_EXTEND: input and/or output operand have wrong size"); + + // Create shuffle mask + unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 + unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 + unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 + SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + DAG.getConstant(mask1, MVT::i32), + DAG.getConstant(mask1, MVT::i32), + DAG.getConstant(mask2, MVT::i32), + DAG.getConstant(mask3, MVT::i32)); + + // Word wise arithmetic right shift to generate at least one byte + // that contains sign bits. + MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; + SDValue sraVal = DAG.getNode(ISD::SRA, + dl, + mvt, + DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), + DAG.getConstant(31, MVT::i32)); + + // reinterpret as a i128 (SHUFB requires it). This gets lowered away. + SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + dl, Op0VT, Op0, + DAG.getTargetConstant( + SPU::GPRCRegClass.getID(), + MVT::i32)), 0); + // Shuffle bytes - Copy the sign bits into the upper 64 bits + // and the input value into the lower 64 bits. + SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, + extended, sraVal, shufMask); + return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle); +} + +//! Custom (target-specific) lowering entry point +/*! + This is where LLVM's DAG selection process calls to do target-specific + lowering of nodes. 
+ */ +SDValue +SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const +{ + unsigned Opc = (unsigned) Op.getOpcode(); + EVT VT = Op.getValueType(); + + switch (Opc) { default: { - cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; - cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n"; - cerr << "*Op.Val:\n"; - Op.Val->dump(); - abort(); +#ifndef NDEBUG + errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; + errs() << "Op.getOpcode() = " << Opc << "\n"; + errs() << "*Op.getNode():\n"; + Op.getNode()->dump(); +#endif + llvm_unreachable(0); } case ISD::LOAD: + case ISD::EXTLOAD: case ISD::SEXTLOAD: case ISD::ZEXTLOAD: return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); @@ -2471,27 +2798,29 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::JumpTable: return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::Constant: - return LowerConstant(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG); - case ISD::BRCOND: - return LowerBRCOND(Op, DAG); - case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); - case ISD::CALL: - return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::RET: - return LowerRET(Op, DAG, getTargetMachine()); - - // i8 math ops: + + // i8, i64 math ops: + case ISD::ADD: case ISD::SUB: case ISD::ROTR: case ISD::ROTL: case ISD::SRL: case ISD::SHL: - case ISD::SRA: - return LowerI8Math(Op, DAG, Op.getOpcode()); + case ISD::SRA: { + if (VT == MVT::i8) + return LowerI8Math(Op, DAG, Opc, *this); + break; + } + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG, *this); + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return LowerINT_TO_FP(Op, DAG, *this); // Vector-related lowering. 
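// Illustrative sketch, not part of this patch: the effect of routing the i8
// cases above through LowerI8Math -- SPU has no 8-bit ALU forms, so i8
// arithmetic is promoted to i16 and the result truncated. Hypothetical scalar
// model of two of the lowered operations (assumes Amt < 16).
#if 0
#include <cstdint>

int8_t addI8(int8_t A, int8_t B) {
  return int8_t(int16_t(A) + int16_t(B));       // SIGN_EXTEND, i16 ADD, TRUNCATE
}

int8_t shlI8(int8_t A, unsigned Amt) {
  return int8_t(uint16_t(uint8_t(A)) << Amt);   // ZERO_EXTEND, i16 SHL, TRUNCATE
}
#endif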
case ISD::BUILD_VECTOR: @@ -2513,44 +2842,56 @@ SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) // Vector and i8 multiply: case ISD::MUL: - if (MVT::isVector(Op.getValueType())) - return LowerVectorMUL(Op, DAG); - else if (Op.getValueType() == MVT::i8) - return LowerI8Math(Op, DAG, Op.getOpcode()); - else - return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode()); - - case ISD::FDIV: - if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32) - return LowerFDIVf32(Op, DAG); -// else if (Op.getValueType() == MVT::f64) -// return LowerFDIVf64(Op, DAG); - else - assert(0 && "Calling FDIV on unsupported MVT"); + if (VT == MVT::i8) + return LowerI8Math(Op, DAG, Opc, *this); case ISD::CTPOP: return LowerCTPOP(Op, DAG); + + case ISD::SELECT_CC: + return LowerSELECT_CC(Op, DAG, *this); + + case ISD::SETCC: + return LowerSETCC(Op, DAG, *this); + + case ISD::TRUNCATE: + return LowerTRUNCATE(Op, DAG); + + case ISD::SIGN_EXTEND: + return LowerSIGN_EXTEND(Op, DAG); } - return SDOperand(); + return SDValue(); } -//===----------------------------------------------------------------------===// -// Other Lowering Code -//===----------------------------------------------------------------------===// - -MachineBasicBlock * -SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, - MachineBasicBlock *BB) +void SPUTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl&Results, + SelectionDAG &DAG) const { - return BB; +#if 0 + unsigned Opc = (unsigned) N->getOpcode(); + EVT OpVT = N->getValueType(0); + + switch (Opc) { + default: { + errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; + errs() << "Op.getOpcode() = " << Opc << "\n"; + errs() << "*Op.getNode():\n"; + N->dump(); + abort(); + /*NOTREACHED*/ + } + } +#endif + + /* Otherwise, return unchanged */ } //===----------------------------------------------------------------------===// // Target Optimization Hooks //===----------------------------------------------------------------------===// -SDOperand +SDValue SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { #if 0 @@ -2558,79 +2899,181 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const #endif const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); SelectionDAG &DAG = DCI.DAG; - SDOperand N0 = N->getOperand(0); // everything has at least one operand + SDValue Op0 = N->getOperand(0); // everything has at least one operand + EVT NodeVT = N->getValueType(0); // The node's value type + EVT Op0VT = Op0.getValueType(); // The first operand's result + SDValue Result; // Initially, empty result + DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: break; + case ISD::ADD: { + SDValue Op1 = N->getOperand(1); + + if (Op0.getOpcode() == SPUISD::IndirectAddr + || Op1.getOpcode() == SPUISD::IndirectAddr) { + // Normalize the operands to reduce repeated code + SDValue IndirectArg = Op0, AddArg = Op1; + + if (Op1.getOpcode() == SPUISD::IndirectAddr) { + IndirectArg = Op1; + AddArg = Op0; + } + + if (isa(AddArg)) { + ConstantSDNode *CN0 = cast (AddArg); + SDValue IndOp1 = IndirectArg.getOperand(1); + + if (CN0->isNullValue()) { + // (add (SPUindirect , ), 0) -> + // (SPUindirect , ) + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\n" + << "Replace: (add (SPUindirect , ), 0)\n" + << "With: (SPUindirect , )\n"; + } +#endif + + return IndirectArg; + } else if (isa(IndOp1)) { + // (add (SPUindirect , ), ) -> + // (SPUindirect , ) + ConstantSDNode *CN1 
= cast (IndOp1); + int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); + SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\n" + << "Replace: (add (SPUindirect , " << CN1->getSExtValue() + << "), " << CN0->getSExtValue() << ")\n" + << "With: (SPUindirect , " + << combinedConst << ")\n"; + } +#endif + + return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, + IndirectArg, combinedValue); + } + } + } + break; + } + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { + // (any_extend (SPUextract_elt0 )) -> + // (SPUextract_elt0 ) + // Types must match, however... +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\nReplace: "; + N->dump(&DAG); + errs() << "\nWith: "; + Op0.getNode()->dump(&DAG); + errs() << "\n"; + } +#endif + + return Op0; + } + break; + } case SPUISD::IndirectAddr: { - if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) { - ConstantSDNode *CN = cast(N->getOperand(1)); - if (CN->getValue() == 0) { + if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { + ConstantSDNode *CN = dyn_cast(N->getOperand(1)); + if (CN != 0 && CN->isNullValue()) { // (SPUindirect (SPUaform , 0), 0) -> // (SPUaform , 0) - DEBUG(cerr << "Replace: "); + DEBUG(errs() << "Replace: "); DEBUG(N->dump(&DAG)); - DEBUG(cerr << "\nWith: "); - DEBUG(N0.Val->dump(&DAG)); - DEBUG(cerr << "\n"); + DEBUG(errs() << "\nWith: "); + DEBUG(Op0.getNode()->dump(&DAG)); + DEBUG(errs() << "\n"); - return N0; + return Op0; + } + } else if (Op0.getOpcode() == ISD::ADD) { + SDValue Op1 = N->getOperand(1); + if (ConstantSDNode *CN1 = dyn_cast(Op1)) { + // (SPUindirect (add , ), 0) -> + // (SPUindirect , ) + if (CN1->isNullValue()) { + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\n" + << "Replace: (SPUindirect (add , ), 0)\n" + << "With: (SPUindirect , )\n"; + } +#endif + + return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, + Op0.getOperand(0), Op0.getOperand(1)); + } } } + break; } - case ISD::ADD: { - SDOperand Op0 = N->getOperand(0); - SDOperand Op1 = N->getOperand(1); - - if ((Op1.getOpcode() == ISD::Constant - || Op1.getOpcode() == ISD::TargetConstant) - && Op0.getOpcode() == SPUISD::IndirectAddr) { - SDOperand Op01 = Op0.getOperand(1); - if (Op01.getOpcode() == ISD::Constant - || Op01.getOpcode() == ISD::TargetConstant) { - // (add , (SPUindirect , )) -> - // (SPUindirect , ) - ConstantSDNode *CN0 = cast(Op1); - ConstantSDNode *CN1 = cast(Op01); - SDOperand combinedConst = - DAG.getConstant(CN0->getValue() + CN1->getValue(), - Op0.getValueType()); - - DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", " - << "(SPUindirect , " << CN1->getValue() << "))\n"); - DEBUG(cerr << "With: (SPUindirect , " - << CN0->getValue() + CN1->getValue() << ")\n"); - return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(), - Op0.getOperand(0), combinedConst); + case SPUISD::SHL_BITS: + case SPUISD::SHL_BYTES: + case SPUISD::ROTBYTES_LEFT: { + SDValue Op1 = N->getOperand(1); + + // Kill degenerate vector shifts: + if (ConstantSDNode *CN = dyn_cast(Op1)) { + if (CN->isNullValue()) { + Result = Op0; } - } else if ((Op0.getOpcode() == ISD::Constant - || Op0.getOpcode() == ISD::TargetConstant) - && Op1.getOpcode() == SPUISD::IndirectAddr) { - SDOperand Op11 = Op1.getOperand(1); - if (Op11.getOpcode() == ISD::Constant - || 
Op11.getOpcode() == ISD::TargetConstant) { - // (add (SPUindirect , ), ) -> - // (SPUindirect , ) - ConstantSDNode *CN0 = cast(Op0); - ConstantSDNode *CN1 = cast(Op11); - SDOperand combinedConst = - DAG.getConstant(CN0->getValue() + CN1->getValue(), - Op0.getValueType()); - - DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", " - << "(SPUindirect , " << CN1->getValue() << "))\n"); - DEBUG(cerr << "With: (SPUindirect , " - << CN0->getValue() + CN1->getValue() << ")\n"); - - return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(), - Op1.getOperand(0), combinedConst); + } + break; + } + case SPUISD::PREFSLOT2VEC: { + switch (Op0.getOpcode()) { + default: + break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: { + // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot ))) -> + // + // but only if the SPUprefslot2vec and types match. + SDValue Op00 = Op0.getOperand(0); + if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { + SDValue Op000 = Op00.getOperand(0); + if (Op000.getValueType() == NodeVT) { + Result = Op000; + } } + break; } + case SPUISD::VEC2PREFSLOT: { + // (SPUprefslot2vec (SPUvec2prefslot )) -> + // + Result = Op0.getOperand(0); + break; + } + } + break; } } + // Otherwise, return unchanged. - return SDOperand(); +#ifndef NDEBUG + if (Result.getNode()) { + DEBUG(errs() << "\nReplace.SPU: "); + DEBUG(N->dump(&DAG)); + DEBUG(errs() << "\nWith: "); + DEBUG(Result.getNode()->dump(&DAG)); + DEBUG(errs() << "\n"); + } +#endif + + return Result; } //===----------------------------------------------------------------------===// @@ -2639,7 +3082,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. -SPUTargetLowering::ConstraintType +SPUTargetLowering::ConstraintType SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { if (ConstraintLetter.size() == 1) { switch (ConstraintLetter[0]) { @@ -2650,14 +3093,46 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const case 'v': case 'y': return C_RegisterClass; - } + } } return TargetLowering::getConstraintType(ConstraintLetter); } -std::pair +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +SPUTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + //FIXME: Seems like the supported constraint letters were just copied + // from PPC, as the following doesn't correspond to the GCC docs. + // I'm leaving it so until someone adds the corresponding lowering support. 
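// Illustrative sketch, not part of this patch: two of the PerformDAGCombine
// simplifications above, restated on plain integers -- a constant added to an
// indirect address folds into its offset, and a shift or rotate by zero bytes
// is returned unchanged. The struct and helpers are hypothetical.
#if 0
#include <cstdint>

struct IndirectAddr { int64_t Base, Offset; };

// (add (SPUindirect base, c1), c2) -> (SPUindirect base, c1 + c2)
IndirectAddr foldAddConstant(IndirectAddr A, int64_t C) {
  A.Offset += C;
  return A;
}

// (SPUshl_bytes v, 0) -> v : the degenerate shift is killed by the combine
uint64_t shlBytes(uint64_t V, unsigned Bytes) {
  return Bytes == 0 ? V : (Bytes >= 8 ? 0 : V << (8 * Bytes));
}
#endif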
+ case 'b': + case 'r': + case 'f': + case 'd': + case 'v': + case 'y': + weight = CW_Register; + break; + } + return weight; +} + +std::pair SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT::ValueType VT) const + EVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters @@ -2673,42 +3148,115 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, else if (VT == MVT::f64) return std::make_pair(0U, SPU::R64FPRegisterClass); break; - case 'v': + case 'v': return std::make_pair(0U, SPU::GPRCRegisterClass); } } - + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } +//! Compute used/known bits for a SPU operand void -SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, - uint64_t Mask, - uint64_t &KnownZero, - uint64_t &KnownOne, - const SelectionDAG &DAG, - unsigned Depth ) const { - KnownZero = 0; - KnownOne = 0; +SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth ) const { +#if 0 + const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; + + switch (Op.getOpcode()) { + default: + // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + break; + case CALL: + case SHUFB: + case SHUFFLE_MASK: + case CNTB: + case SPUISD::PREFSLOT2VEC: + case SPUISD::LDRESULT: + case SPUISD::VEC2PREFSLOT: + case SPUISD::SHLQUAD_L_BITS: + case SPUISD::SHLQUAD_L_BYTES: + case SPUISD::VEC_ROTL: + case SPUISD::VEC_ROTR: + case SPUISD::ROTBYTES_LEFT: + case SPUISD::SELECT_MASK: + case SPUISD::SELB: + } +#endif +} + +unsigned +SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + switch (Op.getOpcode()) { + default: + return 1; + + case ISD::SETCC: { + EVT VT = Op.getValueType(); + + if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { + VT = MVT::i32; + } + return VT.getSizeInBits(); + } + } } // LowerAsmOperandForConstraint void -SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, +SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - std::vector &Ops, - SelectionDAG &DAG) { + std::vector &Ops, + SelectionDAG &DAG) const { // Default, for the time being, to the base class handler TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); } /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode. -bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const { +bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, + const Type *Ty) const { // SPU's addresses are 256K: return (V > -(1 << 18) && V < (1 << 18) - 1); } bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { - return false; + return false; +} + +bool +SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The SPU target isn't yet aware of offsets. + return false; } + +// can we compare to Imm without writing it into a register? +bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + //ceqi, cgti, etc. all take s10 operand + return isInt<10>(Imm); +} + +bool +SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type * ) const{ + + // A-form: 18bit absolute address. 
+  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+    return true;
+
+  // D-form: reg + 14bit offset
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+    return true;
+
+  // X-form: reg+reg
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+    return true;
+
+  return false;
+}
+
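// Illustrative sketch, not part of this patch: the three SPU address forms
// accepted by isLegalAddressingMode above, restated as a standalone predicate.
// AddrModeSketch is a hypothetical stand-in for TargetLowering::AddrMode.
#if 0
#include <cstdint>

struct AddrModeSketch {
  bool HasBaseGV;        // models AM.BaseGV != 0
  bool HasBaseReg;
  int64_t BaseOffs;
  int64_t Scale;
};

static bool isSigned14(int64_t V) { return V >= -(1 << 13) && V < (1 << 13); }

bool isLegalSPUAddress(const AddrModeSketch &AM) {
  if (AM.HasBaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
    return true;                                  // A-form: 18-bit absolute address
  if (!AM.HasBaseGV && AM.HasBaseReg && AM.Scale == 0 && isSigned14(AM.BaseOffs))
    return true;                                  // D-form: register + signed 14-bit offset
  if (!AM.HasBaseGV && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
    return true;                                  // X-form: register + register
  return false;
}
#endif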