-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
-#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
-
+#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
namespace {
std::map<unsigned, const char *> node_names;
- //! MVT mapping to useful data for Cell SPU
+ //! EVT mapping to useful data for Cell SPU
struct valtype_map_s {
- const MVT valtype;
- const int prefslot_byte;
+ EVT valtype;
+ int prefslot_byte;
};
const valtype_map_s valtype_map[] = {
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
- const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+ const valtype_map_s *getValueTypeMapEntry(EVT VT) {
const valtype_map_s *retval = 0;
for (size_t i = 0; i < n_valtype_map; ++i) {
#ifndef NDEBUG
if (retval == 0) {
- cerr << "getValueTypeMapEntry returns NULL for "
- << VT.getMVTString()
- << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "getValueTypeMapEntry returns NULL for "
+ << VT.getEVTString();
+ llvm_report_error(Msg.str());
}
#endif
return retval;
}
- //! Predicate that returns true if operand is a memory target
+ //! Expand a library call into an actual call DAG node
/*!
- \arg Op Operand to test
- \return true if the operand is a memory target (i.e., global
- address, external symbol, constant pool) or an A-form
- address.
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
*/
- bool isMemoryOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::GlobalAddress
- || Opc == ISD::GlobalTLSAddress
- || Opc == ISD::JumpTable
- || Opc == ISD::ConstantPool
- || Opc == ISD::ExternalSymbol
- || Opc == ISD::TargetGlobalAddress
- || Opc == ISD::TargetGlobalTLSAddress
- || Opc == ISD::TargetJumpTable
- || Opc == ISD::TargetConstantPool
- || Opc == ISD::TargetExternalSymbol
- || Opc == SPUISD::AFormAddr);
- }
-
- //! Predicate that returns true if the operand is an indirect target
- bool isIndirectOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::Register
- || Opc == SPUISD::LDRESULT);
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Only CallInfo.first is returned below; CallInfo.second is discarded and
+ // Hi is never written. NOTE(review): confirm callers depend on neither.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Lower the call on InChain; the return type mirrors Op's first result type.
+ const Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op.getDebugLoc(),
+ DAG.GetOrdering(InChain.getNode()));
+
+ return CallInfo.first;
}
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
- : TargetLowering(TM),
- SPUTM(TM)
-{
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+ SPUTM(TM) {
// Fold away setcc operations if possible.
setPow2DivIsCheap();
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
// Set up the SPU's register classes:
addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i8, MVT::i1, Custom);
- setTruncStoreAction(MVT::i16, MVT::i1, Custom);
- setTruncStoreAction(MVT::i32, MVT::i1, Custom);
- setTruncStoreAction(MVT::i64, MVT::i1, Custom);
- setTruncStoreAction(MVT::i128, MVT::i1, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
- setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
- setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
- setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
- setTruncStoreAction(MVT::i128, MVT::i8, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// SPU constant load actions are custom lowered:
- setOperationAction(ISD::Constant, MVT::i64, Custom);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
// SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
++sctype) {
- MVT VT = (MVT::SimpleValueType)sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
}
- // Custom lower BRCOND for i1, i8 to "promote" the result to
- // i32 and i16, respectively.
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
// Expand the jumptable branches
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- // PowerPC has no SREM/UREM instructions
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
+ // SPU has no division/remainder instructions
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
// We don't support sin/cos/sqrt/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
- // If we're enabling GP optimizations, use hardware square root
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Legal);
setOperationAction(ISD::ROTL, MVT::i16, Legal);
setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
// SPU has no native version of shift left/right for i8
setOperationAction(ISD::SHL, MVT::i8, Custom);
setOperationAction(ISD::SRL, MVT::i8, Custom);
setOperationAction(ISD::SRA, MVT::i8, Custom);
- // And SPU needs custom lowering for shift left/right for i64
- setOperationAction(ISD::SHL, MVT::i64, Custom);
- setOperationAction(ISD::SRL, MVT::i64, Custom);
- setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
- // Custom lower i8, i32 and i64 multiplications
+ // Custom lower i8 multiplication; i32 and i64 multiplications are legal
setOperationAction(ISD::MUL, MVT::i8, Custom);
- setOperationAction(ISD::MUL, MVT::i32, Custom);
- setOperationAction(ISD::MUL, MVT::i64, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
- // Need to custom handle (some) common i8, i64 math ops
+ // Need to custom handle i8 add/sub; the i64 forms are legal
- setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::SUB, MVT::i8, Custom);
- setOperationAction(ISD::SUB, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
// SPU does not have BSWAP. It does have i32 support CTLZ.
// CTPOP has to be custom lowered.
setOperationAction(ISD::CTPOP, MVT::i16, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
- setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
- setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
-
- // Zero extension and sign extension for i64 have to be
- // custom legalized
- setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
-
- // SPU has a legal FP -> signed INT instruction
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+ // to expand to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
// FDIV on SPU requires custom lowering
- setOperationAction(ISD::FDIV, MVT::f32, Custom);
- //setOperationAction(ISD::FDIV, MVT::f64, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
- // SPU has [U|S]INT_TO_FP
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64, i64->f64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
++sctype) {
- MVT VT = (MVT::SimpleValueType)sctype;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
- setOperationAction(ISD::GlobalAddress, VT, Custom);
- setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
}
- // RET must be custom lowered, to meet ABI requirements
- setOperationAction(ISD::RET, MVT::Other, Custom);
-
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+ // "Odd size" vector classes that we're willing to support:
+ addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
+
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
// add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD , VT, Legal);
- setOperationAction(ISD::SUB , VT, Legal);
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
- // mul has to be custom lowered.
+ // mul is marked legal as well.
- setOperationAction(ISD::MUL , VT, Custom);
+ setOperationAction(ISD::MUL, VT, Legal);
- setOperationAction(ISD::AND , VT, Legal);
- setOperationAction(ISD::OR , VT, Legal);
- setOperationAction(ISD::XOR , VT, Legal);
- setOperationAction(ISD::LOAD , VT, Legal);
- setOperationAction(ISD::SELECT, VT, Legal);
- setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
// These operations need to be expanded:
- setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
- setOperationAction(ISD::UREM, VT, Expand);
- setOperationAction(ISD::FDIV, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
- setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::AND, MVT::v16i8, Custom);
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
setShiftAmountType(MVT::i32);
- setSetCCResultContents(ZeroOrOneSetCCResult);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
setTargetDAGCombine(ISD::ANY_EXTEND);
computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(SchedulingForRegPressure);
}
const char *
node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
- node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
- node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
- node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
- = "SPUISD::EXTRACT_ELT0_CHAINED";
- node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
- node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
- node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
- node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
- node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
- node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
- node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
- node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
- node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
- "SPUISD::ROTQUAD_RZ_BYTES";
- node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
- "SPUISD::ROTQUAD_RZ_BITS";
- node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
- "SPUISD::ROTBYTES_RIGHT_S";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
- "SPUISD::ROTBYTES_LEFT_CHAINED";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
- "SPUISD::ROTBYTES_LEFT_BITS";
+ "SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
- node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
- node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
- node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
- node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
- node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
- node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
}
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
return ((i != node_names.end()) ? i->second : 0);
}
-MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
- MVT VT = Op.getValueType();
- return (VT.isInteger() ? VT : MVT(MVT::i32));
+/// getFunctionAlignment - Return the Log2 alignment: always 3 (2^3 = 8).
+unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
+ return 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
+ // i8, i16 and i32 are returned unchanged; every other type yields i32
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
+ VT.getSimpleVT().SimpleTy :
+ MVT::i32);
}
//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
-/// Aligned load common code for CellSPU
-/*!
- \param[in] Op The SelectionDAG load or store operand
- \param[in] DAG The selection DAG
- \param[in] ST CellSPU subtarget information structure
- \param[in,out] alignment Caller initializes this to the load or store node's
- value from getAlignment(), may be updated while generating the aligned load
- \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
- offset (divisible by 16, modulo 16 == 0)
- \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
- offset of the preferred slot (modulo 16 != 0)
- \param[in,out] VT Caller initializes this value type to the the load or store
- node's loaded or stored value type; may be updated if an i1-extended load or
- store.
- \param[out] was16aligned true if the base pointer had 16-byte alignment,
- otherwise false. Can help to determine if the chunk needs to be rotated.
-
- Both load and store lowering load a block of data aligned on a 16-byte
- boundary. This is the common aligned load code shared between both.
- */
-static SDValue
-AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
- LSBaseSDNode *LSN,
- unsigned &alignment, int &alignOffs, int &prefSlotOffs,
- MVT &VT, bool &was16aligned)
-{
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
- SDValue basePtr = LSN->getBasePtr();
- SDValue chain = LSN->getChain();
-
- if (basePtr.getOpcode() == ISD::ADD) {
- SDValue Op1 = basePtr.getNode()->getOperand(1);
-
- if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
-
- alignOffs = (int) CN->getZExtValue();
- prefSlotOffs = (int) (alignOffs & 0xf);
-
- // Adjust the rotation amount to ensure that the final result ends up in
- // the preferred slot:
- prefSlotOffs -= vtm->prefslot_byte;
- basePtr = basePtr.getOperand(0);
-
- // Loading from memory, can we adjust alignment?
- if (basePtr.getOpcode() == SPUISD::AFormAddr) {
- SDValue APtr = basePtr.getOperand(0);
- if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
- alignment = GSDN->getGlobal()->getAlignment();
- }
- }
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
- } else if (basePtr.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
- alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
- prefSlotOffs = (int) (alignOffs & 0xf);
- prefSlotOffs -= vtm->prefslot_byte;
- basePtr = DAG.getRegister(SPU::R1, VT);
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
-
- if (alignment == 16) {
- // Realign the base pointer as a D-Form address:
- if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
- basePtr = DAG.getNode(ISD::ADD, PtrVT,
- basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- }
-
- // Emit the vector load:
- was16aligned = true;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
- }
-
- // Unaligned load or we're using the "large memory" model, which means that
- // we have to be very pessimistic:
- if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Add the offset
- basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- was16aligned = false;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
-}
-
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
within a 16-byte block, we have to rotate to extract the requested element.
- */
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32, ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
- MVT VT = LN->getMemoryVT();
- MVT OpVT = Op.getNode()->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT InVT = LN->getMemoryVT();
+ EVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- SDValue Ops[8];
+ const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+ DebugLoc dl = Op.getDebugLoc();
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int offset, rotamt;
- bool was16aligned;
- SDValue result =
- AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
-
- if (result.getNode() == 0)
- return result;
-
- the_chain = result.getValue(1);
- // Rotate the chunk if necessary
- if (rotamt < 0)
- rotamt += 16;
- if (rotamt != 0 || !was16aligned) {
- SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
-
- Ops[0] = the_chain;
- Ops[1] = result;
- if (was16aligned) {
- Ops[2] = DAG.getConstant(rotamt, MVT::i16);
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
} else {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- LoadSDNode *LN1 = cast<LoadSDNode>(result);
- Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
DAG.getConstant(rotamt, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
- the_chain = result.getValue(1);
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-vtm->prefslot_byte, PtrVT));
}
- if (VT == OpVT || ExtType == ISD::EXTLOAD) {
- SDVTList scalarvts;
- MVT vecVT = MVT::v16i8;
-
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- if (OpVT == VT) {
- if (VT != MVT::i1)
- vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
- } else
- vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
-
- Ops[0] = the_chain;
- Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
- scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
- result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
- the_chain = result.getValue(1);
- } else {
- // Handle the sign and zero-extending loads for i1 and i8:
- unsigned NewOpC;
+ // Re-emit as a v16i8 vector load
+ result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), 16);
- if (ExtType == ISD::SEXTLOAD) {
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_SEXT
- : SPUISD::EXTRACT_I8_SEXT);
- } else {
- assert(ExtType == ISD::ZEXTLOAD);
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_ZEXT
- : SPUISD::EXTRACT_I8_ZEXT);
- }
+ // Update the chain
+ the_chain = result.getValue(1);
- result = DAG.getNode(NewOpC, OpVT, result);
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
+ result.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
}
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
SDValue retops[2] = {
result,
the_chain
};
- result = DAG.getNode(SPUISD::LDRESULT, retvts,
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
retops, sizeof(retops) / sizeof(retops[0]));
return result;
}
case ISD::POST_INC:
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
- cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
"UNINDEXED\n";
- cerr << (unsigned) LN->getAddressingMode() << "\n";
- abort();
- /*NOTREACHED*/
+ Msg << (unsigned) LN->getAddressingMode();
+ llvm_report_error(Msg.str());
+ /*NOTREACHED*/
+ }
}
return SDValue();
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
StoreSDNode *SN = cast<StoreSDNode>(Op);
SDValue Value = SN->getValue();
- MVT VT = Value.getValueType();
- MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT VT = Value.getValueType();
+ EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
unsigned alignment = SN->getAlignment();
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int chunk_offset, slot_offset;
- bool was16aligned;
-
- // The vector type we really want to load from the 16-byte chunk, except
- // in the case of MVT::i1, which has to be v16i8.
- MVT vecVT, stVecVT = MVT::v16i8;
+ // The vector type we really want to load from the 16-byte chunk.
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- if (StVT != MVT::i1)
- stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
- vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
- SDValue alignLoadVec =
- AlignedLoad(Op, DAG, ST, SN, alignment,
- chunk_offset, slot_offset, VT, was16aligned);
+ // Re-emit as a v16i8 vector load
+ alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->isVolatile(), 16);
- if (alignLoadVec.getNode() == 0)
- return alignLoadVec;
+ // Update the chain
+ the_chain = alignLoadVec.getValue(1);
LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
- SDValue basePtr = LN->getBasePtr();
- SDValue the_chain = alignLoadVec.getValue(1);
SDValue theValue = SN->getValue();
SDValue result;
theValue = theValue.getOperand(0);
}
- chunk_offset &= 0xf;
-
- SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
- SDValue insertEltPtr;
- SDValue insertEltOp;
-
// If the base pointer is already a D-form address, then just create
// a new D-form address with a slot offset and the original base pointer.
// Otherwise generate a D-form address with the slot offset relative
// to the stack pointer, which is always aligned.
- DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
- DEBUG(basePtr.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
-
- if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
- (basePtr.getOpcode() == ISD::ADD
- && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
- insertEltPtr = basePtr;
- } else {
- insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
- }
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ SDValue insertEltOp =
+ DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
+ SDValue vectorizeOp =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
- insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
- result = DAG.getNode(SPUISD::SHUFB, vecVT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
- alignLoadVec,
- DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, alignLoadVec,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32, insertEltOp));
- result = DAG.getStore(the_chain, result, basePtr,
+ result = DAG.getStore(the_chain, dl, result, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
LN->isVolatile(), LN->getAlignment());
+#if 0 && !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ const SDValue ¤tRoot = DAG.getRoot();
+
+ DAG.setRoot(result);
+ errs() << "------- CellSPU:LowerStore result:\n";
+ DAG.dump();
+ errs() << "-------\n";
+ DAG.setRoot(currentRoot);
+ }
+#endif
+
return result;
/*UNREACHED*/
}
case ISD::POST_INC:
case ISD::POST_DEC:
case ISD::LAST_INDEXED_MODE:
- cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
"UNINDEXED\n";
- cerr << (unsigned) SN->getAddressingMode() << "\n";
- abort();
- /*NOTREACHED*/
+ Msg << (unsigned) SN->getAddressingMode();
+ llvm_report_error(Msg.str());
+ /*NOTREACHED*/
+ }
}
return SDValue();
}
-/// Generate the address of a constant pool entry.
+//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
Constant *C = CP->getConstVal();
SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
// Just return the SDValue with the constant pool address in it.
- return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
} else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
}
- assert(0 &&
- "LowerConstantPool: Relocation model other than static"
- " not supported.");
+ llvm_unreachable("LowerConstantPool: Relocation model other than static"
+ " not supported.");
return SDValue();
}
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
} else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
}
- assert(0 &&
- "LowerJumpTable: Relocation model other than static not supported.");
+ llvm_unreachable("LowerJumpTable: Relocation model other than static"
+ " not supported.");
return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- MVT PtrVT = Op.getValueType();
+ EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GSDN->getGlobal();
SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
if (TM.getRelocationModel() == Reloc::Static) {
if (!ST->usingLargeMem()) {
- return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
} else {
- SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
- SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
- return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
}
} else {
- cerr << "LowerGlobalAddress: Relocation model other than static not "
- << "supported.\n";
- abort();
- /*NOTREACHED*/
- }
-
- return SDValue();
-}
-
-//! Custom lower i64 integer constants
-/*!
- This code inserts all of the necessary juggling that needs to occur to load
- a 64-bit constant into a register.
- */
-static SDValue
-LowerConstant(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
-
- if (VT == MVT::i64) {
- SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
- } else {
- cerr << "LowerConstant: unhandled constant type "
- << VT.getMVTString()
- << "\n";
- abort();
+ llvm_report_error("LowerGlobalAddress: Relocation model other than static"
+ "not supported.");
/*NOTREACHED*/
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
- assert((FP != 0) &&
- "LowerConstantFP: Node is not ConstantFPSDNode");
+ EVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
- return DAG.getNode(ISD::BIT_CONVERT, VT,
- LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
}
return SDValue();
}
-//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
-static SDValue
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
-{
- SDValue Cond = Op.getOperand(1);
- MVT CondVT = Cond.getValueType();
- MVT CondNVT;
-
- if (CondVT == MVT::i1 || CondVT == MVT::i8) {
- CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
- return DAG.getNode(ISD::BRCOND, Op.getValueType(),
- Op.getOperand(0),
- DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
- Op.getOperand(2));
- } else
- return SDValue(); // Unchanged
-}
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
-static SDValue
-LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
-{
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SmallVector<SDValue, 48> ArgValues;
- SDValue Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
unsigned ArgRegIdx = 0;
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Add DAG nodes to load the arguments or copy them out of registers.
- for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
- ArgNo != e; ++ArgNo) {
- MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
SDValue ArgVal;
if (ArgRegIdx < NumArgRegs) {
const TargetRegisterClass *ArgRegClass;
- switch (ObjectVT.getSimpleVT()) {
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
default: {
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << ObjectVT.getMVTString()
- << "\n";
- abort();
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "LowerFormalArguments Unhandled argument type: "
+ << ObjectVT.getEVTString();
+ llvm_report_error(Msg.str());
}
case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
case MVT::v2f64:
case MVT::v4f32:
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
- ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++ArgRegIdx;
} else {
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
ArgOffset += StackSlotSize;
}
- ArgValues.push_back(ArgVal);
+ InVals.push_back(ArgVal);
// Update the chain
- Root = ArgVal.getOperand(0);
+ Chain = ArgVal.getOperand(0);
}
// vararg handling:
// Create the frame slot
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
- SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
- Root = Store.getOperand(0);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
+ Chain = Store.getOperand(0);
MemOps.push_back(Store);
// Increment address by stack slot size for the next stored argument
ArgOffset += StackSlotSize;
}
if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
}
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
- ArgValues.size());
+ return Chain;
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
- ConstantSDNode *C = cast<ConstantSDNode>(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C) return 0;
int Addr = C->getZExtValue();
return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
-static
SDValue
-LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
- CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
- SDValue Chain = TheCall->getChain();
- SDValue Callee = TheCall->getCallee();
- unsigned NumOps = TheCall->getNumArgs();
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const Type *RetTy,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
+
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ unsigned NumOps = Outs.size();
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Handy pointer type
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area, and parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP]
- unsigned NumStackBytes = SPUFrameInfo::minStackSize();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = TheCall->getArg(i);
+ SDValue Arg = Outs[i].Val;
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- switch (Arg.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected ValueType for argument!");
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::i128:
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
ArgOffset += StackSlotSize;
}
break;
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
ArgOffset += StackSlotSize;
}
break;
+ case MVT::v2i64:
+ case MVT::v2f64:
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
ArgOffset += StackSlotSize;
}
break;
}
}
- // Update number of stack bytes actually used, insert a call sequence start
- NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
+
+ // Insert a call sequence start
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
true));
if (!MemOpChains.empty()) {
// Adjust the stack pointer for the stack arguments.
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
}
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee)) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
- MVT CalleeVT = Callee.getValueType();
+ EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
// This may be an unsafe assumption for JIT and really large compilation
// units.
if (GV->isDeclaration()) {
- Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
} else {
- Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
}
} else {
// "Large memory" mode: Turn all calls into indirect calls using X-form
// address pairs:
- Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
}
- } else if (ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
// If this is an absolute destination address that appears to be a legal
// local store address, use the munged value.
Callee = SDValue(Dest, 0);
if (InFlag.getNode())
Ops.push_back(InFlag);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
&Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
- if (TheCall->getValueType(0) != MVT::Other)
+ if (!Ins.empty())
InFlag = Chain.getValue(1);
- SDValue ResultVals[3];
- unsigned NumResults = 0;
+ // If the function returns void, just return the chain.
+ if (Ins.empty())
+ return Chain;
// If the call has results, copy the values out of the ret val registers.
- switch (TheCall->getValueType(0).getSimpleVT()) {
- default: assert(0 && "Unexpected ret value!");
+ switch (Ins[0].VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ret value!");
case MVT::Other: break;
case MVT::i32:
- if (TheCall->getValueType(1) == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
+ if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
+ MVT::i32, InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
Chain.getValue(2)).getValue(1);
- ResultVals[1] = Chain.getValue(0);
- NumResults = 2;
+ InVals.push_back(Chain.getValue(0));
} else {
- Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+ InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
}
break;
case MVT::i64:
- Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
+ InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
+ break;
+ case MVT::i128:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
+ InFlag).getValue(1);
+ InVals.push_back(Chain.getValue(0));
break;
case MVT::f32:
case MVT::f64:
- Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
break;
case MVT::v2f64:
+ case MVT::v2i64:
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
InFlag).getValue(1);
- ResultVals[0] = Chain.getValue(0);
- NumResults = 1;
+ InVals.push_back(Chain.getValue(0));
break;
}
- // If the function returns void, just return the chain.
- if (NumResults == 0)
- return Chain;
-
- // Otherwise, merge everything together with a MERGE_VALUES node.
- ResultVals[NumResults++] = Chain;
- SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
- return Res.getValue(Op.getResNo());
+ return Chain;
}
// Lower a function return (old side: static LowerRET; new side:
// SPUTargetLowering::LowerReturn). The outgoing values are assigned to
// locations using RetCC_SPU, each value is copied into its assigned register,
// and the resulting chain is terminated with an SPUISD::RET_FLAG node, which
// is the value returned.
-static SDValue
-LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+
// RVLocs receives one CCValAssign per return value from AnalyzeReturn.
SmallVector<CCValAssign, 16> RVLocs;
- unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
- CCState CCInfo(CC, isVarArg, TM, RVLocs);
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
- SDValue Chain = Op.getOperand(0);
// Flag is fed into each CopyToReg and refreshed from that copy's second
// result, so every copy is linked to the one before it.
SDValue Flag;
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Outs[i].Val, Flag);
Flag = Chain.getValue(1);
}
// Flag is still null when no value was copied (RVLocs empty); RET_FLAG is
// then built from the chain alone.
if (Flag.getNode())
- return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
else
- return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
}
if (OpVal.getNode() != 0) {
- if (ConstantSDNode *CN = cast<ConstantSDNode>(OpVal)) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
return CN;
}
}
- return 0; // All UNDEF: use implicit def.; not Constant node
+ return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
uint64_t Value = CN->getZExtValue();
if (ValueType == MVT::i64) {
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int64_t Value = CN->getSExtValue();
if (ValueType == MVT::i64) {
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int64_t Value = CN->getSExtValue();
if (ValueType == MVT::i64) {
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
int Value = (int) CN->getZExtValue();
if (ValueType == MVT::i16
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
- MVT ValueType) {
+ EVT ValueType) {
if (ConstantSDNode *CN = getVecImm(N)) {
uint64_t Value = CN->getZExtValue();
if ((ValueType == MVT::i32
return SDValue();
}
-// If this is a vector of constants or undefs, get the bits. A bit in
-// UndefBits is set if the corresponding element of the vector is an
-// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
-// zero. Return true if this is not an array of constants, false if it is.
-//
-static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
- uint64_t UndefBits[2]) {
- // Start with zero'd results.
- VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
-
- unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
- for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
- SDValue OpVal = BV->getOperand(i);
-
- unsigned PartNo = i >= e/2; // In the upper 128 bits?
- unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
-
- uint64_t EltBits = 0;
- if (OpVal.getOpcode() == ISD::UNDEF) {
- uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
- UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
- continue;
- } else if (ConstantSDNode *CN = cast<ConstantSDNode>(OpVal)) {
- EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
- } else if (ConstantFPSDNode *CN = cast<ConstantFPSDNode>(OpVal)) {
- const APFloat &apf = CN->getValueAPF();
- EltBits = (CN->getValueType(0) == MVT::f32
- ? FloatToBits(apf.convertToFloat())
- : DoubleToBits(apf.convertToDouble()));
- } else {
- // Nonconstant element.
- return true;
- }
// New-side LowerBUILD_VECTOR: handles only BUILD_VECTORs that are constant
// splats. The splat is detected with BuildVectorSDNode::isConstantSplat (which
// replaces the removed GetConstantBuildVectorBits/isConstantSplat helpers) and
// re-emitted per vector type as a BUILD_VECTOR of the splat constant,
// wrapped in a BIT_CONVERT for the v4f32, v2f64 and v16i8 cases. Returns an empty SDValue when
// the node is not a constant splat or the splat is wider than minSplatBits.
+//! Lower a BUILD_VECTOR instruction creatively:
+static SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
- VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
- }
// The minimum splat width is the element width, but never below 16 bits;
// the v16i8 case further down emits 16-bit constants accordingly.
+ if (minSplatBits < 16)
+ minSplatBits = 16;
- //printf("%llx %llx %llx %llx\n",
- // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
- return false;
-}
// Outputs of isConstantSplat.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
-/// If this is a splat (repetition) of a value across the whole vector, return
-/// the smallest size that splats it. For example, "0x01010101010101..." is a
-/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
-/// SplatSize = 1 byte.
-static bool isConstantSplat(const uint64_t Bits128[2],
- const uint64_t Undef128[2],
- int MinSplatBits,
- uint64_t &SplatBits, uint64_t &SplatUndef,
- int &SplatSize) {
- // Don't let undefs prevent splats from matching. See if the top 64-bits are
- // the same as the lower 64-bits, ignoring undefs.
- uint64_t Bits64 = Bits128[0] | Bits128[1];
- uint64_t Undef64 = Undef128[0] & Undef128[1];
- uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
- uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
- uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
- uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
-
- if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
- if (MinSplatBits < 64) {
-
- // Check that the top 32-bits are the same as the lower 32-bits, ignoring
- // undefs.
- if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
- if (MinSplatBits < 32) {
-
- // If the top 16-bits are different than the lower 16-bits, ignoring
- // undefs, we have an i32 splat.
- if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
- if (MinSplatBits < 16) {
- // If the top 8-bits are different than the lower 8-bits, ignoring
- // undefs, we have an i16 splat.
- if ((Bits16 & (uint16_t(~Undef16) >> 8))
- == ((Bits16 >> 8) & ~Undef16)) {
- // Otherwise, we have an 8-bit splat.
- SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
- SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
- SplatSize = 1;
- return true;
- }
- } else {
- SplatBits = Bits16;
- SplatUndef = Undef16;
- SplatSize = 2;
- return true;
- }
- }
- } else {
- SplatBits = Bits32;
- SplatUndef = Undef32;
- SplatSize = 4;
- return true;
- }
- }
- } else {
- SplatBits = Bits128[0];
- SplatUndef = Undef128[0];
- SplatSize = 8;
- return true;
- }
- }
// Give up (empty SDValue) unless the operands form a constant splat whose
// width does not exceed minSplatBits.
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
- return false; // Can't be a splat if two pieces don't match.
-}
// From here on the splat value is handled as a plain 64-bit integer.
+ uint64_t SplatBits = APSplatBits.getZExtValue();
-// If this is a case we can't handle, return null and let the default
-// expansion code take care of it. If we CAN select this case, and if it
-// selects to a single instruction, return Op. Otherwise, if we can codegen
-// this case more efficiently than a constant pool load, lower it to the
-// sequence of ops that should be used.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- // If this is a vector of constants or undefs, get the bits. A bit in
- // UndefBits is set if the corresponding element of the vector is an
- // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
- // zero.
- uint64_t VectorBits[2];
- uint64_t UndefBits[2];
- uint64_t SplatBits, SplatUndef;
- int SplatSize;
- if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
- || !isConstantSplat(VectorBits, UndefBits,
- VT.getVectorElementType().getSizeInBits(),
- SplatBits, SplatUndef, SplatSize))
- return SDValue(); // Not a constant vector, not a splat.
-
- switch (VT.getSimpleVT()) {
- default:
+ switch (VT.getSimpleVT().SimpleTy) {
// NOTE(review): the default case has no break/return of its own; it relies
// on llvm_report_error not returning (see the NOTREACHED marker) -- confirm.
+ default: {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+ << VT.getEVTString();
+ llvm_report_error(Msg.str());
+ /*NOTREACHED*/
+ }
case MVT::v4f32: {
- uint32_t Value32 = SplatBits;
- assert(SplatSize == 4
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
&& "LowerBUILD_VECTOR: Unexpected floating point vector element.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(Value32, MVT::i32);
- return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
break;
}
case MVT::v2f64: {
- uint64_t f64val = SplatBits;
- assert(SplatSize == 8
- && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(f64val, MVT::i64);
- return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
break;
}
case MVT::v16i8: {
// 8-bit constants have to be expanded to 16-bits
- unsigned short Value16 = SplatBits | (SplatBits << 8);
- SDValue Ops[8];
- for (int i = 0; i < 8; ++i)
- Ops[i] = DAG.getConstant(Value16, MVT::i16);
- return DAG.getNode(ISD::BIT_CONVERT, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// The byte-duplicating OR is commented out on the new side; presumably
// SplatBits already holds a 16-bit pattern because minSplatBits was raised
// to 16 above -- TODO confirm.
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
}
case MVT::v8i16: {
- unsigned short Value16;
- if (SplatSize == 2)
- Value16 = (unsigned short) (SplatBits & 0xffff);
- else
- Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
- SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
- SDValue Ops[8];
- for (int i = 0; i < 8; ++i) Ops[i] = T;
- return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
}
case MVT::v4i32: {
- unsigned int Value = SplatBits;
- SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
}
case MVT::v2i64: {
- uint64_t val = SplatBits;
- uint32_t upper = uint32_t(val >> 32);
- uint32_t lower = uint32_t(val);
-
- if (upper == lower) {
- // Magic constant that can be matched by IL, ILA, et. al.
- SDValue Val = DAG.getTargetConstant(val, MVT::i64);
- return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
- } else {
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
- bool upper_special, lower_special;
-
- // NOTE: This code creates common-case shuffle masks that can be easily
- // detected as common expressions. It is not attempting to create highly
- // specialized masks to replace any and all 0's, 0xff's and 0x80's.
-
- // Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
- lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
-
- // Create lower vector if not a special pattern
- if (!lower_special) {
- SDValue LO32C = DAG.getConstant(lower, MVT::i32);
- LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- LO32C, LO32C, LO32C, LO32C));
- }
// The 64-bit splat logic now lives in SPU::LowerV2I64Splat.
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
- // Create upper vector if not a special pattern
- if (!upper_special) {
- SDValue HI32C = DAG.getConstant(upper, MVT::i32);
- HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- HI32C, HI32C, HI32C, HI32C));
- }
+ return SDValue();
+}
- // If either upper or lower are special, then the two input operands are
- // the same (basically, one of them is a "don't care")
- if (lower_special)
- LO32 = HI32;
- if (upper_special)
- HI32 = LO32;
- if (lower_special && upper_special) {
- // Unhappy situation... both upper and lower are special, so punt with
- // a target constant:
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
- Zero, Zero);
- }
+/*!
+ */
+SDValue
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+ // Magic constant that can be matched by IL, ILA, et. al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
- for (int i = 0; i < 4; ++i) {
- uint64_t val = 0;
- for (int j = 0; j < 4; ++j) {
- SDValue V;
- bool process_upper, process_lower;
- val <<= 8;
- process_upper = (upper_special && (i & 1) == 0);
- process_lower = (lower_special && (i & 1) == 1);
-
- if (process_upper || process_lower) {
- if ((process_upper && upper == 0)
- || (process_lower && lower == 0))
- val |= 0x80;
- else if ((process_upper && upper == 0xffffffff)
- || (process_lower && lower == 0xffffffff))
- val |= 0xc0;
- else if ((process_upper && upper == 0x80000000)
- || (process_lower && lower == 0x80000000))
- val |= (j == 0 ? 0xe0 : 0x80);
- } else
- val |= i * 4 + j + ((i & 1) * 16);
- }
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
- ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
}
- return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
}
- }
- }
- return SDValue();
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
}
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
-/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
-/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// New side: the permutation is read from the ShuffleVectorSDNode mask
// (getMaskElt) instead of a third operand.
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- SDValue PermMask = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
// An undefined second input is treated as a second copy of V1.
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
// If we have a single element being moved from V2 into V1, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
// to be monotonically increasing with one exception element.
- MVT EltVT = V1.getValueType().getVectorElementType();
+ EVT VecVT = V1.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
// Number of mask entries that select from V2 (index >= V2EltIdx0).
unsigned EltsFromV2 = 0;
// Constant operand handed to SHUFFLE_MASK in the single-insert case.
unsigned V2Elt = 0;
// First mask index that refers to V2; set from the element type below.
unsigned V2EltIdx0 = 0;
// Index the next examined mask entry must equal for "monotonic" to hold.
unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
// State for the rotation scan: last source index seen, and the mask position
// at which source element 0 was found.
+ unsigned PrevElt = 0;
+ unsigned V0Elt = 0;
bool monotonic = true;
- if (EltVT == MVT::i8)
+ bool rotate = true;
+
+ if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
- else if (EltVT == MVT::i16)
+ } else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
- else if (EltVT == MVT::i32)
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
- else
- assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ } else
+ llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
+
// Single pass over the mask that classifies it for both special cases.
// Negative (undefined) mask entries are skipped entirely.
// NOTE(review): because of the skip, CurrElt is not advanced for an undef
// lane, so in-order lanes after an undef lane clear "monotonic" -- confirm
// this is intended.
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
- for (unsigned i = 0, e = PermMask.getNumOperands();
- EltsFromV2 <= 1 && monotonic && i != e;
- ++i) {
- unsigned SrcElt;
- if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
- SrcElt = 0;
- else
- SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
// Monotonic scan: V1 entries must appear in order; only the first entry
// taken from V2 sets V2Elt.
// NOTE(review): SrcElt >= V2EltIdx0 here, so V2EltIdx0 - SrcElt is zero or
// wraps around in unsigned arithmetic; this looks like it may have been
// meant as SrcElt - V2EltIdx0 -- confirm.
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ if (1 >= (++EltsFromV2)) {
+ V2Elt = (V2EltIdx0 - SrcElt) << 2;
+ }
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
- if (SrcElt >= V2EltIdx0) {
- ++EltsFromV2;
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- } else if (CurrElt != SrcElt) {
- monotonic = false;
+ ++CurrElt;
}
- ++CurrElt;
// Rotation scan: each entry must be the successor of the previous one,
// wrapping from MaxElts - 1 back to 0; any entry >= MaxElts ends the scan.
// NOTE(review): PrevElt == 0 doubles as the "nothing seen yet" marker, so
// whenever the previous source index really was 0 the next entry is
// accepted without the successor check -- confirm.
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ if (SrcElt == 0)
+ V0Elt = i;
+ } else {
+ rotate = false;
+ }
+ } else if (PrevElt == 0) {
+ // First time through, need to keep track of previous element
+ PrevElt = SrcElt;
+ } else {
+ // This isn't a rotation, takes elements from vector 2
+ rotate = false;
+ }
+ }
}
// Case 1: exactly one element comes from V2 and the rest are in order.
if (EltsFromV2 == 1 && monotonic) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Initialize temporary register to 0
SDValue InitTempReg =
- DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
- // Copy register's contents as index in INSERT_MASK:
+ DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
+ // Copy register's contents as index in SHUFFLE_MASK:
SDValue ShufMaskOp =
- DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
+ DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
DAG.getTargetConstant(V2Elt, MVT::i32),
- DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
+ DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
// Use shuffle mask in SHUFB synthetic instruction:
- return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
// Case 2 (new side): the mask passed the rotation scan; emit a byte rotate of
// V1 by (MaxElts - V0Elt) elements, expressed in bytes.
+ } else if (rotate) {
+ int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
} else {
// Convert the SHUFFLE_VECTOR mask's input element units to the
// actual bytes.
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask;
- for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
- unsigned SrcElt;
- if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
- SrcElt = 0;
- else
- SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
-
- for (unsigned j = 0; j < BytesPerElement; ++j) {
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i8));
- }
// Undefined mask entries are mapped to source element 0; every element is
// expanded into BytesPerElement consecutive byte indices.
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
- SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
}
}
// Lower SCALAR_TO_VECTOR. A constant scalar operand becomes a BUILD_VECTOR of
// repeated constants; a non-constant scalar of one of the listed types becomes
// an SPUISD::PREFSLOT2VEC node (PROMOTE_SCALAR on the old side) that receives
// the scalar as both operands.
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
if (Op0.getNode()->getOpcode() == ISD::Constant) {
// For a constant, build the appropriate constant vector, which will
ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
SmallVector<SDValue, 16> ConstVecValues;
- MVT VT;
+ EVT VT;
size_t n_copies;
// Create a constant vector:
// The result vector type selects the element count (n_copies) and the
// element type (VT).
- switch (Op.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected constant value type in "
- "LowerSCALAR_TO_VECTOR");
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
for (size_t j = 0; j < n_copies; ++j)
ConstVecValues.push_back(CValue);
- return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
&ConstVecValues[0], ConstVecValues.size());
} else {
// Otherwise, copy the value from one register to another:
- switch (Op0.getValueType().getSimpleVT()) {
- default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
- return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
}
}
// Fallback result; every visible path above either returns or hits
// llvm_unreachable (new side).
return SDValue();
}
-static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
- switch (Op.getValueType().getSimpleVT()) {
- default:
- cerr << "CellSPU: Unknown vector multiplication, got "
- << Op.getValueType().getMVTString()
- << "\n";
- abort();
- /*NOTREACHED*/
-
- case MVT::v4i32: {
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
- SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
- SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
- SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
- SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
-
- return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
- break;
- }
-
- // Multiply two v8i16 vectors (pipeline friendly version):
- // a) multiply lower halves, mask off upper 16-bit of 32-bit product
- // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
- // c) Use SELB to select upper and lower halves from the intermediate results
- //
- // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
- // dual-issue. This code does manage to do this, even if it's a little on
- // the wacky side
- case MVT::v8i16: {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- SDValue Chain = Op.getOperand(0);
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
- unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
- unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
-
- SDValue FSMBOp =
- DAG.getCopyToReg(Chain, FSMBIreg,
- DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
- DAG.getConstant(0xcccc, MVT::i16)));
-
- SDValue HHProd =
- DAG.getCopyToReg(FSMBOp, HiProdReg,
- DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
-
- SDValue HHProd_v4i32 =
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
- DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
-
- return DAG.getNode(SPUISD::SELB, MVT::v8i16,
- DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
- DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
- DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
- HHProd_v4i32,
- DAG.getConstant(16, MVT::i16))),
- DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
- }
-
- // This M00sE is N@stI! (apologies to Monty Python)
- //
- // SPU doesn't know how to do any 8-bit multiplication, so the solution
- // is to break it all apart, sign extend, and reassemble the various
- // intermediate products.
- case MVT::v16i8: {
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
- SDValue c8 = DAG.getConstant(8, MVT::i32);
- SDValue c16 = DAG.getConstant(16, MVT::i32);
-
- SDValue LLProd =
- DAG.getNode(SPUISD::MPY, MVT::v8i16,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
-
- SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
-
- SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
-
- SDValue LHProd =
- DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
- DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
-
- SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
- DAG.getConstant(0x2222, MVT::i16));
-
- SDValue LoProdParts =
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
- DAG.getNode(SPUISD::SELB, MVT::v8i16,
- LLProd, LHProd, FSMBmask));
-
- SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
-
- SDValue LoProd =
- DAG.getNode(ISD::AND, MVT::v4i32,
- LoProdParts,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- LoProdMask, LoProdMask,
- LoProdMask, LoProdMask));
-
- SDValue rAH =
- DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
-
- SDValue rBH =
- DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
-
- SDValue HLProd =
- DAG.getNode(SPUISD::MPY, MVT::v8i16,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
-
- SDValue HHProd_1 =
- DAG.getNode(SPUISD::MPY, MVT::v8i16,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
- DAG.getNode(SPUISD::VEC_SRA,
- MVT::v4i32, rAH, c8)),
- DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
- DAG.getNode(SPUISD::VEC_SRA,
- MVT::v4i32, rBH, c8)));
-
- SDValue HHProd =
- DAG.getNode(SPUISD::SELB, MVT::v8i16,
- HLProd,
- DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
- FSMBmask);
// Lower ISD::EXTRACT_VECTOR_ELT for Cell SPU (new revision, plus-prefixed
// lines). The minus-prefixed lines interleaved below are deleted text: the
// tail of the old LowerVectorMUL, all of LowerFDIVf32 and the old
// constant-index-only LowerEXTRACT_VECTOR_ELT.
//
// Op  - the node being lowered; operand 0 is the source vector (N) and
//       operand 1 is the element index (Elt).
// DAG - the SelectionDAG used to create the replacement nodes.
//
// Two strategies, chosen by whether the index is a ConstantSDNode:
//  - constant index: element 0 of an i32/i64 extract is read directly with
//    VEC2PREFSLOT; otherwise a SHUFB control vector is built that moves the
//    bytes of the element into the preferred slot, followed by VEC2PREFSLOT;
//  - variable index: the index is scaled to a byte count, the vector is
//    shifted with SHLQUAD_L_BYTES so the element lands at byte 0, the leading
//    bytes are replicated with SHUFB, and VEC2PREFSLOT reads the result.
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
// NOTE(review): the i32 and i64 conditions reject indices >= 4 and >= 2, but
// their messages say slot > 4 and slot > 2; the i8/i16 messages use the
// largest valid index (15, 7) instead. Confirm whether 3 and 1 were intended.
+ if (VT == MVT::i8 && EltNo >= 16)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+
// Fast path. Only the integer types are tested here, so an f32/f64 extract
// of element 0 goes through the general shuffle path below.
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
- SDValue HiProd =
- DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
+ // Need to generate shuffle mask and extract:
// prefslot_begin/prefslot_end are the first and last byte positions of the
// preferred slot for VT; elt_byte is the byte offset of the wanted element.
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
- return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
- DAG.getNode(ISD::OR, MVT::v4i32,
- LoProd, HiProd));
- }
- }
+ switch (VT.getSimpleVT().SimpleTy) {
// NOTE(review): there is no break after the assert, so when asserts are
// compiled out an unexpected type falls through into the i8 case.
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
- return SDValue();
-}
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
-static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
// One shuffle control byte per result byte.
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ for (int i = 0; i < 16; ++i) {
+ // zero fill upper part of preferred slot, don't care about the
+ // other slots:
// Bytes before prefslot_begin get the control value 0x80; bytes inside the
// preferred slot select consecutive bytes of the element; bytes past the
// slot repeat the pattern computed for the slot.
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
- SDValue A = Op.getOperand(0);
- SDValue B = Op.getOperand(1);
- MVT VT = Op.getValueType();
// Pack the 16 control bytes into four i32 constants, first byte in the most
// significant position, so the mask can be built as a v4i32 BUILD_VECTOR.
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
- unsigned VRegBR, VRegC;
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
- if (VT == MVT::f32) {
- VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
- VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
// Shuffle N with itself under the mask, then read the preferred slot.
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
} else {
- VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
- VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
- }
- // TODO: make sure we're feeding FPInterp the right arguments
- // Right now: fi B, frest(B)
-
- // Computes BRcpl =
- // (Floating Interpolate (FP Reciprocal Estimate B))
- SDValue BRcpl =
- DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
- DAG.getNode(SPUISD::FPInterp, VT, B,
- DAG.getNode(SPUISD::FPRecipEst, VT, B)));
-
- // Computes A * BRcpl and stores in a temporary register
- SDValue AxBRcpl =
- DAG.getCopyToReg(BRcpl, VRegC,
- DAG.getNode(ISD::FMUL, VT, A,
- DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
- // What's the Chain variable do? It's magic!
- // TODO: set Chain = Op(0).getEntryNode()
-
- return DAG.getNode(ISD::FADD, VT,
- DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
- DAG.getNode(ISD::FMUL, VT,
- DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
- DAG.getNode(ISD::FSUB, VT, A,
- DAG.getNode(ISD::FMUL, VT, B,
- DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
-}
-
-static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- SDValue N = Op.getOperand(0);
- SDValue Elt = Op.getOperand(1);
- SDValue ShufMask[16];
- ConstantSDNode *C = cast<ConstantSDNode>(Elt);
-
- assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
-
- int EltNo = (int) C->getZExtValue();
-
- // sanity checks:
- if (VT == MVT::i8 && EltNo >= 16)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == MVT::i16 && EltNo >= 8)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == MVT::i32 && EltNo >= 4)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
- else if (VT == MVT::i64 && EltNo >= 2)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ EVT VecVT = N.getValueType();
// NOTE(review): the two adjacent literals below concatenate without a space,
// so the emitted text reads 128-bitvector. Confirm and add a space.
+ if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ "vector type!");
+ }
- if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
- // i32 and i64: Element 0 is the preferred slot
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
- }
+ // Make life easier by making sure the index is zero-extended to i32
// NOTE(review): an index wider than i32 would also reach this ZERO_EXTEND;
// presumably callers never pass one -- confirm.
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
- // Need to generate shuffle mask and extract:
- int prefslot_begin = -1, prefslot_end = -1;
- int elt_byte = EltNo * VT.getSizeInBits() / 8;
+ // Scale the index to a bit/byte shift quantity
// 16 / element count is the element size in bytes of a 128-bit vector; its
// log2 is the left shift that turns an element index into a byte offset.
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
- switch (VT.getSimpleVT()) {
- default:
- assert(false && "Invalid value type!");
- case MVT::i8: {
- prefslot_begin = prefslot_end = 3;
- break;
- }
- case MVT::i16: {
- prefslot_begin = 2; prefslot_end = 3;
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- prefslot_begin = 0; prefslot_end = 3;
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- prefslot_begin = 0; prefslot_end = 7;
- break;
- }
- }
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
- assert(prefslot_begin != -1 && prefslot_end != -1 &&
- "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Shift the whole quadword left by the byte offset computed above.
+ vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
- for (int i = 0; i < 16; ++i) {
- // zero fill uppper part of preferred slot, don't care about the
- // other slots:
- unsigned int mask_val;
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
- if (i <= prefslot_end) {
- mask_val =
- ((i < prefslot_begin)
- ? 0x80
- : elt_byte + (i - prefslot_begin));
// Each case builds a v4i32 SHUFB control vector whose bytes name the source
// bytes 0..(element size - 1), repeated to fill all 16 positions.
+ switch (VT.getSimpleVT().SimpleTy) {
// NOTE(review): the message below contains a misspelling of the word
// variable, and its two literals concatenate without a space (vectortype).
+ default:
+ llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ "type");
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
- ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
- } else
- ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
// Replicate the shifted element across the vector, then read the preferred
// slot as the scalar result.
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
}
- SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
- &ShufMask[0],
- sizeof(ShufMask) / sizeof(ShufMask[0]));
-
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
- DAG.getNode(SPUISD::SHUFB, N.getValueType(),
- N, N, ShufMaskVec));
-
+ return retval;
}
// Lower ISD::INSERT_VECTOR_ELT for Cell SPU.
//
// Op  - the node being lowered; operand 0 is the destination vector, operand
//       1 the scalar to insert and operand 2 the element index.
// DAG - the SelectionDAG used to create the replacement nodes.
//
// The index must be a ConstantSDNode. The new revision (plus-prefixed lines)
// forms an SPUISD::IndirectAddr of register R1 plus the index constant, turns
// it into an SPUISD::SHUFFLE_MASK, and returns a SHUFB that merges
// SCALAR_TO_VECTOR(ValOp) with VecOp under that mask (bit-converted to v4i32).
// The old revision (minus-prefixed lines) used R2, an ISD::ADD and INSERT_MASK.
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue VecOp = Op.getOperand(0);
SDValue ValOp = Op.getOperand(1);
SDValue IdxOp = Op.getOperand(2);
- MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
// NOTE(review): cast<> is used rather than dyn_cast<>, so the null check in
// the assert below presumably can never fire -- confirm against the LLVM
// casting documentation.
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Use $2 because it's always 16-byte aligned and it's available:
- SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
// NOTE(review): the offset is the raw index value, now read with
// getSExtValue where the old revision used getZExtValue; how SHUFFLE_MASK
// interprets this offset is not visible here.
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(CN->getSExtValue(), PtrVT));
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
SDValue result =
- DAG.getNode(SPUISD::SHUFB, VT,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
VecOp,
- DAG.getNode(SPUISD::INSERT_MASK, VT,
- DAG.getNode(ISD::ADD, PtrVT,
- PtrBase,
- DAG.getConstant(CN->getZExtValue(),
- PtrVT))));
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
return result;
}
// Lower an i8 arithmetic, rotate or shift node by doing the work in i16.
//
// NOTE: this is a diff hunk. Plus-prefixed lines are the new revision,
// minus-prefixed lines are deleted text (the old LowerI8Math bodies, which
// special-cased constant operands, and the whole of LowerI64Math), and the
// end of the new function lies in lines elided from the hunk.
//
// Op  - the i8 node being lowered (asserted below).
// DAG - the SelectionDAG used to create the replacement nodes.
// Opc - the ISD opcode to re-emit on the widened operands.
// TLI - target lowering info; only getShiftAmountTy() is used (new parameter).
//
// Per opcode, in the new revision:
//  - ADD, SUB, MUL: sign-extend both operands to i16, apply Opc, truncate.
//  - ROTR, ROTL: zero-extend operand 0, copy its low 8 bits into the high 8
//    bits, convert the amount to the shift-amount type, rotate, truncate.
//  - SRL, SHL: zero-extend operand 0, convert the amount, shift, truncate.
//  - SRA: sign-extend operand 0, convert the amount, shift, truncate.
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
{
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ShiftVT = TLI.getShiftAmountTy();
assert(Op.getValueType() == MVT::i8);
switch (Opc) {
default:
- assert(0 && "Unhandled i8 math operator");
+ llvm_unreachable("Unhandled i8 math operator");
/*NOTREACHED*/
break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
case ISD::SUB: {
// 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// the result:
SDValue N1 = Op.getOperand(1);
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
- return DAG.getNode(ISD::TRUNCATE, MVT::i8,
- DAG.getNode(Opc, MVT::i16, N0, N1));
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::ROTR:
case ISD::ROTL: {
SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i32)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i32, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i32));
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
// Bring the rotate amount to the shift-amount type: widen with zero
// extension when it is narrower, truncate when it is wider.
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
// NOTE(review): the constant 8 below is typed MVT::i32 rather than ShiftVT,
// unlike the converted amount above -- confirm this matches the target
// shift-amount type.
SDValue ExpandArg =
- DAG.getNode(ISD::OR, MVT::i16, N0,
- DAG.getNode(ISD::SHL, MVT::i16,
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
N0, DAG.getConstant(8, MVT::i32)));
- return DAG.getNode(ISD::TRUNCATE, MVT::i8,
- DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
}
case ISD::SRL:
case ISD::SHL: {
SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
- return DAG.getNode(ISD::TRUNCATE, MVT::i8,
- DAG.getNode(Opc, MVT::i16, N0, N1));
- }
- case ISD::SRA: {
- SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16)
- ? ISD::SIGN_EXTEND
- : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
- return DAG.getNode(ISD::TRUNCATE, MVT::i8,
- DAG.getNode(Opc, MVT::i16, N0, N1));
- }
- case ISD::MUL: {
- SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
- return DAG.getNode(ISD::TRUNCATE, MVT::i8,
- DAG.getNode(Opc, MVT::i16, N0, N1));
- break;
- }
- }
-
- return SDValue();
-}
-
-static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
-{
- MVT VT = Op.getValueType();
- MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
-
- SDValue Op0 = Op.getOperand(0);
-
- switch (Opc) {
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ANY_EXTEND: {
- MVT Op0VT = Op0.getValueType();
- MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
-
- assert(Op0VT == MVT::i32
- && "CellSPU: Zero/sign extending something other than i32");
- DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
+ EVT N1VT = N1.getValueType();
- unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
- ? SPUISD::ROTBYTES_RIGHT_S
- : SPUISD::ROTQUAD_RZ_BYTES);
- SDValue PromoteScalar =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
-
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT,
- DAG.getNode(NewOpc, Op0VecVT,
- PromoteScalar,
- DAG.getConstant(4, MVT::i32))));
- }
-
- case ISD::ADD: {
- // Turn operands into vectors to satisfy type checking (shufb works on
- // vectors)
- SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
- SDValue Op1 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
- SmallVector<SDValue, 16> ShufBytes;
// (new revision, SRL/SHL case continues) logical shifts widen the value with
// zero extension; the amount is zero-extended unless it is wider than ShiftVT.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
- SDValue CarryGen =
- DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
- SDValue ShiftedCarry =
- DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
- CarryGen, CarryGen,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
- return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
- DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
- Op0, Op1, ShiftedCarry));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
- case ISD::SUB: {
- // Turn operands into vectors to satisfy type checking (shufb works on
- // vectors)
- SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
- SDValue Op1 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
- SmallVector<SDValue, 16> ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- SDValue BorrowGen =
- DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
- SDValue ShiftedBorrow =
- DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
- BorrowGen, BorrowGen,
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size()));
// (new revision, SRA case continues) the arithmetic shift widens the value
// with sign extension; the amount is sign-extended unless it is wider than
// ShiftVT, in which case it is truncated.
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
- return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
- DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
- Op0, Op1, ShiftedBorrow));
- }
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
- case ISD::SHL: {
- SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
- SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
- SDValue MaskLower =
- DAG.getNode(SPUISD::SELB, VecVT,
- Op0Vec,
- DAG.getConstant(0, VecVT),
- DAG.getNode(SPUISD::SELECT_MASK, VecVT,
- DAG.getConstant(0xff00ULL, MVT::i16)));
- SDValue ShiftAmtBytes =
- DAG.getNode(ISD::SRL, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(3, ShiftAmtVT));
- SDValue ShiftAmtBits =
- DAG.getNode(ISD::AND, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(7, ShiftAmtVT));
-
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
- DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
- DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
- MaskLower, ShiftAmtBytes),
- ShiftAmtBits));
- }
-
- case ISD::SRL: {
- MVT VT = Op.getValueType();
- SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftAmtVT = ShiftAmt.getValueType();
- SDValue ShiftAmtBytes =
- DAG.getNode(ISD::SRL, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(3, ShiftAmtVT));
- SDValue ShiftAmtBits =
- DAG.getNode(ISD::AND, ShiftAmtVT,
- ShiftAmt,
- DAG.getConstant(7, ShiftAmtVT));
-
- return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
- DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
- Op0, ShiftAmtBytes),
- ShiftAmtBits);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
- case ISD::SRA: {
- // Promote Op0 to vector
- SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
- SDValue ShiftAmt = Op.getOperand(1);
- MVT ShiftVT = ShiftAmt.getValueType();
-
- // Negate variable shift amounts
- if (!isa<ConstantSDNode>(ShiftAmt)) {
- ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
- DAG.getConstant(0, ShiftVT), ShiftAmt);
- }
-
- SDValue UpperHalfSign =
- DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
- DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
- DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
- Op0, DAG.getConstant(31, MVT::i32))));
- SDValue UpperHalfSignMask =
- DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
- SDValue UpperLowerMask =
- DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
- DAG.getConstant(0xff00, MVT::i16));
- SDValue UpperLowerSelect =
- DAG.getNode(SPUISD::SELB, MVT::v2i64,
- UpperHalfSignMask, Op0, UpperLowerMask);
- SDValue RotateLeftBytes =
- DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
- UpperLowerSelect, ShiftAmt);
- SDValue RotateLeftBits =
- DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
- RotateLeftBytes, ShiftAmt);
-
- return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
- RotateLeftBits);
// (new revision, MUL case continues) both operands are sign-extended. The
// break after the return is never executed.
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ break;
}
}
// Lower a byte-immediate logical operation whose constant operand is a
// splatted BUILD_VECTOR. The return-type line of this definition and a few
// body lines are elided from the diff hunk; the trailing comment below names
// AND, OR and XOR as the operations involved.
//
// Op  - the node being lowered; operand 0 is read as the candidate constant
//       vector and operand 1 as the other argument.
// DAG - the SelectionDAG used to create the replacement nodes.
//
// When the constant vector is a BUILD_VECTOR that is a constant splat at
// least as wide as one vector element, the node is re-emitted with a
// BUILD_VECTOR of 16 target constants holding the low byte of the splat.
// Otherwise Op is returned unchanged.
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
SDValue ConstVec;
SDValue Arg;
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
ConstVec = Op.getOperand(0);
Arg = Op.getOperand(1);
}
if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
- uint64_t VectorBits[2];
- uint64_t UndefBits[2];
- uint64_t SplatBits, SplatUndef;
- int SplatSize;
-
- if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
- && isConstantSplat(VectorBits, UndefBits,
- VT.getVectorElementType().getSizeInBits(),
- SplatBits, SplatUndef, SplatSize)) {
- SDValue tcVec[16];
// The new revision asks the BuildVectorSDNode itself whether it is a splat,
// replacing the file-local helpers used by the old revision.
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
// Accept only splats whose reported width is at least one element wide.
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
- const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
-
- // Turn the BUILD_VECTOR into a set of target constants:
- for (size_t i = 0; i < tcVecSize; ++i)
- tcVec[i] = tc;
- return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
- DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
// NOTE(review): exactly 16 i8 target constants are supplied while the
// BUILD_VECTOR is typed VT; presumably VT is always v16i8 here -- confirm.
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
}
}
+
// These operations (AND, OR, XOR) are legal, they just couldn't be custom
// lowered. Return the operation, rather than a null SDValue.
return Op;
}
-//! Lower i32 multiplication
-static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
- unsigned Opc) {
- switch (VT.getSimpleVT()) {
- default:
- cerr << "CellSPU: Unknown LowerMUL value type, got "
- << Op.getValueType().getMVTString()
- << "\n";
- abort();
- /*NOTREACHED*/
-
- case MVT::i32: {
- SDValue rA = Op.getOperand(0);
- SDValue rB = Op.getOperand(1);
-
- return DAG.getNode(ISD::ADD, MVT::i32,
- DAG.getNode(ISD::ADD, MVT::i32,
- DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
- DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
- DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
- }
- }
-
- return SDValue();
-}
-
//! Custom lowering for CTPOP (count population)
/*!
Custom lowering code that counts the number ones in the input
ones per byte, which then have to be accumulated.

(The two lines above are what the diff hunk shows; the text between them is
elided.)

Op is the CTPOP node and DAG the SelectionDAG used to build the result. In
each visible case the operand is widened with PREFSLOT2VEC (PROMOTE_SCALAR
in the old revision), CNTB is applied, and element 0 is extracted:
 - i8: the extracted byte is returned as is;
 - i16: the value is added to itself shifted right by 8, then masked;
 - i32: two shift-and-add steps (by 16, then by 8), then masked;
 - i64: an empty SDValue is returned.
Several declarations used below (N, Elt0, CNTB_reg, SUM1_reg and the Mask0
of the i32 case) are in lines elided from the hunk.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType();
- MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
// vecVT is the 128-bit vector type whose elements have the scalar type VT.
+ EVT VT = Op.getValueType();
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
- switch (VT.getSimpleVT()) {
+ switch (VT.getSimpleVT().SimpleTy) {
// NOTE(review): there is no break after the assert, so when asserts are
// compiled out an unexpected type falls through into the i8 case.
default:
assert(false && "Invalid value type!");
case MVT::i8: {
SDValue N = Op.getOperand(0);
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
}
case MVT::i16: {
// NOTE(review): if CNTB yields one count per byte, an all-ones input sums
// to 16, which the 0x0f mask below reduces to 0 -- confirm whether a wider
// mask was intended.
SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
SDValue Shift1 = DAG.getConstant(8, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
// CNTB_reg, SUM1_reg become associated:
SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
- SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
// Add the high-byte count to the low-byte count, then mask the sum.
- return DAG.getNode(ISD::AND, MVT::i16,
- DAG.getNode(ISD::ADD, MVT::i16,
- DAG.getNode(ISD::SRL, MVT::i16,
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
Tmp1, Shift1),
Tmp1),
Mask0);
// (the end of the i16 case and the start of the i32 case are elided here)
SDValue Shift1 = DAG.getConstant(16, MVT::i32);
SDValue Shift2 = DAG.getConstant(8, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
- SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
// CNTB_reg, SUM1_reg become associated:
SDValue CNTB_result =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
SDValue CNTB_rescopy =
- DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
// First fold: add the upper 16 bits of the per-byte counts onto the lower.
SDValue Comp1 =
- DAG.getNode(ISD::SRL, MVT::i32,
- DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
SDValue Sum1 =
- DAG.getNode(ISD::ADD, MVT::i32,
- Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
// NOTE(review): this copy is chained on CNTB_result rather than on
// CNTB_rescopy -- confirm that is the intended chain operand.
SDValue Sum1_rescopy =
- DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
// Second fold: add the next byte up onto the low byte, then mask.
SDValue Comp2 =
- DAG.getNode(ISD::SRL, MVT::i32,
- DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
Shift2);
SDValue Sum2 =
- DAG.getNode(ISD::ADD, MVT::i32, Comp2,
- DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
- return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
}
// i64 is not custom lowered here; an empty SDValue is returned.
case MVT::i64:
return SDValue();
}
-/// LowerOperation - Provide custom lowering hooks for some operations.
-///
+//! Custom lowering for ISD::FP_TO_SINT and ISD::FP_TO_UINT
+/*!
+ Every conversion that produces an i64, as well as the f64->i32 conversion,
+ is replaced by a runtime library call. Any other conversion (for example
+ f32->i32) is handed back untouched.
+
+ \param Op  The FP_TO_SINT / FP_TO_UINT node being lowered
+ \param DAG The current selection DAG
+ \param TLI The SPU target lowering object, forwarded to ExpandLibCall
+ \return Either \a Op itself or the result of ExpandLibCall
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  EVT ResultVT = Op.getValueType();
+  EVT SourceVT = Op.getOperand(0).getValueType();
+
+  bool NeedsLibcall =
+    ResultVT == MVT::i64 || (ResultVT == MVT::i32 && SourceVT == MVT::f64);
+
+  // Nothing to do: leave the node as it is.
+  if (!NeedsLibcall)
+    return Op;
+
+  // Choose the signed or the unsigned conversion routine.
+  RTLIB::Libcall LC;
+  if (Op.getOpcode() == ISD::FP_TO_SINT)
+    LC = RTLIB::getFPTOSINT(SourceVT, ResultVT);
+  else
+    LC = RTLIB::getFPTOUINT(SourceVT, ResultVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
+
+  SDValue Ignored;
+  return ExpandLibCall(LC, Op, DAG, false, Ignored, TLI);
+}
+
+//! Custom lowering for ISD::SINT_TO_FP and ISD::UINT_TO_FP
+/*!
+ Every conversion whose source is an i64, as well as the i32->f64 conversion,
+ is replaced by a runtime library call. Any other conversion (for example
+ i32->f32) is handed back untouched.
+
+ \param Op  The SINT_TO_FP / UINT_TO_FP node being lowered
+ \param DAG The current selection DAG
+ \param TLI The SPU target lowering object, forwarded to ExpandLibCall
+ \return Either \a Op itself or the result of ExpandLibCall
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+                              SPUTargetLowering &TLI) {
+  EVT ResultVT = Op.getValueType();
+  EVT SourceVT = Op.getOperand(0).getValueType();
+
+  bool NeedsLibcall =
+    SourceVT == MVT::i64 || (ResultVT == MVT::f64 && SourceVT == MVT::i32);
+
+  // Nothing to do: leave the node as it is.
+  if (!NeedsLibcall)
+    return Op;
+
+  // Choose the signed or the unsigned conversion routine.
+  RTLIB::Libcall LC;
+  if (Op.getOpcode() == ISD::SINT_TO_FP)
+    LC = RTLIB::getSINTTOFP(SourceVT, ResultVT);
+  else
+    LC = RTLIB::getUINTTOFP(SourceVT, ResultVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
+
+  SDValue Ignored;
+  return ExpandLibCall(LC, Op, DAG, false, Ignored, TLI);
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering by working
+ on the integer bit pattern of the operands:
+
+ - SETUO is computed from the exponent and mantissa bits of the left-hand
+   operand; SETO is the bitwise inverse of SETUO.
+ - Every other handled condition converts both operands from sign-magnitude
+   to 2's complement i64 values and compares those as signed integers. For
+   ordered conditions the result is additionally ANDed with "neither operand
+   is a NaN".
+
+ Conditions outside the ordered/unordered EQ/GT/GE/LT/LE/NE set are reported
+ through llvm_report_error.
+
+ \param Op  The SETCC node; operands 0 and 1 are the f64 values, operand 2 is
+            the condition code
+ \param DAG The current selection DAG
+ \param TLI Target lowering object, queried for the setcc result type
+ \return The lowered comparison, typed as TLI.getSetCCResultType(f64)
+
+ \note SETO and SETUO only inspect the left-hand operand.
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+  DebugLoc dl = Op.getDebugLoc();
+  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  EVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+  EVT IntVT(MVT::i64);
+
+  // Split the f64 bit pattern into its high and low 32-bit words. Per the
+  // original author, (truncate (srl arg, 32)) is efficiently selected to a
+  // NOP:
+  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+  SDValue lhsHi32 =
+    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                DAG.getNode(ISD::SRL, dl, IntVT,
+                            i64lhs, DAG.getConstant(32, MVT::i32)));
+  // High word with the sign bit cleared (exponent + upper mantissa bits):
+  SDValue lhsHi32abs =
+    DAG.getNode(ISD::AND, dl, MVT::i32,
+                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+  SDValue lhsLo32 =
+    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+  // SETO and SETUO only use the lhs operand:
+  if (CC->get() == ISD::SETO) {
+    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+    // SETUO
+    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+    return DAG.getNode(ISD::XOR, dl, ccResultVT,
+                       DAG.getSetCC(dl, ccResultVT,
+                                    lhs, DAG.getConstantFP(0.0, lhsVT),
+                                    ISD::SETUO),
+                       DAG.getConstant(ccResultAllOnes, ccResultVT));
+  } else if (CC->get() == ISD::SETUO) {
+    // Evaluates to true if Op0 is [SQ]NaN. An f64 is a NaN exactly when its
+    // exponent field is all ones and its mantissa is non-zero, i.e. when
+    //   |hi32| >  0x7ff00000, or
+    //   |hi32| == 0x7ff00000 and lo32 != 0.
+    // (|hi32| == 0x7ff00000 with lo32 == 0 is +/-infinity, which is ordered.)
+    //
+    // The previous test, (|hi32| >= 0x7ff00000) && (lo32 > 0) with a signed
+    // compare, missed every NaN whose low word is zero or has its top bit
+    // set, including the canonical quiet NaN 0x7ff8000000000000.
+    SDValue infHi32 = DAG.getConstant(0x7ff00000, MVT::i32);
+    SDValue hiAboveInf =
+      DAG.getSetCC(dl, ccResultVT, lhsHi32abs, infHi32, ISD::SETGT);
+    SDValue hiEqualsInf =
+      DAG.getSetCC(dl, ccResultVT, lhsHi32abs, infHi32, ISD::SETEQ);
+    SDValue loNonZero =
+      DAG.getSetCC(dl, ccResultVT,
+                   lhsLo32, DAG.getConstant(0, MVT::i32), ISD::SETNE);
+    return DAG.getNode(ISD::OR, dl, ccResultVT,
+                       hiAboveInf,
+                       DAG.getNode(ISD::AND, dl, ccResultVT,
+                                   hiEqualsInf, loNonZero));
+  }
+
+  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+  SDValue rhsHi32 =
+    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+                DAG.getNode(ISD::SRL, dl, IntVT,
+                            i64rhs, DAG.getConstant(32, MVT::i32)));
+
+  // If a value is negative, subtract from the sign magnitude constant:
+  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+  // Convert the sign-magnitude representation into 2's complement. The
+  // arithmetic shift by 31 smears the sign bit across the word, giving an
+  // all-ones mask for negative inputs and zero otherwise:
+  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      lhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+  SDValue lhsSelect =
+    DAG.getNode(ISD::SELECT, dl, IntVT,
+                lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+                                      rhsHi32, DAG.getConstant(31, MVT::i32));
+  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+  SDValue rhsSelect =
+    DAG.getNode(ISD::SELECT, dl, IntVT,
+                rhsSelectMask, rhsSignMag2TC, i64rhs);
+
+  // Map the floating point condition onto the signed integer condition that
+  // is applied to the converted operands:
+  unsigned compareOp;
+
+  switch (CC->get()) {
+  case ISD::SETOEQ:
+  case ISD::SETUEQ:
+    compareOp = ISD::SETEQ; break;
+  case ISD::SETOGT:
+  case ISD::SETUGT:
+    compareOp = ISD::SETGT; break;
+  case ISD::SETOGE:
+  case ISD::SETUGE:
+    compareOp = ISD::SETGE; break;
+  case ISD::SETOLT:
+  case ISD::SETULT:
+    compareOp = ISD::SETLT; break;
+  case ISD::SETOLE:
+  case ISD::SETULE:
+    compareOp = ISD::SETLE; break;
+  case ISD::SETUNE:
+  case ISD::SETONE:
+    compareOp = ISD::SETNE; break;
+  default:
+    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
+  }
+
+  SDValue result =
+    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+                 (ISD::CondCode) compareOp);
+
+  // NOTE(review): the unordered predicates (SETUEQ, SETUGT, ...) are not
+  // ORed with "either operand is a NaN" here - confirm whether that is
+  // intended.
+  if ((CC->get() & 0x8) == 0) {
+    // Ordered comparison: additionally require that neither side is a NaN.
+    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
+                                  ISD::SETO);
+    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
+                                  ISD::SETO);
+    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+  }
+
+  return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ A SELECT_CC is (generally) expressible directly on the SPU: the comparison
+ becomes a SETCC node whose result is used as the mask of a SELB.
+
+ \note To be revisited: when computing the true and false values takes longer
+ than the latency of a branch (6 cycles), emitting a new basic block and
+ branching on the condition would be the better choice. Given the simplistic
+ uses so far, this code does not attempt that.
+
+ \param Op  The SELECT_CC node (lhs, rhs, true value, false value, condition)
+ \param DAG The current selection DAG
+ \param TLI Target lowering object, queried for the setcc result type
+ \return The SPUISD::SELB node implementing the select
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+                              const TargetLowering &TLI) {
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+
+  SDValue CmpLHS = Op.getOperand(0);
+  SDValue CmpRHS = Op.getOperand(1);
+  SDValue IfTrue = Op.getOperand(2);
+  SDValue IfFalse = Op.getOperand(3);
+  SDValue CondCode = Op.getOperand(4);
+
+  // A plain ISD::SELECT would be the natural node to emit, but LLVM's
+  // legalizer folds SETCC/SELECT back into SELECT_CC, which ends in another
+  // "cannot select select_cc" assert. Hence SPUISD::SELB is built directly.
+  EVT MaskVT = TLI.getSetCCResultType(VT);
+  SDValue Mask = DAG.getNode(ISD::SETCC, dl, MaskVT, CmpLHS, CmpRHS, CondCode);
+
+  // SELB takes ($rA, $rB, $mask): result bits come from $rA where the mask
+  // bit is 0 and from $rB where it is 1. The mask has 1s where the condition
+  // held, so the false value is passed as $rA and the true value as $rB.
+  return DAG.getNode(SPUISD::SELB, dl, VT, IfFalse, IfTrue, Mask);
+}
+
+//! Custom lower ISD::TRUNCATE
+/*!
+ Only the i128 -> i64 truncation is handled: a SHUFB picks bytes 8..15 (the
+ least significant doubleword of the quadword) and the result is moved to the
+ preferred slot. For any other type combination an empty SDValue is returned,
+ leaving the truncate alone.
+ */
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+  // Destination type, plus the 128-bit vector type made of such elements
+  EVT VT = Op.getValueType();
+  MVT simpleVT = VT.getSimpleVT();
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+                               VT, (128 / VT.getSizeInBits()));
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Value being truncated
+  SDValue Src = Op.getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  bool IsI128ToI64 = SrcVT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64;
+  if (!IsI128ToI64)
+    return SDValue();             // Leave the truncate unmolested
+
+  // Shuffle control words selecting the low doubleword of the quadword;
+  // the same pair fills both halves of the mask vector.
+  SDValue HiWord = DAG.getConstant(0x08090a0b, MVT::i32);
+  SDValue LoWord = DAG.getConstant(0x0c0d0e0f, MVT::i32);
+  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 HiWord, LoWord, HiWord, LoWord);
+
+  // The shuffle itself performs the truncation
+  SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+                                     Src, Src, shufMask);
+
+  return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+}
+
+/*!
+ * Build the instruction sequence for an i64/i32 -> i128 sign extension.
+ * The sign bit is replicated with an arithmetic right shift so that at least
+ * one byte consists purely of sign bits; a shufb then copies that "sign-byte"
+ * into the leftmost words and the i64/i32 input into the rightmost words.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Destination type
+  MVT OpVT = Op.getValueType().getSimpleVT();
+
+  // Source value and its type
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+  // Only i64 -> i128 and i32 -> i128 are supported.
+  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+         "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+  // Pick the shuffle control words and the vector type holding the input,
+  // starting from the i32 settings and overriding them for an i64 source.
+  unsigned mask1 = 0x10101010;    // byte 0 - 3 and 4 - 7
+  unsigned mask2 = 0x10101010;    // byte 8 - 11
+  unsigned mask3 = 0x00010203;    // byte 12 - 15
+  MVT mvt = MVT::v4i32;
+  if (Op0VT == MVT::i64) {
+    mask2 = 0x00010203;
+    mask3 = 0x04050607;
+    mvt = MVT::v2i64;
+  }
+
+  SDValue signWord = DAG.getConstant(mask1, MVT::i32);
+  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 signWord, signWord,
+                                 DAG.getConstant(mask2, MVT::i32),
+                                 DAG.getConstant(mask3, MVT::i32));
+
+  // Arithmetic right shift by 31 to obtain at least one byte that holds
+  // nothing but sign bits.
+  SDValue inputVec = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0);
+  SDValue sraVal = DAG.getNode(ISD::SRA, dl, mvt,
+                               inputVec, DAG.getConstant(31, MVT::i32));
+
+  // Shuffle bytes - sign bits go to the upper 64 bits, the input value to
+  // the lower 64 bits.
+  SDValue widened = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0);
+  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+                                   widened, sraVal, shufMask);
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+
+ Cases that decline to lower a node break out of the switch and return an
+ empty SDValue.
+ */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
unsigned Opc = (unsigned) Op.getOpcode();
- MVT VT = Op.getValueType();
+ EVT VT = Op.getValueType();
switch (Opc) {
default: {
- cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+#ifndef NDEBUG
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
Op.getNode()->dump();
- abort();
+#endif
+ llvm_unreachable(0);
}
case ISD::LOAD:
+ case ISD::EXTLOAD:
case ISD::SEXTLOAD:
case ISD::ZEXTLOAD:
return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::JumpTable:
return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::Constant:
- return LowerConstant(Op, DAG);
case ISD::ConstantFP:
return LowerConstantFP(Op, DAG);
- case ISD::BRCOND:
- return LowerBRCOND(Op, DAG);
- case ISD::FORMAL_ARGUMENTS:
- return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
- case ISD::CALL:
- return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
- case ISD::RET:
- return LowerRET(Op, DAG, getTargetMachine());
-
// i8, i64 math ops:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ANY_EXTEND:
case ISD::ADD:
case ISD::SUB:
case ISD::ROTR:
case ISD::SHL:
case ISD::SRA: {
if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc);
- else if (VT == MVT::i64)
- return LowerI64Math(Op, DAG, Opc);
+ return LowerI8Math(Op, DAG, Opc, *this);
break;
}
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
// Vector-related lowering.
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
// Vector and i8 multiply:
case ISD::MUL:
- if (VT.isVector())
- return LowerVectorMUL(Op, DAG);
- else if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc);
- else
- return LowerMUL(Op, DAG, VT, Opc);
-
- case ISD::FDIV:
- if (VT == MVT::f32 || VT == MVT::v4f32)
- return LowerFDIVf32(Op, DAG);
-// else if (Op.getValueType() == MVT::f64)
-// return LowerFDIVf64(Op, DAG);
- else
- assert(0 && "Calling FDIV on unsupported MVT");
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ // Only i8 multiplies are custom-lowered here. Without this break a
+ // non-i8 MUL would fall through into the CTPOP case below and be handed
+ // to LowerCTPOP.
+ break;
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
}
return SDValue();
}
-SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG)
{
#if 0
unsigned Opc = (unsigned) N->getOpcode();
- MVT OpVT = N->getValueType(0);
+ EVT OpVT = N->getValueType(0);
switch (Opc) {
default: {
- cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
N->dump();
abort();
/*NOTREACHED*/
#endif
/* The switch above is disabled with #if 0, so as far as this excerpt shows
   nothing is added to Results and the node is left unchanged. */
- return 0;
}
//===----------------------------------------------------------------------===//
#endif
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0); // everything has at least one operand
- SDValue Result; // Initially, NULL result
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ EVT NodeVT = N->getValueType(0); // The node's value type
+ EVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default: break;
case ISD::ADD: {
SDValue Op1 = N->getOperand(1);
- if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
- SDValue Op01 = Op0.getOperand(1);
- if (Op01.getOpcode() == ISD::Constant
- || Op01.getOpcode() == ISD::TargetConstant) {
- // (add <const>, (SPUindirect <arg>, <const>)) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
- ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
- SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
- Op0.getValueType());
-
- DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
- DEBUG(cerr << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
- return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
- Op0.getOperand(0), combinedConst);
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
}
- } else if (isa<ConstantSDNode>(Op0)
- && Op1.getOpcode() == SPUISD::IndirectAddr) {
- SDValue Op11 = Op1.getOperand(1);
- if (Op11.getOpcode() == ISD::Constant
- || Op11.getOpcode() == ISD::TargetConstant) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
- ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
- SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
- Op0.getValueType());
-
- DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
- DEBUG(cerr << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
-
- return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
- Op1.getOperand(0), combinedConst);
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
}
}
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: {
- if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
- N->getValueType(0) == Op0.getValueType()) {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
// (any_extend (SPUextract_elt0 <arg>)) ->
// (SPUextract_elt0 <arg>)
// Types must match, however...
- DEBUG(cerr << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\nReplace: ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
return Op0;
}
}
case SPUISD::IndirectAddr: {
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
- ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
- if (CN->getZExtValue() == 0) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->getZExtValue() == 0) {
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
- DEBUG(cerr << "Replace: ");
+ DEBUG(errs() << "Replace: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
return Op0;
}
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
}
break;
}
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
- case SPUISD::ROTQUAD_RZ_BYTES:
- case SPUISD::ROTQUAD_RZ_BITS: {
+ case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
- if (isa<ConstantSDNode>(Op1)) {
- // Kill degenerate vector shifts:
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-
- if (CN->getZExtValue() == 0) {
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
Result = Op0;
}
}
break;
}
- case SPUISD::PROMOTE_SCALAR: {
+ case SPUISD::PREFSLOT2VEC: {
switch (Op0.getOpcode()) {
default:
break;
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND: {
- // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
// <arg>
- // but only if the SPUpromote_scalar and <arg> types match.
+ // but only if the SPUprefslot2vec and <arg> types match.
SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
SDValue Op000 = Op00.getOperand(0);
- if (Op000.getValueType() == N->getValueType(0)) {
+ if (Op000.getValueType() == NodeVT) {
Result = Op000;
}
}
break;
}
- case SPUISD::EXTRACT_ELT0: {
- // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
// <arg>
Result = Op0.getOperand(0);
break;
break;
}
}
+
// Otherwise, return unchanged.
-#if 1
+#ifndef NDEBUG
if (Result.getNode()) {
- DEBUG(cerr << "\nReplace.SPU: ");
+ DEBUG(errs() << "\nReplace.SPU: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
}
#endif
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const
+ EVT VT) const
{
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
const SelectionDAG &DAG,
unsigned Depth ) const {
#if 0
- const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
-#endif
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
switch (Op.getOpcode()) {
default:
// KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
break;
-
-#if 0
case CALL:
case SHUFB:
- case INSERT_MASK:
+ case SHUFFLE_MASK:
case CNTB:
-#endif
-
- case SPUISD::PROMOTE_SCALAR: {
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType();
- unsigned Op0VTBits = Op0VT.getSizeInBits();
- uint64_t InMask = Op0VT.getIntegerVTBitMask();
- KnownZero |= APInt(Op0VTBits, ~InMask, false);
- KnownOne |= APInt(Op0VTBits, InMask, false);
- break;
- }
-
+ case SPUISD::PREFSLOT2VEC:
case SPUISD::LDRESULT:
- case SPUISD::EXTRACT_ELT0:
- case SPUISD::EXTRACT_ELT0_CHAINED: {
- MVT OpVT = Op.getValueType();
- unsigned OpVTBits = OpVT.getSizeInBits();
- uint64_t InMask = OpVT.getIntegerVTBitMask();
- KnownZero |= APInt(OpVTBits, ~InMask, false);
- KnownOne |= APInt(OpVTBits, InMask, false);
- break;
- }
-
-#if 0
- case EXTRACT_I1_ZEXT:
- case EXTRACT_I1_SEXT:
- case EXTRACT_I8_ZEXT:
- case EXTRACT_I8_SEXT:
- case MPY:
- case MPYU:
- case MPYH:
- case MPYHH:
+ case SPUISD::VEC2PREFSLOT:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
case SPUISD::VEC_ROTL:
case SPUISD::VEC_ROTR:
- case SPUISD::ROTQUAD_RZ_BYTES:
- case SPUISD::ROTQUAD_RZ_BITS:
- case SPUISD::ROTBYTES_RIGHT_S:
case SPUISD::ROTBYTES_LEFT:
- case SPUISD::ROTBYTES_LEFT_CHAINED:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
- case SPUISD::FPInterp:
- case SPUISD::FPRecipEst:
- case SPUISD::SEXT32TO64:
+ }
#endif
+}
+
+// Report how many sign bits the given node is known to have. A SETCC is
+// reported as the full width of its result type (treated as i32 unless the
+// type is i8, i16 or i32); every other opcode is reported as 1.
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                   unsigned Depth) const {
+  switch (Op.getOpcode()) {
+  case ISD::SETCC: {
+    EVT ResultVT = Op.getValueType();
+    bool IsNarrowInt = ResultVT == MVT::i8 || ResultVT == MVT::i16 ||
+                       ResultVT == MVT::i32;
+
+    // Anything that is not i8/i16/i32 is measured as an i32.
+    if (!IsNarrowInt)
+      ResultVT = MVT::i32;
+    return ResultVT.getSizeInBits();
+  }
+
+  default:
+    return 1;
+  }
}
}