//! EVT mapping to useful data for Cell SPU
struct valtype_map_s {
- const EVT valtype;
- const int prefslot_byte;
+ EVT valtype;
+ int prefslot_byte;
};
const valtype_map_s valtype_map[] = {
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
EVT ArgVT = Op.getOperand(i).getValueType();
- const Type *ArgTy = ArgVT.getTypeForEVT();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op.getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
TLI.getPointerTy());
// Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Op.getNode()->getValueType(0).getTypeForEVT();
+ const Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, CallingConv::C, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG,
- Op.getDebugLoc());
+ Callee, Args, DAG, Op.getDebugLoc(),
+ DAG.GetOrdering(InChain.getNode()));
return CallInfo.first;
}
// Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- // Support label based line numbers.
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
- setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
-
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
- node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
- node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
- node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
- EVT vecVT = EVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
// The vector type we really want to load from the 16-byte chunk.
- EVT vecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
- stVecVT = EVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
SDValue alignLoadVec;
SDValue basePtr = SN->getBasePtr();
// to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "CellSPU LowerSTORE: basePtr = ";
+ errs() << "CellSPU LowerSTORE: basePtr = ";
basePtr.getNode()->dump(&DAG);
- cerr << "\n";
+ errs() << "\n";
}
#endif
const SDValue ¤tRoot = DAG.getRoot();
DAG.setRoot(result);
- cerr << "------- CellSPU:LowerStore result:\n";
+ errs() << "------- CellSPU:LowerStore result:\n";
DAG.dump();
- cerr << "-------\n";
+ errs() << "-------\n";
DAG.setRoot(currentRoot);
}
#endif
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
DebugLoc dl, SelectionDAG &DAG,
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
ArgOffset += StackSlotSize;
// Create the frame slot
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
}
SDValue
-SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- unsigned CallConv, bool isVarArg,
- bool isTailCall,
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const Type *RetTy,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
// Handy pointer type
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Accumulate how many bytes are to be pushed on the stack, including the
- // linkage area, and parameter passing area. According to the SPU ABI,
- // we minimally need space for [LR] and [SP]
- unsigned NumStackBytes = SPUFrameInfo::minStackSize();
-
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
// passing.
}
}
- // Update number of stack bytes actually used, insert a call sequence start
- NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
+
+ // Insert a call sequence start
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
true));
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
- unsigned CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG) {
assert(prefslot_begin != -1 && prefslot_end != -1 &&
"LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
- unsigned int ShufBytes[16];
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
for (int i = 0; i < 16; ++i) {
// zero fill uppper part of preferred slot, don't care about the
// other slots:
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
- EVT vecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
switch (VT.getSimpleVT().SimpleTy) {
// Type to truncate to
EVT VT = Op.getValueType();
MVT simpleVT = VT.getSimpleVT();
- EVT VecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
DebugLoc dl = Op.getDebugLoc();
// Type to truncate from
return SDValue(); // Leave the truncate unmolested
}
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+ // The type to extend to needs to be a i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
+
//! Custom (target-specific) lowering entry point
/*!
This is where LLVM's DAG selection process calls to do target-specific
switch (Opc) {
default: {
#ifndef NDEBUG
- cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
Op.getNode()->dump();
#endif
llvm_unreachable(0);
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
}
return SDValue();
switch (Opc) {
default: {
- cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
- cerr << "Op.getOpcode() = " << Opc << "\n";
- cerr << "*Op.getNode():\n";
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
N->dump();
abort();
/*NOTREACHED*/
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
<< "With: (SPUindirect <arg>, <arg>)\n";
}
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
<< "), " << CN0->getSExtValue() << ")\n"
<< "With: (SPUindirect <arg>, "
// Types must match, however...
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\nReplace: ";
+ errs() << "\nReplace: ";
N->dump(&DAG);
- cerr << "\nWith: ";
+ errs() << "\nWith: ";
Op0.getNode()->dump(&DAG);
- cerr << "\n";
+ errs() << "\n";
}
#endif
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
- DEBUG(cerr << "Replace: ");
+ DEBUG(errs() << "Replace: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
return Op0;
}
#if !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
- cerr << "\n"
+ errs() << "\n"
<< "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
<< "With: (SPUindirect <arg>, <arg>)\n";
}
}
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
// Otherwise, return unchanged.
#ifndef NDEBUG
if (Result.getNode()) {
- DEBUG(cerr << "\nReplace.SPU: ");
+ DEBUG(errs() << "\nReplace.SPU: ");
DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
+ DEBUG(errs() << "\nWith: ");
DEBUG(Result.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+ DEBUG(errs() << "\n");
}
#endif
case SPUISD::VEC2PREFSLOT:
case SPUISD::SHLQUAD_L_BITS:
case SPUISD::SHLQUAD_L_BYTES:
- case SPUISD::VEC_SHL:
- case SPUISD::VEC_SRL:
- case SPUISD::VEC_SRA:
case SPUISD::VEC_ROTL:
case SPUISD::VEC_ROTR:
case SPUISD::ROTBYTES_LEFT: