//! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
- const MVT valtype;
- const int prefslot_byte;
+ const MVT valtype;
+ const int prefslot_byte;
};
const valtype_map_s valtype_map[] = {
addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
- // Initialize libcalls:
- setLibcallName(RTLIB::MUL_I64, "__muldi3");
-
// SPU has no sign or zero extended loads for i1, i8, i16:
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);
+
// SPU constant load actions are custom lowered:
setOperationAction(ISD::Constant, MVT::i64, Custom);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::STORE, VT, Custom);
}
- // Custom lower BRCOND for i1, i8 to "promote" the result to
- // i32 and i16, respectively.
+ // Custom lower BRCOND for i8 to "promote" the result to i16
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
// Expand the jumptable branches
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+#if 0
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+#endif
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::SHL, MVT::i8, Custom);
setOperationAction(ISD::SRL, MVT::i8, Custom);
setOperationAction(ISD::SRA, MVT::i8, Custom);
- // And SPU needs custom lowering for shift left/right for i64
+
+ // SPU needs custom lowering for shift left/right for i64
setOperationAction(ISD::SHL, MVT::i64, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
setOperationAction(ISD::MUL, MVT::i32, Custom);
- setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
+
+ // SMUL_LOHI, UMUL_LOHI
+#if 0
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+#endif
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
- setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
setOperationAction(ISD::SETCC, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
+ // Custom lower truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
// SPU has a legal FP -> signed INT instruction
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setShiftAmountType(MVT::i32);
- setSetCCResultContents(ZeroOrOneSetCCResult);
+ setBooleanContents(ZeroOrOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
setTargetDAGCombine(ISD::ANY_EXTEND);
computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(SchedulingForRegPressure);
}
const char *
node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
- node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
- node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
- node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
- = "SPUISD::EXTRACT_ELT0_CHAINED";
- node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
"SPUISD::ROTQUAD_RZ_BYTES";
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
"SPUISD::ROTQUAD_RZ_BITS";
- node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
- "SPUISD::ROTBYTES_RIGHT_S";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
- "SPUISD::ROTBYTES_LEFT_CHAINED";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
"SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
prefSlotOffs = (int) (alignOffs & 0xf);
prefSlotOffs -= vtm->prefslot_byte;
- basePtr = DAG.getRegister(SPU::R1, VT);
} else {
alignOffs = 0;
prefSlotOffs = -vtm->prefslot_byte;
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
within a 16-byte block, we have to rotate to extract the requested element.
- */
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32, ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
- MVT VT = LN->getMemoryVT();
- MVT OpVT = Op.getNode()->getValueType(0);
+ MVT InVT = LN->getMemoryVT();
+ MVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
SDValue Ops[8];
int offset, rotamt;
bool was16aligned;
SDValue result =
- AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
+ AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
+ was16aligned);
if (result.getNode() == 0)
return result;
if (rotamt != 0 || !was16aligned) {
SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
- Ops[0] = the_chain;
- Ops[1] = result;
+ Ops[0] = result;
if (was16aligned) {
- Ops[2] = DAG.getConstant(rotamt, MVT::i16);
+ Ops[1] = DAG.getConstant(rotamt, MVT::i16);
} else {
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
LoadSDNode *LN1 = cast<LoadSDNode>(result);
- Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+ Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
DAG.getConstant(rotamt, PtrVT));
}
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
- the_chain = result.getValue(1);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
}
- if (VT == OpVT || ExtType == ISD::EXTLOAD) {
- SDVTList scalarvts;
- MVT vecVT = MVT::v16i8;
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
+ DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- if (OpVT == VT) {
- if (VT != MVT::i1)
- vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
- } else
- vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
- Ops[0] = the_chain;
- Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
- scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
- result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
- the_chain = result.getValue(1);
- } else {
- // Handle the sign and zero-extending loads for i1 and i8:
- unsigned NewOpC;
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
- if (ExtType == ISD::SEXTLOAD) {
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_SEXT
- : SPUISD::EXTRACT_I8_SEXT);
- } else {
- assert(ExtType == ISD::ZEXTLOAD);
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_ZEXT
- : SPUISD::EXTRACT_I8_ZEXT);
- }
-
- result = DAG.getNode(NewOpC, OpVT, result);
+ result = DAG.getNode(NewOpc, OutVT, result);
}
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
SDValue retops[2] = {
result,
the_chain
int chunk_offset, slot_offset;
bool was16aligned;
- // The vector type we really want to load from the 16-byte chunk, except
- // in the case of MVT::i1, which has to be v16i8.
+ // The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
}
SDValue insertEltOp =
- DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
+ DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
SDValue vectorizeOp =
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
- result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
- DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
+ result = DAG.getNode(SPUISD::SHUFB, vecVT,
+ vectorizeOp, alignLoadVec,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, result, basePtr,
LN->getSrcValue(), LN->getSrcValueOffset(),
LN->isVolatile(), LN->getAlignment());
-#if 0 && defined(NDEBUG)
+#if 0 && !defined(NDEBUG)
if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
const SDValue ¤tRoot = DAG.getRoot();
DAG.setRoot(currentRoot);
}
#endif
-
+
return result;
/*UNREACHED*/
}
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
- ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
if (VT == MVT::i64) {
- SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+ SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
} else {
cerr << "LowerConstant: unhandled constant type "
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
- ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
-
- assert((FP != 0) &&
- "LowerConstantFP: Node is not ConstantFPSDNode");
if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
- return DAG.getNode(ISD::BIT_CONVERT, VT,
- LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
}
return SDValue();
}
-//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
+//! Lower MVT::i8 brcond to a promoted type (MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
MVT CondVT = Cond.getValueType();
MVT CondNVT;
- if (CondVT == MVT::i1 || CondVT == MVT::i8) {
- CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
+ if (CondVT == MVT::i8) {
+ CondNVT = MVT::i16;
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
Op.getOperand(0),
DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
switch (ObjectVT.getSimpleVT()) {
default: {
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << ObjectVT.getMVTString()
- << "\n";
- abort();
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << ObjectVT.getMVTString()
+ << "\n";
+ abort();
}
case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
case MVT::v2f64:
case MVT::v4f32:
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
ArgValues.push_back(Root);
// Return the new list of results.
- return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
- ArgValues.size());
+ return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
ArgOffset += StackSlotSize;
}
break;
+ case MVT::v2i64:
+ case MVT::v2f64:
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
NumResults = 1;
break;
case MVT::v2f64:
+ case MVT::v2i64:
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v2f64: {
uint64_t f64val = SplatBits;
assert(SplatSize == 8
- && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(f64val, MVT::i64);
return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
// specialized masks to replace any and all 0's, 0xff's and 0x80's.
// Detect if the upper or lower half is a special shuffle mask pattern:
- upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
- lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+ upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
+ lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
// Create lower vector if not a special pattern
if (!lower_special) {
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
-/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
-/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
// to be monotonically increasing with one exception element.
- MVT EltVT = V1.getValueType().getVectorElementType();
+ MVT VecVT = V1.getValueType();
+ MVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
unsigned V2Elt = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ unsigned V0Elt = 0;
bool monotonic = true;
- if (EltVT == MVT::i8)
+ bool rotate = true;
+
+ if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
- else if (EltVT == MVT::i16)
+ } else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
- else if (EltVT == MVT::i32)
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
- else
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ } else
assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
- for (unsigned i = 0, e = PermMask.getNumOperands();
- EltsFromV2 <= 1 && monotonic && i != e;
- ++i) {
- unsigned SrcElt;
- if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
- SrcElt = 0;
- else
- SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
+ for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
+ if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
+ unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();
- if (SrcElt >= V2EltIdx0) {
- ++EltsFromV2;
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- } else if (CurrElt != SrcElt) {
- monotonic = false;
- }
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ if (1 >= (++EltsFromV2)) {
+ V2Elt = (V2EltIdx0 - SrcElt) << 2;
+ }
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
- ++CurrElt;
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ if (SrcElt == 0)
+ V0Elt = i;
+ } else {
+ rotate = false;
+ }
+ } else if (PrevElt == 0) {
+ // First time through, need to keep track of previous element
+ PrevElt = SrcElt;
+ } else {
+ // This isn't a rotation, takes elements from vector 2
+ rotate = false;
+ }
+ }
+ }
}
if (EltsFromV2 == 1 && monotonic) {
// Initialize temporary register to 0
SDValue InitTempReg =
DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
- // Copy register's contents as index in INSERT_MASK:
+ // Copy register's contents as index in SHUFFLE_MASK:
SDValue ShufMaskOp =
- DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
+ DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
DAG.getTargetConstant(V2Elt, MVT::i32),
DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
+ } else if (rotate) {
+ int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
} else {
// Convert the SHUFFLE_VECTOR mask's input element units to the
// actual bytes.
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
+ &ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
}
}
MVT VT = Op.getValueType();
SDValue N = Op.getOperand(0);
SDValue Elt = Op.getOperand(1);
- SDValue ShufMask[16];
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
+ }
- assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
- int EltNo = (int) C->getZExtValue();
+ switch (VT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
- // sanity checks:
- if (VT == MVT::i8 && EltNo >= 16)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
- else if (VT == MVT::i16 && EltNo >= 8)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
- else if (VT == MVT::i32 && EltNo >= 4)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
- else if (VT == MVT::i64 && EltNo >= 2)
- assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
+ unsigned int ShufBytes[16];
+ for (int i = 0; i < 16; ++i) {
+ // zero fill upper part of preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
- if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
- // i32 and i64: Element 0 is the preferred slot
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
- }
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
- // Need to generate shuffle mask and extract:
- int prefslot_begin = -1, prefslot_end = -1;
- int elt_byte = EltNo * VT.getSizeInBits() / 8;
+ SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ &ShufMask[0],
+ sizeof(ShufMask) / sizeof(ShufMask[0]));
- switch (VT.getSimpleVT()) {
- default:
- assert(false && "Invalid value type!");
- case MVT::i8: {
- prefslot_begin = prefslot_end = 3;
- break;
- }
- case MVT::i16: {
- prefslot_begin = 2; prefslot_end = 3;
- break;
- }
- case MVT::i32:
- case MVT::f32: {
- prefslot_begin = 0; prefslot_end = 3;
- break;
- }
- case MVT::i64:
- case MVT::f64: {
- prefslot_begin = 0; prefslot_end = 7;
- break;
- }
- }
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(SPUISD::SHUFB, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ MVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
+ abort();
+ }
- assert(prefslot_begin != -1 && prefslot_end != -1 &&
- "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
- for (int i = 0; i < 16; ++i) {
- // zero fill uppper part of preferred slot, don't care about the
- // other slots:
- unsigned int mask_val;
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
- if (i <= prefslot_end) {
- mask_val =
- ((i < prefslot_begin)
- ? 0x80
- : elt_byte + (i - prefslot_begin));
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
- ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
- } else
- ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
- }
+ vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
- SDValue ShufMaskVec =
- DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
- &ShufMask[0],
- sizeof(ShufMask) / sizeof(ShufMask[0]));
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT()) {
+ default:
+ cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
+ abort();
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
+ factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
+ factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
+ factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
+ loFactor, hiFactor);
+ break;
+ }
+ }
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
- DAG.getNode(SPUISD::SHUFB, N.getValueType(),
- N, N, ShufMaskVec));
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(SPUISD::SHUFB, VecVT,
+ vecShift, vecShift, replicate));
+ }
+ return retval;
}
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Use $2 because it's always 16-byte aligned and it's available:
- SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(CN->getSExtValue(), PtrVT));
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
- VecOp,
- DAG.getNode(SPUISD::INSERT_MASK, VT,
- DAG.getNode(ISD::ADD, PtrVT,
- PtrBase,
- DAG.getConstant(CN->getZExtValue(),
- PtrVT))));
+ VecOp,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
return result;
}
assert(Op0VT == MVT::i32
&& "CellSPU: Zero/sign extending something other than i32");
- DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
- unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
- ? SPUISD::ROTBYTES_RIGHT_S
- : SPUISD::ROTQUAD_RZ_BYTES);
- SDValue PromoteScalar =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+ DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT,
- DAG.getNode(NewOpc, Op0VecVT,
- PromoteScalar,
- DAG.getConstant(4, MVT::i32))));
+ SDValue PromoteScalar =
+ DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+
+ if (Opc != ISD::SIGN_EXTEND) {
+ // Use a shuffle to zero extend the i32 to i64 directly:
+ SDValue shufMask =
+ DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x00010203, MVT::i32),
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x08090a0b, MVT::i32));
+ SDValue zextShuffle =
+ DAG.getNode(SPUISD::SHUFB, Op0VecVT,
+ PromoteScalar, PromoteScalar, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
+ } else {
+ // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
+ // right and propagate the sign bit) instruction.
+ SDValue RotQuad =
+ DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
+ PromoteScalar, DAG.getConstant(4, MVT::i32));
+ SDValue SignQuad =
+ DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
+ PromoteScalar, DAG.getConstant(32, MVT::i32));
+ SDValue SelMask =
+ DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
+ DAG.getConstant(0xf0f0, MVT::i16));
+ SDValue CombineQuad =
+ DAG.getNode(SPUISD::SELB, Op0VecVT,
+ SignQuad, RotQuad, SelMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
+ }
}
case ISD::ADD: {
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
- return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedCarry));
}
DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
&ShufBytes[0], ShufBytes.size()));
- return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
Op0, Op1, ShiftedBorrow));
}
ShiftAmt,
DAG.getConstant(7, ShiftAmtVT));
- return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
MaskLower, ShiftAmtBytes),
}
SDValue UpperHalfSign =
- DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
+ DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
Op0, DAG.getConstant(31, MVT::i32))));
DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
RotateLeftBytes, ShiftAmt);
- return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
RotateLeftBits);
}
}
return SDValue();
}
-/// LowerOperation - Provide custom lowering hooks for some operations.
-///
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC is lowered here into two nodes: an ISD::SETCC that performs
+ the comparison, and an SPUISD::SELB that takes the SETCC result as its
+ selector operand and chooses between the true and false values.
+
+ \param Op The ISD::SELECT_CC node. Operands 0 and 1 are the two values
+ being compared, operands 2 and 3 are the true and false values, and
+ operand 4 is the condition code.
+ \param DAG The selection DAG in which the replacement nodes are created.
+ \return The SPUISD::SELB node that replaces \a Op.
+
+ \note Need to revisit this in the future: if the code path through the true
+ and false value computations is longer than the latency of a branch (6
+ cycles), then it would be more advantageous to branch and insert a new basic
+ block and branch on the condition. However, this code does not make that
+ assumption, given the simplistic uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  MVT ResultVT = Op.getValueType();
+
+  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+  // with another "cannot select select_cc" assert:
+
+  // Comparison of operand 0 against operand 1 under the condition code in
+  // operand 4; the compare result is typed like the select result.
+  SDValue CmpResult = DAG.getNode(ISD::SETCC, ResultVT,
+                                  Op.getOperand(0), Op.getOperand(1),
+                                  Op.getOperand(4));
+
+  // SELB operands: true value, false value, selector.
+  // NOTE(review): confirm this operand order against SELB's mask semantics.
+  return DAG.getNode(SPUISD::SELB, ResultVT,
+                     Op.getOperand(2), Op.getOperand(3), CmpResult);
+}
+
+//! Custom lower ISD::TRUNCATE
+/*!
+ The truncation is performed with a byte shuffle: the source scalar is
+ promoted to a vector (SPUISD::PROMOTE_SCALAR), SPUISD::SHUFB rearranges its
+ bytes using a control mask picked from the (source type, result type) pair,
+ and the narrowed value is read back with SPUISD::VEC2PREFSLOT.
+
+ \param Op The ISD::TRUNCATE node; operand 0 is the value being truncated.
+ \param DAG The selection DAG in which the replacement nodes are created.
+ \return The SPUISD::VEC2PREFSLOT node producing the truncated value.
+
+ Prints a diagnostic on cerr and aborts when the source or result type is
+ not one of the handled combinations.
+ */
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+  // Result type, plus the 128-bit vector type having it as element type
+  MVT DstVT = Op.getValueType();
+  MVT::SimpleValueType DstSimpleVT = DstVT.getSimpleVT();
+  MVT DstVecVT = MVT::getVectorVT(DstVT, (128 / DstVT.getSizeInBits()));
+
+  // Source value and type, plus the matching 128-bit vector type
+  SDValue Src = Op.getOperand(0);
+  MVT SrcVT = Src.getValueType();
+  MVT SrcVecVT = MVT::getVectorVT(SrcVT, (128 / SrcVT.getSizeInBits()));
+
+  SDValue SrcVec = DAG.getNode(SPUISD::PROMOTE_SCALAR, SrcVecVT, Src);
+
+  // Shuffle control words: evenWord goes into words 0 and 2 of the mask
+  // vector, oddWord into words 1 and 3.
+  unsigned evenWord;
+  unsigned oddWord;
+
+  switch (SrcVT.getSimpleVT()) {
+  case MVT::i16:
+  case MVT::i32:
+    switch (DstSimpleVT) {
+    case MVT::i8:
+      // least significant byte of word/halfword
+      evenWord = 0x03030303;
+      oddWord = evenWord;
+      break;
+    case MVT::i16:
+      // least significant halfword of word
+      evenWord = 0x02030203;
+      oddWord = evenWord;
+      break;
+    default:
+      cerr << "Truncation to illegal type!";
+      abort();
+    }
+    break;
+  case MVT::i64:
+    switch (DstSimpleVT) {
+    case MVT::i8:
+      // least significant byte of doubleword
+      evenWord = 0x07070707;
+      oddWord = evenWord;
+      break;
+    case MVT::i16:
+      // least significant halfword of doubleword
+      evenWord = 0x06070607;
+      oddWord = evenWord;
+      break;
+    case MVT::i32:
+      // least significant word of doubleword
+      evenWord = 0x04050607;
+      oddWord = evenWord;
+      break;
+    default:
+      cerr << "Truncation to illegal type!";
+      abort();
+    }
+    break;
+  case MVT::i128:
+    switch (DstSimpleVT) {
+    case MVT::i8:
+      // least significant byte of quadword
+      evenWord = 0x0f0f0f0f;
+      oddWord = evenWord;
+      break;
+    case MVT::i16:
+      // least significant halfword of quadword
+      evenWord = 0x0e0f0e0f;
+      oddWord = evenWord;
+      break;
+    case MVT::i32:
+      // least significant word of quadword
+      evenWord = 0x0c0d0e0f;
+      oddWord = evenWord;
+      break;
+    case MVT::i64:
+      // least significant doubleword of quadword
+      evenWord = 0x08090a0b;
+      oddWord = 0x0c0d0e0f;
+      break;
+    default:
+      cerr << "Truncation to illegal type!";
+      abort();
+    }
+    break;
+  default:
+    cerr << "Trying to lower truncation from illegal type!";
+    abort();
+  }
+
+  // The truncation itself is a shuffle of the promoted source with itself,
+  // controlled by the v4i32 mask { even, odd, even, odd }
+  SDValue ShuffleCtl = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                                   DAG.getConstant(evenWord, MVT::i32),
+                                   DAG.getConstant(oddWord, MVT::i32),
+                                   DAG.getConstant(evenWord, MVT::i32),
+                                   DAG.getConstant(oddWord, MVT::i32));
+
+  SDValue Shuffled = DAG.getNode(SPUISD::SHUFB, SrcVecVT,
+                                 SrcVec, SrcVec, ShuffleCtl);
+
+  // Reinterpret as a vector of the result type, then extract the scalar
+  SDValue Recast = DAG.getNode(ISD::BIT_CONVERT, DstVecVT, Shuffled);
+  return DAG.getNode(SPUISD::VEC2PREFSLOT, DstVT, Recast);
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
abort();
}
case ISD::LOAD:
+ case ISD::EXTLOAD:
case ISD::SEXTLOAD:
case ISD::ZEXTLOAD:
return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::FDIV:
if (VT == MVT::f32 || VT == MVT::v4f32)
return LowerFDIVf32(Op, DAG);
-// else if (Op.getValueType() == MVT::f64)
-// return LowerFDIVf64(Op, DAG);
+#if 0
+ // This is probably a libcall
+ else if (Op.getValueType() == MVT::f64)
+ return LowerFDIVf64(Op, DAG);
+#endif
else
assert(0 && "Calling FDIV on unsupported MVT");
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
}
return SDValue();
}
-SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG)
{
#if 0
unsigned Opc = (unsigned) N->getOpcode();
#endif
/* Otherwise, return unchanged */
- return 0;
}
//===----------------------------------------------------------------------===//
#endif
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
SelectionDAG &DAG = DCI.DAG;
- SDValue Op0 = N->getOperand(0); // everything has at least one operand
- SDValue Result; // Initially, NULL result
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ MVT NodeVT = N->getValueType(0); // The node's value type
+ MVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
switch (N->getOpcode()) {
default: break;
ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
- Op0.getValueType());
+ DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add " << CN0->getZExtValue() << ", "
+ << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
+ << "With: (SPUindirect <arg>, "
+ << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
+ }
+#endif
- DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
- DEBUG(cerr << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
- return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
+ return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
Op0.getOperand(0), combinedConst);
}
} else if (isa<ConstantSDNode>(Op0)
ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
- Op0.getValueType());
+ DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
<< "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: {
- if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
- N->getValueType(0) == Op0.getValueType()) {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
// (any_extend (SPUextract_elt0 <arg>)) ->
// (SPUextract_elt0 <arg>)
// Types must match, however...
- DEBUG(cerr << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\nReplace: ";
+ N->dump(&DAG);
+ cerr << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
return Op0;
}
if (isa<ConstantSDNode>(Op1)) {
// Kill degenerate vector shifts:
ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
-
if (CN->getZExtValue() == 0) {
Result = Op0;
}
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND: {
- // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
+ // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
// <arg>
// but only if the SPUpromote_scalar and <arg> types match.
SDValue Op00 = Op0.getOperand(0);
- if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
SDValue Op000 = Op00.getOperand(0);
- if (Op000.getValueType() == N->getValueType(0)) {
+ if (Op000.getValueType() == NodeVT) {
Result = Op000;
}
}
break;
}
- case SPUISD::EXTRACT_ELT0: {
- // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
// <arg>
Result = Op0.getOperand(0);
break;
}
}
// Otherwise, return unchanged.
-#if 1
+#ifndef NDEBUG
if (Result.getNode()) {
DEBUG(cerr << "\nReplace.SPU: ");
DEBUG(N->dump(&DAG));
#if 0
case CALL:
case SHUFB:
- case INSERT_MASK:
+ case SHUFFLE_MASK:
case CNTB:
#endif
}
case SPUISD::LDRESULT:
- case SPUISD::EXTRACT_ELT0:
- case SPUISD::EXTRACT_ELT0_CHAINED: {
+ case SPUISD::VEC2PREFSLOT: {
MVT OpVT = Op.getValueType();
unsigned OpVTBits = OpVT.getSizeInBits();
uint64_t InMask = OpVT.getIntegerVTBitMask();
}
#if 0
- case EXTRACT_I1_ZEXT:
- case EXTRACT_I1_SEXT:
- case EXTRACT_I8_ZEXT:
- case EXTRACT_I8_SEXT:
case MPY:
case MPYU:
case MPYH:
case SPUISD::VEC_ROTR:
case SPUISD::ROTQUAD_RZ_BYTES:
case SPUISD::ROTQUAD_RZ_BITS:
- case SPUISD::ROTBYTES_RIGHT_S:
case SPUISD::ROTBYTES_LEFT:
- case SPUISD::ROTBYTES_LEFT_CHAINED:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
case SPUISD::FPInterp: