#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
using namespace llvm;
static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
- // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
- setConvertAction(MVT::ppcf128, MVT::f64, Expand);
- setConvertAction(MVT::ppcf128, MVT::f32, Expand);
// This is used in the ppcf128->int sequence. Note it has different semantics
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
// FIXME: disable this lowered code. This generates 64-bit register values,
// and we don't model the fact that the top part is clobbered by calls. We
// need to flag these together so that the value isn't live across a call.
//setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
- // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
-static bool isConstantOrUndef(SDValue Op, unsigned Val) {
- return Op.getOpcode() == ISD::UNDEF ||
- cast<ConstantSDNode>(Op)->getZExtValue() == Val;
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
if (!isUnary) {
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getOperand(i), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
} else {
for (unsigned i = 0; i != 8; ++i)
- if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||
- !isConstantOrUndef(N->getOperand(i+8), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
return false;
}
return true;
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2)
- if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
- !isConstantOrUndef(N->getOperand(i+1), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
} else {
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
- !isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
- !isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
- !isConstantOrUndef(N->getOperand(i+9), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
return false;
}
return true;
/// isVMerge - Common function, used to match vmrg* shuffles.
///
-static bool isVMerge(SDNode *N, unsigned UnitSize,
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR &&
- N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
- if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
LHSStart+j+i*UnitSize) ||
- !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
RHSStart+j+i*UnitSize))
return false;
}
- return true;
+ return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
-bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 8, 24);
return isVMerge(N, UnitSize, 8, 8);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
-bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
if (!isUnary)
return isVMerge(N, UnitSize, 0, 16);
return isVMerge(N, UnitSize, 0, 0);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR &&
- N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
// Find the first non-undef value in the shuffle mask.
unsigned i;
- for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
/*search*/;
if (i == 16) return -1; // all undef.
- // Otherwise, check to see if the rest of the elements are consequtively
+ // Otherwise, check to see if the rest of the elements are consecutively
// numbered from this value.
- unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getZExtValue();
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
if (!isUnary) {
- // Check the rest of the elements to see if they are consequtive.
+ // Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
- if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
} else {
- // Check the rest of the elements to see if they are consequtive.
+ // Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
- if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
}
-
return ShiftAmt;
}
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
-bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR &&
- N->getNumOperands() == 16 &&
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
- unsigned ElementBase = 0;
- SDValue Elt = N->getOperand(0);
- if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
- ElementBase = EltV->getZExtValue();
- else
- return false; // FIXME: Handle UNDEF elements too!
-
- if (cast<ConstantSDNode>(Elt)->getZExtValue() >= 16)
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
return false;
- // Check that they are consequtive.
- for (unsigned i = 1; i != EltSize; ++i) {
- if (!isa<ConstantSDNode>(N->getOperand(i)) ||
- cast<ConstantSDNode>(N->getOperand(i))->getZExtValue() != i+ElementBase)
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
return false;
- }
- assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
- if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- assert(isa<ConstantSDNode>(N->getOperand(i)) &&
- "Invalid VECTOR_SHUFFLE mask!");
+ if (N->getMaskElt(i) < 0) continue;
for (unsigned j = 0; j != EltSize; ++j)
- if (N->getOperand(i+j) != N->getOperand(j))
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
return false;
}
-
return true;
}
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR);
- if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N))
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
+
return false;
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
- assert(isSplatShuffleMask(N, EltSize));
- return cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() / EltSize;
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
- unsigned ValSizeInBytes = 0;
+ unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
Value = CN->getZExtValue();
- ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8;
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
Value = FloatToBits(CN->getValueAPF().convertToFloat());
- ValSizeInBytes = 4;
}
// If the splat value is larger than the element value, then we can never do
// Not FP? Not a fsel.
if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
!Op.getOperand(2).getValueType().isFloatingPoint())
- return SDValue();
+ return Op;
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
// Cannot handle SETEQ/SETNE.
- if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDValue();
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
MVT ResVT = Op.getValueType();
MVT CmpVT = Op.getOperand(0).getValueType();
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
}
- return SDValue();
+ return Op;
}
// FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
DebugLoc dl) {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
SDValue Tmp;
switch (Op.getValueType().getSimpleVT()) {
- default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+ default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ PPCISD::FCTIDZ,
+ dl, MVT::f64, Src);
break;
case MVT::i64:
Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
MVT CanonicalVT = VTys[SplatSize-1];
// Build a canonical splat for this value.
- SDValue Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType());
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
- SDValue Res = DAG.getBUILD_VECTOR(CanonicalVT, dl, &Ops[0], Ops.size());
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
}
LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
- SDValue Ops[16];
+ int Ops[16];
for (unsigned i = 0; i != 16; ++i)
- Ops[i] = DAG.getConstant(i+Amt, MVT::i8);
- SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, LHS, RHS,
- DAG.getBUILD_VECTOR(MVT::v16i8, dl, Ops,16));
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
}
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
-SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
- SelectionDAG &DAG) {
- // If this is a vector of constants or undefs, get the bits. A bit in
- // UndefBits is set if the corresponding element of the vector is an
- // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
- // zero.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
- uint64_t SplatBits;
- uint64_t SplatUndef;
- unsigned SplatSize;
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs) || SplatBitSize > 32)
+ return SDValue();
- // If this is a splat (repetition) of a value across the whole vector, return
- // the smallest size that splats it. For example, "0x01010101010101..." is a
- // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
- // SplatSize = 1 byte.
- if (BVN->isConstantSplat(HasAnyUndefs, SplatBits, SplatUndef, SplatSize)) {
- // First, handle single instruction cases.
-
- // All zeros?
- if (SplatBits == 0) {
- // Canonicalize all zero vectors to be v4i32.
- if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
- SDValue Z = DAG.getConstant(0, MVT::i32);
- Z = DAG.getBUILD_VECTOR(MVT::v4i32, dl, Z, Z, Z, Z);
- Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
- }
- return Op;
- }
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
- // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
- int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
- if (SextVal >= -16 && SextVal <= 15)
- return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+ // First, handle single instruction cases.
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
- // Two instruction sequences.
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
- // If this value is in the range [-32,30] and is even, use:
- // tmp = VSPLTI[bhw], result = add tmp, tmp
- if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
- SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
- Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
- }
- // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
- // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
- // for fneg/fabs.
- if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
- // Make -1 and vspltisw -1:
- SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+ // Two instruction sequences.
- // Make the VSLW intrinsic, computing 0x8000_0000.
- SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
- OnesV, DAG, dl);
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+ Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
- // xor by OnesV to invert it.
- Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
- }
+ // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
+ // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+ // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
- // Check to see if this is a wide variety of vsplti*, binop self cases.
- unsigned SplatBitSize = SplatSize*8;
- static const signed char SplatCsts[] = {
- -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
- -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
- };
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
- for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
- // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
- // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
- int i = SplatCsts[idx];
-
- // Figure out what shift amount will be used by altivec if shifted by i in
- // this splat size.
- unsigned TypeShiftAmt = i & (SplatBitSize-1);
-
- // vsplti + shl self.
- if (SextVal == (i << (int)TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
- static const unsigned IIDs[] = { // Intrinsic to use for each size.
- Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
- Intrinsic::ppc_altivec_vslw
- };
- Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
- }
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
- // vsplti + srl self.
- if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
- static const unsigned IIDs[] = { // Intrinsic to use for each size.
- Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
- Intrinsic::ppc_altivec_vsrw
- };
- Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
- }
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
- // vsplti + sra self.
- if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
- static const unsigned IIDs[] = { // Intrinsic to use for each size.
- Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
- Intrinsic::ppc_altivec_vsraw
- };
- Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
- }
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
- // vsplti + rol self.
- if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
- ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
- SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
- static const unsigned IIDs[] = { // Intrinsic to use for each size.
- Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
- Intrinsic::ppc_altivec_vrlw
- };
- Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
- }
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
- // t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
- return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
- }
- // t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
- return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
- }
- // t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
- SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
- return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
- }
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
}
- // Three instruction sequences.
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
- // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
- if (SextVal >= 0 && SextVal <= 31) {
- SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
- // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
- if (SextVal >= -31 && SextVal <= 0) {
- SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
- SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
- LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
- return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
}
+ // Three instruction sequences.
+
+ // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ }
+ // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ }
+
return SDValue();
}
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
- unsigned ShufIdxs[16];
+ int ShufIdxs[16];
switch (OpNum) {
default: assert(0 && "Unknown i32 permute!");
case OP_VMRGHW:
case OP_VSLDOI12:
return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
}
- SDValue Ops[16];
- for (unsigned i = 0; i != 16; ++i)
- Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8);
-
- return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, OpLHS.getValueType(),
- OpLHS, OpRHS,
- DAG.getBUILD_VECTOR(MVT::v16i8, dl, Ops, 16));
+ MVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- SDValue PermMask = Op.getOperand(2);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ MVT VT = Op.getValueType();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
if (V2.getOpcode() == ISD::UNDEF) {
- if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) ||
- PPC::isSplatShuffleMask(PermMask.getNode(), 2) ||
- PPC::isSplatShuffleMask(PermMask.getNode(), 4) ||
- PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) ||
- PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) ||
- PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 ||
- PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) ||
- PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) ||
- PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) ||
- PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) ||
- PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) ||
- PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
return Op;
}
}
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) ||
- PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) ||
- PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 ||
- PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) ||
- PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) ||
- PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) ||
- PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) ||
- PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) ||
- PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false))
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
unsigned EltNo = 8; // Start out undef.
for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
- if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
+ if (PermMask[i*4+j] < 0)
continue; // Undef, ignore it.
- unsigned ByteSource =
- cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getZExtValue();
+ unsigned ByteSource = PermMask[i*4+j];
if ((ByteSource & 3) != j) {
isFourElementShuffle = false;
break;
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
SmallVector<SDValue, 16> ResultMask;
- for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
- unsigned SrcElt;
- if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
- SrcElt = 0;
- else
- SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i8));
+ MVT::i32));
}
- SDValue VPermMask = DAG.getBUILD_VECTOR(MVT::v16i8, dl,
- &ResultMask[0], ResultMask.size());
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
}
OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
// Merge the results together.
- SDValue Ops[16];
+ int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
- Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8);
- Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8);
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
}
- return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, MVT::v16i8, EvenParts, OddParts,
- DAG.getBUILD_VECTOR(MVT::v16i8, dl, Ops, 16));
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
assert(0 && "Unknown mul to lower!");
abort();
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG,
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
Op.getDebugLoc());
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
return;
}
case ISD::FP_TO_SINT:
- Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
}