// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
setStackPointerRegisterToSaveRestore(ARM::SP);
- setSchedulingPreference(SchedulingForRegPressure);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
// FIXME: If-converter should use instruction latency to determine
// profitability rather than relying on fixed limits.
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
}
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+ return Sched::RegPressure;
+}
+
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
}
}
- // If there are only 2 elements in a 128-bit vector, insert them into an
- // undef vector. This handles the common case for 128-bit vector argument
- // passing, where the insertions should be translated to subreg accesses
- // with no real instructions.
- if (VT.is128BitVector() && Op.getNumOperands() == 2) {
- SDValue Val = DAG.getUNDEF(VT);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- if (Op0.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
- DAG.getIntPtrConstant(0));
- if (Op1.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
- DAG.getIntPtrConstant(1));
- return Val;
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ // If all elements are constants, fall back to the default expansion, which
+ // will generate a load from the constant pool.
+ if (isConstant)
+ return SDValue();
+
+ // Use VDUP for non-constant splats.
+ if (usesOnlyOneValue)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
+ DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
return SDValue();
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
- if (VT.getVectorNumElements() == 4 &&
- (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
-
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
+ // Implement shuffles with 32- or 64-bit elements as subreg copies.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ continue;
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32));
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
+ Elt, DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+ }
+
return SDValue();
}
return SDValue();
}
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DAG.getMachineFunction().
+ getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
default: break;
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
} else
return false;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
- isInc, DAG);
+ isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
if (!isLegal)
return false;
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}