// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
MachineFunction &MF) const {
const Function *F = MF.getFunction();
if ((!IsMemset || ZeroMemset) &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) {
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
// Win32 requires us to put the sret argument to %eax as well.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
- if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
- (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
- MachineFunction &MF = DAG.getMachineFunction();
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned Reg = FuncInfo->getSRetReturnReg();
- assert(Reg &&
- "SRetReturnReg should have been set in LowerFormalArguments().");
- SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+ //
+ // Checking Function.hasStructRetAttr() here is insufficient because the IR
+ // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
+ // false, then an sret argument may be implicitly inserted in the SelDAG. In
+ // either case FuncInfo->setSRetReturnReg() will have been called.
+ if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
+ assert((Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) &&
+ "No need for an sret register");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy());
unsigned RetValReg
= (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
}
const Function *Fn = MF.getFunction();
- bool NoImplicitFloatOps = Fn->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
// Figure out if XMM registers are in use.
assert(!(MF.getTarget().Options.UseSoftFloat &&
- Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) &&
+ Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
"SSE register cannot be used when SSE is disabled!");
// 64-bit calling conventions support varargs and register parameters, so we
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
- } else if (Subtarget->isPICStyleRIPRel() &&
- isa<Function>(GV) &&
- cast<Function>(GV)->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NonLazyBind)) {
+ } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
// at the cost of eager binding (and one extra byte of encoding).
return false;
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- bool symetricMaskRequired =
+ bool symmetricMaskRequired =
(VT.getSizeInBits() >= 256) && (EltSize == 32);
// VSHUFPSY divides the resulting vector into 4 chunks.
// For VSHUFPSY, the mask of the second half must be the same as the
// first but with the appropriate offsets. This works in the same way as
// VPERMILPS works with masks.
- if (!symetricMaskRequired || Idx < 0)
+ if (!symmetricMaskRequired || Idx < 0)
continue;
if (MaskVal[i] < 0) {
MaskVal[i] = Idx - l;
return (FstHalf | (SndHalf << 4));
}
-// Symetric in-lane mask. Each lane has 4 elements (for imm8)
+// Symmetric in-lane mask. Each lane has 4 elements (for imm8)
static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize < 32)
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (VT.getSizeInBits() < 256 || EltSize < 32)
return false;
- bool symetricMaskRequired = (EltSize == 32);
+ bool symmetricMaskRequired = (EltSize == 32);
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
- if (symetricMaskRequired) {
+ if (symmetricMaskRequired) {
if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
ExpectedMaskVal[i] = Mask[i+l] - l;
continue;
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodePSHUFBMask(C, Mask);
+ if (Mask.empty())
+ return false;
break;
}
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
const Function *F = DAG.getMachineFunction().getFunction();
- bool OptForSize = F->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
/// \brief Implementation of the \c isShuffleEquivalent variadic functor.
///
/// See its documentation for details.
-bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
+bool isShuffleEquivalentImpl(SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ ArrayRef<const int *> Args) {
if (Mask.size() != Args.size())
return false;
+
+ // If the values are build vectors, we can look through them to find
+ // equivalent inputs that make the shuffles equivalent.
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(V1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(V2);
+
for (int i = 0, e = Mask.size(); i < e; ++i) {
assert(*Args[i] >= 0 && "Arguments must be positive integers!");
- if (Mask[i] != -1 && Mask[i] != *Args[i])
- return false;
+ if (Mask[i] != -1 && Mask[i] != *Args[i]) {
+ auto *MaskBV = Mask[i] < e ? BV1 : BV2;
+ auto *ArgsBV = *Args[i] < e ? BV1 : BV2;
+ if (!MaskBV || !ArgsBV ||
+ MaskBV->getOperand(Mask[i] % e) != ArgsBV->getOperand(*Args[i] % e))
+ return false;
+ }
}
return true;
}
/// It returns true if the mask is exactly as wide as the argument list, and
/// each element of the mask is either -1 (signifying undef) or the value given
/// in the argument.
-static const VariadicFunction1<
- bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
+static const VariadicFunction3<bool, SDValue, SDValue, ArrayRef<int>, int,
+ isShuffleEquivalentImpl> isShuffleEquivalent =
+ {};
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
}
}
+/// \brief Try to lower as a blend of elements from two inputs followed by
+/// a single-input permutation.
+///
+/// This matches the pattern where we can blend elements from two inputs and
+/// then reduce the shuffle to a single-input permutation.
+static SDValue lowerVectorShuffleAsBlendAndPermute(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ // We build up the blend mask while checking whether a blend is a viable way
+ // to reduce the shuffle.
+ SmallVector<int, 32> BlendMask(Mask.size(), -1);
+ SmallVector<int, 32> PermuteMask(Mask.size(), -1);
+
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds.");
+
+ if (BlendMask[Mask[i] % Size] == -1)
+ BlendMask[Mask[i] % Size] = Mask[i];
+ else if (BlendMask[Mask[i] % Size] != Mask[i])
+ return SDValue(); // Can't blend in the needed input!
+
+ PermuteMask[i] = Mask[i] % Size;
+ }
+
+ SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
+ return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
+}
+
/// \brief Generic routine to lower a shuffle and blend as a decomposed set of
/// unblended shuffles followed by an unshuffled blend.
///
SDValue V1, SDValue V2) {
SmallBitVector Zeroable(Mask.size(), false);
+ while (V1.getOpcode() == ISD::BITCAST)
+ V1 = V1->getOperand(0);
+ while (V2.getOpcode() == ISD::BITCAST)
+ V2 = V2->getOperand(0);
+
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
continue;
}
- // If this is an index into a build_vector node, dig out the input value and
- // use it.
+ // If this is an index into a build_vector node (which has the same number
+ // of elements), dig out the input value and use it.
SDValue V = M < Size ? V1 : V2;
- if (V.getOpcode() != ISD::BUILD_VECTOR)
+ if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
continue;
SDValue Input = V.getOperand(M % Size);
return Zeroable;
}
+/// \brief Try to emit a bitmask instruction for a shuffle.
+///
+/// This handles cases where we can model a blend exactly as a bitmask due to
+/// one of the inputs being zeroable.
+static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ MVT EltVT = VT.getScalarType();
+ int NumEltBits = EltVT.getSizeInBits();
+ MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
+ SDValue Zero = DAG.getConstant(0, IntEltVT);
+ SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), IntEltVT);
+ if (EltVT.isFloatingPoint()) {
+ Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero);
+ AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes);
+ }
+ SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ SDValue V;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Zeroable[i])
+ continue;
+ if (Mask[i] % Size != i)
+ return SDValue(); // Not a blend.
+ if (!V)
+ V = Mask[i] < Size ? V1 : V2;
+ else if (V != (Mask[i] < Size ? V1 : V2))
+ return SDValue(); // Can only let one input through the mask.
+
+ VMaskOps[i] = AllOnes;
+ }
+ if (!V)
+ return SDValue(); // No non-zeroable elements!
+
+ SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
+ V = DAG.getNode(VT.isFloatingPoint()
+ ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
+ DL, VT, V, VMask);
+ return V;
+}
+
/// \brief Try to lower a vector shuffle as a byte shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSRLDQ and PSLLDQ SSE2
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
int Size = Mask.size();
- assert(Size == VT.getVectorNumElements() && "Unexpected mask size");
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
// PSRL : (little-endian) right bit shift.
// [ 1, zz, 3, zz]
ExtVT, V1, V2);
}
+ // This lowering only works for the low element with floating point vectors.
+ if (VT.isFloatingPoint() && V2Index != 0)
+ return SDValue();
+
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
if (isSingleInputShuffleMask(Mask)) {
// Use low duplicate instructions for masks that match their pattern.
if (Subtarget->hasSSE3())
- if (isShuffleEquivalent(Mask, 0, 0))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);
// Straight shuffle of a single input vector. Simulate this by using the
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 2))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 3))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
-
// If we have a single input, insert that into V1 if we can do so cheaply.
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
// Try to use one of the special instruction patterns to handle two common
// blend patterns if a zero-blend above didn't work.
- if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 3) || isShuffleEquivalent(V1, V2, Mask, 1, 3))
if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
// We can either use a special instruction to load over the low double or
// to move just the low double.
Subtarget, DAG))
return Blend;
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
return Insertion;
}
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 2))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 3))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
-
if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
Subtarget, DAG))
return Blend;
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+
// Try to use byte rotation instructions.
// Its more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
+/// \brief Test whether this can be lowered with a single SHUFPS instruction.
+///
+/// This is used to disable more specialized lowerings when the shufps lowering
+/// will happen to be efficient.
+static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
+ // This routine only handles 128-bit shufps.
+ assert(Mask.size() == 4 && "Unsupported mask size!");
+
+ // To lower with a single SHUFPS we need to have the low half and high half
+ // each requiring a single input.
+ if (Mask[0] != -1 && Mask[1] != -1 && (Mask[0] < 4) != (Mask[1] < 4))
+ return false;
+ if (Mask[2] != -1 && Mask[3] != -1 && (Mask[2] < 4) != (Mask[3] < 4))
+ return false;
+
+ return true;
+}
+
/// \brief Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
// Use even/odd duplicate instructions for masks that match their pattern.
if (Subtarget->hasSSE3()) {
- if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
- if (isShuffleEquivalent(Mask, 1, 1, 3, 3))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 1, 3, 3))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
}
getV4X86ShuffleImm8ForMask(Mask, DAG));
}
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
-
// There are special ways we can lower some single-element blends. However, we
// have custom ways we can lower more complex single-element blends below that
// we defer to if both this and BLENDPS fail to match, so restrict this to
// Use INSERTPS if we can complete the shuffle efficiently.
if (SDValue V = lowerVectorShuffleAsInsertPS(Op, V1, V2, Mask, DAG))
return V;
+
+ if (!isSingleSHUFPSMask(Mask))
+ if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
+ DL, MVT::v4f32, V1, V2, Mask, DAG))
+ return BlendPerm;
}
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 1, 1))
Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
+ else if (isShuffleEquivalent(V1, V2, Mask, 2, 2, 3, 3))
Mask = UnpackHiMask;
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
Mask, Subtarget, DAG))
return V;
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
-
if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Masked;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+
// Try to use byte rotation instructions.
// Its more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget->hasSSSE3())
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
+ if (isShuffleEquivalent(V, V, Mask, 0, 0, 1, 1, 2, 2, 3, 3))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
- if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
+ if (isShuffleEquivalent(V, V, Mask, 4, 4, 5, 5, 6, 6, 7, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
// Try to use byte rotation instructions.
Mask, Subtarget, DAG))
return V;
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
- if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
-
if (Subtarget->hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
Subtarget, DAG))
return Blend;
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return Masked;
+
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 2, 10, 3, 11))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 4, 12, 5, 13, 6, 14, 7, 15))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
return true;
}
-/// \brief Generic routine to split ector shuffle into half-sized shuffles.
+/// \brief Generic routine to split vector shuffle into half-sized shuffles.
///
/// This routine just extracts two subvectors, shuffles them independently, and
/// then concatenates them back together. This should work effectively with all
MVT ScalarVT = VT.getScalarType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
- SDValue LoV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V1,
- DAG.getIntPtrConstant(SplitNumElements));
- SDValue LoV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
- DAG.getIntPtrConstant(0));
- SDValue HiV2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V2,
- DAG.getIntPtrConstant(SplitNumElements));
+ // Rather than splitting build-vectors, just build two narrower build
+ // vectors. This helps shuffling with splats and zeros.
+ auto SplitVector = [&](SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ MVT OrigVT = V.getSimpleValueType();
+ int OrigNumElements = OrigVT.getVectorNumElements();
+ int OrigSplitNumElements = OrigNumElements / 2;
+ MVT OrigScalarVT = OrigVT.getScalarType();
+ MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
+
+ SDValue LoV, HiV;
+
+ auto *BV = dyn_cast<BuildVectorSDNode>(V);
+ if (!BV) {
+ LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
+ DAG.getIntPtrConstant(0));
+ HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V,
+ DAG.getIntPtrConstant(OrigSplitNumElements));
+ } else {
+
+ SmallVector<SDValue, 16> LoOps, HiOps;
+ for (int i = 0; i < OrigSplitNumElements; ++i) {
+ LoOps.push_back(BV->getOperand(i));
+ HiOps.push_back(BV->getOperand(i + OrigSplitNumElements));
+ }
+ LoV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, LoOps);
+ HiV = DAG.getNode(ISD::BUILD_VECTOR, DL, OrigSplitVT, HiOps);
+ }
+ return std::make_pair(DAG.getNode(ISD::BITCAST, DL, SplitVT, LoV),
+ DAG.getNode(ISD::BITCAST, DL, SplitVT, HiV));
+ };
+
+ SDValue LoV1, HiV1, LoV2, HiV2;
+ std::tie(LoV1, HiV1) = SplitVector(V1);
+ std::tie(LoV2, HiV2) = SplitVector(V2);
// Now create two 4-way blends of these half-width vectors.
auto HalfBlend = [&](ArrayRef<int> HalfMask) {
VT.getVectorNumElements() / 2);
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
- if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
- isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 1, 0, 1) ||
+ isShuffleEquivalent(V1, V2, Mask, 0, 1, 4, 5)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
- if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 1, 6, 7)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2))
return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);
if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 2, 6))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
// If we have a single input to the zero element, insert that into V1 if we
}
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 4, 2, 6))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
}
"Repeated masks must be half the mask width!");
// Use even/odd duplicate instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 0, 2, 2, 4, 4, 6, 6))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 0, 2, 2, 4, 4, 6, 6))
return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
- if (isShuffleEquivalent(Mask, 1, 1, 3, 3, 5, 5, 7, 7))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 1, 3, 3, 5, 5, 7, 7))
return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);
if (isSingleInputShuffleMask(Mask))
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 4, 12, 5, 13))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 1, 9, 4, 12, 5, 13))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
}
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
// First 128-bit lane:
0, 16, 1, 17, 2, 18, 3, 19,
// Second 128-bit lane:
8, 24, 9, 25, 10, 26, 11, 27))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
// First 128-bit lane:
4, 20, 5, 21, 6, 22, 7, 23,
// Second 128-bit lane:
// Note that these are repeated 128-bit lane unpacks, not unpacks across all
// 256-bit lanes.
if (isShuffleEquivalent(
- Mask,
+ V1, V2, Mask,
// First 128-bit lane:
0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
// Second 128-bit lane:
16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
if (isShuffleEquivalent(
- Mask,
+ V1, V2, Mask,
// First 128-bit lane:
8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
// Second 128-bit lane:
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 2, 10, 4, 12, 6, 14))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 9, 3, 11, 5, 13, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
// FIXME: Implement direct support for this type!
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
0, 16, 1, 17, 4, 20, 5, 21,
8, 24, 9, 25, 12, 28, 13, 29))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
2, 18, 3, 19, 6, 22, 7, 23,
10, 26, 11, 27, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14))
+ if (isShuffleEquivalent(V1, V2, Mask, 0, 8, 2, 10, 4, 12, 6, 14))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
- if (isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15))
+ if (isShuffleEquivalent(V1, V2, Mask, 1, 9, 3, 11, 5, 13, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
// FIXME: Implement direct support for this type!
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
0, 16, 1, 17, 4, 20, 5, 21,
8, 24, 9, 25, 12, 28, 13, 29))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(Mask,
+ if (isShuffleEquivalent(V1, V2, Mask,
2, 18, 3, 19, 6, 22, 7, 23,
10, 26, 11, 27, 14, 30, 15, 31))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
}
+ // We actually see shuffles that are entirely re-arrangements of a set of
+ // zero inputs. This mostly happens while decomposing complex shuffles into
+ // simple ones. Directly lower these as a buildvector of zeros.
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ if (Zeroable.all())
+ return getZeroVector(VT, Subtarget, DAG, dl);
+
// Try to collapse shuffles into using a vector type with fewer elements but
// wider element types. We cap this to not form integers or floating point
// elements wider than 64 bits, but it might be interesting to form i128
unsigned NumLanes = (NumElems - 1) / 8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
- // Blend for v16i16 should be symetric for the both lanes.
+ // Blend for v16i16 should be symmetric for both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- // Canonizalize to v2f64.
+ // Canonicalize to v2f64.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
return DAG.getNode(ISD::BITCAST, dl, VT,
getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
if (HasSSE2 && VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
- // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1)
+ // v4f32 or v4i32: canonicalize to v4f32 (which is legal for SSE1)
return DAG.getNode(ISD::BITCAST, dl, VT,
getTargetShuffleNode(X86ISD::MOVLHPS, dl, MVT::v4f32,
DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V1),
bool HasFp256 = Subtarget->hasFp256();
bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptForSize =
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
// Check if we should use the experimental vector shuffle lowering. If so,
// delegate completely to that code path.
DAG);
unsigned MaskValue;
- if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
- &MaskValue))
+ if (isBlendMask(M, VT, Subtarget->hasSSE41(), HasInt256, &MaskValue))
return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
if (isSHUFPMask(M, VT))
return NewOp;
}
- if (VT == MVT::v16i16 && Subtarget->hasInt256()) {
+ if (VT == MVT::v16i16 && HasInt256) {
SDValue NewOp = LowerVECTOR_SHUFFLEv16i16(Op, DAG);
if (NewOp.getNode())
return NewOp;
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
SelectionDAG &DAG) const {
- if (Op.getValueType() == MVT::i1)
- // KORTEST instruction should be selected
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
-
+ if (Op.getValueType() == MVT::i1) {
+ SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
+ DAG.getConstant(0, MVT::i8));
+ }
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
// if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
- !DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize) &&
+ !DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::MinSize) &&
!Subtarget->isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
// may emit an illegal shuffle but the expansion is still better than scalar
// code. We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise
// we'll emit a shuffle and a arithmetic shift.
+// FIXME: Is the expansion actually better than scalar code? It doesn't seem so.
// TODO: It is possible to support ZExt by zeroing the undef values during
// the shuffle phase or after the shuffle.
static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!DAG.getTarget().Options.UseSoftFloat &&
- !(DAG.getMachineFunction()
- .getFunction()->getAttributes()
- .hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat)) &&
+ !(DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ EVT VT = Op.getValueType();
+
MFI->setFrameAddressIsTaken(true);
- EVT VT = Op.getValueType();
+ if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
+ // Depth > 0 makes no sense on targets which use Windows unwind codes. It
+ // is not possible to crawl up the stack without looking at the unwind codes
+ // simultaneously.
+ int FrameAddrIndex = FuncInfo->getFAIndex();
+ if (!FrameAddrIndex) {
+ // Set up a frame object for the return address.
+ unsigned SlotSize = RegInfo->getSlotSize();
+ FrameAddrIndex = MF.getFrameInfo()->CreateFixedObject(
+ SlotSize, /*Offset=*/INT64_MIN, /*IsImmutable=*/false);
+ FuncInfo->setFAIndex(FrameAddrIndex);
+ }
+ return DAG.getFrameIndex(FrameAddrIndex, VT);
+ }
+
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(
- DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
"Invalid Frame Register!");
return false;
}
+bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
+
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
- if (!(isShuffleEquivalent(Mask, 0, 3) ||
- isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
- isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
+ if (!(isShuffleEquivalent(V1, V2, Mask, 0, 3) ||
+ isShuffleEquivalent(V1, V2, Mask, 0, 5, 2, 7) ||
+ isShuffleEquivalent(V1, V2, Mask, 0, 9, 2, 11, 4, 13, 6, 15)))
return SDValue();
// Only specific types are legal at this point, assert so we notice if and
EltNo);
}
+/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
+/// special and don't usually play with other vector types, it's better to
+/// handle them early to be sure we emit efficient code by avoiding
+/// store-load conversions.
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::x86mmx ||
+ N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
+ N->getOperand(0)->getValueType(0) != MVT::v2i32)
+ return SDValue();
+
+ SDValue V = N->getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
+ N->getValueType(0), V.getOperand(0));
+
+ return SDValue();
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// into a somewhat faster sequence. For i686, the best sequence is apparently
uint64_t Mask = MaskNode->getZExtValue();
uint64_t Shift = ShiftNode->getZExtValue();
if (isMask_64(Mask)) {
- uint64_t MaskSize = CountPopulation_64(Mask);
+ uint64_t MaskSize = countPopulation(Mask);
if (Shift + MaskSize <= VT.getSizeInBits())
return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
DAG.getConstant(Shift | (MaskSize << 8), VT));
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptForSize =
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+
// F[X]OR(0.0, x) -> x
- // F[X]OR(x, 0.0) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
+ // F[X]OR(x, 0.0) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
/// Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
- // FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
+
+ // FAND(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
return SDValue();
}
/// Do target-specific dag combines on X86ISD::FANDN nodes
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
- // FANDN(x, 0.0) -> 0.0
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
+ // FANDN(x, 0.0) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
+
return SDValue();
}
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);