#define DEBUG_TYPE "x86-isel"
#include "X86ISelLowering.h"
+#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
-#include "Utils/X86ShuffleDecode.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VariadicFunction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
// turn on ones that can be effectively codegen'd.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+ MVT VT = (MVT::SimpleValueType)i;
setOperationAction(ISD::ADD , VT, Expand);
setOperationAction(ISD::SUB , VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
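+ // All of these default to Expand; the subtarget blocks below switch the
+ // combinations the hardware supports back to Legal.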
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
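+ // Each of the rounding ops above selects to a single ROUNDPS/ROUNDPD
+ // with the matching rounding-mode immediate on SSE4.1 targets.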
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
- if (Subtarget->hasAVX2()) {
+ if (Subtarget->hasInt256()) {
setOperationAction(ISD::SRL, MVT::v2i64, Legal);
setOperationAction(ISD::SRL, MVT::v4i32, Legal);
}
}
- if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
- setOperationAction(ISD::FMA, MVT::v8f32, Custom);
- setOperationAction(ISD::FMA, MVT::v4f64, Custom);
- setOperationAction(ISD::FMA, MVT::v4f32, Custom);
- setOperationAction(ISD::FMA, MVT::v2f64, Custom);
- setOperationAction(ISD::FMA, MVT::f32, Custom);
- setOperationAction(ISD::FMA, MVT::f64, Custom);
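+ // With FMA3/FMA4 present, these map directly to the VFMADD*/VFMSUB*
+ // isel patterns, so plain Legal suffices instead of custom lowering.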
+ setOperationAction(ISD::FMA, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
}
- if (Subtarget->hasAVX2()) {
+ if (Subtarget->hasInt256()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
- // linux. This is because the stack realignment code can't handle certain
- // cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (IsZeroVal &&
!F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16))) &&
- Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->getStackAlignment() >= 32) {
- if (Subtarget->hasAVX2())
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
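+ // Prefer a 256-bit type when available: v8i32 needs the AVX2 integer
+ // unit, while AVX1 only provides 256-bit floating-point ops (v8f32).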
+ if (Size >= 32) {
+ if (Subtarget->hasInt256())
return MVT::v8i32;
- if (Subtarget->hasAVX())
+ if (Subtarget->hasFp256())
return MVT::v8f32;
}
if (Subtarget->hasSSE2())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
- Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
+ CC == CallingConv::HiPE);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
- "Var args not supported with calling convention fastcc or ghc");
+ "Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
return TailCallOpt;
case CallingConv::GHC:
return TailCallOpt;
+ case CallingConv::HiPE:
+ return TailCallOpt;
}
}
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
- if (Val < 0 || Val == CmpVal)
- return true;
- return false;
+ return (Val < 0 || Val == CmpVal);
}
/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
- if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Lower quadword copied in order or undef.
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
- if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+ if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
// Upper quadword copied in order.
static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
const X86Subtarget *Subtarget) {
if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
- (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
+ (VT.getSizeInBits() == 256 && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
bool Commuted = false) {
- if (!HasAVX && VT.getSizeInBits() == 256)
+ if (!HasFp256 && VT.getSizeInBits() == 256)
return false;
unsigned NumElems = VT.getVectorNumElements();
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2, bool V2IsSplat = false) {
+ bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2, bool V2IsSplat = false) {
+ bool HasInt256, bool V2IsSplat = false) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
- bool HasAVX2) {
+ bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
- (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || !VT.is256BitVector())
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
return false;
// The shuffle result is divided into half A and half B. In total the two
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX)
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256)
return false;
unsigned NumElts = VT.getVectorNumElements();
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
- if (!HasAVX || !VT.is256BitVector())
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+ if (!HasFp256 || !VT.is256BitVector())
return false;
unsigned NumElts = VT.getVectorNumElements();
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (Size == 256) { // AVX
- if (Subtarget->hasAVX2()) { // AVX2
+ if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
unsigned Size = VT.getSizeInBits();
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
if (Size == 256) {
- if (HasAVX2) { // AVX2
+ if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
} else { // AVX
/// or SDValue() otherwise.
SDValue
X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
EVT VT = Op.getValueType();
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
Sc.getOpcode() != ISD::BUILD_VECTOR) {
- if (!Subtarget->hasAVX2())
+ if (!Subtarget->hasInt256())
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
// Handle the broadcasting a single constant scalar from the constant pool
// into a vector. On Sandybridge it is still better to load a constant vector
// from the constant pool and not to broadcast it from a scalar.
- if (ConstSplatVal && Subtarget->hasAVX2()) {
+ if (ConstSplatVal && Subtarget->hasInt256()) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
unsigned ScalarSize = CVT.getSizeInBits();
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
// Handle AVX2 in-register broadcasts.
- if (!IsLoad && Subtarget->hasAVX2() &&
+ if (!IsLoad && Subtarget->hasInt256() &&
(ScalarSize == 32 || (Is256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// The integer check is needed for the 64-bit into 128-bit case, so it doesn't
// match double, since there is no vbroadcastsd xmm instruction.
- if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
+ if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ EVT VT = SVOp->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasSSE41())
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
return SDValue();
- unsigned ISDNo = 0;
- MVT OpTy;
-
- switch (VT.SimpleTy) {
- default: return SDValue();
- case MVT::v8i16:
- ISDNo = X86ISD::BLENDPW;
- OpTy = MVT::v8i16;
- break;
- case MVT::v4i32:
- case MVT::v4f32:
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v4f32;
- break;
- case MVT::v2i64:
- case MVT::v2f64:
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v2f64;
- break;
- case MVT::v8i32:
- case MVT::v8f32:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPS;
- OpTy = MVT::v8f32;
- break;
- case MVT::v4i64:
- case MVT::v4f64:
- if (!Subtarget->hasAVX())
- return SDValue();
- ISDNo = X86ISD::BLENDPD;
- OpTy = MVT::v4f64;
- break;
- }
- assert(ISDNo && "Invalid Op Number");
+ // Check the mask for BLEND and build the value.
+ unsigned MaskValue = 0;
+ // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumElemsInLane = NumElems / NumLanes;
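+ // A set bit i in MaskValue selects element i from V2. For example, the
+ // v8i16 mask <0, 9, 2, 11, 4, 13, 6, 15> takes the odd elements from V2,
+ // giving MaskValue == 0xAA.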
- unsigned MaskVals = 0;
+ // Blend for v16i16 should be symmetric for both lanes.
+ for (unsigned i = 0; i < NumElemsInLane; ++i) {
- for (unsigned i = 0; i != NumElems; ++i) {
+ int SndLaneEltIdx = (NumLanes == 2) ?
+ SVOp->getMaskElt(i + NumElemsInLane) : -1;
int EltIdx = SVOp->getMaskElt(i);
- if (EltIdx == (int)i || EltIdx < 0)
- MaskVals |= (1<<i);
- else if (EltIdx == (int)(i + NumElems))
- continue; // Bit is set to zero;
- else
+
+ if ((EltIdx == -1 || EltIdx == (int)i) &&
+ (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+ continue;
+
+ if (((unsigned)EltIdx == (i + NumElems)) &&
+ (SndLaneEltIdx == -1 ||
+ (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
+ MaskValue |= (1<<i);
+ else
return SDValue();
}
- V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
- V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
- SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
- DAG.getConstant(MaskVals, MVT::i32));
+ // Convert i64 (and, without AVX2, i32) vectors to floating point.
+ // AVX2 introduced the VPBLENDD instruction for 128 and 256-bit vectors.
+ EVT BlendVT = VT;
+ if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+ BlendVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT::getFloatingPointVT(EltVT.getSizeInBits()),
+ NumElems);
+ V1 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V2);
+ }
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+ DAG.getConstant(MaskValue, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
// (1) one of input vector is undefined or zeroinitializer.
// The mask value 0x80 puts 0 in the corresponding slot of the vector.
// And (2) the mask indexes don't cross the 128-bit lane.
- if (VT != MVT::v32i8 || !Subtarget->hasAVX2() ||
+ if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
(!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
return SDValue();
return MayFoldLoad(V);
}
-// FIXME: the version above should always be used. Since there's
-// a bug where several vector shuffles can't be folded because the
-// DAG is not updated during lowering and a node claims to have two
-// uses while it only has one, use this version, and let isel match
-// another instruction if the load really happens to have more than
-// one use. Remove this version after this bug get fixed.
-// rdar://8434668, PR8156
-static bool RelaxedMayFoldVectorLoad(SDValue V) {
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
- if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- V = V.getOperand(0);
- if (ISD::isNormalLoad(V.getNode()))
- return true;
- return false;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT VT = Op.getValueType();
// Only AVX2 supports 256-bit vector integer extension.
- if (!Subtarget->hasAVX2() && VT.is256BitVector())
+ if (!Subtarget->hasInt256() && VT.is256BitVector())
return SDValue();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// Handle splats by matching through known shuffle masks
if ((Size == 128 && NumElem <= 4) ||
- (Size == 256 && NumElem < 8))
+ (Size == 256 && NumElem <= 8))
return SDValue();
// All remaining splats are promoted to target supported vector shuffles.
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasSSE2 = Subtarget->hasSSE2();
- bool HasAVX = Subtarget->hasAVX();
- bool HasAVX2 = Subtarget->hasAVX2();
+ bool HasFp256 = Subtarget->hasFp256();
+ bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->getFnAttributes().
hasAttribute(Attributes::OptimizeForSize);
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
- if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
- V2IsUndef && RelaxedMayFoldVectorLoad(V1))
+ V2IsUndef && MayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
// Used to match splats
- if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef &&
+ if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
- if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
- return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
-
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+ if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+ DAG);
+
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
}
// FIXME: fold these into legal mask.
- if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2))
+ if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
if (isMOVHLPSMask(M, VT))
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (isUNPCKLMask(M, VT, HasAVX2))
+ if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(M, VT, HasAVX2))
+ if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// new vector_shuffle with the corrected mask.
SmallVector<int, 8> NewMask(M.begin(), M.end());
NormalizeMask(NewMask, NumElems);
- if (isUNPCKLMask(NewMask, VT, HasAVX2, true))
+ if (isUNPCKLMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(NewMask, VT, HasAVX2, true))
+ if (isUNPCKHMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
std::swap(V1IsSplat, V2IsSplat);
Commuted = false;
- if (isUNPCKLMask(M, VT, HasAVX2))
+ if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (isUNPCKHMask(M, VT, HasAVX2))
+ if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
- if (isPSHUFHWMask(M, VT, HasAVX2))
+ if (isPSHUFHWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
- if (isPSHUFLWMask(M, VT, HasAVX2))
+ if (isPSHUFLWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasAVX))
+ if (isSHUFPMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
- if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
- if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
//
// Handle VMOVDDUPY permutations
- if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasAVX)) {
- if (HasAVX2 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT, HasFp256)) {
+ if (HasInt256 && VT == MVT::v8i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
}
// Handle VPERM2F128/VPERM2I128 permutations
- if (isVPERM2X128Mask(M, VT, HasAVX))
+ if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
if (BlendOp.getNode())
return BlendOp;
- if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+ if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
SmallVector<SDValue, 8> permclMask;
for (unsigned i = 0; i != 8; ++i) {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
}
- if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+ if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
getShuffleCLImmediate(SVOp), DAG);
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget->hasFp256()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue Idx = Op.getNode()->getOperand(1);
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (Subtarget->hasAVX()) {
+ if (Subtarget->hasFp256()) {
DebugLoc dl = Op.getNode()->getDebugLoc();
SDValue Vec = Op.getNode()->getOperand(0);
SDValue SubVec = Op.getNode()->getOperand(1);
VT.getVectorNumElements() != SVT.getVectorNumElements())
return SDValue();
- assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+ assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
// AVX2 has better support of integer extending.
- if (Subtarget->hasAVX2())
+ if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
VT.getVectorNumElements() != SVT.getVectorNumElements())
return SDValue();
- assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+ assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
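+/// isAllOnes - Return true if the specified value is an all-ones constant.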
+static bool isAllOnes(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isAllOnesValue();
+}
+
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
if (LHS.getNode()) {
+ // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
+ // the condition code later.
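+ // This is safe because BT only tests one bit: (x ^ -1) has bit n set
+ // exactly when x has bit n clear, so testing x and taking the opposite
+ // branch condition yields the same result.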
+ bool Invert = false;
+ if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
+ Invert = true;
+ LHS = LHS.getOperand(0);
+ }
+
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ // Flip the condition if the LHS was a 'not' instruction.
+ if (Invert)
+ Cond = X86::GetOppositeBranchCondition(Cond);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, MVT::i8), BT);
}
}
// Break 256-bit integer vector compare into smaller ones.
- if (VT.is256BitVector() && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
return C && C->isNullValue();
}
-static bool isAllOnes(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isAllOnesValue();
-}
-
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget->hasAVX2())
+ if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
uint64_t ShiftAmt = C->getZExtValue();
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
- (Subtarget->hasAVX2() &&
+ (Subtarget->hasInt256() &&
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
if (Op.getOpcode() == ISD::SHL)
return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
llvm_unreachable("Unknown shift opcode.");
}
- if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+ if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
default: return SDValue();
case MVT::v8i32:
case MVT::v16i16:
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
- if (!Subtarget->hasAVX2()) {
+ if (!Subtarget->hasInt256()) {
// needs to be split
unsigned NumElems = VT.getVectorNumElements();
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
- case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
- case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
- case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
+ case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
+bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ EVT VT1 = Val.getValueType();
+ if (isZExtFree(VT1, VT2))
+ return true;
+
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // X86 has 8, 16, and 32-bit zero-extending loads.
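+ // (MOVZX for the 8 and 16-bit cases; a plain 32-bit load already
+ // zeroes the upper 32 bits of the destination on x86-64.)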
+ return true;
+ }
+
+ return false;
+}
+
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
return (VT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
+ isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) ||
- isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) ||
+ isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
+ isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, VT, Subtarget) ||
- isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
- isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2()));
+ isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
+ isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
}
bool
if (NumElts == 4 && VT.is128BitVector()) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
- isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true));
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
}
return false;
}
MBB->addSuccessor(EndMBB);
}
- unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
return SDValue();
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasAVX() && VT.is256BitVector() &&
+ if (Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
if (!DCI.isBeforeLegalizeOps())
return SDValue();
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
EVT VT = N->getValueType(0);
if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
- if (Subtarget->hasAVX2()) {
+ if (Subtarget->hasInt256()) {
// AVX2: v4i64 -> v4i32
// VPERMD
if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
- if (Subtarget->hasAVX2()) {
+ if (Subtarget->hasInt256()) {
// AVX2: v8i32 -> v8i16
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
return SDValue();
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasAVX2() ||
+ (!Subtarget->hasInt256() ||
(VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
if (!Subtarget->hasSSSE3() ||
- (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+ (VT == MVT::v4i64 && !Subtarget->hasInt256()))
return SDValue();
// Canonicalize pandn to RHS
// On Sandy Bridge, 256-bit memory operations are executed by two
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
// memory operation.
- if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
+ if (VT.is256BitVector() && !Subtarget->hasInt256() &&
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
StoredVal.getNumOperands() == 2) {
SDValue Value0 = StoredVal.getOperand(0);
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
// Try to synthesize horizontal subs from subs of shuffles.
if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
- (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
if (!DCI.isBeforeLegalizeOps())
return SDValue();
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
EVT VT = N->getValueType(0);
if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
(VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
- if (Subtarget->hasAVX2())
+ if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
// Optimize vectors in AVX mode
if (!DCI.isBeforeLegalizeOps())
return SDValue();
- if (!Subtarget->hasAVX())
+ if (!Subtarget->hasFp256())
return SDValue();
if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
- if (Subtarget->hasAVX2())
+ if (Subtarget->hasInt256())
return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
// Try to synthesize horizontal adds from adds of shuffles.
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
- (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
case 'x':
case 'Y':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
- ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX()))
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
weight = CW_Register;
break;
case 'I':
return -1;
}
+ScalarTargetTransformInfo::PopcntHwSupport
+X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+ // TODO: Currently the __builtin_popcount() implementation using SSE3
+ // instructions is inefficient. Once the problem is fixed, we should
+ // call ST.hasSSE3() instead of ST.hasSSE41().
+ return ST.hasSSE41() ? Fast : None;
+}
+
unsigned
X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
Type *Ty) const {