#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
+#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
VecIdx);
return Result;
-
+
}
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
bool is64Bit = Subtarget->is64Bit();
- if (Subtarget->isTargetEnvMacho()) {
+ if (Subtarget->isTargetMacho()) {
if (is64Bit)
return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
return new X86LinuxTargetObjectFile();
if (Subtarget->isTargetELF())
return new TargetLoweringObjectFileELF();
- if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ if (Subtarget->isTargetCOFF())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
}
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ if (Subtarget->isOSWindows() && !Subtarget->isTargetMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
-
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
+ addRegisterClass(MVT::i1, &X86::VK1RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ setOperationAction(ISD::XOR, MVT::i1, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
-
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ }
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
- setOperationAction(ISD::TRUNCATE, MVT::i1, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
}
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return MVT::i8;
+ if (!VT.isVector())
+ return MVT::i8;
+
+ const TargetMachine &TM = getTargetMachine();
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
+ switch(VT.getVectorNumElements()) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+
return VT.changeVectorElementTypeToInteger();
}
return true;
}
+bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+
+ return SrcAS < 256 && DestAS < 256;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
+const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
+ static const uint16_t ScratchRegs[] = { X86::R11, 0 };
+ return ScratchRegs;
+}
+
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
- bool IsWindows = Subtarget->isTargetWindows();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
+ else if (RegVT == MVT::i1)
+ RC = &X86::VK1RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ if (!Is64Bit && !IsTailCallConvention(CallConv) &&
+ !Subtarget->getTargetTriple().isOSMSVCRT() &&
argsAreStructReturn(Ins) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
- bool IsWindows = Subtarget->isTargetWindows();
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
+ !Subtarget->getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
if (isCalleeStructRet || isCallerStructRet)
return false;
- // An stdcall caller is expected to clean up its arguments; the callee
- // isn't going to do that.
- if (!CCMatch && CallerCC == CallingConv::X86_StdCall)
+ // An stdcall/thiscall caller is expected to clean up its arguments; the
+ // callee isn't going to do that.
+ // FIXME: this is more restrictive than needed. We could produce a tailcall
+ // when the stack adjustment matches. For example, with a thiscall that takes
+ // only one argument.
+ if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
+ CallerCC == CallingConv::X86_ThisCall))
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
}
}
+/// \brief Return true if the condition is an unsigned comparison operation.
+static bool isX86CCUnsigned(unsigned X86CC) {
+ switch (X86CC) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case X86::COND_E: return true;
+ case X86::COND_G: return false;
+ case X86::COND_GE: return false;
+ case X86::COND_L: return false;
+ case X86::COND_LE: return false;
+ case X86::COND_NE: return true;
+ case X86::COND_B: return true;
+ case X86::COND_A: return true;
+ case X86::COND_BE: return true;
+ case X86::COND_AE: return true;
+ }
+ llvm_unreachable("covered switch fell through?!");
+}
+
/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned LaneSize = NumElts/NumLanes;
// 2 or 4 elements in one lane
-
+
SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- SDLoc &DL, SelectionDAG &DAG) {
+ SDLoc &DL, SelectionDAG &DAG,
+ bool isAfterLegalize) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
// load of the entire vector width starting at the base pointer. If we found
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
+
+ if (isAfterLegalize &&
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
SDValue NewLd = SDValue();
+
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
if (LD.getNode())
return LD;
if(ResVT.is256BitVector())
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
+ if (Op.getNumOperands() == 4) {
+ MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
+ ResVT.getVectorNumElements()/2);
+ SDValue V3 = Op.getOperand(2);
+ SDValue V4 = Op.getOperand(3);
+ return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
+ Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl);
+ }
return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 2);
+ MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
+ assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
+ (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
+ Op.getNumOperands() == 4)));
- // AVX/AVX-512 can use the vinsertf128 instruction to create 256-bit vectors
+ // AVX can use the vinsertf128 instruction to create 256-bit vectors
// from two other 128-bit ones.
+
+ // 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
+ // There is no blend with immediate in AVX-512.
+ if (VT.is512BitVector())
+ return SDValue();
+
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
return SDValue();
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
return SDValue();
}
+/// Extract one bit from mask vector, like v16i1 or v8i1.
+/// AVX-512 feature.
+static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) {
+ SDValue Vec = Op.getOperand(0);
+ SDLoc dl(Vec);
+ MVT VecVT = Vec.getSimpleValueType();
+ SDValue Idx = Op.getOperand(1);
+ MVT EltVT = Op.getSimpleValueType();
+
+ assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
+
+ // variable index can't be handled in mask registers,
+ // extend vector to VR512
+ if (!isa<ConstantSDNode>(Idx)) {
+ MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
+ SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ ExtVT.getVectorElementType(), Ext, Idx);
+ return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
+ }
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal) {
+ unsigned MaxSift = VecVT.getSizeInBits() - 1;
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ DAG.getConstant(MaxSift - IdxVal, MVT::i8));
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ DAG.getConstant(MaxSift, MVT::i8));
+ }
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i1, Vec,
+ DAG.getIntPtrConstant(0));
+}
+
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
MVT VecVT = Vec.getSimpleValueType();
SDValue Idx = Op.getOperand(1);
+
+ if (Op.getSimpleValueType() == MVT::i1)
+ return ExtractBitFromMaskVector(Op, DAG);
+
if (!isa<ConstantSDNode>(Idx)) {
if (VecVT.is512BitVector() ||
(VecVT.is256BitVector() && Subtarget->hasInt256() &&
MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
MaskEltVT.getSizeInBits());
-
- if (Idx.getSimpleValueType() != MaskEltVT)
- if (Idx.getOpcode() == ISD::ZERO_EXTEND ||
- Idx.getOpcode() == ISD::SIGN_EXTEND)
- Idx = Idx.getOperand(0);
- assert(Idx.getSimpleValueType() == MaskEltVT &&
- "Unexpected index in insertelement");
+
+ Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
getZeroVector(MaskVT, Subtarget, DAG, dl),
Idx, DAG.getConstant(0, getPointerTy()));
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- DAG.getIntPtrConstant(0),
- MachinePointerInfo(Ptr),
- false, false, false, 0);
+ SDValue ThreadPointer =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false,
- 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
+ // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
+ // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
+ // during isel.
+ SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits - 1, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, MVT::i8))
: DAG.getConstant(0, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
+ // If the shift amount is larger or equal than the width of a part we can't
+ // rely on the results of shld/shrd. Insert a test and select the appropriate
+ // values for large shift amounts.
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
// Concat upper and lower parts.
//
- if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
+ ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
return Res;
}
- if (!VT.is256BitVector() || !SVT.is128BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
- return SDValue();
-
- assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
-
- // AVX2 has better support of integer extending.
- if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
-
- SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
- static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
- SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In,
- DAG.getUNDEF(MVT::v8i16),
- &Mask[0]));
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+ assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements());
+ return SDValue();
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
+
+ if (VT == MVT::i1) {
+ assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
+ "Invalid scalar TRUNCATE operation");
+ In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(1, InVT));
+ if (InVT.getSizeInBits() == 64)
+ In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::i32, In);
+ else if (InVT.getSizeInBits() < 32)
+ In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
+ return DAG.getNode(X86ISD::TRUNC, DL, VT, In);
+ }
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SelectionDAG &DAG) const {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ SDLoc dl(Op0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) {
if (C->getAPIntValue() == 0)
return EmitTest(Op0, X86CC, DAG);
- SDLoc dl(Op0);
+ if (Op0.getValueType() == MVT::i1) {
+ Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, DAG.getConstant(-1, MVT::i1));
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op0, Op0);
+ }
+ }
+
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
+ // Do the comparison at i32 if it's smaller. This avoids subregister
+ // aliasing issues. Keep the smaller reference if we're optimizing for
+ // size, however, as that'll allow better folding of memory operations.
+ if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
+ !DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize)) {
+ unsigned ExtendOp =
+ isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
+ Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
+ }
// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
// operations may be required for some comparisons.
unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
-
+
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
FlipSigns = true; Invert = true; break;
}
-
+
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
bool hasMinMax =
(Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
|| (Subtarget->hasSSE2() && (VET == MVT::i8));
-
+
if (hasMinMax) {
switch (SetCCOpcode) {
default: break;
case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
}
-
+
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
}
-
+
if (Swap)
std::swap(Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
-
+
if (MinMax)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
- assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
+ assert((VT == MVT::i8 || (Subtarget->hasAVX512() && VT == MVT::i1))
+ && "SetCC type must be 8-bit or 1-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (SSECC != 8) {
- unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd;
- SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1,
+ if (Subtarget->hasAVX512()) {
+ SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
+ DAG.getConstant(SSECC, MVT::i8));
+ return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
+ }
+ SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return LowerSIGN_EXTEND_AVX512(Op, DAG);
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
- (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
+ (VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget->hasInt256())
getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
- assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
+ assert(!Subtarget->isTargetMacho() && "Not implemented");
SDLoc dl(Op);
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- // FIXME: Ensure alignment here
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- Flag = Chain.getValue(1);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
- Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- SPTy).getValue(1);
+ unsigned SPReg = RegInfo->getStackRegister();
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
+ Chain = SP.getValue(1);
+
+ if (Align) {
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
+ }
- SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ SDValue Ops1[2] = { SP, Chain };
return DAG.getMergeValues(Ops1, 2, dl);
}
}
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftByConstNode - Handle vector element shifts where the shift
+// amount is a constant. Takes immediate version of shift as input.
+static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT,
+ SDValue SrcOp, uint64_t ShiftAmt,
+ SelectionDAG &DAG) {
+
+ // Check for ShiftAmt >= element width
+ if (ShiftAmt >= VT.getVectorElementType().getSizeInBits()) {
+ if (Opc == X86ISD::VSRAI)
+ ShiftAmt = VT.getVectorElementType().getSizeInBits() - 1;
+ else
+ return DAG.getConstant(0, VT);
+ }
+
+ assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
+ && "Unknown target vector shift-by-constant node");
+
+ return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
+}
+
// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
- if (isa<ConstantSDNode>(ShAmt)) {
- // Constant may be a TargetConstant. Use a regular constant.
- uint32_t ShiftAmt = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI:
- case X86ISD::VSRLI:
- case X86ISD::VSRAI:
- return DAG.getNode(Opc, dl, VT, SrcOp,
- DAG.getConstant(ShiftAmt, MVT::i32));
- }
- }
+ // Catch shift-by-constant.
+ if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
+ return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
+ CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
+ case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- case Intrinsic::x86_avx512_kortestz:
- case Intrinsic::x86_avx512_kortestc: {
- unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B;
+ case Intrinsic::x86_avx512_kortestz_w:
+ case Intrinsic::x86_avx512_kortestc_w: {
+ unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_vfmadd_ps_512:
+ case Intrinsic::x86_fma_vfmadd_pd_512:
+ case Intrinsic::x86_fma_vfmsub_ps_512:
+ case Intrinsic::x86_fma_vfmsub_pd_512:
+ case Intrinsic::x86_fma_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_vfnmadd_pd_512:
+ case Intrinsic::x86_fma_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_vfnmsub_pd_512:
+ case Intrinsic::x86_fma_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_vfmaddsub_pd_512:
+ case Intrinsic::x86_fma_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_vfmsubadd_pd_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmadd_ps_256:
case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmadd_ps_512:
+ case Intrinsic::x86_fma_vfmadd_pd_512:
Opc = X86ISD::FMADD;
break;
case Intrinsic::x86_fma_vfmsub_ps:
case Intrinsic::x86_fma_vfmsub_pd:
case Intrinsic::x86_fma_vfmsub_ps_256:
case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_512:
+ case Intrinsic::x86_fma_vfmsub_pd_512:
Opc = X86ISD::FMSUB;
break;
case Intrinsic::x86_fma_vfnmadd_ps:
case Intrinsic::x86_fma_vfnmadd_pd:
case Intrinsic::x86_fma_vfnmadd_ps_256:
case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_vfnmadd_pd_512:
Opc = X86ISD::FNMADD;
break;
case Intrinsic::x86_fma_vfnmsub_ps:
case Intrinsic::x86_fma_vfnmsub_pd:
case Intrinsic::x86_fma_vfnmsub_ps_256:
case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_vfnmsub_pd_512:
Opc = X86ISD::FNMSUB;
break;
case Intrinsic::x86_fma_vfmaddsub_ps:
case Intrinsic::x86_fma_vfmaddsub_pd:
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_vfmaddsub_pd_512:
Opc = X86ISD::FMADDSUB;
break;
case Intrinsic::x86_fma_vfmsubadd_ps:
case Intrinsic::x86_fma_vfmsubadd_pd:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_vfmsubadd_pd_512:
Opc = X86ISD::FMSUBADD;
break;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
- SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
if (Src.getOpcode() == ISD::UNDEF)
- Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_gather_qps_mask_512:
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
Opc = X86::VGATHERQPSZrm; break;
case Intrinsic::x86_avx512_gather_qpd_mask_512:
Opc = X86::VGATHERQPDZrm; break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_scatter_qpd_512:
+ case Intrinsic::x86_avx512_scatter_qpd_512:
Opc = X86::VSCATTERQPDZmr; break;
case Intrinsic::x86_avx512_scatter_qps_512:
Opc = X86::VSCATTERQPSZmr; break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
Opc = X86::VSCATTERQPDZmr; break;
case Intrinsic::x86_avx512_scatter_qps_mask_512:
Opc = X86::VSCATTERQPSZmr; break;
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
- assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
- "Only know how to lower V2I64/V4I64 multiply");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
+ "Only know how to lower V2I64/V4I64/V8I64 multiply");
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
- SDValue ShAmt = DAG.getConstant(32, MVT::i32);
-
- SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
- SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+ SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
+ SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
// Bit cast to 32-bit vectors for MULUDQ
- EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
+ (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
- AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
- AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
if ((SplatValue != 0) &&
(SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
- unsigned lg2 = SplatValue.countTrailingZeros();
+ unsigned Lg2 = SplatValue.countTrailingZeros();
// Splat the sign bit.
- SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
- SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ SmallVector<SDValue, 16> Sz(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - 1,
+ EltTy));
+ SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
+ NumElts));
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
- SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SmallVector<SDValue, 16> Amt(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - Lg2,
+ EltTy));
+ SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
+ NumElts));
SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
- SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
- SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+ SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
+ SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
+ NumElts));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
(Subtarget->hasAVX512() &&
(VT == MVT::v8i64 || VT == MVT::v16i32))) {
if (Op.getOpcode() == ISD::SHL)
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRL)
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16,
if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(32,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(32,
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRL:
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRA:
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
}
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(4, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(2, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- // (sext (vzext x)) -> (vsext x)
SDValue Op0 = Op.getOperand(0);
SDValue Op00 = Op0.getOperand(0);
SDValue Tmp1;
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ // (sext (vzext x)) -> (vsext x)
Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
- if (Tmp1.getNode()) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ if (Tmp1.getNode()) {
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ // This folding is only valid when the in-reg type is a vector of i8,
+ // i16, or i32.
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
+ ExtraEltVT == MVT::i32) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+ Op0 = Tmp1;
+ }
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
- Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
- return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
+ DAG);
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+ DAG);
}
}
}
case X86ISD::CMPMU: return "X86ISD::CMPMU";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
- case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
- case X86ISD::FSETCCss: return "X86ISD::FSETCCss";
+ case X86ISD::FSETCC: return "X86ISD::FSETCC";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
- case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
- for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- assert(!Subtarget->isTargetEnvMacho());
+ assert(!Subtarget->isTargetMacho());
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
case X86::CMOV_V8F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
+ case X86::CMOV_V16F32:
+ case X86::CMOV_V8F64:
+ case X86::CMOV_V8I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
case X86::EH_SjLj_LongJmp32:
case X86::EH_SjLj_LongJmp64:
return emitEHSjLjLongJmp(MI, BB);
+
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return emitPatchPoint(MI, BB);
}
}
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
}
/// PerformTruncateCombine - Converts truncate operation to
return NewOp;
SDValue InputVector = N->getOperand(0);
+
// Detect whether we are trying to convert from mmx to i32 and the bitcast
// from mmx to v2i32 has a single usage.
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
- if (Subtarget->hasAVX512() && VT.isVector() &&
- Cond.getValueType().getVectorElementType() == MVT::i1) {
+ EVT CondVT = Cond.getValueType();
+ if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
+ CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on AVX-512. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted
- TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) {
+ TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT &&
+ // Check that condition value type matches vselect operand type
+ CondVT == VT) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
- EVT IntVT = Cond.getValueType();
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
ISD::CondCode NewCC =
ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
Cond.getOperand(0).getValueType().isInteger());
- Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
+ Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
std::swap(LHS, RHS);
TValIsAllOnes = FValIsAllOnes;
FValIsAllZeros = TValIsAllZeros;
if (TValIsAllOnes && FValIsAllZeros)
Ret = Cond;
else if (TValIsAllOnes)
- Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond,
- DAG.getNode(ISD::BITCAST, DL, IntVT, RHS));
+ Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond,
+ DAG.getNode(ISD::BITCAST, DL, CondVT, RHS));
else if (FValIsAllZeros)
- Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond,
- DAG.getNode(ISD::BITCAST, DL, IntVT, LHS));
+ Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond,
+ DAG.getNode(ISD::BITCAST, DL, CondVT, LHS));
return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
}
if (BitWidth == 1)
return SDValue();
+ // Check all uses of that condition operand to check whether it will be
+ // consumed by non-BLEND instructions, which may depend on all bits are set
+ // properly.
+ for (SDNode::use_iterator I = Cond->use_begin(),
+ E = Cond->use_end(); I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
}
/// \brief Returns a vector of 0s if the node in input is a vector logical
-/// shift by a constant amount which is known to be bigger than or equal
+/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
- // if the shift amount is bigger than or equal to
+ // if the shift amount is bigger than or equal to
// the element size. The constant shift amount will be
// encoded as a 8-bit immediate.
if (ShiftAmt.trunc(8).uge(MaxAmount))
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
- X86ISD::NodeType NTOperator = is64BitFP ?
- X86ISD::FSETCCsd : X86ISD::FSETCCss;
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
+ SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01,
DAG.getConstant(x86cc, MVT::i8));
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
+ MVT IntVT = (is64BitFP ? MVT::i64 : MVT::i32);
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
OnesOrZeroesF);
- SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
- DAG.getConstant(1, MVT::i32));
+ SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
+ DAG.getConstant(1, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+
+ // SHLD/SHRD instructions have lower register pressure, but on some
+ // platforms they have higher latency than the equivalent
+ // series of shifts/or that would otherwise be generated.
+ // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions
+ // have higher latencies and we are not optimizing for size.
+ if (!OptForSize && Subtarget->isSHLDSlow())
+ return SDValue();
+
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
- !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ VT == MVT::i64) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
}
+static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
+
+ if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
+ if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
+ std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
+ std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
+
+ if (AsmPieces.size() == 3)
+ return true;
+ else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
+ return true;
+ }
+ }
+ return false;
+}
+
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}")
- return IntrinsicLowering::LowerToByteSwap(CI);
+ if (clobbersFlagRegisters(AsmPieces))
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}")
+ if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}