#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
return new X86LinuxTargetObjectFile();
if (Subtarget->isTargetELF())
return new TargetLoweringObjectFileELF();
+ if (Subtarget->isTargetWindows())
+ return new X86WindowsTargetObjectFile();
if (Subtarget->isTargetCOFF())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
- setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ if (!Subtarget->hasMOVBE())
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
+ setOperationAction(ISD::OR, MVT::i1, Legal);
+ setOperationAction(ISD::AND, MVT::i1, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return MVT::i8;
+ return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
- const TargetMachine &TM = getTargetMachine();
- if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
+ if (Subtarget->hasAVX512())
switch(VT.getVectorNumElements()) {
case 8: return MVT::v8i1;
case 16: return MVT::v16i1;
- }
+ }
return VT.changeVectorElementTypeToInteger();
}
}
bool
-X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned,
+ bool *Fast) const {
if (Fast)
*Fast = Subtarget->isUnalignedMemAccessFast();
return true;
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
+ assert(VA.getLocInfo() != CCValAssign::FPExt &&
+ "Unexpected FP-extend for return value.");
+
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
X86Info->setTCReturnAddrDelta(FPDiff);
}
+ unsigned NumBytesToPush = NumBytes;
+ unsigned NumBytesToPop = NumBytes;
+
+ // If we have an inalloca argument, all stack space has already been allocated
+ // for us and be right at the top of the stack. We don't support multiple
+ // arguments passed in memory when using inalloca.
+ if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
+ NumBytesToPush = 0;
+ assert(ArgLocs.back().getLocMemOffset() == 0 &&
+ "an inalloca argument must be the only memory argument");
+ }
+
if (!IsSibcall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(
+ Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
SDValue RetAddrFrIdx;
// Load return address for tail calls.
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ // Skip inalloca arguments, they have already been written.
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (Flags.isInAlloca())
+ continue;
+
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
const GlobalValue *GV = G->getGlobal();
- if (!GV->hasDLLImportLinkage()) {
+ if (!GV->hasDLLImportStorageClass()) {
unsigned char OpFlags = 0;
bool ExtraLoad = false;
unsigned WrapperKind = ISD::DELETED_NODE;
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytesToPop, true),
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPush;
+ unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
getTargetMachine().Options.GuaranteedTailCallOpt))
- NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
// For MSVC Win32 targets, the caller pops the hidden struct pointer.
- NumBytesForCalleeToPush = 4;
+ NumBytesForCalleeToPop = 4;
else
- NumBytesForCalleeToPush = 0; // Callee pops nothing.
+ NumBytesForCalleeToPop = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ DAG.getIntPtrConstant(NumBytesToPop, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPop,
true),
InFlag, dl);
InFlag = Chain.getValue(1);
return false;
}
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 64)
+ return false;
+ return true;
+}
+
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
+ } else if (VT.getScalarType() == MVT::i1) {
+ assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ Ops, VT.getVectorNumElements());
} else
llvm_unreachable("Unexpected vector type");
bool AllContants = true;
uint64_t Immediate = 0;
+ int NonConstIdx = -1;
+ bool IsSplat = true;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
if (!isa<ConstantSDNode>(In)) {
AllContants = false;
- break;
+ NonConstIdx = idx;
}
- if (cast<ConstantSDNode>(In)->getZExtValue())
+ else if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
+ if (In != Op.getOperand(0))
+ IsSplat = false;
}
if (AllContants) {
DAG.getIntPtrConstant(0));
}
- // Splat vector (with undefs)
- SDValue In = Op.getOperand(0);
- for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
- if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
- llvm_unreachable("Unsupported predicate operation");
- }
-
- SDValue EFLAGS, X86CC;
- if (In.getOpcode() == ISD::SETCC) {
- SDValue Op0 = In.getOperand(0);
- SDValue Op1 = In.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(In.getOperand(2))->get();
- bool isFP = Op1.getValueType().isFloatingPoint();
- unsigned X86CCVal = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
-
- assert(X86CCVal != X86::COND_INVALID && "Unsupported predicate operation");
-
- X86CC = DAG.getConstant(X86CCVal, MVT::i8);
- EFLAGS = EmitCmp(Op0, Op1, X86CCVal, DAG);
- EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- } else if (In.getOpcode() == X86ISD::SETCC) {
- X86CC = In.getOperand(0);
- EFLAGS = In.getOperand(1);
- } else {
- // The algorithm:
- // Bit1 = In & 0x1
- // if (Bit1 != 0)
- // ZF = 0
- // else
- // ZF = 1
- // if (ZF == 0)
- // res = allOnes ### CMOVNE -1, %res
- // else
- // res = allZero
- MVT InVT = In.getSimpleValueType();
- SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT));
- EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG);
- X86CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- }
-
- if (VT == MVT::v16i1) {
- SDValue Cst1 = DAG.getConstant(-1, MVT::i16);
- SDValue Cst0 = DAG.getConstant(0, MVT::i16);
- SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i16,
- Cst0, Cst1, X86CC, EFLAGS);
- return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp);
- }
-
- if (VT == MVT::v8i1) {
- SDValue Cst1 = DAG.getConstant(-1, MVT::i32);
- SDValue Cst0 = DAG.getConstant(0, MVT::i32);
- SDValue CmovOp = DAG.getNode(X86ISD::CMOV, dl, MVT::i32,
- Cst0, Cst1, X86CC, EFLAGS);
- CmovOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CmovOp);
- return DAG.getNode(ISD::BITCAST, dl, VT, CmovOp);
- }
- llvm_unreachable("Unsupported predicate operation");
+ if (!IsSplat && (NonConstIdx != 0))
+ llvm_unreachable("Unsupported BUILD_VECTOR operation");
+ MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
+ SDValue Select;
+ if (IsSplat)
+ Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
+ DAG.getConstant(-1, SelectVT),
+ DAG.getConstant(0, SelectVT));
+ else
+ Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
+ DAG.getConstant((Immediate | 1), SelectVT),
+ DAG.getConstant(Immediate, SelectVT));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Select);
}
SDValue
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.is256BitVector()) {
- SmallVector<SDValue, 32> V;
+ if (VT.is256BitVector() || VT.is512BitVector()) {
+ SmallVector<SDValue, 64> V;
for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
NumElems/2);
// Recreate the wider vector with the lower and upper part.
- return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+ if (VT.is256BitVector())
+ return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
+ return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
}
// Let legalizer expand 2-wide build_vectors.
if (V1IsUndef && V2IsUndef)
return DAG.getUNDEF(VT);
- assert(!V1IsUndef && "Op 1 of shuffle should not be undef");
+ // When we create a shuffle node we put the UNDEF node to second operand,
+ // but in some cases the first operand may be transformed to UNDEF.
+ // In this case we should just commute the node.
+ if (V1IsUndef)
+ return CommuteVectorShuffle(SVOp, DAG);
// Vector shuffle lowering takes 3 steps:
//
// Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
return DAG.getNode(X86ISD::VPERMV, dl, VT,
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
- return DAG.getNode(X86ISD::VPERMV3, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2);
+ return DAG.getNode(X86ISD::VPERMV3, dl, VT, V1,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V2);
}
//===--------------------------------------------------------------------===//
/// Extract one bit from mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
-static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) {
+SDValue
+X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDLoc dl(Vec);
MVT VecVT = Vec.getSimpleValueType();
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- if (IdxVal) {
- unsigned MaxSift = VecVT.getSizeInBits() - 1;
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
- DAG.getConstant(MaxSift - IdxVal, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
- DAG.getConstant(MaxSift, MVT::i8));
- }
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i1, Vec,
+ const TargetRegisterClass* rc = getRegClassFor(VecVT);
+ unsigned MaxSift = rc->getSize()*8 - 1;
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ DAG.getConstant(MaxSift - IdxVal, MVT::i8));
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ DAG.getConstant(MaxSift, MVT::i8));
+ return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
DAG.getIntPtrConstant(0));
}
/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
-SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
+static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned VTBits = VT.getSizeInBits();
SDLoc dl(Op);
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- EVT SrcVT = Op.getOperand(0).getValueType();
+ MVT SrcVT = Op.getOperand(0).getSimpleValueType();
if (SrcVT.isVector())
return SDValue();
- assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; return the operand so the caller accepts it as
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
- EVT SVT = N0.getValueType();
+ MVT SVT = N0.getSimpleValueType();
SDLoc dl(Op);
assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 ||
SVT == MVT::v8i8 || SVT == MVT::v8i16) &&
"Custom UINT_TO_FP is not supported!");
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
- SVT.getVectorNumElements());
+ MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
- EVT SrcVT = N0.getValueType();
- EVT DstVT = Op.getValueType();
+ MVT SrcVT = N0.getSimpleValueType();
+ MVT DstVT = Op.getSimpleValueType();
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return SDValue();
if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In);
+ return DAG.getNode(X86ISD::VZEXT, dl, VT, In);
SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl);
SDValue Undef = DAG.getUNDEF(InVT);
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
SelectionDAG &DAG) {
- MVT VT = Op->getValueType(0).getSimpleVT();
+ MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
if (VT == MVT::i1) {
assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
"Invalid scalar TRUNCATE operation");
- In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(1, InVT));
+ if (InVT == MVT::i32)
+ return SDValue();
if (InVT.getSizeInBits() == 64)
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::i32, In);
else if (InVT.getSizeInBits() < 32)
In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
- return DAG.getNode(X86ISD::TRUNC, DL, VT, In);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
}
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
}
+
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
DAG.getIntPtrConstant(0));
}
- // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS.
SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0));
SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(2));
-
OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
-
- // The PSHUFD mask:
- static const int ShufMask1[] = {0, 2, 0, 0};
- SDValue Undef = DAG.getUNDEF(VT);
- OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1);
-
- // The MOVLHPS mask:
- static const int ShufMask2[] = {0, 1, 4, 5};
- return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
+ static const int ShufMask[] = {0, 2, 4, 6};
+ return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- NumElems * 2);
+ MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);
SmallVector<int, 16> MaskVec(NumElems * 2, -1);
// Prepare truncation shuffle mask
In, DAG.getUNDEF(SVT)));
}
-SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
APInt(32, ~(1U << 31))));
C = ConstantVector::getSplat(NumElts, C);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
-SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
APInt(32, 1U << 31)));
C = ConstantVector::getSplat(NumElts, C);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
-SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LLVMContext *Context = DAG.getContext();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0))));
}
C = ConstantVector::get(CV);
- CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ if (Op.getValueType() == MVT::i1)
+ // KORTEST instruction should be selected
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
// CF and OF aren't always set the way we want. Determine which
// of these we need.
bool NeedCF = false;
NeedOF = true;
break;
}
-
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
- if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ if (Op.getResNo() != 0 || NeedOF || NeedCF) {
// Emit a CMP with 0, which is the TEST pattern.
+ //if (Op.getValueType() == MVT::i1)
+ // return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op,
+ // DAG.getConstant(0, MVT::i1));
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
-
+ }
unsigned Opcode = 0;
unsigned NumOperands = 0;
if (C->getAPIntValue() == 0)
return EmitTest(Op0, X86CC, DAG);
- if (Op0.getValueType() == MVT::i1) {
- Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, DAG.getConstant(-1, MVT::i1));
- return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op0, Op0);
- }
+ if (Op0.getValueType() == MVT::i1)
+ llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
}
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
-static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
+ SDLoc dl(Op);
assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
Op.getValueType().getScalarType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- SDLoc dl(Op);
-
+ unsigned Opc = 0;
bool Unsigned = false;
+ bool Swap = false;
unsigned SSECC;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETEQ: SSECC = 0; break;
- case ISD::SETUGT: Unsigned = true;
- case ISD::SETGT: SSECC = 6; break; // NLE
- case ISD::SETULT: Unsigned = true;
- case ISD::SETLT: SSECC = 1; break;
- case ISD::SETUGE: Unsigned = true;
- case ISD::SETGE: SSECC = 5; break; // NLT
- case ISD::SETULE: Unsigned = true;
+ case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
+ case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
+ case ISD::SETLT: Swap = true; //fall-through
+ case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
+ case ISD::SETULT: SSECC = 1; Unsigned = true; break;
+ case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
+ case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
+ case ISD::SETULE: Unsigned = true; //fall-through
case ISD::SETLE: SSECC = 2; break;
}
- unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
+
+ if (Swap)
+ std::swap(Op0, Op1);
+ if (Opc)
+ return DAG.getNode(Opc, dl, VT, Op0, Op1);
+ Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, MVT::i8));
+}
+
+/// \brief Try to turn a VSETULT into a VSETULE by modifying its second
+/// operand \p Op1. If non-trivial (for example because it's not constant)
+/// return an empty value.
+static SDValue ChangeVSETULTtoVSETULE(SDValue Op1, SelectionDAG &DAG)
+{
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
+ if (!BV)
+ return SDValue();
+ MVT VT = Op1.getSimpleValueType();
+ MVT EVT = VT.getVectorElementType();
+ unsigned n = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> ULTOp1;
+
+ for (unsigned i = 0; i < n; ++i) {
+ ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT)
+ return SDValue();
+
+ // Avoid underflow.
+ APInt Val = Elt->getAPIntValue();
+ if (Val == 0)
+ return SDValue();
+
+ ULTOp1.push_back(DAG.getConstant(Val - 1, EVT));
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op1), VT, ULTOp1.data(),
+ ULTOp1.size());
}
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (Subtarget->hasAVX512()) {
if (Op1.getValueType().is512BitVector() ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
- return LowerIntVSETCC_AVX512(Op, DAG);
+ return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
// In AVX-512 architecture setcc returns mask with i1 elements,
// But there is no compare instruction for i8 and i16 elements.
// operations may be required for some comparisons.
unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
+ bool Subus = false;
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
- case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
+ case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
- case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
+ case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
- case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ case ISD::SETLE: Opc = X86ISD::PCMPGT;
Invert = true; break;
case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ case ISD::SETUGT: Opc = X86ISD::PCMPGT;
FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ case ISD::SETULE: Opc = X86ISD::PCMPGT;
FlipSigns = true; Invert = true; break;
}
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
}
+ bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+ if (!MinMax && hasSubus) {
+ // As another special case, use PSUBUS[BW] when it's profitable. E.g. for
+ // Op0 u<= Op1:
+ // t = psubus Op0, Op1
+ // pcmpeq t, <0..0>
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETULT: {
+ // If the comparison is against a constant we can turn this into a
+ // setule. With psubus, setule does not require a swap. This is
+ // beneficial because the constant in the register is no longer
+ // destructed as the destination so it can be hoisted out of a loop.
+ // Only do this pre-AVX since vpcmp* is no longer destructive.
+ if (Subtarget->hasAVX())
+ break;
+ SDValue ULEOp1 = ChangeVSETULTtoVSETULE(Op1, DAG);
+ if (ULEOp1.getNode()) {
+ Op1 = ULEOp1;
+ Subus = true; Invert = false; Swap = false;
+ }
+ break;
+ }
+ // Psubus is better than flip-sign because it requires no inversion.
+ case ISD::SETUGE: Subus = true; Invert = false; Swap = true; break;
+ case ISD::SETULE: Subus = true; Invert = false; Swap = false; break;
+ }
+
+ if (Subus) {
+ Opc = X86ISD::SUBUS;
+ FlipSigns = false;
+ }
+ }
+
if (Swap)
std::swap(Op0, Op1);
if (MinMax)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
+ if (Subus)
+ Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
+ getZeroVector(VT, Subtarget, DAG, dl));
+
return Result;
}
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
- assert((VT == MVT::i8 || (Subtarget->hasAVX512() && VT == MVT::i1))
+ assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
&& "SetCC type must be 8-bit or 1-bit integer");
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
bool Invert = (CC == ISD::SETNE) ^
cast<ConstantSDNode>(Op1)->isNullValue();
- if (!Invert) return Op0;
+ if (!Invert)
+ return Op0;
CCode = X86::GetOppositeBranchCondition(CCode);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8),
+ Op0.getOperand(1));
+ if (VT == MVT::i1)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
+ return SetCC;
}
}
+ if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
+ return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC);
+ }
bool isFP = Op1.getSimpleValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), EFLAGS);
+ if (VT == MVT::i1)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
+ return SetCC;
}
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
return SDValue();
if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In);
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
// Optimize vectors in AVX mode
// Sign extend v8i16 to v8i32 and
MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
VT.getVectorNumElements()/2);
- OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
- OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+ OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
+ // Keep this in sync with LowerXALUO, otherwise we might create redundant
+ // instructions that can't be removed afterwards (i.e. X86ISD::ADD and
+ // X86ISD::INC).
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
- case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::SADDO:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
+ break;
+ }
+ X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
- case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::SSUBO:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
+ X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
+ break;
+ }
+ X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
// getTargetVShiftByConstNode - Handle vector element shifts where the shift
// amount is a constant. Takes immediate version of shift as input.
-static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue SrcOp, uint64_t ShiftAmt,
SelectionDAG &DAG) {
+ MVT ElementType = VT.getVectorElementType();
// Check for ShiftAmt >= element width
- if (ShiftAmt >= VT.getVectorElementType().getSizeInBits()) {
+ if (ShiftAmt >= ElementType.getSizeInBits()) {
if (Opc == X86ISD::VSRAI)
- ShiftAmt = VT.getVectorElementType().getSizeInBits() - 1;
+ ShiftAmt = ElementType.getSizeInBits() - 1;
else
return DAG.getConstant(0, VT);
}
assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
&& "Unknown target vector shift-by-constant node");
+ // Fold this packed vector shift into a build vector if SrcOp is a
+ // vector of Constants or UNDEFs, and SrcOp valuetype is the same as VT.
+ if (VT == SrcOp.getSimpleValueType() &&
+ ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = SrcOp->getNumOperands();
+ ConstantSDNode *ND;
+
+ switch(Opc) {
+ default: llvm_unreachable(0);
+ case X86ISD::VSHLI:
+ for (unsigned i=0; i!=NumElts; ++i) {
+ SDValue CurrentOp = SrcOp->getOperand(i);
+ if (CurrentOp->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(CurrentOp);
+ continue;
+ }
+ ND = cast<ConstantSDNode>(CurrentOp);
+ const APInt &C = ND->getAPIntValue();
+ Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), ElementType));
+ }
+ break;
+ case X86ISD::VSRLI:
+ for (unsigned i=0; i!=NumElts; ++i) {
+ SDValue CurrentOp = SrcOp->getOperand(i);
+ if (CurrentOp->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(CurrentOp);
+ continue;
+ }
+ ND = cast<ConstantSDNode>(CurrentOp);
+ const APInt &C = ND->getAPIntValue();
+ Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), ElementType));
+ }
+ break;
+ case X86ISD::VSRAI:
+ for (unsigned i=0; i!=NumElts; ++i) {
+ SDValue CurrentOp = SrcOp->getOperand(i);
+ if (CurrentOp->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(CurrentOp);
+ continue;
+ }
+ ND = cast<ConstantSDNode>(CurrentOp);
+ const APInt &C = ND->getAPIntValue();
+ Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), ElementType));
+ }
+ break;
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElts);
+ }
+
return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
}
// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
-static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
// The return type has to be a 128-bit type with the same element
// type as the input type.
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
- case Intrinsic::x86_avx512_pmaxu_d:
- case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
- case Intrinsic::x86_avx512_pminu_d:
- case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
- case Intrinsic::x86_avx512_pmaxs_d:
- case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d:
- case Intrinsic::x86_avx512_pmins_d:
- case Intrinsic::x86_avx512_pmins_q: {
+ case Intrinsic::x86_avx2_pmins_d: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
- case Intrinsic::x86_avx512_pmaxu_d:
- case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
- case Intrinsic::x86_avx512_pminu_d:
- case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
- case Intrinsic::x86_avx512_pmaxs_d:
- case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
- case Intrinsic::x86_avx512_pmins_d:
- case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
- case Intrinsic::x86_avx512_max_ps_512:
- case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256:
- case Intrinsic::x86_avx512_min_ps_512:
- case Intrinsic::x86_avx512_min_pd_512: {
+ case Intrinsic::x86_avx_min_pd_256: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
- case Intrinsic::x86_avx512_max_ps_512:
- case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
- case Intrinsic::x86_avx512_min_ps_512:
- case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
Opcode = X86ISD::VSRAI;
break;
}
- return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
+ return getTargetVShiftNode(Opcode, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
}
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getValueType().getVectorNumElements());
+ Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getValueType().getVectorNumElements());
+ Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getValueType().getVectorNumElements());
+ Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getValueType().getVectorNumElements());
+ Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
case Intrinsic::x86_avx512_gather_dpi_512: {
unsigned Opc;
switch (IntNo) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
- case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
- case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
- case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
- case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
- case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Index = Op.getOperand(2);
case Intrinsic::x86_avx512_gather_dpq_mask_512: {
unsigned Opc;
switch (IntNo) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_gather_qps_mask_512:
- Opc = X86::VGATHERQPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpd_mask_512:
- Opc = X86::VGATHERQPDZrm; break;
- case Intrinsic::x86_avx512_gather_dpd_mask_512:
- Opc = X86::VGATHERDPDZrm; break;
- case Intrinsic::x86_avx512_gather_dps_mask_512:
- Opc = X86::VGATHERDPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpi_mask_512:
- Opc = X86::VPGATHERQDZrm; break;
- case Intrinsic::x86_avx512_gather_qpq_mask_512:
- Opc = X86::VPGATHERQQZrm; break;
- case Intrinsic::x86_avx512_gather_dpi_mask_512:
- Opc = X86::VPGATHERDDZrm; break;
- case Intrinsic::x86_avx512_gather_dpq_mask_512:
- Opc = X86::VPGATHERDQZrm; break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_mask_512:
+ Opc = X86::VPGATHERDQZrm; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
case Intrinsic::x86_avx512_scatter_dpi_512: {
unsigned Opc;
switch (IntNo) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_scatter_qpd_512:
- Opc = X86::VSCATTERQPDZmr; break;
- case Intrinsic::x86_avx512_scatter_qps_512:
- Opc = X86::VSCATTERQPSZmr; break;
- case Intrinsic::x86_avx512_scatter_dpd_512:
- Opc = X86::VSCATTERDPDZmr; break;
- case Intrinsic::x86_avx512_scatter_dps_512:
- Opc = X86::VSCATTERDPSZmr; break;
- case Intrinsic::x86_avx512_scatter_qpi_512:
- Opc = X86::VPSCATTERQDZmr; break;
- case Intrinsic::x86_avx512_scatter_qpq_512:
- Opc = X86::VPSCATTERQQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpq_512:
- Opc = X86::VPSCATTERDQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpi_512:
- Opc = X86::VPSCATTERDDZmr; break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_512:
+ Opc = X86::VPSCATTERDDZmr; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
case Intrinsic::x86_avx512_scatter_dpq_mask_512: {
unsigned Opc;
switch (IntNo) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::x86_avx512_scatter_qpd_mask_512:
- Opc = X86::VSCATTERQPDZmr; break;
- case Intrinsic::x86_avx512_scatter_qps_mask_512:
- Opc = X86::VSCATTERQPSZmr; break;
- case Intrinsic::x86_avx512_scatter_dpd_mask_512:
- Opc = X86::VSCATTERDPDZmr; break;
- case Intrinsic::x86_avx512_scatter_dps_mask_512:
- Opc = X86::VSCATTERDPSZmr; break;
- case Intrinsic::x86_avx512_scatter_qpi_mask_512:
- Opc = X86::VPSCATTERQDZmr; break;
- case Intrinsic::x86_avx512_scatter_qpq_mask_512:
- Opc = X86::VPSCATTERQQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpq_mask_512:
- Opc = X86::VPSCATTERDQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpi_mask_512:
- Opc = X86::VPSCATTERDDZmr; break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ Opc = X86::VPSCATTERDDZmr; break;
}
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
EVT PtrVT = getPointerTy();
const TargetMachine &TM = MF.getTarget();
const TargetFrameLowering &TFI = *TM.getFrameLowering();
unsigned StackAlignment = TFI.getStackAlignment();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Save FP Control Word to stack slot
}
static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
}
static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
}
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
Op = Op.getOperand(0);
// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
// ones, and then concatenate the result back.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && VT.isInteger() &&
"Unsupported value type for operation");
SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl);
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl);
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1),
}
static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
- assert(Op.getValueType().is256BitVector() &&
- Op.getValueType().isInteger() &&
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
- assert(Op.getValueType().is256BitVector() &&
- Op.getValueType().isInteger() &&
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
return Lower256IntArith(Op, DAG);
}
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget->hasInt256())
}
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT EltTy = VT.getVectorElementType();
+ MVT VT = Op.getSimpleValueType();
+ MVT EltTy = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
- unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
uint64_t ShiftAmt = 0;
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget* Subtarget) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRA:
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v4i32:
case MVT::v8i16:
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRL:
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
- unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
std::vector<SDValue> Vals(Ratio);
for (unsigned i = 0; i != Ratio; ++i)
static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
return Op;
}
+ // If possible, lower this packed shift into a vector multiply instead of
+ // expanding it into a sequence of scalar shifts.
+ // Do this only if the vector shift count is a constant build_vector.
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v8i16 || VT == MVT::v4i32 ||
+ (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
+ ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+ SmallVector<SDValue, 8> Elts;
+ EVT SVT = VT.getScalarType();
+ unsigned SVTBits = SVT.getSizeInBits();
+ const APInt &One = APInt(SVTBits, 1);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ for (unsigned i=0; i !=NumElems; ++i) {
+ SDValue Op = Amt->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(Op);
+ continue;
+ }
+
+ ConstantSDNode *ND = cast<ConstantSDNode>(Op);
+ const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+ uint64_t ShAmt = C.getZExtValue();
+ if (ShAmt >= SVTBits) {
+ Elts.push_back(DAG.getUNDEF(SVT));
+ continue;
+ }
+ Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
+ }
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElems);
+ return DAG.getNode(ISD::MUL, dl, VT, R, BV);
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
+
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
return R;
}
+ // It's worth extending once and using the v8i32 shifts for 16-bit types, but
+ // the extra overheads to get from v16i8 to v8i32 make the existing SSE
+ // solution better.
+ if (Subtarget->hasInt256() && VT == MVT::v8i16) {
+ MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
+ unsigned ExtOpc =
+ Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ R = DAG.getNode(ExtOpc, dl, NewVT, R);
+ Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
+ }
+
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Extract the two vectors
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
if (!Subtarget->hasSSE2() || !VT.isVector())
return SDValue();
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v8i32:
case MVT::v16i16:
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl);
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl);
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ MVT EltVT = VT.getVectorElementType();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
EVT ExtraEltVT = ExtraVT.getVectorElementType();
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT T = Op.getValueType();
+ MVT T = Op.getSimpleValueType();
SDLoc DL(Op);
unsigned Reg = 0;
unsigned size = 0;
- switch(T.getSimpleVT().SimpleTy) {
+ switch(T.SimpleTy) {
default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getNode()->getValueType(0);
+ EVT VT = Op.getNode()->getSimpleValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
Node->getOperand(1), Zero, Zero,
cast<AtomicSDNode>(Node)->getMemOperand(),
cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
- case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::BLSI: return "X86ISD::BLSI";
- case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
- case X86ISD::BLSR: return "X86ISD::BLSR";
case X86ISD::BZHI: return "X86ISD::BZHI";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
+ case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
+ case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
return true;
}
+bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
+ unsigned Bits = Ty->getScalarSizeInBits();
+
+ // 8-bit shifts are always expensive, but versions with a scalar amount aren't
+ // particularly cheaper than those without.
+ if (Bits == 8)
+ return false;
+
+ // On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make
+ // variable shifts just as cheap as scalar ones.
+ if (Subtarget->hasInt256() && (Bits == 32 || Bits == 64))
+ return false;
+
+ // Otherwise, it's significantly cheaper to shift by a scalar amount than by a
+ // fully general vector.
+ return true;
+}
+
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// Transfer the remainder of MBB and its successor edges to endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- thisMBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Make offsetMBB and overflowMBB successors of thisMBB
// Transfer the remainder of MBB and its successor edges to EndMBB.
EndMBB->splice(EndMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(EndMBB);
}
+ // Make sure the last operand is EFLAGS, which gets clobbered by the branch
+ // that was just emitted, but clearly shouldn't be "saved".
+ assert((MI->getNumOperands() <= 3 ||
+ !MI->getOperand(MI->getNumOperands() - 1).isReg() ||
+ MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
+ && "Expected last argument to be EFLAGS");
unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
MachineBasicBlock* BB,
const TargetRegisterInfo* TRI) {
// Scan forward through BB for a use/def of EFLAGS.
- MachineBasicBlock::iterator miI(llvm::next(SelectItr));
+ MachineBasicBlock::iterator miI(std::next(SelectItr));
for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
const MachineInstr& mi = *miI;
if (mi.readsRegister(X86::EFLAGS))
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
MF->insert(MBBIter, mallocMBB);
MF->insert(MBBIter, continueMBB);
- continueMBB->splice(continueMBB->begin(), BB, llvm::next
- (MachineBasicBlock::iterator(MI)), BB->end());
+ continueMBB->splice(continueMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
continueMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add code to the main basic block to check if the stack limit has been hit,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
return MBB;
}
+// Replace 213-type (isel default) FMA3 instructions with 231-type for
+// accumulator loops. Writing back to the accumulator allows the coalescer
+// to remove extra copies in the loop.
+MachineBasicBlock *
+X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ MachineOperand &AddendOp = MI->getOperand(3);
+
+ // Bail out early if the addend isn't a register - we can't switch these.
+ if (!AddendOp.isReg())
+ return MBB;
+
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Check whether the addend is defined by a PHI:
+ assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");
+ MachineInstr &AddendDef = *MRI.def_begin(AddendOp.getReg());
+ if (!AddendDef.isPHI())
+ return MBB;
+
+ // Look for the following pattern:
+ // loop:
+ // %addend = phi [%entry, 0], [%loop, %result]
+ // ...
+ // %result<tied1> = FMA213 %m2<tied0>, %m1, %addend
+
+ // Replace with:
+ // loop:
+ // %addend = phi [%entry, 0], [%loop, %result]
+ // ...
+ // %result<tied1> = FMA231 %addend<tied0>, %m1, %m2
+
+ for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
+ assert(AddendDef.getOperand(i).isReg());
+ MachineOperand PHISrcOp = AddendDef.getOperand(i);
+ MachineInstr &PHISrcInst = *MRI.def_begin(PHISrcOp.getReg());
+ if (&PHISrcInst == MI) {
+ // Found a matching instruction.
+ unsigned NewFMAOpc = 0;
+ switch (MI->getOpcode()) {
+ case X86::VFMADDPDr213r: NewFMAOpc = X86::VFMADDPDr231r; break;
+ case X86::VFMADDPSr213r: NewFMAOpc = X86::VFMADDPSr231r; break;
+ case X86::VFMADDSDr213r: NewFMAOpc = X86::VFMADDSDr231r; break;
+ case X86::VFMADDSSr213r: NewFMAOpc = X86::VFMADDSSr231r; break;
+ case X86::VFMSUBPDr213r: NewFMAOpc = X86::VFMSUBPDr231r; break;
+ case X86::VFMSUBPSr213r: NewFMAOpc = X86::VFMSUBPSr231r; break;
+ case X86::VFMSUBSDr213r: NewFMAOpc = X86::VFMSUBSDr231r; break;
+ case X86::VFMSUBSSr213r: NewFMAOpc = X86::VFMSUBSSr231r; break;
+ case X86::VFNMADDPDr213r: NewFMAOpc = X86::VFNMADDPDr231r; break;
+ case X86::VFNMADDPSr213r: NewFMAOpc = X86::VFNMADDPSr231r; break;
+ case X86::VFNMADDSDr213r: NewFMAOpc = X86::VFNMADDSDr231r; break;
+ case X86::VFNMADDSSr213r: NewFMAOpc = X86::VFNMADDSSr231r; break;
+ case X86::VFNMSUBPDr213r: NewFMAOpc = X86::VFNMSUBPDr231r; break;
+ case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break;
+ case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break;
+ case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break;
+ case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break;
+ case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break;
+ case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break;
+ case X86::VFMSUBPSr213rY: NewFMAOpc = X86::VFMSUBPSr231rY; break;
+ case X86::VFNMADDPDr213rY: NewFMAOpc = X86::VFNMADDPDr231rY; break;
+ case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break;
+ case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break;
+ case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break;
+ default: llvm_unreachable("Unrecognized FMA variant.");
+ }
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(3))
+ .addOperand(MI->getOperand(2))
+ .addOperand(MI->getOperand(1));
+ MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+ MI->eraseFromParent();
+ }
+ }
+
+ return MBB;
+}
+
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+
+ case X86::VFMADDPDr213r:
+ case X86::VFMADDPSr213r:
+ case X86::VFMADDSDr213r:
+ case X86::VFMADDSSr213r:
+ case X86::VFMSUBPDr213r:
+ case X86::VFMSUBPSr213r:
+ case X86::VFMSUBSDr213r:
+ case X86::VFMSUBSSr213r:
+ case X86::VFNMADDPDr213r:
+ case X86::VFNMADDPSr213r:
+ case X86::VFNMADDSDr213r:
+ case X86::VFNMADDSSr213r:
+ case X86::VFNMSUBPDr213r:
+ case X86::VFNMSUBPSr213r:
+ case X86::VFNMSUBSDr213r:
+ case X86::VFNMSUBSSr213r:
+ case X86::VFMADDPDr213rY:
+ case X86::VFMADDPSr213rY:
+ case X86::VFMSUBPDr213rY:
+ case X86::VFMSUBPSr213rY:
+ case X86::VFNMADDPDr213rY:
+ case X86::VFNMADDPSr213rY:
+ case X86::VFNMSUBPDr213rY:
+ case X86::VFNMSUBPSr213rY:
+ return emitFMA3Instr(MI, BB);
}
}
}
}
+ // Try to fold this VSELECT into a MOVSS/MOVSD
+ if (N->getOpcode() == ISD::VSELECT &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR && !DCI.isBeforeLegalize()) {
+ if (VT == MVT::v4i32 || VT == MVT::v4f32 ||
+ (Subtarget->hasSSE2() && (VT == MVT::v2i64 || VT == MVT::v2f64))) {
+ bool CanFold = false;
+ unsigned NumElems = Cond.getNumOperands();
+ SDValue A = LHS;
+ SDValue B = RHS;
+
+ if (isZero(Cond.getOperand(0))) {
+ CanFold = true;
+
+ // fold (vselect <0,-1,-1,-1>, A, B) -> (movss A, B)
+ // fold (vselect <0,-1> -> (movsd A, B)
+ for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
+ CanFold = isAllOnes(Cond.getOperand(i));
+ } else if (isAllOnes(Cond.getOperand(0))) {
+ CanFold = true;
+ std::swap(A, B);
+
+ // fold (vselect <-1,0,0,0>, A, B) -> (movss B, A)
+ // fold (vselect <-1,0> -> (movsd B, A)
+ for (unsigned i = 1, e = NumElems; i != e && CanFold; ++i)
+ CanFold = isZero(Cond.getOperand(i));
+ }
+
+ if (CanFold) {
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG);
+ return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG);
+ }
+
+ if (Subtarget->hasSSE2() && (VT == MVT::v4i32 || VT == MVT::v4f32)) {
+ // fold (v4i32: vselect <0,0,-1,-1>, A, B) ->
+ // (v4i32 (bitcast (movsd (v2i64 (bitcast A)),
+ // (v2i64 (bitcast B)))))
+ //
+ // fold (v4f32: vselect <0,0,-1,-1>, A, B) ->
+ // (v4f32 (bitcast (movsd (v2f64 (bitcast A)),
+ // (v2f64 (bitcast B)))))
+ //
+ // fold (v4i32: vselect <-1,-1,0,0>, A, B) ->
+ // (v4i32 (bitcast (movsd (v2i64 (bitcast B)),
+ // (v2i64 (bitcast A)))))
+ //
+ // fold (v4f32: vselect <-1,-1,0,0>, A, B) ->
+ // (v4f32 (bitcast (movsd (v2f64 (bitcast B)),
+ // (v2f64 (bitcast A)))))
+
+ CanFold = (isZero(Cond.getOperand(0)) &&
+ isZero(Cond.getOperand(1)) &&
+ isAllOnes(Cond.getOperand(2)) &&
+ isAllOnes(Cond.getOperand(3)));
+
+ if (!CanFold && isAllOnes(Cond.getOperand(0)) &&
+ isAllOnes(Cond.getOperand(1)) &&
+ isZero(Cond.getOperand(2)) &&
+ isZero(Cond.getOperand(3))) {
+ CanFold = true;
+ std::swap(LHS, RHS);
+ }
+
+ if (CanFold) {
+ EVT NVT = (VT == MVT::v4i32) ? MVT::v2i64 : MVT::v2f64;
+ SDValue NewA = DAG.getNode(ISD::BITCAST, DL, NVT, LHS);
+ SDValue NewB = DAG.getNode(ISD::BITCAST, DL, NVT, RHS);
+ SDValue Select = getTargetShuffleNode(X86ISD::MOVSD, DL, NVT, NewA,
+ NewB, DAG);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Select);
+ }
+ }
+ }
+ }
+
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
- bool is64BitFP = (CMP00.getValueType() == MVT::f64);
// FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
- SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01,
+ if (Subtarget->hasAVX512()) {
+ SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
+ CMP01, DAG.getConstant(x86cc, MVT::i8));
+ if (N->getValueType(0) != MVT::i1)
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0),
+ FSetCC);
+ return FSetCC;
+ }
+ SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
+ CMP00.getValueType(), CMP00, CMP01,
DAG.getConstant(x86cc, MVT::i8));
- MVT IntVT = (is64BitFP ? MVT::i64 : MVT::i32);
- SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT,
- OnesOrZeroesF);
+
+ bool is64BitFP = (CMP00.getValueType() == MVT::f64);
+ MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
+
+ if (is64BitFP && !Subtarget->is64Bit()) {
+ // On a 32-bit target, we cannot bitcast the 64-bit float to a
+ // 64-bit integer, since that's not a legal type. Since
+ // OnesOrZeroesF is all ones of all zeroes, we don't need all the
+ // bits, but can do this little dance to extract the lowest 32 bits
+ // and work with those going forward.
+ SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
+ OnesOrZeroesF);
+ SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
+ Vector64);
+ OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
+ Vector32, DAG.getIntPtrConstant(0));
+ IntVT = MVT::i32;
+ }
+
+ SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
if (R.getNode())
return R;
- // Create BLSI, BLSR, and BZHI instructions
- // BLSI is X & (-X)
- // BLSR is X & (X-1)
+ // Create BEXTR and BZHI instructions
// BZHI is X & ((1 << Y) - 1)
// BEXTR is ((X >> imm) & (2**size-1))
if (VT == MVT::i32 || VT == MVT::i64) {
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (Subtarget->hasBMI()) {
- // Check LHS for neg
- if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
- isZero(N0.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
-
- // Check RHS for neg
- if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
- isZero(N1.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
-
- // Check LHS for X-1
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
- isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
-
- // Check RHS for X-1
- if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
- isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
- }
-
if (Subtarget->hasBMI2()) {
// Check for (and (add (shl 1, Y), -1), X)
if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (DCI.isBeforeLegalizeOps())
return SDValue();
return RV;
}
- // Try forming BMI if it is available.
- if (!Subtarget->hasBMI())
- return SDValue();
-
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- assert(Subtarget->hasBMI() && "Creating BLSMSK requires BMI instructions");
-
- // Create BLSMSK instructions by finding X ^ (X-1)
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
-
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
- isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1);
-
- if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
- isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0);
-
return SDValue();
}
}
}
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ N0.hasOneUse() &&
+ N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
+ N00.getOperand(0), N00.getOperand(1)),
+ DAG.getConstant(1, VT));
+ }
+ }
if (VT.is256BitVector()) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())
// Optimize x == -y --> x+y == 0
// x != -y --> x+y != 0
-static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
addV, DAG.getConstant(0, addV.getValueType()), CC);
}
+
+ if (VT.getScalarType() == MVT::i1) {
+ bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
+ if (!IsSEXT0 && !IsVZero0)
+ return SDValue();
+ bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (!IsSEXT1 && !IsVZero1)
+ return SDValue();
+
+ if (IsSEXT0 && IsVZero1) {
+ assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
+ if (CC == ISD::SETEQ)
+ return DAG.getNOT(DL, LHS.getOperand(0), VT);
+ return LHS.getOperand(0);
+ }
+ if (IsSEXT1 && IsVZero0) {
+ assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
+ if (CC == ISD::SETEQ)
+ return DAG.getNOT(DL, RHS.getOperand(0), VT);
+ return RHS.getOperand(0);
+ }
+ }
+
return SDValue();
}
// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
-static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG) {
- return DAG.getNode(ISD::AND, DL, MVT::i8,
+static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG,
+ MVT VT) {
+ if (VT == MVT::i8)
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+ DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
+ DAG.getConstant(1, VT));
+ assert (VT == MVT::i1 && "Unexpected type for SECCC node");
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1,
DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS),
- DAG.getConstant(1, MVT::i8));
+ DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS));
}
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
EFLAGS.getNode()->getVTList(),
EFLAGS.getOperand(1), EFLAGS.getOperand(0));
SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return MaterializeSETB(DL, NewEFLAGS, DAG);
+ return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0));
}
}
// a zext and produces an all-ones bit which is more useful than 0/1 in some
// cases.
if (CC == X86::COND_B)
- return MaterializeSETB(DL, EFLAGS, DAG);
+ return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
SDValue Flags;
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
- case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
+ case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);