}
if (Subtarget->hasSSE2()) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
+
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
+
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
setOperationAction(ISD::OR, MVT::i1, Legal);
setOperationAction(ISD::AND, MVT::i1, Legal);
+ setOperationAction(ISD::SUB, MVT::i1, Custom);
+ setOperationAction(ISD::ADD, MVT::i1, Custom);
+ setOperationAction(ISD::MUL, MVT::i1, Custom);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16)
+ if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
- return LowerZERO_EXTEND_AVX512(Op, DAG);
+ return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
DAG.getConstant(Mask, dl, VT));
DAG.ReplaceAllUsesWith(Op, New);
- DAG.RemoveDeadNode(Op.getNode());
Op = New;
}
break;
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
+ const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16)
+ if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
}
+// Lower ISD::SIGN_EXTEND_VECTOR_INREG: sign-extend the low lanes of the
+// input vector in-register into a wider-element 128-bit result vector
+// (v2i64 / v4i32 / v8i16 only; see the early-outs below). Returns a null
+// SDValue for unsupported type combinations so generic legalization can
+// take over.
+static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
+                                             const X86Subtarget *Subtarget,
+                                             SelectionDAG &DAG) {
+  SDValue In = Op->getOperand(0);
+  MVT VT = Op->getSimpleValueType(0);
+  MVT InVT = In.getSimpleValueType();
+  assert(VT.getSizeInBits() == InVT.getSizeInBits());
+
+  MVT InSVT = InVT.getScalarType();
+  assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits());
+
+  // Bail out for anything other than 128-bit integer results with
+  // i8/i16/i32 source elements — only those cases are expanded here.
+  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+    return SDValue();
+  if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
+    return SDValue();
+
+  SDLoc dl(Op);
+
+  // SSE41 targets can use the pmovsx* instructions directly.
+  if (Subtarget->hasSSE41())
+    return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
+  // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
+  SDValue Curr = In;
+  MVT CurrVT = InVT;
+
+  // As SRAI is only available on i16/i32 types, we expand only up to i32
+  // and handle i64 separately.
+  while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) {
+    // Unpacking against undef doubles the element width; the original source
+    // bits land in the high half of each widened element.
+    Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
+    MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
+    CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
+    Curr = DAG.getNode(ISD::BITCAST, dl, CurrVT, Curr);
+  }
+
+  SDValue SignExt = Curr;
+  if (CurrVT != InVT) {
+    // Arithmetic-shift the high-half source bits back down, replicating the
+    // sign bit into the upper bits of each element.
+    unsigned SignExtShift =
+        CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits();
+    SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+                          DAG.getConstant(SignExtShift, dl, MVT::i8));
+  }
+
+  if (CurrVT == VT)
+    return SignExt;
+
+  if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
+    // For i64 results, pair each sign-extended i32 with a lane of pure sign
+    // bits (SRAI by 31) and interleave ({0,4,1,5}) to form the i64 lanes.
+    SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
+                               DAG.getConstant(31, dl, MVT::i8));
+    SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
+    return DAG.getNode(ISD::BITCAST, dl, VT, Ext);
+  }
+
+  // Any other widening combination (e.g. i8/i16 -> i64) is not handled here.
+  return SDValue();
+}
+
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
+
+ case Intrinsic::x86_seh_lsda: {
+ // Compute the symbol for the LSDA. We know it'll get emitted later.
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Op1 = Op.getOperand(1);
+ auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
+ MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()));
+ StringRef Name = LSDASym->getName();
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
+
+ // Generate a simple absolute symbol reference. This intrinsic is only
+ // supported on 32-bit Windows, which isn't PIC.
+ SDValue Result =
+ DAG.getTargetExternalSymbol(Name.data(), VT, X86II::MO_NOPREFIX);
+ return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);
+ }
}
}
}
static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
}
static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ if (VT == MVT::i1)
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
+
// Decompose 256-bit ops into smaller 128-bit ops.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntArith(Op, DAG);
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
if (Op.getOpcode() == ISD::SHL) {
+ // Simple i8 add case
+ if (ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
+
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
// solution better.
if (Subtarget->hasInt256() && VT == MVT::v8i16) {
- MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
+ MVT ExtVT = MVT::v8i32;
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- R = DAG.getNode(ExtOpc, dl, NewVT, R);
- Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
+ R = DAG.getNode(ExtOpc, dl, ExtVT, R);
+ Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
+ DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+ }
+
+ if (Subtarget->hasInt256() && VT == MVT::v16i16) {
+ MVT ExtVT = MVT::v8i32;
+ SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
+ SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z);
+ SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R);
+ SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R);
+ ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo);
+ AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi);
+ RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo);
+ RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi);
+ SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi);
+ Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT));
+ Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT));
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
// Decompose 256-bit shifts into smaller 128-bit shifts.
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return LowerSIGN_EXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() ||
+ if (!BCVT.isVector() ||
BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
}
EVT VT = N->getValueType(0);
-
+
if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
InputVector.getOpcode() == ISD::BITCAST &&
dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
// know will be matched by LowerVECTOR_SHUFFLEtoBlend.
if ((N->getOpcode() == ISD::VSELECT ||
N->getOpcode() == X86ISD::SHRUNKBLEND) &&
- !DCI.isBeforeLegalize()) {
+ !DCI.isBeforeLegalize() && !VT.is512BitVector()) {
SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
if (Shuffle.getNode())
return Shuffle;
const X86Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ EVT SVT = VT.getScalarType();
+ EVT InVT = N0->getValueType(0);
+ EVT InSVT = InVT.getScalarType();
+ SDLoc DL(N);
// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
// This exposes the sext to the sdivrem lowering, so that it directly extends
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
- N0.getValueType() == MVT::i8 && VT == MVT::i32) {
+ InVT == MVT::i8 && VT == MVT::i32) {
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
- SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
+ SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys,
N0.getOperand(0), N0.getOperand(1));
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
return R.getValue(1);
if (!DCI.isBeforeLegalizeOps()) {
if (N0.getValueType() == MVT::i1) {
- SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue AllOnes =
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
- return DAG.getNode(ISD::SELECT, dl, VT, N0, AllOnes, Zero);
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
+ return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero);
}
return SDValue();
}
+ if (VT.isVector()) {
+ auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+ EVT InVT = N->getValueType(0);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
+ 128 / InVT.getScalarSizeInBits());
+ SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+ DAG.getUNDEF(InVT));
+ Opnds[0] = N;
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
+ };
+
+ // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
+ // which ensures lowering to X86ISD::VSEXT (pmovsx*).
+ if (VT.getSizeInBits() == 128 &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ SDValue ExOp = ExtendToVec128(DL, N0);
+ return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
+ }
+
+ // On pre-AVX2 targets, split into 128-bit nodes of
+ // ISD::SIGN_EXTEND_VECTOR_INREG.
+ if (!Subtarget->hasInt256() && !(VT.getSizeInBits() % 128) &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ unsigned NumVecs = VT.getSizeInBits() / 128;
+ unsigned NumSubElts = 128 / SVT.getSizeInBits();
+ EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
+ EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, Offset = 0; i != NumVecs;
+ ++i, Offset += NumSubElts) {
+ SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
+ DAG.getIntPtrConstant(Offset, DL));
+ SrcVec = ExtendToVec128(DL, SrcVec);
+ SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
+ Opnds.push_back(SrcVec);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
+ }
+ }
+
if (!Subtarget->hasFp256())
return SDValue();