setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
uint64_t Immediate = 0;
int NonConstIdx = -1;
bool IsSplat = true;
+ unsigned NumNonConsts = 0;
+ unsigned NumConsts = 0;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
if (!isa<ConstantSDNode>(In)) {
AllContants = false;
NonConstIdx = idx;
+ NumNonConsts++;
}
- else if (cast<ConstantSDNode>(In)->getZExtValue())
+ else {
+ NumConsts++;
+ if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
+ }
if (In != Op.getOperand(0))
IsSplat = false;
}
DAG.getIntPtrConstant(0));
}
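+ // If exactly one operand is non-constant (and not in lane 0), materialize
+ // the constant bits as an immediate, bitcast it to the mask type, and
+ // insert the single variable element.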
+ if (NumNonConsts == 1 && NonConstIdx != 0) {
+ SDValue DstVec;
+ if (NumConsts) {
+ SDValue VecAsImm = DAG.getConstant(Immediate,
+ MVT::getIntegerVT(VT.getSizeInBits()));
+ DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
+ }
+ else
+ DstVec = DAG.getUNDEF(VT);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+ Op.getOperand(NonConstIdx),
+ DAG.getIntPtrConstant(NonConstIdx));
+ }
if (!IsSplat && (NonConstIdx != 0))
llvm_unreachable("Unsupported BUILD_VECTOR operation");
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
return SDValue();
}
+/// Insert one bit into a mask vector, such as v16i1 or v8i1 (an AVX-512
+/// feature).
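+/// A constant index is handled with mask shifts and an OR; a non-constant
+/// index widens the operands, inserts, and truncates back.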
+SDValue
+X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ MVT VecVT = Vec.getSimpleValueType();
+
+ if (!isa<ConstantSDNode>(Idx)) {
+ // Non-constant index: zero-extend the source vector and the element,
+ // insert the element, and truncate the result back to the mask type.
+ MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
+ MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
+ SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
+ return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
+ }
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
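+ // When the destination vector is undef, shifting the new bit into place
+ // is enough; there is nothing to merge with.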
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ DAG.getConstant(IdxVal, MVT::i8));
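+ // Otherwise isolate the bit: shift it up to the MSB, shift it back down
+ // to position IdxVal, and OR it into the existing mask.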
+ const TargetRegisterClass *RC = getRegClassFor(VecVT);
+ unsigned MaxShift = RC->getSize() * 8 - 1;
+ EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ DAG.getConstant(MaxShift, MVT::i8));
+ EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
+ DAG.getConstant(MaxShift - IdxVal, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
+}
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
+
+ if (EltVT == MVT::i1)
+ return InsertBitToMaskVector(Op, DAG);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
(EXTRACT_SUBREG
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
sub_16bit)>;
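+// scalar_to_vector of a single mask bit is simply a mask-register-class
+// copy.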
+ def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK16)>;
+ def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK8)>;
}
// With AVX-512 only, an 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
"vmovdqu64", SSEPackedInt, v8i64>,
XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
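+// Fold masked-load intrinsics with an all-zeros pass-through into the
+// zero-masking (rmkz) load forms.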
+def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
+ (v16i32 immAllZerosV), GR16:$mask)),
+ (VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+
+def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
+ (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
+
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
(bc_v8i64 (v16i32 immAllZerosV)))),
GR8:$mask),
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+
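+// An i1 'true' may be materialized as either -1 or 1; both are stored as
+// the byte value 1.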
+def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
%res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
ret i64 %res
}
+
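+; Inserting into an undef mask needs only a single shift to place the bit.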
+;CHECK-LABEL: test15
+;CHECK: kshiftlw
+;CHECK: kmovw
+;CHECK: ret
+define i16 @test15(i1 *%addr) {
+ %x = load i1 * %addr, align 128
+ %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
+ %x2 = bitcast <16 x i1> %x1 to i16
+ ret i16 %x2
+}
+
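+; Inserting into a live mask isolates the bit with two shifts and merges
+; it in with korw.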
+;CHECK-LABEL: test16
+;CHECK: kshiftlw
+;CHECK: kshiftrw
+;CHECK: korw
+;CHECK: ret
+define i16 @test16(i1 *%addr, i16 %a) {
+ %x = load i1 * %addr, align 128
+ %a1 = bitcast i16 %a to <16 x i1>
+ %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
+ %x2 = bitcast <16 x i1> %x1 to i16
+ ret i16 %x2
+}
+
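+; The v8i1 case uses the same sequence; 8-bit masks are promoted to 16-bit
+; mask instructions.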
+;CHECK-LABEL: test17
+;CHECK: kshiftlw
+;CHECK: kshiftrw
+;CHECK: korw
+;CHECK: ret
+define i8 @test17(i1 *%addr, i8 %a) {
+ %x = load i1 * %addr, align 128
+ %a1 = bitcast i8 %a to <8 x i1>
+ %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
+ %x2 = bitcast <8 x i1> %x1 to i8
+ ret i8 %x2
+}
+