}
}
- // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
- // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- auto IsBuildVectorOrUndef = [](const SDValue &Op) {
- return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
- };
- bool AllBuildVectorsOrUndefs =
- std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
- if (AllBuildVectorsOrUndefs) {
+ if (N->getNumOperands() == 2 &&
+ N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
SmallVector<SDValue, 8> Opnds;
- EVT SVT = VT.getScalarType();
-
- EVT MinVT = SVT;
- if (!SVT.isFloatingPoint())
+ unsigned BuildVecNumElts = N0.getNumOperands();
+
+ EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+ EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+ if (SclTy0.isFloatingPoint()) {
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+ } else {
// If the BUILD_VECTORs are built from integers, they may have different
// operand types. Get the smaller type and truncate all operands to it.
- for (const SDValue &Op : N->ops())
- if (ISD::BUILD_VECTOR == Op.getOpcode()) {
- EVT OpSVT = Op.getOperand(0)->getValueType(0);
- MinVT = MinVT.bitsLE(OpSVT) ? MinVT : OpSVT;
- }
-
- for (const SDValue &Op : N->ops()) {
- EVT OpVT = Op.getValueType();
- unsigned NumElts = OpVT.getVectorNumElements();
-
- if (ISD::UNDEF == Op.getOpcode())
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(DAG.getUNDEF(MinVT));
-
- if (ISD::BUILD_VECTOR == Op.getOpcode()) {
- if (SVT.isFloatingPoint()) {
- assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(Op.getOperand(i));
- } else {
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(
- DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
- }
- }
+ EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N0.getOperand(i)));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N1.getOperand(i)));
}
- assert(VT.getVectorNumElements() == Opnds.size() &&
- "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1\r
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]\r
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero\r
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: # kill
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
%B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero\r
-; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2\r
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]\r
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]\r
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0\r
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: # kill
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: vmovd %eax, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
entry:
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>