+static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl,
+ unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits()/vectorWidth;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(ResultVT);
+
+ // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
+ unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
+ * ElemsPerChunk);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
+ ElemsPerChunk));
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
+}
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
+/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
+/// instructions or a simple subregister reference. Idx is an index in the
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert((Vec.getValueType().is256BitVector() ||
+ Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
+}
+
+/// Generate a DAG to grab 256-bits from a 512-bit vector.
+static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
+}
+
+static SDValue InsertSubVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
+ SDLoc dl, unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant vectorWidth bits.
+ unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
+/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
+
+ // For insertion into the zero index (low half) of a 256-bit vector, it is
+ // more efficient to generate a blend with immediate instead of an insert*128.
+ // We are still creating an INSERT_SUBVECTOR below with an undef node to
+ // extend the subvector to the size of the result vector. Make sure that
+ // we are not recursing on that node by checking for undef here.
+ if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
+ Result.getOpcode() != ISD::UNDEF) {
+ EVT ResultVT = Result.getValueType();
+ SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
+ SDValue Undef = DAG.getUNDEF(ResultVT);
+ SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
+ Vec, ZeroIndex);
+
+ // The blend instruction, and therefore its mask, depend on the data type.
+ MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ if (ScalarType.isFloatingPoint()) {
+ // Choose either vblendps (float) or vblendpd (double).
+ unsigned ScalarSize = ScalarType.getSizeInBits();
+ assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
+ unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
+ SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
+ }
+
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+
+ // AVX2 is needed for 256-bit integer blend support.
+ // Integers must be cast to 32-bit because there is only vpblendd;
+ // vpblendw can't be used for this because it has a handicapped mask.
+
+ // If we don't have AVX2, then cast to float. Using a wrong domain blend
+ // is still more efficient than using the wrong domain vinsertf128 that
+ // will be created by InsertSubVector().
+ MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+ SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
+ Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
+ return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ }
+
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
+}
+
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
+}
+
+/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
+/// instructions. This is used because creating CONCAT_VECTOR nodes of
+/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORS.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
+static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
+}
+