From: Kevin Qin Date: Wed, 22 Jan 2014 06:11:03 +0000 (+0000) Subject: [AArch64 NEON] Try to generate CONCAT_VECTOR when lowering BUILD_VECTOR or SHUFFLE_VE... X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=0af7a7db530b62f20b6d5766e1a95816fc8eb66e [AArch64 NEON] Try to generate CONCAT_VECTOR when lowering BUILD_VECTOR or SHUFFLE_VECTOR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199791 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 27277c47f39..4aae9e59e2a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4154,21 +4154,70 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } -// Check whether a Build Vector could be presented as Shuffle Vector. If yes, -// try to call LowerVECTOR_SHUFFLE to lower it. +// Check whether a shuffle_vecotor could be presented as conact_vector. +bool AArch64TargetLowering::isConactVector(SDValue Op,SelectionDAG &DAG, + SDValue V0, SDValue V1, + const int* Mask, + SDValue &Res) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned V0NumElts = V0.getValueType().getVectorNumElements(); + bool isContactVector = true; + bool splitV0 = false; + int offset = 0; + for (int I = 0, E = NumElts; I != E; I++){ + if (Mask[I] != I + offset) { + if(I && !splitV0 && Mask[I] == I + (int)V0NumElts / 2) { + splitV0 = true; + offset = V0NumElts / 2; + } else { + isContactVector = false; + break; + } + } + } + if (isContactVector) { + EVT CastVT = EVT::getVectorVT(*DAG.getContext(), + VT.getVectorElementType(), NumElts / 2); + if(CastVT.getSizeInBits() < 64) + return false; + + if (splitV0) { + assert(V0NumElts >= NumElts / 2 && + "invalid operand for extract_subvector!"); + V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, + DAG.getConstant(0, MVT::i64)); + } + if (NumElts != V1.getValueType().getVectorNumElements() * 2) { + assert(V1.getValueType().getVectorNumElements() >= NumElts / 2 && + "invalid operand for extract_subvector!"); + V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, + DAG.getConstant(0, MVT::i64)); + } + Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); + return true; + } + return false; +} + +// Check whether a Build Vector could be presented as Shuffle Vector. +// This Shuffle Vector maybe not legalized, so the length of its operand and +// the length of result may not equal. bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, - SDValue &Res) const { + SDValue &V0, SDValue &V1, + int *Mask) const { SDLoc DL(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned V0NumElts = 0; - int Mask[16]; - SDValue V0, V1; // Check if all elements are extracted from less than 3 vectors. for (unsigned i = 0; i < NumElts; ++i) { SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Elt.getOperand(0).getValueType().getVectorElementType() != + VT.getVectorElementType()) return false; if (V0.getNode() == 0) { @@ -4189,25 +4238,7 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, return false; } } - - if (!V1.getNode() && V0NumElts == NumElts * 2) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(NumElts, MVT::i64)); - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(0, MVT::i64)); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - - if (V1.getNode() && NumElts == V0NumElts && - V0NumElts == V1.getValueType().getVectorNumElements()) { - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); - if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) - Res = Shuffle; - else - Res = LowerVECTOR_SHUFFLE(Shuffle, DAG); - return true; - } else - return false; + return true; } // If this is a case we can't handle, return null and let the default @@ -4413,9 +4444,31 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return SDValue(); // Try to lower this in lowering ShuffleVector way. - SDValue Shuf; - if (isKnownShuffleVector(Op, DAG, Shuf)) - return Shuf; + SDValue V0, V1; + int Mask[16]; + if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) { + unsigned V0NumElts = V0.getValueType().getVectorNumElements(); + if (!V1.getNode() && V0NumElts == NumElts * 2) { + V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, + DAG.getConstant(NumElts, MVT::i64)); + V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, + DAG.getConstant(0, MVT::i64)); + V0NumElts = V0.getValueType().getVectorNumElements(); + } + + if (V1.getNode() && NumElts == V0NumElts && + V0NumElts == V1.getValueType().getVectorNumElements()) { + SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); + if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) + return Shuffle; + else + return LowerVECTOR_SHUFFLE(Shuffle, DAG); + } else { + SDValue Res; + if(isConactVector(Op, DAG, V0, V1, Mask, Res)) + return Res; + } + } // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // know the default expansion would otherwise fall back on something even @@ -4601,6 +4654,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISDNo, dl, VT, V1, V2); } + SDValue Res; + if (isConactVector(Op, DAG, V1, V2, &ShuffleMask[0], Res)) + return Res; + // If the element of shuffle mask are all the same constant, we can // transform it into either NEON_VDUP or NEON_VDUPLANE if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 3879663e570..8baf992a2bd 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -232,7 +232,11 @@ public: SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const; + bool isConactVector(SDValue Op,SelectionDAG &DAG, SDValue V0, SDValue V1, + const int* Mask, SDValue &Res) const; + + bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0, + SDValue &V1, int *Mask) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const AArch64Subtarget *ST) const; diff --git a/test/CodeGen/AArch64/neon-copy.ll b/test/CodeGen/AArch64/neon-copy.ll index 7faf2a2a8cc..514689a2e6b 100644 --- a/test/CodeGen/AArch64/neon-copy.ll +++ b/test/CodeGen/AArch64/neon-copy.ll @@ -979,4 +979,274 @@ entry: %0 = extractelement <1 x float> %a, i32 0 %vecinit1.i = insertelement <4 x float> undef, float %0, i32 0 ret <4 x float> %vecinit1.i +} + +define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> + ret <16 x i8> %vecinit30 +} + +define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <8 x i8> %x, i32 0 + %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <8 x i8> %x, i32 1 + %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <8 x i8> %x, i32 2 + %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <8 x i8> %x, i32 3 + %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <8 x i8> %x, i32 4 + %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <8 x i8> %x, i32 5 + %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <8 x i8> %x, i32 6 + %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <8 x i8> %x, i32 7 + %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 + %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> + ret <16 x i8> %vecinit30 +} + +define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <16 x i8> %x, i32 0 + %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <16 x i8> %x, i32 1 + %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <16 x i8> %x, i32 2 + %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <16 x i8> %x, i32 3 + %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <16 x i8> %x, i32 4 + %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <16 x i8> %x, i32 5 + %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <16 x i8> %x, i32 6 + %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <16 x i8> %x, i32 7 + %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 + %vecext15 = extractelement <8 x i8> %y, i32 0 + %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 + %vecext17 = extractelement <8 x i8> %y, i32 1 + %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 + %vecext19 = extractelement <8 x i8> %y, i32 2 + %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 + %vecext21 = extractelement <8 x i8> %y, i32 3 + %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 + %vecext23 = extractelement <8 x i8> %y, i32 4 + %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 + %vecext25 = extractelement <8 x i8> %y, i32 5 + %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 + %vecext27 = extractelement <8 x i8> %y, i32 6 + %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 + %vecext29 = extractelement <8 x i8> %y, i32 7 + %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 + ret <16 x i8> %vecinit30 +} + +define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { +; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <8 x i8> %x, i32 0 + %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <8 x i8> %x, i32 1 + %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <8 x i8> %x, i32 2 + %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <8 x i8> %x, i32 3 + %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <8 x i8> %x, i32 4 + %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <8 x i8> %x, i32 5 + %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <8 x i8> %x, i32 6 + %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <8 x i8> %x, i32 7 + %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 + %vecext15 = extractelement <8 x i8> %y, i32 0 + %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 + %vecext17 = extractelement <8 x i8> %y, i32 1 + %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 + %vecext19 = extractelement <8 x i8> %y, i32 2 + %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 + %vecext21 = extractelement <8 x i8> %y, i32 3 + %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 + %vecext23 = extractelement <8 x i8> %y, i32 4 + %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 + %vecext25 = extractelement <8 x i8> %y, i32 5 + %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 + %vecext27 = extractelement <8 x i8> %y, i32 6 + %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 + %vecext29 = extractelement <8 x i8> %y, i32 7 + %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 + ret <16 x i8> %vecinit30 +} + +define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> + ret <8 x i16> %vecinit14 +} + +define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <4 x i16> %x, i32 0 + %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 + %vecext1 = extractelement <4 x i16> %x, i32 1 + %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 + %vecext3 = extractelement <4 x i16> %x, i32 2 + %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 + %vecext5 = extractelement <4 x i16> %x, i32 3 + %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 + %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> + ret <8 x i16> %vecinit14 +} + +define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <8 x i16> %x, i32 0 + %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 + %vecext1 = extractelement <8 x i16> %x, i32 1 + %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 + %vecext3 = extractelement <8 x i16> %x, i32 2 + %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 + %vecext5 = extractelement <8 x i16> %x, i32 3 + %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 + %vecext7 = extractelement <4 x i16> %y, i32 0 + %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 + %vecext9 = extractelement <4 x i16> %y, i32 1 + %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 + %vecext11 = extractelement <4 x i16> %y, i32 2 + %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 + %vecext13 = extractelement <4 x i16> %y, i32 3 + %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 + ret <8 x i16> %vecinit14 +} + +define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { +; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <4 x i16> %x, i32 0 + %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 + %vecext1 = extractelement <4 x i16> %x, i32 1 + %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 + %vecext3 = extractelement <4 x i16> %x, i32 2 + %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 + %vecext5 = extractelement <4 x i16> %x, i32 3 + %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 + %vecext7 = extractelement <4 x i16> %y, i32 0 + %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 + %vecext9 = extractelement <4 x i16> %y, i32 1 + %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 + %vecext11 = extractelement <4 x i16> %y, i32 2 + %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 + %vecext13 = extractelement <4 x i16> %y, i32 3 + %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 + ret <8 x i16> %vecinit14 +} + +define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + ret <4 x i32> %vecinit6 +} + +define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <2 x i32> %x, i32 0 + %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 + %vecext1 = extractelement <2 x i32> %x, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 + %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> + ret <4 x i32> %vecinit6 +} + +define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <4 x i32> %x, i32 0 + %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 + %vecext1 = extractelement <4 x i32> %x, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 + %vecext3 = extractelement <2 x i32> %y, i32 0 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 + %vecext5 = extractelement <2 x i32> %y, i32 1 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 + ret <4 x i32> %vecinit6 +} + +define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { +; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <2 x i32> %x, i32 0 + %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 + %vecext1 = extractelement <2 x i32> %x, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 + %vecext3 = extractelement <2 x i32> %y, i32 0 + %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 + %vecext5 = extractelement <2 x i32> %y, i32 1 + %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 + ret <4 x i32> %vecinit6 +} + +define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> + ret <2 x i64> %vecinit2 +} + +define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <1 x i64> %x, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> + ret <2 x i64> %vecinit2 +} + +define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <2 x i64> %x, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecext1 = extractelement <1 x i64> %y, i32 0 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 + ret <2 x i64> %vecinit2 +} + +define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { +; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %vecext = extractelement <1 x i64> %x, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecext1 = extractelement <1 x i64> %y, i32 0 + %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 + ret <2 x i64> %vecinit2 } \ No newline at end of file