From: Andrea Di Biagio Date: Thu, 3 Jul 2014 22:24:18 +0000 (+0000) Subject: [CostModel][x86] Improved cost model for alternate shuffles. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=60e9a53c21bef26ed81c1395b569cb48f7b4ef8e;p=oota-llvm.git [CostModel][x86] Improved cost model for alternate shuffles. This patch: 1) Improves the cost model for x86 alternate shuffles (originally added at revision 211339); 2) Teaches the Cost Model Analysis pass how to analyze alternate shuffles. Alternate shuffles are a special kind of blend; on x86, we can often easily lower alternate shuffles into a single blend instruction (depending on the subtarget features). The existing cost model didn't take into account subtarget features. Also, it had a couple of "dead" entries for vector types that are never legal (example: on x86 types v2i32 and v2f32 are not legal; those are always either promoted or widened to 128-bit vector types). The new x86 cost model takes into account what target features we have before returning the shuffle cost (i.e. the number of instructions after the blend is lowered/expanded). This patch also teaches the Cost Model Analysis how to identify and analyze alternate shuffles (i.e. 'SK_Alternate' shufflevector instructions): - added function 'isAlternateVectorMask'; - added some logic to check if an instruction is an alternate shuffle and, if so, call the target-specific TTI to get the corresponding shuffle cost; - added a test to verify the cost model analysis on alternate shuffles. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212296 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 780b1aaa820..1b74f8c19c5 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -95,6 +95,31 @@ static bool isReverseVectorMask(SmallVectorImpl &Mask) { return true; } +static bool isAlternateVectorMask(SmallVectorImpl &Mask) { + bool isAlternate = true; + unsigned MaskSize = Mask.size(); + + // Example: shufflevector A, B, <0,5,2,7> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); + } + + if (isAlternate) + return true; + + isAlternate = true; + // Example: shufflevector A, B, <4,1,6,3> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + } + + return isAlternate; +} + static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { TargetTransformInfo::OperandValueKind OpInfo = TargetTransformInfo::OK_AnyValue; @@ -466,9 +491,15 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { unsigned NumVecElems = VecTypOp0->getVectorNumElements(); SmallVector Mask = Shuffle->getShuffleMask(); - if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) - return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, - nullptr); + if (NumVecElems == Mask.size()) { + if (isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, + 0, nullptr); + if (isAlternateVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, + VecTypOp0, 0, nullptr); + } + return -1; } case Instruction::Call: diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9fa911967e7..be9caba506d 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp 
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -417,29 +417,99 @@ unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, } if (Kind == SK_Alternate) { - static const CostTblEntry X86AltShuffleTbl[] = { - // Alt shuffle cost table for X86. Cost is the number of instructions - // required to create the shuffled vector. + // 64-bit packed float vectors (v2f32) are widened to type v4f32. + // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. + std::pair LT = TLI->getTypeLegalizationCost(Tp); + + // The backend knows how to generate a single VEX.256 version of + // instruction VPBLENDW if the target supports AVX2. + if (ST->hasAVX2() && LT.second == MVT::v16i16) + return LT.first; - {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + static const CostTblEntry AVXAltShuffleTbl[] = { + {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd + {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd - {ISD::VECTOR_SHUFFLE, MVT::v2i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps + {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps - {ISD::VECTOR_SHUFFLE, MVT::v4i16, 8}, - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, + // This shuffle is custom lowered into a sequence of: + // 2x vextractf128 , 2x vpblendw , 1x vinsertf128 + {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5}, - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 49}}; + // This shuffle is custom lowered into a long sequence of: + // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128 + {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} + }; - std::pair LT = TLI->getTypeLegalizationCost(Tp); + if (ST->hasAVX()) { + int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx != -1) + return LT.first * AVXAltShuffleTbl[Idx].Cost; + } + + static const CostTblEntry SSE41AltShuffleTbl[] = { + // These are lowered into movsd. 
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + + // packed float vectors with four elements are lowered into BLENDI dag + // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'. + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, + + // This shuffle generates a single pshufw. + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, + + // There is no instruction that matches a v16i8 alternate shuffle. + // The backend will expand it into the sequence 'pshufb + pshufb + or'. + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} + }; + + if (ST->hasSSE41()) { + int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx != -1) + return LT.first * SSE41AltShuffleTbl[Idx].Cost; + } + + static const CostTblEntry SSSE3AltShuffleTbl[] = { + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + + // SSE3 doesn't have 'blendps'. The following shuffles are expanded into + // the sequence 'shufps + pshufd' + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or + }; + + if (ST->hasSSSE3()) { + int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx != -1) + return LT.first * SSSE3AltShuffleTbl[Idx].Cost; + } - int Idx = CostTableLookup(X86AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * X86AltShuffleTbl[Idx].Cost; + static const CostTblEntry SSEAltShuffleTbl[] = { + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd + + // This is expanded into a long sequence of four extract + four insert. 
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw. + + // 8 x (pinsrw + pextrw + and + movb + movzb + or) + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48} + }; + + // Fall-back (SSE3 and SSE2). + int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx != -1) + return LT.first * SSEAltShuffleTbl[Idx].Cost; + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); diff --git a/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll new file mode 100644 index 00000000000..2e162f0f000 --- /dev/null +++ b/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -0,0 +1,347 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-ssse3 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+sse3,+ssse3 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSSE3 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 + + +; Verify the cost model for alternate shuffles. + +; shufflevector instructions with illegal 64-bit vector types. +; 64-bit packed integer vectors (v2i32) are promoted to type v2i64. +; 64-bit packed float vectors (v2f32) are widened to type v4f32. 
+ +define <2 x i32> @test_v2i32(<2 x i32> %a, <2 x i32> %b) { + %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i32': +; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +define <2 x float> @test_v2f32(<2 x float> %a, <2 x float> %b) { + %1 = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f32': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +define <2 x i32> @test_v2i32_2(<2 x i32> %a, <2 x i32> %b) { + %1 = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i32_2': +; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +define <2 x float> @test_v2f32_2(<2 x float> %a, <2 x float> %b) { + %1 = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f32_2': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 
2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +; Test shuffles on packed vectors of two elements. + +define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) { + %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i64': +; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +define <2 x double> @test_v2f64(<2 x double> %a, <2 x double> %b) { + %1 = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f64': +; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <2 x i64> @test_v2i64_2(<2 x i64> %a, <2 x i64> %b) { + %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2i64_2': +; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <2 x double> 
@test_v2f64_2(<2 x double> %a, <2 x double> %b) { + %1 = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v2f64_2': +; SSE2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +; Test shuffles on packed vectors of four elements. + +define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i32': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <4 x i32> @test_v4i32_2(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i32_2': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f32': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = 
shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <4 x float> @test_v4f32_2(<4 x float> %a, <4 x float> %b) { + %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f32_2': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) { + %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i64': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <4 x i64> @test_v4i64_2(<4 x i64> %a, <4 x i64> %b) { + %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4i64_2': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <4 x double> @test_v4f64(<4 x 
double> %a, <4 x double> %b) { + %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f64': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <4 x double> @test_v4f64_2(<4 x double> %a, <4 x double> %b) { + %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v4f64_2': +; SSE2: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +; Test shuffles on packed vectors of eight elements. 
+define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { + %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16': +; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) { + %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2': +; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { + %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i32': +; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <8 x i32> @test_v8i32_2(<8 x i32> %a, <8 x i32> %b) { + %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i32_2': +; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 4 for instruction: 
%1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) { + %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8f32': +; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <8 x float> @test_v8f32_2(<8 x float> %a, <8 x float> %b) { + %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8f32_2': +; SSE2: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 4 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +; Test shuffles on packed vectors of sixteen elements. 
+define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) { + %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8': +; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector + + +define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) { + %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2': +; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector + + +define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { + %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16': +; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + + +define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) { + %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16_2': +; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector +; SSSE3: Cost Model: 
{{.*}} 6 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector + +define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) { + %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8': +; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector + + +define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) { + %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %1 +} +; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2': +; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector +; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +