; RUN: opt < %s -instcombine -S | FileCheck %s

; NOTE(review): this copy of the test had lost its line breaks and every
; constant vector operand. The constants below were re-derived from the
; pshufb semantics; confirm them against the upstream test before committing.
;
; pshufb semantics used throughout: a control byte with bit 7 set writes zero
; to the destination byte; otherwise its low four bits select a source byte
; from the same 128-bit lane.

; Verify that instcombine is able to fold identity shuffles.

define <16 x i8> @identity_test(<16 x i8> %InVec) {
; CHECK-LABEL: @identity_test
; CHECK: ret <16 x i8> %InVec

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

; The control bytes are lane-relative, so the identity mask repeats 0..15 in
; each 128-bit lane.
define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx2
; CHECK: ret <32 x i8> %InVec

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

; Verify that instcombine is able to fold byte shuffles with zero masks.
; Every control byte is -128 (only bit 7 set), so every result byte is zero.

define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector
; CHECK: ret <16 x i8> zeroinitializer

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx2
; CHECK: ret <32 x i8> zeroinitializer

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.

define <16 x i8> @splat_test(<16 x i8> %InVec) {
; CHECK-LABEL: @splat_test
; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; In the test case below, elements in the low 128-bit lane of the result
; vector are equal to the lower byte of %InVec (shuffle index 0).
; Elements in the high 128-bit lane of the result vector are equal to
; the lower byte in the high 128-bit lane of %InVec (shuffle index 16).
; NOTE(review): the constant vector operands in this section were missing and
; have been re-derived from the pshufb semantics; confirm against upstream.
;
; How the expected shuffle masks are derived: a control byte with bit 7 set
; produces zero, which the folded shufflevector takes from its second (zero)
; operand: index 16 for the 128-bit form, index 32 (low lane) or 48 (high
; lane) for the 256-bit form. Any other control byte selects byte (value & 15)
; of the same 128-bit lane, so high-lane indices are offset by 16.

define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @splat_test_avx2
; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
  ret <32 x i8> %1
}

; Each of the byte shuffles in the following tests is equivalent to a blend between
; vector %InVec and a vector of all zeroes.

define <16 x i8> @blend1(<16 x i8> %InVec) {
; CHECK-LABEL: @blend1
; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend2(<16 x i8> %InVec) {
; CHECK-LABEL: @blend2
; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend3(<16 x i8> %InVec) {
; CHECK-LABEL: @blend3
; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend4(<16 x i8> %InVec) {
; CHECK-LABEL: @blend4
; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @blend5(<16 x i8> %InVec) {
; CHECK-LABEL: @blend5
; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <16 x i8> @blend6(<16 x i8> %InVec) {
; CHECK-LABEL: @blend6
; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend1_avx2
; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend2_avx2
; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend3_avx2
; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend4_avx2
; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend5_avx2
; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @blend6_avx2
; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

; movq idiom.
; The low 8 bytes of each 128-bit lane are kept and the high 8 bytes are
; zeroed. Only the referenced elements of the zero operand stay defined in the
; expected output (element 0, plus element 16 for the 256-bit form).
define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom
; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <16 x i8> %1
}

define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @movq_idiom_avx2
; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
  ret <32 x i8> %1
}

; Vector permutations using byte shuffles.
; NOTE(review): the constant vector operands in this section were missing and
; have been re-derived from the pshufb semantics; confirm against upstream.
;
; No control byte below has bit 7 set, so no zero byte is produced and the
; second shufflevector operand is expected to be undef. Control bytes are
; lane-relative, so expected indices for the high 128-bit lane are offset
; by 16.

define <16 x i8> @permute1(<16 x i8> %InVec) {
; CHECK-LABEL: @permute1
; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <16 x i8> %1
}

define <16 x i8> @permute2(<16 x i8> %InVec) {
; CHECK-LABEL: @permute2
; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute1_avx2
; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %1
}

define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute2_avx2
; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
  ret <32 x i8> %1
}

; Test that instcombine correctly folds a pshufb with values that
; are not -128 and that are not encoded in four bits.
; NOTE(review): the constant vector operands in this section were missing and
; have been re-derived from the pshufb semantics; confirm against upstream.
;
; Only bit 7 and the low four bits of a control byte matter: a non-negative
; byte selects source byte (value & 15) of its lane, and any negative byte
; (not just -128) produces zero.

; Control bytes 16..31 (and 48..63) have low nibbles 0..15, so this is still
; an identity shuffle.
define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
; CHECK-LABEL: @identity_test2_2
; CHECK: ret <16 x i8> %InVec

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
  ret <16 x i8> %1
}

define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
; CHECK-LABEL: @identity_test_avx2_2
; CHECK: ret <32 x i8> %InVec

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
  ret <32 x i8> %1
}

; Every control byte is negative but none is -128; the result is still zero.
define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_2
; CHECK: ret <16 x i8> zeroinitializer

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -72, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
  ret <16 x i8> %1
}

define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
; CHECK-LABEL: @fold_to_zero_vector_avx2_2
; CHECK: ret <32 x i8> zeroinitializer

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -72, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -125, i8 -1, i8 -53, i8 -32, i8 -72, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
  ret <32 x i8> %1
}

; The low nibbles of the control bytes are 0..7 repeated; the upper bits
; (below bit 7) must be ignored by the fold.
define <16 x i8> @permute3(<16 x i8> %InVec) {
; CHECK-LABEL: @permute3
; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
  ret <16 x i8> %1
}

define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
; CHECK-LABEL: @permute3_avx2
; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>

  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55, i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
  ret <32 x i8> %1
}

declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)