From 49cf7619254ff6529c14ece0a6fa3a02cb3fbfcf Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 25 Oct 2015 17:40:54 +0000 Subject: [PATCH] [X86][SSE4A] Fix for EXTRQI shuffle lowering. Incorrect range test - found during fuzz testing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251245 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ++-- test/CodeGen/X86/vector-shuffle-sse4a.ll | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d1879570a03..c978fe08476 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7334,7 +7334,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, // Determine the extraction length from the part of the // lower half that isn't zeroable. int Len = HalfSize; - for (; Len >= 0; --Len) + for (; Len > 0; --Len) if (!Zeroable[Len - 1]) break; assert(Len > 0 && "Zeroable shuffle mask"); @@ -7350,7 +7350,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, M = M % Size; // All mask elements must be in the lower half. - if (M > HalfSize) + if (M >= HalfSize) return SDValue(); if (Idx < 0 || (Src == V && Idx == (M - i))) { diff --git a/test/CodeGen/X86/vector-shuffle-sse4a.ll b/test/CodeGen/X86/vector-shuffle-sse4a.ll index 58807b9c0fd..2dd43e2852a 100644 --- a/test/CodeGen/X86/vector-shuffle-sse4a.ll +++ b/test/CodeGen/X86/vector-shuffle-sse4a.ll @@ -296,5 +296,28 @@ define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %s } +; +; Special Cases +; + +; Out of range. 
+define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) { +; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu: +; BTVER1: # BB#0: +; BTVER1-NEXT: psrld $16, %xmm1 +; BTVER1-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; BTVER1-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; BTVER1-NEXT: retq +; +; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu: +; BTVER2: # BB#0: +; BTVER2-NEXT: vpsrld $16, %xmm1, %xmm1 +; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; BTVER2-NEXT: retq + %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <16 x i8> %1 +} + declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind -- 2.34.1