From cc727ec92a9869ebd04b88a3b71d986fef1c7b0a Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 20 Sep 2014 18:36:39 +0000 Subject: [PATCH] [x86] Start moving to a fancier check syntax to reduce the need for duplication of check lines. The idea is to have broad sets of compilation modes that will frequently diverge without having to always and immediately explode to the precise ISA feature set. While this already helps due to VEX encoded differences, it will help much more as I teach the new shuffle lowering about more of the new VEX encoded instructions which can still be used to implement 128-bit shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218188 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/vector-shuffle-128-v4.ll | 276 ++++++++-------------- 1 file changed, 102 insertions(+), 174 deletions(-) diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index 3645c9475a2..9a0f2b349ca 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" @@ -152,9 +152,9 @@ define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) { ; SSE41: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_0022 -; AVX1: vmovsldup {{.*}} # xmm0 = xmm0[0,0,2,2] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_0022 +; AVX: vmovsldup {{.*}} # xmm0 = xmm0[0,0,2,2] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } @@ -175,9 +175,9 @@ define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) { ; SSE41: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_1133 -; AVX1: vmovshdup {{.*}} # xmm0 = xmm0[1,1,3,3] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_1133 +; AVX: vmovshdup {{.*}} # xmm0 = xmm0[1,1,3,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %shuffle } @@ -202,9 +202,9 @@ define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) { ; SSE41: insertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_0124 -; AVX1: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_0124 +; AVX: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -217,66 +217,30 @@ define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) { ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_0412 -; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2] -; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: @shuffle_v4i32_0412 -; SSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2] -; SSE3-NEXT: movaps %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: @shuffle_v4i32_0412 -; SSSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2] -; SSSE3-NEXT: movaps %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: @shuffle_v4i32_0412 -; SSE41: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE41-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2] -; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: @shuffle_v4i32_0412 -; AVX1: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; AVX1-NEXT: vshufps {{.*}} # xmm0 = xmm1[2,0],xmm0[1,2] -; AVX1-NEXT: retq +; SSE-LABEL: @shuffle_v4i32_0412 +; SSE: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] +; SSE-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2] +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: @shuffle_v4i32_0412 +; AVX: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] +; AVX-NEXT: vshufps {{.*}} # xmm0 = xmm1[2,0],xmm0[1,2] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_4012 -; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] -; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: @shuffle_v4i32_4012 -; SSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] -; SSE3-NEXT: movaps %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: @shuffle_v4i32_4012 -; SSSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] -; SSSE3-NEXT: movaps %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: @shuffle_v4i32_4012 -; SSE41: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; SSE41-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] -; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: @shuffle_v4i32_4012 -; AVX1: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] -; AVX1-NEXT: vshufps {{.*}} # xmm0 = xmm1[0,2],xmm0[1,2] -; AVX1-NEXT: retq +; SSE-LABEL: @shuffle_v4i32_4012 +; SSE: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] +; SSE-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2] +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: @shuffle_v4i32_4012 +; AVX: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0] +; AVX-NEXT: vshufps {{.*}} # xmm0 = xmm1[0,2],xmm0[1,2] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -296,29 +260,14 @@ define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { ret <4 x i32> %shuffle } define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) { -; SSE2-LABEL: @shuffle_v4i32_4501 -; SSE2: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: @shuffle_v4i32_4501 -; SSE3: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] -; SSE3-NEXT: movdqa %xmm1, %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: @shuffle_v4i32_4501 -; SSSE3: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] -; SSSE3-NEXT: movdqa %xmm1, %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: @shuffle_v4i32_4501 -; SSE41: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: @shuffle_v4i32_4501 -; AVX1: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: retq +; SSE-LABEL: @shuffle_v4i32_4501 +; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: @shuffle_v4i32_4501 +; AVX: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -356,10 +305,10 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) { ; SSE41-NEXT: movaps %[[X]], %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_4zzz -; AVX1: vxorps %[[X:xmm[0-9]+]], %[[X]] -; AVX1-NEXT: vblendps {{.*}} # xmm0 = xmm0[0],[[X]][1,2,3] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_4zzz +; AVX: vxorps %[[X:xmm[0-9]+]], %[[X]] +; AVX-NEXT: vblendps {{.*}} # xmm0 = xmm0[0],[[X]][1,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } @@ -387,9 +336,9 @@ define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) { ; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_z4zz -; AVX1: vinsertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_z4zz +; AVX: vinsertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } @@ -420,9 +369,9 @@ define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) { ; SSE41: insertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_zz4z -; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_zz4z +; AVX: vinsertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } @@ -450,9 +399,9 @@ define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) { ; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_zuu4 -; AVX1: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_zuu4 +; AVX: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } @@ -485,10 +434,10 @@ define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) { ; SSE41-NEXT: movaps %[[X]], %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_zzz7 -; AVX1: vxorps %[[X:xmm[0-9]+]], %[[X]] -; AVX1-NEXT: vblendps {{.*}} # xmm0 = [[X]][0,1,2],xmm0[3] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_zzz7 +; AVX: vxorps %[[X:xmm[0-9]+]], %[[X]] +; AVX-NEXT: vblendps {{.*}} # xmm0 = [[X]][0,1,2],xmm0[3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } @@ -516,9 +465,9 @@ define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) { ; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4f32_z6zz -; AVX1: vinsertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4f32_z6zz +; AVX: vinsertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero +; AVX-NEXT: retq %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> ret <4 x float> %shuffle } @@ -597,10 +546,10 @@ define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) { ; SSE41-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_7012 -; AVX1: # BB#0: -; AVX1-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_7012 +; AVX: # BB#0: +; AVX-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -628,10 +577,10 @@ define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) { ; SSE41-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_6701 -; AVX1: # BB#0: -; AVX1-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_6701 +; AVX: # BB#0: +; AVX-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -661,10 +610,10 @@ define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) { ; SSE41-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_5670 -; AVX1: # BB#0: -; AVX1-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_5670 +; AVX: # BB#0: +; AVX-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -694,10 +643,10 @@ define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) { ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_1234 -; AVX1: # BB#0: -; AVX1-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_1234 +; AVX: # BB#0: +; AVX-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -725,10 +674,10 @@ define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_2345 -; AVX1: # BB#0: -; AVX1-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_2345 +; AVX: # BB#0: +; AVX-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -758,10 +707,10 @@ define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) { ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_3456 -; AVX1: # BB#0: -; AVX1-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_3456 +; AVX: # BB#0: +; AVX-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11] +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle } @@ -799,10 +748,10 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { ; SSE41-NEXT: pmovzxdq %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: @shuffle_v4i32_0z1z -; AVX1: # BB#0: -; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0 -; AVX1-NEXT: retq +; AVX-LABEL: @shuffle_v4i32_0z1z +; AVX: # BB#0: +; AVX-NEXT: vpmovzxdq %xmm0, %xmm0 +; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> ret <4 x i32> %shuffle } @@ -829,39 +778,18 @@ define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) { } define <4 x float> @insert_reg_and_zero_v4f32(float %a) { -; SSE2-LABEL: @insert_reg_and_zero_v4f32 -; SSE2: # BB#0: -; SSE2-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE2-NEXT: movss %xmm0, %[[X]] -; SSE2-NEXT: movaps %[[X]], %xmm0 -; SSE2-NEXT: retq -; -; SSE3-LABEL: @insert_reg_and_zero_v4f32 -; SSE3: # BB#0: -; SSE3-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE3-NEXT: movss %xmm0, %[[X]] -; SSE3-NEXT: movaps %[[X]], %xmm0 -; SSE3-NEXT: retq -; -; SSSE3-LABEL: @insert_reg_and_zero_v4f32 -; SSSE3: # BB#0: -; SSSE3-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSSE3-NEXT: movss %xmm0, %[[X]] -; SSSE3-NEXT: movaps %[[X]], %xmm0 -; SSSE3-NEXT: retq -; -; SSE41-LABEL: @insert_reg_and_zero_v4f32 -; SSE41: # BB#0: -; SSE41-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]] -; SSE41-NEXT: movss %xmm0, %[[X]] -; SSE41-NEXT: movaps %[[X]], %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: @insert_reg_and_zero_v4f32 -; AVX1: # BB#0: -; AVX1-NEXT: vxorps %[[X:xmm[0-9]+]], %[[X]], %[[X]] -; AVX1-NEXT: vmovss %xmm0, %[[X]], %xmm0 -; AVX1-NEXT: retq +; SSE-LABEL: @insert_reg_and_zero_v4f32 +; SSE: # BB#0: +; SSE-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]] +; SSE-NEXT: movss %xmm0, %[[X]] +; SSE-NEXT: movaps %[[X]], %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: @insert_reg_and_zero_v4f32 +; AVX: # BB#0: +; AVX-NEXT: vxorps %[[X:xmm[0-9]+]], %[[X]], %[[X]] +; AVX-NEXT: vmovss %xmm0, %[[X]], %xmm0 +; AVX-NEXT: retq %v = insertelement <4 x float> undef, float %a, i32 0 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> ret <4 x float> %shuffle -- 2.34.1