From 0810814bcc9130967db7d7dfe38a1179bf56b5e5 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 24 Jun 2015 00:07:16 +0000 Subject: [PATCH] [X86] Don't generate vbroadcasti128 for v4i64 splats from memory. We used to erroneously match: (v4i64 shuffle (v2i64 load), <0,0,0,0>) Whereas vbroadcasti128 is more like: (v4i64 shuffle (v2i64 load), <0,1,0,1>) This problem doesn't exist for vbroadcastf128, which kept matching the intrinsic after r231182. We should perhaps re-introduce the intrinsic here as well, but that's a separate issue still being discussed. While there, add some proper vbroadcastf128 tests. We don't currently match those, like for loading vbroadcastsd/ss on AVX (the reg-reg broadcasts were added in AVX2). Fixes PR23886. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240488 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 9 ++-- test/CodeGen/X86/vector-shuffle-256-v4.ll | 54 +++++++++++++++++++++++ 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 95629184f2c..2a896dfe8aa 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7860,10 +7860,11 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, int_x86_avx2_vbroadcast_sd_pd_256, WriteFShuffle256>, VEX_L; -let Predicates = [HasAVX2] in -def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256, - i128mem, v4i64, loadv2i64, - WriteLoad>, VEX_L; +let mayLoad = 1, Predicates = [HasAVX2] in +def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), + (ins i128mem:$src), + "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, + Sched<[WriteLoad]>, VEX, VEX_L; let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index f5cc989de34..62bf288a870 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ 
b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -900,6 +900,60 @@ define <4 x double> @splat_v4f64(<2 x double> %r) { ret <4 x double> %1 } +define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) { +; AVX1-LABEL: splat_mem_v4i64_from_v2i64: +; AVX1: # BB#0: +; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splat_mem_v4i64_from_v2i64: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX2-NEXT: retq + %v = load <2 x i64>, <2 x i64>* %ptr + %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> + ret <4 x i64> %shuffle +} + +define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) { +; AVX1-LABEL: splat_mem_v4f64_from_v2f64: +; AVX1: # BB#0: +; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splat_mem_v4f64_from_v2f64: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX2-NEXT: retq + %v = load <2 x double>, <2 x double>* %ptr + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> + ret <4 x double> %shuffle +} + +define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) { +; ALL-LABEL: splat128_mem_v4i64_from_v2i64: +; ALL: # BB#0: +; ALL-NEXT: vmovaps (%rdi), %xmm0 +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %v = load <2 x i64>, <2 x i64>* %ptr + %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + ret <4 x i64> %shuffle +} + +define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) { +; ALL-LABEL: splat128_mem_v4f64_from_v2f64: +; ALL: # BB#0: +; ALL-NEXT: vmovaps (%rdi), %xmm0 +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %v = load <2 x double>, <2 x double>* %ptr + %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + ret <4 x double> %shuffle +} + define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) { ; 
AVX1-LABEL: bitcast_v4f64_0426: ; AVX1: # BB#0: -- 2.34.1