From 4acd20a20be9f7d91ed35c1c6a501cec1605e854 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Sep 2013 03:55:53 +0000 Subject: [PATCH] Lift alignment restrictions for load/store folding on VINSERTF128/VEXTRACTF128. Fixes PR17268. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190916 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 20 ++++++++++---------- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- test/CodeGen/X86/avx-shuffle.ll | 2 -- tools/opt/opt.cpp | 1 + 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2b271e92c59..81ea2752e69 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7716,11 +7716,11 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; @@ -7744,22 +7744,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; -def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), +def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), - (bc_v4i32 (memopv2i64 addr:$src2)), + (bc_v4i32 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), - (bc_v16i8 (memopv2i64 addr:$src2)), + (bc_v16i8 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), - (bc_v8i16 (memopv2i64 addr:$src2)), + (bc_v8i16 (loadv2i64 addr:$src2)), (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; @@ -7791,12 +7791,12 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), (v4f64 VR256:$src1), (EXTRACT_get_vextract128_imm VR128:$ext)))>; -def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTF128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; -def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1), - (iPTR imm))), addr:$dst), +def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1), + (iPTR imm))), addr:$dst), (VEXTRACTF128mr addr:$dst, VR256:$src1, (EXTRACT_get_vextract128_imm VR128:$ext))>; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b287ca7c8d5..c2427e72d84 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -51,7 +51,7 @@ static cl::opt "number ")); namespace { -static const unsigned MinVecRegSize = 128; +static const unsigned MinVecRegSize = 256; static const unsigned RecursionMaxDepth = 12; diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index a625601961e..b0e64d10275 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -251,8 +251,6 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ; CHECK: swap8doubles ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} -; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} -; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 94f4cca9681..71a9c02b511 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -462,6 +462,7 @@ static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM, DisableLoopUnrolling : OptLevel == 0; Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2; + Builder.SLPVectorize = true; Builder.populateFunctionPassManager(FPM); Builder.populateModulePassManager(MPM); -- 2.34.1