From 0e8c1f5ca33f413cdd320fd1dcfebdba34b14f06 Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Tue, 24 Dec 2013 08:11:47 +0000 Subject: [PATCH] [AArch64 NEON] Fix a pattern match failure with NEON_VDUP. This failure was caused by an improper condition when lowering shuffle_vector to scalar_to_vector. After this patch, NEON_VDUP with v1i64 will not be generated. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197966 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +--- lib/Target/AArch64/AArch64InstrNEON.td | 10 ++++++--- test/CodeGen/AArch64/neon-simd-ldst-one.ll | 25 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 1b75d0571ae..f72dfe46de7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4070,9 +4070,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); - // Loads are better lowered with insert_vector_elt. - // Keep going if we are hitting this case. 
- if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) + if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 43c2bf48c7d..cd063d3d2fe 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -3690,12 +3690,16 @@ def : LD1R_pattern; def : LD1R_pattern; def : LD1R_pattern; -def : LD1R_pattern; -def : LD1R_pattern; - def : LD1R_pattern; def : LD1R_pattern; +class LD1R_pattern_v1 + : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))), + (VTy (INST GPR64xsp:$Rn))>; + +def : LD1R_pattern_v1; +def : LD1R_pattern_v1; multiclass VectorList_Bare_BHSD { diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll index 9d49d2358fa..58a42c76a65 100644 --- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll +++ b/test/CodeGen/AArch64/neon-simd-ldst-one.ll @@ -236,6 +236,31 @@ entry: ret <1 x double> %1 } +define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and FMOV should be emitted. +; CHECK-LABEL: testDUP.v1i64 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}] + %1 = load i64* %a, align 8 + store i64 %1, i64* %b, align 8 + %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 + ret <1 x i64> %vecinit.i +} + +define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So a plain LDR into a D register should be emitted; no FMOV is checked here. 
+; CHECK-LABEL: testDUP.v1f64 +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] + %1 = load double* %a, align 8 + store double %1, double* %b, align 8 + %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 + ret <1 x double> %vecinit.i +} + define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld2q_dup_s8 ; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0] -- 2.34.1