From 70b63374f24a8e05389bb410372d6b83230df227 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Fri, 18 Apr 2014 09:31:20 +0000
Subject: [PATCH] ARM64: implement cunning optimisation from AArch64

A vector extract followed by a dup can become a single instruction even if the
types don't match. AArch64 handled this in ISelLowering, but a few reasonably
simple patterns can take care of it in TableGen, so that's where I've put it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206573 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM64/ARM64InstrInfo.td | 53 ++++++++++++++++++++++++++++++
 test/CodeGen/ARM64/dup.ll | 5 +--
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index 509e215f823..53d1dbe2dfd 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -3026,6 +3026,59 @@ def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
 def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
           (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
 
+// If there's an (ARM64dup (vector_extract ...) ...), we can use a duplane
+// instruction even if the types don't match: we just have to remap the lane
+// carefully. N.b. this trick only applies to truncations.
+def VecIndex_x2 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
+}]>;
+def VecIndex_x4 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
+}]>;
+def VecIndex_x8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
+}]>;
+
+multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
+                            ValueType Src128VT, ValueType ScalVT,
+                            Instruction DUP, SDNodeXForm IdxXFORM> {
+  def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
+                                                     imm:$idx)))),
+            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+  def : Pat<(ResVT (ARM64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
+                                                     imm:$idx)))),
+            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
+defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
+defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
+
+defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
+
+multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
+                               SDNodeXForm IdxXFORM> {
+  def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
+                                                         imm:$idx))))),
+            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+  def : Pat<(ResVT (ARM64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
+                                                         imm:$idx))))),
+            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
+
+defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
+
+// SMOV and UMOV definitions, with some extra patterns for convenience
 defm SMOV : SMov;
 defm UMOV : UMov;
 
diff --git a/test/CodeGen/ARM64/dup.ll b/test/CodeGen/ARM64/dup.ll
index e65957522b7..97774a7d18f 100644
--- a/test/CodeGen/ARM64/dup.ll
+++ b/test/CodeGen/ARM64/dup.ll
@@ -297,10 +297,11 @@ define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
 ; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16>
 ; BUILD_VECTOR will have an i32 as its source). In that case, the operation is
 ; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
+;
+; *However*, it is a dup vD.4h, vN.h[2*idx].
 define <4 x i16> @test_build_illegal(<4 x i32> %in) {
 ; CHECK-LABEL: test_build_illegal:
-; CHECK: umov.s [[WTMP:w[0-9]+]], v0[3]
-; CHECK: dup.4h v0, [[WTMP]]
+; CHECK: dup.4h v0, v0[6]
   %val = extractelement <4 x i32> %in, i32 3
   %smallval = trunc i32 %val to i16
   %vec = insertelement <4x i16> undef, i16 %smallval, i32 3
-- 
2.34.1