From 1127a30ffed10b2434dc44abb51c21847ab6f8fd Mon Sep 17 00:00:00 2001 From: Silviu Baranga Date: Mon, 17 Aug 2015 16:05:09 +0000 Subject: [PATCH] [CostModel][AArch64] Increase cost of vector insert element and add missing cast costs Summary: Increase the estimated costs for insert/extract element operations on AArch64. This is motivated by results from benchmarking interleaved accesses. Add missing costs for zext/sext/trunc instructions and some integer to floating point conversions. These costs were previously calculated by scalarizing these operation and were affected by the cost increase of the insert/extract element operations. Reviewers: rengolin Subscribers: mcrosier, aemerson, rengolin, llvm-commits Differential Revision: http://reviews.llvm.org/D11939 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245226 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/AArch64TargetTransformInfo.cpp | 34 ++++++++++++++++++- .../SLPVectorizer/AArch64/commute.ll | 2 +- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 3098e8d6d46..79f657fa26b 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -187,6 +187,28 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return BaseT::getCastInstrCost(Opcode, Dst, Src); static const TypeConversionCostTblEntry ConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + + // The number of shll instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, + // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, @@ -209,6 +231,16 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + // Complex: to v8f32 + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + + // Complex: to v16f32 + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, + // Complex: to v2f64 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, @@ -280,7 +312,7 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, } // All other insert/extracts cost this much. - return 2; + return 3; } int AArch64TTIImpl::getArithmeticInstrCost( diff --git a/test/Transforms/SLPVectorizer/AArch64/commute.ll b/test/Transforms/SLPVectorizer/AArch64/commute.ll index 1cff73d9f69..2bce59c6200 100644 --- a/test/Transforms/SLPVectorizer/AArch64/commute.ll +++ b/test/Transforms/SLPVectorizer/AArch64/commute.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -slp-vectorizer %s | FileCheck %s +; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" -- 2.34.1