From: Nadav Rotem
Date: Thu, 27 Jun 2013 17:52:04 +0000 (+0000)
Subject: CostModel: improve the cost model for load/store of non power-of-two types such as...
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=16d36a5cd1a581dfac79a4616b6b9602a43b6cd1;p=oota-llvm.git

CostModel: improve the cost model for load/store of non power-of-two types such as <3 x float>, which are popular in graphics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185085 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 3bcdfc1be50..ac63db50dc1 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -539,8 +539,54 @@ unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
   return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
 }
 
+// Estimate the overhead of scalarizing the vector type Ty: for every element
+// index, add the TopTTI cost of an InsertElement (when Insert is set) and/or
+// of an ExtractElement (when Extract is set). Ty must be a vector type.
+unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert,
+                                          bool Extract) const {
+  assert (Ty->isVectorTy() && "Can only scalarize vectors");
+  unsigned Cost = 0;
+
+  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+    if (Insert)
+      Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+    if (Extract)
+      Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+  }
+
+  return Cost;
+}
+
 unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                  unsigned AddressSpace) const {
+  // Handle non power of two vectors such as <3 x float>
+  if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
+    unsigned NumElem = VTy->getVectorNumElements();
+
+    // Handle a few common cases:
+    // <3 x float>
+    if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
+      // Cost = 64 bit store + extract + 32 bit store.
+      return 3;
+
+    // <3 x double>
+    if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
+      // Cost = 128 bit store + unpack + 64 bit store.
+      return 3;
+
+    // Assume that all other non power-of-two numbers are scalarized.
+    if (!isPowerOf2_32(NumElem)) {
+      unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode,
+                                                           VTy->getScalarType(),
+                                                           Alignment,
+                                                           AddressSpace);
+      unsigned SplitCost = getScalarizationOverhead(Src,
+                                                    Opcode == Instruction::Load,
+                                                    Opcode==Instruction::Store);
+      return NumElem * Cost + SplitCost;
+    }
+  }
+
   // Legalize the type.
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll
index 4195b1d879a..a53d0bd4e99 100644
--- a/test/Analysis/CostModel/X86/load_store.ll
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -59,6 +59,25 @@ define i32 @loads(i32 %arg) {
   ;CHECK: cost of 4 {{.*}} load
   load <8 x i64>* undef, align 4
 
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x float>* undef, align 4
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x double>* undef, align 4
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x i32>* undef, align 4
+
+  ;CHECK: cost of 3 {{.*}} load
+  load <3 x i64>* undef, align 4
+
+  ;CHECK: cost of 10 {{.*}} load
+  load <5 x i32>* undef, align 4
+
+  ;CHECK: cost of 10 {{.*}} load
+  load <5 x i64>* undef, align 4
+
   ret i32 undef
 }
 