From 83c8e732dd5288ce03391b49095f9b5a4de1f8b6 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Wed, 12 Nov 2014 18:09:15 +0000 Subject: [PATCH] Disable indvar widening if arithmetics on the wider type are more expensive Summary: Reapply r221772. The old patch breaks the bot because the @indvar_32_bit test was run whether NVPTX was enabled or not. IndVarSimplify should not widen an indvar if arithmetics on the wider indvar are more expensive than those on the narrower indvar. For instance, although NVPTX64 treats i64 as a legal type, an ADD on i64 is twice as expensive as that on i32, because the hardware needs to simulate a 64-bit integer using two 32-bit integers. Split from D6188, and based on D6195 which adds NVPTXTargetTransformInfo. Fixes PR21148. Test Plan: Added @indvar_32_bit that verifies we do not widen an indvar if the arithmetics on the wider type are more expensive. This test is run only when NVPTX is enabled. Reviewers: jholewinski, eliben, meheff, atrick Reviewed By: atrick Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D6196 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221799 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 41 ++++++++++++++++++- lib/Transforms/Scalar/IndVarSimplify.cpp | 40 +++++++++++++----- .../IndVarSimplify/NVPTX/lit.local.cfg | 2 + .../NVPTX/no-widen-expensive.ll | 37 +++++++++++++++++ 4 files changed, 108 insertions(+), 12 deletions(-) create mode 100644 test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg create mode 100644 test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index bcac6392a67..5aea7021e42 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -36,12 +36,14 @@ void initializeNVPTXTTIPass(PassRegistry &); namespace { class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo { + const NVPTXTargetLowering *TLI; public: - NVPTXTTI() : ImmutablePass(ID) { + NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } - NVPTXTTI(const NVPTXTargetMachine *TM) : ImmutablePass(ID) { + NVPTXTTI(const NVPTXTargetMachine *TM) + : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) { initializeNVPTXTTIPass(*PassRegistry::getPassRegistry()); } @@ -63,6 +65,12 @@ public: bool hasBranchDivergence() const override; + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const override; + /// @} }; @@ -78,3 +86,32 @@ llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) { } bool NVPTXTTI::hasBranchDivergence() const { return true; } + +unsigned NVPTXTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Ty); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + switch (ISD) { + default: + return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + case ISD::ADD: + case ISD::MUL: + case ISD::XOR: + case ISD::OR: + case ISD::AND: + // The machine code (SASS) simulates an i64 with two i32. Therefore, we + // estimate that arithmetic operations on i64 are twice as expensive as + // those on types that can fit into one machine register. + if (LT.second.SimpleTy == MVT::i64) + return 2 * LT.first; + // Delegate other cases to the basic TTI. + return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + } +} diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 03dffb66f35..bf90912ebd0 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -69,11 +70,12 @@ static cl::opt ReduceLiveIVs("liv-reduce", cl::Hidden, namespace { class IndVarSimplify : public LoopPass { - LoopInfo *LI; - ScalarEvolution *SE; - DominatorTree *DT; - const DataLayout *DL; - TargetLibraryInfo *TLI; + LoopInfo *LI; + ScalarEvolution *SE; + DominatorTree *DT; + const DataLayout *DL; + TargetLibraryInfo *TLI; + const TargetTransformInfo *TTI; SmallVector DeadInsts; bool Changed; @@ -661,7 +663,7 @@ namespace { /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, - const DataLayout *DL) { + const DataLayout *DL, const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) return; @@ -671,6 +673,19 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, if (DL && !DL->isLegalInteger(Width)) return; + // Cast is either an sext or zext up to this point. + // We should not widen an indvar if arithmetics on the wider indvar are more + // expensive than those on the narrower indvar. We check only the cost of ADD + // because at least an ADD is required to increment the induction variable. We + // could compute more comprehensively the cost of all instructions on the + // induction variable when necessary. + if (TTI && + TTI->getArithmeticInstrCost(Instruction::Add, Ty) > + TTI->getArithmeticInstrCost(Instruction::Add, + Cast->getOperand(0)->getType())) { + return; + } + if (!WI.WidestNativeType) { WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); WI.IsSigned = IsSigned; @@ -1187,14 +1202,16 @@ namespace { class IndVarSimplifyVisitor : public IVVisitor { ScalarEvolution *SE; const DataLayout *DL; + const TargetTransformInfo *TTI; PHINode *IVPhi; public: WideIVInfo WI; IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, - const DataLayout *DL, const DominatorTree *DTree): - SE(SCEV), DL(DL), IVPhi(IV) { + const DataLayout *DL, const TargetTransformInfo *TTI, + const DominatorTree *DTree) + : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) { DT = DTree; WI.NarrowIV = IVPhi; if (ReduceLiveIVs) @@ -1202,7 +1219,9 @@ namespace { } // Implement the interface used by simplifyUsersOfIV. - void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, DL); } + void visitCast(CastInst *Cast) override { + visitIVCast(Cast, WI, SE, DL, TTI); + } }; } @@ -1236,7 +1255,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. - IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, DT); + IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT); Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); @@ -1895,6 +1914,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DataLayoutPass *DLP = getAnalysisIfAvailable(); DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = getAnalysisIfAvailable(); + TTI = getAnalysisIfAvailable(); DeadInsts.clear(); Changed = false; diff --git a/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg new file mode 100644 index 00000000000..2cb98eb371b --- /dev/null +++ b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'NVPTX' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll new file mode 100644 index 00000000000..8744b19d5e2 --- /dev/null +++ b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -indvars -S | FileCheck %s + +target triple = "nvptx64-unknown-unknown" + +; For the nvptx64 architecture, the cost of an arithmetic instruction on a +; 64-bit integer is twice as expensive as that on a 32-bit integer, because the +; hardware needs to simulate a 64-bit integer using two 32-bit integers. +; Therefore, in this particular architecture, we should not widen induction +; variables to 64-bit integers even though i64 is a legal type in the 64-bit +; PTX ISA. + +define void @indvar_32_bit(i32 %n, i32* nocapture %output) { +; CHECK-LABEL: @indvar_32_bit +entry: + %cmp5 = icmp sgt i32 %n, 0 + br i1 %cmp5, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.06 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ] +; CHECK: phi i32 + %mul = mul nsw i32 %i.06, %i.06 + %0 = sext i32 %i.06 to i64 + %arrayidx = getelementptr inbounds i32* %output, i64 %0 + store i32 %mul, i32* %arrayidx, align 4 + %add = add nsw i32 %i.06, 3 + %cmp = icmp slt i32 %add, %n + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} -- 2.34.1