From: Hal Finkel Date: Sun, 13 Apr 2014 23:02:40 +0000 (+0000) Subject: [PowerPC] [Constant Hoisting] Enable constant hoisting on PPC X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b9ed50cf1772205b5a8f3ce2b604d01f9335e360;p=oota-llvm.git [PowerPC] [Constant Hoisting] Enable constant hoisting on PPC Implements the various TTI functions to enable constant hoisting on PPC. The only significant test-suite change is this: MultiSource/Benchmarks/VersaBench/bmm/bmm - 20% speedup (which essentially reverses the slowdown from r206120). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206141 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 23a75cf39df..7ebf0b4c95f 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -18,11 +18,15 @@ #include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; +static cl::opt DisablePPCConstHoist("disable-ppc-constant-hoisting", +cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); + // Declare the pass initialization routine locally as target-specific passes // don't havve a target-wide initialization entry point, and so we rely on the // pass constructor initialization. @@ -67,6 +71,13 @@ public: /// \name Scalar TTI Implementations /// @{ + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; virtual void getUnrollingPreferences( @@ -123,6 +134,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { return PSK_Software; } +unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + if (Imm == 0) + return TCC_Free; + + if (Imm.getBitWidth() <= 64) { + if (isInt<16>(Imm.getSExtValue())) + return TCC_Basic; + + if (isInt<32>(Imm.getSExtValue())) { + // A constant that can be materialized using lis. + if ((Imm.getZExtValue() & 0xFFFF) == 0) + return TCC_Basic; + + return 2 * TCC_Basic; + } + } + + return 4 * TCC_Basic; +} + +unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + switch (IID) { + default: return TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue())) + return TCC_Free; + break; + } + return PPCTTI::getIntImmCost(Imm, Ty); +} + +unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + unsigned ImmIdx = ~0U; + bool ShiftedFree = false, RunFree = false, UnsignedFree = false, + ZeroFree = false; + switch (Opcode) { + default: return TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TCC_Basic; + return TCC_Free; + case Instruction::And: + RunFree = true; // (for the rotate-and-mask instructions) + // Fallthrough... + case Instruction::Add: + case Instruction::Or: + case Instruction::Xor: + ShiftedFree = true; + // Fallthrough... + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + ImmIdx = 1; + break; + case Instruction::ICmp: + UnsignedFree = true; + ImmIdx = 1; + // Fallthrough... (zero comparisons can use record-form instructions) + case Instruction::Select: + ZeroFree = true; + break; + case Instruction::PHI: + case Instruction::Call: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Store: + break; + } + + if (ZeroFree && Imm == 0) + return TCC_Free; + + if (Idx == ImmIdx && Imm.getBitWidth() <= 64) { + if (isInt<16>(Imm.getSExtValue())) + return TCC_Free; + + if (RunFree) { + if (Imm.getBitWidth() <= 32 && + (isShiftedMask_32(Imm.getZExtValue()) || + isShiftedMask_32(~Imm.getZExtValue()))) + return TCC_Free; + + + if (ST->isPPC64() && + (isShiftedMask_64(Imm.getZExtValue()) || + isShiftedMask_64(~Imm.getZExtValue()))) + return TCC_Free; + } + + if (UnsignedFree && isUInt<16>(Imm.getZExtValue())) + return TCC_Free; + + if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0) + return TCC_Free; + } + + return PPCTTI::getIntImmCost(Imm, Ty); +} + void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { if (ST->getDarwinDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling diff --git a/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll b/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll new file mode 100644 index 00000000000..b4337eeda65 --- /dev/null +++ b/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll @@ -0,0 +1,23 @@ +; RUN: opt -S -consthoist < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%T = type { i32, i32, i32, i32 } + +; Test if even cheap base addresses are hoisted. +define i32 @test1() nounwind { +; CHECK-LABEL: @test1 +; CHECK: %const = bitcast i32 12345678 to i32 +; CHECK: %1 = inttoptr i32 %const to %T* +; CHECK: %addr1 = getelementptr %T* %1, i32 0, i32 1 + %addr1 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1 + %tmp1 = load i32* %addr1 + %addr2 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2 + %tmp2 = load i32* %addr2 + %addr3 = getelementptr %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3 + %tmp3 = load i32* %addr3 + %tmp4 = add i32 %tmp1, %tmp2 + %tmp5 = add i32 %tmp3, %tmp4 + ret i32 %tmp5 +} + diff --git a/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..2e463005586 --- /dev/null +++ b/test/Transforms/ConstantHoisting/PowerPC/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True + diff --git a/test/Transforms/ConstantHoisting/PowerPC/masks.ll b/test/Transforms/ConstantHoisting/PowerPC/masks.ll new file mode 100644 index 00000000000..d5531820113 --- /dev/null +++ b/test/Transforms/ConstantHoisting/PowerPC/masks.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -consthoist < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Here the masks are all contiguous, and should not be hoisted. +define i32 @test1() nounwind { +entry: +; CHECK-LABEL: @test1 +; CHECK-NOT: bitcast i32 65535 to i32 +; CHECK: and i32 undef, 65535 + %conv121 = and i32 undef, 65535 + br i1 undef, label %if.then152, label %if.end167 + +if.then152: +; CHECK: and i32 undef, 65535 + %conv153 = and i32 undef, 65535 + br i1 undef, label %if.end167, label %end2 + +if.end167: +; CHECK: and i32 {{.*}}, 32768 + %shl161 = shl nuw nsw i32 %conv121, 15 + %0 = load i8* undef, align 1 + %conv169 = zext i8 %0 to i32 + %shl170 = shl nuw nsw i32 %conv169, 7 + %shl161.masked = and i32 %shl161, 32768 + %conv174 = or i32 %shl170, %shl161.masked + %cmp178 = icmp ugt i32 %conv174, 32767 + br i1 %cmp178, label %end1, label %end2 + +end1: + unreachable + +end2: + unreachable +} + +; Here the masks are not contiguous, and should be hoisted. +define i32 @test2() nounwind { +entry: +; CHECK-LABEL: @test2 +; CHECK: bitcast i32 65531 to i32 + %conv121 = and i32 undef, 65531 + br i1 undef, label %if.then152, label %if.end167 + +if.then152: + %conv153 = and i32 undef, 65531 + br i1 undef, label %if.end167, label %end2 + +if.end167: +; CHECK: add i32 {{.*}}, -32758 + %shl161 = shl nuw nsw i32 %conv121, 15 + %0 = load i8* undef, align 1 + %conv169 = zext i8 %0 to i32 + %shl170 = shl nuw nsw i32 %conv169, 7 + %shl161.masked = and i32 %shl161, 32773 + %conv174 = or i32 %shl170, %shl161.masked + %cmp178 = icmp ugt i32 %conv174, 32767 + br i1 %cmp178, label %end1, label %end2 + +end1: + unreachable + +end2: + unreachable +} +