From 9a2cfffdb6340c54ff553c1b81364d0f17fa8f45 Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Mon, 4 Mar 2013 18:13:57 +0000 Subject: [PATCH] Bypass Slow Divides * Only apply divide bypass optimization when not optimizing for size. * Fixed bug caused by constant for 0 value of type Int32, used dividend type to generate the constant instead. * For atom x86-64 apply the divide bypass to use 16-bit divides instead of 64-bit divides when operand values are small enough. * Added lit tests for 64-bit divide bypass. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 ++- lib/Transforms/Scalar/CodeGenPrepare.cpp | 2 +- lib/Transforms/Utils/BypassSlowDivision.cpp | 4 +- .../X86/atom-bypass-slow-division-64.ll | 46 +++++++++++++++++++ test/CodeGen/X86/atom-bypass-slow-division.ll | 2 +- 5 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/X86/atom-bypass-slow-division-64.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 451acca5f32..5d12b0a3922 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -181,9 +181,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); - // Bypass i32 with i8 on Atom when compiling with O2 - if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + // Bypass expensive divides on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) { addBypassSlowDiv(32, 8); + if (Subtarget->is64Bit()) + addBypassSlowDiv(64, 16); + } if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. 
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index d71dd5dec63..015fd2e6e6f 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. - if (TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); for (Function::iterator I = F.begin(); I != F.end(); I++) diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 00cda8e0346..1f517d038d1 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F, Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); // Compare operand values and branch - Value *ZeroV = MainBuilder.getInt32(0); + Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); @@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F, // Get bitwidth of div/rem instruction IntegerType *T = cast<IntegerType>(J->getType()); - int bitwidth = T->getBitWidth(); + unsigned int bitwidth = T->getBitWidth(); // Continue if bitwidth is not bypassed DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll new file mode 100644 index 00000000000..a3bbea3c996 --- /dev/null +++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s + +; Additional tests for 64-bit divide bypass + +define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind { +; 
CHECK: Test_get_quotient: +; CHECK: orq %rsi, %rcx +; CHECK-NEXT: testq $-65536, %rcx +; CHECK-NEXT: je +; CHECK: idivq +; CHECK: ret +; CHECK: divw +; CHECK: ret + %result = sdiv i64 %a, %b + ret i64 %result +} + +define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind { +; CHECK: Test_get_remainder: +; CHECK: orq %rsi, %rcx +; CHECK-NEXT: testq $-65536, %rcx +; CHECK-NEXT: je +; CHECK: idivq +; CHECK: ret +; CHECK: divw +; CHECK: ret + %result = srem i64 %a, %b + ret i64 %result +} + +define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind { +; CHECK: Test_get_quotient_and_remainder: +; CHECK: orq %rsi, %rcx +; CHECK-NEXT: testq $-65536, %rcx +; CHECK-NEXT: je +; CHECK: idivq +; CHECK: divw +; CHECK: addq +; CHECK: ret +; CHECK-NOT: idivq +; CHECK-NOT: divw + %resultdiv = sdiv i64 %a, %b + %resultrem = srem i64 %a, %b + %result = add i64 %resultdiv, %resultrem + ret i64 %result +} diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll index 453e72672bb..4612940445c 100644 --- a/test/CodeGen/X86/atom-bypass-slow-division.ll +++ b/test/CodeGen/X86/atom-bypass-slow-division.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind { ; CHECK: Test_get_quotient: -- 2.34.1