Bypass Slow Divides

author Preston Gurd <preston.gurd@intel.com>

Mon, 4 Mar 2013 18:13:57 +0000 (18:13 +0000)

committer Preston Gurd <preston.gurd@intel.com>

Mon, 4 Mar 2013 18:13:57 +0000 (18:13 +0000)
author Preston Gurd <preston.gurd@intel.com>
Mon, 4 Mar 2013 18:13:57 +0000 (18:13 +0000)
committer Preston Gurd <preston.gurd@intel.com>
Mon, 4 Mar 2013 18:13:57 +0000 (18:13 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 451acca5f32e1cbde4b70708d7781406498a5e61..5d12b0a3922716fe2abaad96b2ce0b175335f17d 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -181,9 +181,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
      setSchedulingPreference(Sched::RegPressure);
    setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
  
-  // Bypass i32 with i8 on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+  // Bypass expensive divides on Atom when compiling with O2
+  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
      addBypassSlowDiv(32, 8);
+    if (Subtarget->is64Bit())
+      addBypassSlowDiv(64, 16);
+  }
  
    if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
      // Setup Windows compiler runtime calls.
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp

index d71dd5dec63efd4c17e03390492d8f6ef02018a7..015fd2e6e6fcca45f498ed8a6ec1891f764b9868 100644 (file)
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
  
    /// This optimization identifies DIV instructions that can be
    /// profitably bypassed and carried out with a shorter, faster divide.
-  if (TLI && TLI->isSlowDivBypassed()) {
+  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
      const DenseMap<unsigned int, unsigned int> &BypassWidths =
         TLI->getBypassSlowDivWidths();
      for (Function::iterator I = F.begin(); I != F.end(); I++)
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp

index 00cda8e034644d20ff7f416d2e2462ecd3aa0bac..1f517d038d1951b6a77208d28b42a59ae17ab869 100644 (file)
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
    Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
  
    // Compare operand values and branch
-  Value *ZeroV = MainBuilder.getInt32(0);
+  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
    Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
    MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
  
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
  
      // Get bitwidth of div/rem instruction
      IntegerType *T = cast<IntegerType>(J->getType());
-    int bitwidth = T->getBitWidth();
+    unsigned int bitwidth = T->getBitWidth();
  
      // Continue if bitwidth is not bypassed
      DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
diff --git a/test/CodeGen/X86/atom-bypass-slow-division-64.ll b/test/CodeGen/X86/atom-bypass-slow-division-64.ll

new file mode 100644 (file)

index 0000000..a3bbea3
--- /dev/null
+++ b/test/CodeGen/X86/atom-bypass-slow-division-64.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s
+
+; Additional tests for 64-bit divide bypass
+
+define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+  %result = sdiv i64 %a, %b
+  ret i64 %result
+}
+
+define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+  %result = srem i64 %a, %b
+  ret i64 %result
+}
+
+define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient_and_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: divw
+; CHECK: addq
+; CHECK: ret
+; CHECK-NOT: idivq
+; CHECK-NOT: divw
+  %resultdiv = sdiv i64 %a, %b
+  %resultrem = srem i64 %a, %b
+  %result = add i64 %resultdiv, %resultrem
+  ret i64 %result
+}
diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll

index 453e72672bbc0ecafd829afc5f104ab17197de4a..4612940445cb5865381973e80405cab2c858b409 100644 (file)
--- a/test/CodeGen/X86/atom-bypass-slow-division.ll
+++ b/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
  
  define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
  ; CHECK: Test_get_quotient:
author	Preston Gurd <preston.gurd@intel.com>
	Mon, 4 Mar 2013 18:13:57 +0000 (18:13 +0000)
committer	Preston Gurd <preston.gurd@intel.com>
	Mon, 4 Mar 2013 18:13:57 +0000 (18:13 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Transforms/Scalar/CodeGenPrepare.cpp		patch | blob | history
lib/Transforms/Utils/BypassSlowDivision.cpp		patch | blob | history
test/CodeGen/X86/atom-bypass-slow-division-64.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/X86/atom-bypass-slow-division.ll		patch | blob | history