[X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive...
author	Alexey Volkov <avolkov.intel@gmail.com>
Fri, 21 Nov 2014 11:19:34 +0000 (11:19 +0000)
committer	Alexey Volkov <avolkov.intel@gmail.com>
Fri, 21 Nov 2014 11:19:34 +0000 (11:19 +0000)
Differential Revision: http://reviews.llvm.org/D5938

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222521 91177308-0d34-0410-b5e6-96231b3b80d8

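The change relies on the existing BypassSlowDivision transform: addBypassSlowDiv(64, 16) guards each 64-bit integer division with a runtime check and falls back to a 16-bit divide when both operands fit. A minimal C++ sketch of that runtime behaviour (an illustration only, not the actual machine lowering; the function name is made up):

    #include <cstdint>

    // Roughly what the emitted bypass does for a 64-bit division with
    // non-negative operands: if neither value has bits above the low 16
    // (the "orq + testq $-65536" pattern in the new test), the cheap
    // 16-bit divider is used; otherwise the full 64-bit divide runs.
    static uint64_t div64WithBypass(uint64_t a, uint64_t b) {
      if (((a | b) & ~uint64_t(0xFFFF)) == 0)
        return uint64_t(uint16_t(a) / uint16_t(b)); // fast path: DIVW
      return a / b;                                 // slow path: DIVQ
    }
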
lib/Target/X86/X86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86Subtarget.cpp
lib/Target/X86/X86Subtarget.h
test/CodeGen/X86/slow-div.ll [new file with mode: 0644]

diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 83f55d32975723b4d2a3f5539345f4056e7f97f1..9729f4638cb9548314ad324567850757d0d3436c 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -167,9 +167,12 @@ def FeatureSMAP    : SubtargetFeature<"smap", "HasSMAP", "true",
                                       "Support SMAP instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                                     "HasSlowDivide", "true",
-                                     "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+                                     "HasSlowDivide32", "true",
+                                     "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+                                     "HasSlowDivide64", "true",
+                                     "Use 16-bit divide for positive values less than 65536">;
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
@@ -234,7 +237,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
 def : ProcessorModel<"atom", AtomModel,
                      [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide,
+                      FeatureSlowDivide32, FeatureSlowDivide64,
                       FeatureCallRegIndirect,
                       FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
@@ -244,6 +247,7 @@ def : ProcessorModel<"slm",  SLMModel, [ProcIntelSLM,
                                FeatureSSE42, FeatureCMPXCHG16B,
                                FeatureMOVBE, FeaturePOPCNT,
                                FeaturePCLMUL, FeatureAES,
+                               FeatureSlowDivide64,
                                FeatureCallRegIndirect,
                                FeaturePRFCHW,
                                FeatureSlowLEA, FeatureSlowIncDec,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f05b6c61ca0981927fd3f5fb69ba1d9e7950e989..c9006000f262f53ce5ff2d55c79644ddf5a69072 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -249,9 +249,10 @@ void X86TargetLowering::resetOperationActions() {
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
   // Bypass expensive divides on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
-    addBypassSlowDiv(32, 8);
-    if (Subtarget->is64Bit())
+  if (TM.getOptLevel() >= CodeGenOpt::Default) {
+    if (Subtarget->hasSlowDivide32()) 
+      addBypassSlowDiv(32, 8);
+    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
       addBypassSlowDiv(64, 16);
   }
 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9d877c99e54dbf872540d86ee5c858ed21801c77..afa0173fb10c1dbd138980c6bb7af888c41ed1ab 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -267,7 +267,8 @@ void X86Subtarget::initializeEnvironment() {
   HasVectorUAMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
-  HasSlowDivide = false;
+  HasSlowDivide32 = false;
+  HasSlowDivide64 = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 091b6c4fbbda4dca2e480fb1c516d22b6280696b..cf76ac70e0b2adeeccd97f47867793553ce35b99 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -171,9 +171,13 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
-  /// HasSlowDivide - True if smaller divides are significantly faster than
-  /// full divides and should be used when possible.
-  bool HasSlowDivide;
+  /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+  /// 32-bit divisions and should be used when possible.
+  bool HasSlowDivide32;
+
+  /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+  /// 64-bit divisions and should be used when possible.
+  bool HasSlowDivide64;
 
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
@@ -373,7 +377,8 @@ public:
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
-  bool hasSlowDivide() const { return HasSlowDivide; }
+  bool hasSlowDivide32() const { return HasSlowDivide32; }
+  bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
   bool LEAusesAG() const { return LEAUsesAG; }
diff --git a/test/CodeGen/X86/slow-div.ll b/test/CodeGen/X86/slow-div.ll
new file mode 100644
index 0000000..5222382
--- /dev/null
+++ b/test/CodeGen/X86/slow-div.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq   $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+
+
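Since the atom and slm processor models above now carry the new subtarget features, the same bypass can also be exercised through -mcpu rather than an explicit -mattr; a rough invocation, reusing the test file added in this commit:

    llc -mtriple=x86_64-unknown-linux-gnu -mcpu=slm test/CodeGen/X86/slow-div.ll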