For the current Atom processor, the fastest way to handle a call

author Preston Gurd <preston.gurd@intel.com>

Wed, 27 Mar 2013 19:14:02 +0000 (19:14 +0000)

committer Preston Gurd <preston.gurd@intel.com>

Wed, 27 Mar 2013 19:14:02 +0000 (19:14 +0000)
author Preston Gurd <preston.gurd@intel.com>
Wed, 27 Mar 2013 19:14:02 +0000 (19:14 +0000)
committer Preston Gurd <preston.gurd@intel.com>
Wed, 27 Mar 2013 19:14:02 +0000 (19:14 +0000)
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td

index e87da56cc6ab346ae9c173d28d84c0e726cbd1e7..bf095017f859fdcf8e52e49b6e93bdb520bb2507 100644 (file)
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -134,6 +134,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
  def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                       "PadShortFunctions", "true",
                                       "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+                                     "CallRegIndirect", "true",
+                                     "Call register indirect">;
  
  //===----------------------------------------------------------------------===//
  // X86 processors supported.
@@ -181,7 +184,9 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
  def : ProcessorModel<"atom", AtomModel,
                       [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
                        FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                      FeatureSlowDivide, FeaturePadShortFunctions]>;
+                      FeatureSlowDivide,
+                      FeatureCallRegIndirect,
+                      FeaturePadShortFunctions]>;
  
  // "Arrandale" along with corei3 and corei5
  def : ProcessorModel<"corei7", SandyBridgeModel,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 9ef6a3bca021390d6bbabb4ce323525cb6f49734..0eaab0f8185b1ea0151cf29cc904118e83aecb24 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2629,6 +2629,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      InFlag = Chain.getValue(1);
    }
  
+  // When CALL would use a memory reference, call indirectly through a register.
+  if (Subtarget->callRegIndirect() &&
+      Callee.getOpcode() == ISD::LOAD) {
+    const TargetRegisterClass *AddrRegClass =
+      getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    unsigned VReg = MRI.createVirtualRegister(AddrRegClass);
+    SDValue tempValue = DAG.getCopyFromReg(Callee,
+                                           dl, VReg, Callee.getValueType());
+    Chain = DAG.getCopyToReg(Chain, dl, VReg, tempValue, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp

index 4b368b490149a50330c84b4717d2567c26529054..6e66c1aa96c6808bb63f34ff2fe67f42b2968675 100644 (file)
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -458,6 +458,7 @@ void X86Subtarget::initializeEnvironment() {
    HasSlowDivide = false;
    PostRAScheduler = false;
    PadShortFunctions = false;
+  CallRegIndirect = false;
    stackAlignment = 4;
    // FIXME: this is a known good value for Yonah. How about others?
    MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h

index 108ef0eb1c2c5535bbe4c27d5257b836d5a1fa77..cac3f579b00ebfb80284a71029149f8b53ac08ba 100644 (file)
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -159,6 +159,10 @@ protected:
    /// a stall when returning too early.
    bool PadShortFunctions;
  
+  /// CallRegIndirect - True if calls with a memory reference should be converted
+  /// to a register-based indirect call.
+  bool CallRegIndirect;
+
    /// stackAlignment - The minimum alignment known to hold of the stack frame on
    /// entry to the function and which must be maintained by every function.
    unsigned stackAlignment;
@@ -269,6 +273,7 @@ public:
    bool useLeaForSP() const { return UseLeaForSP; }
    bool hasSlowDivide() const { return HasSlowDivide; }
    bool padShortFunctions() const { return PadShortFunctions; }
+  bool callRegIndirect() const { return CallRegIndirect; }
  
    bool isAtom() const { return X86ProcFamily == IntelAtom; }
  
diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll

new file mode 100644 (file)

index 0000000..6327811
--- /dev/null
+++ b/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck -check-prefix=ATOM32 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
+; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux  | FileCheck -check-prefix=ATOM64 %s
+; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s
+
+
+; fn_ptr.ll
+%class.A = type { i32 (...)** }
+
+define i32 @test1() #0 {
+  ;ATOM: test1
+entry:
+  %call = tail call %class.A* @_Z3facv()
+  %0 = bitcast %class.A* %call to void (%class.A*)***
+  %vtable = load void (%class.A*)*** %0, align 8
+  %1 = load void (%class.A*)** %vtable, align 8
+  ;ATOM32: movl (%ecx), %ecx
+  ;ATOM32: calll *%ecx
+  ;ATOM-NOT32: calll *(%ecx)
+  ;ATOM64: movq (%rcx), %rcx
+  ;ATOM64: callq *%rcx
+  ;ATOM-NOT64: callq *(%rcx)
+  tail call void %1(%class.A* %call)
+  ret i32 0
+}
+
+declare %class.A* @_Z3facv() #1
+
+; virt_fn.ll
+@p = external global void (i32)**
+
+define i32 @test2() #0 {
+  ;ATOM: test2
+entry:
+  %0 = load void (i32)*** @p, align 8
+  %1 = load void (i32)** %0, align 8
+  ;ATOM32: movl (%eax), %eax
+  ;ATOM32: calll *%eax
+  ;ATOM-NOT32: calll *(%eax)
+  ;ATOM64: movq (%rax), %rax
+  ;ATOM64: callq *%rax
+  ;ATOM-NOT64: callq *(%rax)
+  tail call void %1(i32 2)
+  ret i32 0
+}
author	Preston Gurd <preston.gurd@intel.com>
	Wed, 27 Mar 2013 19:14:02 +0000 (19:14 +0000)
committer	Preston Gurd <preston.gurd@intel.com>
	Wed, 27 Mar 2013 19:14:02 +0000 (19:14 +0000)
lib/Target/X86/X86.td		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86Subtarget.cpp		patch \| blob \| history
lib/Target/X86/X86Subtarget.h		patch \| blob \| history
test/CodeGen/X86/atom-call-reg-indirect.ll	[new file with mode: 0644]	patch \| blob