[Orc] Tidy up the assembly for the x86-64 resolver block.
authorLang Hames <lhames@gmail.com>
Mon, 6 Apr 2015 03:01:29 +0000 (03:01 +0000)
committerLang Hames <lhames@gmail.com>
Mon, 6 Apr 2015 03:01:29 +0000 (03:01 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234138 91177308-0d34-0410-b5e6-96231b3b80d8

lib/ExecutionEngine/Orc/OrcTargetSupport.cpp

index 6fe5301..67861c3 100644 (file)
@@ -6,39 +6,6 @@ using namespace llvm::orc;
 
 namespace {
 
-std::array<const char *, 12> X86GPRsToSave = {{
-    "rbp", "rbx", "r12", "r13", "r14", "r15", // Callee saved.
-    "rdi", "rsi", "rdx", "rcx", "r8", "r9",   // Int args.
-}};
-
-std::array<const char *, 8> X86XMMsToSave = {{
-    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" // FP args
-}};
-
-template <typename OStream> unsigned saveX86Regs(OStream &OS) {
-  for (const auto &GPR : X86GPRsToSave)
-    OS << "  pushq   %" << GPR << "\n";
-
-  OS << "  subq    $" << (16 * X86XMMsToSave.size()) << ", %rsp\n";
-
-  for (unsigned i = 0; i < X86XMMsToSave.size(); ++i)
-    OS << "  movdqu  %" << X86XMMsToSave[i] << ", "
-       << (16 * (X86XMMsToSave.size() - i - 1)) << "(%rsp)\n";
-
-  return (8 * X86GPRsToSave.size()) + (16 * X86XMMsToSave.size());
-}
-
-template <typename OStream> void restoreX86Regs(OStream &OS) {
-  for (unsigned i = 0; i < X86XMMsToSave.size(); ++i)
-    OS << "  movdqu  " << (16 * i) << "(%rsp), %"
-       << X86XMMsToSave[(X86XMMsToSave.size() - i - 1)] << "\n";
-  OS << "  addq    $" << (16 * X86XMMsToSave.size()) << ", %rsp\n";
-
-  for (unsigned i = 0; i < X86GPRsToSave.size(); ++i)
-    OS << "  popq    %" << X86GPRsToSave[X86GPRsToSave.size() - i - 1] << "\n";
-}
-
-template <typename TargetT>
 uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
                                 TargetAddress CallbackID) {
   return JCBM->executeCompileCallback(CallbackID);
@@ -53,14 +20,26 @@ const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
 
 void OrcX86_64::insertResolverBlock(
     Module &M, JITCompileCallbackManagerBase &JCBM) {
+
+  // Trampoline code-sequence length, used to get trampoline address from return
+  // address.
   const unsigned X86_64_TrampolineLength = 6;
-  auto CallbackPtr = executeCompileCallback<OrcX86_64>;
+
+  // List of x86-64 GPRs to save.
+  std::array<const char *, 11> GPRs = {{
+      "rbx", "r12", "r13", "r14", "r15", // Callee saved (rbp preserved below).
+      "rdi", "rsi", "rdx", "rcx", "r8", "r9",   // Int args.
+    }};
+
+  // Address of the executeCompileCallback function.
   uint64_t CallbackAddr =
-      static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CallbackPtr));
+      static_cast<uint64_t>(
+        reinterpret_cast<uintptr_t>(executeCompileCallback));
 
   std::ostringstream AsmStream;
   Triple TT(M.getTargetTriple());
 
+  // Switch to text section.
   if (TT.getOS() == Triple::Darwin)
     AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
               << ".align 4, 0x90\n";
@@ -68,24 +47,46 @@ void OrcX86_64::insertResolverBlock(
     AsmStream << ".text\n"
               << ".align 16, 0x90\n";
 
+  // Bake in a pointer to the callback manager immediately before the
+  // start of the resolver function.
   AsmStream << "jit_callback_manager_addr:\n"
-            << "  .quad " << &JCBM << "\n"
-            << ResolverBlockName << ":\n";
+            << "  .quad " << &JCBM << "\n";
 
-  uint64_t ReturnAddrOffset = saveX86Regs(AsmStream);
+  // Start the resolver function.
+  AsmStream << ResolverBlockName << ":\n"
+            << "  pushq   %rbp\n"
+            << "  movq    %rsp, %rbp\n";
 
-  // Compute index, load object address, and call JIT.
-  AsmStream << "  leaq    jit_callback_manager_addr(%rip), %rdi\n"
+  // Store the GPRs.
+  for (const auto &GPR : GPRs)
+    AsmStream << "  pushq   %" << GPR << "\n";
+
+  // Store floating-point state with FXSAVE.
+  AsmStream << "  subq    $512, %rsp\n"
+            << "  fxsave  (%rsp)\n"
+
+  // Load callback manager address, compute trampoline address, call JIT.
+            << "  lea     jit_callback_manager_addr(%rip), %rdi\n"
             << "  movq    (%rdi), %rdi\n"
-            << "  movq    " << ReturnAddrOffset << "(%rsp), %rsi\n"
+            << "  movq    0x8(%rbp), %rsi\n"
             << "  subq    $" << X86_64_TrampolineLength << ", %rsi\n"
             << "  movabsq $" << CallbackAddr << ", %rax\n"
             << "  callq   *%rax\n"
-            << "  movq    %rax, " << ReturnAddrOffset << "(%rsp)\n";
 
-  restoreX86Regs(AsmStream);
+  // Replace the return to the trampoline with the return address of the
+  // compiled function body.
+            << "  movq    %rax, 0x8(%rbp)\n"
+
+  // Restore the floating point state.
+            << "  fxrstor (%rsp)\n"
+            << "  addq    $512, %rsp\n";
+
+  for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
+    AsmStream << "  popq    %" << GPR << "\n";
 
-  AsmStream << "  retq\n";
+  // Restore original RBP and return to compiled function body.
+  AsmStream << "  popq    %rbp\n"
+            << "  retq\n";
 
   M.appendModuleInlineAsm(AsmStream.str());
 }