ef34c4a2b451dc589c7f27589362e7199a37d56e
[oota-llvm.git] / lib / ExecutionEngine / Orc / OrcTargetSupport.cpp
1 #include "llvm/ADT/Triple.h"
2 #include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
3 #include "llvm/Support/Process.h"
4 #include <array>
5
6
7 using namespace llvm::orc;
8
9 namespace {
10
11 uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
12                                 TargetAddress CallbackID) {
13   return JCBM->executeCompileCallback(CallbackID);
14 }
15
16 }
17
18 namespace llvm {
19 namespace orc {
20
21 const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
22
23 void OrcX86_64::insertResolverBlock(
24     Module &M, JITCompileCallbackManagerBase &JCBM) {
25
26   // Trampoline code-sequence length, used to get trampoline address from return
27   // address.
28   const unsigned X86_64_TrampolineLength = 6;
29
30   // List of x86-64 GPRs to save. Note - RBP saved separately below.
31   std::array<const char *, 14> GPRs = {{
32       "rax", "rbx", "rcx", "rdx",
33       "rsi", "rdi", "r8", "r9",
34       "r10", "r11", "r12", "r13",
35       "r14", "r15"
36     }};
37
38   // Address of the executeCompileCallback function.
39   uint64_t CallbackAddr =
40       static_cast<uint64_t>(
41         reinterpret_cast<uintptr_t>(executeCompileCallback));
42
43   std::ostringstream AsmStream;
44   Triple TT(M.getTargetTriple());
45
46   // Switch to text section.
47   if (TT.getOS() == Triple::Darwin)
48     AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
49               << ".align 4, 0x90\n";
50   else
51     AsmStream << ".text\n"
52               << ".align 16, 0x90\n";
53
54   // Bake in a pointer to the callback manager immediately before the
55   // start of the resolver function.
56   AsmStream << "jit_callback_manager_addr:\n"
57             << "  .quad " << &JCBM << "\n";
58
59   // Start the resolver function.
60   AsmStream << ResolverBlockName << ":\n"
61             << "  pushq     %rbp\n"
62             << "  movq      %rsp, %rbp\n";
63
64   // Store the GPRs.
65   for (const auto &GPR : GPRs)
66     AsmStream << "  pushq     %" << GPR << "\n";
67
68   // Store floating-point state with FXSAVE.
69   // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd
70   //       number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add
71   //       an extra 64 bits of padding to the FXSave area.
72   unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0;
73   unsigned FXSaveSize = 512 + Padding;
74   AsmStream << "  subq      $" << FXSaveSize << ", %rsp\n"
75             << "  fxsave64  (%rsp)\n"
76
77   // Load callback manager address, compute trampoline address, call JIT.
78             << "  lea       jit_callback_manager_addr(%rip), %rdi\n"
79             << "  movq      (%rdi), %rdi\n"
80             << "  movq      0x8(%rbp), %rsi\n"
81             << "  subq      $" << X86_64_TrampolineLength << ", %rsi\n"
82             << "  movabsq   $" << CallbackAddr << ", %rax\n"
83             << "  callq     *%rax\n"
84
85   // Replace the return to the trampoline with the return address of the
86   // compiled function body.
87             << "  movq      %rax, 0x8(%rbp)\n"
88
89   // Restore the floating point state.
90             << "  fxrstor64 (%rsp)\n"
91             << "  addq      $" << FXSaveSize << ", %rsp\n";
92
93   for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
94     AsmStream << "  popq      %" << GPR << "\n";
95
96   // Restore original RBP and return to compiled function body.
97   AsmStream << "  popq      %rbp\n"
98             << "  retq\n";
99
100   M.appendModuleInlineAsm(AsmStream.str());
101 }
102
103 OrcX86_64::LabelNameFtor
104 OrcX86_64::insertCompileCallbackTrampolines(Module &M,
105                                             TargetAddress ResolverBlockAddr,
106                                             unsigned NumCalls,
107                                             unsigned StartIndex) {
108   const char *ResolverBlockPtrName = "Lorc_resolve_block_addr";
109
110   std::ostringstream AsmStream;
111   Triple TT(M.getTargetTriple());
112
113   if (TT.getOS() == Triple::Darwin)
114     AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
115               << ".align 4, 0x90\n";
116   else
117     AsmStream << ".text\n"
118               << ".align 16, 0x90\n";
119
120   AsmStream << ResolverBlockPtrName << ":\n"
121             << "  .quad " << ResolverBlockAddr << "\n";
122
123   auto GetLabelName =
124     [=](unsigned I) {
125       std::ostringstream LabelStream;
126       LabelStream << "orc_jcc_" << (StartIndex + I);
127       return LabelStream.str();
128   };
129
130   for (unsigned I = 0; I < NumCalls; ++I)
131     AsmStream << GetLabelName(I) << ":\n"
132               << "  callq *" << ResolverBlockPtrName << "(%rip)\n";
133
134   M.appendModuleInlineAsm(AsmStream.str());
135
136   return GetLabelName;
137 }
138
139 OrcX86_64::IndirectStubsInfo::~IndirectStubsInfo() {
140   sys::Memory::releaseMappedMemory(StubsBlock);
141   sys::Memory::releaseMappedMemory(PtrsBlock);
142 }
143
144 std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
145                                                   unsigned MinStubs,
146                                                   void *InitialPtrVal) {
147   // Stub format is:
148   //
149   // .section __orc_stubs
150   // stub1:
151   //                 jmpq    *ptr1(%rip)
152   //                 .byte   0xC4         ; <- Invalid opcode padding.
153   //                 .byte   0xF1
154   // stub2:
155   //                 jmpq    *ptr2(%rip)
156   //
157   // ...
158   //
159   // .section __orc_ptrs
160   // ptr1:
161   //                 .quad 0x0
162   // ptr2:
163   //                 .quad 0x0
164   //
165   // ...
166
167   const unsigned StubSize = IndirectStubsInfo::StubSize;
168
169   // Emit at least MinStubs, rounded up to fill the pages allocated.
170   unsigned PageSize = sys::Process::getPageSize();
171   unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
172   unsigned NumStubs = (NumPages * PageSize) / StubSize;
173
174   // Allocate memory for stubs and pointers in one call.
175   std::error_code EC;
176   auto InitialBlock = sys::Memory::allocateMappedMemory(2 * NumPages * PageSize,
177                                                         nullptr,
178                                                         sys::Memory::MF_READ |
179                                                         sys::Memory::MF_WRITE,
180                                                         EC);
181
182   if (EC)
183     return EC;
184
185   // Create separate MemoryBlocks representing the stubs and pointers.
186   sys::MemoryBlock StubsBlock(InitialBlock.base(), NumPages * PageSize);
187   sys::MemoryBlock PtrsBlock(static_cast<char*>(InitialBlock.base()) +
188                              NumPages * PageSize,
189                              NumPages * PageSize);
190
191   // Populate the stubs page stubs and mark it executable.
192   uint64_t *Stub = reinterpret_cast<uint64_t*>(StubsBlock.base());
193   uint64_t PtrOffsetField =
194     static_cast<uint64_t>(NumPages * PageSize - 6) << 16;
195   for (unsigned I = 0; I < NumStubs; ++I)
196     Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
197
198   if (auto EC = sys::Memory::protectMappedMemory(StubsBlock,
199                                                  sys::Memory::MF_READ |
200                                                  sys::Memory::MF_EXEC))
201     return EC;
202
203   // Initialize all pointers to point at FailureAddress.
204   void **Ptr = reinterpret_cast<void**>(PtrsBlock.base());
205   for (unsigned I = 0; I < NumStubs; ++I)
206     Ptr[I] = InitialPtrVal;
207
208   StubsInfo.NumStubs = NumStubs;
209   StubsInfo.StubsBlock = std::move(StubsBlock);
210   StubsInfo.PtrsBlock = std::move(PtrsBlock);
211
212   return std::error_code();
213 }
214
215 } // End namespace orc.
216 } // End namespace llvm.