1 //===-- PPC32JITInfo.cpp - Implement the JIT interfaces for the PowerPC ---===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the JIT interfaces for the 32-bit PowerPC target.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "jit"
15 #include "PPC32JITInfo.h"
16 #include "PPC32Relocations.h"
17 #include "llvm/CodeGen/MachineCodeEmitter.h"
18 #include "llvm/Config/alloca.h"
// JITCompilerFunction - The compiler callback for this target. It is assigned
// in PPC32JITInfo::getLazyResolverFunction and invoked from
// PPC32CompilationCallbackC with the address the stub was entered from; the
// value it returns is used as the address of the function to branch to.
21 static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// Hand-assembled encodings for the few PowerPC instructions this file emits.
// Each macro evaluates to one 32-bit instruction word: the primary opcode is
// shifted into the top six bits, the two operand fields are placed at bit
// offsets 21 and 16, and the low half carries either a 16-bit immediate or an
// extended opcode (plus the link bit for bcctr).
#define BUILD_ADDIS(RD,RS,IMM16) \
  ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
#define BUILD_ORI(RD,RS,UIMM16) \
  ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
#define BUILD_MTSPR(RS,SPR) \
  ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
#define BUILD_BCCTRx(BO,BI,LINK) \
  ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))

// Convenience forms built on the encoders above.
#define BUILD_LIS(RD,IMM16)    BUILD_ADDIS(RD,0,IMM16)
#define BUILD_MTCTR(RS)        BUILD_MTSPR(RS,9)
#define BUILD_BCTR(LINK)       BUILD_BCCTRx(20,0,LINK)

/// EmitBranchToAt - Write a four-instruction sequence at At that transfers
/// control to To through the count register:
///
///   lis   r12, hi16(To)
///   ori   r12, r12, lo16(To)
///   mtctr r12
///   bctr          (bctrl when isCall is true)
///
/// At must point to at least four writable 32-bit words. Only the low 32 bits
/// of To are encoded. r12 is overwritten by the emitted sequence.
static void EmitBranchToAt(void *At, void *To, bool isCall) {
  intptr_t Addr = (intptr_t)To;

  // FIXME: should special case the short branch case.
  unsigned *AtI = (unsigned*)At;

  // The explicit casts make the narrowing from intptr_t to a 32-bit
  // instruction word visible; the macros already mask the immediate to 16 bits.
  AtI[0] = (unsigned)BUILD_LIS(12, Addr >> 16);   // lis r12, hi16(address)
  AtI[1] = (unsigned)BUILD_ORI(12, 12, Addr);     // ori r12, r12, low16(address)
  AtI[2] = BUILD_MTCTR(12);                       // mtctr r12
  AtI[3] = BUILD_BCTR(isCall);                    // bctr/bctrl
}
// PPC32CompilationCallback - Entry point of the lazy-compilation callback.
// Its address is what getLazyResolverFunction returns, and emitFunctionStub
// compares a stub's target against it. On PowerPC hosts the body is the
// hand-written assembly below rather than a compiled C++ function.
50 extern "C" void PPC32CompilationCallback();
52 #if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)
53 // CompilationCallback stub - We can't use a C function with inline assembly in
54 // it, because the prolog/epilog inserted by GCC won't work for us. Instead,
55 // write our own wrapper, which does things our way, so we have complete control
56 // over register saving and restoring.
// NOTE(review): the embedded numbering of this listing skips 57-59, 63-64 and
// 72-73, so some lines of the assembly (for instance the instructions that
// allocate the frame and load r11) are not visible here - confirm against the
// complete file before editing.
60 ".globl _PPC32CompilationCallback\n"
61 "_PPC32CompilationCallback:\n"
62 // Make space for 29 ints r[3-31] and 14 doubles f[0-13]
65 "stw r11, 280(r1)\n" // Set up a proper stack frame
// stmw stores r3 through r31 in consecutive words starting at 156(r1); that
// address is later handed to the C callback as IntRegs.
66 "stmw r3, 156(r1)\n" // Save all of the integer registers
67 // Save all call-clobbered FP regs.
// The FP save area starts at 44(r1) with f1 and advances eight bytes per
// register; that address is later handed to the C callback as FPRegs, so
// FPRegs[0] holds f1.
68 "stfd f1, 44(r1)\n" "stfd f2, 52(r1)\n" "stfd f3, 60(r1)\n"
69 "stfd f4, 68(r1)\n" "stfd f5, 76(r1)\n" "stfd f6, 84(r1)\n"
70 "stfd f7, 92(r1)\n" "stfd f8, 100(r1)\n" "stfd f9, 108(r1)\n"
71 "stfd f10, 116(r1)\n" "stfd f11, 124(r1)\n" "stfd f12, 132(r1)\n"
74 // Now that everything is saved, go to the C compilation callback function,
75 // passing the address of the intregs and fpregs.
76 "addi r3, r1, 156\n" // &IntRegs[0]
77 "addi r4, r1, 44\n" // &FPRegs[0]
// No code follows the call in this listing; PPC32CompilationCallbackC ends by
// branching to the resolved target itself.
78 "bl _PPC32CompilationCallbackC\n"
// PPC32CompilationCallbackC - C++ half of the lazy-compilation callback,
// reached by the "bl" at the end of the PPC32CompilationCallback assembly.
//
//   IntRegs - start of the saved integer registers (r3 upward).
//   FPRegs  - start of the saved floating-point registers (f1 upward).
//
// It asks JITCompilerFunction for the real target, patches the original call
// site when that site is a direct branch whose displacement still fits,
// rewrites the stub into an unconditional branch to the target, and finally
// restores the saved registers and jumps to the target through CTR. It does
// not return through the normal epilog.
//
// NOTE(review): the embedded numbering of this listing skips several values
// (for example 90-91, 105, 107-109, 136 and 145-149), so closing braces, the
// matching #endif and possibly short statements are not visible here -
// confirm against the complete file before editing.
82 extern "C" void PPC32CompilationCallbackC(unsigned *IntRegs, double *FPRegs) {
// Return addresses and frame pointers are read by walking up the call chain
// with the GCC builtins: one level up for the stub, two levels up for the
// code that called the stub.
83 unsigned *CameFromStub = (unsigned*)__builtin_return_address(0+1);
84 unsigned *CameFromOrig = (unsigned*)__builtin_return_address(1+1);
85 unsigned *CCStackPtr = (unsigned*)__builtin_frame_address(0);
86 //unsigned *StubStackPtr = (unsigned*)__builtin_frame_address(1);
87 unsigned *OrigStackPtr = (unsigned*)__builtin_frame_address(2+1);
89 // Adjust pointer to the branch, not the return address.
// NOTE(review): no adjustment statement follows the comment above in this
// listing; confirm whether one is present in the complete file.
// Ask the JIT for the address of the function this stub stands in for.
92 void *Target = JITCompilerFunction(CameFromStub);
94 // Check to see if CameFromOrig[-1] is a 'bl' instruction, and if we can
95 // rewrite it to branch directly to the destination. If so, rewrite it so it
96 // does not need to go through the stub anymore.
97 unsigned CameFromOrigInst = CameFromOrig[-1];
// Primary opcode 18 occupies the top six bits of the instruction word.
98 if ((CameFromOrigInst >> 26) == 18) { // Direct call.
// The branch sits one word before the return address, hence the +4; the
// displacement is expressed in words.
99 intptr_t Offset = ((intptr_t)Target-(intptr_t)CameFromOrig+4) >> 2;
// The displacement must fit in a signed 24-bit field.
100 if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
101 // Clear the original target out.
// Keeps only the six opcode bits and the two low-order bits.
102 CameFromOrigInst &= (63 << 26) | 3;
103 // Fill in the new target.
104 CameFromOrigInst |= (Offset & ((1 << 24)-1)) << 2;
106 CameFromOrig[-1] = CameFromOrigInst;
110 // Locate the start of the stub. If this is a short call, adjust backwards
111 // the short amount, otherwise the full amount.
112 bool isShortStub = (*CameFromStub >> 26) == 18;
// CameFromStub is an unsigned*, so this steps back 2 or 6 instruction words.
113 CameFromStub -= isShortStub ? 2 : 6;
115 // Rewrite the stub with an unconditional branch to the target, for any users
116 // who took the address of the stub.
117 EmitBranchToAt(CameFromStub, Target, false);
119 // Change the SP so that we pop two stack frames off when we return.
120 *CCStackPtr = (intptr_t)OrigStackPtr;
122 // Put the address of the stub and the LR value that originally came into the
123 // stub in a place that is easy to get on the stack after we restore all regs.
// These two words are read back by the "lwz r0,8(r1)" and "lwz r0,4(r1)"
// instructions below; that presumes r1 still equals
// __builtin_frame_address(0) at that point - TODO confirm.
124 CCStackPtr[2] = (intptr_t)Target;
125 CCStackPtr[1] = (intptr_t)CameFromOrig;
127 // Note, this is not a standard epilog!
128 #if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)
// Pin the two save-area pointers to r2 and r3 so the asm below can address
// them as base registers.
129 register unsigned *IRR asm ("r2") = IntRegs;
130 register double *FRR asm ("r3") = FPRegs;
131 __asm__ __volatile__ (
// %0 is FRR. The FP registers are reloaded before the lmw because the lmw
// overwrites r3, which holds FRR.
132 "lfd f1, 0(%0)\n" "lfd f2, 8(%0)\n" "lfd f3, 16(%0)\n"
133 "lfd f4, 24(%0)\n" "lfd f5, 32(%0)\n" "lfd f6, 40(%0)\n"
134 "lfd f7, 48(%0)\n" "lfd f8, 56(%0)\n" "lfd f9, 64(%0)\n"
135 "lfd f10, 72(%0)\n" "lfd f11, 80(%0)\n" "lfd f12, 88(%0)\n"
// %1 is IRR; lmw reloads r3 upward from the integer save area.
137 "lmw r3, 0(%1)\n" // Load all integer regs
138 "lwz r0,4(r1)\n" // Get CameFromOrig (LR into stub)
139 "mtlr r0\n" // Put it in the LR register
140 "lwz r0,8(r1)\n" // Get target function pointer
141 "mtctr r0\n" // Put it into the CTR register
142 "lwz r1,0(r1)\n" // Pop two frames off
143 "bctr\n" :: // Return to stub!
// Input operands: %0 = FRR, %1 = IRR, both constrained to base registers.
144 "b" (FRR), "b" (IRR));
150 TargetJITInfo::LazyResolverFn
151 PPC32JITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
152 JITCompilerFunction = Fn;
153 return PPC32CompilationCallback;
// emitFunctionStub - Emit a stub for Fn through MCE and return the value of
// MCE.finishFunctionStub(0).
//
// Two shapes are produced:
//   * Fn is not the compilation callback: a 4-word stub (startFunctionStub is
//     given 4*4 bytes) holding an unconditional branch to Fn.
//   * Fn is the compilation callback: a 7-word stub (4*7 bytes) made of three
//     prologue words followed by a call (bctrl form) to Fn.
//
// NOTE(review): the embedded numbering of this listing skips 162-165 and
// 175-178, i.e. the lines between capturing the current PC and calling
// EmitBranchToAt, as well as the closing braces. Whatever reserves the four
// words that EmitBranchToAt overwrites is not visible here - confirm against
// the complete file before editing.
156 void *PPC32JITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
157 // If this is just a call to an external function, emit a branch instead of a
158 // call. The code is the same except for one bit of the last instruction.
159 if (Fn != PPC32CompilationCallback) {
160 MCE.startFunctionStub(4*4);
// Address at which the branch sequence will be written.
161 void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
166 EmitBranchToAt(Addr, Fn, false);
167 return MCE.finishFunctionStub(0);
// Lazy-compilation stub: open a 32-byte frame and save the incoming LR before
// calling the callback.
170 MCE.startFunctionStub(4*7);
171 MCE.emitWord(0x9421ffe0); // stwu r1,-32(r1)
172 MCE.emitWord(0x7d6802a6); // mflr r11
173 MCE.emitWord(0x91610028); // stw r11, 40(r1)
// The call sequence begins right after the three words emitted above.
174 void *Addr = (void*)(intptr_t)MCE.getCurrentPCValue();
179 EmitBranchToAt(Addr, Fn, true/*is call*/);
180 return MCE.finishFunctionStub(0);
184 void PPC32JITInfo::relocate(void *Function, MachineRelocation *MR,
185 unsigned NumRelocs) {
186 for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
187 unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
188 intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
189 switch ((PPC::RelocationType)MR->getRelocationType()) {
190 default: assert(0 && "Unknown relocation type!");
191 case PPC::reloc_pcrel_bx:
192 // PC-relative relocation for b and bl instructions.
193 ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
194 assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
195 "Relocation out of range!");
196 *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
198 case PPC::reloc_absolute_loadhi: // Relocate high bits into addis
199 case PPC::reloc_absolute_la: // Relocate low bits into addi
200 ResultPtr += MR->getConstantVal();
202 if (MR->getRelocationType() == PPC::reloc_absolute_loadhi) {
203 // If the low part will have a carry (really a borrow) from the low
204 // 16-bits into the high 16, add a bit to borrow from.
205 if (((int)ResultPtr << 16) < 0)
206 ResultPtr += 1 << 16;
210 // Do the addition then mask, so the addition does not overflow the 16-bit
211 // immediate section of the instruction.
212 unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
213 unsigned HighBits = *RelocPos & ~65535;
214 *RelocPos = LowBits | HighBits; // Slam into low 16-bits
220 void PPC32JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
221 EmitBranchToAt(Old, New, false);