//
// The LLVM Compiler Infrastructure
//
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#define DEBUG_TYPE "jit"
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/Config/alloca.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/System/Memory.h"
#include "llvm/Support/Debug.h"
-#include <set>
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
extern "C" void PPC64CompilationCallback();
#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
- !defined(__ppc64__)
+ !(defined(__ppc64__) || defined(__FreeBSD__))
// CompilationCallback stub - We can't use a C function with inline assembly in
// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
// write our own wrapper, which does things our way, so we have complete control
// FIXME: need to save v[0-19] for altivec?
// FIXME: could shrink frame
// Set up a proper stack frame
- "stwu r1, -208(r1)\n"
+ // FIXME Layout
+ // PowerPC32 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
"mflr r0\n"
- "stw r0, 216(r1)\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
// Save all int arg registers
"stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
"stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
"mtlr r2\n"
"bctr\n"
);
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10], 8 doubles f[1-8], and the 8-byte frame header (back chain, LR save word).
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all FP arg registers (f1-f8).
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
#else
void PPC32CompilationCallback() {
- assert(0 && "This is not a power pc, you can't execute this!");
- abort();
+ llvm_unreachable("This is not a power pc, you can't execute this!");
}
#endif
// Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
// FIXME: need to save v[0-19] for altivec?
// Set up a proper stack frame
- "stdu r1, -208(r1)\n"
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
"mflr r0\n"
- "std r0, 224(r1)\n"
+ "std r0, 16(r1)\n"
+ "stdu r1, -280(r1)\n"
// Save all int arg registers
- "std r10, 200(r1)\n" "std r9, 192(r1)\n"
- "std r8, 184(r1)\n" "std r7, 176(r1)\n"
- "std r6, 168(r1)\n" "std r5, 160(r1)\n"
- "std r4, 152(r1)\n" "std r3, 144(r1)\n"
+ "std r10, 272(r1)\n" "std r9, 264(r1)\n"
+ "std r8, 256(r1)\n" "std r7, 248(r1)\n"
+ "std r6, 240(r1)\n" "std r5, 232(r1)\n"
+ "std r4, 224(r1)\n" "std r3, 216(r1)\n"
// Save all call-clobbered FP regs.
- "stfd f13, 136(r1)\n" "stfd f12, 128(r1)\n"
- "stfd f11, 120(r1)\n" "stfd f10, 112(r1)\n"
- "stfd f9, 104(r1)\n" "stfd f8, 96(r1)\n"
- "stfd f7, 88(r1)\n" "stfd f6, 80(r1)\n"
- "stfd f5, 72(r1)\n" "stfd f4, 64(r1)\n"
- "stfd f3, 56(r1)\n" "stfd f2, 48(r1)\n"
- "stfd f1, 40(r1)\n"
+ "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n"
+ "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n"
+ "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n"
+ "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n"
+ "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n"
+ "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n"
+ "stfd f1, 112(r1)\n"
// Arguments to Compilation Callback:
// r3 - our lr (address of the call instruction in stub plus 4)
// r4 - stub's lr (address of instruction that called the stub plus 4)
// r5 - is64Bit - always 1.
"mr r3, r0\n"
- "ld r2, 208(r1)\n" // stub's frame
+ "ld r2, 280(r1)\n" // stub's frame
"ld r4, 16(r2)\n" // stub's lr
"li r5, 1\n" // 1 == 64 bit
"bl _PPCCompilationCallbackC\n"
"mtctr r3\n"
// Restore all int arg registers
- "ld r10, 200(r1)\n" "ld r9, 192(r1)\n"
- "ld r8, 184(r1)\n" "ld r7, 176(r1)\n"
- "ld r6, 168(r1)\n" "ld r5, 160(r1)\n"
- "ld r4, 152(r1)\n" "ld r3, 144(r1)\n"
+ "ld r10, 272(r1)\n" "ld r9, 264(r1)\n"
+ "ld r8, 256(r1)\n" "ld r7, 248(r1)\n"
+ "ld r6, 240(r1)\n" "ld r5, 232(r1)\n"
+ "ld r4, 224(r1)\n" "ld r3, 216(r1)\n"
// Restore all FP arg registers
- "lfd f13, 136(r1)\n" "lfd f12, 128(r1)\n"
- "lfd f11, 120(r1)\n" "lfd f10, 112(r1)\n"
- "lfd f9, 104(r1)\n" "lfd f8, 96(r1)\n"
- "lfd f7, 88(r1)\n" "lfd f6, 80(r1)\n"
- "lfd f5, 72(r1)\n" "lfd f4, 64(r1)\n"
- "lfd f3, 56(r1)\n" "lfd f2, 48(r1)\n"
- "lfd f1, 40(r1)\n"
+ "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n"
+ "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n"
+ "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n"
+ "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n"
+ "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n"
+ "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n"
+ "lfd f1, 112(r1)\n"
// Pop 3 frames off the stack and branch to target
- "ld r1, 208(r1)\n"
+ "ld r1, 280(r1)\n"
"ld r2, 16(r1)\n"
"mtlr r2\n"
"bctr\n"
);
#else
void PPC64CompilationCallback() {
- assert(0 && "This is not a power pc, you can't execute this!");
- abort();
+ llvm_unreachable("This is not a power pc, you can't execute this!");
}
#endif
return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
}
-void *PPCJITInfo::emitFunctionStub(void *Fn, MachineCodeEmitter &MCE) {
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
// If this is just a call to an external function, emit a branch instead of a
// call. The code is the same except for one bit of the last instruction.
if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
Fn != (void*)(intptr_t)PPC64CompilationCallback) {
- MCE.startFunctionStub(7*4);
- intptr_t Addr = (intptr_t)MCE.getCurrentPCValue();
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
+ JCE.startGVStub(F, 7*4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit);
- return MCE.finishFunctionStub(0);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4);
+ return JCE.finishGVStub(F);
}
- MCE.startFunctionStub(10*4);
+ JCE.startGVStub(F, 10*4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
if (is64Bit) {
- MCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
- MCE.emitWordBE(0x7d6802a6); // mflr r11
- MCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
} else {
- MCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
- MCE.emitWordBE(0x7d6802a6); // mflr r11
- MCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
}
- intptr_t Addr = (intptr_t)MCE.getCurrentPCValue();
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- MCE.emitWordBE(0);
- EmitBranchToAt(Addr, (intptr_t)Fn, true, is64Bit);
- return MCE.finishFunctionStub(0);
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4);
+ return JCE.finishGVStub(F);
}
unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
switch ((PPC::RelocationType)MR->getRelocationType()) {
- default: assert(0 && "Unknown relocation type!");
+ default: llvm_unreachable("Unknown relocation type!");
case PPC::reloc_pcrel_bx:
// PC-relative relocation for b and bl instructions.
ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;