From aa5b9c0f6f3a99f955fe0ded13d61d7eb4e1a0b5 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 7 Aug 2014 22:02:54 +0000 Subject: [PATCH] Temporarily Revert "Nuke the old JIT." as it's not quite ready to be deleted. This will be reapplied as soon as possible and before the 3.6 branch date at any rate. Approved by Jim Grosbach, Lang Hames, Rafael Espindola. This reverts commits r215111, 215115, 215116, 215117, 215136. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215154 91177308-0d34-0410-b5e6-96231b3b80d8 --- Makefile.rules | 7 +- bindings/ocaml/executionengine/Makefile | 2 +- .../executionengine/executionengine_ocaml.c | 2 +- docs/TableGen/BackEnds.rst | 3 +- examples/BrainF/BrainFDriver.cpp | 2 +- examples/BrainF/CMakeLists.txt | 1 + examples/ExceptionDemo/ExceptionDemo.cpp | 2 + examples/Fibonacci/CMakeLists.txt | 1 + examples/Fibonacci/fibonacci.cpp | 1 + examples/HowToUseJIT/CMakeLists.txt | 1 + examples/HowToUseJIT/HowToUseJIT.cpp | 2 + examples/Kaleidoscope/Chapter4/CMakeLists.txt | 1 + examples/Kaleidoscope/Chapter4/toy.cpp | 1 + examples/Kaleidoscope/Chapter5/CMakeLists.txt | 1 + examples/Kaleidoscope/Chapter5/toy.cpp | 1 + examples/Kaleidoscope/Chapter6/CMakeLists.txt | 1 + examples/Kaleidoscope/Chapter6/toy.cpp | 1 + examples/Kaleidoscope/Chapter7/CMakeLists.txt | 1 + examples/Kaleidoscope/Chapter7/toy.cpp | 1 + .../Kaleidoscope/MCJIT/cached/toy-jit.cpp | 1 + examples/Kaleidoscope/MCJIT/cached/toy.cpp | 1 + examples/Kaleidoscope/MCJIT/complete/toy.cpp | 129 +- examples/Kaleidoscope/MCJIT/initial/toy.cpp | 1 + examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp | 1 + examples/Kaleidoscope/MCJIT/lazy/toy.cpp | 1 + examples/ParallelJIT/CMakeLists.txt | 1 + examples/ParallelJIT/ParallelJIT.cpp | 1 + include/llvm-c/ExecutionEngine.h | 1 + include/llvm/CodeGen/JITCodeEmitter.h | 344 +++ .../llvm/ExecutionEngine/ExecutionEngine.h | 46 + include/llvm/ExecutionEngine/JIT.h | 38 + include/llvm/Target/TargetJITInfo.h | 136 ++ 
include/llvm/Target/TargetLowering.h | 14 + include/llvm/Target/TargetMachine.h | 35 + include/llvm/Target/TargetSubtargetInfo.h | 6 + lib/CodeGen/BasicTargetTransformInfo.cpp | 5 +- lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/JITCodeEmitter.cpp | 14 + lib/CodeGen/LLVMTargetMachine.cpp | 20 + .../SelectionDAG/SelectionDAGBuilder.cpp | 5 +- lib/CodeGen/TargetLoweringBase.cpp | 1 + lib/ExecutionEngine/CMakeLists.txt | 1 + lib/ExecutionEngine/ExecutionEngine.cpp | 27 +- .../ExecutionEngineBindings.cpp | 4 +- lib/ExecutionEngine/Interpreter/Interpreter.h | 12 + lib/ExecutionEngine/JIT/CMakeLists.txt | 8 + lib/ExecutionEngine/JIT/JIT.cpp | 696 ++++++ lib/ExecutionEngine/JIT/JIT.h | 214 ++ lib/ExecutionEngine/JIT/JITEmitter.cpp | 1249 +++++++++++ .../{MCJIT => JIT}/JITMemoryManager.cpp | 0 lib/ExecutionEngine/JIT/LLVMBuild.txt | 22 + lib/ExecutionEngine/JIT/Makefile | 38 + lib/ExecutionEngine/LLVMBuild.txt | 2 +- lib/ExecutionEngine/MCJIT/CMakeLists.txt | 1 - lib/ExecutionEngine/MCJIT/MCJIT.cpp | 18 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 8 +- lib/ExecutionEngine/Makefile | 2 +- lib/ExecutionEngine/TargetSelect.cpp | 5 +- lib/Target/AArch64/CMakeLists.txt | 2 +- lib/Target/ARM/ARM.h | 5 + lib/Target/ARM/ARMCodeEmitter.cpp | 1910 +++++++++++++++++ lib/Target/ARM/ARMISelLowering.cpp | 1 - lib/Target/ARM/ARMJITInfo.cpp | 344 +++ lib/Target/ARM/ARMJITInfo.h | 177 ++ lib/Target/ARM/ARMSubtarget.cpp | 3 +- lib/Target/ARM/ARMSubtarget.h | 4 + lib/Target/ARM/ARMTargetMachine.cpp | 7 + lib/Target/ARM/ARMTargetMachine.h | 2 + lib/Target/ARM/CMakeLists.txt | 5 +- lib/Target/ARM/Makefile | 2 +- lib/Target/CMakeLists.txt | 1 + lib/Target/Mips/CMakeLists.txt | 5 +- lib/Target/Mips/Makefile | 2 +- lib/Target/Mips/Mips.h | 2 + lib/Target/Mips/Mips16ISelLowering.cpp | 2 - lib/Target/Mips/MipsCodeEmitter.cpp | 483 +++++ lib/Target/Mips/MipsConstantIslandPass.cpp | 1 - lib/Target/Mips/MipsISelLowering.cpp | 1 - lib/Target/Mips/MipsJITInfo.cpp | 286 +++ lib/Target/Mips/MipsJITInfo.h | 71 + 
lib/Target/Mips/MipsLongBranch.cpp | 1 - lib/Target/Mips/MipsSEISelLowering.cpp | 1 - lib/Target/Mips/MipsSubtarget.cpp | 2 +- lib/Target/Mips/MipsSubtarget.h | 3 + lib/Target/Mips/MipsTargetMachine.cpp | 7 + lib/Target/Mips/MipsTargetMachine.h | 4 + lib/Target/NVPTX/NVPTXTargetMachine.h | 6 + lib/Target/PowerPC/CMakeLists.txt | 5 +- lib/Target/PowerPC/Makefile | 2 +- lib/Target/PowerPC/PPC.h | 3 + lib/Target/PowerPC/PPCCTRLoops.cpp | 3 +- lib/Target/PowerPC/PPCCodeEmitter.cpp | 295 +++ lib/Target/PowerPC/PPCISelLowering.cpp | 51 +- lib/Target/PowerPC/PPCJITInfo.cpp | 482 +++++ lib/Target/PowerPC/PPCJITInfo.h | 46 + lib/Target/PowerPC/PPCSubtarget.cpp | 15 +- lib/Target/PowerPC/PPCSubtarget.h | 11 + lib/Target/PowerPC/PPCTargetMachine.cpp | 12 + lib/Target/PowerPC/PPCTargetMachine.h | 2 + lib/Target/R600/CMakeLists.txt | 2 +- lib/Target/Sparc/CMakeLists.txt | 5 +- lib/Target/Sparc/Makefile | 2 +- lib/Target/Sparc/Sparc.h | 2 + lib/Target/Sparc/SparcCodeEmitter.cpp | 281 +++ lib/Target/Sparc/SparcJITInfo.cpp | 326 +++ lib/Target/Sparc/SparcJITInfo.h | 67 + lib/Target/Sparc/SparcSubtarget.h | 3 + lib/Target/Sparc/SparcTargetMachine.cpp | 7 + lib/Target/Sparc/SparcTargetMachine.h | 5 + lib/Target/SystemZ/CMakeLists.txt | 2 +- lib/Target/SystemZ/Makefile | 1 + lib/Target/TargetJITInfo.cpp | 14 + lib/Target/X86/CMakeLists.txt | 2 + lib/Target/X86/X86.h | 6 + lib/Target/X86/X86CodeEmitter.cpp | 1502 +++++++++++++ lib/Target/X86/X86ISelDAGToDAG.cpp | 1 - lib/Target/X86/X86InstrInfo.cpp | 1 - lib/Target/X86/X86JITInfo.cpp | 588 +++++ lib/Target/X86/X86JITInfo.h | 79 + lib/Target/X86/X86Subtarget.cpp | 3 +- lib/Target/X86/X86Subtarget.h | 3 + lib/Target/X86/X86TargetMachine.cpp | 7 + lib/Target/X86/X86TargetMachine.h | 6 + test/ExecutionEngine/2002-12-16-ArgTest.ll | 1 + .../ExecutionEngine/2003-01-04-ArgumentBug.ll | 1 + test/ExecutionEngine/2003-01-04-LoopTest.ll | 1 + .../2003-01-15-AlignmentTest.ll | 1 + .../2003-05-06-LivenessClobber.ll | 1 + 
.../2003-05-07-ArgumentTest.ll | 1 + .../2003-08-15-AllocaAssertion.ll | 1 + .../2003-08-21-EnvironmentTest.ll | 1 + .../2003-08-23-RegisterAllocatePhysReg.ll | 1 + ...8-PHINode-ConstantExpr-CondCode-Failure.ll | 1 + .../ExecutionEngine/2005-12-02-TailCallBug.ll | 1 + .../MCJIT/2002-12-16-ArgTest.ll | 2 +- .../MCJIT/2003-01-04-ArgumentBug.ll | 2 +- .../MCJIT/2003-01-04-LoopTest.ll | 2 +- .../MCJIT/2003-01-04-PhiTest.ll | 2 +- .../MCJIT/2003-01-09-SARTest.ll | 2 +- .../ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll | 2 +- .../MCJIT/2003-01-15-AlignmentTest.ll | 2 +- .../MCJIT/2003-05-06-LivenessClobber.ll | 2 +- .../MCJIT/2003-05-07-ArgumentTest.ll | 2 +- .../MCJIT/2003-05-11-PHIRegAllocBug.ll | 2 +- .../MCJIT/2003-06-04-bzip2-bug.ll | 2 +- .../MCJIT/2003-06-05-PHIBug.ll | 2 +- .../MCJIT/2003-08-15-AllocaAssertion.ll | 2 +- .../MCJIT/2003-08-21-EnvironmentTest.ll | 2 +- .../2003-08-23-RegisterAllocatePhysReg.ll | 2 +- ...8-PHINode-ConstantExpr-CondCode-Failure.ll | 2 +- .../MCJIT/2005-12-02-TailCallBug.ll | 2 +- .../MCJIT/2007-12-10-APIntLoadStore.ll | 2 +- .../MCJIT/2008-06-05-APInt-OverAShr.ll | 2 +- .../MCJIT/2010-01-15-UndefValue.ll | 2 +- .../MCJIT/2013-04-04-RelocAddend.ll | 2 +- test/ExecutionEngine/MCJIT/cross-module-a.ll | 2 +- .../MCJIT/cross-module-sm-pic-a.ll | 2 +- test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +- test/ExecutionEngine/MCJIT/eh-sm-pic.ll | 2 +- test/ExecutionEngine/MCJIT/eh.ll | 2 +- test/ExecutionEngine/MCJIT/fpbitcast.ll | 2 +- test/ExecutionEngine/MCJIT/hello-sm-pic.ll | 2 +- test/ExecutionEngine/MCJIT/hello.ll | 2 +- test/ExecutionEngine/MCJIT/hello2.ll | 2 +- test/ExecutionEngine/MCJIT/load-object-a.ll | 6 +- test/ExecutionEngine/MCJIT/multi-module-a.ll | 2 +- .../MCJIT/multi-module-eh-a.ll | 2 +- .../MCJIT/multi-module-sm-pic-a.ll | 2 +- .../MCJIT/non-extern-addend-smallcodemodel.ll | 2 +- .../MCJIT/non-extern-addend.ll | 2 +- test/ExecutionEngine/MCJIT/pr13727.ll | 2 +- .../MCJIT/remote/cross-module-a.ll | 2 +- 
.../MCJIT/remote/cross-module-sm-pic-a.ll | 2 +- .../MCJIT/remote/multi-module-a.ll | 2 +- .../MCJIT/remote/multi-module-sm-pic-a.ll | 2 +- .../MCJIT/remote/simpletest-remote.ll | 2 +- .../MCJIT/remote/stubs-remote.ll | 2 +- .../MCJIT/remote/stubs-sm-pic.ll | 2 +- .../remote/test-common-symbols-remote.ll | 2 +- .../MCJIT/remote/test-data-align-remote.ll | 2 +- .../test-fp-no-external-funcs-remote.ll | 2 +- .../remote/test-global-init-nonzero-remote.ll | 2 +- .../remote/test-global-init-nonzero-sm-pic.ll | 2 +- .../MCJIT/remote/test-ptr-reloc-remote.ll | 2 +- .../MCJIT/remote/test-ptr-reloc-sm-pic.ll | 2 +- test/ExecutionEngine/MCJIT/simplesttest.ll | 2 +- test/ExecutionEngine/MCJIT/simpletest.ll | 2 +- test/ExecutionEngine/MCJIT/stubs-sm-pic.ll | 2 +- test/ExecutionEngine/MCJIT/stubs.ll | 2 +- test/ExecutionEngine/MCJIT/test-arith.ll | 2 +- test/ExecutionEngine/MCJIT/test-branch.ll | 2 +- .../MCJIT/test-call-no-external-funcs.ll | 2 +- test/ExecutionEngine/MCJIT/test-call.ll | 2 +- test/ExecutionEngine/MCJIT/test-cast.ll | 2 +- .../MCJIT/test-common-symbols-alignment.ll | 2 +- .../MCJIT/test-common-symbols.ll | 2 +- .../MCJIT/test-constantexpr.ll | 2 +- test/ExecutionEngine/MCJIT/test-data-align.ll | 2 +- .../MCJIT/test-fp-no-external-funcs.ll | 2 +- test/ExecutionEngine/MCJIT/test-fp.ll | 2 +- .../MCJIT/test-global-ctors.ll | 2 +- .../MCJIT/test-global-init-nonzero-sm-pic.ll | 2 +- .../MCJIT/test-global-init-nonzero.ll | 2 +- test/ExecutionEngine/MCJIT/test-global.ll | 2 +- test/ExecutionEngine/MCJIT/test-loadstore.ll | 2 +- test/ExecutionEngine/MCJIT/test-local.ll | 2 +- test/ExecutionEngine/MCJIT/test-logical.ll | 2 +- test/ExecutionEngine/MCJIT/test-loop.ll | 2 +- test/ExecutionEngine/MCJIT/test-phi.ll | 2 +- .../MCJIT/test-ptr-reloc-sm-pic.ll | 2 +- test/ExecutionEngine/MCJIT/test-ptr-reloc.ll | 2 +- test/ExecutionEngine/MCJIT/test-ret.ll | 2 +- test/ExecutionEngine/MCJIT/test-return.ll | 2 +- test/ExecutionEngine/MCJIT/test-setcond-fp.ll | 2 +- 
.../ExecutionEngine/MCJIT/test-setcond-int.ll | 2 +- test/ExecutionEngine/MCJIT/test-shift.ll | 2 +- test/ExecutionEngine/hello.ll | 1 + test/ExecutionEngine/hello2.ll | 1 + test/ExecutionEngine/mov64zext32.ll | 1 + test/ExecutionEngine/simpletest.ll | 1 + test/ExecutionEngine/stubs.ll | 1 + .../test-call-no-external-funcs.ll | 1 + test/ExecutionEngine/test-call.ll | 1 + test/ExecutionEngine/test-common-symbols.ll | 1 + .../test-fp-no-external-funcs.ll | 1 + test/ExecutionEngine/test-fp.ll | 1 + .../test-global-init-nonzero.ll | 1 + test/ExecutionEngine/test-global.ll | 1 + test/ExecutionEngine/test-loadstore.ll | 1 + test/ExecutionEngine/test-local.ll | 1 + test/lit.cfg | 23 +- tools/lli/CMakeLists.txt | 1 + tools/lli/LLVMBuild.txt | 2 +- tools/lli/Makefile | 2 +- tools/lli/lli.cpp | 42 +- tools/llvm-jitlistener/LLVMBuild.txt | 2 +- tools/llvm-jitlistener/llvm-jitlistener.cpp | 1 + tools/llvm-rtdyld/LLVMBuild.txt | 2 +- tools/llvm-rtdyld/Makefile | 2 +- unittests/ExecutionEngine/CMakeLists.txt | 3 +- unittests/ExecutionEngine/JIT/CMakeLists.txt | 65 + .../JIT/IntelJITEventListenerTest.cpp | 113 + .../JIT/JITEventListenerTest.cpp | 237 ++ .../JIT/JITEventListenerTestCommon.h | 207 ++ .../JIT/JITMemoryManagerTest.cpp | 302 +++ unittests/ExecutionEngine/JIT/JITTest.cpp | 728 +++++++ unittests/ExecutionEngine/JIT/JITTests.def | 4 + unittests/ExecutionEngine/JIT/Makefile | 52 + .../ExecutionEngine/JIT/MultiJITTest.cpp | 190 ++ .../JIT/OProfileJITEventListenerTest.cpp | 165 ++ .../ExecutionEngine/MCJIT/CMakeLists.txt | 1 + .../MCJIT/MCJITMemoryManagerTest.cpp | 1 + .../MCJIT/MCJITObjectCacheTest.cpp | 1 + .../ExecutionEngine/MCJIT/MCJITTestBase.h | 2 + unittests/ExecutionEngine/MCJIT/Makefile | 2 +- unittests/ExecutionEngine/Makefile | 2 +- utils/TableGen/CodeEmitterGen.cpp | 26 +- utils/llvm-build/llvmbuild/main.py | 2 +- 258 files changed, 12512 insertions(+), 184 deletions(-) create mode 100644 include/llvm/CodeGen/JITCodeEmitter.h create mode 100644 
include/llvm/ExecutionEngine/JIT.h create mode 100644 include/llvm/Target/TargetJITInfo.h create mode 100644 lib/CodeGen/JITCodeEmitter.cpp create mode 100644 lib/ExecutionEngine/JIT/CMakeLists.txt create mode 100644 lib/ExecutionEngine/JIT/JIT.cpp create mode 100644 lib/ExecutionEngine/JIT/JIT.h create mode 100644 lib/ExecutionEngine/JIT/JITEmitter.cpp rename lib/ExecutionEngine/{MCJIT => JIT}/JITMemoryManager.cpp (100%) create mode 100644 lib/ExecutionEngine/JIT/LLVMBuild.txt create mode 100644 lib/ExecutionEngine/JIT/Makefile create mode 100644 lib/Target/ARM/ARMCodeEmitter.cpp create mode 100644 lib/Target/ARM/ARMJITInfo.cpp create mode 100644 lib/Target/ARM/ARMJITInfo.h create mode 100644 lib/Target/Mips/MipsCodeEmitter.cpp create mode 100644 lib/Target/Mips/MipsJITInfo.cpp create mode 100644 lib/Target/Mips/MipsJITInfo.h create mode 100644 lib/Target/PowerPC/PPCCodeEmitter.cpp create mode 100644 lib/Target/PowerPC/PPCJITInfo.cpp create mode 100644 lib/Target/PowerPC/PPCJITInfo.h create mode 100644 lib/Target/Sparc/SparcCodeEmitter.cpp create mode 100644 lib/Target/Sparc/SparcJITInfo.cpp create mode 100644 lib/Target/Sparc/SparcJITInfo.h create mode 100644 lib/Target/TargetJITInfo.cpp create mode 100644 lib/Target/X86/X86CodeEmitter.cpp create mode 100644 lib/Target/X86/X86JITInfo.cpp create mode 100644 lib/Target/X86/X86JITInfo.h create mode 100644 unittests/ExecutionEngine/JIT/CMakeLists.txt create mode 100644 unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp create mode 100644 unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp create mode 100644 unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h create mode 100644 unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp create mode 100644 unittests/ExecutionEngine/JIT/JITTest.cpp create mode 100644 unittests/ExecutionEngine/JIT/JITTests.def create mode 100644 unittests/ExecutionEngine/JIT/Makefile create mode 100644 unittests/ExecutionEngine/JIT/MultiJITTest.cpp create mode 100644 
unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp diff --git a/Makefile.rules b/Makefile.rules index 43fc030e8b1..ebebc0a85c4 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -1673,13 +1673,18 @@ $(ObjDir)/%GenAsmMatcher.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN) $(TARGET:%=$(ObjDir)/%GenMCCodeEmitter.inc.tmp): \ $(ObjDir)/%GenMCCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir $(LLVM_TBLGEN) $(Echo) "Building $( bool */ diff --git a/docs/TableGen/BackEnds.rst b/docs/TableGen/BackEnds.rst index e8544b65216..42de41da74f 100644 --- a/docs/TableGen/BackEnds.rst +++ b/docs/TableGen/BackEnds.rst @@ -78,7 +78,8 @@ returns the (currently, 32-bit unsigned) value of the instruction. **Output**: C++ code, implementing the target's CodeEmitter class by overriding the virtual functions as ``CodeEmitter::function()``. -**Usage**: Used to include directly at the end of ``MCCodeEmitter.cpp``. +**Usage**: Used to include directly at the end of ``CodeEmitter.cpp``, and +with option `-mc-emitter` to be included in ``MCCodeEmitter.cpp``. 
RegisterInfo ------------ diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp index c8c440b5f47..e2de6bc58d7 100644 --- a/examples/BrainF/BrainFDriver.cpp +++ b/examples/BrainF/BrainFDriver.cpp @@ -26,8 +26,8 @@ #include "BrainF.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" diff --git a/examples/BrainF/CMakeLists.txt b/examples/BrainF/CMakeLists.txt index cf1cf1b6159..65589d9f39f 100644 --- a/examples/BrainF/CMakeLists.txt +++ b/examples/BrainF/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS BitWriter Core ExecutionEngine + JIT MC Support nativecodegen diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 3583677b689..24e538cacf2 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -1964,8 +1964,10 @@ int main(int argc, char *argv[]) { // Build engine with JIT llvm::EngineBuilder factory(module); factory.setEngineKind(llvm::EngineKind::JIT); + factory.setAllocateGVsWithCode(false); factory.setTargetOptions(Opts); factory.setMCJITMemoryManager(MemMgr); + factory.setUseMCJIT(true); llvm::ExecutionEngine *executionEngine = factory.create(); { diff --git a/examples/Fibonacci/CMakeLists.txt b/examples/Fibonacci/CMakeLists.txt index 087ccdd7d84..c015e50ac35 100644 --- a/examples/Fibonacci/CMakeLists.txt +++ b/examples/Fibonacci/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine Interpreter + JIT MC Support nativecodegen diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index 40137c3a051..ba8e95342fa 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Verifier.h" #include 
"llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" diff --git a/examples/HowToUseJIT/CMakeLists.txt b/examples/HowToUseJIT/CMakeLists.txt index a344ad07ca6..237cbea861d 100644 --- a/examples/HowToUseJIT/CMakeLists.txt +++ b/examples/HowToUseJIT/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine Interpreter + JIT MC Support nativecodegen diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 906b066ca45..7125a156104 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -36,6 +36,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -125,6 +126,7 @@ int main() { // Import result of execution: outs() << "Result: " << gv.IntVal << "\n"; + EE->freeMachineCodeForFunction(FooF); delete EE; llvm_shutdown(); return 0; diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt index 2f828dc819e..2b87e868498 100644 --- a/examples/Kaleidoscope/Chapter4/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index 8a9e7dfbebf..a8f59428c0d 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git 
a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt index 1912ddc0741..c3e7c43cb41 100644 --- a/examples/Kaleidoscope/Chapter5/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index 1abc8809086..a31b5b4792a 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt index d36f03090e5..cd61cec89d5 100644 --- a/examples/Kaleidoscope/Chapter6/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index c21fe8a20ac..5a3bd2e3147 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt index bdc0e5525db..cdb13c465d1 100644 --- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git 
a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index e23637e2681..c2c337c9008 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp b/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp index 00f5b83bde5..9466360af19 100644 --- a/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp +++ b/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp @@ -2,6 +2,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/MCJIT/cached/toy.cpp b/examples/Kaleidoscope/MCJIT/cached/toy.cpp index af51b4a8314..16c548c9806 100644 --- a/examples/Kaleidoscope/MCJIT/cached/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/cached/toy.cpp @@ -897,6 +897,7 @@ ExecutionEngine *MCJITHelper::compileModule(Module *M) { std::string ErrStr; ExecutionEngine *NewEngine = EngineBuilder(M) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!NewEngine) { diff --git a/examples/Kaleidoscope/MCJIT/complete/toy.cpp b/examples/Kaleidoscope/MCJIT/complete/toy.cpp index 3beb0d83789..10e7ada1e88 100644 --- a/examples/Kaleidoscope/MCJIT/complete/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/complete/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" @@ -51,6 +52,10 @@ namespace { 
cl::desc("Dump IR from modules to stderr on shutdown"), cl::init(false)); + cl::opt<bool> UseMCJIT( + "use-mcjit", cl::desc("Use the MCJIT execution engine"), + cl::init(true)); + cl::opt<bool> EnableLazyCompilation( "enable-lazy-compilation", cl::desc("Enable lazy compilation when using the MCJIT engine"), cl::init(true)); @@ -787,6 +792,96 @@ public: virtual void dump(); }; +//===----------------------------------------------------------------------===// +// Helper class for JIT execution engine +//===----------------------------------------------------------------------===// + +class JITHelper : public BaseHelper { +public: + JITHelper(LLVMContext &Context) { + // Make the module, which holds all the code. + if (!InputIR.empty()) { + TheModule = parseInputIR(InputIR, Context); + } else { + TheModule = new Module("my cool jit", Context); + } + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + TheFPM = new FunctionPassManager(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + TheFPM->add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. + TheFPM->add(createBasicAliasAnalysisPass()); + // Promote allocas to registers. + TheFPM->add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc).
+ TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); + } + + virtual ~JITHelper() { + if (TheFPM) + delete TheFPM; + if (TheExecutionEngine) + delete TheExecutionEngine; + } + + virtual Function *getFunction(const std::string FnName) { + assert(TheModule); + return TheModule->getFunction(FnName); + } + + virtual Module *getModuleForNewFunction() { + assert(TheModule); + return TheModule; + } + + virtual void *getPointerToFunction(Function* F) { + assert(TheExecutionEngine); + return TheExecutionEngine->getPointerToFunction(F); + } + + virtual void *getPointerToNamedFunction(const std::string &Name) { + return TheExecutionEngine->getPointerToNamedFunction(Name); + } + + virtual void runFPM(Function &F) { + assert(TheFPM); + TheFPM->run(F); + } + + virtual void closeCurrentModule() { + // This should never be called for JIT + assert(false); + } + + virtual void dump() { + assert(TheModule); + TheModule->dump(); + } + +private: + Module *TheModule; + ExecutionEngine *TheExecutionEngine; + FunctionPassManager *TheFPM; +}; + //===----------------------------------------------------------------------===// // MCJIT helper class //===----------------------------------------------------------------------===// @@ -939,6 +1034,7 @@ ExecutionEngine *MCJITHelper::compileModule(Module *M) { std::string ErrStr; ExecutionEngine *EE = EngineBuilder(M) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!EE) { @@ -1098,8 +1194,10 @@ Value *UnaryExprAST::Codegen() { Value *OperandV = Operand->Codegen(); if (OperandV == 0) return 0; Function *F; - F = TheHelper->getFunction( - MakeLegalFunctionName(std::string("unary") + Opcode)); + if (UseMCJIT) + F = TheHelper->getFunction(MakeLegalFunctionName(std::string("unary")+Opcode)); + else + F = TheHelper->getFunction(std::string("unary")+Opcode); if (F == 0) return ErrorV("Unknown unary operator"); @@ -1148,7 +1246,10 @@ Value *BinaryExprAST::Codegen() { 
// If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. Function *F; - F = TheHelper->getFunction(MakeLegalFunctionName(std::string("binary")+Op)); + if (UseMCJIT) + F = TheHelper->getFunction(MakeLegalFunctionName(std::string("binary")+Op)); + else + F = TheHelper->getFunction(std::string("binary")+Op); assert(F && "binary operator not found!"); Value *Ops[] = { L, R }; @@ -1381,7 +1482,10 @@ Function *PrototypeAST::Codegen() { Doubles, false); std::string FnName; - FnName = MakeLegalFunctionName(Name); + if (UseMCJIT) + FnName = MakeLegalFunctionName(Name); + else + FnName = Name; Module* M = TheHelper->getModuleForNewFunction(); Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, M); @@ -1456,6 +1560,10 @@ Function *FunctionAST::Codegen() { // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); + // Optimize the function. + if (!UseMCJIT) + TheHelper->runFPM(*TheFunction); + return TheFunction; } @@ -1473,7 +1581,7 @@ Function *FunctionAST::Codegen() { static void HandleDefinition() { if (FunctionAST *F = ParseDefinition()) { - if (EnableLazyCompilation) + if (UseMCJIT && EnableLazyCompilation) TheHelper->closeCurrentModule(); Function *LF = F->Codegen(); if (LF && VerboseOutput) { @@ -1563,8 +1671,10 @@ double printlf() { int main(int argc, char **argv) { InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetAsmParser(); + if (UseMCJIT) { + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + } LLVMContext &Context = getGlobalContext(); cl::ParseCommandLineOptions(argc, argv, @@ -1580,7 +1690,10 @@ int main(int argc, char **argv) { BinopPrecedence['*'] = 40; // highest. // Make the Helper, which holds all the code. - TheHelper = new MCJITHelper(Context); + if (UseMCJIT) + TheHelper = new MCJITHelper(Context); + else + TheHelper = new JITHelper(Context); // Prime the first token. 
if (!SuppressPrompts) diff --git a/examples/Kaleidoscope/MCJIT/initial/toy.cpp b/examples/Kaleidoscope/MCJIT/initial/toy.cpp index 2c1b2973af5..4c4711338c4 100644 --- a/examples/Kaleidoscope/MCJIT/initial/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/initial/toy.cpp @@ -778,6 +778,7 @@ void *MCJITHelper::getPointerToFunction(Function* F) { std::string ErrStr; ExecutionEngine *NewEngine = EngineBuilder(OpenModule) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!NewEngine) { diff --git a/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp b/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp index 98c1001dc51..2d540dd040f 100644 --- a/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp +++ b/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp @@ -2,6 +2,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/MCJIT/lazy/toy.cpp b/examples/Kaleidoscope/MCJIT/lazy/toy.cpp index 9c2a0d48f39..ff88e23bd35 100644 --- a/examples/Kaleidoscope/MCJIT/lazy/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/lazy/toy.cpp @@ -808,6 +808,7 @@ ExecutionEngine *MCJITHelper::compileModule(Module *M) { std::string ErrStr; ExecutionEngine *NewEngine = EngineBuilder(M) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!NewEngine) { diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt index 3c489e83027..8673917f558 100644 --- a/examples/ParallelJIT/CMakeLists.txt +++ b/examples/ParallelJIT/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine Interpreter + JIT Support nativecodegen ) diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index 653b8cc0342..2aa63d91ffb 100644 --- 
a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -19,6 +19,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index f1f4cadec34..7cdf0d78d5b 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -34,6 +34,7 @@ extern "C" { * @{ */ +void LLVMLinkInJIT(void); void LLVMLinkInMCJIT(void); void LLVMLinkInInterpreter(void); diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h new file mode 100644 index 00000000000..dc2a0272db4 --- /dev/null +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -0,0 +1,344 @@ +//===-- llvm/CodeGen/JITCodeEmitter.h - Code emission ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an abstract interface that is used by the machine code +// emission framework to output the code. This allows machine code emission to +// be separated from concerns such as resolution of call targets, and where the +// machine code will be written (memory or disk, f.e.). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_JITCODEEMITTER_H +#define LLVM_CODEGEN_JITCODEEMITTER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include + +namespace llvm { + +class MachineBasicBlock; +class MachineConstantPool; +class MachineJumpTableInfo; +class MachineFunction; +class MachineModuleInfo; +class MachineRelocation; +class Value; +class GlobalValue; +class Function; + +/// JITCodeEmitter - This class defines two sorts of methods: those for +/// emitting the actual bytes of machine code, and those for emitting auxiliary +/// structures, such as jump tables, relocations, etc. +/// +/// Emission of machine code is complicated by the fact that we don't (in +/// general) know the size of the machine code that we're about to emit before +/// we emit it. As such, we preallocate a certain amount of memory, and set the +/// BufferBegin/BufferEnd pointers to the start and end of the buffer. As we +/// emit machine instructions, we advance the CurBufferPtr to indicate the +/// location of the next byte to emit. In the case of a buffer overflow (we +/// need to emit more machine code than we have allocated space for), the +/// CurBufferPtr will saturate to BufferEnd and ignore stores. Once the entire +/// function has been emitted, the overflow condition is checked, and if it has +/// occurred, more memory is allocated, and we reemit the code into it. +/// +class JITCodeEmitter : public MachineCodeEmitter { + void anchor() override; +public: + virtual ~JITCodeEmitter() {} + + /// startFunction - This callback is invoked when the specified function is + /// about to be code generated. This initializes the BufferBegin/End/Ptr + /// fields. 
+ /// + void startFunction(MachineFunction &F) override = 0; + + /// finishFunction - This callback is invoked when the specified function has + /// finished code generation. If a buffer overflow has occurred, this method + /// returns true (the callee is required to try again), otherwise it returns + /// false. + /// + bool finishFunction(MachineFunction &F) override = 0; + + /// allocIndirectGV - Allocates and fills storage for an indirect + /// GlobalValue, and returns the address. + virtual void *allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) = 0; + + /// emitByte - This callback is invoked when a byte needs to be written to the + /// output stream. + /// + void emitByte(uint8_t B) { + if (CurBufferPtr != BufferEnd) + *CurBufferPtr++ = B; + } + + /// emitWordLE - This callback is invoked when a 32-bit word needs to be + /// written to the output stream in little-endian format. + /// + void emitWordLE(uint32_t W) { + if (4 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 0); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 24); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitWordBE - This callback is invoked when a 32-bit word needs to be + /// written to the output stream in big-endian format. + /// + void emitWordBE(uint32_t W) { + if (4 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 0); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitDWordLE - This callback is invoked when a 64-bit word needs to be + /// written to the output stream in little-endian format. 
+ /// + void emitDWordLE(uint64_t W) { + if (8 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 0); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 32); + *CurBufferPtr++ = (uint8_t)(W >> 40); + *CurBufferPtr++ = (uint8_t)(W >> 48); + *CurBufferPtr++ = (uint8_t)(W >> 56); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitDWordBE - This callback is invoked when a 64-bit word needs to be + /// written to the output stream in big-endian format. + /// + void emitDWordBE(uint64_t W) { + if (8 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 56); + *CurBufferPtr++ = (uint8_t)(W >> 48); + *CurBufferPtr++ = (uint8_t)(W >> 40); + *CurBufferPtr++ = (uint8_t)(W >> 32); + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 0); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitAlignment - Move the CurBufferPtr pointer up to the specified + /// alignment (saturated to BufferEnd of course). + void emitAlignment(unsigned Alignment) { + if (Alignment == 0) Alignment = 1; + uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr, + Alignment); + CurBufferPtr = std::min(NewPtr, BufferEnd); + } + + /// emitAlignmentWithFill - Similar to emitAlignment, except that the + /// extra bytes are filled with the provided byte. + void emitAlignmentWithFill(unsigned Alignment, uint8_t Fill) { + if (Alignment == 0) Alignment = 1; + uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr, + Alignment); + // Fail if we don't have room. + if (NewPtr > BufferEnd) { + CurBufferPtr = BufferEnd; + return; + } + while (CurBufferPtr < NewPtr) { + *CurBufferPtr++ = Fill; + } + } + + /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be + /// written to the output stream. 
+ void emitULEB128Bytes(uint64_t Value, unsigned PadTo = 0) { + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value || PadTo != 0) Byte |= 0x80; + emitByte(Byte); + } while (Value); + + if (PadTo) { + do { + uint8_t Byte = (PadTo > 1) ? 0x80 : 0x0; + emitByte(Byte); + } while (--PadTo); + } + } + + /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be + /// written to the output stream. + void emitSLEB128Bytes(int64_t Value) { + int32_t Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + if (IsMore) Byte |= 0x80; + emitByte(Byte); + } while (IsMore); + } + + /// emitString - This callback is invoked when a String needs to be + /// written to the output stream. + void emitString(const std::string &String) { + for (size_t i = 0, N = String.size(); i < N; ++i) { + uint8_t C = String[i]; + emitByte(C); + } + emitByte(0); + } + + /// emitInt32 - Emit a int32 directive. + void emitInt32(uint32_t Value) { + if (4 <= BufferEnd-CurBufferPtr) { + *((uint32_t*)CurBufferPtr) = Value; + CurBufferPtr += 4; + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitInt64 - Emit a int64 directive. + void emitInt64(uint64_t Value) { + if (8 <= BufferEnd-CurBufferPtr) { + *((uint64_t*)CurBufferPtr) = Value; + CurBufferPtr += 8; + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitInt32At - Emit the Int32 Value in Addr. + void emitInt32At(uintptr_t *Addr, uintptr_t Value) { + if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd) + (*(uint32_t*)Addr) = (uint32_t)Value; + } + + /// emitInt64At - Emit the Int64 Value in Addr. 
+ void emitInt64At(uintptr_t *Addr, uintptr_t Value) { + if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd) + (*(uint64_t*)Addr) = (uint64_t)Value; + } + + + /// emitLabel - Emits a label + void emitLabel(MCSymbol *Label) override = 0; + + /// allocateSpace - Allocate a block of space in the current output buffer, + /// returning null (and setting conditions to indicate buffer overflow) on + /// failure. Alignment is the alignment in bytes of the buffer desired. + void *allocateSpace(uintptr_t Size, unsigned Alignment) override { + emitAlignment(Alignment); + void *Result; + + // Check for buffer overflow. + if (Size >= (uintptr_t)(BufferEnd-CurBufferPtr)) { + CurBufferPtr = BufferEnd; + Result = nullptr; + } else { + // Allocate the space. + Result = CurBufferPtr; + CurBufferPtr += Size; + } + + return Result; + } + + /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. + virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0; + + /// StartMachineBasicBlock - This should be called by the target when a new + /// basic block is about to be emitted. This way the MCE knows where the + /// start of the block is, and can implement getMachineBasicBlockAddress. + void StartMachineBasicBlock(MachineBasicBlock *MBB) override = 0; + + /// getCurrentPCValue - This returns the address that the next emitted byte + /// will be output to. + /// + uintptr_t getCurrentPCValue() const override { + return (uintptr_t)CurBufferPtr; + } + + /// getCurrentPCOffset - Return the offset from the start of the emitted + /// buffer that we are currently writing to. + uintptr_t getCurrentPCOffset() const override { + return CurBufferPtr-BufferBegin; + } + + /// earlyResolveAddresses - True if the code emitter can use symbol addresses + /// during code emission time. 
The JIT is capable of doing this because it + /// creates jump tables or constant pools in memory on the fly while the + /// object code emitters rely on a linker to have real addresses and should + /// use relocations instead. + bool earlyResolveAddresses() const override { return true; } + + /// addRelocation - Whenever a relocatable address is needed, it should be + /// noted with this interface. + void addRelocation(const MachineRelocation &MR) override = 0; + + /// FIXME: These should all be handled with relocations! + + /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in + /// the constant pool that was last emitted with the emitConstantPool method. + /// + uintptr_t getConstantPoolEntryAddress(unsigned Index) const override = 0; + + /// getJumpTableEntryAddress - Return the address of the jump table with index + /// 'Index' in the function that last called initJumpTableInfo. + /// + uintptr_t getJumpTableEntryAddress(unsigned Index) const override = 0; + + /// getMachineBasicBlockAddress - Return the address of the specified + /// MachineBasicBlock, only usable after the label for the MBB has been + /// emitted. + /// + uintptr_t + getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override = 0; + + /// getLabelAddress - Return the address of the specified Label, only usable + /// after the Label has been emitted. + /// + uintptr_t getLabelAddress(MCSymbol *Label) const override = 0; + + /// Specifies the MachineModuleInfo object. This is used for exception handling + /// purposes. + void setModuleInfo(MachineModuleInfo* Info) override = 0; + + /// getLabelLocations - Return the label locations map of the label IDs to + /// their address. 
+ virtual DenseMap *getLabelLocations() { + return nullptr; + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 22ce4491b7e..e3ef104aea8 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -141,6 +141,12 @@ protected: // To avoid having libexecutionengine depend on the JIT and interpreter // libraries, the execution engine implementations set these functions to ctor // pointers at startup time if they are linked in. + static ExecutionEngine *(*JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM); static ExecutionEngine *(*MCJITCtor)( Module *M, std::string *ErrorStr, @@ -329,6 +335,13 @@ public: /// getFunctionAddress instead. virtual void *getPointerToFunction(Function *F) = 0; + /// getPointerToBasicBlock - The different EE's represent basic blocks in + /// different ways. Return the representation for a blockaddress of the + /// specified block. + /// + /// This function will not be implemented for the MCJIT execution engine. + virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0; + /// getPointerToFunctionOrStub - If the specified function has been /// code-gen'd, return a pointer to the function. If not, compile it, or use /// a stub to implement lazy compilation if available. See @@ -376,6 +389,18 @@ public: void InitializeMemory(const Constant *Init, void *Addr); + /// recompileAndRelinkFunction - This method is used to force a function which + /// has already been compiled to be compiled again, possibly after it has been + /// modified. Then the entry to the old copy is overwritten with a branch to + /// the new copy. If there was no old copy, this acts just like + /// VM::getPointerToFunction(). 
+ virtual void *recompileAndRelinkFunction(Function *F) = 0; + + /// freeMachineCodeForFunction - Release memory in the ExecutionEngine + /// corresponding to the machine code emitted to execute this function, useful + /// for garbage-collecting generated code. + virtual void freeMachineCodeForFunction(Function *F) = 0; + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. @@ -512,12 +537,14 @@ private: CodeGenOpt::Level OptLevel; RTDyldMemoryManager *MCJMM; JITMemoryManager *JMM; + bool AllocateGVsWithCode; TargetOptions Options; Reloc::Model RelocModel; CodeModel::Model CMModel; std::string MArch; std::string MCPU; SmallVector MAttrs; + bool UseMCJIT; bool VerifyModules; /// InitEngine - Does the common initialization of default options. @@ -599,6 +626,18 @@ public: return *this; } + /// setAllocateGVsWithCode - Sets whether global values should be allocated + /// into the same buffer as code. For most applications this should be set + /// to false. Allocating globals with code breaks freeMachineCodeForFunction + /// and is probably unsafe and bad for performance. However, we have clients + /// who depend on this behavior, so we must support it. This option defaults + /// to false so that users of the new API can safely use the new memory + /// manager and free machine code. + EngineBuilder &setAllocateGVsWithCode(bool a) { + AllocateGVsWithCode = a; + return *this; + } + /// setMArch - Override the architecture set by the Module's triple. EngineBuilder &setMArch(StringRef march) { MArch.assign(march.begin(), march.end()); @@ -611,6 +650,13 @@ public: return *this; } + /// setUseMCJIT - Set whether the MC-JIT implementation should be used + /// (experimental). + EngineBuilder &setUseMCJIT(bool Value) { + UseMCJIT = Value; + return *this; + } + /// setVerifyModules - Set whether the JIT implementation should verify /// IR modules during compilation. 
EngineBuilder &setVerifyModules(bool Verify) { diff --git a/include/llvm/ExecutionEngine/JIT.h b/include/llvm/ExecutionEngine/JIT.h new file mode 100644 index 00000000000..581d6e6c35e --- /dev/null +++ b/include/llvm/ExecutionEngine/JIT.h @@ -0,0 +1,38 @@ +//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file forces the JIT to link in on certain operating systems. +// (Windows). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JIT_H +#define LLVM_EXECUTIONENGINE_JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include + +extern "C" void LLVMLinkInJIT(); + +namespace { + struct ForceJITLinking { + ForceJITLinking() { + // We must reference JIT in such a way that compilers will not + // delete it all as dead code, even with whole program optimization, + // yet is effectively a NO-OP. As the compiler isn't smart enough + // to know that getenv() never returns -1, this will do the job. + if (std::getenv("bar") != (char*) -1) + return; + + LLVMLinkInJIT(); + } + } ForceJITLinking; +} + +#endif diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h new file mode 100644 index 00000000000..58acf30e034 --- /dev/null +++ b/include/llvm/Target/TargetJITInfo.h @@ -0,0 +1,136 @@ +//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file exposes an abstract interface used by the Just-In-Time code +// generator to perform target-specific activities, such as emitting stubs. If +// a TargetMachine supports JIT code generation, it should provide one of these +// objects through the getJITInfo() method. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETJITINFO_H +#define LLVM_TARGET_TARGETJITINFO_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include + +namespace llvm { + class Function; + class GlobalValue; + class JITCodeEmitter; + class MachineRelocation; + + /// TargetJITInfo - Target specific information required by the Just-In-Time + /// code generator. + class TargetJITInfo { + virtual void anchor(); + public: + virtual ~TargetJITInfo() {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New) = 0; + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) { + llvm_unreachable("This target doesn't implement " + "emitGlobalValueIndirectSym!"); + } + + /// Records the required size and alignment for a call stub in bytes. + struct StubLayout { + size_t Size; + size_t Alignment; + }; + /// Returns the maximum size and alignment for a call stub on this target. 
+ virtual StubLayout getStubLayout() { + llvm_unreachable("This target doesn't implement getStubLayout!"); + } + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. The JITCodeEmitter must already have storage allocated for the + /// stub. Return the address of the resultant function, which may have been + /// aligned from the address the JCE was set up to emit at. + virtual void *emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) { + llvm_unreachable("This target doesn't implement emitFunctionStub!"); + } + + /// getPICJumpTableEntry - Returns the value of the jumptable entry for the + /// specific basic block. + virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) { + llvm_unreachable("This target doesn't implement getPICJumpTableEntry!"); + } + + /// LazyResolverFn - This typedef is used to represent the function that + /// unresolved call points should invoke. This is a target specific + /// function that knows how to walk the stack and find out which stub the + /// call is coming from. + typedef void (*LazyResolverFn)(); + + /// JITCompilerFn - This typedef is used to represent the JIT function that + /// lazily compiles the function corresponding to a stub. The JIT keeps + /// track of the mapping between stubs and LLVM Functions, the target + /// provides the ability to figure out the address of a stub that is called + /// by the LazyResolverFn. + typedef void* (*JITCompilerFn)(void *); + + /// getLazyResolverFunction - This method is used to initialize the JIT, + /// giving the target the function that should be used to compile a + /// function, and giving the JIT the target function used to do the lazy + /// resolving. 
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn) { + llvm_unreachable("Not implemented for this target!"); + } + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + assert(NumRelocs == 0 && "This target does not have relocations!"); + } + + /// allocateThreadLocalMemory - Each target has its own way of + /// handling thread local variables. This method returns a value only + /// meaningful to the target. + virtual char* allocateThreadLocalMemory(size_t size) { + llvm_unreachable("This target does not implement thread local storage!"); + } + + /// needsGOT - Allows a target to specify that it would like the + /// JIT to manage a GOT for it. + bool needsGOT() const { return useGOT; } + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + virtual bool hasCustomConstantPool() const { return false; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + virtual bool hasCustomJumpTables() const { return false; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. + virtual bool allocateSeparateGVMemory() const { return false; } + protected: + bool useGOT; + }; +} // End llvm namespace + +#endif diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f5c4132b8ff..ac35737b6c5 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -828,6 +828,11 @@ public: return UseUnderscoreLongJmp; } + /// Return whether the target can generate code for jump tables. 
+ bool supportJumpTables() const { + return SupportJumpTables; + } + /// Return integer threshold on number of blocks to use jump tables rather /// than if sequence. int getMinimumJumpTableEntries() const { @@ -996,6 +1001,11 @@ protected: UseUnderscoreLongJmp = Val; } + /// Indicate whether the target can generate code for jump tables. + void setSupportJumpTables(bool Val) { + SupportJumpTables = Val; + } + /// Indicate the number of blocks to generate jump tables rather than if /// sequence. void setMinimumJumpTableEntries(int Val) { @@ -1499,6 +1509,10 @@ private: /// Defaults to false. bool UseUnderscoreLongJmp; + /// Whether the target can generate code for jumptables. If it's not true, + /// then each jumptable must be lowered into if-then-else's. + bool SupportJumpTables; + /// Number of blocks threshold to use jump tables. int MinimumJumpTableEntries; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index d4b93da19be..22d01bfc846 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -24,6 +24,7 @@ namespace llvm { class InstrItineraryData; +class JITCodeEmitter; class GlobalValue; class Mangler; class MCAsmInfo; @@ -35,6 +36,7 @@ class DataLayout; class TargetLibraryInfo; class TargetFrameLowering; class TargetIntrinsicInfo; +class TargetJITInfo; class TargetLowering; class TargetPassConfig; class TargetRegisterInfo; @@ -99,6 +101,10 @@ public: virtual const TargetSubtargetInfo *getSubtargetImpl() const { return nullptr; } + TargetSubtargetInfo *getSubtargetImpl() { + const TargetMachine *TM = this; + return const_cast(TM->getSubtargetImpl()); + } /// getSubtarget - This method returns a pointer to the specified type of /// TargetSubtargetInfo. In debug builds, it verifies that the object being @@ -195,6 +201,18 @@ public: return true; } + /// addPassesToEmitMachineCode - Add passes to the specified pass manager to + /// get machine code emitted. 
This uses a JITCodeEmitter object to handle + /// actually outputting the machine code and resolving things like the address + /// of functions. This method returns true if machine code emission is + /// not supported. + /// + virtual bool addPassesToEmitMachineCode(PassManagerBase &, + JITCodeEmitter &, + bool /*DisableVerify*/ = true) { + return true; + } + /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be @@ -241,6 +259,15 @@ public: AnalysisID StartAfter = nullptr, AnalysisID StopAfter = nullptr) override; + /// addPassesToEmitMachineCode - Add passes to the specified pass manager to + /// get machine code emitted. This uses a JITCodeEmitter object to handle + /// actually outputting the machine code and resolving things like the address + /// of functions. This method returns true if machine code emission is + /// not supported. + /// + bool addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &MCE, + bool DisableVerify = true) override; + /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be @@ -248,6 +275,14 @@ public: /// bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &OS, bool DisableVerify = true) override; + + /// addCodeEmitter - This pass should be overridden by the target to add a + /// code emitter, if supported. If this is not supported, 'true' should be + /// returned. 
+ virtual bool addCodeEmitter(PassManagerBase &, + JITCodeEmitter &) { + return true; + } }; } // End llvm namespace diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 9791d4051ee..45a93309501 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -26,6 +26,7 @@ class SDep; class SUnit; class TargetFrameLowering; class TargetInstrInfo; +class TargetJITInfo; class TargetLowering; class TargetRegisterClass; class TargetRegisterInfo; @@ -78,6 +79,11 @@ public: /// virtual const TargetRegisterInfo *getRegisterInfo() const { return nullptr; } + /// getJITInfo - If this target supports a JIT, return information for it, + /// otherwise return null. + /// + virtual TargetJITInfo *getJITInfo() { return nullptr; } + /// getInstrItineraryData - Returns instruction itinerary data for the target /// or specific subtarget. /// diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 43a6e0f26dc..24bc570f44a 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -188,8 +188,9 @@ unsigned BasicTTI::getJumpBufSize() const { bool BasicTTI::shouldBuildLookupTables() const { const TargetLoweringBase *TLI = getTLI(); - return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } bool BasicTTI::haveFastSqrt(Type *Ty) const { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index acc8a26936a..2a247c12e64 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp + JITCodeEmitter.cpp JumpInstrTables.cpp LLVMTargetMachine.cpp 
LatencyPriorityQueue.cpp diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp new file mode 100644 index 00000000000..96a53892f6d --- /dev/null +++ b/lib/CodeGen/JITCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/JITCodeEmitter.h" + +using namespace llvm; + +void JITCodeEmitter::anchor() { } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 2c77eabb0af..0e0d7ba4c89 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -226,6 +226,26 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return false; } +/// addPassesToEmitMachineCode - Add passes to the specified pass manager to +/// get machine code emitted. This uses a JITCodeEmitter object to handle +/// actually outputting the machine code and resolving things like the address +/// of functions. This method should return true if machine code emission is +/// not supported. +/// +bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, + JITCodeEmitter &JCE, + bool DisableVerify) { + // Add common CodeGen passes. + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, + nullptr); + if (!Context) + return true; + + addCodeEmitter(PM, JCE); + + return false; // success! +} + /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. 
It fills the MCContext Ctx pointer which can be diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1fa5bf1114e..cc6eac70ba7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2228,8 +2228,9 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return TLI.supportJumpTables() && + (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } static APInt ComputeRange(const APInt &First, const APInt &Last) { diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 1a90c6c866f..a8c7a28a4e8 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -719,6 +719,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; + SupportJumpTables = true; MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index 208495c8847..3102c7bd582 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMExecutionEngine ) add_subdirectory(Interpreter) +add_subdirectory(JIT) add_subdirectory(MCJIT) add_subdirectory(RuntimeDyld) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 01b9bcc8905..063f3fb05c2 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -48,6 +48,12 @@ void ObjectCache::anchor() {} void ObjectBuffer::anchor() {} void 
ObjectBufferStream::anchor() {} +ExecutionEngine *(*ExecutionEngine::JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM) = nullptr; ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, @@ -411,8 +417,10 @@ void EngineBuilder::InitEngine() { MCJMM = nullptr; JMM = nullptr; Options = TargetOptions(); + AllocateGVsWithCode = false; RelocModel = Reloc::Default; CMModel = CodeModel::JITDefault; + UseMCJIT = false; // IR module verification is enabled by default in debug builds, and disabled // by default in release builds. @@ -445,6 +453,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return nullptr; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return nullptr; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -457,9 +473,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { } ExecutionEngine *EE = nullptr; - if (ExecutionEngine::MCJITCtor) + if (UseMCJIT && ExecutionEngine::MCJITCtor) EE = ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? 
MCJMM : JMM, TheTM.release()); + else if (ExecutionEngine::JITCtor) + EE = ExecutionEngine::JITCtor(M, ErrorStr, JMM, + AllocateGVsWithCode, TheTM.release()); if (EE) { EE->setVerifyModules(VerifyModules); @@ -477,7 +496,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return nullptr; } - if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::MCJITCtor) { + if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::JITCtor && + !ExecutionEngine::MCJITCtor) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } @@ -823,6 +843,9 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { Result = PTOGV(getPointerToFunctionOrStub(const_cast(F))); else if (const GlobalVariable *GV = dyn_cast(C)) Result = PTOGV(getOrEmitGlobalVariable(const_cast(GV))); + else if (const BlockAddress *BA = dyn_cast(C)) + Result = PTOGV(getPointerToBasicBlock(const_cast( + BA->getBasicBlock()))); else llvm_unreachable("Unknown constant pointer type!"); break; diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index fa2f23809a8..6ff1e7ac063 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -192,6 +192,7 @@ LLVMBool LLVMCreateMCJITCompilerForModule( EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) + .setUseMCJIT(true) .setOptLevel((CodeGenOpt::Level)options.OptLevel) .setCodeModel(unwrap(options.CodeModel)) .setTargetOptions(targetOptions); @@ -274,6 +275,7 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, } void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { + unwrap(EE)->freeMachineCodeForFunction(unwrap(F)); } void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ @@ -312,7 +314,7 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, 
LLVMValueRef Fn) { - return nullptr; + return unwrap(EE)->recompileAndRelinkFunction(unwrap(Fn)); } LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index ed6f8f44629..2145cde05fb 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -121,6 +121,17 @@ public: return nullptr; } + /// recompileAndRelinkFunction - For the interpreter, functions are always + /// up-to-date. + /// + void *recompileAndRelinkFunction(Function *F) override { + return getPointerToFunction(F); + } + + /// freeMachineCodeForFunction - The interpreter does not generate any code. + /// + void freeMachineCodeForFunction(Function *F) override { } + // Methods used to execute code: // Place a call on the stack void callFunction(Function *F, const std::vector &ArgVals); @@ -202,6 +213,7 @@ private: // Helper functions void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF); void *getPointerToFunction(Function *F) override { return (void*)F; } + void *getPointerToBasicBlock(BasicBlock *BB) override { return (void*)BB; } void initializeExecutionEngine() { } void initializeExternalFunctions(); diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt new file mode 100644 index 00000000000..e16baede50f --- /dev/null +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -0,0 +1,8 @@ +# TODO: Support other architectures. See Makefile. 
+add_definitions(-DENABLE_X86_JIT) + +add_llvm_library(LLVMJIT + JIT.cpp + JITEmitter.cpp + JITMemoryManager.cpp + ) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp new file mode 100644 index 00000000000..ab0c1a680bd --- /dev/null +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -0,0 +1,696 @@ +//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool implements a just-in-time compiler for LLVM, allowing direct +// execution of LLVM bitcode in an efficient manner. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#ifdef __APPLE__ +// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead +// of atexit). It passes the address of linker generated symbol __dso_handle +// to the function. 
+// This configuration change happened at version 5330. +# include +# if defined(MAC_OS_X_VERSION_10_4) && \ + ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \ + (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \ + __APPLE_CC__ >= 5330)) +# ifndef HAVE___DSO_HANDLE +# define HAVE___DSO_HANDLE 1 +# endif +# endif +#endif + +#if HAVE___DSO_HANDLE +extern void *__dso_handle __attribute__ ((__visibility__ ("hidden"))); +#endif + +namespace { + +static struct RegisterJIT { + RegisterJIT() { JIT::Register(); } +} JITRegistrator; + +} + +extern "C" void LLVMLinkInJIT() { +} + +/// createJIT - This is the factory method for creating a JIT for the current +/// machine, it does not fall back to the interpreter. This takes ownership +/// of the module. +ExecutionEngine *JIT::createJIT(Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM) { + // Try to register the program as a source of symbols to resolve against. + // + // FIXME: Don't do this here. + sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); + + // If the target supports JIT code generation, create the JIT. + if (TargetJITInfo *TJ = TM->getSubtargetImpl()->getJITInfo()) { + return new JIT(M, *TM, *TJ, JMM, GVsWithCode); + } else { + if (ErrorStr) + *ErrorStr = "target does not support JIT code generation"; + return nullptr; + } +} + +namespace { +/// This class supports the global getPointerToNamedFunction(), which allows +/// bugpoint or gdb users to search for a function by name without any context. +class JitPool { + SmallPtrSet JITs; // Optimize for process containing just 1 JIT. 
+ mutable sys::Mutex Lock; +public: + void Add(JIT *jit) { + MutexGuard guard(Lock); + JITs.insert(jit); + } + void Remove(JIT *jit) { + MutexGuard guard(Lock); + JITs.erase(jit); + } + void *getPointerToNamedFunction(const char *Name) const { + MutexGuard guard(Lock); + assert(JITs.size() != 0 && "No Jit registered"); + //search function in every instance of JIT + for (SmallPtrSet::const_iterator Jit = JITs.begin(), + end = JITs.end(); + Jit != end; ++Jit) { + if (Function *F = (*Jit)->FindFunctionNamed(Name)) + return (*Jit)->getPointerToFunction(F); + } + // The function is not available : fallback on the first created (will + // search in symbol of the current program/library) + return (*JITs.begin())->getPointerToNamedFunction(Name); + } +}; +ManagedStatic AllJits; +} +extern "C" { + // getPointerToNamedFunction - This function is used as a global wrapper to + // JIT::getPointerToNamedFunction for the purpose of resolving symbols when + // bugpoint is debugging the JIT. In that scenario, we are loading an .so and + // need to resolve function(s) that are being mis-codegenerated, so we need to + // resolve their addresses at runtime, and this is the way to do it. + void *getPointerToNamedFunction(const char *Name) { + return AllJits->getPointerToNamedFunction(Name); + } +} + +JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *jmm, bool GVsWithCode) + : ExecutionEngine(M), TM(tm), TJI(tji), + JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()), + AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { + setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + + jitstate = new JITState(M); + + // Initialize JCE + JCE = createEmitter(*this, JMM, TM); + + // Register in global list of all JITs. 
+ AllJits->Add(this); + + // Add target data + MutexGuard locked(lock); + FunctionPassManager &PM = jitstate->getPM(); + M->setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass(M)); + + // Turn the machine code intermediate representation into bytes in memory that + // may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { + report_fatal_error("Target does not support machine code emission!"); + } + + // Initialize passes. + PM.doInitialization(); +} + +JIT::~JIT() { + // Cleanup. + AllJits->Remove(this); + delete jitstate; + delete JCE; + // JMM is a ownership of JCE, so we no need delete JMM here. + delete &TM; +} + +/// addModule - Add a new Module to the JIT. If we previously removed the last +/// Module, we need re-initialize jitstate with a valid Module. +void JIT::addModule(Module *M) { + MutexGuard locked(lock); + + if (Modules.empty()) { + assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); + + jitstate = new JITState(M); + + FunctionPassManager &PM = jitstate->getPM(); + M->setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass(M)); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { + report_fatal_error("Target does not support machine code emission!"); + } + + // Initialize passes. + PM.doInitialization(); + } + + ExecutionEngine::addModule(M); +} + +/// removeModule - If we are removing the last Module, invalidate the jitstate +/// since the PassManager it contains references a released Module. 
+bool JIT::removeModule(Module *M) { + bool result = ExecutionEngine::removeModule(M); + + MutexGuard locked(lock); + + if (jitstate && jitstate->getModule() == M) { + delete jitstate; + jitstate = nullptr; + } + + if (!jitstate && !Modules.empty()) { + jitstate = new JITState(Modules[0]); + + FunctionPassManager &PM = jitstate->getPM(); + M->setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass(M)); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { + report_fatal_error("Target does not support machine code emission!"); + } + + // Initialize passes. + PM.doInitialization(); + } + return result; +} + +/// run - Start execution with the specified function and arguments. +/// +GenericValue JIT::runFunction(Function *F, + const std::vector &ArgValues) { + assert(F && "Function *F was null at entry to run()"); + + void *FPtr = getPointerToFunction(F); + assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); + FunctionType *FTy = F->getFunctionType(); + Type *RetTy = FTy->getReturnType(); + + assert((FTy->getNumParams() == ArgValues.size() || + (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && + "Wrong number of arguments passed into function!"); + assert(FTy->getNumParams() == ArgValues.size() && + "This doesn't support passing arguments through varargs (yet)!"); + + // Handle some common cases first. These cases correspond to common `main' + // prototypes. + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { + switch (ArgValues.size()) { + case 3: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy() && + FTy->getParamType(2)->isPointerTy()) { + int (*PF)(int, char **, const char **) = + (int(*)(int, char **, const char **))(intptr_t)FPtr; + + // Call the function. 
+ GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]), + (const char **)GVTOP(ArgValues[2]))); + return rv; + } + break; + case 2: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy()) { + int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]))); + return rv; + } + break; + case 1: + if (FTy->getParamType(0)->isIntegerTy(32)) { + GenericValue rv; + int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); + return rv; + } + if (FTy->getParamType(0)->isPointerTy()) { + GenericValue rv; + int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0]))); + return rv; + } + break; + } + } + + // Handle cases where no arguments are passed first. + if (ArgValues.empty()) { + GenericValue rv; + switch (RetTy->getTypeID()) { + default: llvm_unreachable("Unknown return type for function call!"); + case Type::IntegerTyID: { + unsigned BitWidth = cast(RetTy)->getBitWidth(); + if (BitWidth == 1) + rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 8) + rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 16) + rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 32) + rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 64) + rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); + else + llvm_unreachable("Integer types > 64 bits not supported"); + return rv; + } + case Type::VoidTyID: + rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); + return rv; + case Type::FloatTyID: + rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); + return rv; + case Type::DoubleTyID: + rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); + return 
rv; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + llvm_unreachable("long double not supported yet"); + case Type::PointerTyID: + return PTOGV(((void*(*)())(intptr_t)FPtr)()); + } + } + + // Okay, this is not one of our quick and easy cases. Because we don't have a + // full FFI, we have to codegen a nullary stub function that just calls the + // function we are interested in, passing in constants for all of the + // arguments. Make this function and return. + + // First, create the function. + FunctionType *STy=FunctionType::get(RetTy, false); + Function *Stub = Function::Create(STy, Function::InternalLinkage, "", + F->getParent()); + + // Insert a basic block. + BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub); + + // Convert all of the GenericValue arguments over to constants. Note that we + // currently don't support varargs. + SmallVector Args; + for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { + Constant *C = nullptr; + Type *ArgTy = FTy->getParamType(i); + const GenericValue &AV = ArgValues[i]; + switch (ArgTy->getTypeID()) { + default: llvm_unreachable("Unknown argument type for function call!"); + case Type::IntegerTyID: + C = ConstantInt::get(F->getContext(), AV.IntVal); + break; + case Type::FloatTyID: + C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal)); + break; + case Type::DoubleTyID: + C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal)); + break; + case Type::PPC_FP128TyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(), + AV.IntVal)); + break; + case Type::PointerTyID: + void *ArgPtr = GVTOP(AV); + if (sizeof(void*) == 4) + C = ConstantInt::get(Type::getInt32Ty(F->getContext()), + (int)(intptr_t)ArgPtr); + else + C = ConstantInt::get(Type::getInt64Ty(F->getContext()), + (intptr_t)ArgPtr); + // Cast the integer to pointer + C = ConstantExpr::getIntToPtr(C, ArgTy); + break; + } + Args.push_back(C); + 
} + + CallInst *TheCall = CallInst::Create(F, Args, "", StubBB); + TheCall->setCallingConv(F->getCallingConv()); + TheCall->setTailCall(); + if (!TheCall->getType()->isVoidTy()) + // Return result of the call. + ReturnInst::Create(F->getContext(), TheCall, StubBB); + else + ReturnInst::Create(F->getContext(), StubBB); // Just return void. + + // Finally, call our nullary stub function. + GenericValue Result = runFunction(Stub, std::vector()); + // Erase it, since no other function can have a reference to it. + Stub->eraseFromParent(); + // And return the result. + return Result; +} + +void JIT::RegisterJITEventListener(JITEventListener *L) { + if (!L) + return; + MutexGuard locked(lock); + EventListeners.push_back(L); +} +void JIT::UnregisterJITEventListener(JITEventListener *L) { + if (!L) + return; + MutexGuard locked(lock); + std::vector::reverse_iterator I= + std::find(EventListeners.rbegin(), EventListeners.rend(), L); + if (I != EventListeners.rend()) { + std::swap(*I, EventListeners.back()); + EventListeners.pop_back(); + } +} +void JIT::NotifyFunctionEmitted( + const Function &F, + void *Code, size_t Size, + const JITEvent_EmittedFunctionDetails &Details) { + MutexGuard locked(lock); + for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { + EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); + } +} + +void JIT::NotifyFreeingMachineCode(void *OldPtr) { + MutexGuard locked(lock); + for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { + EventListeners[I]->NotifyFreeingMachineCode(OldPtr); + } +} + +/// runJITOnFunction - Run the FunctionPassManager full of +/// just-in-time compilation passes on F, hopefully filling in +/// GlobalAddress[F] with the address of F's machine code. 
+/// +void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { + MutexGuard locked(lock); + + class MCIListener : public JITEventListener { + MachineCodeInfo *const MCI; + public: + MCIListener(MachineCodeInfo *mci) : MCI(mci) {} + void NotifyFunctionEmitted(const Function &, void *Code, size_t Size, + const EmittedFunctionDetails &) override { + MCI->setAddress(Code); + MCI->setSize(Size); + } + }; + MCIListener MCIL(MCI); + if (MCI) + RegisterJITEventListener(&MCIL); + + runJITOnFunctionUnlocked(F); + + if (MCI) + UnregisterJITEventListener(&MCIL); +} + +void JIT::runJITOnFunctionUnlocked(Function *F) { + assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); + + jitTheFunctionUnlocked(F); + + // If the function referred to another function that had not yet been + // read from bitcode, and we are jitting non-lazily, emit it now. + while (!jitstate->getPendingFunctions().empty()) { + Function *PF = jitstate->getPendingFunctions().back(); + jitstate->getPendingFunctions().pop_back(); + + assert(!PF->hasAvailableExternallyLinkage() && + "Externally-defined function should not be in pending list."); + + jitTheFunctionUnlocked(PF); + + // Now that the function has been jitted, ask the JITEmitter to rewrite + // the stub with real address of the function. + updateFunctionStubUnlocked(PF); + } +} + +void JIT::jitTheFunctionUnlocked(Function *F) { + isAlreadyCodeGenerating = true; + jitstate->getPM().run(*F); + isAlreadyCodeGenerating = false; + + // clear basic block addresses after this function is done + getBasicBlockAddressMap().clear(); +} + +/// getPointerToFunction - This method is used to get the address of the +/// specified function, compiling it if necessary. 
+/// +void *JIT::getPointerToFunction(Function *F) { + + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; // Check if function already code gen'd + + MutexGuard locked(lock); + + // Now that this thread owns the lock, make sure we read in the function if it + // exists in this Module. + std::string ErrorMsg; + if (F->Materialize(&ErrorMsg)) { + report_fatal_error("Error reading function '" + F->getName()+ + "' from bitcode file: " + ErrorMsg); + } + + // ... and check if another thread has already code gen'd the function. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; + + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { + bool AbortOnFailure = !F->hasExternalWeakLinkage(); + void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); + addGlobalMapping(F, Addr); + return Addr; + } + + runJITOnFunctionUnlocked(F); + + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr && "Code generation didn't add function to GlobalAddress table!"); + return Addr; +} + +void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap().find(BB); + if (I == getBasicBlockAddressMap().end()) { + getBasicBlockAddressMap()[BB] = Addr; + } else { + // ignore repeats: some BBs can be split into few MBBs? 
+ } +} + +void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { + MutexGuard locked(lock); + getBasicBlockAddressMap().erase(BB); +} + +void *JIT::getPointerToBasicBlock(BasicBlock *BB) { + // make sure it's function is compiled by JIT + (void)getPointerToFunction(BB->getParent()); + + // resolve basic block address + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap().find(BB); + if (I != getBasicBlockAddressMap().end()) { + return I->second; + } else { + llvm_unreachable("JIT does not have BB address for address-of-label, was" + " it eliminated by optimizer?"); + } +} + +void *JIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure){ + if (!isSymbolSearchingDisabled()) { + void *ptr = JMM->getPointerToNamedFunction(Name, false); + if (ptr) + return ptr; + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return nullptr; +} + + +/// getOrEmitGlobalVariable - Return the address of the specified global +/// variable, possibly emitting it to memory if needed. This is used by the +/// Emitter. +void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { + MutexGuard locked(lock); + + void *Ptr = getPointerToGlobalIfAvailable(GV); + if (Ptr) return Ptr; + + // If the global is external, just remember the address. 
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) { +#if HAVE___DSO_HANDLE + if (GV->getName() == "__dso_handle") + return (void*)&__dso_handle; +#endif + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); + if (!Ptr) { + report_fatal_error("Could not resolve external global address: " + +GV->getName()); + } + addGlobalMapping(GV, Ptr); + } else { + // If the global hasn't been emitted to memory yet, allocate space and + // emit it into memory. + Ptr = getMemoryForGV(GV); + addGlobalMapping(GV, Ptr); + EmitGlobalVariable(GV); // Initialize the variable. + } + return Ptr; +} + +/// recompileAndRelinkFunction - This method is used to force a function +/// which has already been compiled, to be compiled again, possibly +/// after it has been modified. Then the entry to the old copy is overwritten +/// with a branch to the new copy. If there was no old copy, this acts +/// just like JIT::getPointerToFunction(). +/// +void *JIT::recompileAndRelinkFunction(Function *F) { + void *OldAddr = getPointerToGlobalIfAvailable(F); + + // If it's not already compiled there is no reason to patch it up. + if (!OldAddr) return getPointerToFunction(F); + + // Delete the old function mapping. + addGlobalMapping(F, nullptr); + + // Recodegen the function + runJITOnFunction(F); + + // Update state, forward the old function to the new function. + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr && "Code generation didn't add function to GlobalAddress table!"); + TJI.replaceMachineCodeForFunction(OldAddr, Addr); + return Addr; +} + +/// getMemoryForGV - This method abstracts memory allocation of global +/// variable so that the JIT can allocate thread local variables depending +/// on the target. +/// +char* JIT::getMemoryForGV(const GlobalVariable* GV) { + char *Ptr; + + // GlobalVariable's which are not "constant" will cause trouble in a server + // situation. It's returned in the same block of memory as code which may + // not be writable. 
+ if (isGVCompilationDisabled() && !GV->isConstant()) { + report_fatal_error("Compilation of non-internal GlobalValue is disabled!"); + } + + // Some applications require globals and code to live together, so they may + // be allocated into the same buffer, but in general globals are allocated + // through the memory manager which puts them near the code but not in the + // same buffer. + Type *GlobalType = GV->getType()->getElementType(); + size_t S = getDataLayout()->getTypeAllocSize(GlobalType); + size_t A = getDataLayout()->getPreferredAlignment(GV); + if (GV->isThreadLocal()) { + MutexGuard locked(lock); + Ptr = TJI.allocateThreadLocalMemory(S); + } else if (TJI.allocateSeparateGVMemory()) { + if (A <= 8) { + Ptr = (char*)malloc(S); + } else { + // Allocate S+A bytes of memory, then use an aligned pointer within that + // space. + Ptr = (char*)malloc(S+A); + unsigned MisAligned = ((intptr_t)Ptr & (A-1)); + Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0); + } + } else if (AllocateGVsWithCode) { + Ptr = (char*)JCE->allocateSpace(S, A); + } else { + Ptr = (char*)JCE->allocateGlobal(S, A); + } + return Ptr; +} + +void JIT::addPendingFunction(Function *F) { + MutexGuard locked(lock); + jitstate->getPendingFunctions().push_back(F); +} + + +JITEventListener::~JITEventListener() {} diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h new file mode 100644 index 00000000000..a742a61de7f --- /dev/null +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -0,0 +1,214 @@ +//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the top-level JIT data structure. 
+// +//===----------------------------------------------------------------------===// + +#ifndef JIT_H +#define JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/PassManager.h" + +namespace llvm { + +class Function; +struct JITEvent_EmittedFunctionDetails; +class MachineCodeEmitter; +class MachineCodeInfo; +class TargetJITInfo; +class TargetMachine; + +class JITState { +private: + FunctionPassManager PM; // Passes to compile a function + Module *M; // Module used to create the PM + + /// PendingFunctions - Functions which have not been code generated yet, but + /// were called from a function being code generated. + std::vector > PendingFunctions; + +public: + explicit JITState(Module *M) : PM(M), M(M) {} + + FunctionPassManager &getPM() { + return PM; + } + + Module *getModule() const { return M; } + std::vector > &getPendingFunctions() { + return PendingFunctions; + } +}; + + +class JIT : public ExecutionEngine { + /// types + typedef ValueMap + BasicBlockAddressMapTy; + /// data + TargetMachine &TM; // The current target we are compiling to + TargetJITInfo &TJI; // The JITInfo for the target we are compiling to + JITCodeEmitter *JCE; // JCE object + JITMemoryManager *JMM; + std::vector EventListeners; + + /// AllocateGVsWithCode - Some applications require that global variables and + /// code be allocated into the same region of memory, in which case this flag + /// should be set to true. Doing so breaks freeMachineCodeForFunction. + bool AllocateGVsWithCode; + + /// True while the JIT is generating code. Used to assert against recursive + /// entry. + bool isAlreadyCodeGenerating; + + JITState *jitstate; + + /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their + /// actualized version, only filled for basic blocks that have their address + /// taken. 
+ BasicBlockAddressMapTy BasicBlockAddressMap; + + + JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *JMM, bool AllocateGVsWithCode); +public: + ~JIT(); + + static void Register() { + JITCtor = createJIT; + } + + /// getJITInfo - Return the target JIT information structure. + /// + TargetJITInfo &getJITInfo() const { return TJI; } + + void addModule(Module *M) override; + + /// removeModule - Remove a Module from the list of modules. Returns true if + /// M is found. + bool removeModule(Module *M) override; + + /// runFunction - Start execution with the specified function and arguments. + /// + GenericValue runFunction(Function *F, + const std::vector &ArgValues) override; + + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the MemoryManager. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) override; + + // CompilationCallback - Invoked the first time that a call site is found, + // which causes lazy compilation of the target function. + // + static void CompilationCallback(); + + /// getPointerToFunction - This returns the address of the specified function, + /// compiling it if necessary. + /// + void *getPointerToFunction(Function *F) override; + + /// addPointerToBasicBlock - Adds address of the specific basic block. + void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); + + /// clearPointerToBasicBlock - Removes address of specific basic block. + void clearPointerToBasicBlock(const BasicBlock *BB); + + /// getPointerToBasicBlock - This returns the address of the specified basic + /// block, assuming function is compiled. 
+ void *getPointerToBasicBlock(BasicBlock *BB) override; + + /// getOrEmitGlobalVariable - Return the address of the specified global + /// variable, possibly emitting it to memory if needed. This is used by the + /// Emitter. + void *getOrEmitGlobalVariable(const GlobalVariable *GV) override; + + /// getPointerToFunctionOrStub - If the specified function has been + /// code-gen'd, return a pointer to the function. If not, compile it, or use + /// a stub to implement lazy compilation if available. + /// + void *getPointerToFunctionOrStub(Function *F) override; + + /// recompileAndRelinkFunction - This method is used to force a function + /// which has already been compiled, to be compiled again, possibly + /// after it has been modified. Then the entry to the old copy is overwritten + /// with a branch to the new copy. If there was no old copy, this acts + /// just like JIT::getPointerToFunction(). + /// + void *recompileAndRelinkFunction(Function *F) override; + + /// freeMachineCodeForFunction - deallocate memory used to code-generate this + /// Function. + /// + void freeMachineCodeForFunction(Function *F) override; + + /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd + /// function was encountered. Add it to a pending list to be processed after + /// the current function. + /// + void addPendingFunction(Function *F); + + /// getCodeEmitter - Return the code emitter this JIT is emitting into. 
+ /// + JITCodeEmitter *getCodeEmitter() const { return JCE; } + + static ExecutionEngine *createJIT(Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM); + + // Run the JIT on F and return information about the generated code + void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override; + + void RegisterJITEventListener(JITEventListener *L) override; + void UnregisterJITEventListener(JITEventListener *L) override; + + TargetMachine *getTargetMachine() override { return &TM; } + + /// These functions correspond to the methods on JITEventListener. They + /// iterate over the registered listeners and call the corresponding method on + /// each. + void NotifyFunctionEmitted( + const Function &F, void *Code, size_t Size, + const JITEvent_EmittedFunctionDetails &Details); + void NotifyFreeingMachineCode(void *OldPtr); + + BasicBlockAddressMapTy & + getBasicBlockAddressMap() { + return BasicBlockAddressMap; + } + + +private: + static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, + TargetMachine &tm); + void runJITOnFunctionUnlocked(Function *F); + void updateFunctionStubUnlocked(Function *F); + void jitTheFunctionUnlocked(Function *F); + +protected: + + /// getMemoryforGV - Allocate memory for a global variable. + char* getMemoryForGV(const GlobalVariable* GV) override; + +}; + +} // End llvm namespace + +#endif diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp new file mode 100644 index 00000000000..2ba1f8695d7 --- /dev/null +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -0,0 +1,1249 @@ +//===-- JITEmitter.cpp - Write machine code to executable memory ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines a MachineCodeEmitter object that is used by the JIT to +// write machine code to memory and remember where relocatable values are. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include +#ifndef NDEBUG +#include +#endif +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumBytes, "Number of bytes of machine code compiled"); +STATISTIC(NumRelos, "Number of relocations applied"); +STATISTIC(NumRetries, "Number of retries with more memory"); + + +// A declaration may stop being a declaration once it's fully read from bitcode. 
+// This function returns true if F is fully read and is still a declaration. +static bool isNonGhostDeclaration(const Function *F) { + return F->isDeclaration() && !F->isMaterializable(); +} + +//===----------------------------------------------------------------------===// +// JIT lazy compilation code. +// +namespace { + class JITEmitter; + class JITResolverState; + + template + struct NoRAUWValueMapConfig : public ValueMapConfig { + typedef JITResolverState *ExtraData; + static void onRAUW(JITResolverState *, Value *Old, Value *New) { + llvm_unreachable("The JIT doesn't know how to handle a" + " RAUW on a value it has emitted."); + } + }; + + struct CallSiteValueMapConfig : public NoRAUWValueMapConfig { + typedef JITResolverState *ExtraData; + static void onDelete(JITResolverState *JRS, Function *F); + }; + + class JITResolverState { + public: + typedef ValueMap > + FunctionToLazyStubMapTy; + typedef std::map > CallSiteToFunctionMapTy; + typedef ValueMap, + CallSiteValueMapConfig> FunctionToCallSitesMapTy; + typedef std::map, void*> GlobalToIndirectSymMapTy; + private: + /// FunctionToLazyStubMap - Keep track of the lazy stub created for a + /// particular function so that we can reuse them if necessary. + FunctionToLazyStubMapTy FunctionToLazyStubMap; + + /// CallSiteToFunctionMap - Keep track of the function that each lazy call + /// site corresponds to, and vice versa. + CallSiteToFunctionMapTy CallSiteToFunctionMap; + FunctionToCallSitesMapTy FunctionToCallSitesMap; + + /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a + /// particular GlobalVariable so that we can reuse them if necessary. + GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + +#ifndef NDEBUG + /// Instance of the JIT this ResolverState serves. 
+ JIT *TheJIT; +#endif + + public: + JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), + FunctionToCallSitesMap(this) { +#ifndef NDEBUG + TheJIT = jit; +#endif + } + + FunctionToLazyStubMapTy& getFunctionToLazyStubMap() { + return FunctionToLazyStubMap; + } + + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() { + return GlobalToIndirectSymMap; + } + + std::pair LookupFunctionFromCallSite( + void *CallSite) const { + // The address given to us for the stub may not be exactly right, it + // might be a little bit after the stub. As such, use upper_bound to + // find it. + CallSiteToFunctionMapTy::const_iterator I = + CallSiteToFunctionMap.upper_bound(CallSite); + assert(I != CallSiteToFunctionMap.begin() && + "This is not a known call site!"); + --I; + return *I; + } + + void AddCallSite(void *CallSite, Function *F) { + bool Inserted = CallSiteToFunctionMap.insert( + std::make_pair(CallSite, F)).second; + (void)Inserted; + assert(Inserted && "Pair was already in CallSiteToFunctionMap"); + FunctionToCallSitesMap[F].insert(CallSite); + } + + void EraseAllCallSitesForPrelocked(Function *F); + + // Erases _all_ call sites regardless of their function. This is used to + // unregister the stub addresses from the StubToResolverMap in + // ~JITResolver(). + void EraseAllCallSitesPrelocked(); + }; + + /// JITResolver - Keep track of, and resolve, call sites for functions that + /// have not yet been compiled. + class JITResolver { + typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy; + typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy; + typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; + + /// LazyResolverFn - The target lazy resolver function that we actually + /// rewrite instructions to use. + TargetJITInfo::LazyResolverFn LazyResolverFn; + + JITResolverState state; + + /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap + /// for external functions. 
TODO: Of course, external functions don't need + /// a lazy stub. It's actually here to make it more likely that far calls + /// succeed, but no single stub can guarantee that. I'll remove this in a + /// subsequent checkin when I actually fix far calls. + std::map ExternalFnToStubMap; + + /// revGOTMap - map addresses to indexes in the GOT + std::map revGOTMap; + unsigned nextGOTIndex; + + JITEmitter &JE; + + /// Instance of JIT corresponding to this Resolver. + JIT *TheJIT; + + public: + explicit JITResolver(JIT &jit, JITEmitter &je) + : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { + LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); + } + + ~JITResolver(); + + /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's + /// lazy-compilation stub if it has already been created. + void *getLazyFunctionStubIfAvailable(Function *F); + + /// getLazyFunctionStub - This returns a pointer to a function's + /// lazy-compilation stub, creating one on demand as needed. + void *getLazyFunctionStub(Function *F); + + /// getExternalFunctionStub - Return a stub for the function at the + /// specified address, created lazily on demand. + void *getExternalFunctionStub(void *FnAddr); + + /// getGlobalValueIndirectSym - Return an indirect symbol containing the + /// specified GV address. + void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); + + /// getGOTIndexForAddress - Return a new or existing index in the GOT for + /// an address. This function only manages slots, it does not manage the + /// contents of the slots or the memory associated with the GOT. + unsigned getGOTIndexForAddr(void *addr); + + /// JITCompilerFn - This function is called to resolve a stub to a compiled + /// address. If the LLVM Function corresponding to the stub has not yet + /// been compiled, this function compiles it first. 
+ static void *JITCompilerFn(void *Stub); + }; + + class StubToResolverMapTy { + /// Map a stub address to a specific instance of a JITResolver so that + /// lazily-compiled functions can find the right resolver to use. + /// + /// Guarded by Lock. + std::map Map; + + /// Guards Map from concurrent accesses. + mutable sys::Mutex Lock; + + public: + /// Registers a Stub to be resolved by Resolver. + void RegisterStubResolver(void *Stub, JITResolver *Resolver) { + MutexGuard guard(Lock); + Map.insert(std::make_pair(Stub, Resolver)); + } + /// Unregisters the Stub when it's invalidated. + void UnregisterStubResolver(void *Stub) { + MutexGuard guard(Lock); + Map.erase(Stub); + } + /// Returns the JITResolver instance that owns the Stub. + JITResolver *getResolverFromStub(void *Stub) const { + MutexGuard guard(Lock); + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + // This is the same trick as in LookupFunctionFromCallSite from + // JITResolverState. + std::map::const_iterator I = Map.upper_bound(Stub); + assert(I != Map.begin() && "This is not a known stub!"); + --I; + return I->second; + } + /// True if any stubs refer to the given resolver. Only used in an assert(). + /// O(N) + bool ResolverHasStubs(JITResolver* Resolver) const { + MutexGuard guard(Lock); + for (std::map::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + if (I->second == Resolver) + return true; + } + return false; + } + }; + /// This needs to be static so that a lazy call stub can access it with no + /// context except the address of the stub. + ManagedStatic StubToResolverMap; + + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is + /// used to output functions to memory for execution. 
+ class JITEmitter : public JITCodeEmitter { + JITMemoryManager *MemMgr; + + // When outputting a function stub in the context of some other function, we + // save BufferBegin/BufferEnd/CurBufferPtr here. + uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; + + // When reattempting to JIT a function after running out of space, we store + // the estimated size of the function we're trying to JIT here, so we can + // ask the memory manager for at least this much space. When we + // successfully emit the function, we reset this back to zero. + uintptr_t SizeEstimate; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. + std::vector Relocations; + + /// MBBLocations - This vector is a mapping from MBB ID's to their address. + /// It is filled in by the StartMachineBasicBlock callback and queried by + /// the getMachineBasicBlockAddress callback. + std::vector MBBLocations; + + /// ConstantPool - The constant pool for the current function. + /// + MachineConstantPool *ConstantPool; + + /// ConstantPoolBase - A pointer to the first entry in the constant pool. + /// + void *ConstantPoolBase; + + /// ConstPoolAddresses - Addresses of individual constant pool entries. + /// + SmallVector ConstPoolAddresses; + + /// JumpTable - The jump tables for the current function. + /// + MachineJumpTableInfo *JumpTable; + + /// JumpTableBase - A pointer to the first entry in the jump table. + /// + void *JumpTableBase; + + /// Resolver - This contains info about the currently resolved functions. + JITResolver Resolver; + + /// LabelLocations - This vector is a mapping from Label ID's to their + /// address. + DenseMap LabelLocations; + + /// MMI - Machine module info for exception informations + MachineModuleInfo* MMI; + + // CurFn - The llvm function being emitted. Only valid during + // finishFunction(). + const Function *CurFn; + + /// Information about emitted code, which is passed to the + /// JITEventListeners. 
This is reset in startFunction and used in + /// finishFunction. + JITEvent_EmittedFunctionDetails EmissionDetails; + + struct EmittedCode { + void *FunctionBody; // Beginning of the function's allocation. + void *Code; // The address the function's code actually starts at. + void *ExceptionTable; + EmittedCode() : FunctionBody(nullptr), Code(nullptr), + ExceptionTable(nullptr) {} + }; + struct EmittedFunctionConfig : public ValueMapConfig { + typedef JITEmitter *ExtraData; + static void onDelete(JITEmitter *, const Function*); + static void onRAUW(JITEmitter *, const Function*, const Function*); + }; + ValueMap EmittedFunctions; + + DebugLoc PrevDL; + + /// Instance of the JIT + JIT *TheJIT; + + public: + JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) + : SizeEstimate(0), Resolver(jit, *this), MMI(nullptr), CurFn(nullptr), + EmittedFunctions(this), TheJIT(&jit) { + MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); + if (jit.getJITInfo().needsGOT()) { + MemMgr->AllocateGOT(); + DEBUG(dbgs() << "JIT is managing a GOT\n"); + } + + } + ~JITEmitter() { + delete MemMgr; + } + + JITResolver &getJITResolver() { return Resolver; } + + void startFunction(MachineFunction &F) override; + bool finishFunction(MachineFunction &F) override; + + void emitConstantPool(MachineConstantPool *MCP); + void initJumpTableInfo(MachineJumpTableInfo *MJTI); + void emitJumpTableInfo(MachineJumpTableInfo *MJTI); + + void startGVStub(const GlobalValue* GV, + unsigned StubSize, unsigned Alignment = 1); + void startGVStub(void *Buffer, unsigned StubSize); + void finishGVStub(); + void *allocIndirectGV(const GlobalValue *GV, const uint8_t *Buffer, + size_t Size, unsigned Alignment) override; + + /// allocateSpace - Reserves space in the current block if any, or + /// allocate a new one of the given size. + void *allocateSpace(uintptr_t Size, unsigned Alignment) override; + + /// allocateGlobal - Allocate memory for a global. 
Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. + void *allocateGlobal(uintptr_t Size, unsigned Alignment) override; + + void addRelocation(const MachineRelocation &MR) override { + Relocations.push_back(MR); + } + + void StartMachineBasicBlock(MachineBasicBlock *MBB) override { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCValue(); + if (MBB->hasAddressTaken()) + TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(), + (void*)getCurrentPCValue()); + DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" + << (void*) getCurrentPCValue() << "]\n"); + } + + uintptr_t getConstantPoolEntryAddress(unsigned Entry) const override; + uintptr_t getJumpTableEntryAddress(unsigned Entry) const override; + + uintptr_t + getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; + } + + /// retryWithMoreMemory - Log a retry and deallocate all memory for the + /// given function. Increase the minimum allocation size so that we get + /// more memory next time. + void retryWithMoreMemory(MachineFunction &F); + + /// deallocateMemForFunction - Deallocate all memory for the specified + /// function body. 
+ void deallocateMemForFunction(const Function *F); + + void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) override; + + void emitLabel(MCSymbol *Label) override { + LabelLocations[Label] = getCurrentPCValue(); + } + + DenseMap *getLabelLocations() override { + return &LabelLocations; + } + + uintptr_t getLabelAddress(MCSymbol *Label) const override { + assert(LabelLocations.count(Label) && "Label not emitted!"); + return LabelLocations.find(Label)->second; + } + + void setModuleInfo(MachineModuleInfo* Info) override { + MMI = Info; + } + + private: + void *getPointerToGlobal(GlobalValue *GV, void *Reference, + bool MayNeedFarStub); + void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); + }; +} + +void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { + JRS->EraseAllCallSitesForPrelocked(F); +} + +void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { + FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); + if (F2C == FunctionToCallSitesMap.end()) + return; + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (SmallPtrSet::const_iterator I = F2C->second.begin(), + E = F2C->second.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(*I); + bool Erased = CallSiteToFunctionMap.erase(*I); + (void)Erased; + assert(Erased && "Missing call site->function mapping"); + } + FunctionToCallSitesMap.erase(F2C); +} + +void JITResolverState::EraseAllCallSitesPrelocked() { + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (CallSiteToFunctionMapTy::const_iterator + I = CallSiteToFunctionMap.begin(), + E = CallSiteToFunctionMap.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(I->first); + } + CallSiteToFunctionMap.clear(); + FunctionToCallSitesMap.clear(); +} + +JITResolver::~JITResolver() { + // No need to lock because we're in the destructor, and state isn't shared. 
+ state.EraseAllCallSitesPrelocked(); + assert(!StubToResolverMap->ResolverHasStubs(this) && + "Resolver destroyed with stubs still alive."); +} + +/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub +/// if it has already been created. +void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this function, recycle it. + return state.getFunctionToLazyStubMap().lookup(F); +} + +/// getFunctionStub - This returns a pointer to a function stub, creating +/// one on demand as needed. +void *JITResolver::getLazyFunctionStub(Function *F) { + MutexGuard locked(TheJIT->lock); + + // If we already have a lazy stub for this function, recycle it. + void *&Stub = state.getFunctionToLazyStubMap()[F]; + if (Stub) return Stub; + + // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we + // must resolve the symbol now. + void *Actual = TheJIT->isCompilingLazily() + ? (void *)(intptr_t)LazyResolverFn : (void *)nullptr; + + // If this is an external declaration, attempt to resolve the address now + // to place in the stub. + if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) { + Actual = TheJIT->getPointerToFunction(F); + + // If we resolved the symbol to a null address (eg. a weak external) + // don't emit a stub. Return a null pointer to the application. + if (!Actual) return nullptr; + } + + TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); + JE.startGVStub(F, SL.Size, SL.Alignment); + // Codegen a new stub, calling the lazy resolver or the actual address of the + // external function, if it was resolved. 
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE); + JE.finishGVStub(); + + if (Actual != (void*)(intptr_t)LazyResolverFn) { + // If we are getting the stub for an external function, we really want the + // address of the stub in the GlobalAddressMap for the JIT, not the address + // of the external function. + TheJIT->updateGlobalMapping(F, Stub); + } + + DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" + << F->getName() << "'\n"); + + if (TheJIT->isCompilingLazily()) { + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. + StubToResolverMap->RegisterStubResolver(Stub, this); + + // Finally, keep track of the stub-to-Function mapping so that the + // JITCompilerFn knows which function to compile! + state.AddCallSite(Stub, F); + } else if (!Actual) { + // If we are JIT'ing non-lazily but need to call a function that does not + // exist yet, add it to the JIT's work list so that we can fill in the + // stub address later. + assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() && + "'Actual' should have been set above."); + TheJIT->addPendingFunction(F); + } + + return Stub; +} + +/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified +/// GV address. +void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this global variable, recycle it. + void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; + if (IndirectSym) return IndirectSym; + + // Otherwise, codegen a new indirect symbol. 
+ IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, + JE); + + DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym + << "] for GV '" << GV->getName() << "'\n"); + + return IndirectSym; +} + +/// getExternalFunctionStub - Return a stub for the function at the +/// specified address, created lazily on demand. +void *JITResolver::getExternalFunctionStub(void *FnAddr) { + // If we already have a stub for this function, recycle it. + void *&Stub = ExternalFnToStubMap[FnAddr]; + if (Stub) return Stub; + + TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); + JE.startGVStub(nullptr, SL.Size, SL.Alignment); + Stub = TheJIT->getJITInfo().emitFunctionStub(nullptr, FnAddr, JE); + JE.finishGVStub(); + + DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub + << "] for external function at '" << FnAddr << "'\n"); + return Stub; +} + +unsigned JITResolver::getGOTIndexForAddr(void* addr) { + unsigned idx = revGOTMap[addr]; + if (!idx) { + idx = ++nextGOTIndex; + revGOTMap[addr] = idx; + DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr [" + << addr << "]\n"); + } + return idx; +} + +/// JITCompilerFn - This function is called when a lazy compilation stub has +/// been entered. It looks up which function this stub corresponds to, compiles +/// it if necessary, then returns the resultant function pointer. +void *JITResolver::JITCompilerFn(void *Stub) { + JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); + assert(JR && "Unable to find the corresponding JITResolver to the call site"); + + Function* F = nullptr; + void* ActualPtr = nullptr; + + { + // Only lock for getting the Function. The call getPointerToFunction made + // in this function might trigger function materializing, which requires + // JIT lock to be unlocked. + MutexGuard locked(JR->TheJIT->lock); + + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. 
As such, use upper_bound to find it. + std::pair I = + JR->state.LookupFunctionFromCallSite(Stub); + F = I.second; + ActualPtr = I.first; + } + + // If we have already code generated the function, just return the address. + void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); + + if (!Result) { + // Otherwise we don't have it, do lazy compilation now. + + // If lazy compilation is disabled, emit a useful error message and abort. + if (!JR->TheJIT->isCompilingLazily()) { + report_fatal_error("LLVM JIT requested to do lazy compilation of" + " function '" + + F->getName() + "' when lazy compiles are disabled!"); + } + + DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName() + << "' In stub ptr = " << Stub << " actual ptr = " + << ActualPtr << "\n"); + (void)ActualPtr; + + Result = JR->TheJIT->getPointerToFunction(F); + } + + // Reacquire the lock to update the GOT map. + MutexGuard locked(JR->TheJIT->lock); + + // We might like to remove the call site from the CallSiteToFunction map, but + // we can't do that! Multiple threads could be stuck, waiting to acquire the + // lock above. As soon as the 1st function finishes compiling the function, + // the next one will be released, and needs to be able to find the function it + // needs to call. + + // FIXME: We could rewrite all references to this stub if we knew them. + + // What we will do is set the compiled function address to map to the + // same GOT entry as the stub so that later clients may update the GOT + // if they see it still using the stub address. + // Note: this is done so the Resolver doesn't have to manage GOT memory + // Do this without allocating map space if the target isn't using a GOT + if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end()) + JR->revGOTMap[Result] = JR->revGOTMap[Stub]; + + return Result; +} + +//===----------------------------------------------------------------------===// +// JITEmitter code. 
+// + +static GlobalObject *getSimpleAliasee(Constant *C) { + C = C->stripPointerCasts(); + return dyn_cast(C); +} + +void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, + bool MayNeedFarStub) { + if (GlobalVariable *GV = dyn_cast(V)) + return TheJIT->getOrEmitGlobalVariable(GV); + + if (GlobalAlias *GA = dyn_cast(V)) { + // We can only handle simple cases. + if (GlobalValue *GV = getSimpleAliasee(GA->getAliasee())) + return TheJIT->getPointerToGlobal(GV); + return nullptr; + } + + // If we have already compiled the function, return a pointer to its body. + Function *F = cast(V); + + void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F); + if (FnStub) { + // Return the function stub if it's already created. We do this first so + // that we're returning the same address for the function as any previous + // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be + // close enough to call. + return FnStub; + } + + // If we know the target can handle arbitrary-distance calls, try to + // return a direct pointer. + if (!MayNeedFarStub) { + // If we have code, go ahead and return that. + void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); + if (ResultPtr) return ResultPtr; + + // If this is an external function pointer, we can force the JIT to + // 'compile' it, which really just adds it to the map. + if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) + return TheJIT->getPointerToFunction(F); + } + + // Otherwise, we may need a to emit a stub, and, conservatively, we always do + // so. Note that it's possible to return null from getLazyFunctionStub in the + // case of a weak extern that fails to resolve. + return Resolver.getLazyFunctionStub(F); +} + +void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { + // Make sure GV is emitted first, and create a stub containing the fully + // resolved address. 
+ void *GVAddress = getPointerToGlobal(V, Reference, false); + void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); + return StubAddr; +} + +void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { + if (DL.isUnknown()) return; + if (!BeforePrintingInsn) return; + + const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); + + if (DL.getScope(Context) != nullptr && PrevDL != DL) { + JITEvent_EmittedFunctionDetails::LineStart NextLine; + NextLine.Address = getCurrentPCValue(); + NextLine.Loc = DL; + EmissionDetails.LineStarts.push_back(NextLine); + } + + PrevDL = DL; +} + +static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, + const DataLayout *TD) { + const std::vector &Constants = MCP->getConstants(); + if (Constants.empty()) return 0; + + unsigned Size = 0; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = Constants[i]; + unsigned AlignMask = CPE.getAlignment() - 1; + Size = (Size + AlignMask) & ~AlignMask; + Type *Ty = CPE.getType(); + Size += TD->getTypeAllocSize(Ty); + } + return Size; +} + +void JITEmitter::startFunction(MachineFunction &F) { + DEBUG(dbgs() << "JIT: Starting CodeGen of Function " + << F.getName() << "\n"); + + uintptr_t ActualSize = 0; + // Set the memory writable, if it's not already + MemMgr->setMemoryWritable(); + + if (SizeEstimate > 0) { + // SizeEstimate will be non-zero on reallocation attempts. + ActualSize = SizeEstimate; + } + + BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(), + ActualSize); + BufferEnd = BufferBegin+ActualSize; + EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin; + + // Ensure the constant pool/jump table info is at least 4-byte aligned. + emitAlignment(16); + + emitConstantPool(F.getConstantPool()); + if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) + initJumpTableInfo(MJTI); + + // About to start emitting the machine code for the function. 
+ emitAlignment(std::max(F.getFunction()->getAlignment(), 8U)); + TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr); + EmittedFunctions[F.getFunction()].Code = CurBufferPtr; + + MBBLocations.clear(); + + EmissionDetails.MF = &F; + EmissionDetails.LineStarts.clear(); +} + +bool JITEmitter::finishFunction(MachineFunction &F) { + if (CurBufferPtr == BufferEnd) { + // We must call endFunctionBody before retrying, because + // deallocateMemForFunction requires it. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + retryWithMoreMemory(F); + return true; + } + + if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) + emitJumpTableInfo(MJTI); + + // FnStart is the start of the text, not the start of the constant pool and + // other per-function data. + uint8_t *FnStart = + (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); + + // FnEnd is the end of the function's machine code. + uint8_t *FnEnd = CurBufferPtr; + + if (!Relocations.empty()) { + CurFn = F.getFunction(); + NumRelos += Relocations.size(); + + // Resolve the relocations to concrete pointers. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + void *ResultPtr = nullptr; + if (!MR.letTargetResolve()) { + if (MR.isExternalSymbol()) { + ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), + false); + DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + << ResultPtr << "]\n"); + + // If the target REALLY wants a stub for this function, emit it now. 
+ if (MR.mayNeedFarStub()) { + ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); + } + } else if (MR.isGlobalValue()) { + ResultPtr = getPointerToGlobal(MR.getGlobalValue(), + BufferBegin+MR.getMachineCodeOffset(), + MR.mayNeedFarStub()); + } else if (MR.isIndirectSymbol()) { + ResultPtr = getPointerToGVIndirectSym( + MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset()); + } else if (MR.isBasicBlock()) { + ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); + } else if (MR.isConstantPoolIndex()) { + ResultPtr = + (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + } else { + assert(MR.isJumpTableIndex()); + ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); + } + + MR.setResultPointer(ResultPtr); + } + + // if we are managing the GOT and the relocation wants an index, + // give it one + if (MR.isGOTRelative() && MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); + MR.setGOTIndex(idx); + if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { + DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); + ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; + } + } + } + + CurFn = nullptr; + TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0], + Relocations.size(), MemMgr->getGOTBase()); + } + + // Update the GOT entry for F to point to the new code. + if (MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); + if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { + DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); + ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; + } + } + + // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for + // global variables that were referenced in the relocations. 
+ MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + + if (CurBufferPtr == BufferEnd) { + retryWithMoreMemory(F); + return true; + } else { + // Now that we've succeeded in emitting the function, reset the + // SizeEstimate back down to zero. + SizeEstimate = 0; + } + + BufferBegin = CurBufferPtr = nullptr; + NumBytes += FnEnd-FnStart; + + // Invalidate the icache if necessary. + sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart); + + TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, + EmissionDetails); + + // Reset the previous debug location. + PrevDL = DebugLoc(); + + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart + << "] Function: " << F.getName() + << ": " << (FnEnd-FnStart) << " bytes of text, " + << Relocations.size() << " relocations\n"); + + Relocations.clear(); + ConstPoolAddresses.clear(); + + // Mark code region readable and executable if it's not so already. + MemMgr->setMemoryExecutable(); + + DEBUG({ + dbgs() << "JIT: Binary code:\n"; + uint8_t* q = FnStart; + for (int i = 0; q < FnEnd; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= FnEnd) + Done = true; + else + dbgs() << (unsigned short)q[j]; + } + if (Done) + break; + dbgs() << ' '; + if (i == 3) + dbgs() << '\n'; + } + dbgs()<< '\n'; + }); + + if (MMI) + MMI->EndFunction(); + + return false; +} + +void JITEmitter::retryWithMoreMemory(MachineFunction &F) { + DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n"); + Relocations.clear(); // Clear the old relocations or we'll reapply them. + ConstPoolAddresses.clear(); + ++NumRetries; + deallocateMemForFunction(F.getFunction()); + // Try again with at least twice as much free space. 
+ SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin)); + + for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){ + if (MBB->hasAddressTaken()) + TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock()); + } +} + +/// deallocateMemForFunction - Deallocate all memory for the specified +/// function body. Also drop any references the function has to stubs. +/// May be called while the Function is being destroyed inside ~Value(). +void JITEmitter::deallocateMemForFunction(const Function *F) { + ValueMap::iterator + Emitted = EmittedFunctions.find(F); + if (Emitted != EmittedFunctions.end()) { + MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody); + TheJIT->NotifyFreeingMachineCode(Emitted->second.Code); + + EmittedFunctions.erase(Emitted); + } +} + + +void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { + if (BufferBegin) + return JITCodeEmitter::allocateSpace(Size, Alignment); + + // create a new memory block if there is no active one. + // care must be taken so that BufferBegin is invalidated when a + // block is trimmed + BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment); + BufferEnd = BufferBegin+Size; + return CurBufferPtr; +} + +void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { + // Delegate this call through the memory manager. + return MemMgr->allocateGlobal(Size, Alignment); +} + +void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { + if (TheJIT->getJITInfo().hasCustomConstantPool()) + return; + + const std::vector &Constants = MCP->getConstants(); + if (Constants.empty()) return; + + unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getDataLayout()); + unsigned Align = MCP->getConstantPoolAlignment(); + ConstantPoolBase = allocateSpace(Size, Align); + ConstantPool = MCP; + + if (!ConstantPoolBase) return; // Buffer overflow. 
+ + DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase + << "] (size: " << Size << ", alignment: " << Align << ")\n"); + + // Initialize the memory for all of the constant pool entries. + unsigned Offset = 0; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = Constants[i]; + unsigned AlignMask = CPE.getAlignment() - 1; + Offset = (Offset + AlignMask) & ~AlignMask; + + uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset; + ConstPoolAddresses.push_back(CAddr); + if (CPE.isMachineConstantPoolEntry()) { + // FIXME: add support to lower machine constant pool values into bytes! + report_fatal_error("Initialize memory with machine specific constant pool" + "entry has not been implemented!"); + } + TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); + DEBUG(dbgs() << "JIT: CP" << i << " at [0x"; + dbgs().write_hex(CAddr) << "]\n"); + + Type *Ty = CPE.Val.ConstVal->getType(); + Offset += TheJIT->getDataLayout()->getTypeAllocSize(Ty); + } +} + +void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) { + if (TheJIT->getJITInfo().hasCustomJumpTables()) + return; + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) + return; + + const std::vector &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + unsigned NumEntries = 0; + for (unsigned i = 0, e = JT.size(); i != e; ++i) + NumEntries += JT[i].MBBs.size(); + + unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getDataLayout()); + + // Just allocate space for all the jump tables now. We will fix up the actual + // MBB entries in the tables after we emit the code for each block, since then + // we will know the final locations of the MBBs in memory. 
+ JumpTable = MJTI; + JumpTableBase = allocateSpace(NumEntries * EntrySize, + MJTI->getEntryAlignment(*TheJIT->getDataLayout())); +} + +void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { + if (TheJIT->getJITInfo().hasCustomJumpTables()) + return; + + const std::vector &JT = MJTI->getJumpTables(); + if (JT.empty() || !JumpTableBase) return; + + + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Inline: + return; + case MachineJumpTableInfo::EK_BlockAddress: { + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == sizeof(void*) && + "Cross JIT'ing?"); + + // For each jump table, map each target in the jump table to the address of + // an emitted MachineBasicBlock. + intptr_t *SlotPtr = (intptr_t*)JumpTableBase; + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector &MBBs = JT[i].MBBs; + // Store the address of the basic block for this jump table slot in the + // memory we allocated for the jump table in 'initJumpTableInfo' + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) + *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]); + } + break; + } + + case MachineJumpTableInfo::EK_Custom32: + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: { + assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == 4&&"Cross JIT'ing?"); + // For each jump table, place the offset from the beginning of the table + // to the target address. 
+ int *SlotPtr = (int*)JumpTableBase; + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector &MBBs = JT[i].MBBs; + // Store the offset of the basic block for this jump table slot in the + // memory we allocated for the jump table in 'initJumpTableInfo' + uintptr_t Base = (uintptr_t)SlotPtr; + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]); + /// FIXME: USe EntryKind instead of magic "getPICJumpTableEntry" hook. + *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base); + } + } + break; + } + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + llvm_unreachable( + "JT Info emission not implemented for GPRel64BlockAddress yet."); + } +} + +void JITEmitter::startGVStub(const GlobalValue* GV, + unsigned StubSize, unsigned Alignment) { + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; + + BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment); + BufferEnd = BufferBegin+StubSize+1; +} + +void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) { + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; + + BufferBegin = CurBufferPtr = (uint8_t *)Buffer; + BufferEnd = BufferBegin+StubSize+1; +} + +void JITEmitter::finishGVStub() { + assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space."); + NumBytes += getCurrentPCOffset(); + BufferBegin = SavedBufferBegin; + BufferEnd = SavedBufferEnd; + CurBufferPtr = SavedCurBufferPtr; +} + +void *JITEmitter::allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) { + uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment); + memcpy(IndGV, Buffer, Size); + return IndGV; +} + +// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry +// in the constant pool that was last emitted with the 'emitConstantPool' +// method. 
+// +uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const { + assert(ConstantNum < ConstantPool->getConstants().size() && + "Invalid ConstantPoolIndex!"); + return ConstPoolAddresses[ConstantNum]; +} + +// getJumpTableEntryAddress - Return the address of the JumpTable with index +// 'Index' in the jumpp table that was last initialized with 'initJumpTableInfo' +// +uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { + const std::vector &JT = JumpTable->getJumpTables(); + assert(Index < JT.size() && "Invalid jump table index!"); + + unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getDataLayout()); + + unsigned Offset = 0; + for (unsigned i = 0; i < Index; ++i) + Offset += JT[i].MBBs.size(); + + Offset *= EntrySize; + + return (uintptr_t)((char *)JumpTableBase + Offset); +} + +void JITEmitter::EmittedFunctionConfig::onDelete( + JITEmitter *Emitter, const Function *F) { + Emitter->deallocateMemForFunction(F); +} +void JITEmitter::EmittedFunctionConfig::onRAUW( + JITEmitter *, const Function*, const Function*) { + llvm_unreachable("The JIT doesn't know how to handle a" + " RAUW on a value it has emitted."); +} + + +//===----------------------------------------------------------------------===// +// Public interface to this file +//===----------------------------------------------------------------------===// + +JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, + TargetMachine &tm) { + return new JITEmitter(jit, JMM, tm); +} + +// getPointerToFunctionOrStub - If the specified function has been +// code-gen'd, return a pointer to the function. If not, compile it, or use +// a stub to implement lazy compilation if available. +// +void *JIT::getPointerToFunctionOrStub(Function *F) { + // If we have already code generated the function, just return the address. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; + + // Get a stub if the target supports it. 
+ JITEmitter *JE = static_cast(getCodeEmitter()); + return JE->getJITResolver().getLazyFunctionStub(F); +} + +void JIT::updateFunctionStubUnlocked(Function *F) { + // Get the empty stub we generated earlier. + JITEmitter *JE = static_cast(getCodeEmitter()); + void *Stub = JE->getJITResolver().getLazyFunctionStub(F); + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr != Stub && "Function must have non-stub address to be updated."); + + // Tell the target jit info to rewrite the stub at the specified address, + // rather than creating a new one. + TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout(); + JE->startGVStub(Stub, layout.Size); + getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter()); + JE->finishGVStub(); +} + +/// freeMachineCodeForFunction - release machine code memory for given Function. +/// +void JIT::freeMachineCodeForFunction(Function *F) { + // Delete translation for this from the ExecutionEngine, so it will get + // retranslated next time it is used. + updateGlobalMapping(F, nullptr); + + // Free the actual memory for the function body and related stuff. + static_cast(JCE)->deallocateMemForFunction(F); +} diff --git a/lib/ExecutionEngine/MCJIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp similarity index 100% rename from lib/ExecutionEngine/MCJIT/JITMemoryManager.cpp rename to lib/ExecutionEngine/JIT/JITMemoryManager.cpp diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt new file mode 100644 index 00000000000..dd22f1b464a --- /dev/null +++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = JIT +parent = ExecutionEngine +required_libraries = CodeGen Core ExecutionEngine Support diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile new file mode 100644 index 00000000000..aafa3d9d420 --- /dev/null +++ b/lib/ExecutionEngine/JIT/Makefile @@ -0,0 +1,38 @@ +##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMJIT + +# Get the $(ARCH) setting +include $(LEVEL)/Makefile.config + +# Enable the X86 JIT if compiling on X86 +ifeq ($(ARCH), x86) + ENABLE_X86_JIT = 1 +endif + +# This flag can also be used on the command line to force inclusion +# of the X86 JIT on non-X86 hosts +ifdef ENABLE_X86_JIT + CPPFLAGS += -DENABLE_X86_JIT +endif + +# Enable the Sparc JIT if compiling on Sparc +ifeq ($(ARCH), Sparc) + ENABLE_SPARC_JIT = 1 +endif + +# This flag can also be used on the command line to force inclusion +# of the Sparc JIT on non-Sparc hosts +ifdef ENABLE_SPARC_JIT + CPPFLAGS += -DENABLE_SPARC_JIT +endif + +include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt index ecae078ec7d..6dc75af2ec9 100644 --- a/lib/ExecutionEngine/LLVMBuild.txt +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT +subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT [component_0] type = Library diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index 0f42c31060b..088635a0e99 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMMCJIT - JITMemoryManager.cpp MCJIT.cpp SectionMemoryManager.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 5f1fac7eff1..53630d5a5e8 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -247,6 +247,10 @@ void MCJIT::finalizeModule(Module *M) { finalizeLoadedModules(); } +void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { + report_fatal_error("not yet implemented"); +} + uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Mangler 
Mang(TM->getSubtargetImpl()->getDataLayout()); SmallString<128> FullName; @@ -368,6 +372,14 @@ void *MCJIT::getPointerToFunction(Function *F) { return (void*)Dyld.getSymbolLoadAddress(Name); } +void *MCJIT::recompileAndRelinkFunction(Function *F) { + report_fatal_error("not yet implemented"); +} + +void MCJIT::freeMachineCodeForFunction(Function *F) { + report_fatal_error("not yet implemented"); +} + void MCJIT::runStaticConstructorsDestructorsInModulePtrSet( bool isDtors, ModulePtrSet::iterator I, ModulePtrSet::iterator E) { for (; I != E; ++I) { @@ -537,7 +549,8 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; MutexGuard locked(lock); - auto I = std::find(EventListeners.rbegin(), EventListeners.rend(), L); + SmallVector::reverse_iterator I= + std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { std::swap(*I, EventListeners.back()); EventListeners.pop_back(); @@ -553,8 +566,7 @@ void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { MutexGuard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { - JITEventListener *L = EventListeners[I]; - L->NotifyFreeingObject(Obj); + EventListeners[I]->NotifyFreeingObject(Obj); } } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 247de7c90b8..83e3321db92 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -211,7 +211,7 @@ class MCJIT : public ExecutionEngine { MCContext *Ctx; LinkingMemoryManager MemMgr; RuntimeDyld Dyld; - std::vector EventListeners; + SmallVector EventListeners; OwningModuleContainer OwnedModules; @@ -275,8 +275,14 @@ public: /// \param isDtors - Run the destructors instead of constructors. 
void runStaticConstructorsDestructors(bool isDtors) override; + void *getPointerToBasicBlock(BasicBlock *BB) override; + void *getPointerToFunction(Function *F) override; + void *recompileAndRelinkFunction(Function *F) override; + + void freeMachineCodeForFunction(Function *F) override; + GenericValue runFunction(Function *F, const std::vector &ArgValues) override; diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index cf714324e3b..c26e0ada5bc 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -11,7 +11,7 @@ LIBRARYNAME = LLVMExecutionEngine include $(LEVEL)/Makefile.config -PARALLEL_DIRS = Interpreter MCJIT RuntimeDyld +PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld ifeq ($(USE_INTEL_JITEVENTS), 1) PARALLEL_DIRS += IntelJITEvents diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index e6679cfb7f7..b10d51f6486 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -30,7 +30,7 @@ TargetMachine *EngineBuilder::selectTarget() { // MCJIT can generate code for remote targets, but the old JIT and Interpreter // must use the host architecture. - if (WhichEngine != EngineKind::Interpreter && M) + if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M) TT.setTriple(M->getTargetTriple()); return selectTarget(TT, MArch, MCPU, MAttrs); @@ -89,7 +89,8 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, } // FIXME: non-iOS ARM FastISel is broken with MCJIT. 
- if (TheTriple.getArch() == Triple::arm && + if (UseMCJIT && + TheTriple.getArch() == Triple::arm && !TheTriple.isiOS() && OptLevel == CodeGenOpt::None) { OptLevel = CodeGenOpt::Less; diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index 8462d3664a0..789d549bb15 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -2,7 +2,7 @@ set(LLVM_TARGET_DEFINITIONS AArch64.td) tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index aec283f8085..55df29c1499 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -23,6 +23,7 @@ class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; class ImmutablePass; +class JITCodeEmitter; class MachineInstr; class MCInst; class TargetLowering; @@ -30,6 +31,10 @@ class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); + +FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, + JITCodeEmitter &JCE); + FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp new file mode 100644 index 00000000000..714497c1bd8 --- /dev/null +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -0,0 +1,1910 @@ +//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open 
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the ARM machine instructions into +// relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRelocations.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include +#endif +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + + class ARMCodeEmitter : public MachineFunctionPass { + ARMJITInfo *JTI; + const ARMBaseInstrInfo *II; + const DataLayout *TD; + const ARMSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + MachineModuleInfo *MMI; + const std::vector *MCPEs; + const std::vector *MJTEs; + bool IsPIC; + bool IsThumb; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + public: + ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(nullptr), + II((const ARMBaseInstrInfo *)tm.getSubtargetImpl()->getInstrInfo()), + 
TD(tm.getSubtargetImpl()->getDataLayout()), TM(tm), MCE(mce), + MCPEs(nullptr), MJTEs(nullptr), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "ARM Machine Code Emitter"; + } + + void emitInstruction(const MachineInstr &MI); + + private: + + void emitWordLE(unsigned Binary); + void emitDWordLE(uint64_t Binary); + void emitConstPoolInstruction(const MachineInstr &MI); + void emitMOVi32immInstruction(const MachineInstr &MI); + void emitMOVi2piecesInstruction(const MachineInstr &MI); + void emitLEApcrelJTInstruction(const MachineInstr &MI); + void emitPseudoMoveInstruction(const MachineInstr &MI); + void addPCLabel(unsigned LabelID); + void emitPseudoInstruction(const MachineInstr &MI); + unsigned getMachineSoRegOpValue(const MachineInstr &MI, + const MCInstrDesc &MCID, + const MachineOperand &MO, + unsigned OpIdx); + + unsigned getMachineSoImmOpValue(unsigned SoImm); + unsigned getAddrModeSBit(const MachineInstr &MI, + const MCInstrDesc &MCID) const; + + void emitDataProcessingInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn = 0); + + void emitLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitMulFrmInstruction(const MachineInstr &MI); + + void emitExtendInstruction(const MachineInstr &MI); + + void emitMiscArithInstruction(const MachineInstr &MI); + + void emitSaturateInstruction(const MachineInstr &MI); + + void emitBranchInstruction(const MachineInstr 
&MI); + + void emitInlineJumpTable(unsigned JTIndex); + + void emitMiscBranchInstruction(const MachineInstr &MI); + + void emitVFPArithInstruction(const MachineInstr &MI); + + void emitVFPConversionInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitNEONLaneInstruction(const MachineInstr &MI); + void emitNEONDupInstruction(const MachineInstr &MI); + void emitNEON1RegModImmInstruction(const MachineInstr &MI); + void emitNEON2RegInstruction(const MachineInstr &MI); + void emitNEON3RegInstruction(const MachineInstr &MI); + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const { + return getMachineOpValue(MI, MI.getOperand(OpIdx)); + } + + // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the + // TableGen'erated getBinaryCodeForInstr() function to encode any + // operand values, instead querying getMachineOpValue() directly for + // each operand it needs to encode. Thus, any of the new encoder + // helper functions can simply return 0 as the values the return + // are already handled elsewhere. They are placeholders to allow this + // encoder to continue to function until the MC encoder is sufficiently + // far along that this one can be eliminated entirely. 
+    // Operand-encoder hooks that this emitter does not implement: every one of
+    // them ignores its arguments and returns 0.
+    // NOTE(review): presumably these exist only so the generated encoder
+    // tables link against this class -- confirm.
+    unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+      const { return 0; }
+    unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val)
+      const { return 0; }
+    unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val)
+      const { return 0; }
+    unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val)
+      const { return 0; }
+    unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val)
+      const { return 0; }
+    unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+      unsigned Op) const { return 0; }
+    unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI,
+                                                 unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+                                            unsigned Op) const { return 0; }
+    uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+
+    /// getAddrModeImm12OpValue - Encode the operand pair starting at \p Op
+    /// (operand \p Op is the base, operand \p Op + 1 the signed offset) as
+    /// reg:U:imm12. If the base operand is not a register, a constant-pool
+    /// relocation is recorded for its index and zero is returned.
+    unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+      const {
+      // {17-13} = reg
+      // {12}    = (U)nsigned (add == '1', sub == '0')
+      // {11-0}  = imm12
+      const MachineOperand &MO  = MI.getOperand(Op);
+      const MachineOperand &MO1 = MI.getOperand(Op + 1);
+      if (!MO.isReg()) {
+        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+        return 0;
+      }
+      unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+      int32_t Imm12 = MO1.getImm();
+
+      // The offset field holds a magnitude and the 'U' bit carries the sign,
+      // exactly as in getAddrMode5OpValue below. Masking a negative value
+      // directly would put its two's-complement low bits into imm12 (e.g. -4
+      // would be emitted as 0xffc), so negate it first. INT32_MIN is the
+      // special value for #-0: it encodes as offset 0 with U clear and must
+      // not be negated.
+      bool isAdd = Imm12 >= 0;
+      uint32_t Offset = 0;
+      if (Imm12 != INT32_MIN)
+        Offset = isAdd ? Imm12 : -Imm12;
+
+      uint32_t Binary = Offset & 0xfff;
+      if (isAdd)
+        Binary |= (1 << 12);
+      Binary |= (Reg << 13);
+      return Binary;
+    }
+
+    unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+      return 0;
+    }
+
+    uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0;}
+    uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0;}
+    uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0;}
+    uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    /// getAddrMode5OpValue - Encode the operand pair starting at \p Op (base
+    /// at \p Op, signed offset at \p Op + 1) as reg:U:offset. If the base
+    /// operand is not a register, a constant-pool relocation is recorded for
+    /// its index and zero is returned.
+    uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+      // {17-13} = reg
+      // {12}    = (U)nsigned (add == '1', sub == '0')
+      // {11-0}  = imm12
+      const MachineOperand &MO  = MI.getOperand(Op);
+      const MachineOperand &MO1 = MI.getOperand(Op + 1);
+      if (!MO.isReg()) {
+        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+        return 0;
+      }
+      unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
+      int32_t Imm12 = MO1.getImm();
+
+      // Special value for #-0
+      if (Imm12 == INT32_MIN)
+        Imm12 = 0;
+
+      // Immediate is always encoded as positive. The 'U' bit controls add vs
+      // sub.
+      bool isAdd = true;
+      if (Imm12 < 0) {
+        Imm12 = -Imm12;
+        isAdd = false;
+      }
+
+      uint32_t Binary = Imm12 & 0xfff;
+      if (isAdd)
+        Binary |= (1 << 12);
+      Binary |= (Reg << 13);
+      return Binary;
+    }
+    unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+    /// machine operand requires relocation, record the relocation and return
+    /// zero.
+    unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+                            unsigned Reloc);
+
+    /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+    ///
+    unsigned getShiftOp(unsigned Imm) const ;
+
+    /// Routines that handle operands which add machine relocations which are
+    /// fixed up by the relocation stage.
+    void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+                           bool MayNeedFarStub,  bool Indirect,
+                           intptr_t ACPV = 0) const;
+    void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+    void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+    void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+    void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+                               intptr_t JTBase = 0) const;
+    unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const;
+    unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const;
+  };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+                                                JITCodeEmitter &JCE) {
+  return new ARMCodeEmitter(TM, JCE);
+}
+
+/// runOnMachineFunction - Cache the per-function state the emitter needs
+/// (subtarget, JIT info, instruction info, data layout, constant pool and
+/// jump tables, PIC/Thumb flags, module info) and then emit every instruction
+/// of \p MF through the JITCodeEmitter. Always returns false.
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+  TargetMachine &Target = const_cast<TargetMachine &>(MF.getTarget());
+
+  // NOTE(review): as written this condition is always true (the model cannot
+  // be equal to both Default and Static), so the assert can never fire. The
+  // message suggests '==' was intended, but IsPIC is computed from the same
+  // model just below, so the check is left unchanged here -- confirm the
+  // intended set of relocation models before tightening it.
+  assert((Target.getRelocationModel() != Reloc::Default ||
+          Target.getRelocationModel() != Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+  // Initialize the subtarget first so we can grab all of the
+  // subtarget dependent variables from there.
+  // NOTE(review): the template arguments below were missing in this copy of
+  // the patch; they are restored to match the way the members are used in
+  // this file -- confirm against the member declarations.
+  Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  JTI = static_cast<ARMJITInfo *>(Target.getSubtargetImpl()->getJITInfo());
+  II = static_cast<const ARMBaseInstrInfo *>(Subtarget->getInstrInfo());
+  TD = Target.getSubtargetImpl()->getDataLayout();
+
+  MCPEs = &MF.getConstantPool()->getConstants();
+  MJTEs = nullptr;
+  if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+  IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+  IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+  JTI->Initialize(MF, IsPIC);
+  MMI = &getAnalysis<MachineModuleInfo>();
+  MCE.setModuleInfo(MMI);
+
+  // Emit the whole function again for as long as finishFunction() returns
+  // true.
+  do {
+    DEBUG(errs() << "JITTing function '"
+          << MF.getName() << "'\n");
+    MCE.startFunction(MF);
+    for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+         MBB != E; ++MBB) {
+      MCE.StartMachineBasicBlock(MBB);
+      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+           I != E; ++I)
+        emitInstruction(*I);
+    }
+  } while (MCE.finishFunction(MF));
+
+  return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+  switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+  default: llvm_unreachable("Unknown shift opc!");
+  case ARM_AM::asr: return 2;
+  case ARM_AM::lsl: return 0;
+  case ARM_AM::lsr: return 1;
+  case ARM_AM::ror:
+  case ARM_AM::rrx: return 3;
+  }
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+                                        const MachineOperand &MO,
+                                        unsigned Reloc) {
+  assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+      && "Relocation to this function should be for movt or movw");
+
+  // An immediate is returned directly. Every other supported operand kind
+  // only records a relocation of type Reloc and falls through to return 0.
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), Reloc);
+  else {
+#ifndef NDEBUG
+    errs() << MO;
+#endif
+    llvm_unreachable("Unsupported operand type for movw/movt");
+  }
+  return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+                                           const MachineOperand &MO) const {
+  // Registers and immediates have a direct encoding; all remaining operand
+  // kinds record a relocation and return 0.
+  if (MO.isReg())
+    return II->getRegisterInfo().getEncodingValue(MO.getReg());
+  else if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+  else if (MO.isCPI()) {
+    const MCInstrDesc &MCID = MI.getDesc();
+    // For VFP load, the immediate offset is multiplied by 4.
+    unsigned Reloc =  ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+      ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+    emitConstPoolAddress(MO.getIndex(), Reloc);
+  } else if (MO.isJTI())
+    emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+  else
+    llvm_unreachable("Unable to encode MachineOperand!");
+  return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+/// Records a relocation of type Reloc for GV at the current PC offset: an
+/// indirect-symbol relocation when Indirect is set, a plain global-value
+/// relocation otherwise. ACPV and MayNeedFarStub are passed through to the
+/// relocation unchanged.
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+                                       bool MayNeedFarStub, bool Indirect,
+                                       intptr_t ACPV) const {
+  MachineRelocation MR = Indirect
+    ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+                                           const_cast<GlobalValue *>(GV),
+                                           ACPV, MayNeedFarStub)
+    : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+                               const_cast<GlobalValue *>(GV), ACPV,
+                               MayNeedFarStub);
+  MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+  MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+                                                 Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+  // Tell JIT emitter we'll resolve the address.
+  MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+                                                    Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+  MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+                                                    Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, + intptr_t JTBase) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB, JTBase)); +} + +void ARMCodeEmitter::emitWordLE(unsigned Binary) { + DEBUG(errs() << " 0x"; + errs().write_hex(Binary) << "\n"); + MCE.emitWordLE(Binary); +} + +void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { + DEBUG(errs() << " 0x"; + errs().write_hex(Binary) << "\n"); + MCE.emitDWordLE(Binary); +} + +void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + ++NumEmitted; // Keep track of the # of mi's emitted + switch (MI.getDesc().TSFlags & ARMII::FormMask) { + default: { + llvm_unreachable("Unhandled instruction encoding format!"); + } + case ARMII::MiscFrm: + if (MI.getOpcode() == ARM::LEApcrelJT) { + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + } + llvm_unreachable("Unhandled instruction encoding!"); + case ARMII::Pseudo: + emitPseudoInstruction(MI); + break; + case ARMII::DPFrm: + case ARMII::DPSoRegFrm: + emitDataProcessingInstruction(MI); + break; + case ARMII::LdFrm: + case ARMII::StFrm: + emitLoadStoreInstruction(MI); + break; + case ARMII::LdMiscFrm: + case ARMII::StMiscFrm: + emitMiscLoadStoreInstruction(MI); + break; + case ARMII::LdStMulFrm: + emitLoadStoreMultipleInstruction(MI); + break; + case ARMII::MulFrm: + emitMulFrmInstruction(MI); + break; + case ARMII::ExtFrm: + emitExtendInstruction(MI); + break; + case ARMII::ArithMiscFrm: + emitMiscArithInstruction(MI); + break; + case ARMII::SatFrm: + emitSaturateInstruction(MI); + break; + case ARMII::BrFrm: + emitBranchInstruction(MI); + break; + case ARMII::BrMiscFrm: + emitMiscBranchInstruction(MI); + break; + // VFP instructions. 
+ case ARMII::VFPUnaryFrm: + case ARMII::VFPBinaryFrm: + emitVFPArithInstruction(MI); + break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + emitVFPConversionInstruction(MI); + break; + case ARMII::VFPLdStFrm: + emitVFPLoadStoreInstruction(MI); + break; + case ARMII::VFPLdStMulFrm: + emitVFPLoadStoreMultipleInstruction(MI); + break; + + // NEON instructions. + case ARMII::NGetLnFrm: + case ARMII::NSetLnFrm: + emitNEONLaneInstruction(MI); + break; + case ARMII::NDupFrm: + emitNEONDupInstruction(MI); + break; + case ARMII::N1RegModImmFrm: + emitNEON1RegModImmInstruction(MI); + break; + case ARMII::N2RegFrm: + emitNEON2RegInstruction(MI); + break; + case ARMII::N3RegFrm: + emitNEON3RegInstruction(MI); + break; + } + MCE.processDebugLoc(MI.getDebugLoc(), false); +} + +void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { + unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. + unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. + const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; + + // Remember the CONSTPOOL_ENTRY address for later relocation. + JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); + + // Emit constpool island entry. In most cases, the actual values will be + // resolved and relocated after code emission. 
+ if (MCPE.isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = + static_cast(MCPE.Val.MachineCPVal); + + DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); + + assert(ACPV->isGlobalValue() && "unsupported constant pool value"); + const GlobalValue *GV = cast(ACPV)->getGV(); + if (GV) { + Reloc::Model RelocM = TM.getRelocationModel(); + emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, + isa(GV), + Subtarget->GVIsIndirectSymbol(GV, RelocM), + (intptr_t)ACPV); + } else { + const char *Sym = cast(ACPV)->getSymbol(); + emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); + } + emitWordLE(0); + } else { + const Constant *CV = MCPE.Val.ConstVal; + + DEBUG({ + errs() << " ** Constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " "; + if (const Function *F = dyn_cast(CV)) + errs() << F->getName(); + else + errs() << *CV; + errs() << '\n'; + }); + + if (const GlobalValue *GV = dyn_cast(CV)) { + emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa(GV), false); + emitWordLE(0); + } else if (const ConstantInt *CI = dyn_cast(CV)) { + uint32_t Val = uint32_t(*CI->getValue().getRawData()); + emitWordLE(Val); + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + if (CFP->getType()->isFloatTy()) + emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else if (CFP->getType()->isDoubleTy()) + emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else { + llvm_unreachable("Unable to handle this constantpool entry!"); + } + } else { + llvm_unreachable("Unable to handle this constantpool entry!"); + } + } +} + +void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + + // Emit the 'movw' instruction. 
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 + + unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm16 as imm4:imm12 + Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12 + Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4 + emitWordLE(Binary); + + unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16; + // Emit the 'movt' instruction. + Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm16 as imm4:imm1, same as movw above. + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) && + "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); + + // Emit the 'mov' instruction. + unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(V1); + emitWordLE(Binary); + + // Now the 'orr' instruction. + Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 + + // Set the conditional execution predicate. 
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(V2); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { + // It's basically add r, pc, (LJTI - $+8) + + const MCInstrDesc &MCID = MI.getDesc(); + + // Emit the 'add' instruction. + unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, MCID); + + // Encode Rd. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode Rn which is PC. + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; + + // Encode the displacement. + Binary |= 1 << ARMII::I_BitShift; + emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) + Binary |= 1 << ARMII::S_BitShift; + + // Encode register def if there is one. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode the shift operation. 
+ switch (Opcode) { + default: break; + case ARM::RRX: + // rrx + Binary |= 0x6 << 4; + break; + case ARM::MOVsrl_flag: + // lsr #1 + Binary |= (0x2 << 4) | (1 << 7); + break; + case ARM::MOVsra_flag: + // asr #1 + Binary |= (0x4 << 4) | (1 << 7); + break; + } + + // Encode register Rm. + Binary |= getMachineOpValue(MI, 1); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::addPCLabel(unsigned LabelID) { + DEBUG(errs() << " ** LPC" << LabelID << " @ " + << (void*)MCE.getCurrentPCValue() << '\n'); + JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); +} + +void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + switch (Opcode) { + default: + llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + case ARM::BX_CALL: + case ARM::BMOVPCRX_CALL: { + // First emit mov lr, pc + unsigned Binary = 0x01a0e00f; + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + emitWordLE(Binary); + + // and then emit the branch. + emitMiscBranchInstruction(MI); + break; + } + case TargetOpcode::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + report_fatal_error("JIT does not support inline asm!"); + } + break; + } + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + // Do nothing. + break; + case ARM::CONSTPOOL_ENTRY: + emitConstPoolInstruction(MI); + break; + case ARM::PICADD: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // PICADD is just an add instruction that implicitly read pc. 
+ emitDataProcessingInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICSTR: + case ARM::PICSTRB: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitLoadStoreInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDRH: + case ARM::PICLDRSH: + case ARM::PICLDRSB: + case ARM::PICSTRH: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitMiscLoadStoreInstruction(MI, ARM::PC); + break; + } + + case ARM::MOVi32imm: + // Two instructions to materialize a constant. + if (Subtarget->hasV6T2Ops()) + emitMOVi32immInstruction(MI); + else + emitMOVi2piecesInstruction(MI); + break; + + case ARM::LEApcrelJT: + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + case ARM::RRX: + case ARM::MOVsrl_flag: + case ARM::MOVsra_flag: + emitPseudoMoveInstruction(MI); + break; + } +} + +unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, + const MCInstrDesc &MCID, + const MachineOperand &MO, + unsigned OpIdx) { + unsigned Binary = getMachineOpValue(MI, MO); + + const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); + const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); + + // Encode the shift opcode. + unsigned SBits = 0; + unsigned Rs = MO1.getReg(); + if (Rs) { + // Set shift operand (bit[7:4]). + // LSL - 0001 + // LSR - 0011 + // ASR - 0101 + // ROR - 0111 + // RRX - 0110 and bit[11:8] clear. 
+ switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x1; break; + case ARM_AM::lsr: SBits = 0x3; break; + case ARM_AM::asr: SBits = 0x5; break; + case ARM_AM::ror: SBits = 0x7; break; + case ARM_AM::rrx: SBits = 0x6; break; + } + } else { + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + } + } + Binary |= SBits << 4; + if (SOpc == ARM_AM::rrx) + return Binary; + + // Encode the shift operation Rs or shift_imm (except rrx). + if (Rs) { + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); + } + + // Encode shift_imm bit[11:7]. + return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; +} + +unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { + int SoImmVal = ARM_AM::getSOImmVal(SoImm); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + // Encode rotate_imm. + unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) + << ARMII::SoRotImmShift; + + // Encode immed_8. + Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); + return Binary; +} + +unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, + const MCInstrDesc &MCID) const { + for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ + const MachineOperand &MO = MI.getOperand(i-1); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) + return 1 << ARMII::S_BitShift; + } + return 0; +} + +void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. 
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, MCID); + + // Encode register def if there is one. + unsigned NumDefs = MCID.getNumDefs(); + unsigned OpIdx = 0; + if (NumDefs) + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + else if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); + + if (MCID.Opcode == ARM::MOVi16) { + // Get immediate from MI. + unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movw); + // Encode imm which is the same as in emitMOVi32immInstruction(). + Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if(MCID.Opcode == ARM::MOVTi16) { + unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movt) >> 16); + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { + uint32_t v = ~MI.getOperand(2).getImm(); + int32_t lsb = countTrailingZeros(v); + int32_t msb = (32 - countLeadingZeros(v)) - 1; + // Instr{20-16} = msb, Instr{11-7} = lsb + Binary |= (msb & 0x1F) << 16; + Binary |= (lsb & 0x1F) << 7; + emitWordLE(Binary); + return; + } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { + // Encode Rn in Instr{0-3} + Binary |= getMachineOpValue(MI, OpIdx++); + + uint32_t lsb = MI.getOperand(OpIdx++).getImm(); + uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1; + + // Instr{20-16} = widthm1, Instr{11-7} = lsb + Binary |= (widthm1 & 0x1F) << 16; + Binary |= (lsb & 0x1F) << 7; + emitWordLE(Binary); + return; + } + + // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. 
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + // Encode first non-shifter register operand if there is one. + bool isUnary = MCID.TSFlags & ARMII::UnaryDP; + if (!isUnary) { + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); + else { + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; + ++OpIdx; + } + } + + // Encode shifter operand. + const MachineOperand &MO = MI.getOperand(OpIdx); + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + // Encode SoReg. + emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); + return; + } + + if (MO.isReg()) { + // Encode register Rm. + emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); + return; + } + + // Encode so_imm. + Binary |= getMachineSoImmOpValue((unsigned)MO.getImm()); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done. + if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp || + MI.getOpcode() == ARM::STRi12) { + emitWordLE(Binary); + return; + } + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + if (ImplicitRd) + // Special handling for implicit use (e.g. PC). 
+ Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDR_PRE. + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM2Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative). + Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + if (!MO2.getReg()) { // is immediate + if (ARM_AM::getAM2Offset(AM2Opc)) + // Set the value of offset_12 field + Binary |= ARM_AM::getAM2Offset(AM2Opc); + emitWordLE(Binary); + return; + } + + // Set bit I(25), because this is not in immediate encoding. + Binary |= 1 << ARMII::I_BitShift; + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + // Set bit[3:0] to the corresponding Rm register + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); + + // If this instr is in scaled register offset/index instruction, set + // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. + if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { + Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift + Binary |= ShImm << ARMII::ShiftShift; // shift_immed + } + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. 
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StMiscFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Skip LDRD and STRD's second operand. + if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) + ++OpIdx; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDRH_POST. + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM3Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative) + Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + + // If this instr is in register offset/index encoding, set bit[3:0] + // to the corresponding Rm register. + if (MO2.getReg()) { + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); + emitWordLE(Binary); + return; + } + + // This instr is in immediate offset/index encoding, set bit 22 to 1. 
+ Binary |= 1 << ARMII::AM3_I_BitShift; + if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { + // Set operands + Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH + Binary |= (ImmOffs & 0xF); // immedL + } + + emitWordLE(Binary); +} + +static unsigned getAddrModeUPBits(unsigned Mode) { + unsigned Binary = 0; + + // Set addressing mode by modifying bits U(23) and P(24) + // IA - Increment after - bit U = 1 and bit P = 0 + // IB - Increment before - bit U = 1 and bit P = 1 + // DA - Decrement after - bit U = 0 and bit P = 0 + // DB - Decrement before - bit U = 0 and bit P = 1 + switch (Mode) { + default: llvm_unreachable("Unknown addressing sub-mode!"); + case ARM_AM::da: break; + case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; + case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; + case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; + } + + return Binary; +} + +void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Skip operand 0 of an instruction with base register update. 
+ unsigned OpIdx = 0; + if (IsUpdating) + ++OpIdx; + + // Set base address operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); + + // Set bit W(21) + if (IsUpdating) + Binary |= 0x1 << ARMII::W_BitShift; + + // Set registers + for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + RegNum < 16); + Binary |= 0x1 << RegNum; + } + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, MCID); + + // 32x32->64bit operations have two destination registers. The number + // of register definitions will tell us if that's what we're dealing with. + unsigned OpIdx = 0; + if (MCID.getNumDefs() == 2) + Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; + + // Encode Rm + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode Rs + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; + + // Many multiple instructions (e.g. MLA) have three src operands. Encode + // it as Rn (for multiply, that's in the same offset as RdLo. 
+ if (MCID.getNumOperands() > OpIdx && + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; + + emitWordLE(Binary); +}
+
+/// emitExtendInstruction - Encode an extend-form instruction and emit it as
+/// one little-endian word.
+///
+/// Operand 0 is placed in the Rd field.  When operand 2 is a register the
+/// instruction is in its two-register form: operand 1 goes to Rn and operand 2
+/// to Rm.  Otherwise operand 1 is the only source and goes to Rm.  A following
+/// immediate operand that is neither a predicate nor an optional def is taken
+/// as the rotate amount and stored divided by 8.
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+  const MachineOperand &MO2 = MI.getOperand(OpIdx);
+  if (MO2.isReg()) {
+    // Two register operand form.
+    // Encode Rn.
+    Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+    // Encode Rm.
+    Binary |= getMachineOpValue(MI, MO2);
+    ++OpIdx;
+  } else {
+    Binary |= getMachineOpValue(MI, MO1);
+  }
+
+  // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+  if (MI.getOperand(OpIdx).isImm() &&
+      !MCID.OpInfo[OpIdx].isPredicate() &&
+      !MCID.OpInfo[OpIdx].isOptionalDef())
+    Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+  emitWordLE(Binary);
+}
+
+/// emitMiscArithInstruction - Encode a miscellaneous arithmetic instruction
+/// and emit it as one little-endian word.
+///
+/// PKHBT and PKHTB are emitted with only the TableGen-provided bits and the
+/// predicate.  For everything else operand 0 goes to Rd; a single source
+/// operand goes to Rm; with two sources they go to Rn and Rm and the next
+/// operand is the shift amount.
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // PKH instructions are finished at this point
+  if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
+    emitWordLE(Binary);
+    return;
+  }
+
+  unsigned OpIdx = 0;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  const MachineOperand &MO = MI.getOperand(OpIdx++);
+  if (OpIdx == MCID.getNumOperands() ||
+      MCID.OpInfo[OpIdx].isPredicate() ||
+      MCID.OpInfo[OpIdx].isOptionalDef()) {
+    // Encode Rm and it's done.
+    Binary |= getMachineOpValue(MI, MO);
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Encode Rn.
+  Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+  // Encode Rm.
+  Binary |= getMachineOpValue(MI, OpIdx++);
+
+  // Encode shift_imm.  A PKHTB-specific adjustment (shift of 32 encoded as 0)
+  // used to sit here, but PKHTB always takes the early return above, so that
+  // code could never run and has been dropped.
+  unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+  assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+  Binary |= ShiftAmt << ARMII::ShiftShift;
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGen. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode saturate bit position. + unsigned Pos = MI.getOperand(1).getImm(); + if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) + Pos -= 1; + assert((Pos < 16 || (Pos < 32 && + MCID.Opcode != ARM::SSAT16 && + MCID.Opcode != ARM::USAT16)) && + "saturate bit position out of range"); + Binary |= Pos << 16; + + // Encode Rm + Binary |= getMachineOpValue(MI, 2); + + // Encode shift_imm.
+ if (MCID.getNumOperands() == 4) { + unsigned ShiftOp = MI.getOperand(3).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + if (Opc == ARM_AM::asr) + Binary |= (1 << 6); + unsigned ShiftAmt = MI.getOperand(3).getImm(); + if (ShiftAmt == 32 && Opc == ARM_AM::asr) + ShiftAmt = 0; + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + } + + emitWordLE(Binary); +}
+
+/// emitBranchInstruction - Encode a branch and emit it as one little-endian
+/// word: the TableGen-provided bits, the condition field, and operand 0 ORed
+/// into the low bits as the signed_immed_24 field.  ARM::TPsoft is not
+/// supported and hits llvm_unreachable.
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  if (MCID.Opcode == ARM::TPsoft) {
+    llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+  }
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Set signed_immed_24 field
+  Binary |= getMachineOpValue(MI, 0);
+
+  emitWordLE(Binary);
+}
+
+/// emitInlineJumpTable - Emit the jump table with index \p JTIndex directly
+/// into the instruction stream at the current PC.
+///
+/// The current PC is recorded with the JIT info as the table's base address.
+/// Each destination block then gets a relocation followed by a zero
+/// placeholder word: relative to the table base in PIC mode, absolute
+/// otherwise.
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+  // Remember the base address of the inline jump table.
+  uintptr_t JTBase = MCE.getCurrentPCValue();
+  JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+  DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+               << '\n');
+
+  // Now emit the jump table entries.
+  const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+  for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+    if (IsPIC)
+      // DestBB address - JT base.
+      emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+    else
+      // Absolute DestBB address.
+      emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+    emitWordLE(0);
+  }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Handle jump tables. + if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { + // First emit a ldr pc, [] instruction. + emitDataProcessingInstruction(MI, ARM::PC); + + // Then emit the inline jump table.
+ unsigned JTIndex = + (MCID.Opcode == ARM::BR_JTr) + ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); + emitInlineJumpTable(JTIndex); + return; + } else if (MCID.Opcode == ARM::BR_JTm) { + // First emit a ldr pc, [] instruction. + emitLoadStoreInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + emitInlineJumpTable(MI.getOperand(3).getIndex()); + return; + } + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) + // The return register is LR. + Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); + else + // otherwise, set the return register + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = ARM::SPRRegClass.contains(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); + if (!isSPVFP) + Binary |= RegD << ARMII::RegRdShift; + else { + Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; + Binary |= (RegD & 0x01) << ARMII::D_BitShift; + } + return Binary; +} + +unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = ARM::SPRRegClass.contains(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); + if (!isSPVFP) + Binary |= RegN << ARMII::RegRnShift; + else { + Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; + Binary |= (RegN & 0x01) << ARMII::N_BitShift; + } + return Binary; +} + +unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = ARM::SPRRegClass.contains(RegM); + RegM = 
II->getRegisterInfo().getEncodingValue(RegM); + if (!isSPVFP) + Binary |= RegM; + else { + Binary |= ((RegM & 0x1E) >> 1); + Binary |= (RegM & 0x01) << ARMII::M_BitShift; + } + return Binary; +}
+
+/// emitVFPArithInstruction - Encode a VFP arithmetic instruction and emit it
+/// as one little-endian word.
+///
+/// Operand 0 is encoded as Dd/Sd.  A tied (two-address) operand is skipped.
+/// For VFPBinaryFrm the next operand is encoded as Dn/Sn.  If a further
+/// operand remains that is neither a predicate nor an optional def, it is
+/// encoded as Dm/Sm.
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+  // NOTE(review): each term below ANDs Binary with a *_BitShift constant
+  // itself, not with (1 << *_BitShift).  These constants are used as shift
+  // amounts elsewhere in this file (e.g. "<< ARMII::M_BitShift"), so this
+  // check probably does not test the D/N/M bits that the message implies.
+  // Confirm which bits TableGen may legitimately set before tightening it.
+  assert((Binary & ARMII::D_BitShift) == 0 &&
+         (Binary & ARMII::N_BitShift) == 0 &&
+         (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+  // Encode Dd / Sd.
+  Binary |= encodeVFPRd(MI, OpIdx++);
+
+  // If this is a two-address operand, skip it, e.g. FMACD.
+  if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+    ++OpIdx;
+
+  // Encode Dn / Sn.
+  if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+    Binary |= encodeVFPRn(MI, OpIdx++);
+
+  if (OpIdx == MCID.getNumOperands() ||
+      MCID.OpInfo[OpIdx].isPredicate() ||
+      MCID.OpInfo[OpIdx].isOptionalDef()) {
+    // FCMPEZD etc. has only one operand.
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Encode Dm / Sm.
+  Binary |= encodeVFPRm(MI, OpIdx);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 0); + break; + case ARMII::VFPConv4Frm: + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 0); + break; + case ARMII::VFPConv5Frm: + // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0); + break; + } + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 1); + break; + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 1); + break; + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 1); + break; + } + + if (Form == ARMII::VFPConv5Frm) + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 2); + else if (Form == ARMII::VFPConv3Frm) + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 2); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // Encode address base. + const MachineOperand &Base = MI.getOperand(OpIdx++); + Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; + + // If there is a non-zero immediate offset, encode it. + if (Base.isReg()) { + const MachineOperand &Offset = MI.getOperand(OpIdx); + if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { + if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) + Binary |= 1 << ARMII::U_BitShift; + Binary |= ImmOffs; + emitWordLE(Binary); + return; + } + } + + // If immediate offset is omitted, default to +0. + Binary |= 1 << ARMII::U_BitShift; + + emitWordLE(Binary); +} + +void +ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. 
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Skip operand 0 of an instruction with base register update. + unsigned OpIdx = 0; + if (IsUpdating) + ++OpIdx; + + // Set base address operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); + + // Set bit W(21) + if (IsUpdating) + Binary |= 0x1 << ARMII::W_BitShift; + + // First register is encoded in Dd. + Binary |= encodeVFPRd(MI, OpIdx+2); + + // Count the number of registers. + unsigned NumRegs = 1; + for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + ++NumRegs; + } + // Bit 8 will be set if is consecutive 64-bit registers (e.g., D0) + // Otherwise, it will be 0, in the case of 32-bit registers. 
+ if(Binary & 0x100) + Binary |= NumRegs * 2; + else + Binary |= NumRegs; + + emitWordLE(Binary); +} + +unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegD = II->getRegisterInfo().getEncodingValue(RegD); + Binary |= (RegD & 0xf) << ARMII::RegRdShift; + Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; + return Binary; +} + +unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegN = II->getRegisterInfo().getEncodingValue(RegN); + Binary |= (RegN & 0xf) << ARMII::RegRnShift; + Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; + return Binary; +} + +unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegM = II->getRegisterInfo().getEncodingValue(RegM); + Binary |= (RegM & 0xf); + Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; + return Binary; +} + +/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON +/// data-processing instruction to the corresponding Thumb encoding. +static unsigned convertNEONDataProcToThumb(unsigned Binary) { + assert((Binary & 0xfe000000) == 0xf2000000 && + "not an ARM NEON data-processing instruction"); + unsigned UBit = (Binary >> 24) & 1; + return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); +} + +void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; + const MCInstrDesc &MCID = MI.getDesc(); + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + RegTOpIdx = 0; + RegNOpIdx = 1; + LnOpIdx = 2; + } else { // ARMII::NSetLnFrm + RegTOpIdx = 2; + RegNOpIdx = 0; + LnOpIdx = 3; + } + + // Set the conditional execution predicate + Binary |= (IsThumb ? 
ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); + RegT = II->getRegisterInfo().getEncodingValue(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, RegNOpIdx); + + unsigned LaneShift; + if ((Binary & (1 << 22)) != 0) + LaneShift = 0; // 8-bit elements + else if ((Binary & (1 << 5)) != 0) + LaneShift = 1; // 16-bit elements + else + LaneShift = 2; // 32-bit elements + + unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; + unsigned Opc1 = Lane >> 2; + unsigned Opc2 = Lane & 3; + assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); + Binary |= (Opc1 << 21); + Binary |= (Opc2 << 5); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(1).getReg(); + RegT = II->getRegisterInfo().getEncodingValue(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, 0); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd. 
+ Binary |= encodeNEONRd(MI, 0); + // Immediate fields: Op, Cmode, I, Imm3, Imm4 + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Op = (Imm >> 12) & 1; + unsigned Cmode = (Imm >> 8) & 0xf; + unsigned I = (Imm >> 7) & 1; + unsigned Imm3 = (Imm >> 4) & 0x7; + unsigned Imm4 = Imm & 0xf; + Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source register in Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VDUPfdf or VDUPfqf. + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source registers in Dn and Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRn(MI, OpIdx++); + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VMOVDneon or VMOVQ. 
+ emitWordLE(Binary); +} + +#include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a7cd6ed9ea4..3a4f788c848 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp new file mode 100644 index 00000000000..6d1114d51aa --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -0,0 +1,344 @@ +//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the ARM target. +// +//===----------------------------------------------------------------------===// + +#include "ARMJITInfo.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRelocations.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "jit" + +void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. 
+static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. (We need +// to preserve more context and manipulate the stack directly). Instead, +// write our own wrapper, which does things our way, so we have complete +// control over register saving and restoring. +extern "C" { +#if defined(__arm__) + void ARMCompilationCallback(); + asm( + ".text\n" + ".align 2\n" + ".globl " ASMPREFIX "ARMCompilationCallback\n" + ASMPREFIX "ARMCompilationCallback:\n" + // Save caller saved registers since they may contain stuff + // for the real target function right now. We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned, so we can't just preserve the callee saved regs. + "stmdb sp!, {r0, r1, r2, r3, lr}\n" +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // The LR contains the address of the stub function on entry. + // pass it as the argument to the C part of the callback + "mov r0, lr\n" + "sub sp, sp, #4\n" + // Call the C portion of the callback + "bl " ASMPREFIX "ARMCompilationCallbackC\n" + "add sp, sp, #4\n" + // Restoring the LR to the return address of the function that invoked + // the stub and de-allocating the stack space for it requires us to + // swap the two saved LR values on the stack, as they're backwards + // for what we need since the pop instruction has a pre-determined + // order for the registers. 
+ // +--------+ + // 0 | LR | Original return address + // +--------+ + // 1 | LR | Stub address (start of stub) + // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) + // 6-20 | D0..D7 | Saved VFP registers + // +--------+ + // +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + // Restore VFP caller-saved registers. + "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // + // We need to exchange the values in slots 0 and 1 so we can + // return to the address in slot 1 with the address in slot 0 + // restored to the LR. + "ldr r0, [sp,#20]\n" + "ldr r1, [sp,#16]\n" + "str r1, [sp,#20]\n" + "str r0, [sp,#16]\n" + // Return to the (newly modified) stub to invoke the real function. + // The above twiddling of the saved return addresses allows us to + // deallocate everything, including the LR the stub saved, with two + // updating load instructions. + "ldmia sp!, {r0, r1, r2, r3, lr}\n" + "ldr pc, [sp], #4\n" + ); +#else // Not an ARM host + void ARMCompilationCallback() { + llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!"); + } +#endif +}
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+///
+/// \param StubAddr address of the stub that triggered the callback; it is
+/// passed to the registered JIT compiler function and then patched in place.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+  // Get the address of the compiled code for this function.
+  intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+  // Rewrite the call target... so that we don't end up here every time we
+  // execute the call.  We're replacing the first two words of the stub with
+  //   ldr pc, [pc,#-4]
+  // followed by the address of the compiled code.
+  //
+  // Failing to change page permissions is a runtime condition, not a broken
+  // invariant, so report it as a fatal error.  llvm_unreachable would be
+  // undefined behaviour in builds where it compiles to an optimizer hint.
+  if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+    report_fatal_error("ERROR: Unable to mark stub writable");
+  }
+  *(intptr_t *)StubAddr = 0xe51ff004;  // ldr pc, [pc, #-4]
+  *(intptr_t *)(StubAddr+4) = NewVal;
+  // The words just written are about to be executed; flush them from the
+  // instruction cache the same way emitFunctionStub does after emitting a
+  // stub.
+  sys::Memory::InvalidateInstructionCache((void*)StubAddr, 8);
+  if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+    report_fatal_error("ERROR: Unable to mark stub executable");
+  }
+}
+
+/// getLazyResolverFunction - Remember \p F as the function used to compile
+/// callees on demand and return the assembly entry point that stubs should
+/// branch to.
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+  JITCompilerFunction = F;
+  return ARMCompilationCallback;
+}
+
+/// emitGlobalValueIndirectSym - Allocate a 4-byte, 4-aligned indirect symbol
+/// for \p GV holding \p Ptr as a little-endian word, record the mapping from
+/// \p Ptr to the new slot, and return the slot's address.
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+                                             JITCodeEmitter &JCE) {
+  uint8_t Buffer[4];
+  uint8_t *Cur = Buffer;
+  MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
+  void *PtrAddr = JCE.allocIndirectGV(
+      GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
+  addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+  return PtrAddr;
+}
+
+/// getStubLayout - Return the size and alignment reserved for a function
+/// stub.
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
+  // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
+  // 4-byte address.  See emitFunctionStub for details.
+  StubLayout Result = {16, 4};
+  return Result;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + void *Addr; + // If this is just a call to an external function, emit a branch instead of a + // call. The code is the same except for one bit of the last instruction. + if (Fn != (void*)(intptr_t)ARMCompilationCallback) { + // Branch to the corresponding function addr. + if (IsPIC) { + // The stub is 16-byte size and 4-aligned. + intptr_t LazyPtr = getIndirectSymAddr(Fn); + if (!LazyPtr) { + // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); + DEBUG(if (F) + errs() << "JIT: Indirect symbol emitted at [" << LazyPtr + << "] for GV '" << F->getName() << "'\n"; + else + errs() << "JIT: Stub emitted at [" << LazyPtr + << "] for external function at '" << Fn << "'\n"); + } + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] + JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip + JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] + JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } + } else { + // The stub is 8-byte size and 4-aligned. + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] + JCE.emitWordLE((intptr_t)Fn); // addr of function + sys::Memory::InvalidateInstructionCache(Addr, 8); + if (!sys::Memory::setRangeExecutable(Addr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } + } + } else { + // The compilation callback will overwrite the first two words of this + // stub with indirect branch instructions targeting the compiled code. + // This stub sets the return address to restart the stub, so that + // the new branch will be invoked when we come back. + // + // Branch and link to the compilation callback. + // The stub is 16-byte size and 4-byte aligned. 
+ JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + // Save LR so the callback can determine which stub called it. + // The compilation callback is responsible for popping this prior + // to returning. + JCE.emitWordLE(0xe92d4000); // push {lr} + // Set the return address to go back to the start of this stub. + JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12 + // Invoke the compilation callback. + JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] + // The address of the compilation callback. + JCE.emitWordLE((intptr_t)ARMCompilationCallback); + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } + } + + return Addr; +} + +intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { + ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType(); + switch (RT) { + default: + return (intptr_t)(MR->getResultPointer()); + case ARM::reloc_arm_pic_jt: + // Destination address - jump table base. + return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal(); + case ARM::reloc_arm_jt_base: + // Jump table base address. + return getJumpTableBaseAddr(MR->getJumpTableIndex()); + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + // Constant pool entry address. 
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); + case ARM::reloc_arm_machine_cp_entry: { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); + assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && + "Can't handle this machine constant pool entry yet!"); + intptr_t Addr = (intptr_t)(MR->getResultPointer()); + Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); + return Addr; + } + } +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = resolveRelocDestAddr(MR); + switch ((ARM::RelocationType)MR->getRelocationType()) { + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_relative: { + // It is necessary to calculate the correct PC relative value. We + // subtract the base addr from the target addr to form a byte offset. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + // If the result is positive, set bit U(23) to 1. + if (ResultPtr >= 0) + *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; + else { + // Otherwise, obtain the absolute value and set bit U(23) to 0. + *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); + ResultPtr = - ResultPtr; + } + // Set the immed value calculated. + // VFP immediate offset is multiplied by 4. + if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) + ResultPtr = ResultPtr >> 2; + *((intptr_t*)RelocPos) |= ResultPtr; + // Set register Rn to PC (which is register 15 on all architectures). + // FIXME: This avoids the need for register info in the JIT class. 
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; + break; + } + case ARM::reloc_arm_pic_jt: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_absolute: { + // These addresses have already been resolved. + *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; + break; + } + case ARM::reloc_arm_branch: { + // It is necessary to calculate the correct value of signed_immed_24 + // field. We subtract the base addr from the target addr to form a + // byte offset, which must be inside the range -33554432 and +33554428. + // Then, we set the signed_immed_24 field of the instruction to bits + // [25:2] of the byte offset. More details ARM-ARM p. A4-11. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; + assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_jt_base: { + // JT base - (instruction addr + 8) + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + } + } +} + +void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h new file mode 100644 index 00000000000..27e2a201340 --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.h @@ -0,0 +1,177 @@ +//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This 
file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMJITINFO_H +#define ARMJITINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { + class ARMTargetMachine; + + class ARMJITInfo : public TargetJITInfo { + // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding + // CONSTPOOL_ENTRY addresses. + SmallVector ConstPoolId2AddrMap; + + // JumpTableId2AddrMap - A map from inline jumptable ids to the + // corresponding inline jump table bases. + SmallVector JumpTableId2AddrMap; + + // PCLabelMap - A map from PC labels to addresses. + DenseMap PCLabelMap; + + // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) + // addresses to their indirect symbol addresses. + DenseMap Sym2IndirectSymMap; + + // IsPIC - True if the relocation model is PIC. This is used to determine + // how to codegen function stubs. + bool IsPIC; + + public: + explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. 
+ void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on ARM. + StubLayout getStubLayout() override; + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) override; + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + bool hasCustomConstantPool() const override { return true; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + bool hasCustomJumpTables() const override { return true; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. + bool allocateSeparateGVMemory() const override { +#ifdef __APPLE__ + return true; +#else + return false; +#endif + } + + /// Initialize - Initialize internal stage for the function being JITted. + /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize + /// jump table ids to jump table bases map; remember if codegen relocation + /// model is PIC. + void Initialize(const MachineFunction &MF, bool isPIC); + + /// getConstantPoolEntryAddr - The ARM target puts all constant + /// pool entries into constant islands. 
This returns the address of the + /// constant pool entry of the specified index. + intptr_t getConstantPoolEntryAddr(unsigned CPI) const { + assert(CPI < ConstPoolId2AddrMap.size()); + return ConstPoolId2AddrMap[CPI]; + } + + /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address + /// where its associated value is stored. When relocations are processed, + /// this value will be used to resolve references to the constant. + void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { + assert(CPI < ConstPoolId2AddrMap.size()); + ConstPoolId2AddrMap[CPI] = Addr; + } + + /// getJumpTableBaseAddr - The ARM target inline all jump tables within + /// text section of the function. This returns the address of the base of + /// the jump table of the specified index. + intptr_t getJumpTableBaseAddr(unsigned JTI) const { + assert(JTI < JumpTableId2AddrMap.size()); + return JumpTableId2AddrMap[JTI]; + } + + /// addJumpTableBaseAddr - Map a jump table index to the address where + /// the corresponding inline jump table is emitted. When relocations are + /// processed, this value will be used to resolve references to the + /// jump table. + void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { + assert(JTI < JumpTableId2AddrMap.size()); + JumpTableId2AddrMap[JTI] = Addr; + } + + /// getPCLabelAddr - Retrieve the address of the PC label of the + /// specified id. + intptr_t getPCLabelAddr(unsigned Id) const { + DenseMap::const_iterator I = PCLabelMap.find(Id); + assert(I != PCLabelMap.end()); + return I->second; + } + + /// addPCLabelAddr - Remember the address of the specified PC label. + void addPCLabelAddr(unsigned Id, intptr_t Addr) { + PCLabelMap.insert(std::make_pair(Id, Addr)); + } + + /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the + /// specified symbol located at address. Returns 0 if the indirect symbol + /// has not been emitted. 
+ intptr_t getIndirectSymAddr(void *Addr) const { + DenseMap::const_iterator I= Sym2IndirectSymMap.find(Addr); + if (I != Sym2IndirectSymMap.end()) + return I->second; + return 0; + } + + /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to + /// its indirect symbol address. + void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { + Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); + } + + private: + /// resolveRelocDestAddr - Resolve the resulting address of the relocation + /// if it's not already solved. Constantpool entries must be resolved by + /// ARM target. + intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; + }; +} + +#endif diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2ce083ca2b6..c1b4562f411 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,6 +15,7 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "ARMMachineFunctionInfo.h" @@ -157,7 +158,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), + TSInfo(DL), JITInfo(), InstrInfo(isThumb1Only() ? 
(ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 10c61ccf02a..f79b69199fb 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -18,11 +18,13 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "ARMJITInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -259,6 +261,7 @@ protected: const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + ARMJITInfo *getJITInfo() override { return &JITInfo; } const ARMBaseInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } @@ -275,6 +278,7 @@ protected: private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; // Either Thumb1InstrInfo or Thumb2InstrInfo. std::unique_ptr InstrInfo; ARMTargetLowering TLInfo; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 20e2624c830..d85194b75ec 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -244,3 +244,10 @@ bool ARMPassConfig::addPreEmitPass() { return true; } + +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Machine code emitter pass for ARM. 
+ PM.add(createARMJITCodeEmitterPass(*this, JCE)); + return false; +} diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index d26d817ba7b..8b559682211 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -39,6 +39,8 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override; }; /// ARMTargetMachine - ARM target machine. diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 2530640139a..9b5fa75fe2a 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -2,7 +2,8 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher) @@ -18,6 +19,7 @@ add_llvm_target(ARMCodeGen ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp + ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp @@ -27,6 +29,7 @@ add_llvm_target(ARMCodeGen ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp + ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index c1601a3f29d..f069535ff3c 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -15,7 +15,7 @@ TARGET = ARM BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ - ARMGenCallingConv.inc \ + ARMGenCodeEmitter.inc 
ARMGenCallingConv.inc \ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index c61805b63f5..06a74d72081 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMTarget Target.cpp TargetIntrinsicInfo.cpp + TargetJITInfo.cpp TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 6028db68507..bf67d71b0a0 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -3,7 +3,8 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) tablegen(LLVM MipsGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenFastISel.inc -gen-fast-isel) @@ -23,9 +24,11 @@ add_llvm_target(MipsCodeGen Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp + MipsCodeEmitter.cpp MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp MipsFastISel.cpp + MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 56db450f696..41efa470e42 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -13,7 +13,7 @@ TARGET = Mips # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ - MipsGenAsmWriter.inc MipsGenFastISel.inc \ + MipsGenAsmWriter.inc MipsGenFastISel.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ MipsGenDisassemblerTables.inc \ diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 387ef9f5ae4..d512d6589c4 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -26,6 +26,8 @@ namespace llvm { FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); + FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE); FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm); } // end namespace llvm; diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 8d9cb024a9e..3ca0ffe23eb 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "Mips16ISelLowering.h" #include "MCTargetDesc/MipsBaseInfo.h" -#include "Mips16HardFloatInfo.h" -#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp new file mode 100644 index 00000000000..3885bb96e4f --- /dev/null +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -0,0 +1,483 @@ +//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===---------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Mips machine instructions +// into relocatable machine code. +// +//===---------------------------------------------------------------------===// + +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include +#endif + +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + +class MipsCodeEmitter : public MachineFunctionPass { + MipsJITInfo *JTI; + const MipsInstrInfo *II; + const DataLayout *TD; + const MipsSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + const std::vector *MCPEs; + const std::vector *MJTEs; + bool IsPIC; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired (); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + +public: + MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr), + TM(tm), MCE(mce), MCPEs(nullptr), MJTEs(nullptr), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF) 
override; + + const char *getPassName() const override { + return "Mips Machine Code Emitter"; + } + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + void emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB); + +private: + + void emitWord(unsigned Word); + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getJumpTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTargetOpValueMM(const MachineInstr &MI, + unsigned OpNo) const; + + unsigned getBranchTarget21OpValue(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getBranchTarget26OpValue(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getJumpOffset16OpValue(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMSAMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const; + + /// Expand pseudo instructions with accumulator register operands. + void expandACCInstr(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB, unsigned Opc) const; + + void expandPseudoIndirectBranch(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) const; + + /// \brief Expand pseudo instruction. Return true if MI was expanded. 
+ bool expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const; +}; +} + +char MipsCodeEmitter::ID = 0; + +bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + MipsTargetMachine &Target = static_cast( + const_cast(MF.getTarget())); + // Initialize the subtarget so that we can grab the subtarget dependent + // variables from it. + Subtarget = &TM.getSubtarget(); + JTI = Target.getSubtargetImpl()->getJITInfo(); + II = Subtarget->getInstrInfo(); + TD = Subtarget->getDataLayout(); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = nullptr; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + JTI->Initialize(MF, IsPIC, Subtarget->isLittle()); + MCE.setModuleInfo(&getAnalysis ()); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getName() << "'\n"); + MCE.startFunction(MF); + + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB){ + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); I != E;) + emitInstruction(*I++, *MBB); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const { + // NOTE: This relocations are for static. 
+ uint64_t TSFlags = MI.getDesc().TSFlags; + uint64_t Form = TSFlags & MipsII::FormMask; + if (Form == MipsII::FrmJ) + return Mips::reloc_mips_26; + if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) + && MI.isBranch()) + return Mips::reloc_mips_pc16; + if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) + return Mips::reloc_mips_hi; + return Mips::reloc_mips_lo; +} + +unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + MachineOperand MO = MI.getOperand(OpNo); + if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unexpected jump target operand kind."); + return 0; +} + +unsigned MipsCodeEmitter::getJumpTargetOpValueMM(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTarget21OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTarget26OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getJumpOffset16OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + MachineOperand MO = MI.getOperand(OpNo); + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + return 0; +} + +unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI, + unsigned OpNo) const 
{ + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16; + return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits; +} + +unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getMSAMemEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // size is encoded as size-1. + return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; +} + +unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // size is encoded as pos+size-1. + return getMachineOpValue(MI, MI.getOperand(OpNo-1)) + + getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; +} + +unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
+unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); + else if (MO.isImm()) + return static_cast(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} + +void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), 0, + MayNeedFarStub)); +} + +void MipsCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, 0, 0)); +} + +void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, false)); +} + +void MipsCodeEmitter:: +emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, false)); +} + +void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB)); +} + +void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI); + + // Expand pseudo 
instruction. Skip if MI was not expanded. + if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) && + !expandPseudos(MI, MBB)) + return; + + MCE.processDebugLoc(MI->getDebugLoc(), true); + + emitWord(getBinaryCodeForInstr(*MI)); + ++NumEmitted; // Keep track of the # of mi's emitted + + MCE.processDebugLoc(MI->getDebugLoc(), false); +} + +void MipsCodeEmitter::emitWord(unsigned Word) { + DEBUG(errs() << " 0x"; + errs().write_hex(Word) << "\n"); + if (Subtarget->isLittle()) + MCE.emitWordLE(Word); + else + MCE.emitWordBE(Word); +} + +void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB, + unsigned Opc) const { + // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1". + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc)) + .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg()); +} + +void MipsCodeEmitter::expandPseudoIndirectBranch( + MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB) const { + // This logic is duplicated from MipsAsmPrinter::emitPseudoIndirectBranch() + bool HasLinkReg = false; + unsigned Opcode = 0; + + if (Subtarget->hasMips64r6()) { + // MIPS64r6 should use (JALR64 ZERO_64, $rs) + Opcode = Mips::JALR64; + HasLinkReg = true; + } else if (Subtarget->hasMips32r6()) { + // MIPS32r6 should use (JALR ZERO, $rs) + Opcode = Mips::JALR; + HasLinkReg = true; + } else if (Subtarget->inMicroMipsMode()) + // microMIPS should use (JR_MM $rs) + Opcode = Mips::JR_MM; + else { + // Everything else should use (JR $rs) + Opcode = Mips::JR; + } + + auto MIB = BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opcode)); + + if (HasLinkReg) { + unsigned ZeroReg = Subtarget->isGP64bit() ? 
Mips::ZERO_64 : Mips::ZERO; + MIB.addReg(ZeroReg); + } + + MIB.addReg(MI->getOperand(0).getReg()); +} + +bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const { + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unhandled pseudo"); + return false; + case Mips::NOP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO) + .addReg(Mips::ZERO).addImm(0); + break; + case Mips::B: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO) + .addReg(Mips::ZERO).addOperand(MI->getOperand(0)); + break; + case Mips::TRAP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0) + .addImm(0); + break; + case Mips::JALRPseudo: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA) + .addReg(MI->getOperand(0).getReg()); + break; + case Mips::PseudoMULT: + expandACCInstr(MI, MBB, Mips::MULT); + break; + case Mips::PseudoMULTu: + expandACCInstr(MI, MBB, Mips::MULTu); + break; + case Mips::PseudoSDIV: + expandACCInstr(MI, MBB, Mips::SDIV); + break; + case Mips::PseudoUDIV: + expandACCInstr(MI, MBB, Mips::UDIV); + break; + case Mips::PseudoMADD: + expandACCInstr(MI, MBB, Mips::MADD); + break; + case Mips::PseudoMADDU: + expandACCInstr(MI, MBB, Mips::MADDU); + break; + case Mips::PseudoMSUB: + expandACCInstr(MI, MBB, Mips::MSUB); + break; + case Mips::PseudoMSUBU: + expandACCInstr(MI, MBB, Mips::MSUBU); + break; + case Mips::PseudoReturn: + case Mips::PseudoReturn64: + case Mips::PseudoIndirectBranch: + case Mips::PseudoIndirectBranch64: + expandPseudoIndirectBranch(MI, MBB); + break; + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + // Do nothing + return false; + } + + (MI--)->eraseFromBundle(); + return true; +} + +/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips +/// code to the specified MCE object. 
+FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE) { + return new MipsCodeEmitter(TM, JCE); +} + +#include "MipsGenCodeEmitter.inc" diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index f40e53a34d8..57c20a6cf48 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -28,7 +28,6 @@ #include "MipsTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ddbb2324e9a..416de552b4a 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp new file mode 100644 index 00000000000..2072488206a --- /dev/null +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -0,0 +1,286 @@ +//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the Mips target. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsJITInfo.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "jit" + + +void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)Old; + const unsigned NopInstr = 0x0; + + // If the functions are in the same memory segment, insert PC-region branch. + if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) { + unsigned *OldInstruction = (unsigned *)Old; + *OldInstruction = 0x08000000; + unsigned JTargetAddr = NewAddr & 0x0FFFFFFC; + + JTargetAddr >>= 2; + *OldInstruction |= JTargetAddr; + + // Insert a NOP. + OldInstruction++; + *OldInstruction = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 2 * 4); + } else { + // We need to clear hint bits from the instruction, in case it is 'jr ra'. + const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008; + unsigned* CurrentInstr = (unsigned*)Old; + unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask; + unsigned* NextInstr = CurrentInstr + 1; + unsigned NextInstrHintClear = (*NextInstr) & HintMask; + + // Do absolute jump if there are 2 or more instructions before return from + // the old function. 
+ if ((CurrInstrHintClear != ReturnSequence) && + (NextInstrHintClear != ReturnSequence)) { + const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000; + const unsigned JrT0Instr = 0x01000008; + // lui t0, high 16 bit of the NewAddr + (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16); + // addiu t0, t0, low 16 bit of the NewAddr + (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff); + // jr t0 + (*(CurrentInstr++)) = JrT0Instr; + (*CurrentInstr) = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 4 * 4); + } else { + // Unsupported case + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + } + } +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. This code saves registers, calls +// MipsCompilationCallbackC and restores registers. +extern "C" { +#if defined (__mips__) +void MipsCompilationCallback(); + + asm( + ".text\n" + ".align 2\n" + ".globl " ASMPREFIX "MipsCompilationCallback\n" + ASMPREFIX "MipsCompilationCallback:\n" + ".ent " ASMPREFIX "MipsCompilationCallback\n" + ".frame $sp, 32, $ra\n" + ".set noreorder\n" + ".cpload $t9\n" + + "addiu $sp, $sp, -64\n" + ".cprestore 16\n" + + // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain + // stuff for the real target function right now. 
We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned. We also need to save the ra register since it contains the + // original return address, and t8 register since it contains the address + // of the end of function stub. + "sw $a0, 20($sp)\n" + "sw $a1, 24($sp)\n" + "sw $a2, 28($sp)\n" + "sw $a3, 32($sp)\n" + "sw $ra, 36($sp)\n" + "sw $t8, 40($sp)\n" + "sdc1 $f12, 48($sp)\n" + "sdc1 $f14, 56($sp)\n" + + // t8 points at the end of function stub. Pass the beginning of the stub + // to the MipsCompilationCallbackC. + "addiu $a0, $t8, -16\n" + "jal " ASMPREFIX "MipsCompilationCallbackC\n" + "nop\n" + + // Restore registers. + "lw $a0, 20($sp)\n" + "lw $a1, 24($sp)\n" + "lw $a2, 28($sp)\n" + "lw $a3, 32($sp)\n" + "lw $ra, 36($sp)\n" + "lw $t8, 40($sp)\n" + "ldc1 $f12, 48($sp)\n" + "ldc1 $f14, 56($sp)\n" + "addiu $sp, $sp, 64\n" + + // Jump to the (newly modified) stub to invoke the real function. + "addiu $t8, $t8, -16\n" + "jr $t8\n" + "nop\n" + + ".set reorder\n" + ".end " ASMPREFIX "MipsCompilationCallback\n" + ); +#else // host != Mips + void MipsCompilationCallback() { + llvm_unreachable( + "Cannot call MipsCompilationCallback() on a non-Mips arch!"); + } +#endif +} + +/// MipsCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr); + + // Rewrite the function stub so that we don't end up here every time we + // execute the call. 
We're replacing the first four instructions of the + // stub with code that jumps to the compiled function: + // lui $t9, %hi(NewVal) + // addiu $t9, $t9, %lo(NewVal) + // jr $t9 + // nop + + int Hi = ((unsigned)NewVal & 0xffff0000) >> 16; + if ((NewVal & 0x8000) != 0) + Hi++; + int Lo = (int)(NewVal & 0xffff); + + *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi; + *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo; + *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8; + *(intptr_t *)(StubAddr + 12) = 0; + + sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16); +} + +TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction( + JITCompilerFn F) { + JITCompilerFunction = F; + return MipsCompilationCallback; +} + +TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { + // The stub contains 4 4-byte instructions, aligned at 4 bytes. See + // emitFunctionStub for details. + StubLayout Result = { 4*4, 4 }; + return Result; +} + +void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) { + JCE.emitAlignment(4); + void *Addr = (void*) (JCE.getCurrentPCValue()); + if (!sys::Memory::setRangeWritable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub writable."); + + intptr_t EmittedAddr; + if (Fn != (void*)(intptr_t)MipsCompilationCallback) + EmittedAddr = (intptr_t)Fn; + else + EmittedAddr = (intptr_t)MipsCompilationCallback; + + + int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16; + if ((EmittedAddr & 0x8000) != 0) + Hi++; + int Lo = (int)(EmittedAddr & 0xffff); + + // lui $t9, %hi(EmittedAddr) + // addiu $t9, $t9, %lo(EmittedAddr) + // jalr $t8, $t9 + // nop + if (IsLittleEndian) { + JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi); + JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo); + JCE.emitWordLE(25 << 21 | 24 << 11 | 9); + JCE.emitWordLE(0); + } else { + JCE.emitWordBE(0xf << 26 | 25 << 16 | Hi); + JCE.emitWordBE(9 << 26 | 25 << 21 | 25 << 16 | Lo); + JCE.emitWordBE(25 << 21 | 24 << 11 | 9); + 
JCE.emitWordBE(0); + } + + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub executable."); + + return Addr; +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. GOTBase is not used by this implementation. +void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + + void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); + + switch ((Mips::RelocationType) MR->getRelocationType()) { + case Mips::reloc_mips_pc16: + ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_26: + ResultPtr = (ResultPtr & 0x0fffffff) >> 2; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_hi: + ResultPtr = ResultPtr >> 16; + if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) { + ResultPtr += 1; + } + *((unsigned*) RelocPos) |= ((unsigned) ResultPtr & 0xffff); // imm16 only: drop sign-extension and carry-out bits + break; + + case Mips::reloc_mips_lo: { + // Addend is needed for unaligned load/store instructions, where offset + // for the second load/store in the expanded instruction sequence must + // be modified by +1 or +3. Otherwise, Addend is 0.
+ int Addend = *((unsigned*) RelocPos) & 0xffff; + ResultPtr = (ResultPtr + Addend) & 0xffff; + *((unsigned*) RelocPos) &= 0xffff0000; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + } + } + } +} diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h new file mode 100644 index 00000000000..c9dfd831d2d --- /dev/null +++ b/lib/Target/Mips/MipsJITInfo.h @@ -0,0 +1,71 @@ +//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MipsJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSJITINFO_H +#define MIPSJITINFO_H + +#include "MipsMachineFunction.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { +class MipsTargetMachine; + +class MipsJITInfo : public TargetJITInfo { + + bool IsPIC; + bool IsLittleEndian; + + public: + explicit MipsJITInfo() : + IsPIC(false), IsLittleEndian(true) {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on Mips. + StubLayout getStubLayout() override; + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address.
+ void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) override; + + /// Initialize - Initialize internal stage for the function being JITted. + void Initialize(const MachineFunction &MF, bool isPIC, + bool isLittleEndian) { + IsPIC = isPIC; + IsLittleEndian = isLittleEndian; + } + +}; +} + +#endif diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 71a6f4d4a11..19dac0c0419 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -16,7 +16,6 @@ #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCNaCl.h" -#include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index c1bbf61712b..0733a62cc8a 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// #include "MipsSEISelLowering.h" -#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 1b0f5d7fa91..5bf875daea9 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -115,7 +115,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const 
std::string &CPU, HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), TM(_TM), TargetTriple(TT), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))), - TSInfo(DL), InstrInfo(MipsInstrInfo::create(*this)), + TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(*TM, *this)) { diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ab01414b6b8..3f7a6c3ed73 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -17,6 +17,7 @@ #include "MipsFrameLowering.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" +#include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" @@ -144,6 +145,7 @@ protected: const DataLayout DL; // Calculates type size & alignment const MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; std::unique_ptr InstrInfo; std::unique_ptr FrameLowering; std::unique_ptr TLInfo; @@ -270,6 +272,7 @@ public: void setHelperClassesMips16(); void setHelperClassesMipsSE(); + MipsJITInfo *getJITInfo() override { return &JITInfo; } const MipsSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 79d1b4b13e9..ccf420962cd 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -189,3 +189,10 @@ bool MipsPassConfig::addPreEmitPass() { addPass(createMipsConstantIslandPass(TM)); return true; } + +bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Machine code emitter pass for Mips. 
+ PM.add(createMipsJITCodeEmitterPass(*this, JCE)); + return false; +} diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index e82efe2ecd2..eefd96ab4ae 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -44,12 +44,16 @@ public: return Subtarget; return &DefaultSubtarget; } + MipsSubtarget *getSubtargetImpl() { + return static_cast(TargetMachine::getSubtargetImpl()); + } /// \brief Reset the subtarget for the Mips target. void resetSubtarget(MachineFunction *MF); // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; /// MipsebTargetMachine - Mips32/64 big endian target machine. diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index c0b44598a4b..55bb7293058 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -43,6 +43,12 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + // Emission of machine code through JITCodeEmitter is not supported. + bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, + bool = true) override { + return true; + } + // Emission of machine code through MCJIT is not supported. 
bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) override { diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 47a9474ae16..ea4de63a244 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -2,8 +2,9 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel) @@ -15,6 +16,7 @@ add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen PPCAsmPrinter.cpp PPCBranchSelector.cpp + PPCCodeEmitter.cpp PPCCTRLoops.cpp PPCHazardRecognizers.cpp PPCInstrInfo.cpp @@ -22,6 +24,7 @@ add_llvm_target(PowerPCCodeGen PPCISelLowering.cpp PPCFastISel.cpp PPCFrameLowering.cpp + PPCJITInfo.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp PPCRegisterInfo.cpp diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index cf516f4e5ec..c96674809b0 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -13,7 +13,7 @@ TARGET = PPC # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ - PPCGenAsmWriter.inc \ + PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \ diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 87be6b5382b..ba5fa4f79b4 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -26,6 +26,7 @@ namespace llvm { class PassRegistry; class FunctionPass; class ImmutablePass; + class JITCodeEmitter; class MachineInstr; class AsmPrinter; class MCInst; @@ -40,6 +41,8 @@ namespace llvm { FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5f3b1764173..333780f1fcd 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -386,7 +386,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { return true; const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) + if (TLI->supportJumpTables() && + SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) return true; } } diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp new file mode 100644 index 00000000000..cf704fab277 --- /dev/null +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -0,0 +1,295 @@ +//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to +// JIT-compile bitcode to native PowerPC. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +namespace { + class PPCCodeEmitter : public MachineFunctionPass { + TargetMachine &TM; + JITCodeEmitter &MCE; + MachineModuleInfo *MMI; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record + /// its address in the function into this pointer. + void *MovePCtoLROffset; + public: + + PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), TM(tm), MCE(mce) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. 
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + + MachineRelocation GetRelocation(const MachineOperand &MO, + unsigned RelocID) const; + + /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getAbsDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const; + + const char *getPassName() const override { + return "PowerPC Machine Code Emitter"; + } + + /// runOnMachineFunction - emits the given MachineFunction to memory + /// + bool runOnMachineFunction(MachineFunction &MF) override; + + /// emitBasicBlock - emits the given MachineBasicBlock to memory + /// + void emitBasicBlock(MachineBasicBlock &MBB); + }; +} + +char PPCCodeEmitter::ID = 0; + +/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code +/// to the specified MCE object. 
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &JCE) { + return new PPCCodeEmitter(TM, JCE); +} + +bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); // NOTE(review): '!= A || != B' is always true, so this never fires; intended check unclear (PIC_ is handled below) -- confirm. + + MMI = &getAnalysis(); + MCE.setModuleInfo(MMI); + do { + MovePCtoLROffset = nullptr; + MCE.startFunction(MF); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + emitBasicBlock(*BB); + } while (MCE.finishFunction(MF)); // emit again for as long as finishFunction() returns true + + return false; +} + +void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { + MCE.StartMachineBasicBlock(&MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ + const MachineInstr &MI = *I; + MCE.processDebugLoc(MI.getDebugLoc(), true); + switch (MI.getOpcode()) { + default: + MCE.emitWordBE(getBinaryCodeForInstr(MI)); + break; + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + break; // pseudo opcode, no side effects + case PPC::MovePCtoLR: + case PPC::MovePCtoLR8: + assert(TM.getRelocationModel() == Reloc::PIC_); + MovePCtoLROffset = (void*)MCE.getCurrentPCValue(); + MCE.emitWordBE(0x48000005); // bl .+4 (word offset 1, link bit set) + break; + } + MCE.processDebugLoc(MI.getDebugLoc(), false); + } +} + +unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || + MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && + (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); + return 0x80 >> TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); +} + +MachineRelocation
PPCCodeEmitter::GetRelocation(const MachineOperand &MO, + unsigned RelocID) const { + // If in PIC mode, we need to encode the negated address of the + // 'movepctolr' into the unrelocated field. After relocation, we'll have + // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm + // field, we get &gv. This doesn't happen for branch relocations, which are + // always implicitly pc relative. + intptr_t Cst = 0; + if (TM.getRelocationModel() == Reloc::PIC_) { + assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); + Cst = -(intptr_t)MovePCtoLROffset - 4; + } + + if (MO.isGlobal()) + return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID, + const_cast(MO.getGlobal()), + Cst, isa(MO.getGlobal())); + if (MO.isSymbol()) + return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + RelocID, MO.getSymbolName(), Cst); + if (MO.isCPI()) + return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); + + if (MO.isMBB()) + return MachineRelocation::getBB(MCE.getCurrentPCOffset(), + RelocID, MO.getMBB()); + + assert(MO.isJTI()); + return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); +} + +unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx)); + return 0; +} + +unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx)); + return 0; +} + +unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + llvm_unreachable("Absolute branch relocations unsupported 
on the old JIT."); +} + +unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); +} + +unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + unsigned RelocID; + switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) { + default: llvm_unreachable("Unsupported target operand flags!"); + case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break; + case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break; + } + + MCE.addRelocation(GetRelocation(MO, RelocID)); + return 0; +} + +unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memri, which has the low 16-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits; + + // Add a fixup for the displacement field. + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + return RegBits; +} + +unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memrix, which has the low 14-bits as the + // displacement and the next 5 bits as the register #. 
+ assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits; + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix)); + return RegBits; +} + + +unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} + +unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} + +unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + + if (MO.isReg()) { + // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // The GPR operand should come through here though. + assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && + MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || + MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); + return TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); + } + + assert(MO.isImm() && + "Relocation required in an instruction that we cannot encode!"); + return MO.getImm(); +} + +#include "PPCGenCodeEmitter.inc" diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 61003cf1b34..36e1e1334e5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -684,6 +684,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (Subtarget.isDarwin()) setPrefFunctionAlignment(4); + if (isPPC64 && Subtarget.isJITCodeModel()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. 
+ setSupportJumpTables(false); + setInsertFencesForAtomic(true); if (Subtarget.enableMachineScheduler()) @@ -3559,27 +3564,33 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, } if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - unsigned OpFlags = 0; - if ((DAG.getTarget().getRelocationModel() != Reloc::Static && - (Subtarget.getTargetTriple().isMacOSX() && - Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) || - (Subtarget.isTargetELF() && !isPPC64 && - !G->getGlobal()->hasLocalLinkage() && - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { - // PC-relative references to external symbols should go through $stub, - // unless we're building with the leopard linker or later, which - // automatically synthesizes these stubs. - OpFlags = PPCII::MO_PLT_OR_STUB; - } + // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 + // Use indirect calls for ALL functions calls in JIT mode, since the + // far-call stubs may be outside relocation limits for a BL instruction. + if (!DAG.getTarget().getSubtarget().isJITCodeModel()) { + unsigned OpFlags = 0; + if ((DAG.getTarget().getRelocationModel() != Reloc::Static && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && + (G->getGlobal()->isDeclaration() || + G->getGlobal()->isWeakForLinker())) || + (Subtarget.isTargetELF() && !isPPC64 && + !G->getGlobal()->hasLocalLinkage() && + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = PPCII::MO_PLT_OR_STUB; + } - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. 
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType(), 0, OpFlags); - needIndirectCall = false; + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress / + // TargetExternalSymbol node so that legalize doesn't hack it. + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType(), + 0, OpFlags); + needIndirectCall = false; + } } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp new file mode 100644 index 00000000000..e5f113a0c03 --- /dev/null +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -0,0 +1,482 @@ +//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the 32-bit PowerPC target. 
+// +//===----------------------------------------------------------------------===// + +#include "PPCJITInfo.h" +#include "PPCRelocations.h" +#include "PPCSubtarget.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "jit" + +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +PPCJITInfo::PPCJITInfo(PPCSubtarget &STI) + : Subtarget(STI), is64Bit(STI.isPPC64()) { + useGOT = 0; +} + +#define BUILD_ADDIS(RD,RS,IMM16) \ + ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) +#define BUILD_ORI(RD,RS,UIMM16) \ + ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_ORIS(RD,RS,UIMM16) \ + ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_RLDICR(RD,RS,SH,ME) \ + ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \ + (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1)) +#define BUILD_MTSPR(RS,SPR) \ + ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1)) +#define BUILD_BCCTRx(BO,BI,LINK) \ + ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1)) +#define BUILD_B(TARGET, LINK) \ + ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1)) + +// Pseudo-ops +#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16) +#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6) +#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9) +#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK) + +static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ + intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2; + unsigned *AtI = (unsigned*)(intptr_t)At; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? 
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target + } else if (!is64Bit) { + AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[2] = BUILD_MTCTR(12); // mtctr r12 + AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl + } else { + AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address) + AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32 + AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address) + AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[5] = BUILD_MTCTR(12); // mtctr r12 + AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl + } +} + +extern "C" void PPC32CompilationCallback(); +extern "C" void PPC64CompilationCallback(); + +// The first clause of the preprocessor directive looks wrong, but it is +// necessary when compiling this code on non-PowerPC hosts. +#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__) +void PPC32CompilationCallback() { + llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!"); +} +#elif !defined(__ELF__) +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl _PPC32CompilationCallback\n" +"_PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the + // FIXME: need to save v[0-19] for altivec? 
+ // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // PowerPC32 ABI linkage - 24 bytes + // parameters - 32 bytes + // 13 double registers - 104 bytes + // 8 int registers - 32 bytes + "mflr r0\n" + "stw r0, 8(r1)\n" + "stwu r1, -208(r1)\n" + // Save all int arg registers + "stw r10, 204(r1)\n" "stw r9, 200(r1)\n" + "stw r8, 196(r1)\n" "stw r7, 192(r1)\n" + "stw r6, 188(r1)\n" "stw r5, 184(r1)\n" + "stw r4, 180(r1)\n" "stw r3, 176(r1)\n" + // Save all call-clobbered FP regs. + "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n" + "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n" + "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n" + "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n" + "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n" + "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n" + "stfd f1, 72(r1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. 
+ "mr r3, r0\n" + "lwz r2, 208(r1)\n" // stub's frame + "lwz r4, 8(r2)\n" // stub's lr + "li r5, 0\n" // 0 == 32 bit + "bl _LLVMPPCCompilationCallback\n" + "mtctr r3\n" + // Restore all int arg registers + "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" + "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n" + "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n" + "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n" + // Restore all FP arg registers + "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n" + "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n" + "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n" + "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n" + "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n" + "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n" + "lfd f1, 72(r1)\n" + // Pop 3 frames off the stack and branch to target + "lwz r1, 208(r1)\n" + "lwz r2, 8(r1)\n" + "mtlr r2\n" + "bctr\n" + ); + +#else +// ELF PPC 32 support + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl PPC32CompilationCallback\n" +"PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the + // FIXME: need to save v[0-19] for altivec? + // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // 8 double registers - 64 bytes + // 8 int registers - 32 bytes + "mflr 0\n" + "stw 0, 4(1)\n" + "stwu 1, -104(1)\n" + // Save all int arg registers + "stw 10, 100(1)\n" "stw 9, 96(1)\n" + "stw 8, 92(1)\n" "stw 7, 88(1)\n" + "stw 6, 84(1)\n" "stw 5, 80(1)\n" + "stw 4, 76(1)\n" "stw 3, 72(1)\n" + // Save all call-clobbered FP regs. 
+ "stfd 8, 64(1)\n" + "stfd 7, 56(1)\n" "stfd 6, 48(1)\n" + "stfd 5, 40(1)\n" "stfd 4, 32(1)\n" + "stfd 3, 24(1)\n" "stfd 2, 16(1)\n" + "stfd 1, 8(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr 3, 0\n" + "lwz 5, 104(1)\n" // stub's frame + "lwz 4, 4(5)\n" // stub's lr + "li 5, 0\n" // 0 == 32 bit + "bl LLVMPPCCompilationCallback\n" + "mtctr 3\n" + // Restore all int arg registers + "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" + "lwz 8, 92(1)\n" "lwz 7, 88(1)\n" + "lwz 6, 84(1)\n" "lwz 5, 80(1)\n" + "lwz 4, 76(1)\n" "lwz 3, 72(1)\n" + // Restore all FP arg registers + "lfd 8, 64(1)\n" + "lfd 7, 56(1)\n" "lfd 6, 48(1)\n" + "lfd 5, 40(1)\n" "lfd 4, 32(1)\n" + "lfd 3, 24(1)\n" "lfd 2, 16(1)\n" + "lfd 1, 8(1)\n" + // Pop 3 frames off the stack and branch to target + "lwz 1, 104(1)\n" + "lwz 0, 4(1)\n" + "mtlr 0\n" + "bctr\n" + ); +#endif + +#if !defined(__powerpc64__) && !defined(__ppc64__) +void PPC64CompilationCallback() { + llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!"); +} +#else +# ifdef __ELF__ +asm( + ".text\n" + ".align 2\n" + ".globl PPC64CompilationCallback\n" +#if _CALL_ELF == 2 + ".type PPC64CompilationCallback,@function\n" +"PPC64CompilationCallback:\n" +#else + ".section \".opd\",\"aw\",@progbits\n" + ".align 3\n" +"PPC64CompilationCallback:\n" + ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n" + ".size PPC64CompilationCallback,24\n" + ".previous\n" + ".align 4\n" + ".type PPC64CompilationCallback,@function\n" +".L.PPC64CompilationCallback:\n" +#endif +# else +asm( + ".text\n" + ".align 2\n" + ".globl _PPC64CompilationCallback\n" +"_PPC64CompilationCallback:\n" +# endif + // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the + // FIXME: need to save v[0-19] for altivec? 
+ // Set up a proper stack frame + // Layout + // PowerPC64 ABI linkage - 48 bytes + // parameters - 64 bytes + // 13 double registers - 104 bytes + // 8 int registers - 64 bytes + "mflr 0\n" + "std 0, 16(1)\n" + "stdu 1, -280(1)\n" + // Save all int arg registers + "std 10, 272(1)\n" "std 9, 264(1)\n" + "std 8, 256(1)\n" "std 7, 248(1)\n" + "std 6, 240(1)\n" "std 5, 232(1)\n" + "std 4, 224(1)\n" "std 3, 216(1)\n" + // Save all call-clobbered FP regs. + "stfd 13, 208(1)\n" "stfd 12, 200(1)\n" + "stfd 11, 192(1)\n" "stfd 10, 184(1)\n" + "stfd 9, 176(1)\n" "stfd 8, 168(1)\n" + "stfd 7, 160(1)\n" "stfd 6, 152(1)\n" + "stfd 5, 144(1)\n" "stfd 4, 136(1)\n" + "stfd 3, 128(1)\n" "stfd 2, 120(1)\n" + "stfd 1, 112(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 1. + "mr 3, 0\n" // return address (still in r0) + "ld 5, 280(1)\n" // stub's frame + "ld 4, 16(5)\n" // stub's lr + "li 5, 1\n" // 1 == 64 bit +# ifdef __ELF__ + "bl LLVMPPCCompilationCallback\n" + "nop\n" +# else + "bl _LLVMPPCCompilationCallback\n" +# endif + "mtctr 3\n" + // Restore all int arg registers + "ld 10, 272(1)\n" "ld 9, 264(1)\n" + "ld 8, 256(1)\n" "ld 7, 248(1)\n" + "ld 6, 240(1)\n" "ld 5, 232(1)\n" + "ld 4, 224(1)\n" "ld 3, 216(1)\n" + // Restore all FP arg registers + "lfd 13, 208(1)\n" "lfd 12, 200(1)\n" + "lfd 11, 192(1)\n" "lfd 10, 184(1)\n" + "lfd 9, 176(1)\n" "lfd 8, 168(1)\n" + "lfd 7, 160(1)\n" "lfd 6, 152(1)\n" + "lfd 5, 144(1)\n" "lfd 4, 136(1)\n" + "lfd 3, 128(1)\n" "lfd 2, 120(1)\n" + "lfd 1, 112(1)\n" + // Pop 3 frames off the stack and branch to target + "ld 1, 280(1)\n" + "ld 0, 16(1)\n" + "mtlr 0\n" + // XXX: any special TOC handling in the ELF case for JIT? 
+ "bctr\n" + ); +#endif + +extern "C" { +LLVM_LIBRARY_VISIBILITY void * +LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { + // Adjust the pointer to the address of the call instruction in the stub + // emitted by emitFunctionStub, rather than the instruction after it. + unsigned *StubCallAddr = StubCallAddrPlus4 - 1; + unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1; + + void *Target = JITCompilerFunction(StubCallAddr); + + // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite + // it to branch directly to the destination. If so, rewrite it so it does not + // need to go through the stub anymore. + unsigned OrigCallInst = *OrigCallAddr; + if ((OrigCallInst >> 26) == 18) { // Direct call. + intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? + // Clear the original target out. + OrigCallInst &= (63 << 26) | 3; + // Fill in the new target. + OrigCallInst |= (Offset & ((1 << 24)-1)) << 2; + // Replace the call. + *OrigCallAddr = OrigCallInst; + } + } + + // Assert that we are coming from a stub that was created with our + // emitFunctionStub. + if ((*StubCallAddr >> 26) == 18) + StubCallAddr -= 3; + else { + assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!"); + StubCallAddr -= is64Bit ? 9 : 6; + } + + // Rewrite the stub with an unconditional branch to the target, for any users + // who took the address of the stub. + EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); + sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4); + + // Put the address of the target function to call and the address to return to + // after calling the target function in a place that is easy to get on the + // stack after we restore all regs. 
+ return Target; +} +} + + + +TargetJITInfo::LazyResolverFn +PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { + JITCompilerFunction = Fn; + return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; +} + +TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() { + // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3 + // instructions to save the caller's address if this is a lazy-compilation + // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address + // into a register and jump through it. + StubLayout Result = {10*4, 4}; + return Result; +} + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ +defined(__APPLE__) +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +#endif + +void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. The code is the same except for one bit of the last instruction. 
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback && + Fn != (void*)(intptr_t)PPC64CompilationCallback) { + void *Addr = (void*)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 7*4); + return Addr; + } + + void *Addr = (void*)JCE.getCurrentPCValue(); + if (is64Bit) { + JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0xf9610060); // std r11, 96(r1) + } else if (Subtarget.isDarwinABI()){ + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610028); // stw r11, 40(r1) + } else { + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610024); // stw r11, 36(r1) + } + intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 10*4); + return Addr; +} + + +void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((PPC::RelocationType)MR->getRelocationType()) { + default: llvm_unreachable("Unknown relocation type!"); + case PPC::reloc_pcrel_bx: + // PC-relative relocation for b and bl instructions. 
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2; + break; + case PPC::reloc_pcrel_bcx: + // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other + // bcx instructions. + ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2; + break; + case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr + case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr + ResultPtr += MR->getConstantVal(); + + // If this is a high-part access, get the high-part. + if (MR->getRelocationType() == PPC::reloc_absolute_high) { + // If the low part will have a carry (really a borrow) from the low + // 16-bits into the high 16, add a bit to borrow from. + if (((int)ResultPtr << 16) < 0) + ResultPtr += 1 << 16; + ResultPtr >>= 16; + } + + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 65535; + unsigned HighBits = *RelocPos & ~65535; + *RelocPos = LowBits | HighBits; // Slam into low 16-bits + break; + } + case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr + ResultPtr += MR->getConstantVal(); + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC; + unsigned HighBits = *RelocPos & 0xFFFF0003; + *RelocPos = LowBits | HighBits; // Slam into low 14-bits. 
+ break; + } + } + } +} + +void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Old, 7*4); +} diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h new file mode 100644 index 00000000000..b6b37ffb852 --- /dev/null +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -0,0 +1,46 @@ +//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_JITINFO_H +#define POWERPC_JITINFO_H + +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { +class PPCSubtarget; +class PPCJITInfo : public TargetJITInfo { +protected: + PPCSubtarget &Subtarget; + bool is64Bit; + +public: + PPCJITInfo(PPCSubtarget &STI); + + StubLayout getStubLayout() override; + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, + unsigned char *GOTBase) override; + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. 
+ /// + void replaceMachineCodeForFunction(void *Old, void *New) override; +}; +} + +#endif diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index fd7e0c761a2..85b77354de3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -80,9 +80,21 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), OptLevel(OptLevel), TargetABI(PPC_ABI_UNKNOWN), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - DL(getDataLayoutString(*this)), InstrInfo(*this), + DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this), TLInfo(TM), TSInfo(&DL) {} +/// SetJITMode - This is called to inform the subtarget info that we are +/// producing code for the JIT. +void PPCSubtarget::SetJITMode() { + // JIT mode doesn't want lazy resolver stubs, it knows exactly where + // everything is. This matters for PPC64, which codegens in PIC mode without + // stubs. 
+ HasLazyResolverStubs = false; + + // Calls to external functions need to use indirect calls + IsJITCodeModel = true; +} + void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet FnAttrs = MF->getFunction()->getAttributes(); Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, @@ -131,6 +143,7 @@ void PPCSubtarget::initializeEnvironment() { DeprecatedMFTB = false; DeprecatedDST = false; HasLazyResolverStubs = false; + IsJITCodeModel = false; } void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index f74a2a76d41..374962de427 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -17,6 +17,7 @@ #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCJITInfo.h" #include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -103,6 +104,7 @@ protected: bool DeprecatedMFTB; bool DeprecatedDST; bool HasLazyResolverStubs; + bool IsJITCodeModel; bool IsLittleEndian; /// TargetTriple - What processor and OS we're targeting. @@ -120,6 +122,7 @@ protected: PPCFrameLowering FrameLowering; const DataLayout DL; PPCInstrInfo InstrInfo; + PPCJITInfo JITInfo; PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; @@ -135,6 +138,10 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + /// SetJITMode - This is called to inform the subtarget info that we are + /// producing code for the JIT. + void SetJITMode(); + /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
@@ -155,6 +162,7 @@ public: } const DataLayout *getDataLayout() const override { return &DL; } const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } + PPCJITInfo *getJITInfo() override { return &JITInfo; } const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } @@ -199,6 +207,9 @@ public: bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; + // isJITCodeModel - True if we're generating code for the JIT + bool isJITCodeModel() const { return IsJITCodeModel; } + // isLittleEndian - True if generating little-endian code bool isLittleEndian() const { return IsLittleEndian; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index e7f961c4324..9563b9045c3 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -148,6 +148,18 @@ bool PPCPassConfig::addPreEmitPass() { return false; } +bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho + // writing? + Subtarget.SetJITMode(); + + // Machine code emitter pass for PowerPC. + PM.add(createPPCJITCodeEmitterPass(*this, JCE)); + + return false; +} + void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our PPC pass. This // allows the PPC pass to delegate to the target independent layer when diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index b8078bdb5b6..9bda22a354d 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -36,6 +36,8 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) override; /// \brief Register PPC analysis passes with a pass manager. 
void addAnalysisPasses(PassManagerBase &PM) override; diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index c5f4680d49c..49a7f8aa18c 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -6,7 +6,7 @@ tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) -tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer) tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) add_public_tablegen_target(AMDGPUCommonTableGen) diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index c486411f9a1..cebda920e74 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -2,8 +2,9 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td) tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter) tablegen(LLVM SparcGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SparcGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel) @@ -23,6 +24,8 @@ add_llvm_target(SparcCodeGen SparcSubtarget.cpp SparcTargetMachine.cpp SparcSelectionDAGInfo.cpp + SparcJITInfo.cpp + SparcCodeEmitter.cpp SparcMCInstLower.cpp SparcTargetObjectFile.cpp ) diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index c2a95b47151..bcc02918cdb 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \ 
SparcGenAsmWriter.inc SparcGenAsmMatcher.inc \ SparcGenDAGISel.inc SparcGenDisassemblerTables.inc \ SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \ - SparcGenMCCodeEmitter.inc + SparcGenCodeEmitter.inc SparcGenMCCodeEmitter.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index 75572f2ea75..de20aaa5db5 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -29,6 +29,8 @@ namespace llvm { FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM, + JITCodeEmitter &JCE); void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp new file mode 100644 index 00000000000..98239bfc400 --- /dev/null +++ b/lib/Target/Sparc/SparcCodeEmitter.cpp @@ -0,0 +1,281 @@ +//===-- Sparc/SparcCodeEmitter.cpp - Convert Sparc Code to Machine Code ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Sparc machine instructions +// into relocatable machine code. 
+// +//===---------------------------------------------------------------------===// + +#include "Sparc.h" +#include "MCTargetDesc/SparcMCExpr.h" +#include "SparcRelocations.h" +#include "SparcTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + +class SparcCodeEmitter : public MachineFunctionPass { + SparcJITInfo *JTI; + const SparcInstrInfo *II; + const DataLayout *TD; + const SparcSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + const std::vector<MachineConstantPoolEntry> *MCPEs; + bool IsPIC; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineModuleInfo> (); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + +public: + SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr), + TM(tm), MCE(mce), MCPEs(nullptr), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Sparc Machine Code Emitter"; + } + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + void emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB); + +private: + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getCallTargetOpValue(const MachineInstr &MI, + unsigned) const; + unsigned getBranchTargetOpValue(const MachineInstr &MI, + unsigned) const; + unsigned getBranchPredTargetOpValue(const MachineInstr &MI, + unsigned) const; + unsigned getBranchOnRegTargetOpValue(const MachineInstr &MI, + unsigned) const; + + void emitWord(unsigned Word); + + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; + + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; +}; +} // end anonymous namespace. + +char SparcCodeEmitter::ID = 0; + +bool SparcCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + SparcTargetMachine &Target = static_cast<SparcTargetMachine &>( + const_cast<TargetMachine &>(MF.getTarget())); + + JTI = Target.getSubtargetImpl()->getJITInfo(); + II = Target.getSubtargetImpl()->getInstrInfo(); + TD = Target.getSubtargetImpl()->getDataLayout(); + Subtarget = &TM.getSubtarget<SparcSubtarget>(); + MCPEs = &MF.getConstantPool()->getConstants(); + JTI->Initialize(MF, IsPIC); + MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ()); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getName() << "'\n"); + MCE.startFunction(MF); + + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB){ + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); I != E;) + emitInstruction(*I++, *MBB); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +void SparcCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI); + + MCE.processDebugLoc(MI->getDebugLoc(), true); + + ++NumEmitted; + +
switch (MI->getOpcode()) { + default: { + emitWord(getBinaryCodeForInstr(*MI)); + break; + } + case TargetOpcode::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI->getOperand(0).getSymbolName()[0]) { + report_fatal_error("JIT does not support inline asm!"); + } + break; + } + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::EH_LABEL: { + MCE.emitLabel(MI->getOperand(0).getMCSymbol()); + break; + } + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: { + // Do nothing. + break; + } + case SP::GETPCX: { + report_fatal_error("JIT does not support pseudo instruction GETPCX yet!"); + break; + } + } + + MCE.processDebugLoc(MI->getDebugLoc(), false); +} + +void SparcCodeEmitter::emitWord(unsigned Word) { + DEBUG(errs() << " 0x"; + errs().write_hex(Word) << "\n"); + MCE.emitWordBE(Word); +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
+unsigned SparcCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); + else if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO)); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} +unsigned SparcCodeEmitter::getCallTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getBranchPredTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getBranchOnRegTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const { + + unsigned TF = MO.getTargetFlags(); + switch (TF) { + default: + case SparcMCExpr::VK_Sparc_None: break; + case SparcMCExpr::VK_Sparc_LO: return SP::reloc_sparc_lo; + case SparcMCExpr::VK_Sparc_HI: return SP::reloc_sparc_hi; + case SparcMCExpr::VK_Sparc_H44: return SP::reloc_sparc_h44; + case SparcMCExpr::VK_Sparc_M44: return SP::reloc_sparc_m44; + case SparcMCExpr::VK_Sparc_L44: return SP::reloc_sparc_l44; + case
SparcMCExpr::VK_Sparc_HH: return SP::reloc_sparc_hh; + case SparcMCExpr::VK_Sparc_HM: return SP::reloc_sparc_hm; + } + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: break; + case SP::CALL: return SP::reloc_sparc_pc30; + case SP::BA: + case SP::BCOND: + case SP::FBCOND: return SP::reloc_sparc_pc22; + case SP::BPXCC: return SP::reloc_sparc_pc19; + } + llvm_unreachable("unknown reloc!"); +} + +void SparcCodeEmitter::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), 0, + true)); +} + +void SparcCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, 0, 0)); +} + +void SparcCodeEmitter:: +emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, false)); +} + +void SparcCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB)); +} + + +/// createSparcJITCodeEmitterPass - Return a pass that emits the collected Sparc +/// code to the specified MCE object. +FunctionPass *llvm::createSparcJITCodeEmitterPass(SparcTargetMachine &TM, + JITCodeEmitter &JCE) { + return new SparcCodeEmitter(TM, JCE); +} + +#include "SparcGenCodeEmitter.inc" diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp new file mode 100644 index 00000000000..d0eec98b5e9 --- /dev/null +++ b/lib/Target/Sparc/SparcJITInfo.cpp @@ -0,0 +1,326 @@ +//===-- SparcJITInfo.cpp - Implement the Sparc JIT Interface --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the Sparc target. +// +//===----------------------------------------------------------------------===// +#include "SparcJITInfo.h" +#include "Sparc.h" +#include "SparcRelocations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Support/Memory.h" + +using namespace llvm; + +#define DEBUG_TYPE "jit" + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +extern "C" void SparcCompilationCallback(); + +extern "C" { +#if defined (__sparc__) + +#if defined(__arch64__) +#define FRAME_PTR(X) #X "+2047" +#else +#define FRAME_PTR(X) #X +#endif + + asm( + ".text\n" + "\t.align 4\n" + "\t.global SparcCompilationCallback\n" + "\t.type SparcCompilationCallback, #function\n" + "SparcCompilationCallback:\n" + // Save current register window and create stack. + // 128 (save area) + 6*8 (for arguments) + 16*8 (for float regfile) = 304 + "\tsave %sp, -304, %sp\n" + // save float regfile to the stack. + "\tstd %f0, [" FRAME_PTR(%fp) "-0]\n" + "\tstd %f2, [" FRAME_PTR(%fp) "-8]\n" + "\tstd %f4, [" FRAME_PTR(%fp) "-16]\n" + "\tstd %f6, [" FRAME_PTR(%fp) "-24]\n" + "\tstd %f8, [" FRAME_PTR(%fp) "-32]\n" + "\tstd %f10, [" FRAME_PTR(%fp) "-40]\n" + "\tstd %f12, [" FRAME_PTR(%fp) "-48]\n" + "\tstd %f14, [" FRAME_PTR(%fp) "-56]\n" + "\tstd %f16, [" FRAME_PTR(%fp) "-64]\n" + "\tstd %f18, [" FRAME_PTR(%fp) "-72]\n" + "\tstd %f20, [" FRAME_PTR(%fp) "-80]\n" + "\tstd %f22, [" FRAME_PTR(%fp) "-88]\n" + "\tstd %f24, [" FRAME_PTR(%fp) "-96]\n" + "\tstd %f26, [" FRAME_PTR(%fp) "-104]\n" + "\tstd %f28, [" FRAME_PTR(%fp) "-112]\n" + "\tstd %f30, [" FRAME_PTR(%fp) "-120]\n" + // stubaddr is in %g1. + "\tcall SparcCompilationCallbackC\n" + "\t mov %g1, %o0\n" + // restore float regfile from the stack. 
+ "\tldd [" FRAME_PTR(%fp) "-0], %f0\n" + "\tldd [" FRAME_PTR(%fp) "-8], %f2\n" + "\tldd [" FRAME_PTR(%fp) "-16], %f4\n" + "\tldd [" FRAME_PTR(%fp) "-24], %f6\n" + "\tldd [" FRAME_PTR(%fp) "-32], %f8\n" + "\tldd [" FRAME_PTR(%fp) "-40], %f10\n" + "\tldd [" FRAME_PTR(%fp) "-48], %f12\n" + "\tldd [" FRAME_PTR(%fp) "-56], %f14\n" + "\tldd [" FRAME_PTR(%fp) "-64], %f16\n" + "\tldd [" FRAME_PTR(%fp) "-72], %f18\n" + "\tldd [" FRAME_PTR(%fp) "-80], %f20\n" + "\tldd [" FRAME_PTR(%fp) "-88], %f22\n" + "\tldd [" FRAME_PTR(%fp) "-96], %f24\n" + "\tldd [" FRAME_PTR(%fp) "-104], %f26\n" + "\tldd [" FRAME_PTR(%fp) "-112], %f28\n" + "\tldd [" FRAME_PTR(%fp) "-120], %f30\n" + // restore original register window and + // copy %o0 to %g1 + "\trestore %o0, 0, %g1\n" + // call the new stub + "\tjmp %g1\n" + "\t nop\n" + "\t.size SparcCompilationCallback, .-SparcCompilationCallback" + ); +#else + void SparcCompilationCallback() { + llvm_unreachable( + "Cannot call SparcCompilationCallback() on a non-sparc arch!"); + } +#endif +} + + +#define SETHI_INST(imm, rd) (0x01000000 | ((rd) << 25) | ((imm) & 0x3FFFFF)) +#define JMP_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x38 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define NOP_INST SETHI_INST(0, 0) +#define OR_INST_I(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define OR_INST_R(rs1, rs2, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \ + | ((rs1) << 14) | (0 << 13) | ((rs2) & 0x1F)) +#define RDPC_INST(rd) (0x80000000 | ((rd) << 25) | (0x28 << 19) \ + | (5 << 14)) +#define LDX_INST(rs1, imm, rd) (0xC0000000 | ((rd) << 25) | (0x0B << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define SLLX_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x25 << 19) \ + | ((rs1) << 14) | (3 << 12) | ((imm) & 0x3F)) +#define SUB_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x04 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define 
XOR_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x03 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define BA_INST(tgt) (0x10800000 | ((tgt) & 0x3FFFFF)) + +// Emit instructions to jump to Addr and store the starting address of +// the instructions emitted in the scratch register. +static void emitInstrForIndirectJump(intptr_t Addr, + unsigned scratch, + SmallVectorImpl &Insts) { + + if (isInt<13>(Addr)) { + // Emit: jmpl %g0+Addr, + // nop + Insts.push_back(JMP_INST(0, LO10(Addr), scratch)); + Insts.push_back(NOP_INST); + return; + } + + if (isUInt<32>(Addr)) { + // Emit: sethi %hi(Addr), scratch + // jmpl scratch+%lo(Addr), scratch + // sub scratch, 4, scratch + Insts.push_back(SETHI_INST(HI22(Addr), scratch)); + Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch)); + Insts.push_back(SUB_INST(scratch, 4, scratch)); + return; + } + + if (Addr < 0 && isInt<33>(Addr)) { + // Emit: sethi %hix(Addr), scratch) + // xor scratch, %lox(Addr), scratch + // jmpl scratch+0, scratch + // sub scratch, 8, scratch + Insts.push_back(SETHI_INST(HIX22(Addr), scratch)); + Insts.push_back(XOR_INST(scratch, LOX10(Addr), scratch)); + Insts.push_back(JMP_INST(scratch, 0, scratch)); + Insts.push_back(SUB_INST(scratch, 8, scratch)); + return; + } + + // Emit: rd %pc, scratch + // ldx [scratch+16], scratch + // jmpl scratch+0, scratch + // sub scratch, 8, scratch + // + Insts.push_back(RDPC_INST(scratch)); + Insts.push_back(LDX_INST(scratch, 16, scratch)); + Insts.push_back(JMP_INST(scratch, 0, scratch)); + Insts.push_back(SUB_INST(scratch, 8, scratch)); + Insts.push_back((uint32_t)(((int64_t)Addr) >> 32) & 0xffffffff); + Insts.push_back((uint32_t)(Addr & 0xffffffff)); + + // Instruction sequence without rdpc instruction + // 7 instruction and 2 scratch register + // Emit: sethi %hh(Addr), scratch + // or scratch, %hm(Addr), scratch + // sllx scratch, 32, scratch + // sethi %hi(Addr), scratch2 + // or scratch, scratch2, scratch + // jmpl scratch+%lo(Addr), scratch + 
// sub scratch, 20, scratch + // Insts.push_back(SETHI_INST(HH22(Addr), scratch)); + // Insts.push_back(OR_INST_I(scratch, HM10(Addr), scratch)); + // Insts.push_back(SLLX_INST(scratch, 32, scratch)); + // Insts.push_back(SETHI_INST(HI22(Addr), scratch2)); + // Insts.push_back(OR_INST_R(scratch, scratch2, scratch)); + // Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch)); + // Insts.push_back(SUB_INST(scratch, 20, scratch)); +} + +extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr); + + // Rewrite the function stub so that we don't end up here every time we + // execute the call. We're replacing the stub instructions with code + // that jumps to the compiled function: + + SmallVector Insts; + intptr_t diff = (NewVal - StubAddr) >> 2; + if (isInt<22>(diff)) { + // Use branch instruction to jump + Insts.push_back(BA_INST(diff)); + Insts.push_back(NOP_INST); + } else { + // Otherwise, use indirect jump to the compiled function + emitInstrForIndirectJump(NewVal, 1, Insts); + } + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + *(uint32_t *)(StubAddr + i*4) = Insts[i]; + + sys::Memory::InvalidateInstructionCache((void*) StubAddr, Insts.size() * 4); + return (void*)StubAddr; +} + + +void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + llvm_unreachable("FIXME: Implement SparcJITInfo::" + "replaceMachineCodeForFunction"); +} + + +TargetJITInfo::StubLayout SparcJITInfo::getStubLayout() { + // The stub contains maximum of 4 4-byte instructions and 8 bytes for address, + // aligned at 32 bytes. + // See emitFunctionStub and emitInstrForIndirectJump for details. 
+ StubLayout Result = { 4*4 + 8, 32 }; + return Result; +} + +void *SparcJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) +{ + JCE.emitAlignment(32); + void *Addr = (void*) (JCE.getCurrentPCValue()); + + intptr_t CurrentAddr = (intptr_t)Addr; + intptr_t EmittedAddr; + SmallVector Insts; + if (Fn != (void*)(intptr_t)SparcCompilationCallback) { + EmittedAddr = (intptr_t)Fn; + intptr_t diff = (EmittedAddr - CurrentAddr) >> 2; + if (isInt<22>(diff)) { + Insts.push_back(BA_INST(diff)); + Insts.push_back(NOP_INST); + } + } else { + EmittedAddr = (intptr_t)SparcCompilationCallback; + } + + if (Insts.size() == 0) + emitInstrForIndirectJump(EmittedAddr, 1, Insts); + + + if (!sys::Memory::setRangeWritable(Addr, 4 * Insts.size())) + llvm_unreachable("ERROR: Unable to mark stub writable."); + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + JCE.emitWordBE(Insts[i]); + + sys::Memory::InvalidateInstructionCache(Addr, 4 * Insts.size()); + if (!sys::Memory::setRangeExecutable(Addr, 4 * Insts.size())) + llvm_unreachable("ERROR: Unable to mark stub executable."); + + return Addr; +} + + +TargetJITInfo::LazyResolverFn +SparcJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return SparcCompilationCallback; +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. 
+void SparcJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); + + switch ((SP::RelocationType) MR->getRelocationType()) { + case SP::reloc_sparc_hi: + ResultPtr = (ResultPtr >> 10) & 0x3fffff; + break; + + case SP::reloc_sparc_lo: + ResultPtr = (ResultPtr & 0x3ff); + break; + + case SP::reloc_sparc_pc30: + ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffffff; + break; + + case SP::reloc_sparc_pc22: + ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffff; + break; + + case SP::reloc_sparc_pc19: + ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x7ffff; + break; + + case SP::reloc_sparc_h44: + ResultPtr = (ResultPtr >> 22) & 0x3fffff; + break; + + case SP::reloc_sparc_m44: + ResultPtr = (ResultPtr >> 12) & 0x3ff; + break; + + case SP::reloc_sparc_l44: + ResultPtr = (ResultPtr & 0xfff); + break; + + case SP::reloc_sparc_hh: + ResultPtr = (((int64_t)ResultPtr) >> 42) & 0x3fffff; + break; + + case SP::reloc_sparc_hm: + ResultPtr = (((int64_t)ResultPtr) >> 32) & 0x3ff; + break; + + } + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + } +} diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h new file mode 100644 index 00000000000..ff1b43a7f6a --- /dev/null +++ b/lib/Target/Sparc/SparcJITInfo.h @@ -0,0 +1,67 @@ +//==- SparcJITInfo.h - Sparc Implementation of the JIT Interface -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SparcJITInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPARCJITINFO_H +#define SPARCJITINFO_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { +class SparcTargetMachine; + +class SparcJITInfo : public TargetJITInfo { + + bool IsPIC; + + public: + explicit SparcJITInfo() + : IsPIC(false) {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on Sparc. + StubLayout getStubLayout() override; + + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) override; + + /// Initialize - Initialize internal stage for the function being JITted. 
+ void Initialize(const MachineFunction &MF, bool isPIC) { + IsPIC = isPIC; + } + +}; +} + +#endif diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 6cb5e20e3b9..0f4a1626f04 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -17,6 +17,7 @@ #include "SparcFrameLowering.h" #include "SparcInstrInfo.h" #include "SparcISelLowering.h" +#include "SparcJITInfo.h" #include "SparcSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" @@ -42,6 +43,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; + SparcJITInfo JITInfo; public: SparcSubtarget(const std::string &TT, const std::string &CPU, @@ -60,6 +62,7 @@ public: const SparcSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + SparcJITInfo *getJITInfo() override { return &JITInfo; } const DataLayout *getDataLayout() const override { return &DL; } bool isV9() const { return IsV9; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 80c71448963..0130face3ff 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -61,6 +61,13 @@ bool SparcPassConfig::addInstSelector() { return false; } +bool SparcTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Machine code emitter pass for Sparc. + PM.add(createSparcJITCodeEmitterPass(*this, JCE)); + return false; +} + /// addPreEmitPass - This pass may be implemented by targets that want to run /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. 
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index a8f5b85480f..62f088b1481 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -30,8 +30,13 @@ public: const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; } + SparcSubtarget *getSubtargetImpl() { + return static_cast(TargetMachine::getSubtargetImpl()); + } + // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; /// SparcV8TargetMachine - Sparc 32-bit target machine diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 41a614d9d15..4da2d0f2dd5 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -5,7 +5,7 @@ tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv) tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel) tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget) diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 732c3172553..445725bd1e1 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -15,6 +15,7 @@ TARGET = SystemZ BUILT_SOURCES = SystemZGenRegisterInfo.inc \ SystemZGenAsmWriter.inc \ SystemZGenAsmMatcher.inc \ + SystemZGenCodeEmitter.inc \ SystemZGenDisassemblerTables.inc \ SystemZGenInstrInfo.inc \ SystemZGenDAGISel.inc \ diff --git a/lib/Target/TargetJITInfo.cpp b/lib/Target/TargetJITInfo.cpp new file mode 100644 index 00000000000..aafedf8749b --- /dev/null +++ b/lib/Target/TargetJITInfo.cpp 
@@ -0,0 +1,14 @@ +//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetJITInfo.h" + +using namespace llvm; + +void TargetJITInfo::anchor() { } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b6fff7460e0..a09767e1eaf 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,12 +15,14 @@ add_public_tablegen_target(X86CommonTableGen) set(sources X86AsmPrinter.cpp X86AtomicExpandPass.cpp + X86CodeEmitter.cpp X86FastISel.cpp X86FloatingPoint.cpp X86FrameLowering.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp + X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp X86PadShortFunction.cpp diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 20258197252..d5522ed95eb 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -21,6 +21,7 @@ namespace llvm { class FunctionPass; class ImmutablePass; +class JITCodeEmitter; class X86TargetMachine; /// createX86AtomicExpandPass - This pass expands atomic operations that cannot @@ -53,6 +54,11 @@ FunctionPass *createX86FloatingPointStackifierPass(); /// AVX and SSE. FunctionPass *createX86IssueVZeroUpperPass(); +/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code +/// to the specified MCE object. +FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE); + /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically /// allocated chunk of memory. 
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp new file mode 100644 index 00000000000..9c68a9ce9ca --- /dev/null +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -0,0 +1,1502 @@ +//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the X86 machine instructions into +// relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86JITInfo.h" +#include "X86Relocations.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "x86-emitter" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + template + class Emitter : public MachineFunctionPass { + const X86InstrInfo *II; + const DataLayout *TD; + X86TargetMachine &TM; + CodeEmitter &MCE; + MachineModuleInfo *MMI; + intptr_t PICBaseOffset; + bool Is64BitMode; + bool IsPIC; + public: + static char ID; + explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) + : MachineFunctionPass(ID), II(nullptr), TD(nullptr), TM(tm), + MCE(mce), 
PICBaseOffset(0), Is64BitMode(false), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "X86 Machine Code Emitter"; + } + + void emitOpcodePrefix(uint64_t TSFlags, int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const; + + void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const; + + void emitSegmentOverridePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI) const; + + void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + intptr_t Disp = 0, intptr_t PCAdj = 0, + bool Indirect = false); + void emitExternalSymbolAddress(const char *ES, unsigned Reloc); + void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, + intptr_t PCAdj = 0); + void emitJumpTableAddress(unsigned JTI, unsigned Reloc, + intptr_t PCAdj = 0); + + void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, + intptr_t Adj = 0, bool IsPCRel = true); + + void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); + void emitRegModRMByte(unsigned RegOpcodeField); + void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); + void emitConstant(uint64_t Val, unsigned Size); + + void emitMemModRMByte(const MachineInstr &MI, + unsigned Op, unsigned RegOpcodeField, + intptr_t PCAdj = 0); + + unsigned getX86RegNum(unsigned RegNo) const { + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + return TRI->getEncodingValue(RegNo) & 0x7; + } + + unsigned char getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const; + }; + 
+template + char Emitter::ID = 0; +} // end anonymous namespace. + +/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code +/// to the specified JITCodeEmitter object. +FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter(TM, JCE); +} + +template +bool Emitter::runOnMachineFunction(MachineFunction &MF) { + MMI = &getAnalysis(); + MCE.setModuleInfo(MMI); + + II = TM.getSubtargetImpl()->getInstrInfo(); + TD = TM.getSubtargetImpl()->getDataLayout(); + Is64BitMode = TM.getSubtarget().is64Bit(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + + do { + DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MCInstrDesc &Desc = I->getDesc(); + emitInstruction(*I, &Desc); + // MOVPC32r is basically a call plus a pop instruction. + if (Desc.getOpcode() == X86::MOVPC32r) + emitInstruction(*I, &II->get(X86::POP32r)); + ++NumEmitted; // Keep track of the # of mi's emitted + } + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned determineREX(const MachineInstr &MI) { + unsigned REX = 0; + const MCInstrDesc &Desc = MI.getDesc(); + + // Pseudo instructions do not need REX prefix byte. 
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + unsigned NumOps = Desc.getNumOperands(); + if (NumOps) { + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (X86II::isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMSrcReg: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRMXm: + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e))) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + i = isTwoAddr ? 
2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + + +/// emitPCRelativeBlockAddress - This method keeps track of the information +/// necessary to resolve the address of this block later and emits a dummy +/// value. +/// +template +void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { + // Remember where this reference was and where it is to so we can + // deal with it later. + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, MBB)); + MCE.emitWordLE(0); +} + +/// emitGlobalAddress - Emit the specified address to the code stream assuming +/// this is part of a "take the address of a global" instruction. +/// +template +void Emitter::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc, + intptr_t Disp /* = 0 */, + intptr_t PCAdj /* = 0 */, + bool Indirect /* = false */) { + intptr_t RelocCST = Disp; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MachineRelocation MR = Indirect + ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), + RelocCST, false) + : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), RelocCST, false); + MCE.addRelocation(MR); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(Disp); + else + MCE.emitWordLE((int32_t)Disp); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitExternalSymbolAddress(const char *ES, + unsigned Reloc) { + intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? 
PICBaseOffset : 0; + + // X86 never needs stubs because instruction selection will always pick + // an instruction sequence that is large enough to hold any address + // to a symbol. + // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall) + bool NeedStub = false; + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, RelocCST, + 0, NeedStub)); + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(0); + else + MCE.emitWordLE(0); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc, + intptr_t Disp /* = 0 */, + intptr_t PCAdj /* = 0 */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, RelocCST)); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(Disp); + else + MCE.emitWordLE((int32_t)Disp); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. 
+template +void Emitter::emitJumpTableAddress(unsigned JTI, unsigned Reloc, + intptr_t PCAdj /* = 0 */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTI, RelocCST)); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(0); + else + MCE.emitWordLE(0); +} + +inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, + unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); +} + +template +void Emitter::emitRegModRMByte(unsigned ModRMReg, + unsigned RegOpcodeFld){ + MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); +} + +template +void Emitter::emitRegModRMByte(unsigned RegOpcodeFld) { + MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0)); +} + +template +void Emitter::emitSIBByte(unsigned SS, + unsigned Index, + unsigned Base) { + // SIB byte is in the same format as the ModRMByte... + MCE.emitByte(ModRMByte(SS, Index, Base)); +} + +template +void Emitter::emitConstant(uint64_t Val, unsigned Size) { + // Output the constant in little endian byte order... + for (unsigned i = 0; i != Size; ++i) { + MCE.emitByte(Val & 255); + Val >>= 8; + } +} + +/// isDisp8 - Return true if this signed displacement fits in a 8-bit +/// sign-extended field. +static bool isDisp8(int Value) { + return Value == (signed char)Value; +} + +static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp, + const TargetMachine &TM) { + // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer + // mechanism as 32-bit mode. 
+ if (TM.getSubtarget().is64Bit() && + !TM.getSubtarget().isTargetDarwin()) + return false; + + // Return true if this is a reference to a stub containing the address of the + // global, not the global itself. + return isGlobalStubReference(GVOp.getTargetFlags()); +} + +template +void Emitter::emitDisplacementField(const MachineOperand *RelocOp, + int DispVal, + intptr_t Adj /* = 0 */, + bool IsPCRel /* = true */) { + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (!RelocOp) { + emitConstant(DispVal, 4); + return; + } + + // Otherwise, this is something that requires a relocation. Emit it as such + // now. + unsigned RelocType = Is64BitMode ? + (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (RelocOp->isGlobal()) { + // In 64-bit static small code model, we could potentially emit absolute. + // But it's probably not beneficial. If the MCE supports using RIP directly + // do it, otherwise fallback to absolute (this is determined by IsPCRel). 
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative + // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute + bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); + emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), + Adj, Indirect); + } else if (RelocOp->isSymbol()) { + emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); + } else if (RelocOp->isCPI()) { + emitConstPoolAddress(RelocOp->getIndex(), RelocType, + RelocOp->getOffset(), Adj); + } else { + assert(RelocOp->isJTI() && "Unexpected machine operand!"); + emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj); + } +} + +template +void Emitter::emitMemModRMByte(const MachineInstr &MI, + unsigned Op,unsigned RegOpcodeField, + intptr_t PCAdj) { + const MachineOperand &Op3 = MI.getOperand(Op+3); + int DispVal = 0; + const MachineOperand *DispForReloc = nullptr; + + // Figure out what sort of displacement we have to handle here. + if (Op3.isGlobal()) { + DispForReloc = &Op3; + } else if (Op3.isSymbol()) { + DispForReloc = &Op3; + } else if (Op3.isCPI()) { + if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex()); + DispVal += Op3.getOffset(); + } + } else if (Op3.isJTI()) { + if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex()); + } + } else { + DispVal = Op3.getImm(); + } + + const MachineOperand &Base = MI.getOperand(Op); + const MachineOperand &Scale = MI.getOperand(Op+1); + const MachineOperand &IndexReg = MI.getOperand(Op+2); + + unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. 
+ if (BaseReg == X86::RIP || + (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } + + // Indicate that the displacement will use a pcrel or absolute reference + // by default. MCEs able to resolve addresses on-the-fly use pcrel by default + // while others, unless explicitly asked to use RIP, use absolute references. + bool IsPCRel = MCE.earlyResolveAddresses() ? true : false; + + // Is a SIB byte needed? + // If no BaseReg, issue a RIP relative instruction only if the MCE can + // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table + // 2-7) and absolute references. + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + + if (// The SIB byte must be used if there is an index register. + IndexReg.getReg() == 0 && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + if (BaseReg == 0 || // [disp32] in X86-32 mode + BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below.
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); + return; + } + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (!DispForReloc && isDisp8(DispVal)) { + MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); + emitConstant(DispVal, 1); + return; + } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; + } + + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=4, to JUST get the index, scale, and displacement. + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispForReloc) { + // Emit the normal disp32 encoding. + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispVal == 0 && BaseRegNo != N86::EBP) { + // Emit no displacement ModR/M byte + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + } else if (isDisp8(DispVal)) { + // Emit the disp8 encoding... + MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding... + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. 
+ unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else // Examples: [ESP+1*+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + emitSIBByte(SS, IndexRegNo, 5); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*+4] + emitSIBByte(SS, IndexRegNo, BaseRegNo); + } + + // Do we need to output a displacement? + if (ForceDisp8) { + emitConstant(DispVal, 1); + } else if (DispVal != 0 || ForceDisp32) { + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + } +} + +static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II, + unsigned Opcode) { + const MCInstrDesc *Desc = &II->get(Opcode); + MI.setDesc(*Desc); + return Desc; +} + +/// Is16BitMemOperand - Return true if the specified instruction has +/// a 16-bit memory operand. Op specifies the operand # of the memoperand. +static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} + +/// Is32BitMemOperand - Return true if the specified instruction has +/// a 32-bit memory operand. Op specifies the operand # of the memoperand. 
+static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} + +/// Is64BitMemOperand - Return true if the specified instruction has +/// a 64-bit memory operand. Op specifies the operand # of the memoperand. +#ifndef NDEBUG +static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} +#endif + +template +void Emitter::emitOpcodePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const { + // Emit the operand size opcode prefix as needed. + if (((TSFlags & X86II::OpSizeMask) >> X86II::OpSizeShift) == X86II::OpSize16) + MCE.emitByte(0x66); + + switch (Desc->TSFlags & X86II::OpPrefixMask) { + case X86II::PD: // 66 + MCE.emitByte(0x66); + break; + case X86II::XS: // F3 + MCE.emitByte(0xF3); + break; + case X86II::XD: // F2 + MCE.emitByte(0xF2); + break; + } + + // Handle REX prefix. + if (Is64BitMode) { + if (unsigned REX = determineREX(MI)) + MCE.emitByte(0x40 | REX); + } + + // 0x0F escape code must be emitted just before the opcode. 
+ switch (Desc->TSFlags & X86II::OpMapMask) { + case X86II::TB: // Two-byte opcode map + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + MCE.emitByte(0x0F); + break; + } + + switch (Desc->TSFlags & X86II::OpMapMask) { + case X86II::T8: // 0F 38 + MCE.emitByte(0x38); + break; + case X86II::TA: // 0F 3A + MCE.emitByte(0x3A); + break; + } +} + +// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range +// 0-7 and the difference between the 2 groups is given by the REX prefix. +// In the VEX prefix, registers are seen sequentially from 0-15 and encoded +// in 1's complement form, example: +// +// ModRM field => XMM9 => 1 +// VEX.VVVV => XMM9 => ~9 +// +// See table 4-35 of Intel AVX Programming Reference for details. +template +unsigned char +Emitter::getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const { + unsigned SrcReg = MI.getOperand(OpNum).getReg(); + unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg()); + if (X86II::isX86_64ExtendedReg(SrcReg)) + SrcRegNum |= 8; + + // The registers represented through VEX_VVVV should + // be encoded in 1's complement form. + return (~SrcRegNum) & 0xf; +} + +/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed +template +void Emitter::emitSegmentOverridePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI) const { + if (MemOperand < 0) + return; // No memory operand + + // Check for explicit segment override on memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { + default: llvm_unreachable("Unknown segment register!"); + case 0: break; + case X86::CS: MCE.emitByte(0x2E); break; + case X86::SS: MCE.emitByte(0x36); break; + case X86::DS: MCE.emitByte(0x3E); break; + case X86::ES: MCE.emitByte(0x26); break; + case X86::FS: MCE.emitByte(0x64); break; + case X86::GS: MCE.emitByte(0x65); break; + } +} + +template +void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const { + unsigned char Encoding = (TSFlags & X86II::EncodingMask) >> + X86II::EncodingShift; + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + + // VEX_R: opcode extension equivalent to REX.R in + // 1's complement (inverted) form + // + // 1: Same as REX_R=0 (must be 1 in 32-bit mode) + // 0: Same as REX_R=1 (64 bit mode only) + // + unsigned char VEX_R = 0x1; + + // VEX_X: equivalent to REX.X, only used when a + // register is used for index in SIB Byte.
+ // + // 1: Same as REX.X=0 (must be 1 in 32-bit mode) + // 0: Same as REX.X=1 (64-bit mode only) + unsigned char VEX_X = 0x1; + + // VEX_B: + // + // 1: Same as REX_B=0 (ignored in 32-bit mode) + // 0: Same as REX_B=1 (64 bit mode only) + // + unsigned char VEX_B = 0x1; + + // VEX_W: opcode specific (use like REX.W, or used for + // opcode extension, or ignored, depending on the opcode byte) + unsigned char VEX_W = 0; + + // VEX_5M (VEX m-mmmmm field): + // + // 0b00000: Reserved for future use + // 0b00001: implied 0F leading opcode + // 0b00010: implied 0F 38 leading opcode bytes + // 0b00011: implied 0F 3A leading opcode bytes + // 0b00100-0b11111: Reserved for future use + // 0b01000: XOP map select - 08h instructions with imm byte + // 0b01001: XOP map select - 09h instructions with no imm byte + // 0b01010: XOP map select - 0Ah instructions with imm dword + unsigned char VEX_5M = 0; + + // VEX_4V (VEX vvvv field): a register specifier + // (in 1's complement form) or 1111 if unused. 
+ unsigned char VEX_4V = 0xf; + + // VEX_L (Vector Length): + // + // 0: scalar or 128-bit vector + // 1: 256-bit vector + // + unsigned char VEX_L = 0; + + // VEX_PP: opcode extension providing equivalent + // functionality of a SIMD prefix + // + // 0b00: None + // 0b01: 66 + // 0b10: F3 + // 0b11: F2 + // + unsigned char VEX_PP = 0; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) + VEX_W = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) + VEX_L = 1; + + switch (TSFlags & X86II::OpPrefixMask) { + default: break; // VEX_PP already correct + case X86II::PD: VEX_PP = 0x1; break; // 66 + case X86II::XS: VEX_PP = 0x2; break; // F3 + case X86II::XD: VEX_PP = 0x3; break; // F2 + } + + switch (TSFlags & X86II::OpMapMask) { + default: llvm_unreachable("Invalid prefix!"); + case X86II::TB: VEX_5M = 0x1; break; // 0F + case X86II::T8: VEX_5M = 0x2; break; // 0F 38 + case X86II::TA: VEX_5M = 0x3; break; // 0F 3A + case X86II::XOP8: VEX_5M = 0x8; break; + case X86II::XOP9: VEX_5M = 0x9; break; + case X86II::XOPA: VEX_5M = 0xA; break; + } + + // Classify VEX_B, VEX_4V, VEX_R, VEX_X + unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!"); + case X86II::RawFrm: + break; + case X86II::MRMDestMem: { + // MRMDestMem instructions forms: + // MemAddr, src1(ModR/M) + // MemAddr, src1(VEX_4V), src2(ModR/M) + // MemAddr, src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + 
VEX_X = 0x0; + + CurOp = X86::AddrNumOperands; + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + const MachineOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + break; + } + case X86II::MRMSrcMem: + // MRMSrcMem instructions forms: + // src1(ModR/M), MemAddr + // src1(ModR/M), src2(VEX_4V), MemAddr + // src1(ModR/M), MemAddr, imm8 + // src1(ModR/M), MemAddr, src2(VEX_I8IMM) + // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + CurOp++; + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + } + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); + break; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + // MRM[0-7]m instructions forms: + // MemAddr + // src1(VEX_4V), MemAddr + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; + } + case X86II::MRMSrcReg: + // MRMSrcReg instructions forms: + // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M), src1(ModR/M) + // dst(ModR/M), src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip
second register source (encoded in I8IMM) + CurOp++; + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + CurOp++; + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + break; + case X86II::MRMDestReg: + // MRMDestReg instructions forms: + // dst(ModR/M), src(ModR/M) + // dst(ModR/M), src(ModR/M), imm8 + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + break; + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // MRM0r-MRM7r instructions forms: + // dst(VEX_4V), src(ModR/M), imm8 + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + break; + } + + // Emit segment override opcode prefix as needed. + emitSegmentOverridePrefix(TSFlags, MemOperand, MI); + + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + // XOP uses a similar prefix: + // +-----+ +--------------+ +-------------------+ + // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + + // Can this use the 2 byte VEX prefix? + if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { + MCE.emitByte(0xC5); + MCE.emitByte(LastByte | (VEX_R << 7)); + return; + } + + // 3 byte VEX prefix + MCE.emitByte(Encoding == X86II::XOP ? 
0x8F : 0xC4); + MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M); + MCE.emitByte(LastByte | (VEX_W << 7)); +} + +template +void Emitter::emitInstruction(MachineInstr &MI, + const MCInstrDesc *Desc) { + DEBUG(dbgs() << MI); + + // If this is a pseudo instruction, lower it. + switch (Desc->getOpcode()) { + case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break; + case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break; + case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break; + case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break; + case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break; + case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break; + case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break; + case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break; + case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break; + case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break; + case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break; + case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break; + case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break; + case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break; + case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break; + case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break; + case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break; + } + + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + unsigned Opcode = Desc->Opcode; + + // If this is a two-address instruction, skip one of the register operands. 
+ unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + + uint64_t TSFlags = Desc->TSFlags; + + // Encoding type for this instruction. + unsigned char Encoding = (TSFlags & X86II::EncodingMask) >> + X86II::EncodingShift; + + // It uses the VEX.VVVV field? + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + const unsigned MemOp4_I8IMMOperand = 2; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); + if (MemoryOperand != -1) MemoryOperand += CurOp; + + // Emit the lock opcode prefix as needed. + if (Desc->TSFlags & X86II::LOCK) + MCE.emitByte(0xF0); + + // Emit segment override opcode prefix as needed. + emitSegmentOverridePrefix(TSFlags, MemoryOperand, MI); + + // Emit the repeat opcode prefix as needed. + if (Desc->TSFlags & X86II::REP) + MCE.emitByte(0xF3); + + // Emit the address size opcode prefix as needed. 
+ bool need_address_override; + if (TSFlags & X86II::AdSize) { + need_address_override = true; + } else if (MemoryOperand < 0) { + need_address_override = false; + } else if (Is64BitMode) { + assert(!Is16BitMemOperand(MI, MemoryOperand)); + need_address_override = Is32BitMemOperand(MI, MemoryOperand); + } else { + assert(!Is64BitMemOperand(MI, MemoryOperand)); + need_address_override = Is16BitMemOperand(MI, MemoryOperand); + } + + if (need_address_override) + MCE.emitByte(0x67); + + if (Encoding == 0) + emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); + else + emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); + + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); + switch (TSFlags & X86II::FormMask) { + default: + llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); + case X86II::Pseudo: + // Remember the current PC offset, this is the PIC relocation + // base address. + switch (Opcode) { + default: + llvm_unreachable("pseudo instructions should be removed before code" + " emission"); + // Do nothing for Int_MemBarrier - it's just a comment. Add a debug + // to make it slightly easier to see. + case X86::Int_MemBarrier: + DEBUG(dbgs() << "#MEMBARRIER\n"); + break; + + case TargetOpcode::INLINEASM: + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. 
+ if (MI.getOperand(0).getSymbolName()[0]) { + DebugLoc DL = MI.getDebugLoc(); + DL.print(MI.getParent()->getParent()->getFunction()->getContext(), + llvm::errs()); + report_fatal_error("JIT does not support inline asm!"); + } + break; + case TargetOpcode::DBG_VALUE: + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::GC_LABEL: + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + break; + + case X86::SEH_PushReg: + case X86::SEH_SaveReg: + case X86::SEH_SaveXMM: + case X86::SEH_StackAlloc: + case X86::SEH_SetFrame: + case X86::SEH_PushFrame: + case X86::SEH_EndPrologue: + case X86::SEH_Epilogue: + break; + + case X86::MOVPC32r: { + // This emits the "call" portion of this pseudo instruction. + MCE.emitByte(BaseOpcode); + emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags)); + // Remember PIC base. + PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); + X86JITInfo *JTI = TM.getSubtargetImpl()->getJITInfo(); + JTI->setPICBase(MCE.getCurrentPCValue()); + break; + } + } + CurOp = NumOps; + break; + case X86II::RawFrm: { + MCE.emitByte(BaseOpcode); + + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + + DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(dbgs() << "isImm " << MO.isImm() << "\n"); + + if (MO.isMBB()) { + emitPCRelativeBlockAddress(MO.getMBB()); + break; + } + + if (MO.isGlobal()) { + emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, + MO.getOffset(), 0); + break; + } + + if (MO.isSymbol()) { + emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); + break; + } + + // FIXME: Only used by hackish MCCodeEmitter, remove when dead. 
+ if (MO.isJTI()) { + emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); + break; + } + + assert(MO.isImm() && "Unknown RawFrm operand!"); + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + // Fix up immediate operand for pc relative calls. + intptr_t Imm = (intptr_t)MO.getImm(); + Imm = Imm - MCE.getCurrentPCValue() - 4; + emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags)); + } else + emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); + break; + } + + case X86II::AddRegFrm: { + MCE.emitByte(BaseOpcode + + getX86RegNum(MI.getOperand(CurOp++).getReg())); + + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; + } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV32ri64) + rt = X86::reloc_absolute_word; // FIXME: add X86II flag? + // This should not occur on Darwin for relocatable objects. + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? 
+ if (MO1.isGlobal()) { + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); + break; + } + + case X86II::MRMDestReg: { + MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + + emitRegModRMByte(MI.getOperand(CurOp).getReg(), + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; + break; + } + case X86II::MRMDestMem: { + MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp + X86::AddrNumOperands; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitMemModRMByte(MI, CurOp, + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; + break; + } + + case X86II::MRMSrcReg: { + MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + + if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) + ++SrcRegNum; + + emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(), + getX86RegNum(MI.getOperand(CurOp).getReg())); + // 2 operands skipped with HasMemOp4, compensate accordingly + CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; + if (HasVEX_4VOp3) + ++CurOp; + break; + } + case X86II::MRMSrcMem: { + int AddrOperands = X86::AddrNumOperands; + unsigned FirstMemOp = CurOp+1; + if (HasVEX_4V) { + ++AddrOperands; + ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). + } + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + ++FirstMemOp; + + MCE.emitByte(BaseOpcode); + + intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? 
+ X86II::getSizeOfImm(Desc->TSFlags) : 0; + emitMemModRMByte(MI, FirstMemOp, + getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); + CurOp += AddrOperands + 1; + if (HasVEX_4VOp3) + ++CurOp; + break; + } + + case X86II::MRMXr: + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: { + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + ++CurOp; + MCE.emitByte(BaseOpcode); + uint64_t Form = (Desc->TSFlags & X86II::FormMask); + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r); + + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; + } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO1.isGlobal()) { + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); + break; + } + + case X86II::MRMXm: + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + ++CurOp; + intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? + (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? 
+ X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; + + MCE.emitByte(BaseOpcode); + uint64_t Form = (Desc->TSFlags & X86II::FormMask); + emitMemModRMByte(MI, CurOp, (Form==X86II::MRMXm) ? 0 : Form - X86II::MRM0m, + PCAdj); + CurOp += X86::AddrNumOperands; + + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); + if (MO.isImm()) { + emitConstant(MO.getImm(), Size); + break; + } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64mi32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO.isGlobal()) { + bool Indirect = gvNeedsNonLazyPtr(MO, TM); + emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, + Indirect); + } else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), rt); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), rt); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), rt); + break; + } + + case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2: + case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: + case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB: + case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1: + case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: + case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2: + case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5: + case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA: + case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED: + case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1: + case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4: + case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7: + case X86II::MRM_F8: 
case X86II::MRM_F9: case X86II::MRM_FA: + case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD: + case X86II::MRM_FE: case X86II::MRM_FF: + MCE.emitByte(BaseOpcode); + + unsigned char MRM; + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Invalid Form"); + case X86II::MRM_C0: MRM = 0xC0; break; + case X86II::MRM_C1: MRM = 0xC1; break; + case X86II::MRM_C2: MRM = 0xC2; break; + case X86II::MRM_C3: MRM = 0xC3; break; + case X86II::MRM_C4: MRM = 0xC4; break; + case X86II::MRM_C8: MRM = 0xC8; break; + case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; + case X86II::MRM_CF: MRM = 0xCF; break; + case X86II::MRM_D0: MRM = 0xD0; break; + case X86II::MRM_D1: MRM = 0xD1; break; + case X86II::MRM_D4: MRM = 0xD4; break; + case X86II::MRM_D5: MRM = 0xD5; break; + case X86II::MRM_D6: MRM = 0xD6; break; + case X86II::MRM_D7: MRM = 0xD7; break; + case X86II::MRM_D8: MRM = 0xD8; break; + case X86II::MRM_D9: MRM = 0xD9; break; + case X86II::MRM_DA: MRM = 0xDA; break; + case X86II::MRM_DB: MRM = 0xDB; break; + case X86II::MRM_DC: MRM = 0xDC; break; + case X86II::MRM_DD: MRM = 0xDD; break; + case X86II::MRM_DE: MRM = 0xDE; break; + case X86II::MRM_DF: MRM = 0xDF; break; + case X86II::MRM_E0: MRM = 0xE0; break; + case X86II::MRM_E1: MRM = 0xE1; break; + case X86II::MRM_E2: MRM = 0xE2; break; + case X86II::MRM_E3: MRM = 0xE3; break; + case X86II::MRM_E4: MRM = 0xE4; break; + case X86II::MRM_E5: MRM = 0xE5; break; + case X86II::MRM_E8: MRM = 0xE8; break; + case X86II::MRM_E9: MRM = 0xE9; break; + case X86II::MRM_EA: MRM = 0xEA; break; + case X86II::MRM_EB: MRM = 0xEB; break; + case X86II::MRM_EC: MRM = 0xEC; break; + case X86II::MRM_ED: MRM = 0xED; break; + case X86II::MRM_EE: MRM = 0xEE; break; + case X86II::MRM_F0: MRM = 0xF0; break; + case X86II::MRM_F1: MRM = 0xF1; break; + case X86II::MRM_F2: MRM = 0xF2; break; + case X86II::MRM_F3: MRM = 0xF3; break; + case X86II::MRM_F4: MRM = 0xF4; 
break; + case X86II::MRM_F5: MRM = 0xF5; break; + case X86II::MRM_F6: MRM = 0xF6; break; + case X86II::MRM_F7: MRM = 0xF7; break; + case X86II::MRM_F8: MRM = 0xF8; break; + case X86II::MRM_F9: MRM = 0xF9; break; + case X86II::MRM_FA: MRM = 0xFA; break; + case X86II::MRM_FB: MRM = 0xFB; break; + case X86II::MRM_FC: MRM = 0xFC; break; + case X86II::MRM_FD: MRM = 0xFD; break; + case X86II::MRM_FE: MRM = 0xFE; break; + case X86II::MRM_FF: MRM = 0xFF; break; + } + MCE.emitByte(MRM); + break; + } + + while (CurOp != NumOps && NumOps - CurOp <= 2) { + // The last source register of a 4 operand instruction in AVX is encoded + // in bits[7:4] of a immediate byte. + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { + const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand + : CurOp); + ++CurOp; + unsigned RegNum = getX86RegNum(MO.getReg()) << 4; + if (X86II::isX86_64ExtendedReg(MO.getReg())) + RegNum |= 1 << 7; + // If there is an additional 5th operand it must be an immediate, which + // is encoded in bits[3:0] + if (CurOp != NumOps) { + const MachineOperand &MIMM = MI.getOperand(CurOp++); + if (MIMM.isImm()) { + unsigned Val = MIMM.getImm(); + assert(Val < 16 && "Immediate operand value out of range"); + RegNum |= Val; + } + } + emitConstant(RegNum, 1); + } else { + emitConstant(MI.getOperand(CurOp++).getImm(), + X86II::getSizeOfImm(Desc->TSFlags)); + } + } + + if (!MI.isVariadic() && CurOp != NumOps) { +#ifndef NDEBUG + dbgs() << "Cannot encode all operands of: " << MI << "\n"; +#endif + llvm_unreachable(nullptr); + } + + MCE.processDebugLoc(MI.getDebugLoc(), false); +} diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 7c973c2e55d..1f53b7cd791 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include
"llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f14179603eb..0d46f706906 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp new file mode 100644 index 00000000000..a082c4f8b0e --- /dev/null +++ b/lib/Target/X86/X86JITInfo.cpp @@ -0,0 +1,588 @@ +//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the X86 target. +// +//===----------------------------------------------------------------------===// + +#include "X86JITInfo.h" +#include "X86Relocations.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Valgrind.h" +#include <cstdlib> +#include <cstring> +using namespace llvm; + +#define DEBUG_TYPE "jit" + +// Determine the platform we're running on +#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64) +# define X86_64_JIT +#elif defined(__i386__) || defined(i386) || defined(_M_IX86) +# define X86_32_JIT +#endif + +void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + unsigned char *OldByte = (unsigned char *)Old; + *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte; + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)OldWord; + *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code. + + // X86 doesn't need to invalidate the processor cache, so just invalidate + // Valgrind's cache directly. + sys::ValgrindDiscardTranslations(Old, 5); +} + + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// For ELF targets, use a .size and .type directive, to let tools +// know the extent of functions defined in assembler. +#if defined(__ELF__) +# define SIZE(sym) ".size " #sym ", . - " #sym "\n" +# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n" +#else +# define SIZE(sym) +# define TYPE_FUNCTION(sym) +#endif + +// Provide a convenient way for disabling usage of CFI directives. +// This is needed for old/broken assemblers (for example, gas on +// Darwin is pretty old and doesn't support these directives) +#if defined(__APPLE__) +# define CFI(x) +#else +// FIXME: Disable this until we really want to use it. Also, we will +// need to add some workarounds for compilers, which support +// only subset of these directives. +# define CFI(x) +#endif + +// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional +// callee saved registers, for the fastcc calling convention. +extern "C" { +#if defined(X86_64_JIT) +# ifndef _MSC_VER + // No need to save EAX/EDX for X86-64. 
+ void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback\n" + TYPE_FUNCTION(X86CompilationCallback) + ASMPREFIX "X86CompilationCallback:\n" + CFI(".cfi_startproc\n") + // Save RBP + "pushq %rbp\n" + CFI(".cfi_def_cfa_offset 16\n") + CFI(".cfi_offset %rbp, -16\n") + // Save RSP + "movq %rsp, %rbp\n" + CFI(".cfi_def_cfa_register %rbp\n") + // Save all int arg registers + "pushq %rdi\n" + CFI(".cfi_rel_offset %rdi, 0\n") + "pushq %rsi\n" + CFI(".cfi_rel_offset %rsi, 8\n") + "pushq %rdx\n" + CFI(".cfi_rel_offset %rdx, 16\n") + "pushq %rcx\n" + CFI(".cfi_rel_offset %rcx, 24\n") + "pushq %r8\n" + CFI(".cfi_rel_offset %r8, 32\n") + "pushq %r9\n" + CFI(".cfi_rel_offset %r9, 40\n") + // Align stack on 16-byte boundary. ESP might not be properly aligned + // (8 byte) if this is called from an indirect stub. + "andq $-16, %rsp\n" + // Save all XMM arg registers + "subq $128, %rsp\n" + "movaps %xmm0, (%rsp)\n" + "movaps %xmm1, 16(%rsp)\n" + "movaps %xmm2, 32(%rsp)\n" + "movaps %xmm3, 48(%rsp)\n" + "movaps %xmm4, 64(%rsp)\n" + "movaps %xmm5, 80(%rsp)\n" + "movaps %xmm6, 96(%rsp)\n" + "movaps %xmm7, 112(%rsp)\n" + // JIT callee +#if defined(_WIN64) || defined(__CYGWIN__) + "subq $32, %rsp\n" + "movq %rbp, %rcx\n" // Pass prev frame and return address + "movq 8(%rbp), %rdx\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" + "addq $32, %rsp\n" +#else + "movq %rbp, %rdi\n" // Pass prev frame and return address + "movq 8(%rbp), %rsi\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" +#endif + // Restore all XMM arg registers + "movaps 112(%rsp), %xmm7\n" + "movaps 96(%rsp), %xmm6\n" + "movaps 80(%rsp), %xmm5\n" + "movaps 64(%rsp), %xmm4\n" + "movaps 48(%rsp), %xmm3\n" + "movaps 32(%rsp), %xmm2\n" + "movaps 16(%rsp), %xmm1\n" + "movaps (%rsp), %xmm0\n" + // Restore RSP + "movq %rbp, %rsp\n" + CFI(".cfi_def_cfa_register %rsp\n") + // Restore all int arg registers + "subq $48, %rsp\n" + CFI(".cfi_adjust_cfa_offset 
48\n") + "popq %r9\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %r9\n") + "popq %r8\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %r8\n") + "popq %rcx\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rcx\n") + "popq %rdx\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rdx\n") + "popq %rsi\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rsi\n") + "popq %rdi\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rdi\n") + // Restore RBP + "popq %rbp\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rbp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback) + ); +# else + // No inline assembler support on this platform. The routine is in external + // file. + void X86CompilationCallback(); + +# endif +#elif defined (X86_32_JIT) +# ifndef _MSC_VER + void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback\n" + TYPE_FUNCTION(X86CompilationCallback) + ASMPREFIX "X86CompilationCallback:\n" + CFI(".cfi_startproc\n") + "pushl %ebp\n" + CFI(".cfi_def_cfa_offset 8\n") + CFI(".cfi_offset %ebp, -8\n") + "movl %esp, %ebp\n" // Standard prologue + CFI(".cfi_def_cfa_register %ebp\n") + "pushl %eax\n" + CFI(".cfi_rel_offset %eax, 0\n") + "pushl %edx\n" // Save EAX/EDX/ECX + CFI(".cfi_rel_offset %edx, 4\n") + "pushl %ecx\n" + CFI(".cfi_rel_offset %ecx, 8\n") +# if defined(__APPLE__) + "andl $-16, %esp\n" // Align ESP on 16-byte boundary +# endif + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" + "movl %ebp, %esp\n" // Restore ESP + CFI(".cfi_def_cfa_register %esp\n") + "subl $12, %esp\n" + CFI(".cfi_adjust_cfa_offset 12\n") + "popl %ecx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ecx\n") + "popl %edx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %edx\n") + "popl 
%eax\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %eax\n") + "popl %ebp\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ebp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback) + ); + + // Same as X86CompilationCallback but also saves XMM argument registers. + void X86CompilationCallback_SSE(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback_SSE\n" + TYPE_FUNCTION(X86CompilationCallback_SSE) + ASMPREFIX "X86CompilationCallback_SSE:\n" + CFI(".cfi_startproc\n") + "pushl %ebp\n" + CFI(".cfi_def_cfa_offset 8\n") + CFI(".cfi_offset %ebp, -8\n") + "movl %esp, %ebp\n" // Standard prologue + CFI(".cfi_def_cfa_register %ebp\n") + "pushl %eax\n" + CFI(".cfi_rel_offset %eax, 0\n") + "pushl %edx\n" // Save EAX/EDX/ECX + CFI(".cfi_rel_offset %edx, 4\n") + "pushl %ecx\n" + CFI(".cfi_rel_offset %ecx, 8\n") + "andl $-16, %esp\n" // Align ESP on 16-byte boundary + // Save all XMM arg registers + "subl $64, %esp\n" + // FIXME: provide frame move information for xmm registers. + // This can be tricky, because CFA register is ebp (unaligned) + // and we need to produce offsets relative to it. 
+ "movaps %xmm0, (%esp)\n" + "movaps %xmm1, 16(%esp)\n" + "movaps %xmm2, 32(%esp)\n" + "movaps %xmm3, 48(%esp)\n" + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" + "addl $16, %esp\n" + "movaps 48(%esp), %xmm3\n" + CFI(".cfi_restore %xmm3\n") + "movaps 32(%esp), %xmm2\n" + CFI(".cfi_restore %xmm2\n") + "movaps 16(%esp), %xmm1\n" + CFI(".cfi_restore %xmm1\n") + "movaps (%esp), %xmm0\n" + CFI(".cfi_restore %xmm0\n") + "movl %ebp, %esp\n" // Restore ESP + CFI(".cfi_def_cfa_register esp\n") + "subl $12, %esp\n" + CFI(".cfi_adjust_cfa_offset 12\n") + "popl %ecx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ecx\n") + "popl %edx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %edx\n") + "popl %eax\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %eax\n") + "popl %ebp\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ebp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback_SSE) + ); +# else + void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + + _declspec(naked) void X86CompilationCallback(void) { + __asm { + push ebp + mov ebp, esp + push eax + push edx + push ecx + and esp, -16 + sub esp, 16 + mov eax, dword ptr [ebp+4] + mov dword ptr [esp+4], eax + mov dword ptr [esp], ebp + call LLVMX86CompilationCallback2 + mov esp, ebp + sub esp, 12 + pop ecx + pop edx + pop eax + pop ebp + ret + } + } + +# endif // _MSC_VER + +#else // Not an i386 host + void X86CompilationCallback() { + llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!"); + } +#endif +} + +/// This is the target-specific function invoked by the +/// function stub when we did not know the real target of a call. This function +/// must locate the start of the stub or call site and pass it into the JIT +/// compiler function. 
+extern "C" { +LLVM_ATTRIBUTE_USED // Referenced from inline asm. +LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr, + intptr_t RetAddr) { + intptr_t *RetAddrLoc = &StackPtr[1]; + // We are reading raw stack data here. Tell MemorySanitizer that it is + // sufficiently initialized. + __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc)); + assert(*RetAddrLoc == RetAddr && + "Could not find return address on the stack!"); + + // It's a stub if there is an interrupt marker after the call. + bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE; + + // The call instruction should have pushed the return value onto the stack... +#if defined (X86_64_JIT) + RetAddr--; // Backtrack to the reference itself... +#else + RetAddr -= 4; // Backtrack to the reference itself... +#endif + +#if 0 + DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr + << " ESP=" << (void*)StackPtr + << ": Resolving call to function: " + << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); +#endif + + // Sanity check to make sure this really is a call instruction. +#if defined (X86_64_JIT) + assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!"); + assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!"); +#else + assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!"); +#endif + + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. +#if defined (X86_64_JIT) + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); +#else + *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); +#endif + + if (isStub) { + // If this is a stub, rewrite the call into an unconditional branch + // instruction so that two return addresses are not pushed onto the stack + // when the requested function finally gets called. 
This also makes the + // 0xCE byte (interrupt) dead, so the marker doesn't effect anything. +#if defined (X86_64_JIT) + // If the target address is within 32-bit range of the stub, use a + // PC-relative branch instead of loading the actual address. (This is + // considerably shorter than the 64-bit immediate load already there.) + // We assume here intptr_t is 64 bits. + intptr_t diff = NewVal-RetAddr+7; + if (diff >= -2147483648LL && diff <= 2147483647LL) { + *(unsigned char*)(RetAddr-0xc) = 0xE9; + *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff; + } else { + *(intptr_t *)(RetAddr - 0xa) = NewVal; + ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6)); + } + sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd); +#else + ((unsigned char*)RetAddr)[-1] = 0xE9; + sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5); +#endif + } + + // Change the return address to reexecute the call instruction... +#if defined (X86_64_JIT) + *RetAddrLoc -= 0xd; +#else + *RetAddrLoc -= 5; +#endif +} +} + +TargetJITInfo::LazyResolverFn +X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { + TsanIgnoreWritesBegin(); + JITCompilerFunction = F; + TsanIgnoreWritesEnd(); + +#if defined (X86_32_JIT) && !defined (_MSC_VER) +#if defined(__SSE__) + // SSE Callback should be called for SSE-enabled LLVM. 
+ return X86CompilationCallback_SSE; +#else + if (useSSE) + return X86CompilationCallback_SSE; +#endif +#endif + + return X86CompilationCallback; +} + +X86JITInfo::X86JITInfo(bool UseSSE) { + useSSE = UseSSE; + useGOT = 0; + TLSOffset = nullptr; +} + +void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) { +#if defined (X86_64_JIT) + const unsigned Alignment = 8; + uint8_t Buffer[8]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr); + MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32)); +#else + const unsigned Alignment = 4; + uint8_t Buffer[4]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr); +#endif + return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment); +} + +TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { + // The 64-bit stub contains: + // movabs r10 <- 8-byte-target-address # 10 bytes + // call|jmp *r10 # 3 bytes + // The 32-bit stub contains a 5-byte call|jmp. + // If the stub is a call to the compilation callback, an extra byte is added + // to mark it as a stub. + StubLayout Result = {14, 4}; + return Result; +} + +void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) { + // Note, we cast to intptr_t here to silence a -pedantic warning that + // complains about casting a function pointer to a normal pointer. 
+#if defined (X86_32_JIT) && !defined (_MSC_VER) + bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback && + Target != (void*)(intptr_t)X86CompilationCallback_SSE); +#else + bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback; +#endif + JCE.emitAlignment(4); + void *Result = (void*)JCE.getCurrentPCValue(); + if (NotCC) { +#if defined (X86_64_JIT) + JCE.emitByte(0x49); // REX prefix + JCE.emitByte(0xB8+2); // movabsq r10 + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); + JCE.emitByte(0x41); // REX prefix + JCE.emitByte(0xFF); // jmpq *r10 + JCE.emitByte(2 | (4 << 3) | (3 << 6)); +#else + JCE.emitByte(0xE9); + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); +#endif + return Result; + } + +#if defined (X86_64_JIT) + JCE.emitByte(0x49); // REX prefix + JCE.emitByte(0xB8+2); // movabsq r10 + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); + JCE.emitByte(0x41); // REX prefix + JCE.emitByte(0xFF); // callq *r10 + JCE.emitByte(2 | (2 << 3) | (3 << 6)); +#else + JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination... + + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); +#endif + + // This used to use 0xCD, but that value is used by JITMemoryManager to + // initialize the buffer with garbage, which means it may follow a + // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929. + JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! + return Result; +} + +/// getPICJumpTableEntry - Returns the value of the jumptable entry for the +/// specific basic block. 
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { +#if defined(X86_64_JIT) + return BB - Entry; +#else + return BB - PICBase; +#endif +} + +template<typename T> static void addUnaligned(void *Pos, T Delta) { + T Value; + std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), + sizeof(T)); + Value += Delta; + std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value), + sizeof(T)); +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void X86JITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((X86::RelocationType)MR->getRelocationType()) { + case X86::reloc_pcrel_word: { + // PC relative relocation, add the relocated value to the value already in + // memory, after we adjust it for where the PC is. + ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); + addUnaligned<unsigned>(RelocPos, ResultPtr); + break; + } + case X86::reloc_picrel_word: { + // PIC base relative relocation, add the relocated value to the value + // already in memory, after we adjust it for where the PIC base is. + ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); + addUnaligned<unsigned>(RelocPos, ResultPtr); + break; + } + case X86::reloc_absolute_word: + case X86::reloc_absolute_word_sext: + // Absolute relocation, just add the relocated value to the value already + // in memory.
+ addUnaligned<unsigned>(RelocPos, ResultPtr); + break; + case X86::reloc_absolute_dword: + addUnaligned<intptr_t>(RelocPos, ResultPtr); + break; + } + } +} + +char* X86JITInfo::allocateThreadLocalMemory(size_t size) { +#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER) + TLSOffset -= size; + return TLSOffset; +#else + llvm_unreachable("Cannot allocate thread local storage on this arch!"); +#endif +} diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h new file mode 100644 index 00000000000..564343ffa3f --- /dev/null +++ b/lib/Target/X86/X86JITInfo.h @@ -0,0 +1,79 @@ +//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86JITINFO_H +#define X86JITINFO_H + +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { + class X86Subtarget; + + class X86JITInfo : public TargetJITInfo { + uintptr_t PICBase; + char *TLSOffset; + bool useSSE; + public: + explicit X86JITInfo(bool UseSSE); + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr.
+ void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on X86. + StubLayout getStubLayout() override; + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) override; + + /// getPICJumpTableEntry - Returns the value of the jumptable entry for the + /// specific basic block. + uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) override; + + /// allocateThreadLocalMemory - Each target has its own way of + /// handling thread local variables. This method returns a value only + /// meaningful to the target. + char* allocateThreadLocalMemory(size_t size) override; + + /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute + /// PIC jumptable entry. 
+ void setPICBase(uintptr_t Base) { PICBase = Base; } + uintptr_t getPICBase() const { return PICBase; } + }; +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 3d13c4b59c0..c4caf06c936 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -356,7 +356,8 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, DL(computeDataLayout(*this)), TSInfo(DL), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(), - is64Bit() ? -8 : -4) {} + is64Bit() ? -8 : -4), + JITInfo(hasSSE1()) {} bool X86Subtarget::enableEarlyIfConversion() const { return hasCMov() && X86EarlyIfConv; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 45dc0b8ebe2..75e8ae5dc2b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -17,6 +17,7 @@ #include "X86FrameLowering.h" #include "X86ISelLowering.h" #include "X86InstrInfo.h" +#include "X86JITInfo.h" #include "X86SelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/CallingConv.h" @@ -242,6 +243,7 @@ private: X86InstrInfo InstrInfo; X86TargetLowering TLInfo; X86FrameLowering FrameLowering; + X86JITInfo JITInfo; public: /// This constructor initializes the data members to match that @@ -265,6 +267,7 @@ public: const X86RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } + X86JITInfo *getJITInfo() override { return &JITInfo; } /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0b1909f95c2..f12140f1f16 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -177,3 +177,10 @@ bool X86PassConfig::addPreEmitPass() { return ShouldPrint; } + 
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + PM.add(createX86JITCodeEmitterPass(*this, JCE)); + + return false; +} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 9de118a205e..633c5710315 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -33,11 +33,17 @@ public: CodeGenOpt::Level OL); const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; } + X86Subtarget *getSubtargetImpl() { + return static_cast<X86Subtarget *>(TargetMachine::getSubtargetImpl()); + } + /// \brief Register X86 analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; // Set up the pass pipeline. TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; } // End llvm namespace diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll index eb2fe8c0483..4c03519a85a 100644 --- a/test/ExecutionEngine/2002-12-16-ArgTest.ll +++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm @.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1] diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll index 68fdefefa54..3182193453a 100644 --- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll +++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm define i32 @foo(i32 %X, i32 %Y, double %A) { %cond212 = fcmp une double %A, 1.000000e+00 ; [#uses=1] diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll index 5a0311dd939..3e27e0607ba 100644 --- a/test/ExecutionEngine/2003-01-04-LoopTest.ll +++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm define i32 @main() {
call i32 @mylog( i32 4 ) ; :1 [#uses=0] diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll index 038d7500101..80e19ba1932 100644 --- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll +++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm define i32 @bar(i8* %X) { ; pointer should be 4 byte aligned! diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll index 576ef7cf638..6f61aa68b67 100644 --- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll +++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll @@ -1,6 +1,7 @@ ; This testcase should return with an exit code of 1. ; ; RUN: not %lli %s +; XFAIL: arm @test = global i64 0 ; [#uses=1] diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll index 42db5fe93fc..236be18d96e 100644 --- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll +++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s test +; XFAIL: arm declare i32 @puts(i8*) diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll index bee409c1441..22dd4ccb44c 100644 --- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll +++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm ; This testcase failed to work because two variable sized allocas confused the ; local register allocator. 
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll index 63303fcff7c..60dc3d6b7d4 100644 --- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll +++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm ; ; Regression Test: EnvironmentTest.ll diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll index 8fb1bbbe9d7..04a5e1741bb 100644 --- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll +++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm ; This testcase exposes a bug in the local register allocator where it runs out ; of registers (due to too many overlapping live ranges), but then attempts to diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll index 6513540903e..6e48c60db26 100644 --- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll +++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm @A = global i32 0 ; [#uses=1] diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll index 2ac8ad1795d..8523b5e3f5b 100644 --- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll +++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll @@ -1,5 +1,6 @@ ; PR672 ; RUN: %lli %s +; XFAIL: arm define i32 @main() { %f = bitcast i32 (i32, i32*, i32)* @check_tail to i32* ; [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll index eb2fe8c0483..babd8f6a780 100644 --- a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll +++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll @@ -1,4 +1,4 @@ 
-; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null @.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll index 68fdefefa54..bbb81b88b16 100644 --- a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll +++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null define i32 @foo(i32 %X, i32 %Y, double %A) { %cond212 = fcmp une double %A, 1.000000e+00 ; [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll index 5a0311dd939..7574267bdcd 100644 --- a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll +++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null define i32 @main() { call i32 @mylog( i32 4 ) ; :1 [#uses=0] diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll index 48576e7c83e..261939ad202 100644 --- a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll +++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null define i32 @main() { ;