[llvm.py] Implement disassembler interface
author Gregory Szorc <gregory.szorc@gmail.com>
Sun, 11 Mar 2012 02:32:56 +0000 (02:32 +0000)
committer Gregory Szorc <gregory.szorc@gmail.com>
Sun, 11 Mar 2012 02:32:56 +0000 (02:32 +0000)
It doesn't currently support the op info and symbol lookup callbacks,
but it is better than nothing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152527 91177308-0d34-0410-b5e6-96231b3b80d8

bindings/python/llvm/disassembler.py [new file with mode: 0644]
bindings/python/llvm/tests/test_disassembler.py [new file with mode: 0644]

diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py
new file mode 100644 (file)
index 0000000..5030b98
--- /dev/null
@@ -0,0 +1,134 @@
+#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from ctypes import CFUNCTYPE
+from ctypes import POINTER
+from ctypes import addressof
+from ctypes import byref
+from ctypes import c_byte
+from ctypes import c_char_p
+from ctypes import c_int
+from ctypes import c_size_t
+from ctypes import c_ubyte
+from ctypes import c_uint64
+from ctypes import c_void_p
+from ctypes import cast
+
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+
+# Names exported by ``from ... import *``.
+__all__ = ['Disassembler']
+
+# Library handle returned by get_library(); every native call in this module
+# goes through it.
+lib = get_library()
+
+# Maps a callback name to its ctypes function prototype. The entries are added
+# near the bottom of this module, before register_library() is invoked.
+callbacks = dict()
+
+class Disassembler(LLVMObject):
+    """Represents a disassembler instance.
+
+    Disassembler instances are tied to a specific "triple", which must be
+    defined at creation time.
+
+    Disassembler instances can disassemble instructions from multiple sources.
+    """
+    def __init__(self, triple):
+        """Create a new disassembler instance.
+
+        The triple argument is the triple to create the disassembler for. This
+        is something like 'i386-apple-darwin9'.
+
+        Raises Exception if the library returns no disassembler for the
+        triple.
+        """
+        # The op info and symbol lookup callbacks are not supported yet, so
+        # both prototypes are instantiated at address 0.
+        ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
+                callbacks['op_info'](0), callbacks['symbol_lookup'](0))
+        # Test the pointer itself: a NULL ctypes pointer is falsy, whereas
+        # reading .contents on it raises ValueError('NULL pointer access') and
+        # would hide the descriptive error below.
+        if not ptr:
+            raise Exception('Could not obtain disassembler for triple: %s' %
+                            triple)
+
+        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)
+
+    def get_instruction(self, source, pc=0):
+        """Obtain the next instruction from an input source.
+
+        The input source should be a str or bytearray or something that
+        represents a sequence of bytes.
+
+        This function will start reading bytes from the beginning of the
+        source.
+
+        The pc argument specifies the address that the first byte is at.
+
+        This returns a 2-tuple of:
+
+          long number of bytes read. 0 if no instruction was read.
+          str representation of instruction. This will be the assembly that
+            represents the instruction.
+        """
+        buf = cast(c_char_p(source), POINTER(c_ubyte))
+        # 255-byte scratch buffer that receives the instruction text; the same
+        # size is passed to the library as the output capacity.
+        out_str = cast((c_byte * 255)(), c_char_p)
+
+        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
+                                           c_uint64(pc), out_str, 255)
+
+        return (result, out_str.value)
+
+    def get_instructions(self, source, pc=0):
+        """Obtain multiple instructions from an input source.
+
+        This is like get_instruction() except it is a generator for all
+        instructions within the source. It starts at the beginning of the
+        source and reads instructions until no more can be read.
+
+        The pc argument specifies the address that the first byte is at.
+
+        This generator returns 3-tuple of:
+
+          long address of instruction.
+          long size of instruction, in bytes.
+          str representation of instruction.
+        """
+        source_bytes = c_char_p(source)
+        # One scratch buffer is shared by every iteration; its text is read
+        # out through .value each time a tuple is yielded.
+        out_str = cast((c_byte * 255)(), c_char_p)
+
+        # This could probably be written cleaner. But, it does work.
+        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
+        offset = 0
+        address = pc
+        end_address = pc + len(source)
+        while address < end_address:
+            # Point at the first byte that has not been decoded yet.
+            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
+            result = lib.LLVMDisasmInstruction(self, b,
+                    c_uint64(len(source) - offset), c_uint64(address),
+                    out_str, 255)
+
+            # A result of 0 means no instruction was read; stop iterating.
+            if result == 0:
+                break
+
+            yield (address, result, out_str.value)
+
+            address += result
+            offset += result
+
+
+def register_library(library):
+    """Declare the ctypes signatures of the disassembler entry points.
+
+    Sets argtypes and restype on LLVMCreateDisasm and LLVMDisasmInstruction,
+    and argtypes only on LLVMDisasmDispose. The callback prototypes are read
+    from the module-level callbacks dict, so they must be present when this
+    function runs.
+    """
+    op_info_type = callbacks['op_info']
+    symbol_lookup_type = callbacks['symbol_lookup']
+
+    # Constructor: triple, c_void_p, c_int, then the two callback prototypes.
+    library.LLVMCreateDisasm.restype = c_object_p
+    library.LLVMCreateDisasm.argtypes = [
+        c_char_p,
+        c_void_p,
+        c_int,
+        op_info_type,
+        symbol_lookup_type,
+    ]
+
+    # Destructor: takes the Disassembler object itself.
+    library.LLVMDisasmDispose.argtypes = [Disassembler]
+
+    # Decoder: input bytes and their length, the pc, then the output buffer
+    # and its capacity.
+    library.LLVMDisasmInstruction.restype = c_size_t
+    library.LLVMDisasmInstruction.argtypes = [
+        Disassembler,
+        POINTER(c_ubyte),
+        c_uint64,
+        c_uint64,
+        c_char_p,
+        c_size_t,
+    ]
+
+# ctypes prototype stored under 'op_info': returns c_int and takes
+# (c_void_p, c_uint64, c_uint64, c_uint64, c_int, c_void_p).
+# NOTE(review): presumably mirrors LLVMOpInfoCallback from
+# llvm-c/Disassembler.h -- confirm against the header.
+callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
+                                 c_int, c_void_p)
+# ctypes prototype stored under 'symbol_lookup': returns c_char_p and takes
+# (c_void_p, c_uint64, POINTER(c_uint64), c_uint64, POINTER(c_char_p)).
+# NOTE(review): presumably mirrors LLVMSymbolLookupCallback from
+# llvm-c/Disassembler.h -- confirm against the header.
+callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
+                                       POINTER(c_uint64), c_uint64,
+                                       POINTER(c_char_p))
+
+# Must run after the two prototypes above are stored: register_library()
+# reads callbacks['op_info'] and callbacks['symbol_lookup'].
+register_library(lib)
diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py
new file mode 100644 (file)
index 0000000..545e866
--- /dev/null
@@ -0,0 +1,28 @@
+from .base import TestBase
+
+from ..disassembler import Disassembler
+
+class TestDisassembler(TestBase):
+    """Tests for the Disassembler bindings using the i686-apple-darwin9 triple."""
+
+    def test_instantiate(self):
+        # Constructing a disassembler for a known triple must not raise.
+        Disassembler('i686-apple-darwin9')
+
+    def test_basic(self):
+        # A single three-byte instruction is decoded by get_instruction().
+        sequence = '\x67\xe3\x81' # jcxz -127
+        triple = 'i686-apple-darwin9'
+
+        disassembler = Disassembler(triple)
+
+        count, s = disassembler.get_instruction(sequence)
+        self.assertEqual(count, 3)
+        self.assertEqual(s, '\tjcxz\t-127')
+
+    def test_get_instructions(self):
+        # Two back-to-back instructions; each tuple is (address, size, text).
+        sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi
+
+        disassembler = Disassembler('i686-apple-darwin9')
+
+        instructions = list(disassembler.get_instructions(sequence))
+        self.assertEqual(len(instructions), 2)
+
+        self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127'))
+        self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi'))