From: Rafael Espindola Date: Wed, 2 Jul 2014 19:49:34 +0000 (+0000) Subject: Move CFG building code to a new lib/MC/MCAnalysis library. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=cca08872b013e4fffbfcef7093de631bcf6b9e0b;p=oota-llvm.git Move CFG building code to a new lib/MC/MCAnalysis library. The new library is 150KB on a Release+Asserts build, so it is quite a bit of code that regular users of MC don't need to link with now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212209 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/MC/MCAnalysis/MCAtom.h b/include/llvm/MC/MCAnalysis/MCAtom.h new file mode 100644 index 00000000000..e9d0fbacc1e --- /dev/null +++ b/include/llvm/MC/MCAnalysis/MCAtom.h @@ -0,0 +1,199 @@ +//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCAtom class, which is used to +// represent a contiguous region in a decoded object that is uniformly data or +// instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCATOM_H +#define LLVM_MC_MCATOM_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/DataTypes.h" +#include <vector> + +namespace llvm { + +class MCModule; + +class MCAtom; +class MCTextAtom; +class MCDataAtom; + +/// \brief Represents a contiguous range of either instructions (a TextAtom) +/// or data (a DataAtom). Address ranges are expressed as _closed_ intervals. +class MCAtom { + virtual void anchor(); +public: + virtual ~MCAtom() {} + + enum AtomKind { TextAtom, DataAtom }; + AtomKind getKind() const { return Kind; } + + /// \brief Get the start address of the atom. 
+ uint64_t getBeginAddr() const { return Begin; } + /// \brief Get the end address, i.e. the last one inside the atom. + uint64_t getEndAddr() const { return End; } + + /// \name Atom modification methods: + /// When modifying a TextAtom, keep instruction boundaries in mind. + /// For instance, split must me given the start address of an instruction. + /// @{ + + /// \brief Splits the atom in two at a given address. + /// \param SplitPt Address at which to start a new atom, splitting this one. + /// \returns The newly created atom starting at \p SplitPt. + virtual MCAtom *split(uint64_t SplitPt) = 0; + + /// \brief Truncates an atom, discarding everything after \p TruncPt. + /// \param TruncPt Last byte address to be contained in this atom. + virtual void truncate(uint64_t TruncPt) = 0; + /// @} + + /// \name Naming: + /// + /// This is mostly for display purposes, and may contain anything that hints + /// at what the atom contains: section or symbol name, BB start address, .. + /// @{ + StringRef getName() const { return Name; } + void setName(StringRef NewName) { Name = NewName.str(); } + /// @} + +protected: + const AtomKind Kind; + std::string Name; + MCModule *Parent; + uint64_t Begin, End; + + friend class MCModule; + MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E) + : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { } + + /// \name Atom remapping helpers + /// @{ + + /// \brief Remap the atom, using the given range, updating Begin/End. + /// One or both of the bounds can remain the same, but overlapping with other + /// atoms in the module is still forbidden. + void remap(uint64_t NewBegin, uint64_t NewEnd); + + /// \brief Remap the atom to prepare for a truncation at TruncPt. + /// Equivalent to: + /// \code + /// // Bound checks + /// remap(Begin, TruncPt); + /// \endcode + void remapForTruncate(uint64_t TruncPt); + + /// \brief Remap the atom to prepare for a split at SplitPt. 
+ /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}. + /// The current atom is truncated to \p LEnd. + void remapForSplit(uint64_t SplitPt, + uint64_t &LBegin, uint64_t &LEnd, + uint64_t &RBegin, uint64_t &REnd); + /// @} +}; + +/// \name Text atom +/// @{ + +/// \brief An entry in an MCTextAtom: a disassembled instruction. +/// NOTE: Both the Address and Size field are actually redundant when taken in +/// the context of the text atom, and may better be exposed in an iterator +/// instead of stored in the atom, which would replace this class. +class MCDecodedInst { +public: + MCInst Inst; + uint64_t Address; + uint64_t Size; + MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size) + : Inst(Inst), Address(Address), Size(Size) {} +}; + +/// \brief An atom consisting of disassembled instructions. +class MCTextAtom : public MCAtom { +private: + typedef std::vector InstListTy; + InstListTy Insts; + + /// \brief The address of the next appended instruction, i.e., the + /// address immediately after the last instruction in the atom. + uint64_t NextInstAddress; +public: + /// Append an instruction, expanding the atom if necessary. + void addInst(const MCInst &Inst, uint64_t Size); + + /// \name Instruction list access + /// @{ + typedef InstListTy::const_iterator const_iterator; + const_iterator begin() const { return Insts.begin(); } + const_iterator end() const { return Insts.end(); } + + const MCDecodedInst &back() const { return Insts.back(); } + const MCDecodedInst &at(size_t n) const { return Insts.at(n); } + size_t size() const { return Insts.size(); } + /// @} + + /// \name Atom type specific split/truncate logic. + /// @{ + MCTextAtom *split(uint64_t SplitPt) override; + void truncate(uint64_t TruncPt) override; + /// @} + + // Class hierarchy. 
+ static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; } +private: + friend class MCModule; + // Private constructor - only callable by MCModule + MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End) + : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {} +}; +/// @} + +/// \name Data atom +/// @{ + +/// \brief An entry in an MCDataAtom. +// NOTE: This may change to a more complex type in the future. +typedef uint8_t MCData; + +/// \brief An atom consisting of a sequence of bytes. +class MCDataAtom : public MCAtom { + std::vector<MCData> Data; + +public: + /// Append a data entry, expanding the atom if necessary. + void addData(const MCData &D); + + /// Get a reference to the data in this atom. + ArrayRef<MCData> getData() const { return Data; } + + /// \name Atom type specific split/truncate logic. + /// @{ + MCDataAtom *split(uint64_t SplitPt) override; + void truncate(uint64_t TruncPt) override; + /// @} + + // Class hierarchy. + static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; } +private: + friend class MCModule; + // Private constructor - only callable by MCModule + MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End) + : MCAtom(DataAtom, P, Begin, End) { + Data.reserve(End + 1 - Begin); + } +}; + +} + +#endif diff --git a/include/llvm/MC/MCAnalysis/MCFunction.h b/include/llvm/MC/MCAnalysis/MCFunction.h new file mode 100644 index 00000000000..bfa470b5f42 --- /dev/null +++ b/include/llvm/MC/MCAnalysis/MCFunction.h @@ -0,0 +1,142 @@ +//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the data structures to hold a CFG reconstructed from +// machine code. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCFUNCTION_H +#define LLVM_MC_MCFUNCTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include +#include +#include + +namespace llvm { + +class MCFunction; +class MCModule; +class MCTextAtom; + +/// \brief Basic block containing a sequence of disassembled instructions. +/// The basic block is backed by an MCTextAtom, which holds the instructions, +/// and the address range it covers. +/// Create a basic block using MCFunction::createBlock. +class MCBasicBlock { + const MCTextAtom *Insts; + + // MCFunction owns the basic block. + MCFunction *Parent; + friend class MCFunction; + MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent); + + /// \name Predecessors/Successors, to represent the CFG. + /// @{ + typedef std::vector BasicBlockListTy; + BasicBlockListTy Successors; + BasicBlockListTy Predecessors; + /// @} +public: + + /// \brief Get the backing MCTextAtom, containing the instruction sequence. + const MCTextAtom *getInsts() const { return Insts; } + + /// \name Get the owning MCFunction. + /// @{ + const MCFunction *getParent() const { return Parent; } + MCFunction *getParent() { return Parent; } + /// @} + + /// MC CFG access: Predecessors/Successors. 
+ /// @{ + typedef BasicBlockListTy::const_iterator succ_const_iterator; + succ_const_iterator succ_begin() const { return Successors.begin(); } + succ_const_iterator succ_end() const { return Successors.end(); } + + typedef BasicBlockListTy::const_iterator pred_const_iterator; + pred_const_iterator pred_begin() const { return Predecessors.begin(); } + pred_const_iterator pred_end() const { return Predecessors.end(); } + + void addSuccessor(const MCBasicBlock *MCBB); + bool isSuccessor(const MCBasicBlock *MCBB) const; + + void addPredecessor(const MCBasicBlock *MCBB); + bool isPredecessor(const MCBasicBlock *MCBB) const; + + /// \brief Split block, mirroring NewAtom = Insts->split(..). + /// This moves all successors to \p SplitBB, and + /// adds a fallthrough to it. + /// \p SplitBB The result of splitting Insts, a basic block directly following + /// this basic block. + void splitBasicBlock(MCBasicBlock *SplitBB); + /// @} +}; + +/// \brief Represents a function in machine code, containing MCBasicBlocks. +/// MCFunctions are created by MCModule. +class MCFunction { + MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION; + MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION; + + std::string Name; + MCModule *ParentModule; + typedef std::vector<std::unique_ptr<MCBasicBlock>> BasicBlockListTy; + BasicBlockListTy Blocks; + + // MCModule owns the function. + friend class MCModule; + MCFunction(StringRef Name, MCModule *Parent); + +public: + /// \brief Create an MCBasicBlock backed by Insts and add it to this function. + /// \param Insts Sequence of straight-line code backing the basic block. + /// \returns The newly created basic block. + MCBasicBlock &createBlock(const MCTextAtom &Insts); + + StringRef getName() const { return Name; } + + /// \name Get the owning MC Module. + /// @{ + const MCModule *getParent() const { return ParentModule; } + MCModule *getParent() { return ParentModule; } + /// @} + + /// \name Access to the function's basic blocks. 
No ordering is enforced, + /// except that the first block is the entry block. + /// @{ + /// \brief Get the entry point basic block. + const MCBasicBlock *getEntryBlock() const { return front(); } + MCBasicBlock *getEntryBlock() { return front(); } + + bool empty() const { return Blocks.empty(); } + + typedef BasicBlockListTy::const_iterator const_iterator; + typedef BasicBlockListTy:: iterator iterator; + const_iterator begin() const { return Blocks.begin(); } + iterator begin() { return Blocks.begin(); } + const_iterator end() const { return Blocks.end(); } + iterator end() { return Blocks.end(); } + + const MCBasicBlock* front() const { return Blocks.front().get(); } + MCBasicBlock* front() { return Blocks.front().get(); } + const MCBasicBlock* back() const { return Blocks.back().get(); } + MCBasicBlock* back() { return Blocks.back().get(); } + + /// \brief Find the basic block, if any, that starts at \p StartAddr. + const MCBasicBlock *find(uint64_t StartAddr) const; + MCBasicBlock *find(uint64_t StartAddr); + /// @} +}; + +} + +#endif diff --git a/include/llvm/MC/MCAnalysis/MCModule.h b/include/llvm/MC/MCAnalysis/MCModule.h new file mode 100644 index 00000000000..aa389cbb0b2 --- /dev/null +++ b/include/llvm/MC/MCAnalysis/MCModule.h @@ -0,0 +1,134 @@ +//===-- llvm/MC/MCModule.h - MCModule class ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCModule class, which is used to +// represent a complete, disassembled object file or executable. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCMODULE_H +#define LLVM_MC_MCMODULE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { + +class MCAtom; +class MCBasicBlock; +class MCDataAtom; +class MCFunction; +class MCObjectDisassembler; +class MCTextAtom; + +/// \brief A completely disassembled object file or executable. +/// It comprises a list of MCAtom's, each representing a contiguous range of +/// either instructions or data. +/// An MCModule is created using MCObjectDisassembler::buildModule. +class MCModule { + /// \name Atom tracking + /// @{ + + /// \brief Atoms in this module, sorted by begin address. + /// FIXME: This doesn't handle overlapping atoms (which happen when a basic + /// block starts in the middle of an instruction of another basic block.) + typedef std::vector AtomListTy; + AtomListTy Atoms; + + // For access to map/remap. + friend class MCAtom; + + /// \brief Remap \p Atom to the given range, and update its Begin/End fields. + /// \param Atom An atom belonging to this module. + /// An atom should always use this method to update its bounds, because this + /// enables the owning MCModule to keep track of its atoms. + void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd); + + /// \brief Insert an atom in the module, using its Begin and End addresses. + void map(MCAtom *NewAtom); + /// @} + + /// \name Basic block tracking + /// @{ + typedef std::vector BBsByAtomTy; + BBsByAtomTy BBsByAtom; + + // For access to basic block > atom tracking. + friend class MCBasicBlock; + friend class MCTextAtom; + + /// \brief Keep track of \p BBBackedByAtom as being backed by \p Atom. + /// This is used to update succs/preds when \p Atom is split. 
+ void trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BBBackedByAtom); + void splitBasicBlocksForAtom(const MCTextAtom *TA, const MCTextAtom *NewTA); + /// @} + + /// \name Function tracking + /// @{ + typedef std::vector> FunctionListTy; + FunctionListTy Functions; + /// @} + + /// The address of the entrypoint function. + uint64_t Entrypoint; + + MCModule (const MCModule &) LLVM_DELETED_FUNCTION; + MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION; + + // MCObjectDisassembler creates MCModules. + friend class MCObjectDisassembler; + +public: + MCModule(); + ~MCModule(); + + /// \name Create a new MCAtom covering the specified offset range. + /// @{ + MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End); + MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End); + /// @} + + /// \name Access to the owned atom list, ordered by begin address. + /// @{ + const MCAtom *findAtomContaining(uint64_t Addr) const; + MCAtom *findAtomContaining(uint64_t Addr); + const MCAtom *findFirstAtomAfter(uint64_t Addr) const; + MCAtom *findFirstAtomAfter(uint64_t Addr); + + typedef AtomListTy::const_iterator const_atom_iterator; + typedef AtomListTy:: iterator atom_iterator; + const_atom_iterator atom_begin() const { return Atoms.begin(); } + atom_iterator atom_begin() { return Atoms.begin(); } + const_atom_iterator atom_end() const { return Atoms.end(); } + atom_iterator atom_end() { return Atoms.end(); } + /// @} + + /// \brief Create a new MCFunction. + MCFunction *createFunction(StringRef Name); + + /// \name Access to the owned function list. 
+ /// @{ + typedef FunctionListTy::const_iterator const_func_iterator; + typedef FunctionListTy:: iterator func_iterator; + const_func_iterator func_begin() const { return Functions.begin(); } + func_iterator func_begin() { return Functions.begin(); } + const_func_iterator func_end() const { return Functions.end(); } + func_iterator func_end() { return Functions.end(); } + /// @} + + /// \brief Get the address of the entrypoint function, or 0 if there is none. + uint64_t getEntrypoint() const { return Entrypoint; } +}; + +} + +#endif diff --git a/include/llvm/MC/MCAnalysis/MCModuleYAML.h b/include/llvm/MC/MCAnalysis/MCModuleYAML.h new file mode 100644 index 00000000000..13b58a7ad64 --- /dev/null +++ b/include/llvm/MC/MCAnalysis/MCModuleYAML.h @@ -0,0 +1,40 @@ +//===- MCModuleYAML.h - MCModule YAMLIO implementation ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares classes for handling the YAML representation +/// of MCModule. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCMODULEYAML_H +#define LLVM_MC_MCMODULEYAML_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class MCInstrInfo; +class MCRegisterInfo; + +/// \brief Dump a YAML representation of the MCModule \p MCM to \p OS. +/// \returns The empty string on success, an error message on failure. +StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, + const MCInstrInfo &MII, const MCRegisterInfo &MRI); + +/// \brief Creates a new module and returns it in \p MCM. +/// \returns The empty string on success, an error message on failure. 
+StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent, + const MCInstrInfo &MII, const MCRegisterInfo &MRI); + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h deleted file mode 100644 index e9d0fbacc1e..00000000000 --- a/include/llvm/MC/MCAtom.h +++ /dev/null @@ -1,199 +0,0 @@ -//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the MCAtom class, which is used to -// represent a contiguous region in a decoded object that is uniformly data or -// instructions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MC_MCATOM_H -#define LLVM_MC_MCATOM_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/DataTypes.h" -#include <vector> - -namespace llvm { - -class MCModule; - -class MCAtom; -class MCTextAtom; -class MCDataAtom; - -/// \brief Represents a contiguous range of either instructions (a TextAtom) -/// or data (a DataAtom). Address ranges are expressed as _closed_ intervals. -class MCAtom { - virtual void anchor(); -public: - virtual ~MCAtom() {} - - enum AtomKind { TextAtom, DataAtom }; - AtomKind getKind() const { return Kind; } - - /// \brief Get the start address of the atom. - uint64_t getBeginAddr() const { return Begin; } - /// \brief Get the end address, i.e. the last one inside the atom. - uint64_t getEndAddr() const { return End; } - - /// \name Atom modification methods: - /// When modifying a TextAtom, keep instruction boundaries in mind. - /// For instance, split must me given the start address of an instruction. - /// @{ - - /// \brief Splits the atom in two at a given address. 
- /// \param SplitPt Address at which to start a new atom, splitting this one. - /// \returns The newly created atom starting at \p SplitPt. - virtual MCAtom *split(uint64_t SplitPt) = 0; - - /// \brief Truncates an atom, discarding everything after \p TruncPt. - /// \param TruncPt Last byte address to be contained in this atom. - virtual void truncate(uint64_t TruncPt) = 0; - /// @} - - /// \name Naming: - /// - /// This is mostly for display purposes, and may contain anything that hints - /// at what the atom contains: section or symbol name, BB start address, .. - /// @{ - StringRef getName() const { return Name; } - void setName(StringRef NewName) { Name = NewName.str(); } - /// @} - -protected: - const AtomKind Kind; - std::string Name; - MCModule *Parent; - uint64_t Begin, End; - - friend class MCModule; - MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E) - : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { } - - /// \name Atom remapping helpers - /// @{ - - /// \brief Remap the atom, using the given range, updating Begin/End. - /// One or both of the bounds can remain the same, but overlapping with other - /// atoms in the module is still forbidden. - void remap(uint64_t NewBegin, uint64_t NewEnd); - - /// \brief Remap the atom to prepare for a truncation at TruncPt. - /// Equivalent to: - /// \code - /// // Bound checks - /// remap(Begin, TruncPt); - /// \endcode - void remapForTruncate(uint64_t TruncPt); - - /// \brief Remap the atom to prepare for a split at SplitPt. - /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}. - /// The current atom is truncated to \p LEnd. - void remapForSplit(uint64_t SplitPt, - uint64_t &LBegin, uint64_t &LEnd, - uint64_t &RBegin, uint64_t &REnd); - /// @} -}; - -/// \name Text atom -/// @{ - -/// \brief An entry in an MCTextAtom: a disassembled instruction. 
-/// NOTE: Both the Address and Size field are actually redundant when taken in -/// the context of the text atom, and may better be exposed in an iterator -/// instead of stored in the atom, which would replace this class. -class MCDecodedInst { -public: - MCInst Inst; - uint64_t Address; - uint64_t Size; - MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size) - : Inst(Inst), Address(Address), Size(Size) {} -}; - -/// \brief An atom consisting of disassembled instructions. -class MCTextAtom : public MCAtom { -private: - typedef std::vector InstListTy; - InstListTy Insts; - - /// \brief The address of the next appended instruction, i.e., the - /// address immediately after the last instruction in the atom. - uint64_t NextInstAddress; -public: - /// Append an instruction, expanding the atom if necessary. - void addInst(const MCInst &Inst, uint64_t Size); - - /// \name Instruction list access - /// @{ - typedef InstListTy::const_iterator const_iterator; - const_iterator begin() const { return Insts.begin(); } - const_iterator end() const { return Insts.end(); } - - const MCDecodedInst &back() const { return Insts.back(); } - const MCDecodedInst &at(size_t n) const { return Insts.at(n); } - size_t size() const { return Insts.size(); } - /// @} - - /// \name Atom type specific split/truncate logic. - /// @{ - MCTextAtom *split(uint64_t SplitPt) override; - void truncate(uint64_t TruncPt) override; - /// @} - - // Class hierarchy. - static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; } -private: - friend class MCModule; - // Private constructor - only callable by MCModule - MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End) - : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {} -}; -/// @} - -/// \name Data atom -/// @{ - -/// \brief An entry in an MCDataAtom. -// NOTE: This may change to a more complex type in the future. -typedef uint8_t MCData; - -/// \brief An atom consising of a sequence of bytes. 
-class MCDataAtom : public MCAtom { - std::vector Data; - -public: - /// Append a data entry, expanding the atom if necessary. - void addData(const MCData &D); - - /// Get a reference to the data in this atom. - ArrayRef getData() const { return Data; } - - /// \name Atom type specific split/truncate logic. - /// @{ - MCDataAtom *split(uint64_t SplitPt) override; - void truncate(uint64_t TruncPt) override; - /// @} - - // Class hierarchy. - static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; } -private: - friend class MCModule; - // Private constructor - only callable by MCModule - MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End) - : MCAtom(DataAtom, P, Begin, End) { - Data.reserve(End + 1 - Begin); - } -}; - -} - -#endif diff --git a/include/llvm/MC/MCFunction.h b/include/llvm/MC/MCFunction.h deleted file mode 100644 index bfa470b5f42..00000000000 --- a/include/llvm/MC/MCFunction.h +++ /dev/null @@ -1,142 +0,0 @@ -//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the data structures to hold a CFG reconstructed from -// machine code. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MC_MCFUNCTION_H -#define LLVM_MC_MCFUNCTION_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInst.h" -#include -#include -#include - -namespace llvm { - -class MCFunction; -class MCModule; -class MCTextAtom; - -/// \brief Basic block containing a sequence of disassembled instructions. -/// The basic block is backed by an MCTextAtom, which holds the instructions, -/// and the address range it covers. -/// Create a basic block using MCFunction::createBlock. 
-class MCBasicBlock { - const MCTextAtom *Insts; - - // MCFunction owns the basic block. - MCFunction *Parent; - friend class MCFunction; - MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent); - - /// \name Predecessors/Successors, to represent the CFG. - /// @{ - typedef std::vector BasicBlockListTy; - BasicBlockListTy Successors; - BasicBlockListTy Predecessors; - /// @} -public: - - /// \brief Get the backing MCTextAtom, containing the instruction sequence. - const MCTextAtom *getInsts() const { return Insts; } - - /// \name Get the owning MCFunction. - /// @{ - const MCFunction *getParent() const { return Parent; } - MCFunction *getParent() { return Parent; } - /// @} - - /// MC CFG access: Predecessors/Successors. - /// @{ - typedef BasicBlockListTy::const_iterator succ_const_iterator; - succ_const_iterator succ_begin() const { return Successors.begin(); } - succ_const_iterator succ_end() const { return Successors.end(); } - - typedef BasicBlockListTy::const_iterator pred_const_iterator; - pred_const_iterator pred_begin() const { return Predecessors.begin(); } - pred_const_iterator pred_end() const { return Predecessors.end(); } - - void addSuccessor(const MCBasicBlock *MCBB); - bool isSuccessor(const MCBasicBlock *MCBB) const; - - void addPredecessor(const MCBasicBlock *MCBB); - bool isPredecessor(const MCBasicBlock *MCBB) const; - - /// \brief Split block, mirrorring NewAtom = Insts->split(..). - /// This moves all successors to \p SplitBB, and - /// adds a fallthrough to it. - /// \p SplitBB The result of splitting Insts, a basic block directly following - /// this basic block. - void splitBasicBlock(MCBasicBlock *SplitBB); - /// @} -}; - -/// \brief Represents a function in machine code, containing MCBasicBlocks. -/// MCFunctions are created by MCModule. 
-class MCFunction { - MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION; - MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION; - - std::string Name; - MCModule *ParentModule; - typedef std::vector> BasicBlockListTy; - BasicBlockListTy Blocks; - - // MCModule owns the function. - friend class MCModule; - MCFunction(StringRef Name, MCModule *Parent); - -public: - /// \brief Create an MCBasicBlock backed by Insts and add it to this function. - /// \param Insts Sequence of straight-line code backing the basic block. - /// \returns The newly created basic block. - MCBasicBlock &createBlock(const MCTextAtom &Insts); - - StringRef getName() const { return Name; } - - /// \name Get the owning MC Module. - /// @{ - const MCModule *getParent() const { return ParentModule; } - MCModule *getParent() { return ParentModule; } - /// @} - - /// \name Access to the function's basic blocks. No ordering is enforced, - /// except that the first block is the entry block. - /// @{ - /// \brief Get the entry point basic block. - const MCBasicBlock *getEntryBlock() const { return front(); } - MCBasicBlock *getEntryBlock() { return front(); } - - bool empty() const { return Blocks.empty(); } - - typedef BasicBlockListTy::const_iterator const_iterator; - typedef BasicBlockListTy:: iterator iterator; - const_iterator begin() const { return Blocks.begin(); } - iterator begin() { return Blocks.begin(); } - const_iterator end() const { return Blocks.end(); } - iterator end() { return Blocks.end(); } - - const MCBasicBlock* front() const { return Blocks.front().get(); } - MCBasicBlock* front() { return Blocks.front().get(); } - const MCBasicBlock* back() const { return Blocks.back().get(); } - MCBasicBlock* back() { return Blocks.back().get(); } - - /// \brief Find the basic block, if any, that starts at \p StartAddr. 
- const MCBasicBlock *find(uint64_t StartAddr) const; - MCBasicBlock *find(uint64_t StartAddr); - /// @} -}; - -} - -#endif diff --git a/include/llvm/MC/MCModule.h b/include/llvm/MC/MCModule.h deleted file mode 100644 index aa389cbb0b2..00000000000 --- a/include/llvm/MC/MCModule.h +++ /dev/null @@ -1,134 +0,0 @@ -//===-- llvm/MC/MCModule.h - MCModule class ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the MCModule class, which is used to -// represent a complete, disassembled object file or executable. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MC_MCMODULE_H -#define LLVM_MC_MCMODULE_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/DataTypes.h" -#include -#include - -namespace llvm { - -class MCAtom; -class MCBasicBlock; -class MCDataAtom; -class MCFunction; -class MCObjectDisassembler; -class MCTextAtom; - -/// \brief A completely disassembled object file or executable. -/// It comprises a list of MCAtom's, each representing a contiguous range of -/// either instructions or data. -/// An MCModule is created using MCObjectDisassembler::buildModule. -class MCModule { - /// \name Atom tracking - /// @{ - - /// \brief Atoms in this module, sorted by begin address. - /// FIXME: This doesn't handle overlapping atoms (which happen when a basic - /// block starts in the middle of an instruction of another basic block.) - typedef std::vector AtomListTy; - AtomListTy Atoms; - - // For access to map/remap. - friend class MCAtom; - - /// \brief Remap \p Atom to the given range, and update its Begin/End fields. - /// \param Atom An atom belonging to this module. 
- /// An atom should always use this method to update its bounds, because this - /// enables the owning MCModule to keep track of its atoms. - void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd); - - /// \brief Insert an atom in the module, using its Begin and End addresses. - void map(MCAtom *NewAtom); - /// @} - - /// \name Basic block tracking - /// @{ - typedef std::vector BBsByAtomTy; - BBsByAtomTy BBsByAtom; - - // For access to basic block > atom tracking. - friend class MCBasicBlock; - friend class MCTextAtom; - - /// \brief Keep track of \p BBBackedByAtom as being backed by \p Atom. - /// This is used to update succs/preds when \p Atom is split. - void trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BBBackedByAtom); - void splitBasicBlocksForAtom(const MCTextAtom *TA, const MCTextAtom *NewTA); - /// @} - - /// \name Function tracking - /// @{ - typedef std::vector> FunctionListTy; - FunctionListTy Functions; - /// @} - - /// The address of the entrypoint function. - uint64_t Entrypoint; - - MCModule (const MCModule &) LLVM_DELETED_FUNCTION; - MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION; - - // MCObjectDisassembler creates MCModules. - friend class MCObjectDisassembler; - -public: - MCModule(); - ~MCModule(); - - /// \name Create a new MCAtom covering the specified offset range. - /// @{ - MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End); - MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End); - /// @} - - /// \name Access to the owned atom list, ordered by begin address. 
- /// @{ - const MCAtom *findAtomContaining(uint64_t Addr) const; - MCAtom *findAtomContaining(uint64_t Addr); - const MCAtom *findFirstAtomAfter(uint64_t Addr) const; - MCAtom *findFirstAtomAfter(uint64_t Addr); - - typedef AtomListTy::const_iterator const_atom_iterator; - typedef AtomListTy:: iterator atom_iterator; - const_atom_iterator atom_begin() const { return Atoms.begin(); } - atom_iterator atom_begin() { return Atoms.begin(); } - const_atom_iterator atom_end() const { return Atoms.end(); } - atom_iterator atom_end() { return Atoms.end(); } - /// @} - - /// \brief Create a new MCFunction. - MCFunction *createFunction(StringRef Name); - - /// \name Access to the owned function list. - /// @{ - typedef FunctionListTy::const_iterator const_func_iterator; - typedef FunctionListTy:: iterator func_iterator; - const_func_iterator func_begin() const { return Functions.begin(); } - func_iterator func_begin() { return Functions.begin(); } - const_func_iterator func_end() const { return Functions.end(); } - func_iterator func_end() { return Functions.end(); } - /// @} - - /// \brief Get the address of the entrypoint function, or 0 if there is none. - uint64_t getEntrypoint() const { return Entrypoint; } -}; - -} - -#endif diff --git a/include/llvm/MC/MCModuleYAML.h b/include/llvm/MC/MCModuleYAML.h deleted file mode 100644 index c4ae829535c..00000000000 --- a/include/llvm/MC/MCModuleYAML.h +++ /dev/null @@ -1,40 +0,0 @@ -//===- MCModuleYAML.h - MCModule YAMLIO implementation ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief This file declares classes for handling the YAML representation -/// of MCModule. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MC_MCMODULEYAML_H -#define LLVM_MC_MCMODULEYAML_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCModule.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - -class MCInstrInfo; -class MCRegisterInfo; - -/// \brief Dump a YAML representation of the MCModule \p MCM to \p OS. -/// \returns The empty string on success, an error message on failure. -StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, - const MCInstrInfo &MII, const MCRegisterInfo &MRI); - -/// \brief Creates a new module and returns it in \p MCM. -/// \returns The empty string on success, an error message on failure. -StringRef yaml2mcmodule(std::unique_ptr &MCM, StringRef YamlContent, - const MCInstrInfo &MII, const MCRegisterInfo &MRI); - -} // end namespace llvm - -#endif diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 78bd8c4ba14..f62894cab39 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -8,7 +8,6 @@ add_llvm_library(LLVMMC MCAsmInfoELF.cpp MCAsmStreamer.cpp MCAssembler.cpp - MCAtom.cpp MCCodeEmitter.cpp MCCodeGenInfo.cpp MCContext.cpp @@ -17,7 +16,6 @@ add_llvm_library(LLVMMC MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp - MCFunction.cpp MCExpr.cpp MCExternalSymbolizer.cpp MCInst.cpp @@ -27,13 +25,9 @@ add_llvm_library(LLVMMC MCLinkerOptimizationHint.cpp MCMachOStreamer.cpp MCMachObjectTargetWriter.cpp - MCModule.cpp - MCModuleYAML.cpp MCNullStreamer.cpp MCObjectFileInfo.cpp - MCObjectDisassembler.cpp MCObjectStreamer.cpp - MCObjectSymbolizer.cpp MCObjectWriter.cpp MCRegisterInfo.cpp MCRelocationInfo.cpp @@ -54,5 +48,6 @@ add_llvm_library(LLVMMC WinCOFFStreamer.cpp ) +add_subdirectory(MCAnalysis) add_subdirectory(MCParser) add_subdirectory(MCDisassembler) diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt index f35dbe4d5d3..da9995d6c57 100644 --- a/lib/MC/LLVMBuild.txt +++ b/lib/MC/LLVMBuild.txt @@ -16,7 +16,7 @@ 
;===------------------------------------------------------------------------===; [common] -subdirectories = MCDisassembler MCParser +subdirectories = MCAnalysis MCDisassembler MCParser [component_0] type = Library diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt new file mode 100644 index 00000000000..81eae2dfb15 --- /dev/null +++ b/lib/MC/MCAnalysis/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMMCAnalysis + MCAtom.cpp + MCFunction.cpp + MCModule.cpp + MCModuleYAML.cpp + MCObjectDisassembler.cpp + MCObjectSymbolizer.cpp +) diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt new file mode 100644 index 00000000000..1b58fec6cc4 --- /dev/null +++ b/lib/MC/MCAnalysis/LLVMBuild.txt @@ -0,0 +1,5 @@ +[component_0] +type = Library +name = MCAnalysis +parent = Libraries +required_libraries = MC Object Support diff --git a/lib/MC/MCAnalysis/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp new file mode 100644 index 00000000000..82056eed1ea --- /dev/null +++ b/lib/MC/MCAnalysis/MCAtom.cpp @@ -0,0 +1,114 @@ +//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using namespace llvm; + +// Pin the vtable to this file. 
+void MCAtom::anchor() {} + +void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) { + Parent->remap(this, NewBegin, NewEnd); +} + +void MCAtom::remapForTruncate(uint64_t TruncPt) { + assert((TruncPt >= Begin && TruncPt < End) && + "Truncation point not contained in atom!"); + remap(Begin, TruncPt); +} + +void MCAtom::remapForSplit(uint64_t SplitPt, + uint64_t &LBegin, uint64_t &LEnd, + uint64_t &RBegin, uint64_t &REnd) { + assert((SplitPt > Begin && SplitPt <= End) && + "Splitting at point not contained in atom!"); + + // Compute the new begin/end points. + LBegin = Begin; + LEnd = SplitPt - 1; + RBegin = SplitPt; + REnd = End; + + // Remap this atom to become the lower of the two new ones. + remap(LBegin, LEnd); +} + +// MCDataAtom + +void MCDataAtom::addData(const MCData &D) { + Data.push_back(D); + if (Data.size() > End + 1 - Begin) + remap(Begin, End + 1); +} + +void MCDataAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); + + Data.resize(TruncPt - Begin + 1); +} + +MCDataAtom *MCDataAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); + + MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd); + RightAtom->setName(getName()); + + std::vector::iterator I = Data.begin() + (RBegin - LBegin); + assert(I != Data.end() && "Split point not found in range!"); + + std::copy(I, Data.end(), std::back_inserter(RightAtom->Data)); + Data.erase(I, Data.end()); + return RightAtom; +} + +// MCTextAtom + +void MCTextAtom::addInst(const MCInst &I, uint64_t Size) { + if (NextInstAddress + Size - 1 > End) + remap(Begin, NextInstAddress + Size - 1); + Insts.push_back(MCDecodedInst(I, NextInstAddress, Size)); + NextInstAddress += Size; +} + +void MCTextAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); + + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address <= TruncPt) ++I; + + assert(I != Insts.end() && "Truncation point not found in disassembly!"); 
+ assert(I->Address == TruncPt + 1 && + "Truncation point does not fall on instruction boundary"); + + Insts.erase(I, Insts.end()); +} + +MCTextAtom *MCTextAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); + + MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd); + RightAtom->setName(getName()); + + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address < SplitPt) ++I; + assert(I != Insts.end() && "Split point not found in disassembly!"); + assert(I->Address == SplitPt && + "Split point does not fall on instruction boundary!"); + + std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts)); + Insts.erase(I, Insts.end()); + Parent->splitBasicBlocksForAtom(this, RightAtom); + return RightAtom; +} diff --git a/lib/MC/MCAnalysis/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp new file mode 100644 index 00000000000..4e09d1a52da --- /dev/null +++ b/lib/MC/MCAnalysis/MCFunction.cpp @@ -0,0 +1,76 @@ +//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include + +using namespace llvm; + +// MCFunction + +MCFunction::MCFunction(StringRef Name, MCModule *Parent) + : Name(Name), ParentModule(Parent) +{} + +MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) { + std::unique_ptr MCBB(new MCBasicBlock(TA, this)); + Blocks.push_back(std::move(MCBB)); + return *Blocks.back(); +} + +MCBasicBlock *MCFunction::find(uint64_t StartAddr) { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if ((*I)->getInsts()->getBeginAddr() == StartAddr) + return I->get(); + return nullptr; +} + +const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const { + return const_cast(this)->find(StartAddr); +} + +// MCBasicBlock + +MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent) + : Insts(&Insts), Parent(Parent) { + getParent()->getParent()->trackBBForAtom(&Insts, this); +} + +void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) { + if (!isSuccessor(MCBB)) + Successors.push_back(MCBB); +} + +bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const { + return std::find(Successors.begin(), Successors.end(), + MCBB) != Successors.end(); +} + +void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) { + if (!isPredecessor(MCBB)) + Predecessors.push_back(MCBB); +} + +bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const { + return std::find(Predecessors.begin(), Predecessors.end(), + MCBB) != Predecessors.end(); +} + +void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) { + assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() && + "Splitting unrelated basic blocks!"); + SplitBB->addPredecessor(this); + assert(SplitBB->Successors.empty() && + "Split basic block shouldn't already have successors!"); + SplitBB->Successors = Successors; + Successors.clear(); + 
addSuccessor(SplitBB); +} diff --git a/lib/MC/MCAnalysis/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp new file mode 100644 index 00000000000..7512299c9e0 --- /dev/null +++ b/lib/MC/MCAnalysis/MCModule.cpp @@ -0,0 +1,142 @@ +//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include + +using namespace llvm; + +static bool AtomComp(const MCAtom *L, uint64_t Addr) { + return L->getEndAddr() < Addr; +} + +static bool AtomCompInv(uint64_t Addr, const MCAtom *R) { + return Addr < R->getEndAddr(); +} + +void MCModule::map(MCAtom *NewAtom) { + uint64_t Begin = NewAtom->Begin; + + assert(Begin <= NewAtom->End && "Creating MCAtom with endpoints reversed?"); + + // Check for atoms already covering this range. + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Begin, AtomComp); + assert((I == atom_end() || (*I)->getBeginAddr() > NewAtom->End) + && "Offset range already occupied!"); + + // Insert the new atom to the list. + Atoms.insert(I, NewAtom); +} + +MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) { + MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End); + map(NewAtom); + return NewAtom; +} + +MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) { + MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End); + map(NewAtom); + return NewAtom; +} + +// remap - Update the interval mapping for an atom. +void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) { + // Find and erase the old mapping. 
+ AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Atom->Begin, AtomComp); + assert(I != atom_end() && "Atom offset not found in module!"); + assert(*I == Atom && "Previous atom mapping was invalid!"); + Atoms.erase(I); + + // FIXME: special case NewBegin == Atom->Begin + + // Insert the new mapping. + AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(), + NewBegin, AtomComp); + assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End) + && "Offset range already occupied!"); + Atoms.insert(NewI, Atom); + + // Update the atom internal bounds. + Atom->Begin = NewBegin; + Atom->End = NewEnd; +} + +const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const { + AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(), + Addr, AtomComp); + if (I != atom_end() && (*I)->getBeginAddr() <= Addr) + return *I; + return nullptr; +} + +MCAtom *MCModule::findAtomContaining(uint64_t Addr) { + return const_cast( + const_cast(this)->findAtomContaining(Addr)); +} + +const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const { + AtomListTy::const_iterator I = std::upper_bound(atom_begin(), atom_end(), + Addr, AtomCompInv); + if (I != atom_end()) + return *I; + return nullptr; +} + +MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) { + return const_cast( + const_cast(this)->findFirstAtomAfter(Addr)); +} + +MCFunction *MCModule::createFunction(StringRef Name) { + std::unique_ptr MCF(new MCFunction(Name, this)); + Functions.push_back(std::move(MCF)); + return Functions.back().get(); +} + +static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) { + return BB->getInsts() < Atom; +} + +void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA, + const MCTextAtom *NewTA) { + BBsByAtomTy::iterator + I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), + TA, CompBBToAtom); + for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) { + MCBasicBlock *BB = *I; + MCBasicBlock *NewBB = 
&BB->getParent()->createBlock(*NewTA); + BB->splitBasicBlock(NewBB); + } +} + +void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) { + assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!"); + BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(), + BBsByAtom.end(), + Atom, CompBBToAtom); + for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I) + if (*I == BB) + return; + BBsByAtom.insert(I, BB); +} + +MCModule::MCModule() : Entrypoint(0) { } + +MCModule::~MCModule() { + for (AtomListTy::iterator AI = atom_begin(), + AE = atom_end(); + AI != AE; ++AI) + delete *AI; +} diff --git a/lib/MC/MCAnalysis/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp new file mode 100644 index 00000000000..c51c62e928d --- /dev/null +++ b/lib/MC/MCAnalysis/MCModuleYAML.cpp @@ -0,0 +1,464 @@ +//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of MCModule. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCModuleYAML.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Object/YAML.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/YAMLTraits.h" +#include + +namespace llvm { + +namespace { + +// This class is used to map opcode and register names to enum values. 
+// +// There are at least 3 obvious ways to do this: +// 1- Generate an MII/MRI method using a tablegen StringMatcher +// 2- Write an MII/MRI method using std::lower_bound and the assumption that +// the enums are sorted (starting at a fixed value). +// 3- Do the matching manually as is done here. +// +// Why 3? +// 1- A StringMatcher function for thousands of entries would incur +// a non-negligible binary size overhead. +// 2- The lower_bound comparators would be somewhat involved and aren't +// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h) +// 3- This isn't actually something useful outside tests (but the same argument +// can be made against having {MII,MRI}::getName). +// +// If this becomes useful outside this specific situation, feel free to do +// the Right Thing (tm) and move the functionality to MII/MRI. +// +class InstrRegInfoHolder { + typedef StringMap EnumValByNameTy; + EnumValByNameTy InstEnumValueByName; + EnumValByNameTy RegEnumValueByName; + +public: + const MCInstrInfo &MII; + const MCRegisterInfo &MRI; + InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI) + : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())), + RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) { + for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i) + InstEnumValueByName[MII.getName(i)] = i; + for (int i = 0, e = MRI.getNumRegs(); i != e; ++i) + RegEnumValueByName[MRI.getName(i)] = i; + } + + bool matchRegister(StringRef Name, unsigned &Reg) { + EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name); + if (It == RegEnumValueByName.end()) + return false; + Reg = It->getValue(); + return true; + } + bool matchOpcode(StringRef Name, unsigned &Opc) { + EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name); + if (It == InstEnumValueByName.end()) + return false; + Opc = It->getValue(); + return true; + } +}; + +} // end unnamed namespace + +namespace MCModuleYAML { + 
+LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum) + +struct Operand { + MCOperand MCOp; +}; + +struct Inst { + OpcodeEnum Opcode; + std::vector Operands; + uint64_t Size; +}; + +struct Atom { + MCAtom::AtomKind Type; + yaml::Hex64 StartAddress; + uint64_t Size; + + std::vector Insts; + object::yaml::BinaryRef Data; +}; + +struct BasicBlock { + yaml::Hex64 Address; + std::vector Preds; + std::vector Succs; +}; + +struct Function { + StringRef Name; + std::vector BasicBlocks; +}; + +struct Module { + std::vector Atoms; + std::vector Functions; +}; + +} // end namespace MCModuleYAML +} // end namespace llvm + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function) + +namespace llvm { + +namespace yaml { + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, MCAtom::AtomKind &Kind); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Atom &A); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Inst &I); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Function &Fn); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, MCModuleYAML::Module &M); +}; + +template <> struct ScalarTraits { + static void output(const MCModuleYAML::Operand &, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::Operand &); + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarTraits { + static void output(const MCModuleYAML::OpcodeEnum &, void *, + llvm::raw_ostream &); + static StringRef 
input(StringRef, void *, MCModuleYAML::OpcodeEnum &); + static bool mustQuote(StringRef) { return false; } +}; + +void ScalarEnumerationTraits::enumeration( + IO &IO, MCAtom::AtomKind &Value) { + IO.enumCase(Value, "Text", MCAtom::TextAtom); + IO.enumCase(Value, "Data", MCAtom::DataAtom); +} + +void MappingTraits::mapping(IO &IO, MCModuleYAML::Atom &A) { + IO.mapRequired("StartAddress", A.StartAddress); + IO.mapRequired("Size", A.Size); + IO.mapRequired("Type", A.Type); + if (A.Type == MCAtom::TextAtom) + IO.mapRequired("Content", A.Insts); + else if (A.Type == MCAtom::DataAtom) + IO.mapRequired("Content", A.Data); +} + +void MappingTraits::mapping(IO &IO, MCModuleYAML::Inst &I) { + IO.mapRequired("Inst", I.Opcode); + IO.mapRequired("Size", I.Size); + IO.mapRequired("Ops", I.Operands); +} + +void +MappingTraits::mapping(IO &IO, + MCModuleYAML::BasicBlock &BB) { + IO.mapRequired("Address", BB.Address); + IO.mapRequired("Preds", BB.Preds); + IO.mapRequired("Succs", BB.Succs); +} + +void MappingTraits::mapping(IO &IO, + MCModuleYAML::Function &F) { + IO.mapRequired("Name", F.Name); + IO.mapRequired("BasicBlocks", F.BasicBlocks); +} + +void MappingTraits::mapping(IO &IO, + MCModuleYAML::Module &M) { + IO.mapRequired("Atoms", M.Atoms); + IO.mapOptional("Functions", M.Functions); +} + +void +ScalarTraits::output(const MCModuleYAML::Operand &Val, + void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + + // FIXME: Doesn't support FPImm and expr/inst, but do these make sense? 
+ if (Val.MCOp.isImm()) + Out << "I" << Val.MCOp.getImm(); + else if (Val.MCOp.isReg()) + Out << "R" << IRI->MRI.getName(Val.MCOp.getReg()); + else + llvm_unreachable("Trying to output invalid MCOperand!"); +} + +StringRef +ScalarTraits::input(StringRef Scalar, void *Ctx, + MCModuleYAML::Operand &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + char Type = 0; + if (Scalar.size() >= 1) + Type = Scalar.front(); + if (Type != 'R' && Type != 'I') + return "Operand must start with 'R' (register) or 'I' (immediate)."; + if (Type == 'R') { + unsigned Reg; + if (!IRI->matchRegister(Scalar.substr(1), Reg)) + return "Invalid register name."; + Val.MCOp = MCOperand::CreateReg(Reg); + } else if (Type == 'I') { + int64_t RIVal; + if (Scalar.substr(1).getAsInteger(10, RIVal)) + return "Invalid immediate value."; + Val.MCOp = MCOperand::CreateImm(RIVal); + } else { + Val.MCOp = MCOperand(); + } + return StringRef(); +} + +void ScalarTraits::output( + const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + Out << IRI->MII.getName(Val); +} + +StringRef +ScalarTraits::input(StringRef Scalar, void *Ctx, + MCModuleYAML::OpcodeEnum &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + unsigned Opc; + if (!IRI->matchOpcode(Scalar, Opc)) + return "Invalid instruction opcode."; + Val = Opc; + return ""; +} + +} // end namespace yaml + +namespace { + +class MCModule2YAML { + const MCModule &MCM; + MCModuleYAML::Module YAMLModule; + void dumpAtom(const MCAtom *MCA); + void dumpFunction(const MCFunction &MCF); + void dumpBasicBlock(const MCBasicBlock *MCBB); + +public: + MCModule2YAML(const MCModule &MCM); + MCModuleYAML::Module &getYAMLModule(); +}; + +class YAML2MCModule { + MCModule &MCM; + +public: + YAML2MCModule(MCModule &MCM); + StringRef parse(const MCModuleYAML::Module &YAMLModule); +}; + +} // end unnamed namespace + +MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), 
YAMLModule() { + for (MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end(); + AI != AE; ++AI) + dumpAtom(*AI); + for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end(); + FI != FE; ++FI) + dumpFunction(**FI); +} + +void MCModule2YAML::dumpAtom(const MCAtom *MCA) { + YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1); + MCModuleYAML::Atom &A = YAMLModule.Atoms.back(); + A.Type = MCA->getKind(); + A.StartAddress = MCA->getBeginAddr(); + A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1; + if (const MCTextAtom *TA = dyn_cast(MCA)) { + const size_t InstCount = TA->size(); + A.Insts.resize(InstCount); + for (size_t i = 0; i != InstCount; ++i) { + const MCDecodedInst &MCDI = TA->at(i); + A.Insts[i].Opcode = MCDI.Inst.getOpcode(); + A.Insts[i].Size = MCDI.Size; + const unsigned OpCount = MCDI.Inst.getNumOperands(); + A.Insts[i].Operands.resize(OpCount); + for (unsigned oi = 0; oi != OpCount; ++oi) + A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi); + } + } else if (const MCDataAtom *DA = dyn_cast(MCA)) { + A.Data = DA->getData(); + } else { + llvm_unreachable("Unknown atom type."); + } +} + +void MCModule2YAML::dumpFunction(const MCFunction &MCF) { + YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1); + MCModuleYAML::Function &F = YAMLModule.Functions.back(); + F.Name = MCF.getName(); + for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end(); + BBI != BBE; ++BBI) { + const MCBasicBlock &MCBB = **BBI; + F.BasicBlocks.resize(F.BasicBlocks.size() + 1); + MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back(); + BB.Address = MCBB.getInsts()->getBeginAddr(); + for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(), + PE = MCBB.pred_end(); + PI != PE; ++PI) + BB.Preds.push_back((*PI)->getInsts()->getBeginAddr()); + for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(), + SE = MCBB.succ_end(); + SI != SE; ++SI) + BB.Succs.push_back((*SI)->getInsts()->getBeginAddr()); + } +} + 
+MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; } + +YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {} + +StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) { + typedef std::vector::const_iterator AtomIt; + typedef std::vector::const_iterator InstIt; + typedef std::vector::const_iterator OpIt; + + typedef DenseMap AddrToTextAtomTy; + AddrToTextAtomTy TAByAddr; + + for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end(); + AI != AE; ++AI) { + uint64_t StartAddress = AI->StartAddress; + if (AI->Size == 0) + return "Atoms can't be empty!"; + uint64_t EndAddress = StartAddress + AI->Size - 1; + switch (AI->Type) { + case MCAtom::TextAtom: { + MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress); + TAByAddr[StartAddress] = TA; + for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE; + ++II) { + MCInst MI; + MI.setOpcode(II->Opcode); + for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE; + ++OI) + MI.addOperand(OI->MCOp); + TA->addInst(MI, II->Size); + } + break; + } + case MCAtom::DataAtom: { + MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress); + SmallVector Data; + raw_svector_ostream OS(Data); + AI->Data.writeAsBinary(OS); + OS.flush(); + for (size_t i = 0, e = Data.size(); i != e; ++i) + DA->addData((uint8_t)Data[i]); + break; + } + } + } + + typedef std::vector::const_iterator FuncIt; + typedef std::vector::const_iterator BBIt; + typedef std::vector::const_iterator AddrIt; + for (FuncIt FI = YAMLModule.Functions.begin(), + FE = YAMLModule.Functions.end(); + FI != FE; ++FI) { + MCFunction *MCFN = MCM.createFunction(FI->Name); + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address); + if (It == TAByAddr.end()) + return "Basic block start address doesn't match any text atom!"; + MCFN->createBlock(*It->second); + } + for (BBIt BBI = 
FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + MCBasicBlock *MCBB = MCFN->find(BBI->Address); + if (!MCBB) + return "Couldn't find matching basic block in function."; + for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; + ++PI) { + MCBasicBlock *Pred = MCFN->find(*PI); + if (!Pred) + return "Couldn't find predecessor basic block."; + MCBB->addPredecessor(Pred); + } + for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; + ++SI) { + MCBasicBlock *Succ = MCFN->find(*SI); + if (!Succ) + return "Couldn't find predecessor basic block."; + MCBB->addSuccessor(Succ); + } + } + } + return ""; +} + +StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCModule2YAML Dumper(MCM); + InstrRegInfoHolder IRI(MII, MRI); + yaml::Output YOut(OS, (void *)&IRI); + YOut << Dumper.getYAMLModule(); + return ""; +} + +StringRef yaml2mcmodule(std::unique_ptr &MCM, StringRef YamlContent, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCM.reset(new MCModule); + YAML2MCModule Parser(*MCM); + MCModuleYAML::Module YAMLModule; + InstrRegInfoHolder IRI(MII, MRI); + yaml::Input YIn(YamlContent, (void *)&IRI); + YIn >> YAMLModule; + if (std::error_code ec = YIn.error()) + return ec.message(); + StringRef err = Parser.parse(YAMLModule); + if (!err.empty()) + return err; + return ""; +} + +} // end namespace llvm diff --git a/lib/MC/MCAnalysis/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp new file mode 100644 index 00000000000..0f789ff040f --- /dev/null +++ b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp @@ -0,0 +1,574 @@ +//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectDisassembler.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCObjectSymbolizer.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/StringRefMemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace object; + +#define DEBUG_TYPE "mc" + +MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, + const MCDisassembler &Dis, + const MCInstrAnalysis &MIA) + : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {} + +uint64_t MCObjectDisassembler::getEntrypoint() { + for (const SymbolRef &Symbol : Obj.symbols()) { + StringRef Name; + Symbol.getName(Name); + if (Name == "main" || Name == "_main") { + uint64_t Entrypoint; + Symbol.getAddress(Entrypoint); + return getEffectiveLoadAddr(Entrypoint); + } + } + return 0; +} + +ArrayRef MCObjectDisassembler::getStaticInitFunctions() { + return ArrayRef(); +} + +ArrayRef MCObjectDisassembler::getStaticExitFunctions() { + return ArrayRef(); +} + +MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) { + // FIXME: Keep track of object sections. 
+ return FallbackRegion.get(); +} + +uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { + return Addr; +} + +uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) { + return Addr; +} + +MCModule *MCObjectDisassembler::buildEmptyModule() { + MCModule *Module = new MCModule; + Module->Entrypoint = getEntrypoint(); + return Module; +} + +MCModule *MCObjectDisassembler::buildModule(bool withCFG) { + MCModule *Module = buildEmptyModule(); + + buildSectionAtoms(Module); + if (withCFG) + buildCFG(Module); + return Module; +} + +void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { + for (const SectionRef &Section : Obj.sections()) { + bool isText; + Section.isText(isText); + bool isData; + Section.isData(isData); + if (!isData && !isText) + continue; + + uint64_t StartAddr; + Section.getAddress(StartAddr); + uint64_t SecSize; + Section.getSize(SecSize); + if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) + continue; + StartAddr = getEffectiveLoadAddr(StartAddr); + + StringRef Contents; + Section.getContents(Contents); + StringRefMemoryObject memoryObject(Contents, StartAddr); + + // We don't care about things like non-file-backed sections yet. 
+ if (Contents.size() != SecSize || !SecSize) + continue; + uint64_t EndAddr = StartAddr + SecSize - 1; + + StringRef SecName; + Section.getName(SecName); + + if (isText) { + MCTextAtom *Text = nullptr; + MCDataAtom *InvalidData = nullptr; + + uint64_t InstSize; + for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { + const uint64_t CurAddr = StartAddr + Index; + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(), + nulls())) { + if (!Text) { + Text = Module->createTextAtom(CurAddr, CurAddr); + Text->setName(SecName); + } + Text->addInst(Inst, InstSize); + InvalidData = nullptr; + } else { + assert(InstSize && "getInstruction() consumed no bytes"); + if (!InvalidData) { + Text = nullptr; + InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1); + } + for (uint64_t I = 0; I < InstSize; ++I) + InvalidData->addData(Contents[Index+I]); + } + } + } else { + MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); + Data->setName(SecName); + for (uint64_t Index = 0; Index < SecSize; ++Index) + Data->addData(Contents[Index]); + } + } +} + +namespace { + struct BBInfo; + typedef SmallPtrSet BBInfoSetTy; + + struct BBInfo { + MCTextAtom *Atom; + MCBasicBlock *BB; + BBInfoSetTy Succs; + BBInfoSetTy Preds; + MCObjectDisassembler::AddressSetTy SuccAddrs; + + BBInfo() : Atom(nullptr), BB(nullptr) {} + + void addSucc(BBInfo &Succ) { + Succs.insert(&Succ); + Succ.Preds.insert(this); + } + }; +} + +static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) { + std::sort(V.begin(), V.end()); + V.erase(std::unique(V.begin(), V.end()), V.end()); +} + +void MCObjectDisassembler::buildCFG(MCModule *Module) { + typedef std::map BBInfoByAddrTy; + BBInfoByAddrTy BBInfos; + AddressSetTy Splits; + AddressSetTy Calls; + + for (const SymbolRef &Symbol : Obj.symbols()) { + SymbolRef::Type SymType; + Symbol.getType(SymType); + if (SymType == SymbolRef::ST_Function) { + uint64_t SymAddr; + 
Symbol.getAddress(SymAddr); + SymAddr = getEffectiveLoadAddr(SymAddr); + Calls.push_back(SymAddr); + Splits.push_back(SymAddr); + } + } + + assert(Module->func_begin() == Module->func_end() + && "Module already has a CFG!"); + + // First, determine the basic block boundaries and call targets. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast(*AI); + if (!TA) continue; + Calls.push_back(TA->getBeginAddr()); + BBInfos[TA->getBeginAddr()].Atom = TA; + for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); + II != IE; ++II) { + if (MIA.isTerminator(II->Inst)) + Splits.push_back(II->Address + II->Size); + uint64_t Target; + if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { + if (MIA.isCall(II->Inst)) + Calls.push_back(Target); + Splits.push_back(Target); + } + } + } + + RemoveDupsFromAddressVector(Splits); + RemoveDupsFromAddressVector(Calls); + + // Split text atoms into basic block atoms. + for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); + SI != SE; ++SI) { + MCAtom *A = Module->findAtomContaining(*SI); + if (!A) continue; + MCTextAtom *TA = cast(A); + if (TA->getBeginAddr() == *SI) + continue; + MCTextAtom *NewAtom = TA->split(*SI); + BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; + StringRef BBName = TA->getName(); + BBName = BBName.substr(0, BBName.find_last_of(':')); + NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); + } + + // Compute succs/preds. 
+ for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast(*AI); + if (!TA) continue; + BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; + const MCDecodedInst &LI = TA->back(); + if (MIA.isBranch(LI.Inst)) { + uint64_t Target; + if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) + CurBB.addSucc(BBInfos[Target]); + if (MIA.isConditionalBranch(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } else if (!MIA.isTerminator(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } + + + // Create functions and basic blocks. + for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); + CI != CE; ++CI) { + BBInfo &BBI = BBInfos[*CI]; + if (!BBI.Atom) continue; + + MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); + + // Create MCBBs. + SmallSetVector Worklist; + Worklist.insert(&BBI); + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + BBInfo *BBI = Worklist[wi]; + if (!BBI->Atom) + continue; + BBI->BB = &MCFN.createBlock(*BBI->Atom); + // Add all predecessors and successors to the worklist. + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + Worklist.insert(*SI); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + Worklist.insert(*PI); + } + + // Set preds/succs. 
+ for (size_t wi = 0; wi < Worklist.size(); ++wi) { + BBInfo *BBI = Worklist[wi]; + MCBasicBlock *MCBB = BBI->BB; + if (!MCBB) + continue; + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + if ((*SI)->BB) + MCBB->addSuccessor((*SI)->BB); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + if ((*PI)->BB) + MCBB->addPredecessor((*PI)->BB); + } + } +} + +// Basic idea of the disassembly + discovery: +// +// start with the wanted address, insert it in the worklist +// while worklist not empty, take next address in the worklist: +// - check if atom exists there +// - if middle of atom: +// - split basic blocks referencing the atom +// - look for an already encountered BBInfo (using a map) +// - if there is, split it (new one, fallthrough, move succs, etc..) +// - if start of atom: nothing else to do +// - if no atom: create new atom and new bbinfo +// - look at the last instruction in the atom, add succs to worklist +// for all elements in the worklist: +// - create basic block, update preds/succs, etc.. +// +MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN, + uint64_t BBBeginAddr, + AddressSetTy &CallTargets, + AddressSetTy &TailCallTargets) { + typedef std::map BBInfoByAddrTy; + typedef SmallSetVector AddrWorklistTy; + BBInfoByAddrTy BBInfos; + AddrWorklistTy Worklist; + + Worklist.insert(BBBeginAddr); + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + + MCTextAtom *&TA = BBI->Atom; + assert(!TA && "Discovered basic block already has an associated atom!"); + + // Look for an atom at BeginAddr. + if (MCAtom *A = Module->findAtomContaining(BeginAddr)) { + // FIXME: We don't care about mixed atoms, see above. + TA = cast(A); + + // The found atom doesn't begin at BeginAddr, we have to split it. 
+ if (TA->getBeginAddr() != BeginAddr) { + // FIXME: Handle overlapping atoms: middle-starting instructions, etc.. + MCTextAtom *NewTA = TA->split(BeginAddr); + + // Look for an already encountered basic block that needs splitting + BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr()); + if (It != BBInfos.end() && It->second.Atom) { + BBI->SuccAddrs = It->second.SuccAddrs; + It->second.SuccAddrs.clear(); + It->second.SuccAddrs.push_back(BeginAddr); + } + TA = NewTA; + } + BBI->Atom = TA; + } else { + // If we didn't find an atom, then we have to disassemble to create one! + + MemoryObject *Region = getRegionFor(BeginAddr); + if (!Region) + llvm_unreachable(("Couldn't find suitable region for disassembly at " + + utostr(BeginAddr)).c_str()); + + uint64_t InstSize; + uint64_t EndAddr = Region->getBase() + Region->getExtent(); + + // We want to stop before the next atom and have a fallthrough to it. + if (MCTextAtom *NextAtom = + cast_or_null(Module->findFirstAtomAfter(BeginAddr))) + EndAddr = std::min(EndAddr, NextAtom->getBeginAddr()); + + for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(), + nulls())) { + if (!TA) + TA = Module->createTextAtom(Addr, Addr); + TA->addInst(Inst, InstSize); + } else { + // We don't care about splitting mixed atoms either. 
+ llvm_unreachable("Couldn't disassemble instruction in atom."); + } + + uint64_t BranchTarget; + if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { + if (MIA.isCall(Inst)) + CallTargets.push_back(BranchTarget); + } + + if (MIA.isTerminator(Inst)) + break; + } + BBI->Atom = TA; + } + + assert(TA && "Couldn't disassemble atom, none was created!"); + assert(TA->begin() != TA->end() && "Empty atom!"); + + MemoryObject *Region = getRegionFor(TA->getBeginAddr()); + assert(Region && "Couldn't find region for already disassembled code!"); + uint64_t EndRegion = Region->getBase() + Region->getExtent(); + + // Now we have a basic block atom, add successors. + // Add the fallthrough block. + if ((MIA.isConditionalBranch(TA->back().Inst) || + !MIA.isTerminator(TA->back().Inst)) && + (TA->getEndAddr() + 1 < EndRegion)) { + BBI->SuccAddrs.push_back(TA->getEndAddr() + 1); + Worklist.insert(TA->getEndAddr() + 1); + } + + // If the terminator is a branch, add the target block. + if (MIA.isBranch(TA->back().Inst)) { + uint64_t BranchTarget; + if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address, + TA->back().Size, BranchTarget)) { + StringRef ExtFnName; + if (MOS) + ExtFnName = + MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget)); + if (!ExtFnName.empty()) { + TailCallTargets.push_back(BranchTarget); + CallTargets.push_back(BranchTarget); + } else { + BBI->SuccAddrs.push_back(BranchTarget); + Worklist.insert(BranchTarget); + } + } + } + } + + for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + + assert(BBI->Atom && "Found a basic block without an associated atom!"); + + // Look for a basic block at BeginAddr. + BBI->BB = MCFN->find(BeginAddr); + if (BBI->BB) { + // FIXME: check that the succs/preds are the same + continue; + } + // If there was none, we have to create one from the atom. 
+ BBI->BB = &MCFN->createBlock(*BBI->Atom); + } + + for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + MCBasicBlock *BB = BBI->BB; + + RemoveDupsFromAddressVector(BBI->SuccAddrs); + for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(), + SE = BBI->SuccAddrs.end(); + SI != SE; ++SI) { // Visit every successor address. + MCBasicBlock *Succ = BBInfos[*SI].BB; + BB->addSuccessor(Succ); + Succ->addPredecessor(BB); + } + } + + assert(BBInfos[Worklist[0]].BB && + "No basic block created at requested address?"); + + return BBInfos[Worklist[0]].BB; +} + +MCFunction * +MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr, + AddressSetTy &CallTargets, + AddressSetTy &TailCallTargets) { + // First, check if this is an external function. + StringRef ExtFnName; + if (MOS) + ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr)); + if (!ExtFnName.empty()) + return Module->createFunction(ExtFnName); + + // If it's not, look for an existing function. + for (MCModule::func_iterator FI = Module->func_begin(), + FE = Module->func_end(); + FI != FE; ++FI) { + if ((*FI)->empty()) + continue; + // FIXME: MCModule should provide a findFunctionByAddr() + if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr) + return FI->get(); + } + + // Finally, just create a new one. + MCFunction *MCFN = Module->createFunction(""); + getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets); + return MCFN; +} + +// MachO MCObjectDisassembler implementation. 
+ +MCMachOObjectDisassembler::MCMachOObjectDisassembler( + const MachOObjectFile &MOOF, const MCDisassembler &Dis, + const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, + uint64_t HeaderLoadAddress) + : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF), + VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) { + + for (const SectionRef &Section : MOOF.sections()) { + StringRef Name; + Section.getName(Name); + // FIXME: We should use the S_ section type instead of the name. + if (Name == "__mod_init_func") { + DEBUG(dbgs() << "Found __mod_init_func section!\n"); + Section.getContents(ModInitContents); + } else if (Name == "__mod_exit_func") { + DEBUG(dbgs() << "Found __mod_exit_func section!\n"); + Section.getContents(ModExitContents); + } + } +} + +// FIXME: Only do the translations for addresses actually inside the object. +uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { + return Addr + VMAddrSlide; +} + +uint64_t +MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) { + return EffectiveAddr - VMAddrSlide; +} + +uint64_t MCMachOObjectDisassembler::getEntrypoint() { + uint64_t EntryFileOffset = 0; + + // Look for LC_MAIN. + { + uint32_t LoadCommandCount = MOOF.getHeader().ncmds; + MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo(); + for (unsigned I = 0;; ++I) { + if (Load.C.cmd == MachO::LC_MAIN) { + EntryFileOffset = + ((const MachO::entry_point_command *)Load.Ptr)->entryoff; + break; + } + + if (I == LoadCommandCount - 1) + break; + else + Load = MOOF.getNextLoadCommandInfo(Load); + } + } + + // If we didn't find anything, default to the common implementation. + // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends? 
+ if (!EntryFileOffset) // No LC_MAIN entry offset was found. + return MCObjectDisassembler::getEntrypoint(); + + return EntryFileOffset + HeaderLoadAddress; +} + +ArrayRef MCMachOObjectDisassembler::getStaticInitFunctions() { + // FIXME: We only handle 64bit mach-o + assert(MOOF.is64Bit()); + + size_t EntrySize = 8; + size_t EntryCount = ModInitContents.size() / EntrySize; + return ArrayRef( + reinterpret_cast(ModInitContents.data()), EntryCount); +} + +ArrayRef MCMachOObjectDisassembler::getStaticExitFunctions() { + // FIXME: We only handle 64bit mach-o + assert(MOOF.is64Bit()); + + size_t EntrySize = 8; + size_t EntryCount = ModExitContents.size() / EntrySize; + return ArrayRef( + reinterpret_cast(ModExitContents.data()), EntryCount); +} diff --git a/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp new file mode 100644 index 00000000000..b14959689d9 --- /dev/null +++ b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp @@ -0,0 +1,268 @@ +//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectSymbolizer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace object; + +//===- MCMachObjectSymbolizer ---------------------------------------------===// + +namespace { +class MCMachObjectSymbolizer : public MCObjectSymbolizer { + const MachOObjectFile *MOOF; + // __TEXT;__stubs support. 
+ uint64_t StubsStart; + uint64_t StubsCount; + uint64_t StubSize; + uint64_t StubsIndSymIndex; + +public: + MCMachObjectSymbolizer(MCContext &Ctx, + std::unique_ptr RelInfo, + const MachOObjectFile *MOOF); + + StringRef findExternalFunctionAt(uint64_t Addr) override; + + void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) override; +}; +} // End unnamed namespace + +MCMachObjectSymbolizer::MCMachObjectSymbolizer( + MCContext &Ctx, std::unique_ptr RelInfo, + const MachOObjectFile *MOOF) + : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF), + StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) { + + for (const SectionRef &Section : MOOF->sections()) { + StringRef Name; + Section.getName(Name); + if (Name == "__stubs") { + SectionRef StubsSec = Section; + if (MOOF->is64Bit()) { + MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl()); + StubsIndSymIndex = S.reserved1; + StubSize = S.reserved2; + } else { + MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl()); + StubsIndSymIndex = S.reserved1; + StubSize = S.reserved2; + } + assert(StubSize && "Mach-O stub entry size can't be zero!"); + StubsSec.getAddress(StubsStart); + StubsSec.getSize(StubsCount); + StubsCount /= StubSize; + } + } +} + +StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { + // FIXME: also, this can all be done at the very beginning, by iterating over + // all stubs and creating the calls to outside functions. Is it worth it + // though? 
+ if (!StubSize) + return StringRef(); + uint64_t StubIdx = (Addr - StubsStart) / StubSize; + if (StubIdx >= StubsCount) + return StringRef(); + + uint32_t SymtabIdx = + MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx); + + StringRef SymName; + symbol_iterator SI = MOOF->symbol_begin(); + for (uint32_t i = 0; i != SymtabIdx; ++i) + ++SI; + assert(SI != MOOF->symbol_end() && "Stub wasn't found in the symbol table!"); + SI->getName(SymName); + assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!"); + return SymName.substr(1); +} + +void MCMachObjectSymbolizer:: +tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) { + if (const RelocationRef *R = findRelocationAt(Address)) { + const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R); + if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false) + return; + } + uint64_t Addr = Value; + if (const SectionRef *S = findSectionContaining(Addr)) { + StringRef Name; S->getName(Name); + uint64_t SAddr; S->getAddress(SAddr); + if (Name == "__cstring") { + StringRef Contents; + S->getContents(Contents); + Contents = Contents.substr(Addr - SAddr); + cStream << " ## literal pool for: " + << Contents.substr(0, Contents.find_first_of(0)); + } + } +} + +//===- MCObjectSymbolizer -------------------------------------------------===// + +MCObjectSymbolizer::MCObjectSymbolizer( + MCContext &Ctx, std::unique_ptr RelInfo, + const ObjectFile *Obj) + : MCSymbolizer(Ctx, std::move(RelInfo)), Obj(Obj), SortedSections(), + AddrToReloc() {} + +bool MCObjectSymbolizer:: +tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) { + if (IsBranch) { + StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value); + if (!ExtFnName.empty()) { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName); + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + 
MI.addOperand(MCOperand::CreateExpr(Expr)); + return true; + } + } + + if (const RelocationRef *R = findRelocationAt(Address + Offset)) { + if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) { + MI.addOperand(MCOperand::CreateExpr(RelExpr)); + return true; + } + // Only try to create a symbol+offset expression if there is no relocation. + return false; + } + + // Interpret Value as a branch target. + if (IsBranch == false) + return false; + uint64_t UValue = Value; + // FIXME: map instead of looping each time? + for (const SymbolRef &Symbol : Obj->symbols()) { + uint64_t SymAddr; + Symbol.getAddress(SymAddr); + uint64_t SymSize; + Symbol.getSize(SymSize); + StringRef SymName; + Symbol.getName(SymName); + SymbolRef::Type SymType; + Symbol.getType(SymType); + if (SymAddr == UnknownAddressOrSize || SymSize == UnknownAddressOrSize || + SymName.empty() || SymType != SymbolRef::ST_Function) + continue; + + if ( SymAddr == UValue || + (SymAddr <= UValue && SymAddr + SymSize > UValue)) { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + if (SymAddr != UValue) { + const MCExpr *Off = MCConstantExpr::Create(UValue - SymAddr, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx); + } + MI.addOperand(MCOperand::CreateExpr(Expr)); + return true; + } + } + return false; +} + +void MCObjectSymbolizer:: +tryAddingPcLoadReferenceComment(raw_ostream &cStream, + int64_t Value, uint64_t Address) { +} + +StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { + return StringRef(); +} + +MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer( + MCContext &Ctx, std::unique_ptr RelInfo, + const ObjectFile *Obj) { + if (const MachOObjectFile *MOOF = dyn_cast(Obj)) + return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), MOOF); + return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj); +} + +// SortedSections implementation. 
+ +static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) { + uint64_t SAddr; S.getAddress(SAddr); + return SAddr < Addr; +} + +const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) { + if (SortedSections.empty()) + buildSectionList(); + + SortedSectionList::iterator + EndIt = SortedSections.end(), + It = std::lower_bound(SortedSections.begin(), EndIt, + Addr, SectionStartsBefore); + if (It == EndIt) + return nullptr; + uint64_t SAddr; It->getAddress(SAddr); + uint64_t SSize; It->getSize(SSize); + if (Addr >= SAddr + SSize) + return nullptr; + return &*It; +} + +const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) { + if (AddrToReloc.empty()) + buildRelocationByAddrMap(); + + AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr); + if (RI == AddrToReloc.end()) + return nullptr; + return &RI->second; +} + +void MCObjectSymbolizer::buildSectionList() { + for (const SectionRef &Section : Obj->sections()) { + bool RequiredForExec; + Section.isRequiredForExecution(RequiredForExec); + if (RequiredForExec == false) + continue; + uint64_t SAddr; + Section.getAddress(SAddr); + uint64_t SSize; + Section.getSize(SSize); + SortedSectionList::iterator It = + std::lower_bound(SortedSections.begin(), SortedSections.end(), SAddr, + SectionStartsBefore); + if (It != SortedSections.end()) { + uint64_t FoundSAddr; It->getAddress(FoundSAddr); + if (FoundSAddr < SAddr + SSize) + llvm_unreachable("Inserting overlapping sections"); + } + SortedSections.insert(It, Section); + } +} + +void MCObjectSymbolizer::buildRelocationByAddrMap() { + for (const SectionRef &Section : Obj->sections()) { + for (const RelocationRef &Reloc : Section.relocations()) { + uint64_t Address; + Reloc.getAddress(Address); + // At a specific address, only keep the first relocation. 
+ if (AddrToReloc.find(Address) == AddrToReloc.end()) + AddrToReloc[Address] = Reloc; + } + } +} diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAtom.cpp deleted file mode 100644 index bc353cdcf65..00000000000 --- a/lib/MC/MCAtom.cpp +++ /dev/null @@ -1,114 +0,0 @@ -//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCModule.h" -#include "llvm/Support/ErrorHandling.h" -#include - -using namespace llvm; - -// Pin the vtable to this file. -void MCAtom::anchor() {} - -void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) { - Parent->remap(this, NewBegin, NewEnd); -} - -void MCAtom::remapForTruncate(uint64_t TruncPt) { - assert((TruncPt >= Begin && TruncPt < End) && - "Truncation point not contained in atom!"); - remap(Begin, TruncPt); -} - -void MCAtom::remapForSplit(uint64_t SplitPt, - uint64_t &LBegin, uint64_t &LEnd, - uint64_t &RBegin, uint64_t &REnd) { - assert((SplitPt > Begin && SplitPt <= End) && - "Splitting at point not contained in atom!"); - - // Compute the new begin/end points. - LBegin = Begin; - LEnd = SplitPt - 1; - RBegin = SplitPt; - REnd = End; - - // Remap this atom to become the lower of the two new ones. 
- remap(LBegin, LEnd); -} - -// MCDataAtom - -void MCDataAtom::addData(const MCData &D) { - Data.push_back(D); - if (Data.size() > End + 1 - Begin) - remap(Begin, End + 1); -} - -void MCDataAtom::truncate(uint64_t TruncPt) { - remapForTruncate(TruncPt); - - Data.resize(TruncPt - Begin + 1); -} - -MCDataAtom *MCDataAtom::split(uint64_t SplitPt) { - uint64_t LBegin, LEnd, RBegin, REnd; - remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); - - MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd); - RightAtom->setName(getName()); - - std::vector::iterator I = Data.begin() + (RBegin - LBegin); - assert(I != Data.end() && "Split point not found in range!"); - - std::copy(I, Data.end(), std::back_inserter(RightAtom->Data)); - Data.erase(I, Data.end()); - return RightAtom; -} - -// MCTextAtom - -void MCTextAtom::addInst(const MCInst &I, uint64_t Size) { - if (NextInstAddress + Size - 1 > End) - remap(Begin, NextInstAddress + Size - 1); - Insts.push_back(MCDecodedInst(I, NextInstAddress, Size)); - NextInstAddress += Size; -} - -void MCTextAtom::truncate(uint64_t TruncPt) { - remapForTruncate(TruncPt); - - InstListTy::iterator I = Insts.begin(); - while (I != Insts.end() && I->Address <= TruncPt) ++I; - - assert(I != Insts.end() && "Truncation point not found in disassembly!"); - assert(I->Address == TruncPt + 1 && - "Truncation point does not fall on instruction boundary"); - - Insts.erase(I, Insts.end()); -} - -MCTextAtom *MCTextAtom::split(uint64_t SplitPt) { - uint64_t LBegin, LEnd, RBegin, REnd; - remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); - - MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd); - RightAtom->setName(getName()); - - InstListTy::iterator I = Insts.begin(); - while (I != Insts.end() && I->Address < SplitPt) ++I; - assert(I != Insts.end() && "Split point not found in disassembly!"); - assert(I->Address == SplitPt && - "Split point does not fall on instruction boundary!"); - - std::copy(I, Insts.end(), 
std::back_inserter(RightAtom->Insts)); - Insts.erase(I, Insts.end()); - Parent->splitBasicBlocksForAtom(this, RightAtom); - return RightAtom; -} diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp deleted file mode 100644 index 1ddc2505f07..00000000000 --- a/lib/MC/MCFunction.cpp +++ /dev/null @@ -1,76 +0,0 @@ -//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCFunction.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCModule.h" -#include - -using namespace llvm; - -// MCFunction - -MCFunction::MCFunction(StringRef Name, MCModule *Parent) - : Name(Name), ParentModule(Parent) -{} - -MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) { - std::unique_ptr MCBB(new MCBasicBlock(TA, this)); - Blocks.push_back(std::move(MCBB)); - return *Blocks.back(); -} - -MCBasicBlock *MCFunction::find(uint64_t StartAddr) { - for (const_iterator I = begin(), E = end(); I != E; ++I) - if ((*I)->getInsts()->getBeginAddr() == StartAddr) - return I->get(); - return nullptr; -} - -const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const { - return const_cast(this)->find(StartAddr); -} - -// MCBasicBlock - -MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent) - : Insts(&Insts), Parent(Parent) { - getParent()->getParent()->trackBBForAtom(&Insts, this); -} - -void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) { - if (!isSuccessor(MCBB)) - Successors.push_back(MCBB); -} - -bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const { - return std::find(Successors.begin(), Successors.end(), - MCBB) != Successors.end(); -} - -void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) { - if (!isPredecessor(MCBB)) - Predecessors.push_back(MCBB); -} 
- -bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const { - return std::find(Predecessors.begin(), Predecessors.end(), - MCBB) != Predecessors.end(); -} - -void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) { - assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() && - "Splitting unrelated basic blocks!"); - SplitBB->addPredecessor(this); - assert(SplitBB->Successors.empty() && - "Split basic block shouldn't already have successors!"); - SplitBB->Successors = Successors; - Successors.clear(); - addSuccessor(SplitBB); -} diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp deleted file mode 100644 index 3ed735689d7..00000000000 --- a/lib/MC/MCModule.cpp +++ /dev/null @@ -1,142 +0,0 @@ -//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCModule.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCFunction.h" -#include - -using namespace llvm; - -static bool AtomComp(const MCAtom *L, uint64_t Addr) { - return L->getEndAddr() < Addr; -} - -static bool AtomCompInv(uint64_t Addr, const MCAtom *R) { - return Addr < R->getEndAddr(); -} - -void MCModule::map(MCAtom *NewAtom) { - uint64_t Begin = NewAtom->Begin; - - assert(Begin <= NewAtom->End && "Creating MCAtom with endpoints reversed?"); - - // Check for atoms already covering this range. - AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), - Begin, AtomComp); - assert((I == atom_end() || (*I)->getBeginAddr() > NewAtom->End) - && "Offset range already occupied!"); - - // Insert the new atom to the list. 
- Atoms.insert(I, NewAtom); -} - -MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) { - MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End); - map(NewAtom); - return NewAtom; -} - -MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) { - MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End); - map(NewAtom); - return NewAtom; -} - -// remap - Update the interval mapping for an atom. -void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) { - // Find and erase the old mapping. - AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), - Atom->Begin, AtomComp); - assert(I != atom_end() && "Atom offset not found in module!"); - assert(*I == Atom && "Previous atom mapping was invalid!"); - Atoms.erase(I); - - // FIXME: special case NewBegin == Atom->Begin - - // Insert the new mapping. - AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(), - NewBegin, AtomComp); - assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End) - && "Offset range already occupied!"); - Atoms.insert(NewI, Atom); - - // Update the atom internal bounds. 
- Atom->Begin = NewBegin; - Atom->End = NewEnd; -} - -const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const { - AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(), - Addr, AtomComp); - if (I != atom_end() && (*I)->getBeginAddr() <= Addr) - return *I; - return nullptr; -} - -MCAtom *MCModule::findAtomContaining(uint64_t Addr) { - return const_cast( - const_cast(this)->findAtomContaining(Addr)); -} - -const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const { - AtomListTy::const_iterator I = std::upper_bound(atom_begin(), atom_end(), - Addr, AtomCompInv); - if (I != atom_end()) - return *I; - return nullptr; -} - -MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) { - return const_cast( - const_cast(this)->findFirstAtomAfter(Addr)); -} - -MCFunction *MCModule::createFunction(StringRef Name) { - std::unique_ptr MCF(new MCFunction(Name, this)); - Functions.push_back(std::move(MCF)); - return Functions.back().get(); -} - -static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) { - return BB->getInsts() < Atom; -} - -void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA, - const MCTextAtom *NewTA) { - BBsByAtomTy::iterator - I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), - TA, CompBBToAtom); - for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) { - MCBasicBlock *BB = *I; - MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA); - BB->splitBasicBlock(NewBB); - } -} - -void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) { - assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!"); - BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(), - BBsByAtom.end(), - Atom, CompBBToAtom); - for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I) - if (*I == BB) - return; - BBsByAtom.insert(I, BB); -} - -MCModule::MCModule() : Entrypoint(0) { } - -MCModule::~MCModule() { - for (AtomListTy::iterator AI = atom_begin(), - AE = atom_end(); - AI != AE; ++AI) - 
delete *AI; -} diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCModuleYAML.cpp deleted file mode 100644 index f6b7431eb3b..00000000000 --- a/lib/MC/MCModuleYAML.cpp +++ /dev/null @@ -1,464 +0,0 @@ -//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines classes for handling the YAML representation of MCModule. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCModuleYAML.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCFunction.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Object/YAML.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/YAMLTraits.h" -#include - -namespace llvm { - -namespace { - -// This class is used to map opcode and register names to enum values. -// -// There are at least 3 obvious ways to do this: -// 1- Generate an MII/MRI method using a tablegen StringMatcher -// 2- Write an MII/MRI method using std::lower_bound and the assumption that -// the enums are sorted (starting at a fixed value). -// 3- Do the matching manually as is done here. -// -// Why 3? -// 1- A StringMatcher function for thousands of entries would incur -// a non-negligible binary size overhead. -// 2- The lower_bound comparators would be somewhat involved and aren't -// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h) -// 3- This isn't actually something useful outside tests (but the same argument -// can be made against having {MII,MRI}::getName). 
-// -// If this becomes useful outside this specific situation, feel free to do -// the Right Thing (tm) and move the functionality to MII/MRI. -// -class InstrRegInfoHolder { - typedef StringMap EnumValByNameTy; - EnumValByNameTy InstEnumValueByName; - EnumValByNameTy RegEnumValueByName; - -public: - const MCInstrInfo &MII; - const MCRegisterInfo &MRI; - InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI) - : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())), - RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) { - for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i) - InstEnumValueByName[MII.getName(i)] = i; - for (int i = 0, e = MRI.getNumRegs(); i != e; ++i) - RegEnumValueByName[MRI.getName(i)] = i; - } - - bool matchRegister(StringRef Name, unsigned &Reg) { - EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name); - if (It == RegEnumValueByName.end()) - return false; - Reg = It->getValue(); - return true; - } - bool matchOpcode(StringRef Name, unsigned &Opc) { - EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name); - if (It == InstEnumValueByName.end()) - return false; - Opc = It->getValue(); - return true; - } -}; - -} // end unnamed namespace - -namespace MCModuleYAML { - -LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum) - -struct Operand { - MCOperand MCOp; -}; - -struct Inst { - OpcodeEnum Opcode; - std::vector Operands; - uint64_t Size; -}; - -struct Atom { - MCAtom::AtomKind Type; - yaml::Hex64 StartAddress; - uint64_t Size; - - std::vector Insts; - object::yaml::BinaryRef Data; -}; - -struct BasicBlock { - yaml::Hex64 Address; - std::vector Preds; - std::vector Succs; -}; - -struct Function { - StringRef Name; - std::vector BasicBlocks; -}; - -struct Module { - std::vector Atoms; - std::vector Functions; -}; - -} // end namespace MCModuleYAML -} // end namespace llvm - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand) 
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function) - -namespace llvm { - -namespace yaml { - -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &IO, MCAtom::AtomKind &Kind); -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, MCModuleYAML::Atom &A); -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, MCModuleYAML::Inst &I); -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB); -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, MCModuleYAML::Function &Fn); -}; - -template <> struct MappingTraits { - static void mapping(IO &IO, MCModuleYAML::Module &M); -}; - -template <> struct ScalarTraits { - static void output(const MCModuleYAML::Operand &, void *, - llvm::raw_ostream &); - static StringRef input(StringRef, void *, MCModuleYAML::Operand &); - static bool mustQuote(StringRef) { return false; } -}; - -template <> struct ScalarTraits { - static void output(const MCModuleYAML::OpcodeEnum &, void *, - llvm::raw_ostream &); - static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &); - static bool mustQuote(StringRef) { return false; } -}; - -void ScalarEnumerationTraits::enumeration( - IO &IO, MCAtom::AtomKind &Value) { - IO.enumCase(Value, "Text", MCAtom::TextAtom); - IO.enumCase(Value, "Data", MCAtom::DataAtom); -} - -void MappingTraits::mapping(IO &IO, MCModuleYAML::Atom &A) { - IO.mapRequired("StartAddress", A.StartAddress); - IO.mapRequired("Size", A.Size); - IO.mapRequired("Type", A.Type); - if (A.Type == MCAtom::TextAtom) - IO.mapRequired("Content", A.Insts); - else if (A.Type == MCAtom::DataAtom) - IO.mapRequired("Content", A.Data); -} - -void MappingTraits::mapping(IO &IO, MCModuleYAML::Inst &I) { - IO.mapRequired("Inst", I.Opcode); - 
IO.mapRequired("Size", I.Size); - IO.mapRequired("Ops", I.Operands); -} - -void -MappingTraits::mapping(IO &IO, - MCModuleYAML::BasicBlock &BB) { - IO.mapRequired("Address", BB.Address); - IO.mapRequired("Preds", BB.Preds); - IO.mapRequired("Succs", BB.Succs); -} - -void MappingTraits::mapping(IO &IO, - MCModuleYAML::Function &F) { - IO.mapRequired("Name", F.Name); - IO.mapRequired("BasicBlocks", F.BasicBlocks); -} - -void MappingTraits::mapping(IO &IO, - MCModuleYAML::Module &M) { - IO.mapRequired("Atoms", M.Atoms); - IO.mapOptional("Functions", M.Functions); -} - -void -ScalarTraits::output(const MCModuleYAML::Operand &Val, - void *Ctx, raw_ostream &Out) { - InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; - - // FIXME: Doesn't support FPImm and expr/inst, but do these make sense? - if (Val.MCOp.isImm()) - Out << "I" << Val.MCOp.getImm(); - else if (Val.MCOp.isReg()) - Out << "R" << IRI->MRI.getName(Val.MCOp.getReg()); - else - llvm_unreachable("Trying to output invalid MCOperand!"); -} - -StringRef -ScalarTraits::input(StringRef Scalar, void *Ctx, - MCModuleYAML::Operand &Val) { - InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; - char Type = 0; - if (Scalar.size() >= 1) - Type = Scalar.front(); - if (Type != 'R' && Type != 'I') - return "Operand must start with 'R' (register) or 'I' (immediate)."; - if (Type == 'R') { - unsigned Reg; - if (!IRI->matchRegister(Scalar.substr(1), Reg)) - return "Invalid register name."; - Val.MCOp = MCOperand::CreateReg(Reg); - } else if (Type == 'I') { - int64_t RIVal; - if (Scalar.substr(1).getAsInteger(10, RIVal)) - return "Invalid immediate value."; - Val.MCOp = MCOperand::CreateImm(RIVal); - } else { - Val.MCOp = MCOperand(); - } - return StringRef(); -} - -void ScalarTraits::output( - const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) { - InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; - Out << IRI->MII.getName(Val); -} - -StringRef -ScalarTraits::input(StringRef Scalar, void *Ctx, - 
MCModuleYAML::OpcodeEnum &Val) { - InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; - unsigned Opc; - if (!IRI->matchOpcode(Scalar, Opc)) - return "Invalid instruction opcode."; - Val = Opc; - return ""; -} - -} // end namespace yaml - -namespace { - -class MCModule2YAML { - const MCModule &MCM; - MCModuleYAML::Module YAMLModule; - void dumpAtom(const MCAtom *MCA); - void dumpFunction(const MCFunction &MCF); - void dumpBasicBlock(const MCBasicBlock *MCBB); - -public: - MCModule2YAML(const MCModule &MCM); - MCModuleYAML::Module &getYAMLModule(); -}; - -class YAML2MCModule { - MCModule &MCM; - -public: - YAML2MCModule(MCModule &MCM); - StringRef parse(const MCModuleYAML::Module &YAMLModule); -}; - -} // end unnamed namespace - -MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() { - for (MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end(); - AI != AE; ++AI) - dumpAtom(*AI); - for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end(); - FI != FE; ++FI) - dumpFunction(**FI); -} - -void MCModule2YAML::dumpAtom(const MCAtom *MCA) { - YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1); - MCModuleYAML::Atom &A = YAMLModule.Atoms.back(); - A.Type = MCA->getKind(); - A.StartAddress = MCA->getBeginAddr(); - A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1; - if (const MCTextAtom *TA = dyn_cast(MCA)) { - const size_t InstCount = TA->size(); - A.Insts.resize(InstCount); - for (size_t i = 0; i != InstCount; ++i) { - const MCDecodedInst &MCDI = TA->at(i); - A.Insts[i].Opcode = MCDI.Inst.getOpcode(); - A.Insts[i].Size = MCDI.Size; - const unsigned OpCount = MCDI.Inst.getNumOperands(); - A.Insts[i].Operands.resize(OpCount); - for (unsigned oi = 0; oi != OpCount; ++oi) - A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi); - } - } else if (const MCDataAtom *DA = dyn_cast(MCA)) { - A.Data = DA->getData(); - } else { - llvm_unreachable("Unknown atom type."); - } -} - -void MCModule2YAML::dumpFunction(const 
MCFunction &MCF) { - YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1); - MCModuleYAML::Function &F = YAMLModule.Functions.back(); - F.Name = MCF.getName(); - for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end(); - BBI != BBE; ++BBI) { - const MCBasicBlock &MCBB = **BBI; - F.BasicBlocks.resize(F.BasicBlocks.size() + 1); - MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back(); - BB.Address = MCBB.getInsts()->getBeginAddr(); - for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(), - PE = MCBB.pred_end(); - PI != PE; ++PI) - BB.Preds.push_back((*PI)->getInsts()->getBeginAddr()); - for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(), - SE = MCBB.succ_end(); - SI != SE; ++SI) - BB.Succs.push_back((*SI)->getInsts()->getBeginAddr()); - } -} - -MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; } - -YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {} - -StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) { - typedef std::vector::const_iterator AtomIt; - typedef std::vector::const_iterator InstIt; - typedef std::vector::const_iterator OpIt; - - typedef DenseMap AddrToTextAtomTy; - AddrToTextAtomTy TAByAddr; - - for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end(); - AI != AE; ++AI) { - uint64_t StartAddress = AI->StartAddress; - if (AI->Size == 0) - return "Atoms can't be empty!"; - uint64_t EndAddress = StartAddress + AI->Size - 1; - switch (AI->Type) { - case MCAtom::TextAtom: { - MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress); - TAByAddr[StartAddress] = TA; - for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE; - ++II) { - MCInst MI; - MI.setOpcode(II->Opcode); - for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE; - ++OI) - MI.addOperand(OI->MCOp); - TA->addInst(MI, II->Size); - } - break; - } - case MCAtom::DataAtom: { - MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress); - SmallVector Data; - 
raw_svector_ostream OS(Data); - AI->Data.writeAsBinary(OS); - OS.flush(); - for (size_t i = 0, e = Data.size(); i != e; ++i) - DA->addData((uint8_t)Data[i]); - break; - } - } - } - - typedef std::vector::const_iterator FuncIt; - typedef std::vector::const_iterator BBIt; - typedef std::vector::const_iterator AddrIt; - for (FuncIt FI = YAMLModule.Functions.begin(), - FE = YAMLModule.Functions.end(); - FI != FE; ++FI) { - MCFunction *MCFN = MCM.createFunction(FI->Name); - for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); - BBI != BBE; ++BBI) { - AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address); - if (It == TAByAddr.end()) - return "Basic block start address doesn't match any text atom!"; - MCFN->createBlock(*It->second); - } - for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); - BBI != BBE; ++BBI) { - MCBasicBlock *MCBB = MCFN->find(BBI->Address); - if (!MCBB) - return "Couldn't find matching basic block in function."; - for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; - ++PI) { - MCBasicBlock *Pred = MCFN->find(*PI); - if (!Pred) - return "Couldn't find predecessor basic block."; - MCBB->addPredecessor(Pred); - } - for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; - ++SI) { - MCBasicBlock *Succ = MCFN->find(*SI); - if (!Succ) - return "Couldn't find predecessor basic block."; - MCBB->addSuccessor(Succ); - } - } - } - return ""; -} - -StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, - const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - MCModule2YAML Dumper(MCM); - InstrRegInfoHolder IRI(MII, MRI); - yaml::Output YOut(OS, (void *)&IRI); - YOut << Dumper.getYAMLModule(); - return ""; -} - -StringRef yaml2mcmodule(std::unique_ptr &MCM, StringRef YamlContent, - const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - MCM.reset(new MCModule); - YAML2MCModule Parser(*MCM); - MCModuleYAML::Module YAMLModule; - InstrRegInfoHolder IRI(MII, MRI); - yaml::Input 
YIn(YamlContent, (void *)&IRI); - YIn >> YAMLModule; - if (std::error_code ec = YIn.error()) - return ec.message(); - StringRef err = Parser.parse(YAMLModule); - if (!err.empty()) - return err; - return ""; -} - -} // end namespace llvm diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp deleted file mode 100644 index 8a258cb0909..00000000000 --- a/lib/MC/MCObjectDisassembler.cpp +++ /dev/null @@ -1,574 +0,0 @@ -//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCObjectDisassembler.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCFunction.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCModule.h" -#include "llvm/MC/MCObjectSymbolizer.h" -#include "llvm/Object/MachO.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/StringRefMemoryObject.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; -using namespace object; - -#define DEBUG_TYPE "mc" - -MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, - const MCDisassembler &Dis, - const MCInstrAnalysis &MIA) - : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {} - -uint64_t MCObjectDisassembler::getEntrypoint() { - for (const SymbolRef &Symbol : Obj.symbols()) { - StringRef Name; - Symbol.getName(Name); - if (Name == "main" || Name == "_main") { - uint64_t Entrypoint; - Symbol.getAddress(Entrypoint); - return 
getEffectiveLoadAddr(Entrypoint); - } - } - return 0; -} - -ArrayRef MCObjectDisassembler::getStaticInitFunctions() { - return ArrayRef(); -} - -ArrayRef MCObjectDisassembler::getStaticExitFunctions() { - return ArrayRef(); -} - -MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) { - // FIXME: Keep track of object sections. - return FallbackRegion.get(); -} - -uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { - return Addr; -} - -uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) { - return Addr; -} - -MCModule *MCObjectDisassembler::buildEmptyModule() { - MCModule *Module = new MCModule; - Module->Entrypoint = getEntrypoint(); - return Module; -} - -MCModule *MCObjectDisassembler::buildModule(bool withCFG) { - MCModule *Module = buildEmptyModule(); - - buildSectionAtoms(Module); - if (withCFG) - buildCFG(Module); - return Module; -} - -void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { - for (const SectionRef &Section : Obj.sections()) { - bool isText; - Section.isText(isText); - bool isData; - Section.isData(isData); - if (!isData && !isText) - continue; - - uint64_t StartAddr; - Section.getAddress(StartAddr); - uint64_t SecSize; - Section.getSize(SecSize); - if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) - continue; - StartAddr = getEffectiveLoadAddr(StartAddr); - - StringRef Contents; - Section.getContents(Contents); - StringRefMemoryObject memoryObject(Contents, StartAddr); - - // We don't care about things like non-file-backed sections yet. 
- if (Contents.size() != SecSize || !SecSize) - continue; - uint64_t EndAddr = StartAddr + SecSize - 1; - - StringRef SecName; - Section.getName(SecName); - - if (isText) { - MCTextAtom *Text = nullptr; - MCDataAtom *InvalidData = nullptr; - - uint64_t InstSize; - for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { - const uint64_t CurAddr = StartAddr + Index; - MCInst Inst; - if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(), - nulls())) { - if (!Text) { - Text = Module->createTextAtom(CurAddr, CurAddr); - Text->setName(SecName); - } - Text->addInst(Inst, InstSize); - InvalidData = nullptr; - } else { - assert(InstSize && "getInstruction() consumed no bytes"); - if (!InvalidData) { - Text = nullptr; - InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1); - } - for (uint64_t I = 0; I < InstSize; ++I) - InvalidData->addData(Contents[Index+I]); - } - } - } else { - MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); - Data->setName(SecName); - for (uint64_t Index = 0; Index < SecSize; ++Index) - Data->addData(Contents[Index]); - } - } -} - -namespace { - struct BBInfo; - typedef SmallPtrSet BBInfoSetTy; - - struct BBInfo { - MCTextAtom *Atom; - MCBasicBlock *BB; - BBInfoSetTy Succs; - BBInfoSetTy Preds; - MCObjectDisassembler::AddressSetTy SuccAddrs; - - BBInfo() : Atom(nullptr), BB(nullptr) {} - - void addSucc(BBInfo &Succ) { - Succs.insert(&Succ); - Succ.Preds.insert(this); - } - }; -} - -static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) { - std::sort(V.begin(), V.end()); - V.erase(std::unique(V.begin(), V.end()), V.end()); -} - -void MCObjectDisassembler::buildCFG(MCModule *Module) { - typedef std::map BBInfoByAddrTy; - BBInfoByAddrTy BBInfos; - AddressSetTy Splits; - AddressSetTy Calls; - - for (const SymbolRef &Symbol : Obj.symbols()) { - SymbolRef::Type SymType; - Symbol.getType(SymType); - if (SymType == SymbolRef::ST_Function) { - uint64_t SymAddr; - 
Symbol.getAddress(SymAddr); - SymAddr = getEffectiveLoadAddr(SymAddr); - Calls.push_back(SymAddr); - Splits.push_back(SymAddr); - } - } - - assert(Module->func_begin() == Module->func_end() - && "Module already has a CFG!"); - - // First, determine the basic block boundaries and call targets. - for (MCModule::atom_iterator AI = Module->atom_begin(), - AE = Module->atom_end(); - AI != AE; ++AI) { - MCTextAtom *TA = dyn_cast(*AI); - if (!TA) continue; - Calls.push_back(TA->getBeginAddr()); - BBInfos[TA->getBeginAddr()].Atom = TA; - for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); - II != IE; ++II) { - if (MIA.isTerminator(II->Inst)) - Splits.push_back(II->Address + II->Size); - uint64_t Target; - if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { - if (MIA.isCall(II->Inst)) - Calls.push_back(Target); - Splits.push_back(Target); - } - } - } - - RemoveDupsFromAddressVector(Splits); - RemoveDupsFromAddressVector(Calls); - - // Split text atoms into basic block atoms. - for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); - SI != SE; ++SI) { - MCAtom *A = Module->findAtomContaining(*SI); - if (!A) continue; - MCTextAtom *TA = cast(A); - if (TA->getBeginAddr() == *SI) - continue; - MCTextAtom *NewAtom = TA->split(*SI); - BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; - StringRef BBName = TA->getName(); - BBName = BBName.substr(0, BBName.find_last_of(':')); - NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); - } - - // Compute succs/preds. 
- for (MCModule::atom_iterator AI = Module->atom_begin(), - AE = Module->atom_end(); - AI != AE; ++AI) { - MCTextAtom *TA = dyn_cast(*AI); - if (!TA) continue; - BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; - const MCDecodedInst &LI = TA->back(); - if (MIA.isBranch(LI.Inst)) { - uint64_t Target; - if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) - CurBB.addSucc(BBInfos[Target]); - if (MIA.isConditionalBranch(LI.Inst)) - CurBB.addSucc(BBInfos[LI.Address + LI.Size]); - } else if (!MIA.isTerminator(LI.Inst)) - CurBB.addSucc(BBInfos[LI.Address + LI.Size]); - } - - - // Create functions and basic blocks. - for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); - CI != CE; ++CI) { - BBInfo &BBI = BBInfos[*CI]; - if (!BBI.Atom) continue; - - MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); - - // Create MCBBs. - SmallSetVector Worklist; - Worklist.insert(&BBI); - for (size_t wi = 0; wi < Worklist.size(); ++wi) { - BBInfo *BBI = Worklist[wi]; - if (!BBI->Atom) - continue; - BBI->BB = &MCFN.createBlock(*BBI->Atom); - // Add all predecessors and successors to the worklist. - for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); - SI != SE; ++SI) - Worklist.insert(*SI); - for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); - PI != PE; ++PI) - Worklist.insert(*PI); - } - - // Set preds/succs. 
- for (size_t wi = 0; wi < Worklist.size(); ++wi) { - BBInfo *BBI = Worklist[wi]; - MCBasicBlock *MCBB = BBI->BB; - if (!MCBB) - continue; - for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); - SI != SE; ++SI) - if ((*SI)->BB) - MCBB->addSuccessor((*SI)->BB); - for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); - PI != PE; ++PI) - if ((*PI)->BB) - MCBB->addPredecessor((*PI)->BB); - } - } -} - -// Basic idea of the disassembly + discovery: -// -// start with the wanted address, insert it in the worklist -// while worklist not empty, take next address in the worklist: -// - check if atom exists there -// - if middle of atom: -// - split basic blocks referencing the atom -// - look for an already encountered BBInfo (using a map) -// - if there is, split it (new one, fallthrough, move succs, etc..) -// - if start of atom: nothing else to do -// - if no atom: create new atom and new bbinfo -// - look at the last instruction in the atom, add succs to worklist -// for all elements in the worklist: -// - create basic block, update preds/succs, etc.. -// -MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN, - uint64_t BBBeginAddr, - AddressSetTy &CallTargets, - AddressSetTy &TailCallTargets) { - typedef std::map BBInfoByAddrTy; - typedef SmallSetVector AddrWorklistTy; - BBInfoByAddrTy BBInfos; - AddrWorklistTy Worklist; - - Worklist.insert(BBBeginAddr); - for (size_t wi = 0; wi < Worklist.size(); ++wi) { - const uint64_t BeginAddr = Worklist[wi]; - BBInfo *BBI = &BBInfos[BeginAddr]; - - MCTextAtom *&TA = BBI->Atom; - assert(!TA && "Discovered basic block already has an associated atom!"); - - // Look for an atom at BeginAddr. - if (MCAtom *A = Module->findAtomContaining(BeginAddr)) { - // FIXME: We don't care about mixed atoms, see above. - TA = cast(A); - - // The found atom doesn't begin at BeginAddr, we have to split it. 
- if (TA->getBeginAddr() != BeginAddr) { - // FIXME: Handle overlapping atoms: middle-starting instructions, etc.. - MCTextAtom *NewTA = TA->split(BeginAddr); - - // Look for an already encountered basic block that needs splitting - BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr()); - if (It != BBInfos.end() && It->second.Atom) { - BBI->SuccAddrs = It->second.SuccAddrs; - It->second.SuccAddrs.clear(); - It->second.SuccAddrs.push_back(BeginAddr); - } - TA = NewTA; - } - BBI->Atom = TA; - } else { - // If we didn't find an atom, then we have to disassemble to create one! - - MemoryObject *Region = getRegionFor(BeginAddr); - if (!Region) - llvm_unreachable(("Couldn't find suitable region for disassembly at " + - utostr(BeginAddr)).c_str()); - - uint64_t InstSize; - uint64_t EndAddr = Region->getBase() + Region->getExtent(); - - // We want to stop before the next atom and have a fallthrough to it. - if (MCTextAtom *NextAtom = - cast_or_null(Module->findFirstAtomAfter(BeginAddr))) - EndAddr = std::min(EndAddr, NextAtom->getBeginAddr()); - - for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { - MCInst Inst; - if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(), - nulls())) { - if (!TA) - TA = Module->createTextAtom(Addr, Addr); - TA->addInst(Inst, InstSize); - } else { - // We don't care about splitting mixed atoms either. 
- llvm_unreachable("Couldn't disassemble instruction in atom."); - } - - uint64_t BranchTarget; - if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { - if (MIA.isCall(Inst)) - CallTargets.push_back(BranchTarget); - } - - if (MIA.isTerminator(Inst)) - break; - } - BBI->Atom = TA; - } - - assert(TA && "Couldn't disassemble atom, none was created!"); - assert(TA->begin() != TA->end() && "Empty atom!"); - - MemoryObject *Region = getRegionFor(TA->getBeginAddr()); - assert(Region && "Couldn't find region for already disassembled code!"); - uint64_t EndRegion = Region->getBase() + Region->getExtent(); - - // Now we have a basic block atom, add successors. - // Add the fallthrough block. - if ((MIA.isConditionalBranch(TA->back().Inst) || - !MIA.isTerminator(TA->back().Inst)) && - (TA->getEndAddr() + 1 < EndRegion)) { - BBI->SuccAddrs.push_back(TA->getEndAddr() + 1); - Worklist.insert(TA->getEndAddr() + 1); - } - - // If the terminator is a branch, add the target block. - if (MIA.isBranch(TA->back().Inst)) { - uint64_t BranchTarget; - if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address, - TA->back().Size, BranchTarget)) { - StringRef ExtFnName; - if (MOS) - ExtFnName = - MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget)); - if (!ExtFnName.empty()) { - TailCallTargets.push_back(BranchTarget); - CallTargets.push_back(BranchTarget); - } else { - BBI->SuccAddrs.push_back(BranchTarget); - Worklist.insert(BranchTarget); - } - } - } - } - - for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { - const uint64_t BeginAddr = Worklist[wi]; - BBInfo *BBI = &BBInfos[BeginAddr]; - - assert(BBI->Atom && "Found a basic block without an associated atom!"); - - // Look for a basic block at BeginAddr. - BBI->BB = MCFN->find(BeginAddr); - if (BBI->BB) { - // FIXME: check that the succs/preds are the same - continue; - } - // If there was none, we have to create one from the atom. 
- BBI->BB = &MCFN->createBlock(*BBI->Atom); - } - - for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { - const uint64_t BeginAddr = Worklist[wi]; - BBInfo *BBI = &BBInfos[BeginAddr]; - MCBasicBlock *BB = BBI->BB; - - RemoveDupsFromAddressVector(BBI->SuccAddrs); - for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(), - SE = BBI->SuccAddrs.end(); - SE != SE; ++SI) { - MCBasicBlock *Succ = BBInfos[*SI].BB; - BB->addSuccessor(Succ); - Succ->addPredecessor(BB); - } - } - - assert(BBInfos[Worklist[0]].BB && - "No basic block created at requested address?"); - - return BBInfos[Worklist[0]].BB; -} - -MCFunction * -MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr, - AddressSetTy &CallTargets, - AddressSetTy &TailCallTargets) { - // First, check if this is an external function. - StringRef ExtFnName; - if (MOS) - ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr)); - if (!ExtFnName.empty()) - return Module->createFunction(ExtFnName); - - // If it's not, look for an existing function. - for (MCModule::func_iterator FI = Module->func_begin(), - FE = Module->func_end(); - FI != FE; ++FI) { - if ((*FI)->empty()) - continue; - // FIXME: MCModule should provide a findFunctionByAddr() - if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr) - return FI->get(); - } - - // Finally, just create a new one. - MCFunction *MCFN = Module->createFunction(""); - getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets); - return MCFN; -} - -// MachO MCObjectDisassembler implementation. 
- -MCMachOObjectDisassembler::MCMachOObjectDisassembler( - const MachOObjectFile &MOOF, const MCDisassembler &Dis, - const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, - uint64_t HeaderLoadAddress) - : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF), - VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) { - - for (const SectionRef &Section : MOOF.sections()) { - StringRef Name; - Section.getName(Name); - // FIXME: We should use the S_ section type instead of the name. - if (Name == "__mod_init_func") { - DEBUG(dbgs() << "Found __mod_init_func section!\n"); - Section.getContents(ModInitContents); - } else if (Name == "__mod_exit_func") { - DEBUG(dbgs() << "Found __mod_exit_func section!\n"); - Section.getContents(ModExitContents); - } - } -} - -// FIXME: Only do the translations for addresses actually inside the object. -uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { - return Addr + VMAddrSlide; -} - -uint64_t -MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) { - return EffectiveAddr - VMAddrSlide; -} - -uint64_t MCMachOObjectDisassembler::getEntrypoint() { - uint64_t EntryFileOffset = 0; - - // Look for LC_MAIN. - { - uint32_t LoadCommandCount = MOOF.getHeader().ncmds; - MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo(); - for (unsigned I = 0;; ++I) { - if (Load.C.cmd == MachO::LC_MAIN) { - EntryFileOffset = - ((const MachO::entry_point_command *)Load.Ptr)->entryoff; - break; - } - - if (I == LoadCommandCount - 1) - break; - else - Load = MOOF.getNextLoadCommandInfo(Load); - } - } - - // If we didn't find anything, default to the common implementation. - // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends? 
- if (EntryFileOffset) - return MCObjectDisassembler::getEntrypoint(); - - return EntryFileOffset + HeaderLoadAddress; -} - -ArrayRef MCMachOObjectDisassembler::getStaticInitFunctions() { - // FIXME: We only handle 64bit mach-o - assert(MOOF.is64Bit()); - - size_t EntrySize = 8; - size_t EntryCount = ModInitContents.size() / EntrySize; - return ArrayRef( - reinterpret_cast(ModInitContents.data()), EntryCount); -} - -ArrayRef MCMachOObjectDisassembler::getStaticExitFunctions() { - // FIXME: We only handle 64bit mach-o - assert(MOOF.is64Bit()); - - size_t EntrySize = 8; - size_t EntryCount = ModExitContents.size() / EntrySize; - return ArrayRef( - reinterpret_cast(ModExitContents.data()), EntryCount); -} diff --git a/lib/MC/MCObjectSymbolizer.cpp b/lib/MC/MCObjectSymbolizer.cpp deleted file mode 100644 index b14959689d9..00000000000 --- a/lib/MC/MCObjectSymbolizer.cpp +++ /dev/null @@ -1,268 +0,0 @@ -//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCObjectSymbolizer.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRelocationInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/MachO.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; -using namespace object; - -//===- MCMachObjectSymbolizer ---------------------------------------------===// - -namespace { -class MCMachObjectSymbolizer : public MCObjectSymbolizer { - const MachOObjectFile *MOOF; - // __TEXT;__stubs support. 
- uint64_t StubsStart; - uint64_t StubsCount; - uint64_t StubSize; - uint64_t StubsIndSymIndex; - -public: - MCMachObjectSymbolizer(MCContext &Ctx, - std::unique_ptr RelInfo, - const MachOObjectFile *MOOF); - - StringRef findExternalFunctionAt(uint64_t Addr) override; - - void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, - uint64_t Address) override; -}; -} // End unnamed namespace - -MCMachObjectSymbolizer::MCMachObjectSymbolizer( - MCContext &Ctx, std::unique_ptr RelInfo, - const MachOObjectFile *MOOF) - : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF), - StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) { - - for (const SectionRef &Section : MOOF->sections()) { - StringRef Name; - Section.getName(Name); - if (Name == "__stubs") { - SectionRef StubsSec = Section; - if (MOOF->is64Bit()) { - MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl()); - StubsIndSymIndex = S.reserved1; - StubSize = S.reserved2; - } else { - MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl()); - StubsIndSymIndex = S.reserved1; - StubSize = S.reserved2; - } - assert(StubSize && "Mach-O stub entry size can't be zero!"); - StubsSec.getAddress(StubsStart); - StubsSec.getSize(StubsCount); - StubsCount /= StubSize; - } - } -} - -StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { - // FIXME: also, this can all be done at the very beginning, by iterating over - // all stubs and creating the calls to outside functions. Is it worth it - // though? 
- if (!StubSize) - return StringRef(); - uint64_t StubIdx = (Addr - StubsStart) / StubSize; - if (StubIdx >= StubsCount) - return StringRef(); - - uint32_t SymtabIdx = - MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx); - - StringRef SymName; - symbol_iterator SI = MOOF->symbol_begin(); - for (uint32_t i = 0; i != SymtabIdx; ++i) - ++SI; - SI->getName(SymName); - assert(SI != MOOF->symbol_end() && "Stub wasn't found in the symbol table!"); - assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!"); - return SymName.substr(1); -} - -void MCMachObjectSymbolizer:: -tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, - uint64_t Address) { - if (const RelocationRef *R = findRelocationAt(Address)) { - const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R); - if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false) - return; - } - uint64_t Addr = Value; - if (const SectionRef *S = findSectionContaining(Addr)) { - StringRef Name; S->getName(Name); - uint64_t SAddr; S->getAddress(SAddr); - if (Name == "__cstring") { - StringRef Contents; - S->getContents(Contents); - Contents = Contents.substr(Addr - SAddr); - cStream << " ## literal pool for: " - << Contents.substr(0, Contents.find_first_of(0)); - } - } -} - -//===- MCObjectSymbolizer -------------------------------------------------===// - -MCObjectSymbolizer::MCObjectSymbolizer( - MCContext &Ctx, std::unique_ptr RelInfo, - const ObjectFile *Obj) - : MCSymbolizer(Ctx, std::move(RelInfo)), Obj(Obj), SortedSections(), - AddrToReloc() {} - -bool MCObjectSymbolizer:: -tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, - int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) { - if (IsBranch) { - StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value); - if (!ExtFnName.empty()) { - MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); - 
MI.addOperand(MCOperand::CreateExpr(Expr)); - return true; - } - } - - if (const RelocationRef *R = findRelocationAt(Address + Offset)) { - if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) { - MI.addOperand(MCOperand::CreateExpr(RelExpr)); - return true; - } - // Only try to create a symbol+offset expression if there is no relocation. - return false; - } - - // Interpret Value as a branch target. - if (IsBranch == false) - return false; - uint64_t UValue = Value; - // FIXME: map instead of looping each time? - for (const SymbolRef &Symbol : Obj->symbols()) { - uint64_t SymAddr; - Symbol.getAddress(SymAddr); - uint64_t SymSize; - Symbol.getSize(SymSize); - StringRef SymName; - Symbol.getName(SymName); - SymbolRef::Type SymType; - Symbol.getType(SymType); - if (SymAddr == UnknownAddressOrSize || SymSize == UnknownAddressOrSize || - SymName.empty() || SymType != SymbolRef::ST_Function) - continue; - - if ( SymAddr == UValue || - (SymAddr <= UValue && SymAddr + SymSize > UValue)) { - MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); - if (SymAddr != UValue) { - const MCExpr *Off = MCConstantExpr::Create(UValue - SymAddr, Ctx); - Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx); - } - MI.addOperand(MCOperand::CreateExpr(Expr)); - return true; - } - } - return false; -} - -void MCObjectSymbolizer:: -tryAddingPcLoadReferenceComment(raw_ostream &cStream, - int64_t Value, uint64_t Address) { -} - -StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { - return StringRef(); -} - -MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer( - MCContext &Ctx, std::unique_ptr RelInfo, - const ObjectFile *Obj) { - if (const MachOObjectFile *MOOF = dyn_cast(Obj)) - return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), MOOF); - return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj); -} - -// SortedSections implementation. 
- -static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) { - uint64_t SAddr; S.getAddress(SAddr); - return SAddr < Addr; -} - -const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) { - if (SortedSections.empty()) - buildSectionList(); - - SortedSectionList::iterator - EndIt = SortedSections.end(), - It = std::lower_bound(SortedSections.begin(), EndIt, - Addr, SectionStartsBefore); - if (It == EndIt) - return nullptr; - uint64_t SAddr; It->getAddress(SAddr); - uint64_t SSize; It->getSize(SSize); - if (Addr >= SAddr + SSize) - return nullptr; - return &*It; -} - -const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) { - if (AddrToReloc.empty()) - buildRelocationByAddrMap(); - - AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr); - if (RI == AddrToReloc.end()) - return nullptr; - return &RI->second; -} - -void MCObjectSymbolizer::buildSectionList() { - for (const SectionRef &Section : Obj->sections()) { - bool RequiredForExec; - Section.isRequiredForExecution(RequiredForExec); - if (RequiredForExec == false) - continue; - uint64_t SAddr; - Section.getAddress(SAddr); - uint64_t SSize; - Section.getSize(SSize); - SortedSectionList::iterator It = - std::lower_bound(SortedSections.begin(), SortedSections.end(), SAddr, - SectionStartsBefore); - if (It != SortedSections.end()) { - uint64_t FoundSAddr; It->getAddress(FoundSAddr); - if (FoundSAddr < SAddr + SSize) - llvm_unreachable("Inserting overlapping sections"); - } - SortedSections.insert(It, Section); - } -} - -void MCObjectSymbolizer::buildRelocationByAddrMap() { - for (const SectionRef &Section : Obj->sections()) { - for (const RelocationRef &Reloc : Section.relocations()) { - uint64_t Address; - Reloc.getAddress(Address); - // At a specific address, only keep the first relocation. 
- if (AddrToReloc.find(Address) == AddrToReloc.end()) - AddrToReloc[Address] = Reloc; - } - } -} diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt index 413cb9bb1f0..d63602bd018 100644 --- a/tools/llvm-objdump/CMakeLists.txt +++ b/tools/llvm-objdump/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} DebugInfo MC + MCAnalysis Object Support ) diff --git a/tools/llvm-objdump/LLVMBuild.txt b/tools/llvm-objdump/LLVMBuild.txt index d16c501a6cc..d9c09b60034 100644 --- a/tools/llvm-objdump/LLVMBuild.txt +++ b/tools/llvm-objdump/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-objdump parent = Tools -required_libraries = DebugInfo MC MCDisassembler MCParser Object all-targets +required_libraries = DebugInfo MC MCAnalysis MCDisassembler MCParser Object all-targets diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index d98691b4b04..309bf2369a8 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -20,17 +20,17 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/MC/MCAnalysis/MCModuleYAML.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCAtom.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCFunction.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCModule.h" -#include "llvm/MC/MCModuleYAML.h" #include "llvm/MC/MCObjectDisassembler.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectSymbolizer.h" diff --git a/unittests/MC/CMakeLists.txt b/unittests/MC/CMakeLists.txt index 0e4782c83f4..496056ecf59 100644 --- a/unittests/MC/CMakeLists.txt +++ b/unittests/MC/CMakeLists.txt @@ -1,11 
+1,7 @@ set(LLVM_LINK_COMPONENTS - MC - ) - -set(MCSources - MCAtomTest.cpp + MCAnalysis ) add_llvm_unittest(MCTests - ${MCSources} + MCAtomTest.cpp ) diff --git a/unittests/MC/MCAtomTest.cpp b/unittests/MC/MCAtomTest.cpp index 17b056cd2de..16228b521f4 100644 --- a/unittests/MC/MCAtomTest.cpp +++ b/unittests/MC/MCAtomTest.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCModule.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" #include "gtest/gtest.h" namespace llvm { diff --git a/unittests/MC/Makefile b/unittests/MC/Makefile index 4c25697d28d..07a608e65a3 100644 --- a/unittests/MC/Makefile +++ b/unittests/MC/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. TESTNAME = MC -LINK_COMPONENTS := MC +LINK_COMPONENTS := MCAnalysis include $(LEVEL)/Makefile.config include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest