-//===-- ReaderInternals.h - Definitions internal to the reader ---*- C++ -*--=//
+//===-- ReaderInternals.h - Definitions internal to the reader --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
//
// This header defines types and helpers internal to the bytecode reader.
//
#ifndef READER_INTERNALS_H
#define READER_INTERNALS_H
-#include "llvm/Bytecode/Primitives.h"
-#include "llvm/Function.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instruction.h"
+#include "ReaderPrimitives.h"
+#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include <map>
+#include "llvm/Function.h"
+#include "llvm/ModuleProvider.h"
#include <utility>
-#include <list>
-#include <iostream>
+#include <map>
+
+namespace llvm {
// Enable to trace to figure out what the heck is going on when parsing fails
-#define TRACE_LEVEL 0
+//#define TRACE_LEVEL 10
+//#define DEBUG_OUTPUT
-#if TRACE_LEVEL // ByteCodeReading_TRACEer
-#define BCR_TRACE(n, X) if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X
+#if TRACE_LEVEL // ByteCodeReading_TRACEr
+#define BCR_TRACE(n, X) \
+ if (n < TRACE_LEVEL) std::cerr << std::string(n*2, ' ') << X
#else
#define BCR_TRACE(n, X)
#endif
-typedef unsigned char uchar;
-
-struct RawInst { // The raw fields out of the bytecode stream...
- unsigned NumOperands;
- unsigned Opcode;
- const Type *Ty;
- unsigned Arg1, Arg2;
- union {
- unsigned Arg3;
- std::vector<unsigned> *VarArgs; // Contains arg #3,4,5... if NumOperands > 3
- };
// LazyFunctionInfo - The begin (Buf) and end (EndBuf) pointers into the
// bytecode buffer saved for one function so that it can be read in lazily
// (see BytecodeParser::LazyFunctionLoadMap).  Both pointers default to null.
+struct LazyFunctionInfo {
+ const unsigned char *Buf, *EndBuf;
+ LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0)
+ : Buf(B), EndBuf(EB) {}
};
-class BytecodeParser : public AbstractTypeUser {
- std::string Error; // Error message string goes here...
// BytecodeParser - A ModuleProvider that reads a module out of an in-memory
// bytecode buffer (see ParseBytecode).  It also records, per function, the
// buffer range needed to read that function in lazily (LazyFunctionLoadMap).
+class BytecodeParser : public ModuleProvider {
// Copying is disabled: both members are private (declared before `public:`)
// and are intentionally left without a definition.
+ BytecodeParser(const BytecodeParser &); // DO NOT IMPLEMENT
+ void operator=(const BytecodeParser &); // DO NOT IMPLEMENT
public:
// NOTE(review): only FirstDerivedTyID is initialized here; RevisionNum and the
// has*/uses* flags below are presumably set by ParseVersionInfo -- confirm.
BytecodeParser() {
// Define this in case we don't see a ModuleGlobalInfo block.
FirstDerivedTyID = Type::FirstDerivedTyID;
}
+
// The destructor releases the value tables via freeState().
+ ~BytecodeParser() {
+ freeState();
+ }
// freeState - Delete every ValueList held in Values and ModuleValues, leaving
// both tables empty.
+ void freeState() {
+ freeTable(Values);
+ freeTable(ModuleValues);
+ }
- Module *ParseBytecode(const uchar *Buf, const uchar *EndBuf);
// releaseModule - Obtain the module from the ModuleProvider base class, then
// free the parser's value tables before returning it to the caller.
+ Module* releaseModule() {
+ // Since we're losing control of this Module, we must hand it back complete
+ Module *M = ModuleProvider::releaseModule();
+ freeState();
+ return M;
+ }
- std::string getError() const { return Error; }
// ParseBytecode - Parse the Length bytes of bytecode starting at Buf.
// NOTE(review): this returns void; failures are presumably reported by
// throwing (as readBlock() does with Error_read), and ModuleID presumably
// names the resulting module -- confirm both in the implementation.
+ void ParseBytecode(const unsigned char *Buf, unsigned Length,
+ const std::string &ModuleID);
// dump - Debugging aid; writes a fixed identification string to std::cerr.
void dump() const {
std::cerr << "BytecodeParser instance!\n";
}
-private: // All of this data is transient across calls to ParseBytecode
- Module *TheModule; // Current Module being read into...
-
- typedef std::vector<Value *> ValueList;
- typedef std::vector<ValueList> ValueTable;
- ValueTable Values, LateResolveValues;
- ValueTable ModuleValues, LateResolveModuleValues;
+private:
// ValueList - A User whose operand list doubles as a vector of values; it is
// the element type of a ValueTable.  Entries are stored as Use objects that
// name this list as their user (see push_back).
+ struct ValueList : public User {
+ ValueList() : User(Type::TypeTy, Value::TypeVal) {}
+
+ // vector compatibility methods
+ unsigned size() const { return getNumOperands(); }
+ void push_back(Value *V) { Operands.push_back(Use(V, this)); }
+ Value *back() const { return Operands.back(); }
+ void pop_back() { Operands.pop_back(); }
+ bool empty() const { return Operands.empty(); }
+
// Virtual print hook; emits a fixed marker string rather than the contents.
+ virtual void print(std::ostream& OS) const {
+ OS << "Bytecode Reader UseHandle!";
+ }
+ };
+
+ // Information about the module, extracted from the bytecode revision number.
+ unsigned char RevisionNum; // The rev # itself
+ unsigned char FirstDerivedTyID; // First variable index to use for type
+ bool hasInternalMarkerOnly; // Only types of linkage are intern/external
+ bool hasExtendedLinkageSpecs; // Supports more than 4 linkage types
+ bool hasOldStyleVarargs; // Has old version of varargs intrinsics?
+ bool hasVarArgCallPadding; // Bytecode has extra padding in vararg call
- // GlobalRefs - This maintains a mapping between <Type, Slot #>'s and forward
- // references to global values. Global values may be referenced before they
+ bool usesOldStyleVarargs; // Does this module USE old style varargs?
+
// ValueTable - A vector of heap-allocated ValueLists; freeTable() deletes the
// entries.  Values and ModuleValues are the two tables released by freeState().
// NOTE(review): presumably Values holds function-local values and ModuleValues
// module-level ones, each indexed by type slot -- confirm.
+ typedef std::vector<ValueList*> ValueTable;
+ ValueTable Values;
+ ValueTable ModuleValues;
// NOTE(review): presumably maps <type slot, value slot> to the placeholder
// Value created for a forward reference -- confirm against the implementation.
+ std::map<std::pair<unsigned,unsigned>, Value*> ForwardReferences;
+
// NOTE(review): presumably the basic blocks of the function currently being
// parsed, looked up by number through getBasicBlock() -- confirm.
+ std::vector<BasicBlock*> ParsedBasicBlocks;
+
+ // ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
+ // forward references to constants. Such values may be referenced before they
// are defined, and if so, the temporary object that they represent is held
// here.
//
- typedef std::map<std::pair<const PointerType *, unsigned>,
- GlobalVariable*> GlobalRefsType;
- GlobalRefsType GlobalRefs;
+ typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
+ ConstantRefsType ConstantFwdRefs;
// TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
// to deal with forward references to types.
//
// NOTE(review): no member named TypesLoaded is declared here; the comment
// appears to describe ModuleTypeValues and FunctionTypeValues below.
- typedef std::vector<PATypeHandle<Type> > TypeValuesListTy;
+ typedef std::vector<PATypeHolder> TypeValuesListTy;
TypeValuesListTy ModuleTypeValues;
- TypeValuesListTy MethodTypeValues;
+ TypeValuesListTy FunctionTypeValues;
- // Information read from the ModuleGlobalInfo section of the file...
- unsigned FirstDerivedTyID;
+ // When the ModuleGlobalInfo section is read, we create a function object for
+ // each function in the module. When the function is loaded, this function is
+ // filled in.
+ //
+ std::vector<Function*> FunctionSignatureList;
- // When the ModuleGlobalInfo section is read, we load the type of each method
- // and the 'ModuleValues' slot that it lands in. We then load a placeholder
- // into its slot to reserve it. When the method is loaded, this placeholder
- // is replaced.
+ // Constant values are read in after global variables. Because of this, we
+ // must defer setting the initializers on global variables until after module
+ // level constants have been read. In the mean time, this list keeps track of
+ // what we must do.
//
- std::list<std::pair<const PointerType *, unsigned> > MethodSignatureList;
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+
+ // For lazy reading-in of functions, we need to save away several pieces of
+ // information about each function: its begin and end pointer in the buffer
+ // and its FunctionSlot.
+ //
+ std::map<Function*, LazyFunctionInfo> LazyFunctionLoadMap;
+
+private:
// freeTable - Delete each ValueList in Tab, working from the back, until the
// table is empty.
+ void freeTable(ValueTable &Tab) {
+ while (!Tab.empty()) {
+ delete Tab.back();
+ Tab.pop_back();
+ }
+ }
+
+public:
// ParseModule - Parse the module contained in the byte range [Buf, End).
// materializeFunction - NOTE(review): presumably the ModuleProvider hook that
// reads F's body from the range saved in LazyFunctionLoadMap -- confirm.
+ void ParseModule(const unsigned char * Buf, const unsigned char *End);
+ void materializeFunction(Function *F);
private:
- bool ParseModule (const uchar * Buf, const uchar *End, Module *&);
- bool ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End, Module *);
- bool ParseSymbolTable (const uchar *&Buf, const uchar *End, SymbolTable *);
- bool ParseMethod (const uchar *&Buf, const uchar *End, Module *);
- bool ParseBasicBlock (const uchar *&Buf, const uchar *End, BasicBlock *&);
- bool ParseInstruction (const uchar *&Buf, const uchar *End, Instruction *&);
- bool ParseRawInst (const uchar *&Buf, const uchar *End, RawInst &);
-
- bool ParseConstantPool(const uchar *&Buf, const uchar *EndBuf,
- ValueTable &Tab, TypeValuesListTy &TypeTab);
- bool parseConstantValue(const uchar *&Buf, const uchar *End,
- const Type *Ty, Constant *&V);
- bool parseTypeConstants(const uchar *&Buf, const uchar *EndBuf,
- TypeValuesListTy &Tab, unsigned NumEntries);
- const Type *parseTypeConstant(const uchar *&Buf, const uchar *EndBuf);
-
- Value *getValue(const Type *Ty, unsigned num, bool Create = true);
// Parsing helpers.  The routines below that take Buf by reference presumably
// advance it past the bytes they consume -- confirm in the implementation.
// Unlike the declarations they replace, they no longer return a bool status.
+ void ParseVersionInfo (const unsigned char *&Buf, const unsigned char *End);
+ void ParseModuleGlobalInfo(const unsigned char *&Buf, const unsigned char *E);
+ void ParseSymbolTable(const unsigned char *&Buf, const unsigned char *End,
+ SymbolTable *, Function *CurrentFunction);
+ void ParseFunction(const unsigned char *&Buf, const unsigned char *End);
+ void ParseGlobalTypes(const unsigned char *&Buf, const unsigned char *EndBuf);
+
+ BasicBlock *ParseBasicBlock(const unsigned char *&Buf,
+ const unsigned char *End,
+ unsigned BlockNo);
+ unsigned ParseInstructionList(Function *F, const unsigned char *&Buf,
+ const unsigned char *EndBuf);
+
+ void ParseInstruction(const unsigned char *&Buf, const unsigned char *End,
+ std::vector<unsigned> &Args, BasicBlock *BB);
+
+ void ParseConstantPool(const unsigned char *&Buf, const unsigned char *EndBuf,
+ ValueTable &Tab, TypeValuesListTy &TypeTab);
+ Constant *parseConstantValue(const unsigned char *&Buf,
+ const unsigned char *End,
+ unsigned TypeID);
+ void parseTypeConstants(const unsigned char *&Buf,
+ const unsigned char *EndBuf,
+ TypeValuesListTy &Tab, unsigned NumEntries);
+ const Type *parseTypeConstant(const unsigned char *&Buf,
+ const unsigned char *EndBuf);
+
// Lookup helpers keyed by numeric type ID / slot number.
+ Value *getValue(unsigned TypeID, unsigned num, bool Create = true);
const Type *getType(unsigned ID);
+ BasicBlock *getBasicBlock(unsigned ID);
+ Constant *getConstantValue(unsigned TypeID, unsigned num);
// Convenience overload: translates Ty to its type slot with getTypeSlot() and
// forwards to the slot-based getConstantValue().
+ Constant *getConstantValue(const Type *Ty, unsigned num) {
+ return getConstantValue(getTypeSlot(Ty), num);
+ }
- int insertValue(Value *D, std::vector<ValueList> &D); // -1 = Failure
- bool postResolveValues(ValueTable &ValTab);
-
- bool getTypeSlot(const Type *Ty, unsigned &Slot);
// NOTE(review): presumably returns the slot at which V was inserted into
// Table -- confirm.
+ unsigned insertValue(Value *V, unsigned Type, ValueTable &Table);
- // DeclareNewGlobalValue - Patch up forward references to global values in the
- // form of ConstantPointerRefs.
- //
- void DeclareNewGlobalValue(GlobalValue *GV, unsigned Slot);
// getTypeSlot - Returns the slot number for Ty (now by value rather than
// through an out-parameter with a bool status).
+ unsigned getTypeSlot(const Type *Ty);
- // refineAbstractType - The callback method is invoked when one of the
- // elements of TypeValues becomes more concrete...
- //
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
+ // resolve all references to the placeholder (if any) for the given constant
+ void ResolveReferencesToConstant(Constant *C, unsigned Slot);
};
// PlaceholderDef - Wraps SuperType together with an unsigned ID.  The (Ty, id)
// constructor forwards Ty to SuperType and stores id; getID() returns it.
// Default construction and assignment are declared private (before `public:`)
// and are intentionally left without a definition.
template<class SuperType>
class PlaceholderDef : public SuperType {
unsigned ID;
+ PlaceholderDef(); // DO NOT IMPLEMENT
+ void operator=(const PlaceholderDef &); // DO NOT IMPLEMENT
public:
PlaceholderDef(const Type *Ty, unsigned id) : SuperType(Ty), ID(id) {}
unsigned getID() { return ID; }
};
-struct InstPlaceHolderHelper : public Instruction {
- InstPlaceHolderHelper(const Type *Ty) : Instruction(Ty, UserOp1, "") {}
- virtual const char *getOpcodeName() const { return "placeholder"; }
-
- virtual Instruction *clone() const { abort(); return 0; }
-};
-
-struct BBPlaceHolderHelper : public BasicBlock {
- BBPlaceHolderHelper(const Type *Ty) : BasicBlock() {
- assert(Ty == Type::LabelTy);
- }
// ConstantPlaceHolderHelper - A ConstantExpr constructed with the UserOp1
// opcode, the null value of Ty as its operand, and Ty as its type.  It serves
// as the SuperType of the ConstPHolder typedef.
+struct ConstantPlaceHolderHelper : public ConstantExpr {
+ ConstantPlaceHolderHelper(const Type *Ty)
+ : ConstantExpr(Instruction::UserOp1, Constant::getNullValue(Ty), Ty) {}
};
-struct MethPlaceHolderHelper : public Function {
- MethPlaceHolderHelper(const Type *Ty)
- : Function(cast<const FunctionType>(Ty), true) {
- }
-};
// ConstPHolder - PlaceholderDef over ConstantPlaceHolderHelper, i.e. a
// placeholder constant that carries an ID.
// NOTE(review): presumably the object stored in ConstantFwdRefs for a
// forward-referenced constant -- confirm against the implementation.
+typedef PlaceholderDef<ConstantPlaceHolderHelper> ConstPHolder;
-typedef PlaceholderDef<InstPlaceHolderHelper> DefPHolder;
-typedef PlaceholderDef<BBPlaceHolderHelper> BBPHolder;
-typedef PlaceholderDef<MethPlaceHolderHelper> MethPHolder;
+// Some common errors we find
// These std::string objects are the values thrown on failure; readBlock()
// below throws Error_read.
// NOTE(review): `static` at namespace scope in a header gives every including
// translation unit its own copy of each string.
+static const std::string Error_readvbr = "read_vbr(): error reading.";
+static const std::string Error_read = "read(): error reading.";
+static const std::string Error_inputdata = "input_data(): error reading.";
+static const std::string Error_DestSlot = "No destination slot found.";
// The removed helper below chose the placeholder class from the value's
// primitive type ID (label, function, or anything else) and returned the ID
// stored in it.  It goes away together with the BBPHolder, MethPHolder and
// DefPHolder typedefs it casts to.
-static inline unsigned getValueIDNumberFromPlaceHolder(Value *Def) {
- switch (Def->getType()->getPrimitiveID()) {
- case Type::LabelTyID: return ((BBPHolder*)Def)->getID();
- case Type::FunctionTyID: return ((MethPHolder*)Def)->getID();
- default: return ((DefPHolder*)Def)->getID();
- }
-}
-
-static inline bool readBlock(const uchar *&Buf, const uchar *EndBuf,
- unsigned &Type, unsigned &Size) {
-#if DEBUG_OUTPUT
// readBlock - Read a block header from Buf: the block Type, then its Size.
// Throws Error_read if either read() call returns true; because of the `||`,
// Size is not read when reading Type already failed.  The previous version
// returned that status as a bool instead of throwing.
+static inline void readBlock(const unsigned char *&Buf,
+ const unsigned char *EndBuf,
+ unsigned &Type, unsigned &Size) {
+#ifdef DEBUG_OUTPUT
// Debug variant: also prints the low 12 bits of Buf's address plus the values
// read to std::cerr, and only then throws on failure.
// NOTE(review): Buf is printed after the reads, so "StartLoc" may not be the
// block's starting position -- confirm whether read() advances Buf.
// NOTE(review): the (unsigned)Buf cast may be rejected or truncate on targets
// where pointers are wider than unsigned.
bool Result = read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size);
std::cerr << "StartLoc = " << ((unsigned)Buf & 4095)
- << " Type = " << Type << " Size = " << Size << endl;
- return Result;
+ << " Type = " << Type << " Size = " << Size << "\n";
+ if (Result) throw Error_read;
#else
- return read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size);
+ if (read(Buf, EndBuf, Type) || read(Buf, EndBuf, Size)) throw Error_read;
#endif
}
-
-// failure Template - This template function is used as a place to put
-// breakpoints in to debug failures of the bytecode parser.
-//
// The removed helper is an identity function: it returns its argument
// unchanged.
-template <typename X>
-static X failure(X Value) {
- return Value;
-}
+} // End llvm namespace
#endif