X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FBytecode%2FWriter%2FWriter.cpp;h=a2e8fe566d35633bfe55c7751ccb04126c0841bf;hb=ef9b9a793949469cdaa4ab6d0173136229dcab7b;hp=5f614fddf081d9980eb468ca56a7644f499fc5df;hpb=38287bdfde17249ed4725aa906ec15c59ccfe610;p=oota-llvm.git diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp index 5f614fddf08..a2e8fe566d3 100644 --- a/lib/Bytecode/Writer/Writer.cpp +++ b/lib/Bytecode/Writer/Writer.cpp @@ -17,15 +17,22 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "bytecodewriter" #include "WriterInternals.h" #include "llvm/Bytecode/WriteBytecodePass.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Module.h" -#include "llvm/SymbolTable.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/ValueSymbolTable.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/Compressor.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Streams.h" +#include "llvm/System/Program.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include @@ -36,12 +43,11 @@ using namespace llvm; /// so that the reader can distinguish which format of the bytecode file has /// been written. /// @brief The bytecode version number -const unsigned BCVersionNum = 5; +const unsigned BCVersionNum = 7; static RegisterPass X("emitbytecode", "Bytecode Writer"); -static Statistic<> -BytesWritten("bytecodewriter", "Number of bytecode bytes written"); +STATISTIC(BytesWritten, "Number of bytecode bytes written"); //===----------------------------------------------------------------------===// //=== Output Primitives ===// @@ -127,7 +133,7 @@ inline void BytecodeWriter::output_vbr(int i) { inline void BytecodeWriter::output(const std::string &s) { unsigned Len = s.length(); - output_vbr(Len ); // Strings may have an arbitrary length... + output_vbr(Len); // Strings may have an arbitrary length. Out.insert(Out.end(), s.begin(), s.end()); } @@ -138,37 +144,29 @@ inline void BytecodeWriter::output_data(const void *Ptr, const void *End) { inline void BytecodeWriter::output_float(float& FloatVal) { /// FIXME: This isn't optimal, it has size problems on some platforms /// where FP is not IEEE. - union { - float f; - uint32_t i; - } FloatUnion; - FloatUnion.f = FloatVal; - Out.push_back( static_cast( (FloatUnion.i & 0xFF ))); - Out.push_back( static_cast( (FloatUnion.i >> 8) & 0xFF)); - Out.push_back( static_cast( (FloatUnion.i >> 16) & 0xFF)); - Out.push_back( static_cast( (FloatUnion.i >> 24) & 0xFF)); + uint32_t i = FloatToBits(FloatVal); + Out.push_back( static_cast( (i ) & 0xFF)); + Out.push_back( static_cast( (i >> 8 ) & 0xFF)); + Out.push_back( static_cast( (i >> 16) & 0xFF)); + Out.push_back( static_cast( (i >> 24) & 0xFF)); } inline void BytecodeWriter::output_double(double& DoubleVal) { /// FIXME: This isn't optimal, it has size problems on some platforms /// where FP is not IEEE. - union { - double d; - uint64_t i; - } DoubleUnion; - DoubleUnion.d = DoubleVal; - Out.push_back( static_cast( (DoubleUnion.i & 0xFF ))); - Out.push_back( static_cast( (DoubleUnion.i >> 8) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 16) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 24) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 32) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 40) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 48) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 56) & 0xFF)); + uint64_t i = DoubleToBits(DoubleVal); + Out.push_back( static_cast( (i ) & 0xFF)); + Out.push_back( static_cast( (i >> 8 ) & 0xFF)); + Out.push_back( static_cast( (i >> 16) & 0xFF)); + Out.push_back( static_cast( (i >> 24) & 0xFF)); + Out.push_back( static_cast( (i >> 32) & 0xFF)); + Out.push_back( static_cast( (i >> 40) & 0xFF)); + Out.push_back( static_cast( (i >> 48) & 0xFF)); + Out.push_back( static_cast( (i >> 56) & 0xFF)); } -inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter& w, - bool elideIfEmpty, bool hasLongFormat ) +inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter &w, + bool elideIfEmpty, bool hasLongFormat) : Id(ID), Writer(w), ElideIfEmpty(elideIfEmpty), HasLongFormat(hasLongFormat){ if (HasLongFormat) { @@ -200,29 +198,39 @@ inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out //===----------------------------------------------------------------------===// void BytecodeWriter::outputType(const Type *T) { - output_vbr((unsigned)T->getTypeID()); + const StructType* STy = dyn_cast(T); + if(STy && STy->isPacked()) + output_vbr((unsigned)Type::PackedStructTyID); + else + output_vbr((unsigned)T->getTypeID()); // That's all there is to handling primitive types... - if (T->isPrimitiveType()) { + if (T->isPrimitiveType()) return; // We might do this if we alias a prim type: %x = type int - } switch (T->getTypeID()) { // Handle derived types now. + case Type::IntegerTyID: + output_vbr(cast(T)->getBitWidth()); + break; case Type::FunctionTyID: { const FunctionType *MT = cast(T); int Slot = Table.getSlot(MT->getReturnType()); assert(Slot != -1 && "Type used but not available!!"); output_typeid((unsigned)Slot); + output_vbr(unsigned(MT->getParamAttrs(0))); // Output the number of arguments to function (+1 if varargs): output_vbr((unsigned)MT->getNumParams()+MT->isVarArg()); // Output all of the arguments... FunctionType::param_iterator I = MT->param_begin(); + unsigned Idx = 1; for (; I != MT->param_end(); ++I) { Slot = Table.getSlot(*I); assert(Slot != -1 && "Type used but not available!!"); output_typeid((unsigned)Slot); + output_vbr(unsigned(MT->getParamAttrs(Idx))); + Idx++; } // Terminate list with VoidTy if we are a varargs function... @@ -249,10 +257,8 @@ void BytecodeWriter::outputType(const Type *T) { break; } - case Type::StructTyID: { const StructType *ST = cast(T); - // Output all of the element types... for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) { @@ -279,15 +285,15 @@ void BytecodeWriter::outputType(const Type *T) { break; default: - std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" - << " Type '" << T->getDescription() << "'\n"; + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " Type '" << T->getDescription() << "'\n"; break; } } void BytecodeWriter::outputConstant(const Constant *CPV) { - assert((CPV->getType()->isPrimitiveType() || !CPV->isNullValue()) && - "Shouldn't output null constants!"); + assert(((CPV->getType()->isPrimitiveType() || CPV->getType()->isInteger()) || + !CPV->isNullValue()) && "Shouldn't output null constants!"); // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. @@ -295,9 +301,9 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { if (const ConstantExpr *CE = dyn_cast(CPV)) { // FIXME: Encoding of constant exprs could be much more compact! assert(CE->getNumOperands() > 0 && "ConstantExpr with 0 operands"); - assert(CE->getNumOperands() != 1 || CE->getOpcode() == Instruction::Cast); + assert(CE->getNumOperands() != 1 || CE->isCast()); output_vbr(1+CE->getNumOperands()); // flags as an expr - output_vbr(CE->getOpcode()); // flags as an expr + output_vbr(CE->getOpcode()); // Put out the CE op code for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI){ int Slot = Table.getSlot(*OI); @@ -306,35 +312,27 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { Slot = Table.getSlot((*OI)->getType()); output_typeid((unsigned)Slot); } + if (CE->isCompare()) + output_vbr((unsigned)CE->getPredicate()); return; } else if (isa(CPV)) { output_vbr(1U); // 1 -> UndefValue constant. return; } else { - output_vbr(0U); // flag as not a ConstantExpr + output_vbr(0U); // flag as not a ConstantExpr (i.e. 0 operands) } switch (CPV->getType()->getTypeID()) { - case Type::BoolTyID: // Boolean Types - if (cast(CPV)->getValue()) - output_vbr(1U); - else - output_vbr(0U); - break; - - case Type::UByteTyID: // Unsigned integer types... - case Type::UShortTyID: - case Type::UIntTyID: - case Type::ULongTyID: - output_vbr(cast(CPV)->getValue()); - break; - - case Type::SByteTyID: // Signed integer types... - case Type::ShortTyID: - case Type::IntTyID: - case Type::LongTyID: - output_vbr(cast(CPV)->getValue()); + case Type::IntegerTyID: { // Integer types... + unsigned NumBits = cast(CPV->getType())->getBitWidth(); + if (NumBits <= 32) + output_vbr(uint32_t(cast(CPV)->getZExtValue())); + else if (NumBits <= 64) + output_vbr(uint64_t(cast(CPV)->getZExtValue())); + else + assert("Integer types > 64 bits not supported."); break; + } case Type::ArrayTyID: { const ConstantArray *CPA = cast(CPV); @@ -388,13 +386,26 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { case Type::VoidTyID: case Type::LabelTyID: default: - std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" - << " type '" << *CPV->getType() << "'\n"; + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " type '" << *CPV->getType() << "'\n"; break; } return; } +/// outputInlineAsm - InlineAsm's get emitted to the constant pool, so they can +/// be shared by multiple uses. +void BytecodeWriter::outputInlineAsm(const InlineAsm *IA) { + // Output a marker, so we know when we have one one parsing the constant pool. + // Note that this encoding is 5 bytes: not very efficient for a marker. Since + // unique inline asms are rare, this should hardly matter. + output_vbr(~0U); + + output(IA->getAsmString()); + output(IA->getConstraintString()); + output_vbr(unsigned(IA->hasSideEffects())); +} + void BytecodeWriter::outputConstantStrings() { SlotCalculator::string_iterator I = Table.string_begin(); SlotCalculator::string_iterator E = Table.string_end(); @@ -422,10 +433,9 @@ void BytecodeWriter::outputConstantStrings() { //===----------------------------------------------------------------------===// //=== Instruction Output ===// //===----------------------------------------------------------------------===// -typedef unsigned char uchar; // outputInstructionFormat0 - Output those weird instructions that have a large -// number of operands or have large operands themselves... +// number of operands or have large operands themselves. // // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg] // @@ -438,8 +448,8 @@ void BytecodeWriter::outputInstructionFormat0(const Instruction *I, output_typeid(Type); // Result type unsigned NumArgs = I->getNumOperands(); - output_vbr(NumArgs + (isa(I) || isa(I) || - isa(I))); + output_vbr(NumArgs + (isa(I) || isa(I) || + isa(I) || isa(I) || Opcode == 58)); if (!isa(&I)) { for (unsigned i = 0; i < NumArgs; ++i) { @@ -452,12 +462,14 @@ void BytecodeWriter::outputInstructionFormat0(const Instruction *I, int Slot = Table.getSlot(I->getType()); assert(Slot != -1 && "Cast return type unknown?"); output_typeid((unsigned)Slot); - } else if (const VANextInst *VAI = dyn_cast(I)) { - int Slot = Table.getSlot(VAI->getArgType()); - assert(Slot != -1 && "VarArg argument type unknown?"); - output_typeid((unsigned)Slot); + } else if (isa(I)) { + output_vbr(unsigned(cast(I)->getPredicate())); + } else if (isa(I)) { + output_vbr(cast(I)->getCallingConv()); + } else if (Opcode == 58) { // Call escape sequence + output_vbr((cast(I)->getCallingConv() << 1) | + unsigned(cast(I)->isTailCall())); } - } else { int Slot = Table.getSlot(I->getOperand(0)); assert(Slot >= 0 && "No slot number for value!?!?"); @@ -471,15 +483,15 @@ void BytecodeWriter::outputInstructionFormat0(const Instruction *I, assert(Slot >= 0 && "No slot number for value!?!?"); if (isa(*TI)) { - unsigned IdxId; - switch (I->getOperand(Idx)->getType()->getTypeID()) { - default: assert(0 && "Unknown index type!"); - case Type::UIntTyID: IdxId = 0; break; - case Type::IntTyID: IdxId = 1; break; - case Type::ULongTyID: IdxId = 2; break; - case Type::LongTyID: IdxId = 3; break; - } - Slot = (Slot << 2) | IdxId; + // These should be either 32-bits or 64-bits, however, with bit + // accurate types we just distinguish between less than or equal to + // 32-bits or greater than 32-bits. + unsigned BitWidth = + cast(I->getOperand(Idx)->getType())->getBitWidth(); + assert(BitWidth == 32 || BitWidth == 64 && + "Invalid bitwidth for GEP index"); + unsigned IdxId = BitWidth == 32 ? 0 : 1; + Slot = (Slot << 1) | IdxId; } output_vbr(unsigned(Slot)); } @@ -520,7 +532,8 @@ void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, // variable argument. NumFixedOperands = 3+NumParams; } - output_vbr(2 * I->getNumOperands()-NumFixedOperands); + output_vbr(2 * I->getNumOperands()-NumFixedOperands + + unsigned(Opcode == 58 || isa(I))); // The type for the function has already been emitted in the type field of the // instruction. Just emit the slot # now. @@ -541,6 +554,14 @@ void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, assert(Slot >= 0 && "No slot number for value!?!?"); output_vbr((unsigned)Slot); } + + if (isa(I)) { + // Emit the tail call/calling conv for invoke instructions + output_vbr(cast(I)->getCallingConv()); + } else if (Opcode == 58) { + const CallInst *CI = cast(I); + output_vbr((CI->getCallingConv() << 1) | unsigned(CI->isTailCall())); + } } @@ -602,18 +623,31 @@ inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, } void BytecodeWriter::outputInstruction(const Instruction &I) { - assert(I.getOpcode() < 62 && "Opcode too big???"); + assert(I.getOpcode() < 57 && "Opcode too big???"); unsigned Opcode = I.getOpcode(); unsigned NumOperands = I.getNumOperands(); // Encode 'tail call' as 61, 'volatile load' as 62, and 'volatile store' as // 63. - if (isa(I) && cast(I).isTailCall()) - Opcode = 61; - if (isa(I) && cast(I).isVolatile()) + if (const CallInst *CI = dyn_cast(&I)) { + if (CI->getCallingConv() == CallingConv::C) { + if (CI->isTailCall()) + Opcode = 61; // CCC + Tail Call + else + ; // Opcode = Instruction::Call + } else if (CI->getCallingConv() == CallingConv::Fast) { + if (CI->isTailCall()) + Opcode = 59; // FastCC + TailCall + else + Opcode = 60; // FastCC + Not Tail Call + } else { + Opcode = 58; // Call escape sequence. + } + } else if (isa(I) && cast(I).isVolatile()) { Opcode = 62; - if (isa(I) && cast(I).isVolatile()) + } else if (isa(I) && cast(I).isVolatile()) { Opcode = 63; + } // Figure out which type to encode with the instruction. Typically we want // the type of the first parameter, as opposed to the type of the instruction @@ -680,28 +714,52 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { assert(Slots[1] != ~0U && "Cast return type unknown?"); if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; NumOperands++; - } else if (const VANextInst *VANI = dyn_cast(&I)) { - Slots[1] = Table.getSlot(VANI->getArgType()); - assert(Slots[1] != ~0U && "va_next return type unknown?"); - if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + } else if (const AllocationInst *AI = dyn_cast(&I)) { + assert(NumOperands == 1 && "Bogus allocation!"); + if (AI->getAlignment()) { + Slots[1] = Log2_32(AI->getAlignment())+1; + if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + NumOperands = 2; + } + } else if (isa(I) || isa(I)) { + // We need to encode the compare instruction's predicate as the third + // operand. Its not really a slot, but we don't want to break the + // instruction format for these instructions. NumOperands++; + assert(NumOperands == 3 && "CmpInst with wrong number of operands?"); + Slots[2] = unsigned(cast(&I)->getPredicate()); + if (Slots[2] > MaxOpSlot) + MaxOpSlot = Slots[2]; } else if (const GetElementPtrInst *GEP = dyn_cast(&I)) { // We need to encode the type of sequential type indices into their slot # unsigned Idx = 1; for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP); I != E; ++I, ++Idx) if (isa(*I)) { - unsigned IdxId; - switch (GEP->getOperand(Idx)->getType()->getTypeID()) { - default: assert(0 && "Unknown index type!"); - case Type::UIntTyID: IdxId = 0; break; - case Type::IntTyID: IdxId = 1; break; - case Type::ULongTyID: IdxId = 2; break; - case Type::LongTyID: IdxId = 3; break; - } - Slots[Idx] = (Slots[Idx] << 2) | IdxId; + // These should be either 32-bits or 64-bits, however, with bit + // accurate types we just distinguish between less than or equal to + // 32-bits or greater than 32-bits. + unsigned BitWidth = + cast(GEP->getOperand(Idx)->getType())->getBitWidth(); + assert(BitWidth == 32 || BitWidth == 64 && + "Invalid bitwidth for GEP index"); + unsigned IdxId = BitWidth == 32 ? 0 : 1; + Slots[Idx] = (Slots[Idx] << 1) | IdxId; if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; } + } else if (Opcode == 58) { + // If this is the escape sequence for call, emit the tailcall/cc info. + const CallInst &CI = cast(I); + ++NumOperands; + if (NumOperands <= 3) { + Slots[NumOperands-1] = + (CI.getCallingConv() << 1)|unsigned(CI.isTailCall()); + if (Slots[NumOperands-1] > MaxOpSlot) + MaxOpSlot = Slots[NumOperands-1]; + } + } else if (isa(I)) { + // Invoke escape seq has at least 4 operands to encode. + ++NumOperands; } // Decide which instruction encoding to use. This is determined primarily @@ -749,28 +807,19 @@ BytecodeWriter::BytecodeWriter(std::vector &o, const Module *M) : Out(o), Table(M) { // Emit the signature... - static const unsigned char *Sig = (const unsigned char*)"llvm"; + static const unsigned char *Sig = (const unsigned char*)"llvm"; output_data(Sig, Sig+4); // Emit the top level CLASS block. BytecodeBlock ModuleBlock(BytecodeFormat::ModuleBlockID, *this, false, true); - bool isBigEndian = M->getEndianness() == Module::BigEndian; - bool hasLongPointers = M->getPointerSize() == Module::Pointer64; - bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; - bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; - - // Output the version identifier and other information. - unsigned Version = (BCVersionNum << 4) | - (unsigned)isBigEndian | (hasLongPointers << 1) | - (hasNoEndianness << 2) | - (hasNoPointerSize << 3); - output_vbr(Version); + // Output the version identifier + output_vbr(BCVersionNum); // The Global type plane comes first { - BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this ); - outputTypes(Type::FirstDerivedTyID); + BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this); + outputTypes(Type::FirstDerivedTyID); } // The ModuleInfoBlock follows directly after the type information @@ -783,8 +832,11 @@ BytecodeWriter::BytecodeWriter(std::vector &o, const Module *M) for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) outputFunction(I); - // If needed, output the symbol table for the module... - outputSymbolTable(M->getSymbolTable()); + // Output the symbole table for types + outputTypeSymbolTable(M->getTypeSymbolTable()); + + // Output the symbol table for values + outputValueSymbolTable(M->getValueSymbolTable()); } void BytecodeWriter::outputTypes(unsigned TypeNum) { @@ -821,7 +873,8 @@ void BytecodeWriter::outputConstantsInPlane(const std::vector /*empty*/; unsigned NC = ValNo; // Number of constants - for (; NC < Plane.size() && (isa(Plane[NC])); NC++) + for (; NC < Plane.size() && (isa(Plane[NC]) || + isa(Plane[NC])); NC++) /*empty*/; NC -= ValNo; // Convert from index into count if (NC == 0) return; // Skip empty type planes... @@ -829,20 +882,21 @@ void BytecodeWriter::outputConstantsInPlane(const std::vector // FIXME: Most slabs only have 1 or 2 entries! We should encode this much // more compactly. - // Output type header: [num entries][type id number] + // Put out type header: [num entries][type id number] // output_vbr(NC); - // Output the Type ID Number... + // Put out the Type ID Number... int Slot = Table.getSlot(Plane.front()->getType()); assert (Slot != -1 && "Type in constant pool but not in function!!"); output_typeid((unsigned)Slot); for (unsigned i = ValNo; i < ValNo+NC; ++i) { const Value *V = Plane[i]; - if (const Constant *C = dyn_cast(V)) { + if (const Constant *C = dyn_cast(V)) outputConstant(C); - } + else + outputInlineAsm(cast(V)); } } @@ -885,27 +939,78 @@ void BytecodeWriter::outputConstants(bool isFunction) { static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { default: assert(0 && "Invalid linkage!"); - case GlobalValue::ExternalLinkage: return 0; - case GlobalValue::WeakLinkage: return 1; - case GlobalValue::AppendingLinkage: return 2; - case GlobalValue::InternalLinkage: return 3; - case GlobalValue::LinkOnceLinkage: return 4; + case GlobalValue::ExternalLinkage: return 0; + case GlobalValue::WeakLinkage: return 1; + case GlobalValue::AppendingLinkage: return 2; + case GlobalValue::InternalLinkage: return 3; + case GlobalValue::LinkOnceLinkage: return 4; + case GlobalValue::DLLImportLinkage: return 5; + case GlobalValue::DLLExportLinkage: return 6; + case GlobalValue::ExternalWeakLinkage: return 7; + } +} + +static unsigned getEncodedVisibility(const GlobalValue *GV) { + switch (GV->getVisibility()) { + default: assert(0 && "Invalid visibility!"); + case GlobalValue::DefaultVisibility: return 0; + case GlobalValue::HiddenVisibility: return 1; } } void BytecodeWriter::outputModuleInfoBlock(const Module *M) { BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfoBlockID, *this); + // Give numbers to sections as we encounter them. + unsigned SectionIDCounter = 0; + std::vector SectionNames; + std::map SectionID; + // Output the types for the global variables in the module... - for (Module::const_global_iterator I = M->global_begin(), End = M->global_end(); I != End;++I) { + for (Module::const_global_iterator I = M->global_begin(), + End = M->global_end(); I != End; ++I) { int Slot = Table.getSlot(I->getType()); assert(Slot != -1 && "Module global vars is broken!"); + assert((I->hasInitializer() || !I->hasInternalLinkage()) && + "Global must have an initializer or have external linkage!"); + // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2-4=Linkage, - // bit5+ = Slot # for type - unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | - (I->hasInitializer() << 1) | (unsigned)I->isConstant(); - output_vbr(oSlot); + // bit5+ = Slot # for type. + bool HasExtensionWord = (I->getAlignment() != 0) || + I->hasSection() || + (I->getVisibility() != GlobalValue::DefaultVisibility); + + // If we need to use the extension byte, set linkage=3(internal) and + // initializer = 0 (impossible!). + if (!HasExtensionWord) { + unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | + (I->hasInitializer() << 1) | (unsigned)I->isConstant(); + output_vbr(oSlot); + } else { + unsigned oSlot = ((unsigned)Slot << 5) | (3 << 2) | + (0 << 1) | (unsigned)I->isConstant(); + output_vbr(oSlot); + + // The extension word has this format: bit 0 = has initializer, bit 1-3 = + // linkage, bit 4-8 = alignment (log2), bit 9 = has SectionID, + // bits 10-12 = visibility, bits 13+ = future use. + unsigned ExtWord = (unsigned)I->hasInitializer() | + (getEncodedLinkage(I) << 1) | + ((Log2_32(I->getAlignment())+1) << 4) | + ((unsigned)I->hasSection() << 9) | + (getEncodedVisibility(I) << 10); + output_vbr(ExtWord); + if (I->hasSection()) { + // Give section names unique ID's. + unsigned &Entry = SectionID[I->getSection()]; + if (Entry == 0) { + Entry = ++SectionIDCounter; + SectionNames.push_back(I->getSection()); + } + output_vbr(Entry); + } + } // If we have an initializer, output it now. if (I->hasInitializer()) { @@ -921,11 +1026,49 @@ void BytecodeWriter::outputModuleInfoBlock(const Module *M) { int Slot = Table.getSlot(I->getType()); assert(Slot != -1 && "Module slot calculator is broken!"); assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); - assert(((Slot << 5) >> 5) == Slot && "Slot # too big!"); - unsigned ID = (Slot << 5) + 1; - if (I->isExternal()) // If external, we don't have an FunctionInfo block. + assert(((Slot << 6) >> 6) == Slot && "Slot # too big!"); + unsigned CC = I->getCallingConv()+1; + unsigned ID = (Slot << 5) | (CC & 15); + + if (I->isDeclaration()) // If external, we don't have an FunctionInfo block. ID |= 1 << 4; + + if (I->getAlignment() || I->hasSection() || (CC & ~15) != 0 || + (I->isDeclaration() && I->hasDLLImportLinkage()) || + (I->isDeclaration() && I->hasExternalWeakLinkage()) + ) + ID |= 1 << 31; // Do we need an extension word? + output_vbr(ID); + + if (ID & (1 << 31)) { + // Extension byte: bits 0-4 = alignment, bits 5-9 = top nibble of calling + // convention, bit 10 = hasSectionID., bits 11-12 = external linkage type + unsigned extLinkage = 0; + + if (I->isDeclaration()) { + if (I->hasDLLImportLinkage()) { + extLinkage = 1; + } else if (I->hasExternalWeakLinkage()) { + extLinkage = 2; + } + } + + ID = (Log2_32(I->getAlignment())+1) | ((CC >> 4) << 5) | + (I->hasSection() << 10) | + ((extLinkage & 3) << 11); + output_vbr(ID); + + // Give section names unique ID's. + if (I->hasSection()) { + unsigned &Entry = SectionID[I->getSection()]; + if (Entry == 0) { + Entry = ++SectionIDCounter; + SectionNames.push_back(I->getSection()); + } + output_vbr(Entry); + } + } } output_vbr((unsigned)Table.getSlot(Type::VoidTy) << 5); @@ -938,6 +1081,17 @@ void BytecodeWriter::outputModuleInfoBlock(const Module *M) { // Output the target triple from the module output(M->getTargetTriple()); + + // Output the data layout from the module + output(M->getDataLayout()); + + // Emit the table of section names. + output_vbr((unsigned)SectionNames.size()); + for (unsigned i = 0, e = SectionNames.size(); i != e; ++i) + output(SectionNames[i]); + + // Output the inline asm string. + output(M->getModuleInlineAsm()); } void BytecodeWriter::outputInstructions(const Function *F) { @@ -949,128 +1103,72 @@ void BytecodeWriter::outputInstructions(const Function *F) { void BytecodeWriter::outputFunction(const Function *F) { // If this is an external function, there is nothing else to emit! - if (F->isExternal()) return; + if (F->isDeclaration()) return; BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this); - output_vbr(getEncodedLinkage(F)); + unsigned rWord = (getEncodedVisibility(F) << 16) | getEncodedLinkage(F); + output_vbr(rWord); // Get slot information about the function... Table.incorporateFunction(F); - if (Table.getCompactionTable().empty()) { - // Output information about the constants in the function if the compaction - // table is not being used. - outputConstants(true); - } else { - // Otherwise, emit the compaction table. - outputCompactionTable(); - } + outputConstants(true); // Output all of the instructions in the body of the function outputInstructions(F); // If needed, output the symbol table for the function... - outputSymbolTable(F->getSymbolTable()); + outputValueSymbolTable(F->getValueSymbolTable()); Table.purgeFunction(); } -void BytecodeWriter::outputCompactionTablePlane(unsigned PlaneNo, - const std::vector &Plane, - unsigned StartNo) { - unsigned End = Table.getModuleLevel(PlaneNo); - if (Plane.empty() || StartNo == End || End == 0) return; // Nothing to emit - assert(StartNo < End && "Cannot emit negative range!"); - assert(StartNo < Plane.size() && End <= Plane.size()); - - // Do not emit the null initializer! - ++StartNo; - - // Figure out which encoding to use. By far the most common case we have is - // to emit 0-2 entries in a compaction table plane. - switch (End-StartNo) { - case 0: // Avoid emitting two vbr's if possible. - case 1: - case 2: - output_vbr((PlaneNo << 2) | End-StartNo); - break; - default: - // Output the number of things. - output_vbr((unsigned(End-StartNo) << 2) | 3); - output_typeid(PlaneNo); // Emit the type plane this is - break; - } - - for (unsigned i = StartNo; i != End; ++i) - output_vbr(Table.getGlobalSlot(Plane[i])); -} - -void BytecodeWriter::outputCompactionTypes(unsigned StartNo) { - // Get the compaction type table from the slot calculator - const std::vector &CTypes = Table.getCompactionTypes(); - - // The compaction types may have been uncompactified back to the - // global types. If so, we just write an empty table - if (CTypes.size() == 0 ) { - output_vbr(0U); - return; - } - - assert(CTypes.size() >= StartNo && "Invalid compaction types start index"); - - // Determine how many types to write - unsigned NumTypes = CTypes.size() - StartNo; - - // Output the number of types. - output_vbr(NumTypes); - for (unsigned i = StartNo; i < StartNo+NumTypes; ++i) - output_typeid(Table.getGlobalSlot(CTypes[i])); -} - -void BytecodeWriter::outputCompactionTable() { - // Avoid writing the compaction table at all if there is no content. - if (Table.getCompactionTypes().size() >= Type::FirstDerivedTyID || - (!Table.CompactionTableIsEmpty())) { - BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this, - true/*ElideIfEmpty*/); - const std::vector > &CT = - Table.getCompactionTable(); - - // First things first, emit the type compaction table if there is one. - outputCompactionTypes(Type::FirstDerivedTyID); - - for (unsigned i = 0, e = CT.size(); i != e; ++i) - outputCompactionTablePlane(i, CT[i], 0); - } -} - -void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { - // Do not output the Bytecode block for an empty symbol table, it just wastes +void BytecodeWriter::outputTypeSymbolTable(const TypeSymbolTable &TST) { + // Do not output the block for an empty symbol table, it just wastes // space! - if (MST.isEmpty()) return; + if (TST.empty()) return; - BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTableBlockID, *this, + // Create a header for the symbol table + BytecodeBlock SymTabBlock(BytecodeFormat::TypeSymbolTableBlockID, *this, true/*ElideIfEmpty*/); - // Write the number of types - output_vbr(MST.num_types()); + output_vbr(TST.size()); // Write each of the types - for (SymbolTable::type_const_iterator TI = MST.type_begin(), - TE = MST.type_end(); TI != TE; ++TI ) { + for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); + TI != TE; ++TI) { // Symtab entry:[def slot #][name] output_typeid((unsigned)Table.getSlot(TI->second)); output(TI->first); } +} - // Now do each of the type planes in order. - for (SymbolTable::plane_const_iterator PI = MST.plane_begin(), - PE = MST.plane_end(); PI != PE; ++PI) { - SymbolTable::value_const_iterator I = MST.value_begin(PI->first); - SymbolTable::value_const_iterator End = MST.value_end(PI->first); +void BytecodeWriter::outputValueSymbolTable(const ValueSymbolTable &VST) { + // Do not output the Bytecode block for an empty symbol table, it just wastes + // space! + if (VST.empty()) return; + + BytecodeBlock SymTabBlock(BytecodeFormat::ValueSymbolTableBlockID, *this, + true/*ElideIfEmpty*/); + + // Organize the symbol table by type + typedef std::pair PlaneMapEntry; + typedef std::vector PlaneMapVector; + typedef std::map PlaneMap; + PlaneMap Planes; + for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end(); + SI != SE; ++SI) + Planes[SI->second->getType()].push_back( + std::make_pair(SI->first,SI->second)); + + for (PlaneMap::const_iterator PI = Planes.begin(), PE = Planes.end(); + PI != PE; ++PI) { int Slot; + PlaneMapVector::const_iterator I = PI->second.begin(); + PlaneMapVector::const_iterator End = PI->second.end(); + if (I == End) continue; // Don't mess with an absent type... // Write the number of values in this plane @@ -1092,10 +1190,15 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { } } -void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, - bool compress ) { +void llvm::WriteBytecodeToFile(const Module *M, OStream &Out, + bool compress) { assert(M && "You can't write a null module!!"); + // Make sure that std::cout is put into binary mode for systems + // that care. + if (Out == cout) + sys::Program::ChangeStdoutToBinary(); + // Create a vector of unsigned char for the bytecode output. We // reserve 256KBytes of space in the vector so that we avoid doing // lots of little allocations. 256KBytes is sufficient for a large @@ -1126,22 +1229,21 @@ void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, compressed_magic[2] = 'v'; compressed_magic[3] = 'c'; - Out.write(compressed_magic,4); + Out.stream()->write(compressed_magic,4); // Compress everything after the magic number (which we altered) - uint64_t zipSize = Compressor::compressToStream( + Compressor::compressToStream( (char*)(FirstByte+4), // Skip the magic number Buffer.size()-4, // Skip the magic number - Out // Where to write compressed data + *Out.stream() // Where to write compressed data ); } else { // We're not compressing, so just write the entire block. - Out.write((char*)FirstByte, Buffer.size()); + Out.stream()->write((char*)FirstByte, Buffer.size()); } // make sure it hits disk now - Out.flush(); + Out.stream()->flush(); } -