X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FBytecode%2FWriter%2FWriter.cpp;h=48cccda8f40676d0aa97208a161a9d9123fc99d7;hb=b74ed07bfd3af42331b1964c24c39912610a08f4;hp=6654fd1b023bae8886d9e87593d3d99931c1223e;hpb=715c90ba524e736190a6380695ab337eeb5148be;p=oota-llvm.git diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp index 6654fd1b023..48cccda8f40 100644 --- a/lib/Bytecode/Writer/Writer.cpp +++ b/lib/Bytecode/Writer/Writer.cpp @@ -1,10 +1,10 @@ //===-- Writer.cpp - Library for writing LLVM bytecode files --------------===// -// +// // The LLVM Compiler Infrastructure // // This file was developed by the LLVM research group and is distributed under // the University of Illinois Open Source License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This library implements the functionality defined in llvm/Bytecode/Writer.h @@ -19,14 +19,19 @@ #include "WriterInternals.h" #include "llvm/Bytecode/WriteBytecodePass.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/SymbolTable.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "Support/STLExtras.h" -#include "Support/Statistic.h" +#include "llvm/Support/Compressor.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/System/Program.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include #include using namespace llvm; @@ -35,11 +40,11 @@ using namespace llvm; /// so that the reader can distinguish which format of the bytecode file has /// been written. /// @brief The bytecode version number -const unsigned BCVersionNum = 4; +const unsigned BCVersionNum = 5; static RegisterPass X("emitbytecode", "Bytecode Writer"); -static Statistic<> +static Statistic<> BytesWritten("bytecodewriter", "Number of bytecode bytes written"); //===----------------------------------------------------------------------===// @@ -47,11 +52,11 @@ BytesWritten("bytecodewriter", "Number of bytecode bytes written"); //===----------------------------------------------------------------------===// // output - If a position is specified, it must be in the valid portion of the -// string... note that this should be inlined always so only the relevant IF +// string... note that this should be inlined always so only the relevant IF // body should be included. inline void BytecodeWriter::output(unsigned i, int pos) { if (pos == -1) { // Be endian clean, little endian is our friend - Out.push_back((unsigned char)i); + Out.push_back((unsigned char)i); Out.push_back((unsigned char)(i >> 8)); Out.push_back((unsigned char)(i >> 16)); Out.push_back((unsigned char)(i >> 24)); @@ -70,15 +75,15 @@ inline void BytecodeWriter::output(int i) { /// output_vbr - Output an unsigned value, by using the least number of bytes /// possible. This is useful because many of our "infinite" values are really /// very small most of the time; but can be large a few times. -/// Data format used: If you read a byte with the high bit set, use the low -/// seven bits as data and then read another byte. +/// Data format used: If you read a byte with the high bit set, use the low +/// seven bits as data and then read another byte. inline void BytecodeWriter::output_vbr(uint64_t i) { while (1) { if (i < 0x80) { // done? Out.push_back((unsigned char)i); // We know the high bit is clear... return; } - + // Nope, we are bigger than a character, output the next 7 bits and set the // high bit to say that there is more coming... Out.push_back(0x80 | ((unsigned char)i & 0x7F)); @@ -92,7 +97,7 @@ inline void BytecodeWriter::output_vbr(unsigned i) { Out.push_back((unsigned char)i); // We know the high bit is clear... return; } - + // Nope, we are bigger than a character, output the next 7 bits and set the // high bit to say that there is more coming... Out.push_back(0x80 | ((unsigned char)i & 0x7F)); @@ -110,7 +115,7 @@ inline void BytecodeWriter::output_typeid(unsigned i) { } inline void BytecodeWriter::output_vbr(int64_t i) { - if (i < 0) + if (i < 0) output_vbr(((uint64_t)(-i) << 1) | 1); // Set low order sign bit... else output_vbr((uint64_t)i << 1); // Low order bit is clear. @@ -118,7 +123,7 @@ inline void BytecodeWriter::output_vbr(int64_t i) { inline void BytecodeWriter::output_vbr(int i) { - if (i < 0) + if (i < 0) output_vbr(((unsigned)(-i) << 1) | 1); // Set low order sign bit... else output_vbr((unsigned)i << 1); // Low order bit is clear. @@ -126,7 +131,7 @@ inline void BytecodeWriter::output_vbr(int i) { inline void BytecodeWriter::output(const std::string &s) { unsigned Len = s.length(); - output_vbr(Len ); // Strings may have an arbitrary length... + output_vbr(Len); // Strings may have an arbitrary length. Out.insert(Out.end(), s.begin(), s.end()); } @@ -137,37 +142,29 @@ inline void BytecodeWriter::output_data(const void *Ptr, const void *End) { inline void BytecodeWriter::output_float(float& FloatVal) { /// FIXME: This isn't optimal, it has size problems on some platforms /// where FP is not IEEE. - union { - float f; - uint32_t i; - } FloatUnion; - FloatUnion.f = FloatVal; - Out.push_back( static_cast( (FloatUnion.i & 0xFF ))); - Out.push_back( static_cast( (FloatUnion.i >> 8) & 0xFF)); - Out.push_back( static_cast( (FloatUnion.i >> 16) & 0xFF)); - Out.push_back( static_cast( (FloatUnion.i >> 24) & 0xFF)); + uint32_t i = FloatToBits(FloatVal); + Out.push_back( static_cast( (i ) & 0xFF)); + Out.push_back( static_cast( (i >> 8 ) & 0xFF)); + Out.push_back( static_cast( (i >> 16) & 0xFF)); + Out.push_back( static_cast( (i >> 24) & 0xFF)); } inline void BytecodeWriter::output_double(double& DoubleVal) { /// FIXME: This isn't optimal, it has size problems on some platforms /// where FP is not IEEE. - union { - double d; - uint64_t i; - } DoubleUnion; - DoubleUnion.d = DoubleVal; - Out.push_back( static_cast( (DoubleUnion.i & 0xFF ))); - Out.push_back( static_cast( (DoubleUnion.i >> 8) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 16) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 24) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 32) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 40) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 48) & 0xFF)); - Out.push_back( static_cast( (DoubleUnion.i >> 56) & 0xFF)); + uint64_t i = DoubleToBits(DoubleVal); + Out.push_back( static_cast( (i ) & 0xFF)); + Out.push_back( static_cast( (i >> 8 ) & 0xFF)); + Out.push_back( static_cast( (i >> 16) & 0xFF)); + Out.push_back( static_cast( (i >> 24) & 0xFF)); + Out.push_back( static_cast( (i >> 32) & 0xFF)); + Out.push_back( static_cast( (i >> 40) & 0xFF)); + Out.push_back( static_cast( (i >> 48) & 0xFF)); + Out.push_back( static_cast( (i >> 56) & 0xFF)); } -inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter& w, - bool elideIfEmpty, bool hasLongFormat ) +inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter &w, + bool elideIfEmpty, bool hasLongFormat) : Id(ID), Writer(w), ElideIfEmpty(elideIfEmpty), HasLongFormat(hasLongFormat){ if (HasLongFormat) { @@ -179,8 +176,8 @@ inline BytecodeBlock::BytecodeBlock(unsigned ID, BytecodeWriter& w, Loc = w.size(); } -inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out - // of scope... +inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block goes out + // of scope... if (Loc == Writer.size() && ElideIfEmpty) { // If the block is empty, and we are allowed to, do not emit the block at // all! @@ -188,8 +185,6 @@ inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block go return; } - //cerr << "OldLoc = " << Loc << " NewLoc = " << NewLoc << " diff = " - // << (NewLoc-Loc) << endl; if (HasLongFormat) Writer.output(unsigned(Writer.size()-Loc), int(Loc-4)); else @@ -202,7 +197,7 @@ inline BytecodeBlock::~BytecodeBlock() { // Do backpatch when block go void BytecodeWriter::outputType(const Type *T) { output_vbr((unsigned)T->getTypeID()); - + // That's all there is to handling primitive types... if (T->isPrimitiveType()) { return; // We might do this if we alias a prim type: %x = type int @@ -237,8 +232,6 @@ void BytecodeWriter::outputType(const Type *T) { int Slot = Table.getSlot(AT->getElementType()); assert(Slot != -1 && "Type used but not available!!"); output_typeid((unsigned)Slot); - //std::cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl; - output_vbr(AT->getNumElements()); break; } @@ -277,12 +270,10 @@ void BytecodeWriter::outputType(const Type *T) { break; } - case Type::OpaqueTyID: { + case Type::OpaqueTyID: // No need to emit anything, just the count of opaque types is enough. break; - } - //case Type::PackedTyID: default: std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" << " Type '" << T->getDescription() << "'\n"; @@ -296,13 +287,14 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. - // + // if (const ConstantExpr *CE = dyn_cast(CPV)) { // FIXME: Encoding of constant exprs could be much more compact! assert(CE->getNumOperands() > 0 && "ConstantExpr with 0 operands"); - output_vbr(CE->getNumOperands()); // flags as an expr + assert(CE->getNumOperands() != 1 || CE->getOpcode() == Instruction::Cast); + output_vbr(1+CE->getNumOperands()); // flags as an expr output_vbr(CE->getOpcode()); // flags as an expr - + for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI){ int Slot = Table.getSlot(*OI); assert(Slot != -1 && "Unknown constant used in ConstantExpr!!"); @@ -311,10 +303,13 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { output_typeid((unsigned)Slot); } return; + } else if (isa(CPV)) { + output_vbr(1U); // 1 -> UndefValue constant. + return; } else { output_vbr(0U); // flag as not a ConstantExpr } - + switch (CPV->getType()->getTypeID()) { case Type::BoolTyID: // Boolean Types if (cast(CPV)->getValue()) @@ -386,7 +381,7 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { break; } - case Type::VoidTyID: + case Type::VoidTyID: case Type::LabelTyID: default: std::cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" @@ -396,6 +391,19 @@ void BytecodeWriter::outputConstant(const Constant *CPV) { return; } +/// outputInlineAsm - InlineAsm's get emitted to the constant pool, so they can +/// be shared by multiple uses. +void BytecodeWriter::outputInlineAsm(const InlineAsm *IA) { + // Output a marker, so we know when we have one one parsing the constant pool. + // Note that this encoding is 5 bytes: not very efficient for a marker. Since + // unique inline asms are rare, this should hardly matter. + output_vbr(~0U); + + output(IA->getAsmString()); + output(IA->getConstraintString()); + output_vbr(unsigned(IA->hasSideEffects())); +} + void BytecodeWriter::outputConstantStrings() { SlotCalculator::string_iterator I = Table.string_begin(); SlotCalculator::string_iterator E = Table.string_end(); @@ -405,14 +413,14 @@ void BytecodeWriter::outputConstantStrings() { // the 'void' type plane. output_vbr(unsigned(E-I)); output_typeid(Type::VoidTyID); - + // Emit all of the strings. for (I = Table.string_begin(); I != E; ++I) { const ConstantArray *Str = *I; int Slot = Table.getSlot(Str->getType()); assert(Slot != -1 && "Constant string of unknown type?"); output_typeid((unsigned)Slot); - + // Now that we emitted the type (which indicates the size of the string), // emit all of the characters. std::string Val = Str->getAsString(); @@ -423,28 +431,28 @@ void BytecodeWriter::outputConstantStrings() { //===----------------------------------------------------------------------===// //=== Instruction Output ===// //===----------------------------------------------------------------------===// -typedef unsigned char uchar; -// outputInstructionFormat0 - Output those wierd instructions that have a large -// number of operands or have large operands themselves... +// outputInstructionFormat0 - Output those weird instructions that have a large +// number of operands or have large operands themselves. // // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg] // -void BytecodeWriter::outputInstructionFormat0(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, - unsigned Type) { +void BytecodeWriter::outputInstructionFormat0(const Instruction *I, + unsigned Opcode, + const SlotCalculator &Table, + unsigned Type) { // Opcode must have top two bits clear... output_vbr(Opcode << 2); // Instruction Opcode ID output_typeid(Type); // Result type unsigned NumArgs = I->getNumOperands(); - output_vbr(NumArgs + (isa(I) || isa(I) || - isa(I))); + output_vbr(NumArgs + (isa(I) || + isa(I) || Opcode == 56 || Opcode == 58)); if (!isa(&I)) { for (unsigned i = 0; i < NumArgs; ++i) { int Slot = Table.getSlot(I->getOperand(i)); - assert(Slot >= 0 && "No slot number for value!?!?"); + assert(Slot >= 0 && "No slot number for value!?!?"); output_vbr((unsigned)Slot); } @@ -452,15 +460,15 @@ void BytecodeWriter::outputInstructionFormat0(const Instruction *I, unsigned Opc int Slot = Table.getSlot(I->getType()); assert(Slot != -1 && "Cast return type unknown?"); output_typeid((unsigned)Slot); - } else if (const VANextInst *VAI = dyn_cast(I)) { - int Slot = Table.getSlot(VAI->getArgType()); - assert(Slot != -1 && "VarArg argument type unknown?"); - output_typeid((unsigned)Slot); + } else if (Opcode == 56) { // Invoke escape sequence + output_vbr(cast(I)->getCallingConv()); + } else if (Opcode == 58) { // Call escape sequence + output_vbr((cast(I)->getCallingConv() << 1) | + unsigned(cast(I)->isTailCall())); } - } else { int Slot = Table.getSlot(I->getOperand(0)); - assert(Slot >= 0 && "No slot number for value!?!?"); + assert(Slot >= 0 && "No slot number for value!?!?"); output_vbr(unsigned(Slot)); // We need to encode the type of sequential type indices into their slot # @@ -468,8 +476,8 @@ void BytecodeWriter::outputInstructionFormat0(const Instruction *I, unsigned Opc for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I); Idx != NumArgs; ++TI, ++Idx) { Slot = Table.getSlot(I->getOperand(Idx)); - assert(Slot >= 0 && "No slot number for value!?!?"); - + assert(Slot >= 0 && "No slot number for value!?!?"); + if (isa(*TI)) { unsigned IdxId; switch (I->getOperand(Idx)->getType()->getTypeID()) { @@ -496,10 +504,10 @@ void BytecodeWriter::outputInstructionFormat0(const Instruction *I, unsigned Opc // // Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg] // -void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, - unsigned Opcode, - const SlotCalculator &Table, - unsigned Type) { +void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, + unsigned Opcode, + const SlotCalculator &Table, + unsigned Type) { assert(isa(I) || isa(I)); // Opcode must have top two bits clear... output_vbr(Opcode << 2); // Instruction Opcode ID @@ -520,37 +528,46 @@ void BytecodeWriter::outputInstrVarArgsCall(const Instruction *I, // variable argument. NumFixedOperands = 3+NumParams; } - output_vbr(2 * I->getNumOperands()-NumFixedOperands); + output_vbr(2 * I->getNumOperands()-NumFixedOperands + + unsigned(Opcode == 56 || Opcode == 58)); // The type for the function has already been emitted in the type field of the // instruction. Just emit the slot # now. for (unsigned i = 0; i != NumFixedOperands; ++i) { int Slot = Table.getSlot(I->getOperand(i)); - assert(Slot >= 0 && "No slot number for value!?!?"); + assert(Slot >= 0 && "No slot number for value!?!?"); output_vbr((unsigned)Slot); } for (unsigned i = NumFixedOperands, e = I->getNumOperands(); i != e; ++i) { // Output Arg Type ID int Slot = Table.getSlot(I->getOperand(i)->getType()); - assert(Slot >= 0 && "No slot number for value!?!?"); + assert(Slot >= 0 && "No slot number for value!?!?"); output_typeid((unsigned)Slot); - + // Output arg ID itself Slot = Table.getSlot(I->getOperand(i)); - assert(Slot >= 0 && "No slot number for value!?!?"); + assert(Slot >= 0 && "No slot number for value!?!?"); output_vbr((unsigned)Slot); } + + // If this is the escape sequence for call, emit the tailcall/cc info. + if (Opcode == 58) { + const CallInst *CI = cast(I); + output_vbr((CI->getCallingConv() << 1) | unsigned(CI->isTailCall())); + } else if (Opcode == 56) { // Invoke escape sequence. + output_vbr(cast(I)->getCallingConv()); + } } // outputInstructionFormat1 - Output one operand instructions, knowing that no // operand index is >= 2^12. // -inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I, - unsigned Opcode, - unsigned *Slots, - unsigned Type) { +inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I, + unsigned Opcode, + unsigned *Slots, + unsigned Type) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 1. @@ -558,42 +575,36 @@ inline void BytecodeWriter::outputInstructionFormat1(const Instruction *I, // 19-08: Resulting type plane // 31-20: Operand #1 (if set to (2^12-1), then zero operands) // - unsigned Bits = 1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20); - // cerr << "1 " << IType << " " << Type << " " << Slots[0] << endl; - output(Bits); + output(1 | (Opcode << 2) | (Type << 8) | (Slots[0] << 20)); } // outputInstructionFormat2 - Output two operand instructions, knowing that no // operand index is >= 2^8. // -inline void BytecodeWriter::outputInstructionFormat2(const Instruction *I, - unsigned Opcode, - unsigned *Slots, - unsigned Type) { +inline void BytecodeWriter::outputInstructionFormat2(const Instruction *I, + unsigned Opcode, + unsigned *Slots, + unsigned Type) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 2. // 07-02: Opcode // 15-08: Resulting type plane // 23-16: Operand #1 - // 31-24: Operand #2 + // 31-24: Operand #2 // - unsigned Bits = 2 | (Opcode << 2) | (Type << 8) | - (Slots[0] << 16) | (Slots[1] << 24); - // cerr << "2 " << IType << " " << Type << " " << Slots[0] << " " - // << Slots[1] << endl; - output(Bits); + output(2 | (Opcode << 2) | (Type << 8) | (Slots[0] << 16) | (Slots[1] << 24)); } // outputInstructionFormat3 - Output three operand instructions, knowing that no // operand index is >= 2^6. // -inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, +inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, unsigned Opcode, - unsigned *Slots, - unsigned Type) { + unsigned *Slots, + unsigned Type) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 3. @@ -603,29 +614,48 @@ inline void BytecodeWriter::outputInstructionFormat3(const Instruction *I, // 25-20: Operand #2 // 31-26: Operand #3 // - unsigned Bits = 3 | (Opcode << 2) | (Type << 8) | - (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26); - //cerr << "3 " << IType << " " << Type << " " << Slots[0] << " " - // << Slots[1] << " " << Slots[2] << endl; - output(Bits); + output(3 | (Opcode << 2) | (Type << 8) | + (Slots[0] << 14) | (Slots[1] << 20) | (Slots[2] << 26)); } void BytecodeWriter::outputInstruction(const Instruction &I) { - assert(I.getOpcode() < 62 && "Opcode too big???"); + assert(I.getOpcode() < 56 && "Opcode too big???"); unsigned Opcode = I.getOpcode(); unsigned NumOperands = I.getNumOperands(); - // Encode 'volatile load' as 62 and 'volatile store' as 63. - if (isa(I) && cast(I).isVolatile()) + // Encode 'tail call' as 61, 'volatile load' as 62, and 'volatile store' as + // 63. + if (const CallInst *CI = dyn_cast(&I)) { + if (CI->getCallingConv() == CallingConv::C) { + if (CI->isTailCall()) + Opcode = 61; // CCC + Tail Call + else + ; // Opcode = Instruction::Call + } else if (CI->getCallingConv() == CallingConv::Fast) { + if (CI->isTailCall()) + Opcode = 59; // FastCC + TailCall + else + Opcode = 60; // FastCC + Not Tail Call + } else { + Opcode = 58; // Call escape sequence. + } + } else if (const InvokeInst *II = dyn_cast(&I)) { + if (II->getCallingConv() == CallingConv::Fast) + Opcode = 57; // FastCC invoke. + else if (II->getCallingConv() != CallingConv::C) + Opcode = 56; // Invoke escape sequence. + + } else if (isa(I) && cast(I).isVolatile()) { Opcode = 62; - if (isa(I) && cast(I).isVolatile()) + } else if (isa(I) && cast(I).isVolatile()) { Opcode = 63; + } // Figure out which type to encode with the instruction. Typically we want // the type of the first parameter, as opposed to the type of the instruction // (for example, with setcc, we always know it returns bool, but the type of // the first param is actually interesting). But if we have no arguments - // we take the type of the instruction itself. + // we take the type of the instruction itself. // const Type *Ty; switch (I.getOpcode()) { @@ -670,7 +700,7 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { // unsigned MaxOpSlot = Type; unsigned Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands - + for (unsigned i = 0; i != NumOperands; ++i) { int slot = Table.getSlot(I.getOperand(i)); assert(slot != -1 && "Broken bytecode!"); @@ -686,11 +716,13 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { assert(Slots[1] != ~0U && "Cast return type unknown?"); if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; NumOperands++; - } else if (const VANextInst *VANI = dyn_cast(&I)) { - Slots[1] = Table.getSlot(VANI->getArgType()); - assert(Slots[1] != ~0U && "va_next return type unknown?"); - if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; - NumOperands++; + } else if (const AllocationInst *AI = dyn_cast(&I)) { + assert(NumOperands == 1 && "Bogus allocation!"); + if (AI->getAlignment()) { + Slots[1] = Log2_32(AI->getAlignment())+1; + if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + NumOperands = 2; + } } else if (const GetElementPtrInst *GEP = dyn_cast(&I)) { // We need to encode the type of sequential type indices into their slot # unsigned Idx = 1; @@ -708,6 +740,19 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { Slots[Idx] = (Slots[Idx] << 2) | IdxId; if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; } + } else if (Opcode == 58) { + // If this is the escape sequence for call, emit the tailcall/cc info. + const CallInst &CI = cast(I); + ++NumOperands; + if (NumOperands <= 3) { + Slots[NumOperands-1] = + (CI.getCallingConv() << 1)|unsigned(CI.isTailCall()); + if (Slots[NumOperands-1] > MaxOpSlot) + MaxOpSlot = Slots[NumOperands-1]; + } + } else if (Opcode == 56) { + // Invoke escape seq has at least 4 operands to encode. + ++NumOperands; } // Decide which instruction encoding to use. This is determined primarily @@ -751,11 +796,11 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { //=== Block Output ===// //===----------------------------------------------------------------------===// -BytecodeWriter::BytecodeWriter(std::vector &o, const Module *M) +BytecodeWriter::BytecodeWriter(std::vector &o, const Module *M) : Out(o), Table(M) { // Emit the signature... - static const unsigned char *Sig = (const unsigned char*)"llvm"; + static const unsigned char *Sig = (const unsigned char*)"llvm"; output_data(Sig, Sig+4); // Emit the top level CLASS block. @@ -766,18 +811,17 @@ BytecodeWriter::BytecodeWriter(std::vector &o, const Module *M) bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; - // Output the version identifier... we are currently on bytecode version #2, - // which corresponds to LLVM v1.3. - unsigned Version = (BCVersionNum << 4) | + // Output the version identifier and other information. + unsigned Version = (BCVersionNum << 4) | (unsigned)isBigEndian | (hasLongPointers << 1) | - (hasNoEndianness << 2) | + (hasNoEndianness << 2) | (hasNoPointerSize << 3); output_vbr(Version); // The Global type plane comes first { - BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this ); - outputTypes(Type::FirstDerivedTyID); + BytecodeBlock CPool(BytecodeFormat::GlobalTypePlaneBlockID, *this); + outputTypes(Type::FirstDerivedTyID); } // The ModuleInfoBlock follows directly after the type information @@ -794,8 +838,7 @@ BytecodeWriter::BytecodeWriter(std::vector &o, const Module *M) outputSymbolTable(M->getSymbolTable()); } -void BytecodeWriter::outputTypes(unsigned TypeNum) -{ +void BytecodeWriter::outputTypes(unsigned TypeNum) { // Write the type plane for types first because earlier planes (e.g. for a // primitive type like float) may have constants constructed using types // coming later (e.g., via getelementptr from a pointer type). The type @@ -805,7 +848,7 @@ void BytecodeWriter::outputTypes(unsigned TypeNum) assert(TypeNum <= Types.size() && "Invalid TypeNo index"); unsigned NumEntries = Types.size() - TypeNum; - + // Output type header: [num entries] output_vbr(NumEntries); @@ -815,11 +858,11 @@ void BytecodeWriter::outputTypes(unsigned TypeNum) // Helper function for outputConstants(). // Writes out all the constants in the plane Plane starting at entry StartNo. -// +// void BytecodeWriter::outputConstantsInPlane(const std::vector &Plane, unsigned StartNo) { unsigned ValNo = StartNo; - + // Scan through and ignore function arguments, global values, and constant // strings. for (; ValNo < Plane.size() && @@ -829,7 +872,8 @@ void BytecodeWriter::outputConstantsInPlane(const std::vector /*empty*/; unsigned NC = ValNo; // Number of constants - for (; NC < Plane.size() && (isa(Plane[NC])); NC++) + for (; NC < Plane.size() && (isa(Plane[NC]) || + isa(Plane[NC])); NC++) /*empty*/; NC -= ValNo; // Convert from index into count if (NC == 0) return; // Skip empty type planes... @@ -848,14 +892,15 @@ void BytecodeWriter::outputConstantsInPlane(const std::vector for (unsigned i = ValNo; i < ValNo+NC; ++i) { const Value *V = Plane[i]; - if (const Constant *C = dyn_cast(V)) { + if (const Constant *C = dyn_cast(V)) outputConstant(C); - } + else + outputInlineAsm(cast(V)); } } -static inline bool hasNullValue(unsigned TyID) { - return TyID != Type::LabelTyID && TyID != Type::VoidTyID; +static inline bool hasNullValue(const Type *Ty) { + return Ty != Type::LabelTy && Ty != Type::VoidTy && !isa(Ty); } void BytecodeWriter::outputConstants(bool isFunction) { @@ -866,9 +911,9 @@ void BytecodeWriter::outputConstants(bool isFunction) { if (isFunction) // Output the type plane before any constants! - outputTypes( Table.getModuleTypeLevel() ); + outputTypes(Table.getModuleTypeLevel()); else - // Output module-level string constants before any other constants.x + // Output module-level string constants before any other constants. outputConstantStrings(); for (unsigned pno = 0; pno != NumPlanes; pno++) { @@ -877,13 +922,13 @@ void BytecodeWriter::outputConstants(bool isFunction) { unsigned ValNo = 0; if (isFunction) // Don't re-emit module constants ValNo += Table.getModuleLevel(pno); - - if (hasNullValue(pno)) { + + if (hasNullValue(Plane[0]->getType())) { // Skip zero initializer if (ValNo == 0) ValNo = 1; } - + // Write out constants in the plane outputConstantsInPlane(Plane, ValNo); } @@ -893,27 +938,67 @@ void BytecodeWriter::outputConstants(bool isFunction) { static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { default: assert(0 && "Invalid linkage!"); - case GlobalValue::ExternalLinkage: return 0; - case GlobalValue::WeakLinkage: return 1; - case GlobalValue::AppendingLinkage: return 2; - case GlobalValue::InternalLinkage: return 3; - case GlobalValue::LinkOnceLinkage: return 4; + case GlobalValue::ExternalLinkage: return 0; + case GlobalValue::WeakLinkage: return 1; + case GlobalValue::AppendingLinkage: return 2; + case GlobalValue::InternalLinkage: return 3; + case GlobalValue::LinkOnceLinkage: return 4; + case GlobalValue::DLLImportLinkage: return 5; + case GlobalValue::DLLExportLinkage: return 6; + case GlobalValue::ExternalWeakLinkage: return 7; } } void BytecodeWriter::outputModuleInfoBlock(const Module *M) { BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfoBlockID, *this); + + // Give numbers to sections as we encounter them. + unsigned SectionIDCounter = 0; + std::vector SectionNames; + std::map SectionID; // Output the types for the global variables in the module... - for (Module::const_giterator I = M->gbegin(), End = M->gend(); I != End;++I) { + for (Module::const_global_iterator I = M->global_begin(), + End = M->global_end(); I != End; ++I) { int Slot = Table.getSlot(I->getType()); assert(Slot != -1 && "Module global vars is broken!"); + assert((I->hasInitializer() || !I->hasInternalLinkage()) && + "Global must have an initializer or have external linkage!"); + // Fields: bit0 = isConstant, bit1 = hasInitializer, bit2-4=Linkage, - // bit5+ = Slot # for type - unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | - (I->hasInitializer() << 1) | (unsigned)I->isConstant(); - output_vbr(oSlot ); + // bit5+ = Slot # for type. + bool HasExtensionWord = (I->getAlignment() != 0) || I->hasSection(); + + // If we need to use the extension byte, set linkage=3(internal) and + // initializer = 0 (impossible!). + if (!HasExtensionWord) { + unsigned oSlot = ((unsigned)Slot << 5) | (getEncodedLinkage(I) << 2) | + (I->hasInitializer() << 1) | (unsigned)I->isConstant(); + output_vbr(oSlot); + } else { + unsigned oSlot = ((unsigned)Slot << 5) | (3 << 2) | + (0 << 1) | (unsigned)I->isConstant(); + output_vbr(oSlot); + + // The extension word has this format: bit 0 = has initializer, bit 1-3 = + // linkage, bit 4-8 = alignment (log2), bit 9 = has SectionID, + // bits 10+ = future use. + unsigned ExtWord = (unsigned)I->hasInitializer() | + (getEncodedLinkage(I) << 1) | + ((Log2_32(I->getAlignment())+1) << 4) | + ((unsigned)I->hasSection() << 9); + output_vbr(ExtWord); + if (I->hasSection()) { + // Give section names unique ID's. + unsigned &Entry = SectionID[I->getSection()]; + if (Entry == 0) { + Entry = ++SectionIDCounter; + SectionNames.push_back(I->getSection()); + } + output_vbr(Entry); + } + } // If we have an initializer, output it now. if (I->hasInitializer()) { @@ -924,25 +1009,74 @@ void BytecodeWriter::outputModuleInfoBlock(const Module *M) { } output_typeid((unsigned)Table.getSlot(Type::VoidTy)); - // Output the types of the functions in this module... + // Output the types of the functions in this module. for (Module::const_iterator I = M->begin(), End = M->end(); I != End; ++I) { int Slot = Table.getSlot(I->getType()); - assert(Slot != -1 && "Module const pool is broken!"); + assert(Slot != -1 && "Module slot calculator is broken!"); assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); - output_typeid((unsigned)Slot); + assert(((Slot << 6) >> 6) == Slot && "Slot # too big!"); + unsigned CC = I->getCallingConv()+1; + unsigned ID = (Slot << 5) | (CC & 15); + + if (I->isExternal()) // If external, we don't have an FunctionInfo block. + ID |= 1 << 4; + + if (I->getAlignment() || I->hasSection() || (CC & ~15) != 0 || + (I->isExternal() && I->hasDLLImportLinkage()) || + (I->isExternal() && I->hasExternalWeakLinkage()) + ) + ID |= 1 << 31; // Do we need an extension word? + + output_vbr(ID); + + if (ID & (1 << 31)) { + // Extension byte: bits 0-4 = alignment, bits 5-9 = top nibble of calling + // convention, bit 10 = hasSectionID., bits 11-12 = external linkage type + unsigned extLinkage = 0; + + if (I->isExternal()) { + if (I->hasDLLImportLinkage()) { + extLinkage = 1; + } else if (I->hasExternalWeakLinkage()) { + extLinkage = 2; + } + } + + ID = (Log2_32(I->getAlignment())+1) | ((CC >> 4) << 5) | + (I->hasSection() << 10) | + ((extLinkage & 3) << 11); + output_vbr(ID); + + // Give section names unique ID's. + if (I->hasSection()) { + unsigned &Entry = SectionID[I->getSection()]; + if (Entry == 0) { + Entry = ++SectionIDCounter; + SectionNames.push_back(I->getSection()); + } + output_vbr(Entry); + } + } } - output_typeid((unsigned)Table.getSlot(Type::VoidTy)); + output_vbr((unsigned)Table.getSlot(Type::VoidTy) << 5); - // Put out the list of dependent libraries for the Module + // Emit the list of dependent libraries for the Module. Module::lib_iterator LI = M->lib_begin(); Module::lib_iterator LE = M->lib_end(); - output_vbr( unsigned(LE - LI) ); // Put out the number of dependent libraries - for ( ; LI != LE; ++LI ) { + output_vbr(unsigned(LE - LI)); // Emit the number of dependent libraries. + for (; LI != LE; ++LI) output(*LI); - } // Output the target triple from the module output(M->getTargetTriple()); + + // Emit the table of section names. + output_vbr((unsigned)SectionNames.size()); + for (unsigned i = 0, e = SectionNames.size(); i != e; ++i) + output(SectionNames[i]); + + // Output the inline asm string. + output(M->getModuleInlineAsm()); } void BytecodeWriter::outputInstructions(const Function *F) { @@ -953,12 +1087,12 @@ void BytecodeWriter::outputInstructions(const Function *F) { } void BytecodeWriter::outputFunction(const Function *F) { - BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this); - output_vbr(getEncodedLinkage(F)); - // If this is an external function, there is nothing else to emit! if (F->isExternal()) return; + BytecodeBlock FunctionBlock(BytecodeFormat::FunctionBlockID, *this); + output_vbr(getEncodedLinkage(F)); + // Get slot information about the function... Table.incorporateFunction(F); @@ -970,13 +1104,13 @@ void BytecodeWriter::outputFunction(const Function *F) { // Otherwise, emit the compaction table. outputCompactionTable(); } - + // Output all of the instructions in the body of the function outputInstructions(F); - + // If needed, output the symbol table for the function... outputSymbolTable(F->getSymbolTable()); - + Table.purgeFunction(); } @@ -1016,7 +1150,7 @@ void BytecodeWriter::outputCompactionTypes(unsigned StartNo) { // The compaction types may have been uncompactified back to the // global types. If so, we just write an empty table - if (CTypes.size() == 0 ) { + if (CTypes.size() == 0) { output_vbr(0U); return; } @@ -1034,47 +1168,52 @@ void BytecodeWriter::outputCompactionTypes(unsigned StartNo) { } void BytecodeWriter::outputCompactionTable() { - BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this, - true/*ElideIfEmpty*/); - const std::vector > &CT =Table.getCompactionTable(); - - // First thing is first, emit the type compaction table if there is one. - outputCompactionTypes(Type::FirstDerivedTyID); - - for (unsigned i = 0, e = CT.size(); i != e; ++i) - outputCompactionTablePlane(i, CT[i], 0); + // Avoid writing the compaction table at all if there is no content. + if (Table.getCompactionTypes().size() >= Type::FirstDerivedTyID || + (!Table.CompactionTableIsEmpty())) { + BytecodeBlock CTB(BytecodeFormat::CompactionTableBlockID, *this, + true/*ElideIfEmpty*/); + const std::vector > &CT = + Table.getCompactionTable(); + + // First things first, emit the type compaction table if there is one. + outputCompactionTypes(Type::FirstDerivedTyID); + + for (unsigned i = 0, e = CT.size(); i != e; ++i) + outputCompactionTablePlane(i, CT[i], 0); + } } void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { // Do not output the Bytecode block for an empty symbol table, it just wastes // space! - if ( MST.isEmpty() ) return; + if (MST.isEmpty()) return; BytecodeBlock SymTabBlock(BytecodeFormat::SymbolTableBlockID, *this, - true/* ElideIfEmpty*/); + true/*ElideIfEmpty*/); - // Write the number of types + // Write the number of types output_vbr(MST.num_types()); // Write each of the types for (SymbolTable::type_const_iterator TI = MST.type_begin(), - TE = MST.type_end(); TI != TE; ++TI ) { + TE = MST.type_end(); TI != TE; ++TI) { // Symtab entry:[def slot #][name] output_typeid((unsigned)Table.getSlot(TI->second)); - output(TI->first); + output(TI->first); } // Now do each of the type planes in order. - for (SymbolTable::plane_const_iterator PI = MST.plane_begin(), + for (SymbolTable::plane_const_iterator PI = MST.plane_begin(), PE = MST.plane_end(); PI != PE; ++PI) { SymbolTable::value_const_iterator I = MST.value_begin(PI->first); SymbolTable::value_const_iterator End = MST.value_end(PI->first); int Slot; - + if (I == End) continue; // Don't mess with an absent type... // Write the number of values in this plane - output_vbr(MST.type_size(PI->first)); + output_vbr((unsigned)PI->second.size()); // Write the slot number of the type for this plane Slot = Table.getSlot(PI->first); @@ -1092,38 +1231,60 @@ void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { } } -void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out) { +void llvm::WriteBytecodeToFile(const Module *M, std::ostream &Out, + bool compress) { assert(M && "You can't write a null module!!"); + // Make sure that std::cout is put into binary mode for systems + // that care. + if (&Out == std::cout) + sys::Program::ChangeStdoutToBinary(); + + // Create a vector of unsigned char for the bytecode output. We + // reserve 256KBytes of space in the vector so that we avoid doing + // lots of little allocations. 256KBytes is sufficient for a large + // proportion of the bytecode files we will encounter. Larger files + // will be automatically doubled in size as needed (std::vector + // behavior). std::vector Buffer; - Buffer.reserve(64 * 1024); // avoid lots of little reallocs + Buffer.reserve(256 * 1024); - // This object populates buffer for us... + // The BytecodeWriter populates Buffer for us. BytecodeWriter BCW(Buffer, M); - // Keep track of how much we've written... + // Keep track of how much we've written BytesWritten += Buffer.size(); - // Okay, write the deque out to the ostream now... the deque is not - // sequential in memory, however, so write out as much as possible in big - // chunks, until we're done. - // + // Determine start and end points of the Buffer + const unsigned char *FirstByte = &Buffer.front(); - std::vector::const_iterator I = Buffer.begin(),E = Buffer.end(); - while (I != E) { // Loop until it's all written - // Scan to see how big this chunk is... - const unsigned char *ChunkPtr = &*I; - const unsigned char *LastPtr = ChunkPtr; - while (I != E) { - const unsigned char *ThisPtr = &*++I; - if (++LastPtr != ThisPtr) // Advanced by more than a byte of memory? - break; - } - - // Write out the chunk... - Out.write((char*)ChunkPtr, unsigned(LastPtr-ChunkPtr)); + // If we're supposed to compress this mess ... + if (compress) { + + // We signal compression by using an alternate magic number for the + // file. The compressed bytecode file's magic number is "llvc" instead + // of "llvm". + char compressed_magic[4]; + compressed_magic[0] = 'l'; + compressed_magic[1] = 'l'; + compressed_magic[2] = 'v'; + compressed_magic[3] = 'c'; + + Out.write(compressed_magic,4); + + // Compress everything after the magic number (which we altered) + uint64_t zipSize = Compressor::compressToStream( + (char*)(FirstByte+4), // Skip the magic number + Buffer.size()-4, // Skip the magic number + Out // Where to write compressed data + ); + + } else { + + // We're not compressing, so just write the entire block. + Out.write((char*)FirstByte, Buffer.size()); } + + // make sure it hits disk now Out.flush(); } - -// vim: sw=2 ai