X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FBytecode%2FReader%2FReader.cpp;h=ac6418d7a7d0f7c391558b14945b79f92556404f;hb=b74ed07bfd3af42331b1964c24c39912610a08f4;hp=3501d8775860dd1713d2a547729d56cbe02da683;hpb=2e7ec128f96cc5c5fc44393101f696df137f09dd;p=oota-llvm.git diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 3501d877586..ac6418d7a7d 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -1,15 +1,15 @@ //===- Reader.cpp - Code to read bytecode files ---------------------------===// -// +// // The LLVM Compiler Infrastructure // // This file was developed by the LLVM research group and is distributed under // the University of Illinois Open Source License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This library implements the functionality defined in llvm/Bytecode/Reader.h // -// Note that this library should be as fast as possible, reentrant, and +// Note that this library should be as fast as possible, reentrant, and // threadsafe!! // // TODO: Allow passing in an option to ignore the symbol table @@ -17,43 +17,44 @@ //===----------------------------------------------------------------------===// #include "Reader.h" +#include "llvm/Assembly/AutoUpgrade.h" #include "llvm/Bytecode/BytecodeHandler.h" #include "llvm/BasicBlock.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/SymbolTable.h" #include "llvm/Bytecode/Format.h" +#include "llvm/Config/alloca.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/Compressor.h" +#include "llvm/Support/MathExtras.h" #include "llvm/ADT/StringExtras.h" #include #include using namespace llvm; namespace { - -/// @brief A class for maintaining the slot number definition -/// as a placeholder for the actual definition for forward constants defs. 
-class ConstantPlaceHolder : public ConstantExpr { - unsigned ID; - ConstantPlaceHolder(); // DO NOT IMPLEMENT - void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT -public: - ConstantPlaceHolder(const Type *Ty, unsigned id) - : ConstantExpr(Instruction::UserOp1, Constant::getNullValue(Ty), Ty), - ID(id) {} - unsigned getID() { return ID; } -}; - + /// @brief A class for maintaining the slot number definition + /// as a placeholder for the actual definition for forward constants defs. + class ConstantPlaceHolder : public ConstantExpr { + ConstantPlaceHolder(); // DO NOT IMPLEMENT + void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT + public: + Use Op; + ConstantPlaceHolder(const Type *Ty) + : ConstantExpr(Ty, Instruction::UserOp1, &Op, 1), + Op(UndefValue::get(Type::IntTy), this) { + } + }; } // Provide some details on error -inline void BytecodeReader::error(std::string err) { - err += " (Vers=" ; - err += itostr(RevisionNum) ; - err += ", Pos=" ; - err += itostr(At-MemStart); - err += ")"; - throw err; +inline void BytecodeReader::error(const std::string& err) { + ErrorMsg = err + " (Vers=" + itostr(RevisionNum) + ", Pos=" + + itostr(At-MemStart) + ")"; + longjmp(context,1); } //===----------------------------------------------------------------------===// @@ -76,17 +77,17 @@ inline void BytecodeReader::checkPastBlockEnd(const char * block_name) { inline void BytecodeReader::align32() { if (hasAlignment) { BufPtr Save = At; - At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); - if (At > Save) + At = (const unsigned char *)((intptr_t)(At+3) & (~3UL)); + if (At > Save) if (Handler) Handler->handleAlignment(At - Save); - if (At > BlockEnd) + if (At > BlockEnd) error("Ran out of data while aligning!"); } } /// Read a whole unsigned integer inline unsigned BytecodeReader::read_uint() { - if (At+4 > BlockEnd) + if (At+4 > BlockEnd) error("Ran out of data reading uint!"); At += 4; return At[-4] | (At[-3] << 8) | (At[-2] << 16) | 
(At[-1] << 24); @@ -97,9 +98,9 @@ inline unsigned BytecodeReader::read_vbr_uint() { unsigned Shift = 0; unsigned Result = 0; BufPtr Save = At; - + do { - if (At == BlockEnd) + if (At == BlockEnd) error("Ran out of data reading vbr_uint!"); Result |= (unsigned)((*At++) & 0x7F) << Shift; Shift += 7; @@ -113,9 +114,9 @@ inline uint64_t BytecodeReader::read_vbr_uint64() { unsigned Shift = 0; uint64_t Result = 0; BufPtr Save = At; - + do { - if (At == BlockEnd) + if (At == BlockEnd) error("Ran out of data reading vbr_uint64!"); Result |= (uint64_t)((*At++) & 0x7F) << Shift; Shift += 7; @@ -151,7 +152,7 @@ inline std::string BytecodeReader::read_str() { inline void BytecodeReader::read_data(void *Ptr, void *End) { unsigned char *Start = (unsigned char *)Ptr; unsigned Amount = (unsigned char *)End - Start; - if (At+Amount > BlockEnd) + if (At+Amount > BlockEnd) error("Ran out of data!"); std::copy(At, At+Amount, Start); At += Amount; @@ -161,29 +162,19 @@ inline void BytecodeReader::read_data(void *Ptr, void *End) { inline void BytecodeReader::read_float(float& FloatVal) { /// FIXME: This isn't optimal, it has size problems on some platforms /// where FP is not IEEE. - union { - float f; - uint32_t i; - } FloatUnion; - FloatUnion.i = At[0] | (At[1] << 8) | (At[2] << 16) | (At[3] << 24); + FloatVal = BitsToFloat(At[0] | (At[1] << 8) | (At[2] << 16) | (At[3] << 24)); At+=sizeof(uint32_t); - FloatVal = FloatUnion.f; } /// Read a double value in little-endian order inline void BytecodeReader::read_double(double& DoubleVal) { /// FIXME: This isn't optimal, it has size problems on some platforms /// where FP is not IEEE. 
- union { - double d; - uint64_t i; - } DoubleUnion; - DoubleUnion.i = (uint64_t(At[0]) << 0) | (uint64_t(At[1]) << 8) | - (uint64_t(At[2]) << 16) | (uint64_t(At[3]) << 24) | - (uint64_t(At[4]) << 32) | (uint64_t(At[5]) << 40) | - (uint64_t(At[6]) << 48) | (uint64_t(At[7]) << 56); + DoubleVal = BitsToDouble((uint64_t(At[0]) << 0) | (uint64_t(At[1]) << 8) | + (uint64_t(At[2]) << 16) | (uint64_t(At[3]) << 24) | + (uint64_t(At[4]) << 32) | (uint64_t(At[5]) << 40) | + (uint64_t(At[6]) << 48) | (uint64_t(At[7]) << 56)); At+=sizeof(uint64_t); - DoubleVal = DoubleUnion.d; } /// Read a block header and obtain its type and size @@ -192,10 +183,10 @@ inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { Type = read_uint(); Size = read_uint(); switch (Type) { - case BytecodeFormat::Reserved_DoNotUse : + case BytecodeFormat::Reserved_DoNotUse : error("Reserved_DoNotUse used as Module Type?"); Type = BytecodeFormat::ModuleBlockID; break; - case BytecodeFormat::Module: + case BytecodeFormat::Module: Type = BytecodeFormat::ModuleBlockID; break; case BytecodeFormat::Function: Type = BytecodeFormat::FunctionBlockID; break; @@ -238,8 +229,8 @@ inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { /// 1.3 this changed so that Type does not derive from Value. Consequently, /// the BytecodeReader's containers for Values can't contain Types because /// there's no inheritance relationship. This means that the "Type Type" -/// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3 -/// whenever a bytecode construct must have both types and values together, +/// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3 +/// whenever a bytecode construct must have both types and values together, /// the types are always read/written first and then the Values. Furthermore /// since Type::TypeTyID no longer exists, its value (12) now corresponds to /// Type::LabelTyID. 
In order to overcome this we must "sanitize" all the @@ -249,7 +240,7 @@ inline void BytecodeReader::read_block(unsigned &Type, unsigned &Size) { /// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this /// function returns true, otherwise false. This helps detect situations /// where the pre 1.3 bytecode is indicating that what follows is a type. -/// @returns true iff type id corresponds to pre 1.3 "type type" +/// @returns true iff type id corresponds to pre 1.3 "type type" inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId) { if (hasTypeDerivedFromValue) { /// do nothing if 1.3 or later if (TypeId == Type::LabelTyID) { @@ -342,7 +333,7 @@ unsigned BytecodeReader::getTypeSlot(const Type *Ty) { if (!CompactionTypes.empty()) { for (unsigned i = 0, e = CompactionTypes.size(); i != e; ++i) if (CompactionTypes[i].first == Ty) - return Type::FirstDerivedTyID + i; + return Type::FirstDerivedTyID + i; error("Couldn't find type specified in compaction table!"); } @@ -352,14 +343,28 @@ unsigned BytecodeReader::getTypeSlot(const Type *Ty) { FunctionTypes.end(), Ty); if (I != FunctionTypes.end()) - return Type::FirstDerivedTyID + ModuleTypes.size() + + return Type::FirstDerivedTyID + ModuleTypes.size() + (&*I - &FunctionTypes[0]); - // Check the module level types now... - I = std::find(ModuleTypes.begin(), ModuleTypes.end(), Ty); - if (I == ModuleTypes.end()) + // If we don't have our cache yet, build it now. + if (ModuleTypeIDCache.empty()) { + unsigned N = 0; + ModuleTypeIDCache.reserve(ModuleTypes.size()); + for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end(); + I != E; ++I, ++N) + ModuleTypeIDCache.push_back(std::make_pair(*I, N)); + + std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end()); + } + + // Binary search the cache for the entry. 
+ std::vector >::iterator IT = + std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(), + std::make_pair(Ty, 0U)); + if (IT == ModuleTypeIDCache.end() || IT->first != Ty) error("Didn't find type in ModuleTypes."); - return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); + + return Type::FirstDerivedTyID + IT->second; } /// This is just like getType, but when a compaction table is in use, it is @@ -383,15 +388,30 @@ const Type *BytecodeReader::getGlobalTableType(unsigned Slot) { unsigned BytecodeReader::getGlobalTableTypeSlot(const Type *Ty) { if (Ty->isPrimitiveType()) return Ty->getTypeID(); - TypeListTy::iterator I = std::find(ModuleTypes.begin(), - ModuleTypes.end(), Ty); - if (I == ModuleTypes.end()) + + // If we don't have our cache yet, build it now. + if (ModuleTypeIDCache.empty()) { + unsigned N = 0; + ModuleTypeIDCache.reserve(ModuleTypes.size()); + for (TypeListTy::iterator I = ModuleTypes.begin(), E = ModuleTypes.end(); + I != E; ++I, ++N) + ModuleTypeIDCache.push_back(std::make_pair(*I, N)); + + std::sort(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end()); + } + + // Binary search the cache for the entry. + std::vector >::iterator IT = + std::lower_bound(ModuleTypeIDCache.begin(), ModuleTypeIDCache.end(), + std::make_pair(Ty, 0U)); + if (IT == ModuleTypeIDCache.end() || IT->first != Ty) error("Didn't find type in ModuleTypes."); - return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); + + return Type::FirstDerivedTyID + IT->second; } -/// Retrieve a value of a given type and slot number, possibly creating -/// it if it doesn't already exist. +/// Retrieve a value of a given type and slot number, possibly creating +/// it if it doesn't already exist. 
Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { assert(type != Type::LabelTyID && "getValue() cannot get blocks!"); unsigned Num = oNum; @@ -412,9 +432,12 @@ Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { GlobalTyID = CompactionTypes[type-Type::FirstDerivedTyID].second; if (hasImplicitNull(GlobalTyID)) { - if (Num == 0) - return Constant::getNullValue(getType(type)); - --Num; + const Type *Ty = getType(type); + if (!isa(Ty)) { + if (Num == 0) + return Constant::getNullValue(Ty); + --Num; + } } if (GlobalTyID < ModuleValues.size() && ModuleValues[GlobalTyID]) { @@ -424,8 +447,8 @@ Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { } } - if (FunctionValues.size() > type && - FunctionValues[type] && + if (FunctionValues.size() > type && + FunctionValues[type] && Num < FunctionValues[type]->size()) return FunctionValues[type]->getOperand(Num); @@ -444,11 +467,12 @@ Value * BytecodeReader::getValue(unsigned type, unsigned oNum, bool Create) { ForwardReferences.insert(I, std::make_pair(KeyValue, Val)); return Val; } - throw "Can't create placeholder for value of type slot #" + utostr(type); + error("Can't create placeholder for value of type slot #" + utostr(type)); + return 0; // just silence warning, error calls longjmp } -/// This is just like getValue, but when a compaction table is in use, it -/// is ignored. Also, no forward references or other fancy features are +/// This is just like getValue, but when a compaction table is in use, it +/// is ignored. Also, no forward references or other fancy features are /// supported. Value* BytecodeReader::getGlobalTableValue(unsigned TyID, unsigned SlotNo) { if (SlotNo == 0) @@ -467,11 +491,11 @@ Value* BytecodeReader::getGlobalTableValue(unsigned TyID, unsigned SlotNo) { SlotNo >= ModuleValues[TyID]->size()) { if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0) error("Corrupt compaction table entry!" 
- + utostr(TyID) + ", " + utostr(SlotNo) + ": " + + utostr(TyID) + ", " + utostr(SlotNo) + ": " + utostr(ModuleValues.size())); - else + else error("Corrupt compaction table entry!" - + utostr(TyID) + ", " + utostr(SlotNo) + ": " + + utostr(TyID) + ", " + utostr(SlotNo) + ": " + utostr(ModuleValues.size()) + ", " + utohexstr(reinterpret_cast(((void*)ModuleValues[TyID]))) + ", " @@ -483,18 +507,17 @@ Value* BytecodeReader::getGlobalTableValue(unsigned TyID, unsigned SlotNo) { /// Just like getValue, except that it returns a null pointer /// only on error. It always returns a constant (meaning that if the value is /// defined, but is not a constant, that is an error). If the specified -/// constant hasn't been parsed yet, a placeholder is defined and used. +/// constant hasn't been parsed yet, a placeholder is defined and used. /// Later, after the real value is parsed, the placeholder is eliminated. Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { if (Value *V = getValue(TypeSlot, Slot, false)) if (Constant *C = dyn_cast(V)) return C; // If we already have the value parsed, just return it else - error("Value for slot " + utostr(Slot) + + error("Value for slot " + utostr(Slot) + " is expected to be a constant!"); - const Type *Ty = getType(TypeSlot); - std::pair Key(Ty, Slot); + std::pair Key(TypeSlot, Slot); ConstantRefsType::iterator I = ConstantFwdRefs.lower_bound(Key); if (I != ConstantFwdRefs.end() && I->first == Key) { @@ -502,8 +525,8 @@ Constant* BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { } else { // Create a placeholder for the constant reference and // keep track of the fact that we have a forward ref to recycle it - Constant *C = new ConstantPlaceHolder(Ty, Slot); - + Constant *C = new ConstantPlaceHolder(getType(TypeSlot)); + // Keep track of the fact that we have a forward ref to recycle it ConstantFwdRefs.insert(I, std::make_pair(Key, C)); return C; @@ -517,12 +540,8 @@ Constant* 
BytecodeReader::getConstantValue(unsigned TypeSlot, unsigned Slot) { /// As values are created, they are inserted into the appropriate place /// with this method. The ValueTable argument must be one of ModuleValues /// or FunctionValues data members of this class. -unsigned BytecodeReader::insertValue(Value *Val, unsigned type, +unsigned BytecodeReader::insertValue(Value *Val, unsigned type, ValueTable &ValueTab) { - assert((!isa(Val) || !cast(Val)->isNullValue()) || - !hasImplicitNull(type) && - "Cannot read null values from bytecode!"); - if (ValueTab.size() <= type) ValueTab.resize(type+1); @@ -530,14 +549,14 @@ unsigned BytecodeReader::insertValue(Value *Val, unsigned type, ValueTab[type]->push_back(Val); - bool HasOffset = hasImplicitNull(type); + bool HasOffset = hasImplicitNull(type) && !isa(Val->getType()); return ValueTab[type]->size()-1 + HasOffset; } /// Insert the arguments of a function as new values in the reader. void BytecodeReader::insertArguments(Function* F) { const FunctionType *FT = F->getFunctionType(); - Function::aiterator AI = F->abegin(); + Function::arg_iterator AI = F->arg_begin(); for (FunctionType::param_iterator It = FT->param_begin(); It != FT->param_end(); ++It, ++AI) insertValue(AI, getTypeSlot(AI->getType()), FunctionValues); @@ -585,7 +604,7 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, // -------------------------- // 15-08: Resulting type plane // 23-16: Operand #1 - // 31-24: Operand #2 + // 31-24: Operand #2 // iType = (Op >> 8) & 255; Oprnds[0] = (Op >> 16) & 255; @@ -646,21 +665,97 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, getValue(iType, Oprnds[0]), getValue(iType, Oprnds[1])); + bool isCall = false; switch (Opcode) { - default: - if (Result == 0) + default: + if (Result == 0) error("Illegal instruction read!"); break; case Instruction::VAArg: - Result = new VAArgInst(getValue(iType, Oprnds[0]), + Result = new VAArgInst(getValue(iType, Oprnds[0]), getSanitizedType(Oprnds[1])); break; - 
case Instruction::VANext: - Result = new VANextInst(getValue(iType, Oprnds[0]), - getSanitizedType(Oprnds[1])); + case 32: { //VANext_old + const Type* ArgTy = getValue(iType, Oprnds[0])->getType(); + Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy, + (Type *)0); + + //b = vanext a, t -> + //foo = alloca 1 of t + //bar = vacopy a + //store bar -> foo + //tmp = vaarg foo, t + //b = load foo + AllocaInst* foo = new AllocaInst(ArgTy, 0, "vanext.fix"); + BB->getInstList().push_back(foo); + CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0])); + BB->getInstList().push_back(bar); + BB->getInstList().push_back(new StoreInst(bar, foo)); + Instruction* tmp = new VAArgInst(foo, getSanitizedType(Oprnds[1])); + BB->getInstList().push_back(tmp); + Result = new LoadInst(foo); + break; + } + case 33: { //VAArg_old + const Type* ArgTy = getValue(iType, Oprnds[0])->getType(); + Function* NF = TheModule->getOrInsertFunction("llvm.va_copy", ArgTy, ArgTy, + (Type *)0); + + //b = vaarg a, t -> + //foo = alloca 1 of t + //bar = vacopy a + //store bar -> foo + //b = vaarg foo, t + AllocaInst* foo = new AllocaInst(ArgTy, 0, "vaarg.fix"); + BB->getInstList().push_back(foo); + CallInst* bar = new CallInst(NF, getValue(iType, Oprnds[0])); + BB->getInstList().push_back(bar); + BB->getInstList().push_back(new StoreInst(bar, foo)); + Result = new VAArgInst(foo, getSanitizedType(Oprnds[1])); + break; + } + case Instruction::ExtractElement: { + if (Oprnds.size() != 2) + error("Invalid extractelement instruction!"); + Value *V1 = getValue(iType, Oprnds[0]); + Value *V2 = getValue(Type::UIntTyID, Oprnds[1]); + + if (!ExtractElementInst::isValidOperands(V1, V2)) + error("Invalid extractelement instruction!"); + + Result = new ExtractElementInst(V1, V2); + break; + } + case Instruction::InsertElement: { + const PackedType *PackedTy = dyn_cast(InstTy); + if (!PackedTy || Oprnds.size() != 3) + error("Invalid insertelement instruction!"); + + Value *V1 = 
getValue(iType, Oprnds[0]); + Value *V2 = getValue(getTypeSlot(PackedTy->getElementType()), Oprnds[1]); + Value *V3 = getValue(Type::UIntTyID, Oprnds[2]); + + if (!InsertElementInst::isValidOperands(V1, V2, V3)) + error("Invalid insertelement instruction!"); + Result = new InsertElementInst(V1, V2, V3); + break; + } + case Instruction::ShuffleVector: { + const PackedType *PackedTy = dyn_cast(InstTy); + if (!PackedTy || Oprnds.size() != 3) + error("Invalid shufflevector instruction!"); + Value *V1 = getValue(iType, Oprnds[0]); + Value *V2 = getValue(iType, Oprnds[1]); + const PackedType *EltTy = + PackedType::get(Type::UIntTy, PackedTy->getNumElements()); + Value *V3 = getValue(getTypeSlot(EltTy), Oprnds[2]); + if (!ShuffleVectorInst::isValidOperands(V1, V2, V3)) + error("Invalid shufflevector instruction!"); + Result = new ShuffleVectorInst(V1, V2, V3); break; + } case Instruction::Cast: - Result = new CastInst(getValue(iType, Oprnds[0]), + Result = new CastInst(getValue(iType, Oprnds[0]), getSanitizedType(Oprnds[1])); break; case Instruction::Select: @@ -673,7 +768,7 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, error("Invalid phi node encountered!"); PHINode *PN = new PHINode(InstTy); - PN->op_reserve(Oprnds.size()); + PN->reserveOperandSpace(Oprnds.size()); for (unsigned i = 0, e = Oprnds.size(); i != e; i += 2) PN->addIncoming(getValue(iType, Oprnds[i]), getBasicBlock(Oprnds[i+1])); Result = PN; @@ -699,7 +794,7 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, if (Oprnds.size() == 1) Result = new BranchInst(getBasicBlock(Oprnds[0])); else if (Oprnds.size() == 3) - Result = new BranchInst(getBasicBlock(Oprnds[0]), + Result = new BranchInst(getBasicBlock(Oprnds[0]), getBasicBlock(Oprnds[1]), getValue(Type::BoolTyID , Oprnds[2])); else error("Invalid number of operands for a 'br' instruction!"); @@ -709,20 +804,39 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, error("Switch statement with odd number of arguments!"); 
SwitchInst *I = new SwitchInst(getValue(iType, Oprnds[0]), - getBasicBlock(Oprnds[1])); + getBasicBlock(Oprnds[1]), + Oprnds.size()/2-1); for (unsigned i = 2, e = Oprnds.size(); i != e; i += 2) - I->addCase(cast(getValue(iType, Oprnds[i])), + I->addCase(cast(getValue(iType, Oprnds[i])), getBasicBlock(Oprnds[i+1])); Result = I; break; } - case Instruction::Call: { + case 58: // Call with extra operand for calling conv + case 59: // tail call, Fast CC + case 60: // normal call, Fast CC + case 61: // tail call, C Calling Conv + case Instruction::Call: { // Normal Call, C Calling Convention if (Oprnds.size() == 0) error("Invalid call instruction encountered!"); Value *F = getValue(iType, Oprnds[0]); + unsigned CallingConv = CallingConv::C; + bool isTailCall = false; + + if (Opcode == 61 || Opcode == 59) + isTailCall = true; + + if (Opcode == 58) { + isTailCall = Oprnds.back() & 1; + CallingConv = Oprnds.back() >> 1; + Oprnds.pop_back(); + } else if (Opcode == 59 || Opcode == 60) { + CallingConv = CallingConv::Fast; + } + // Check to make sure we have a pointer to function type const PointerType *PTy = dyn_cast(F->getType()); if (PTy == 0) error("Call to non function pointer value!"); @@ -750,35 +864,47 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, // Read all of the fixed arguments for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) Params.push_back(getValue(getTypeSlot(FTy->getParamType(i)),Oprnds[i])); - + FirstVariableOperand = FTy->getNumParams(); - if ((Oprnds.size()-FirstVariableOperand) & 1) + if ((Oprnds.size()-FirstVariableOperand) & 1) error("Invalid call instruction!"); // Must be pairs of type/value - - for (unsigned i = FirstVariableOperand, e = Oprnds.size(); + + for (unsigned i = FirstVariableOperand, e = Oprnds.size(); i != e; i += 2) Params.push_back(getValue(Oprnds[i], Oprnds[i+1])); } Result = new CallInst(F, Params); + if (isTailCall) cast(Result)->setTailCall(); + if (CallingConv) cast(Result)->setCallingConv(CallingConv); 
break; } - case Instruction::Invoke: { - if (Oprnds.size() < 3) + case 56: // Invoke with encoded CC + case 57: // Invoke Fast CC + case Instruction::Invoke: { // Invoke C CC + if (Oprnds.size() < 3) error("Invalid invoke instruction!"); Value *F = getValue(iType, Oprnds[0]); // Check to make sure we have a pointer to function type const PointerType *PTy = dyn_cast(F->getType()); - if (PTy == 0) + if (PTy == 0) error("Invoke to non function pointer value!"); const FunctionType *FTy = dyn_cast(PTy->getElementType()); - if (FTy == 0) + if (FTy == 0) error("Invoke to non function pointer value!"); std::vector Params; BasicBlock *Normal, *Except; + unsigned CallingConv = CallingConv::C; + + if (Opcode == 57) + CallingConv = CallingConv::Fast; + else if (Opcode == 56) { + CallingConv = Oprnds.back(); + Oprnds.pop_back(); + } if (!FTy->isVarArg()) { Normal = getBasicBlock(Oprnds[1]); @@ -797,12 +923,12 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, Normal = getBasicBlock(Oprnds[0]); Except = getBasicBlock(Oprnds[1]); - + unsigned FirstVariableArgument = FTy->getNumParams()+2; for (unsigned i = 2; i != FirstVariableArgument; ++i) Params.push_back(getValue(getTypeSlot(FTy->getParamType(i-2)), Oprnds[i])); - + if (Oprnds.size()-FirstVariableArgument & 1) // Must be type/value pairs error("Invalid invoke instruction!"); @@ -811,29 +937,36 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, } Result = new InvokeInst(F, Normal, Except, Params); + if (CallingConv) cast(Result)->setCallingConv(CallingConv); break; } - case Instruction::Malloc: - if (Oprnds.size() > 2) + case Instruction::Malloc: { + unsigned Align = 0; + if (Oprnds.size() == 2) + Align = (1 << Oprnds[1]) >> 1; + else if (Oprnds.size() > 2) error("Invalid malloc instruction!"); if (!isa(InstTy)) error("Invalid malloc instruction!"); Result = new MallocInst(cast(InstTy)->getElementType(), - Oprnds.size() ? 
getValue(Type::UIntTyID, - Oprnds[0]) : 0); + getValue(Type::UIntTyID, Oprnds[0]), Align); break; + } - case Instruction::Alloca: - if (Oprnds.size() > 2) + case Instruction::Alloca: { + unsigned Align = 0; + if (Oprnds.size() == 2) + Align = (1 << Oprnds[1]) >> 1; + else if (Oprnds.size() > 2) error("Invalid alloca instruction!"); if (!isa(InstTy)) error("Invalid alloca instruction!"); Result = new AllocaInst(cast(InstTy)->getElementType(), - Oprnds.size() ? getValue(Type::UIntTyID, - Oprnds[0]) :0); + getValue(Type::UIntTyID, Oprnds[0]), Align); break; + } case Instruction::Free: if (!isa(InstTy)) error("Invalid free instruction!"); @@ -848,8 +981,8 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, const Type *NextTy = InstTy; for (unsigned i = 1, e = Oprnds.size(); i != e; ++i) { const CompositeType *TopTy = dyn_cast_or_null(NextTy); - if (!TopTy) - error("Invalid getelementptr instruction!"); + if (!TopTy) + error("Invalid getelementptr instruction!"); unsigned ValIdx = Oprnds[i]; unsigned IdxTy = 0; @@ -894,7 +1027,7 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, Result = new LoadInst(getValue(iType, Oprnds[0]), "", Opcode == 62); break; - case 63: // volatile store + case 63: // volatile store case Instruction::Store: { if (!isa(InstTy) || Oprnds.size() != 2) error("Invalid store instruction!"); @@ -913,7 +1046,9 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, if (Oprnds.size() != 0) error("Invalid unreachable instruction!"); Result = new UnreachableInst(); break; - } // end switch(Opcode) + } // end switch(Opcode) + + BB->getInstList().push_back(Result); unsigned TypeSlot; if (Result->getType() == InstTy) @@ -922,7 +1057,6 @@ void BytecodeReader::ParseInstruction(std::vector &Oprnds, TypeSlot = getTypeSlot(Result->getType()); insertValue(Result, TypeSlot, FunctionValues); - BB->getInstList().push_back(Result); } /// Get a particular numbered basic block, which might be a forward reference. 
@@ -945,7 +1079,7 @@ BasicBlock *BytecodeReader::getBasicBlock(unsigned ID) { return ParsedBasicBlocks[ID] = new BasicBlock(); } -/// In LLVM 1.0 bytecode files, we used to output one basicblock at a time. +/// In LLVM 1.0 bytecode files, we used to output one basicblock at a time. /// This method reads in one of the basicblock packets. This method is not used /// for bytecode files after LLVM 1.0 /// @returns The basic block constructed. @@ -970,7 +1104,7 @@ BasicBlock *BytecodeReader::ParseBasicBlock(unsigned BlockNo) { } /// Parse all of the BasicBlock's & Instruction's in the body of a function. -/// In post 1.0 bytecode files, we no longer emit basic block individually, +/// In post 1.0 bytecode files, we no longer emit basic block individually, /// in order to avoid per-basic-block overhead. /// @returns Rhe number of basic blocks encountered. unsigned BytecodeReader::ParseInstructionList(Function* F) { @@ -1063,7 +1197,7 @@ void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, } if (V == 0) error("Failed value look-up for name '" + Name + "'"); - V->setName(Name, ST); + V->setName(Name); } } } @@ -1071,7 +1205,7 @@ void BytecodeReader::ParseSymbolTable(Function *CurrentFunction, if (Handler) Handler->handleSymbolTableEnd(); } -/// Read in the types portion of a compaction table. +/// Read in the types portion of a compaction table. void BytecodeReader::ParseCompactionTypes(unsigned NumEntries) { for (unsigned i = 0; i != NumEntries; ++i) { unsigned TypeSlot = 0; @@ -1089,7 +1223,7 @@ void BytecodeReader::ParseCompactionTable() { // Notify handler that we're beginning a compaction table. if (Handler) Handler->handleCompactionTableBegin(); - // In LLVM 1.3 Type no longer derives from Value. So, + // In LLVM 1.3 Type no longer derives from Value. So, // we always write them first in the compaction table // because they can't occupy a "type plane" where the // Values reside. 
@@ -1155,10 +1289,10 @@ void BytecodeReader::ParseCompactionTable() { // Notify handler that the compaction table is done. if (Handler) Handler->handleCompactionTableEnd(); } - + // Parse a single type. The typeid is read in first. If its a primitive type // then nothing else needs to be read, we know how to instantiate it. If its -// a derived type, then additional data is read to fill out the type +// a derived type, then additional data is read to fill out the type // definition. const Type *BytecodeReader::ParseType() { unsigned PrimType = 0; @@ -1168,7 +1302,7 @@ const Type *BytecodeReader::ParseType() { const Type *Result = 0; if ((Result = Type::getPrimitiveType((Type::TypeID)PrimType))) return Result; - + switch (PrimType) { case Type::FunctionTyID: { const Type *RetType = readSanitizedType(); @@ -1176,7 +1310,7 @@ const Type *BytecodeReader::ParseType() { unsigned NumParams = read_vbr_uint(); std::vector Params; - while (NumParams--) + while (NumParams--) Params.push_back(readSanitizedType()); bool isVarArg = Params.size() && Params.back() == Type::VoidTy; @@ -1248,19 +1382,23 @@ void BytecodeReader::ParseTypes(TypeListTy &Tab, unsigned NumEntries){ for (unsigned i = 0; i != NumEntries; ++i) Tab.push_back(OpaqueType::get()); - if (Handler) + if (Handler) Handler->handleTypeList(NumEntries); + // If we are about to resolve types, make sure the type cache is clear. + if (NumEntries) + ModuleTypeIDCache.clear(); + // Loop through reading all of the types. Forward types will make use of the // opaque types just inserted. // for (unsigned i = 0; i != NumEntries; ++i) { const Type* NewTy = ParseType(); const Type* OldTy = Tab[i].get(); - if (NewTy == 0) + if (NewTy == 0) error("Couldn't parse type!"); - // Don't directly push the new type on the Tab. Instead we want to replace + // Don't directly push the new type on the Tab. Instead we want to replace // the opaque type we previously inserted with the new concrete value. 
This // approach helps with forward references to types. The refinement from the // abstract (opaque) type to the new type causes all uses of the abstract @@ -1276,19 +1414,40 @@ void BytecodeReader::ParseTypes(TypeListTy &Tab, unsigned NumEntries){ } /// Parse a single constant value -Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { +Value *BytecodeReader::ParseConstantPoolValue(unsigned TypeID) { // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. - // + // // 0 if not expr; numArgs if is expr unsigned isExprNumArgs = read_vbr_uint(); if (isExprNumArgs) { - // 'undef' is encoded with 'exprnumargs' == 1. - if (!hasNoUndefValue) - if (--isExprNumArgs == 0) + if (!hasNoUndefValue) { + // 'undef' is encoded with 'exprnumargs' == 1. + if (isExprNumArgs == 1) return UndefValue::get(getType(TypeID)); - + + // Inline asm is encoded with exprnumargs == ~0U. + if (isExprNumArgs == ~0U) { + std::string AsmStr = read_str(); + std::string ConstraintStr = read_str(); + unsigned Flags = read_vbr_uint(); + + const PointerType *PTy = dyn_cast(getType(TypeID)); + const FunctionType *FTy = + PTy ? dyn_cast(PTy->getElementType()) : 0; + + if (!FTy || !InlineAsm::Verify(FTy, ConstraintStr)) + error("Invalid constraints for inline asm"); + if (Flags & ~1U) + error("Invalid flags for inline asm"); + bool HasSideEffects = Flags & 1; + return InlineAsm::get(FTy, AsmStr, ConstraintStr, HasSideEffects); + } + + --isExprNumArgs; + } + // FIXME: Encoding of constant exprs could be much more compact! std::vector ArgVec; ArgVec.reserve(isExprNumArgs); @@ -1296,22 +1455,22 @@ Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { // Bytecode files before LLVM 1.4 need have a missing terminator inst. 
if (hasNoUnreachableInst) Opcode++; - + // Read the slot number and types of each of the arguments for (unsigned i = 0; i != isExprNumArgs; ++i) { unsigned ArgValSlot = read_vbr_uint(); unsigned ArgTypeSlot = 0; if (read_typeid(ArgTypeSlot)) error("Invalid argument type (type type) for constant value"); - + // Get the arg value from its slot if it exists, otherwise a placeholder ArgVec.push_back(getConstantValue(ArgTypeSlot, ArgValSlot)); } - + // Construct a ConstantExpr of the appropriate kind if (isExprNumArgs == 1) { // All one-operand expressions if (Opcode != Instruction::Cast) - error("Only Cast instruction has one argument for ConstantExpr"); + error("Only cast instruction has one argument for ConstantExpr"); Constant* Result = ConstantExpr::getCast(ArgVec[0], getType(TypeID)); if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); @@ -1338,72 +1497,96 @@ Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { } else if (Opcode == Instruction::Select) { if (ArgVec.size() != 3) error("Select instruction must have three arguments."); - Constant* Result = ConstantExpr::getSelect(ArgVec[0], ArgVec[1], + Constant* Result = ConstantExpr::getSelect(ArgVec[0], ArgVec[1], ArgVec[2]); if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); return Result; + } else if (Opcode == Instruction::ExtractElement) { + if (ArgVec.size() != 2 || + !ExtractElementInst::isValidOperands(ArgVec[0], ArgVec[1])) + error("Invalid extractelement constand expr arguments"); + Constant* Result = ConstantExpr::getExtractElement(ArgVec[0], ArgVec[1]); + if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; + } else if (Opcode == Instruction::InsertElement) { + if (ArgVec.size() != 3 || + !InsertElementInst::isValidOperands(ArgVec[0], ArgVec[1], ArgVec[2])) + error("Invalid insertelement constand expr arguments"); + + Constant *Result = + ConstantExpr::getInsertElement(ArgVec[0], ArgVec[1], ArgVec[2]); + if (Handler) 
Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; + } else if (Opcode == Instruction::ShuffleVector) { + if (ArgVec.size() != 3 || + !ShuffleVectorInst::isValidOperands(ArgVec[0], ArgVec[1], ArgVec[2])) + error("Invalid shufflevector constant expr arguments."); + Constant *Result = + ConstantExpr::getShuffleVector(ArgVec[0], ArgVec[1], ArgVec[2]); + if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); + return Result; } else { // All other 2-operand expressions Constant* Result = ConstantExpr::get(Opcode, ArgVec[0], ArgVec[1]); if (Handler) Handler->handleConstantExpression(Opcode, ArgVec, Result); return Result; } } - + // Ok, not an ConstantExpr. We now know how to read the given type... const Type *Ty = getType(TypeID); + Constant *Result = 0; switch (Ty->getTypeID()) { case Type::BoolTyID: { unsigned Val = read_vbr_uint(); - if (Val != 0 && Val != 1) + if (Val != 0 && Val != 1) error("Invalid boolean value read."); - Constant* Result = ConstantBool::get(Val == 1); + Result = ConstantBool::get(Val == 1); if (Handler) Handler->handleConstantValue(Result); - return Result; + break; } case Type::UByteTyID: // Unsigned integer types... case Type::UShortTyID: case Type::UIntTyID: { unsigned Val = read_vbr_uint(); - if (!ConstantUInt::isValueValidForType(Ty, Val)) + if (!ConstantUInt::isValueValidForType(Ty, Val)) error("Invalid unsigned byte/short/int read."); - Constant* Result = ConstantUInt::get(Ty, Val); + Result = ConstantUInt::get(Ty, Val); if (Handler) Handler->handleConstantValue(Result); - return Result; + break; } - case Type::ULongTyID: { - Constant* Result = ConstantUInt::get(Ty, read_vbr_uint64()); + case Type::ULongTyID: + Result = ConstantUInt::get(Ty, read_vbr_uint64()); if (Handler) Handler->handleConstantValue(Result); - return Result; - } - + break; + case Type::SByteTyID: // Signed integer types... 
case Type::ShortTyID: - case Type::IntTyID: { - case Type::LongTyID: + case Type::IntTyID: + case Type::LongTyID: { int64_t Val = read_vbr_int64(); - if (!ConstantSInt::isValueValidForType(Ty, Val)) + if (!ConstantSInt::isValueValidForType(Ty, Val)) error("Invalid signed byte/short/int/long read."); - Constant* Result = ConstantSInt::get(Ty, Val); + Result = ConstantSInt::get(Ty, Val); if (Handler) Handler->handleConstantValue(Result); - return Result; + break; } case Type::FloatTyID: { float Val; read_float(Val); - Constant* Result = ConstantFP::get(Ty, Val); + Result = ConstantFP::get(Ty, Val); if (Handler) Handler->handleConstantValue(Result); - return Result; + break; } case Type::DoubleTyID: { double Val; read_double(Val); - Constant* Result = ConstantFP::get(Ty, Val); + Result = ConstantFP::get(Ty, Val); if (Handler) Handler->handleConstantValue(Result); - return Result; + break; } case Type::ArrayTyID: { @@ -1415,9 +1598,9 @@ Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { while (NumElements--) // Read all of the elements of the constant. Elements.push_back(getConstantValue(TypeSlot, read_vbr_uint())); - Constant* Result = ConstantArray::get(AT, Elements); + Result = ConstantArray::get(AT, Elements); if (Handler) Handler->handleConstantArray(AT, Elements, TypeSlot, Result); - return Result; + break; } case Type::StructTyID: { @@ -1429,10 +1612,10 @@ Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { Elements.push_back(getConstantValue(ST->getElementType(i), read_vbr_uint())); - Constant* Result = ConstantStruct::get(ST, Elements); + Result = ConstantStruct::get(ST, Elements); if (Handler) Handler->handleConstantStruct(ST, Elements, Result); - return Result; - } + break; + } case Type::PackedTyID: { const PackedType *PT = cast(Ty); @@ -1443,15 +1626,15 @@ Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { while (NumElements--) // Read all of the elements of the constant. 
Elements.push_back(getConstantValue(TypeSlot, read_vbr_uint())); - Constant* Result = ConstantPacked::get(PT, Elements); + Result = ConstantPacked::get(PT, Elements); if (Handler) Handler->handleConstantPacked(PT, Elements, TypeSlot, Result); - return Result; + break; } - case Type::PointerTyID: { // ConstantPointerRef value... + case Type::PointerTyID: { // ConstantPointerRef value (backwards compat). const PointerType *PT = cast(Ty); unsigned Slot = read_vbr_uint(); - + // Check to see if we have already read this global variable... Value *Val = getValue(TypeID, Slot, false); if (Val) { @@ -1469,16 +1652,25 @@ Constant *BytecodeReader::ParseConstantValue(unsigned TypeID) { Ty->getDescription()); break; } - return 0; + + // Check that we didn't read a null constant if they are implicit for this + // type plane. Do not do this check for constantexprs, as they may be folded + // to a null value in a way that isn't predicted when a .bc file is initially + // produced. + assert((!isa(Result) || !cast(Result)->isNullValue()) || + !hasImplicitNull(TypeID) && + "Cannot read null values from bytecode!"); + return Result; } -/// Resolve references for constants. This function resolves the forward -/// referenced constants in the ConstantFwdRefs map. It uses the +/// Resolve references for constants. This function resolves the forward +/// referenced constants in the ConstantFwdRefs map. It uses the /// replaceAllUsesWith method of Value class to substitute the placeholder /// instance with the actual instance. -void BytecodeReader::ResolveReferencesToConstant(Constant *NewV, unsigned Slot){ +void BytecodeReader::ResolveReferencesToConstant(Constant *NewV, unsigned Typ, + unsigned Slot) { ConstantRefsType::iterator I = - ConstantFwdRefs.find(std::make_pair(NewV->getType(), Slot)); + ConstantFwdRefs.find(std::make_pair(Typ, Slot)); if (I == ConstantFwdRefs.end()) return; // Never forward referenced? Value *PH = I->second; // Get the placeholder... 
@@ -1496,14 +1688,14 @@ void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){ const Type *Ty = getType(Typ); if (!isa(Ty)) error("String constant data invalid!"); - + const ArrayType *ATy = cast(Ty); if (ATy->getElementType() != Type::SByteTy && ATy->getElementType() != Type::UByteTy) error("String constant data invalid!"); - + // Read character data. The type tells us how long the string is. - char Data[ATy->getNumElements()]; + char *Data = reinterpret_cast(alloca(ATy->getNumElements())); read_data(Data, Data+ATy->getNumElements()); std::vector Elements(ATy->getNumElements()); @@ -1517,13 +1709,13 @@ void BytecodeReader::ParseStringConstants(unsigned NumEntries, ValueTable &Tab){ // Create the constant, inserting it as needed. Constant *C = ConstantArray::get(ATy, Elements); unsigned Slot = insertValue(C, Typ, Tab); - ResolveReferencesToConstant(C, Slot); + ResolveReferencesToConstant(C, Typ, Slot); if (Handler) Handler->handleConstantString(cast(C)); } } /// Parse the constant pool. -void BytecodeReader::ParseConstantPool(ValueTable &Tab, +void BytecodeReader::ParseConstantPool(ValueTable &Tab, TypeListTy &TypeTab, bool isFunction) { if (Handler) Handler->handleGlobalConstantsBegin(); @@ -1553,9 +1745,9 @@ void BytecodeReader::ParseConstantPool(ValueTable &Tab, ParseStringConstants(NumEntries, Tab); } else { for (unsigned i = 0; i < NumEntries; ++i) { - Constant *C = ParseConstantValue(Typ); - assert(C && "ParseConstantValue returned NULL!"); - unsigned Slot = insertValue(C, Typ, Tab); + Value *V = ParseConstantPoolValue(Typ); + assert(V && "ParseConstantPoolValue returned NULL!"); + unsigned Slot = insertValue(V, Typ, Tab); // If we are reading a function constant table, make sure that we adjust // the slot number to be the real global constant number. 
@@ -1563,10 +1755,23 @@ void BytecodeReader::ParseConstantPool(ValueTable &Tab, if (&Tab != &ModuleValues && Typ < ModuleValues.size() && ModuleValues[Typ]) Slot += ModuleValues[Typ]->size(); - ResolveReferencesToConstant(C, Slot); + if (Constant *C = dyn_cast(V)) + ResolveReferencesToConstant(C, Typ, Slot); } } } + + // After we have finished parsing the constant pool, we had better not have + // any dangling references left. + if (!ConstantFwdRefs.empty()) { + ConstantRefsType::const_iterator I = ConstantFwdRefs.begin(); + Constant* missingConst = I->second; + error(utostr(ConstantFwdRefs.size()) + + " unresolved constant reference exist. First one is '" + + missingConst->getName() + "' of type '" + + missingConst->getType()->getDescription() + "'."); + } + checkPastBlockEnd("Constant Pool"); if (Handler) Handler->handleGlobalConstantsEnd(); } @@ -1586,6 +1791,9 @@ void BytecodeReader::ParseFunctionBody(Function* F) { case 2: Linkage = GlobalValue::AppendingLinkage; break; case 3: Linkage = GlobalValue::InternalLinkage; break; case 4: Linkage = GlobalValue::LinkOnceLinkage; break; + case 5: Linkage = GlobalValue::DLLImportLinkage; break; + case 6: Linkage = GlobalValue::DLLExportLinkage; break; + case 7: Linkage = GlobalValue::ExternalWeakLinkage; break; default: error("Invalid linkage type for Function."); Linkage = GlobalValue::InternalLinkage; @@ -1645,7 +1853,7 @@ void BytecodeReader::ParseFunctionBody(Function* F) { InsertedArguments = true; } - if (BlockNum) + if (BlockNum) error("Already parsed basic blocks!"); BlockNum = ParseInstructionList(F); break; @@ -1657,7 +1865,7 @@ void BytecodeReader::ParseFunctionBody(Function* F) { default: At += Size; - if (OldAt > At) + if (OldAt > At) error("Wrapped around reading bytecode."); break; } @@ -1675,42 +1883,32 @@ void BytecodeReader::ParseFunctionBody(Function* F) { // Resolve forward references. Replace any uses of a forward reference value // with the real value. 
- - // replaceAllUsesWith is very inefficient for instructions which have a LARGE - // number of operands. PHI nodes often have forward references, and can also - // often have a very large number of operands. - // - // FIXME: REEVALUATE. replaceAllUsesWith is _much_ faster now, and this code - // should be simplified back to using it! - // - std::map ForwardRefMapping; - for (std::map, Value*>::iterator - I = ForwardReferences.begin(), E = ForwardReferences.end(); - I != E; ++I) - ForwardRefMapping[I->second] = getValue(I->first.first, I->first.second, - false); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (Value* V = I->getOperand(i)) - if (Argument *A = dyn_cast(V)) { - std::map::iterator It = ForwardRefMapping.find(A); - if (It != ForwardRefMapping.end()) I->setOperand(i, It->second); - } - while (!ForwardReferences.empty()) { - std::map, Value*>::iterator I = - ForwardReferences.begin(); + std::map, Value*>::iterator + I = ForwardReferences.begin(); + Value *V = getValue(I->first.first, I->first.second, false); Value *PlaceHolder = I->second; + PlaceHolder->replaceAllUsesWith(V); ForwardReferences.erase(I); - - // Now that all the uses are gone, delete the placeholder... - // If we couldn't find a def (error case), then leak a little - // memory, because otherwise we can't remove all uses! delete PlaceHolder; } + // If upgraded intrinsic functions were detected during reading of the + // module information, then we need to look for instructions that need to + // be upgraded. This can't be done while the instructions are read in because + // additional instructions inserted mess up the slot numbering. 
+ if (!upgradedFunctions.empty()) { + for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); + II != IE;) + if (CallInst* CI = dyn_cast(II++)) { + std::map::iterator FI = + upgradedFunctions.find(CI->getCalledFunction()); + if (FI != upgradedFunctions.end()) + UpgradeIntrinsicCall(CI, FI->second); + } + } + // Clear out function-level types... FunctionTypes.clear(); CompactionTypes.clear(); @@ -1722,7 +1920,7 @@ void BytecodeReader::ParseFunctionBody(Function* F) { /// This function parses LLVM functions lazily. It obtains the type of the /// function and records where the body of the function is in the bytecode -/// buffer. The caller can then use the ParseNextFunction and +/// buffer. The caller can then use the ParseNextFunction and /// ParseAllFunctionBodies to get handler events for the functions. void BytecodeReader::ParseFunctionLazily() { if (FunctionSignatureList.empty()) @@ -1734,24 +1932,32 @@ void BytecodeReader::ParseFunctionLazily() { // Save the information for future reading of the function LazyFunctionLoadMap[Func] = LazyFunctionInfo(BlockStart, BlockEnd); + // This function has a body but it's not loaded so it appears `External'. + // Mark it as a `Ghost' instead to notify the users that it has a body. + Func->setLinkage(GlobalValue::GhostLinkage); + // Pretend we've `parsed' this function At = BlockEnd; } -/// The ParserFunction method lazily parses one function. Use this method to -/// casue the parser to parse a specific function in the module. Note that -/// this will remove the function from what is to be included by +/// The ParserFunction method lazily parses one function. Use this method to +/// casue the parser to parse a specific function in the module. Note that +/// this will remove the function from what is to be included by /// ParseAllFunctionBodies. 
/// @see ParseAllFunctionBodies /// @see ParseBytecode -void BytecodeReader::ParseFunction(Function* Func) { +bool BytecodeReader::ParseFunction(Function* Func, std::string* ErrMsg) { + + if (setjmp(context)) + return true; + // Find {start, end} pointers and slot in the map. If not there, we're done. LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.find(Func); // Make sure we found it if (Fi == LazyFunctionLoadMap.end()) { error("Unrecognized function of type " + Func->getType()->getDescription()); - return; + return true; } BlockStart = At = Fi->second.Buf; @@ -1761,6 +1967,7 @@ void BytecodeReader::ParseFunction(Function* Func) { LazyFunctionLoadMap.erase(Fi); this->ParseFunctionBody(Func); + return false; } /// The ParseAllFunctionBodies method parses through all the previously @@ -1770,7 +1977,10 @@ void BytecodeReader::ParseFunction(Function* Func) { /// the function definitions are located. This function uses that information /// to materialize the functions. /// @see ParseBytecode -void BytecodeReader::ParseAllFunctionBodies() { +bool BytecodeReader::ParseAllFunctionBodies(std::string* ErrMsg) { + if (setjmp(context)) + return true; + LazyFunctionMap::iterator Fi = LazyFunctionLoadMap.begin(); LazyFunctionMap::iterator Fe = LazyFunctionLoadMap.end(); @@ -1778,9 +1988,11 @@ void BytecodeReader::ParseAllFunctionBodies() { Function* Func = Fi->first; BlockStart = At = Fi->second.Buf; BlockEnd = Fi->second.EndBuf; - this->ParseFunctionBody(Func); + ParseFunctionBody(Func); ++Fi; } + LazyFunctionLoadMap.clear(); + return false; } /// Parse the global type list @@ -1800,6 +2012,10 @@ void BytecodeReader::ParseModuleGlobalInfo() { if (Handler) Handler->handleModuleGlobalsBegin(); + // SectionID - If a global has an explicit section specified, this map + // remembers the ID until we can translate it into a string. + std::map SectionID; + // Read global variables... 
unsigned VarType = read_vbr_uint(); while (VarType != Type::VoidTyID) { // List is terminated by Void @@ -1810,39 +2026,59 @@ void BytecodeReader::ParseModuleGlobalInfo() { error("Invalid type (type type) for global var!"); unsigned LinkageID = (VarType >> 2) & 7; bool isConstant = VarType & 1; - bool hasInitializer = VarType & 2; - GlobalValue::LinkageTypes Linkage; + bool hasInitializer = (VarType & 2) != 0; + unsigned Alignment = 0; + unsigned GlobalSectionID = 0; + + // An extension word is present when linkage = 3 (internal) and hasinit = 0. + if (LinkageID == 3 && !hasInitializer) { + unsigned ExtWord = read_vbr_uint(); + // The extension word has this format: bit 0 = has initializer, bit 1-3 = + // linkage, bit 4-8 = alignment (log2), bits 10+ = future use. + hasInitializer = ExtWord & 1; + LinkageID = (ExtWord >> 1) & 7; + Alignment = (1 << ((ExtWord >> 4) & 31)) >> 1; + + if (ExtWord & (1 << 9)) // Has a section ID. + GlobalSectionID = read_vbr_uint(); + } + GlobalValue::LinkageTypes Linkage; switch (LinkageID) { case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; case 2: Linkage = GlobalValue::AppendingLinkage; break; case 3: Linkage = GlobalValue::InternalLinkage; break; case 4: Linkage = GlobalValue::LinkOnceLinkage; break; - default: + case 5: Linkage = GlobalValue::DLLImportLinkage; break; + case 6: Linkage = GlobalValue::DLLExportLinkage; break; + case 7: Linkage = GlobalValue::ExternalWeakLinkage; break; + default: error("Unknown linkage type: " + utostr(LinkageID)); Linkage = GlobalValue::InternalLinkage; break; } const Type *Ty = getType(SlotNo); - if (!Ty) { + if (!Ty) error("Global has no type! SlotNo=" + utostr(SlotNo)); - } - if (!isa(Ty)) { + if (!isa(Ty)) error("Global not a pointer type! Ty= " + Ty->getDescription()); - } const Type *ElTy = cast(Ty)->getElementType(); // Create the global variable... 
GlobalVariable *GV = new GlobalVariable(ElTy, isConstant, Linkage, 0, "", TheModule); + GV->setAlignment(Alignment); insertValue(GV, SlotNo, ModuleValues); + if (GlobalSectionID != 0) + SectionID[GV] = GlobalSectionID; + unsigned initSlot = 0; - if (hasInitializer) { + if (hasInitializer) { initSlot = read_vbr_uint(); GlobalInits.push_back(std::make_pair(GV, initSlot)); } @@ -1862,29 +2098,55 @@ void BytecodeReader::ParseModuleGlobalInfo() { FnSignature = (FnSignature << 5) + 1; // List is terminated by VoidTy. - while ((FnSignature >> 5) != Type::VoidTyID) { - const Type *Ty = getType(FnSignature >> 5); + while (((FnSignature & (~0U >> 1)) >> 5) != Type::VoidTyID) { + const Type *Ty = getType((FnSignature & (~0U >> 1)) >> 5); if (!isa(Ty) || !isa(cast(Ty)->getElementType())) { - error("Function not a pointer to function type! Ty = " + + error("Function not a pointer to function type! Ty = " + Ty->getDescription()); } // We create functions by passing the underlying FunctionType to create... - const FunctionType* FTy = + const FunctionType* FTy = cast(cast(Ty)->getElementType()); - - // Insert the place hodler - Function* Func = new Function(FTy, GlobalValue::InternalLinkage, + // Insert the place holder. + Function *Func = new Function(FTy, GlobalValue::ExternalLinkage, "", TheModule); - insertValue(Func, FnSignature >> 5, ModuleValues); - // Flags are not used yet. - //unsigned Flags = FnSignature & 31; + insertValue(Func, (FnSignature & (~0U >> 1)) >> 5, ModuleValues); - // Save this for later so we know type of lazily instantiated functions - FunctionSignatureList.push_back(Func); + // Flags are not used yet. + unsigned Flags = FnSignature & 31; + + // Save this for later so we know type of lazily instantiated functions. + // Note that known-external functions do not have FunctionInfo blocks, so we + // do not add them to the FunctionSignatureList. 
+ if ((Flags & (1 << 4)) == 0) + FunctionSignatureList.push_back(Func); + + // Get the calling convention from the low bits. + unsigned CC = Flags & 15; + unsigned Alignment = 0; + if (FnSignature & (1 << 31)) { // Has extension word? + unsigned ExtWord = read_vbr_uint(); + Alignment = (1 << (ExtWord & 31)) >> 1; + CC |= ((ExtWord >> 5) & 15) << 4; + + if (ExtWord & (1 << 10)) // Has a section ID. + SectionID[Func] = read_vbr_uint(); + + // Parse external declaration linkage + switch ((ExtWord >> 11) & 3) { + case 0: break; + case 1: Func->setLinkage(Function::DLLImportLinkage); break; + case 2: Func->setLinkage(Function::ExternalWeakLinkage); break; + default: assert(0 && "Unsupported external linkage"); + } + } + + Func->setCallingConv(CC-1); + Func->setAlignment(Alignment); if (Handler) Handler->handleFunctionDeclaration(Func); @@ -1894,32 +2156,55 @@ void BytecodeReader::ParseModuleGlobalInfo() { FnSignature = (FnSignature << 5) + 1; } - // Now that the function signature list is set up, reverse it so that we can + // Now that the function signature list is set up, reverse it so that we can // remove elements efficiently from the back of the vector. std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end()); - // If this bytecode format has dependent library information in it .. - if (!hasNoDependentLibraries) { - // Read in the number of dependent library items that follow + /// SectionNames - This contains the list of section names encoded in the + /// moduleinfoblock. Functions and globals with an explicit section index + /// into this to get their section name. + std::vector SectionNames; + + if (hasInconsistentModuleGlobalInfo) { + align32(); + } else if (!hasNoDependentLibraries) { + // If this bytecode format has dependent library information in it, read in + // the number of dependent library items that follow. 
unsigned num_dep_libs = read_vbr_uint(); std::string dep_lib; - while( num_dep_libs-- ) { + while (num_dep_libs--) { dep_lib = read_str(); TheModule->addLibrary(dep_lib); if (Handler) Handler->handleDependentLibrary(dep_lib); } - - // Read target triple and place into the module + // Read target triple and place into the module. std::string triple = read_str(); TheModule->setTargetTriple(triple); if (Handler) Handler->handleTargetTriple(triple); + + if (!hasAlignment && At != BlockEnd) { + // If the file has section info in it, read the section names now. + unsigned NumSections = read_vbr_uint(); + while (NumSections--) + SectionNames.push_back(read_str()); + } + + // If the file has module-level inline asm, read it now. + if (!hasAlignment && At != BlockEnd) + TheModule->setModuleInlineAsm(read_str()); } - if (hasInconsistentModuleGlobalInfo) - align32(); + // If any globals are in specified sections, assign them now. + for (std::map::iterator I = SectionID.begin(), E = + SectionID.end(); I != E; ++I) + if (I->second) { + if (I->second > SectionID.size()) + error("SectionID out of range for global!"); + I->first->setSection(SectionNames[I->second-1]); + } // This is for future proofing... in the future extra fields may be added that // we don't understand, so we transparently ignore them. @@ -1942,7 +2227,7 @@ void BytecodeReader::ParseVersionInfo() { bool hasNoEndianness = Version & 4; bool hasNoPointerSize = Version & 8; - + RevisionNum = Version >> 4; // Default values for the current bytecode version @@ -1954,9 +2239,6 @@ void BytecodeReader::ParseVersionInfo() { has32BitTypes = false; hasNoDependentLibraries = false; hasAlignment = false; - hasInconsistentBBSlotNums = false; - hasVBRByteTypes = false; - hasUnnecessaryModuleBlockId = false; hasNoUndefValue = false; hasNoFlagsForFunctions = false; hasNoUnreachableInst = false; @@ -1982,12 +2264,12 @@ void BytecodeReader::ParseVersionInfo() { // LLVM 1.2 and before had the Type class derive from Value class. 
This // changed in release 1.3 and consequently LLVM 1.3 bytecode files are - // written differently because Types can no longer be part of the + // written differently because Types can no longer be part of the // type planes for Values. hasTypeDerivedFromValue = true; // FALL THROUGH - + case 2: // 1.2.5 (Not Released) // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful, @@ -2004,7 +2286,7 @@ void BytecodeReader::ParseVersionInfo() { // in various places and to ensure consistency. has32BitTypes = true; - // LLVM 1.2 and earlier did not provide a target triple nor a list of + // LLVM 1.2 and earlier did not provide a target triple nor a list of // libraries on which the bytecode is dependent. LLVM 1.3 provides these // features, for use in future versions of LLVM. hasNoDependentLibraries = true; @@ -2013,13 +2295,13 @@ void BytecodeReader::ParseVersionInfo() { case 3: // LLVM 1.3 (Released) // LLVM 1.3 and earlier caused alignment bytes to be written on some block - // boundaries and at the end of some strings. In extreme cases (e.g. lots + // boundaries and at the end of some strings. In extreme cases (e.g. lots // of GEP references to a constant array), this can increase the file size // by 30% or more. In version 1.4 alignment is done away with completely. hasAlignment = true; // FALL THROUGH - + case 4: // 1.3.1 (Not Released) // In version 4, we did not support the 'undef' constant. hasNoUndefValue = true; @@ -2035,24 +2317,8 @@ void BytecodeReader::ParseVersionInfo() { // FALL THROUGH - case 5: // 1.x.x (Not Released) + case 5: // 1.4 (Released) break; - // FIXME: NONE of this is implemented yet! - - // In version 5, basic blocks have a minimum index of 0 whereas all the - // other primitives have a minimum index of 1 (because 0 is the "null" - // value. In version 5, we made this consistent. 
- hasInconsistentBBSlotNums = true; - - // In version 5, the types SByte and UByte were encoded as vbr_uint so that - // signed values > 63 and unsigned values >127 would be encoded as two - // bytes. In version 5, they are encoded directly in a single byte. - hasVBRByteTypes = true; - - // In version 5, modules begin with a "Module Block" which encodes a 4-byte - // integer value 0x01 to identify the module block. This is unnecessary and - // removed in version 5. - hasUnnecessaryModuleBlockId = true; default: error("Unknown bytecode version number: " + itostr(RevisionNum)); @@ -2095,7 +2361,7 @@ void BytecodeReader::ParseModule() { SeenGlobalTypePlane = true; break; - case BytecodeFormat::ModuleGlobalInfoBlockID: + case BytecodeFormat::ModuleGlobalInfoBlockID: if (SeenModuleGlobalInfo) error("Two ModuleGlobalInfo Blocks Encountered!"); ParseModuleGlobalInfo(); @@ -2138,7 +2404,7 @@ void BytecodeReader::ParseModule() { const llvm::PointerType* GVType = GV->getType(); unsigned TypeSlot = getTypeSlot(GVType->getElementType()); if (Constant *CV = getConstantValue(TypeSlot, Slot)) { - if (GV->hasInitializer()) + if (GV->hasInitializer()) error("Global *already* has an initializer?!"); if (Handler) Handler->handleGlobalInitializer(GV,CV); GV->setInitializer(CV); @@ -2146,6 +2412,9 @@ void BytecodeReader::ParseModule() { error("Cannot find initializer value."); } + if (!ConstantFwdRefs.empty()) + error("Use of undefined constants in a module"); + /// Make sure we pulled them all out. If we didn't then there's a declaration /// but a missing body. That's not allowed. if (!FunctionSignatureList.empty()) @@ -2154,76 +2423,113 @@ void BytecodeReader::ParseModule() { /// This function completely parses a bytecode buffer given by the \p Buf /// and \p Length parameters. 
-void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, - const std::string &ModuleID) { +bool BytecodeReader::ParseBytecode(volatile BufPtr Buf, unsigned Length, + const std::string &ModuleID, + std::string* ErrMsg) { + + /// We handle errors by having error() longjmp back to this setjmp call. + if (setjmp(context)) { + // Cleanup after error + if (Handler) Handler->handleError(ErrorMsg); + freeState(); + delete TheModule; + TheModule = 0; + if (decompressedBlock != 0 ) { + ::free(decompressedBlock); + decompressedBlock = 0; + } + // Set caller's error message, if requested + if (ErrMsg) + *ErrMsg = ErrorMsg; + // Indicate an error occurred + return true; + } - try { - At = MemStart = BlockStart = Buf; - MemEnd = BlockEnd = Buf + Length; + RevisionNum = 0; + At = MemStart = BlockStart = Buf; + MemEnd = BlockEnd = Buf + Length; - // Create the module - TheModule = new Module(ModuleID); + // Create the module + TheModule = new Module(ModuleID); - if (Handler) Handler->handleStart(TheModule, Length); + if (Handler) Handler->handleStart(TheModule, Length); - // Read and check signature... - unsigned Sig = read_uint(); - if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { - error("Invalid bytecode signature: " + utostr(Sig)); - } + // Read the four bytes of the signature. + unsigned Sig = read_uint(); - // Tell the handler we're starting a module - if (Handler) Handler->handleModuleBegin(ModuleID); + // If this is a compressed file + if (Sig == ('l' | ('l' << 8) | ('v' << 16) | ('c' << 24))) { - // Get the module block and size and verify. This is handled specially - // because the module block/size is always written in long format. Other - // blocks are written in short format so the read_block method is used. - unsigned Type, Size; - Type = read_uint(); - Size = read_uint(); - if (Type != BytecodeFormat::ModuleBlockID) { - error("Expected Module Block! Type:" + utostr(Type) + ", Size:" - + utostr(Size)); - } + // Invoke the decompression of the bytecode.
Note that we have to skip the + // file's magic number which is not part of the compressed block. Hence, + // the Buf+4 and Length-4. The result goes into decompressedBlock, a data + // member for retention until BytecodeReader is destructed. + unsigned decompressedLength = Compressor::decompressToNewBuffer( + (char*)Buf+4,Length-4,decompressedBlock); + + // We must adjust the buffer pointers used by the bytecode reader to point + // into the new decompressed block. After decompression, the + // decompressedBlock will point to a contiguous memory area that has + // the decompressed data. + At = MemStart = BlockStart = Buf = (BufPtr) decompressedBlock; + MemEnd = BlockEnd = Buf + decompressedLength; + + // else if this isn't a regular (uncompressed) bytecode file, then it's + // an error; generate that now. + } else if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { + error("Invalid bytecode signature: " + utohexstr(Sig)); + } - // It looks like the darwin ranlib program is broken, and adds trailing - // garbage to the end of some bytecode files. This hack allows the bc - // reader to ignore trailing garbage on bytecode files. - if (At + Size < MemEnd) - MemEnd = BlockEnd = At+Size; + // Tell the handler we're starting a module + if (Handler) Handler->handleModuleBegin(ModuleID); - if (At + Size != MemEnd) - error("Invalid Top Level Block Length! Type:" + utostr(Type) - + ", Size:" + utostr(Size)); + // Get the module block and size and verify. This is handled specially + // because the module block/size is always written in long format. Other + // blocks are written in short format so the read_block method is used. + unsigned Type, Size; + Type = read_uint(); + Size = read_uint(); + if (Type != BytecodeFormat::ModuleBlockID) { + error("Expected Module Block!
Type:" + utostr(Type) + ", Size:" + + utostr(Size)); + } - // Parse the module contents - this->ParseModule(); + // It looks like the darwin ranlib program is broken, and adds trailing + // garbage to the end of some bytecode files. This hack allows the bc + // reader to ignore trailing garbage on bytecode files. + if (At + Size < MemEnd) + MemEnd = BlockEnd = At+Size; + + if (At + Size != MemEnd) + error("Invalid Top Level Block Length! Type:" + utostr(Type) + + ", Size:" + utostr(Size)); + + // Parse the module contents + this->ParseModule(); + + // Check for missing functions + if (hasFunctions()) + error("Function expected, but bytecode stream ended!"); + + // Look for intrinsic functions to upgrade, upgrade them, and save the + // mapping from old function to new for use later when instructions are + // converted. + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) + if (Function* newF = UpgradeIntrinsicFunction(FI)) { + upgradedFunctions.insert(std::make_pair(FI, newF)); + FI->setName(""); + } - // Check for missing functions - if (hasFunctions()) - error("Function expected, but bytecode stream ended!"); + // Tell the handler we're done with the module + if (Handler) + Handler->handleModuleEnd(ModuleID); - // Tell the handler we're done with the module - if (Handler) - Handler->handleModuleEnd(ModuleID); + // Tell the handler we're finished the parse + if (Handler) Handler->handleFinish(); - // Tell the handler we're finished the parse - if (Handler) Handler->handleFinish(); + return false; - } catch (std::string& errstr) { - if (Handler) Handler->handleError(errstr); - freeState(); - delete TheModule; - TheModule = 0; - throw; - } catch (...)
{ - std::string msg("Unknown Exception Occurred"); - if (Handler) Handler->handleError(msg); - freeState(); - delete TheModule; - TheModule = 0; - throw msg; - } } //===----------------------------------------------------------------------===// @@ -2232,4 +2538,3 @@ void BytecodeReader::ParseBytecode(BufPtr Buf, unsigned Length, BytecodeHandler::~BytecodeHandler() {} -// vim: sw=2