1 //===- Reader.cpp - Code to read bytecode files ---------------------------===//
3 // This library implements the functionality defined in llvm/Bytecode/Reader.h
5 // Note that this library should be as fast as possible, reentrant, and
8 // TODO: Return error messages to caller instead of printing them out directly.
9 // TODO: Allow passing in an option to ignore the symbol table
11 //===----------------------------------------------------------------------===//
13 #include "ReaderInternals.h"
14 #include "llvm/Bytecode/Reader.h"
15 #include "llvm/Bytecode/Format.h"
16 #include "llvm/Module.h"
17 #include "llvm/Constants.h"
18 #include "llvm/iPHINode.h"
19 #include "llvm/iOther.h"
20 #include <sys/types.h>
// getTypeSlot - Compute the slot number that identifies type Ty and store it
// in Slot.  Primitive types use their primitive ID.  Derived types are searched
// for in the method-level type table first and then in the module-level table;
// their slots are numbered upward from FirstDerivedTyID, with all module-level
// types preceding the method-level ones.  Returns true when Ty is found in
// neither table.
// NOTE(review): the success return is not visible here; presumably false.
30 bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
31 if (Ty->isPrimitiveType()) {
32 Slot = Ty->getPrimitiveID();
34 // Check the method level types first...
35 TypeValuesListTy::iterator I = find(MethodTypeValues.begin(),
36 MethodTypeValues.end(), Ty);
37 if (I != MethodTypeValues.end()) {
// Method-level slots come after every module-level type; the pointer
// difference is the element's index within MethodTypeValues.
38 Slot = FirstDerivedTyID+ModuleTypeValues.size()+
39 (&*I - &MethodTypeValues[0]);
41 I = find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty);
42 if (I == ModuleTypeValues.end()) return true; // Didn't find type!
// Module-level slot: index within ModuleTypeValues, offset by the first
// derived type ID.
43 Slot = FirstDerivedTyID + (&*I - &ModuleTypeValues[0]);
46 //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << "\n";
// getType - Resolve a bytecode type ID to a Type.  IDs below
// Type::NumPrimitiveIDs are first tried as primitive types; otherwise the ID is
// looked up on the Type::TypeTy plane through getValue with Create=false, so no
// placeholder is ever made for a type.  cast_or_null lets a null lookup result
// reach the caller as a null Type.
50 const Type *BytecodeParser::getType(unsigned ID) {
51 if (ID < Type::NumPrimitiveIDs) {
52 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
56 //cerr << "Looking up Type ID: " << ID << "\n";
57 const Value *V = getValue(Type::TypeTy, ID, false);
58 return cast_or_null<Type>(V);
// insertValue - Append Val to the plane of ValueTab selected by the slot of
// its type, growing ValueTab when that plane does not exist yet.  Returns the
// index Val received within its plane, or -1 if getTypeSlot fails for the
// value's type.  Types themselves must never be inserted this way (asserted).
61 int BytecodeParser::insertValue(Value *Val, std::vector<ValueList> &ValueTab) {
63 if (getTypeSlot(Val->getType(), type)) return -1;
64 assert(type != Type::TypeTyID && "Types should never be insertValue'd!");
// Make sure the table has a plane for this type slot.
66 if (ValueTab.size() <= type)
67 ValueTab.resize(type+1, ValueList());
69 //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
70 // << "] = " << Val << "\n";
71 ValueTab[type].push_back(Val);
// Val is now the last element of its plane; report that index.
73 return ValueTab[type].size()-1;
// getValue - Look up value number oNum in the plane belonging to type Ty.
//
// Returns 0 if Ty has no type slot, or if the value does not exist and Create
// is false.  Lookup order for ordinary planes is the module-level table first
// and then the current Values table, with the number rebased past the module
// entries.  The Type plane is implicit: primitive types, then module-level
// types, then method-level types.
//
// When Create is true and nothing is found, a placeholder is allocated
// according to Ty (FunctionPHolder, BBPHolder or ValPHolder) and recorded in a
// late-resolve table so it can be patched later.
// NOTE(review): Num is presumably a working copy of oNum declared on a line
// not visible here -- confirm.
76 Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
78 unsigned type; // The type plane it lives in...
80 if (getTypeSlot(Ty, type)) return 0;
82 if (type == Type::TypeTyID) { // The 'type' plane has implicit values
// Placeholders are never created for types.
83 assert(Create == false);
84 if (Num < Type::NumPrimitiveIDs) {
85 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
86 if (T) return (Value*)T; // Asked for a primitive type...
89 // Otherwise, derived types need offset...
90 Num -= FirstDerivedTyID;
92 // Is it a module level type?
93 if (Num < ModuleTypeValues.size())
94 return (Value*)ModuleTypeValues[Num].get();
96 // Nope, is it a method level type?
97 Num -= ModuleTypeValues.size();
98 if (Num < MethodTypeValues.size())
99 return (Value*)MethodTypeValues[Num].get();
// Ordinary planes: module-level values occupy the low numbers of each plane.
104 if (type < ModuleValues.size()) {
105 if (Num < ModuleValues[type].size())
106 return ModuleValues[type][Num];
// Not a module-level value; rebase Num past the module entries.
107 Num -= ModuleValues[type].size();
110 if (Values.size() > type && Values[type].size() > Num)
111 return Values[type][Num];
113 if (!Create) return 0; // Do not create a placeholder?
// Forward reference: build a placeholder whose kind depends on Ty.  The
// placeholder keeps the original number (oNum), not the rebased Num.
116 switch (Ty->getPrimitiveID()) {
117 case Type::FunctionTyID:
118 cerr << "Creating method pholder! : " << type << ":" << oNum << " "
119 << Ty->getName() << "\n";
120 d = new FunctionPHolder(Ty, oNum);
// Function placeholders are tracked in the module-level late-resolve table.
121 if (insertValue(d, LateResolveModuleValues) == -1) return 0;
123 case Type::LabelTyID:
124 d = new BBPHolder(Ty, oNum);
127 d = new ValPHolder(Ty, oNum);
131 assert(d != 0 && "How did we not make something?");
132 if (insertValue(d, LateResolveValues) == -1) return 0;
136 /// getConstantValue - Just like getValue, except that it returns a null pointer
137 /// only on error. It always returns a constant (meaning that if the value is
138 /// defined, but is not a constant, that is an error). If the specified
139 /// constant hasn't been parsed yet, a placeholder is defined and used. Later,
140 /// after the real value is parsed, the placeholder is eliminated.
///
/// Placeholders are cached in GlobalRefs, keyed by the (Ty, Slot) pair, so
/// repeated forward references to the same slot share one placeholder.
142 Constant *BytecodeParser::getConstantValue(const Type *Ty, unsigned Slot) {
// Create=false: forward references are tracked through GlobalRefs below rather
// than through getValue's own placeholder mechanism.
143 if (Value *V = getValue(Ty, Slot, false))
144 return dyn_cast<Constant>(V); // If we already have the value parsed...
// Not parsed yet: reuse an existing forward-reference placeholder if any.
146 GlobalRefsType::iterator I = GlobalRefs.find(make_pair(Ty, Slot));
147 if (I != GlobalRefs.end()) {
148 BCR_TRACE(5, "Previous forward ref found!\n");
149 return cast<Constant>(I->second);
151 // Create a placeholder for the constant reference and
152 // keep track of the fact that we have a forward ref to recycle it
153 BCR_TRACE(5, "Creating new forward ref to a constant!\n");
154 Constant *C = new ConstPHolder(Ty, Slot);
156 // Keep track of the fact that we have a forward ref to recycle it
157 GlobalRefs.insert(make_pair(make_pair(Ty, Slot), C));
// postResolveValues - Resolve every placeholder recorded in ValTab.  Each plane
// is drained from the back: the placeholder's recorded ID number is looked up
// with getValue (Create=false, so no new placeholder is made), all uses of the
// placeholder are redirected to the real value, and the placeholder is deleted.
// A reference that cannot be resolved is reported on cerr and flagged as an
// error.
// NOTE(review): the return statement is not visible here; presumably the
// function returns true if any reference was unresolvable -- confirm.
164 bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
166 for (unsigned ty = 0; ty < ValTab.size(); ++ty) {
167 ValueList &DL = ValTab[ty];
// Keep going until this plane is empty; Size is refreshed on every pass.
169 while ((Size = DL.size())) {
170 unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
172 Value *D = DL[Size-1];
175 Value *NewDef = getValue(D->getType(), IDNumber, false);
177 Error = true; // Unresolved thinger
178 cerr << "Unresolvable reference found: <"
179 << D->getType()->getDescription() << ">:" << IDNumber << "!\n";
181 // Fixup all of the uses of this placeholder def...
182 D->replaceAllUsesWith(NewDef);
184 // Now that all the uses are gone, delete the placeholder...
185 // If we couldn't find a def (error case), then leak a little
186 delete D; // memory, 'cause otherwise we can't remove all uses!
// ParseBasicBlock - Allocate a new BasicBlock into BB and parse instructions
// from [Buf, EndBuf), registering each one in the Values table and appending it
// to the block's instruction list.  Returns true on failure, in which case the
// partially built block is deleted.
// NOTE(review): when insertValue fails, Inst has not yet been added to BB, so
// deleting BB does not appear to free it -- possible leak, confirm.
194 bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
196 BB = new BasicBlock();
198 while (Buf < EndBuf) {
200 if (ParseInstruction(Buf, EndBuf, Inst,
206 if (Inst == 0) { delete BB; return true; }
207 if (insertValue(Inst, Values) == -1) { delete BB; return true; }
209 BB->getInstList().push_back(Inst);
// ParseSymbolTable - Read a symbol table block from [Buf, EndBuf) and attach
// the names it contains to already-parsed values.  The block is a sequence of
// planes, each with a header of [num entries][type id] followed by that many
// [slot #][name] entries.  Every entry's slot is resolved with getValue
// (Create=false) and the value is named in symbol table ST.  Returns true on a
// read failure, an unknown type ID, or if parsing ran past EndBuf.
217 bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf,
219 while (Buf < EndBuf) {
220 // Symtab block header: [num entries][type id number]
221 unsigned NumEntries, Typ;
222 if (read_vbr(Buf, EndBuf, NumEntries) ||
223 read_vbr(Buf, EndBuf, Typ)) return true;
224 const Type *Ty = getType(Typ);
225 if (Ty == 0) return true;
227 BCR_TRACE(3, "Plane Type: '" << Ty << "' with " << NumEntries <<
230 for (unsigned i = 0; i < NumEntries; ++i) {
231 // Symtab entry: [def slot #][name]
233 if (read_vbr(Buf, EndBuf, slot)) return true;
235 if (read(Buf, EndBuf, Name, false)) // Not aligned...
238 Value *D = getValue(Ty, slot, false); // Find mapping...
240 BCR_TRACE(3, "FAILED LOOKUP: Slot #" << slot << "\n");
243 BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << D;
244 if (!isa<Instruction>(D)) cerr << "\n");
246 D->setName(Name, ST);
// Reading beyond the end of the block means the table was malformed.
250 if (Buf > EndBuf) return true;
// ResolveReferencesToValue - Now that the real value NewV for (type, Slot) is
// available, patch every constant that referenced its forward-reference
// placeholder.  Does nothing if no placeholder was recorded in GlobalRefs for
// that key.  Afterwards the placeholder is deleted and its GlobalRefs entry is
// erased.
254 void BytecodeParser::ResolveReferencesToValue(Value *NewV, unsigned Slot) {
255 GlobalRefsType::iterator I = GlobalRefs.find(make_pair(NewV->getType(),Slot));
256 if (I == GlobalRefs.end()) return; // Never forward referenced?
258 BCR_TRACE(3, "Mutating forward refs!\n");
259 Value *VPH = I->second; // Get the placeholder...
261 // Loop over all of the uses of the Value. What they are depends
262 // on what NewV is. Replacing a use of the old reference takes the
263 // use off the use list, so loop with !use_empty(), not the use_iterator.
264 while (!VPH->use_empty()) {
// Every user of a placeholder is expected to be a Constant (cast<> asserts).
265 Constant *C = cast<Constant>(VPH->use_back());
266 unsigned numReplaced = C->mutateReferences(VPH, NewV);
267 assert(numReplaced > 0 && "Supposed user wasn't really a user?");
269 if (GlobalValue* GVal = dyn_cast<GlobalValue>(NewV)) {
270 // Remove the placeholder GlobalValue from the module...
271 GVal->getParent()->getGlobalList().remove(cast<GlobalVariable>(VPH));
275 delete VPH; // Delete the old placeholder
276 GlobalRefs.erase(I); // Remove the map entry for it
// ParseMethod - Parse one function body from [Buf, EndBuf).
//
// The function's pointer type and ModuleValues slot are taken from the back of
// FunctionSignatureList (which ParseModuleGlobalInfo reverses so the back is
// the next function expected).  A new Function is created, its arguments are
// entered into the Values table, and the nested blocks (constant pool, basic
// blocks, symbol table) are parsed in turn.  Finally late-resolve placeholders
// are patched, the function is added to the module's function list, and the
// placeholder reserved in ModuleValues is replaced by the real function.
//
// Returns true on error; most error paths set Error and delete the partially
// built function.
279 bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
281 // Clear out the local values table...
283 if (FunctionSignatureList.empty()) {
284 Error = "Function found, but FunctionSignatureList empty!";
285 return true; // Unexpected method!
288 const PointerType *PMTy = FunctionSignatureList.back().first; // PtrMeth
289 const FunctionType *MTy = dyn_cast<FunctionType>(PMTy->getElementType());
290 if (MTy == 0) return true; // Not ptr to method!
293 if (read_vbr(Buf, EndBuf, isInternal)) return true;
// Consume the signature entry: remember its slot, then drop it from the list.
295 unsigned MethSlot = FunctionSignatureList.back().second;
296 FunctionSignatureList.pop_back();
297 Function *M = new Function(MTy, isInternal != 0);
299 BCR_TRACE(2, "METHOD TYPE: " << MTy << "\n");
// Register each formal argument in the Values table, walking the parameter
// type list and the function's argument list in lock step.
301 const FunctionType::ParamTypes &Params = MTy->getParamTypes();
302 Function::aiterator AI = M->abegin();
303 for (FunctionType::ParamTypes::const_iterator It = Params.begin();
304 It != Params.end(); ++It, ++AI) {
305 if (insertValue(AI, Values) == -1) {
306 Error = "Error reading method arguments!\n";
307 delete M; return true;
// Parse the nested blocks that make up the function body.
311 while (Buf < EndBuf) {
313 const uchar *OldBuf = Buf;
314 if (readBlock(Buf, EndBuf, Type, Size)) {
315 Error = "Error reading Function level block!";
316 delete M; return true;
320 case BytecodeFormat::ConstantPool:
321 BCR_TRACE(2, "BLOCK BytecodeFormat::ConstantPool: {\n");
322 if (ParseConstantPool(Buf, Buf+Size, Values, MethodTypeValues)) {
323 delete M; return true;
327 case BytecodeFormat::BasicBlock: {
328 BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n");
330 if (ParseBasicBlock(Buf, Buf+Size, BB) ||
331 insertValue(BB, Values) == -1) {
332 delete M; return true; // Parse error... :(
335 M->getBasicBlockList().push_back(BB);
339 case BytecodeFormat::SymbolTable:
340 BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n");
341 if (ParseSymbolTable(Buf, Buf+Size, &M->getSymbolTable())) {
342 delete M; return true;
347 BCR_TRACE(2, "BLOCK <unknown>:ignored! {\n");
// NOTE(review): unlike the other error paths, this return does not delete M
// or set Error -- possible leak, confirm.
349 if (OldBuf > Buf) return true; // Wrap around!
352 BCR_TRACE(2, "} end block\n");
354 if (align32(Buf, EndBuf)) {
355 Error = "Error aligning Function level block!";
356 delete M; // Malformed bc file, read past end of block.
// All blocks parsed: patch forward references made inside this function.
361 if (postResolveValues(LateResolveValues) ||
362 postResolveValues(LateResolveModuleValues)) {
363 Error = "Error resolving method values!";
364 delete M; return true; // Unresolvable references!
// Fetch the placeholder that was reserved for this function's slot.
367 Value *FunctionPHolder = getValue(PMTy, MethSlot, false);
368 assert(FunctionPHolder && "Something is broken no placeholder found!");
369 assert(isa<Function>(FunctionPHolder) && "Not a function?");
371 unsigned type; // Type slot
372 assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
// The slot used below is the one for the pointer type PMTy; the result of
// this call is not checked.
373 getTypeSlot(PMTy, type);
375 C->getFunctionList().push_back(M);
377 // Replace placeholder with the real method pointer...
378 ModuleValues[type][MethSlot] = M;
380 // Clear out method level types...
381 MethodTypeValues.clear();
383 // If anyone is using the placeholder make them use the real method instead
384 FunctionPHolder->replaceAllUsesWith(M);
386 // We don't need the placeholder anymore!
387 delete FunctionPHolder;
// Also patch any constants that forward-referenced this function's slot.
389 ResolveReferencesToValue(M, MethSlot);
// ParseModuleGlobalInfo - Parse the module-level global information block from
// [Buf, End).  It contains two lists, each terminated by Type::VoidTyID:
//
//   1. Global variables.  Each entry packs flags and a type slot into one
//      number and is optionally followed by an initializer slot.  A
//      GlobalVariable is created, inserted into ModuleValues, added to the
//      module's global list, and forward references to it are resolved.
//   2. Function signatures.  For each one a FunctionPHolder is inserted into
//      ModuleValues to reserve the slot, and the (pointer type, slot) pair is
//      queued on FunctionSignatureList for ParseMethod to consume.
//
// Returns true on error.  Seeing a second such block is an error, detected by
// FunctionSignatureList being non-empty on entry.
394 bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
396 if (!FunctionSignatureList.empty()) {
397 Error = "Two ModuleGlobalInfo packets found!";
398 return true; // Two ModuleGlobal blocks?
401 // Read global variables...
403 if (read_vbr(Buf, End, VarType)) return true;
404 while (VarType != Type::VoidTyID) { // List is terminated by Void
405 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer,
406 // bit2 = isInternal, bit3+ = slot#
407 const Type *Ty = getType(VarType >> 3);
// NOTE(review): when Ty is null this condition is true and the message below
// dereferences Ty via getDescription() -- looks like a null dereference on
// the error path; confirm and guard.
408 if (!Ty || !isa<PointerType>(Ty)) {
409 Error = "Global not pointer type! Ty = " + Ty->getDescription();
413 const PointerType *PTy = cast<const PointerType>(Ty);
414 const Type *ElTy = PTy->getElementType();
416 Constant *Initializer = 0;
417 if (VarType & 2) { // Does it have an initializer?
418 // Do not improvise... values must have been stored in the constant pool,
419 // which should have been read before now.
422 if (read_vbr(Buf, End, InitSlot)) return true;
424 Value *V = getValue(ElTy, InitSlot, false);
425 if (V == 0) return true;
// cast<> asserts rather than failing gracefully if V is not a Constant.
426 Initializer = cast<Constant>(V);
429 // Create the global variable...
430 GlobalVariable *GV = new GlobalVariable(ElTy, VarType & 1, VarType & 4,
432 int DestSlot = insertValue(GV, ModuleValues);
433 if (DestSlot == -1) return true;
435 Mod->getGlobalList().push_back(GV);
// Patch any constants that referenced this global before it was defined.
437 ResolveReferencesToValue(GV, (unsigned)DestSlot);
439 BCR_TRACE(2, "Global Variable of type: " << PTy->getDescription()
440 << " into slot #" << DestSlot << "\n");
// Fetch the next entry (or the Void terminator).
442 if (read_vbr(Buf, End, VarType)) return true;
445 // Read the method signatures for all of the methods that are coming, and
446 // create fillers in the Value tables.
447 unsigned FnSignature;
448 if (read_vbr(Buf, End, FnSignature)) return true;
449 while (FnSignature != Type::VoidTyID) { // List is terminated by Void
450 const Type *Ty = getType(FnSignature);
// NOTE(review): same concern as for globals above -- a null Ty satisfies this
// condition and is then dereferenced while building the message; confirm.
451 if (!Ty || !isa<PointerType>(Ty) ||
452 !isa<FunctionType>(cast<PointerType>(Ty)->getElementType())) {
453 Error = "Function not ptr to func type! Ty = " + Ty->getDescription();
457 // We create methods by passing the underlying FunctionType to create...
458 Ty = cast<PointerType>(Ty)->getElementType();
460 // When the ModuleGlobalInfo section is read, we load the type of each
461 // method and the 'ModuleValues' slot that it lands in. We then load a
462 // placeholder into its slot to reserve it. When the method is loaded, this
463 // placeholder is replaced.
465 // Insert the placeholder...
466 Value *Val = new FunctionPHolder(Ty, 0);
467 if (insertValue(Val, ModuleValues) == -1) return true;
469 // Figure out which entry of its typeslot it went into...
471 if (getTypeSlot(Val->getType(), TypeSlot)) return true;
// The placeholder was just appended, so it occupies the last index.
473 unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
475 // Keep track of this information in a linked list that is emptied as
476 // methods are loaded...
478 FunctionSignatureList.push_back(
479 make_pair(cast<const PointerType>(Val->getType()), SlotNo));
480 if (read_vbr(Buf, End, FnSignature)) return true;
481 BCR_TRACE(2, "Function of type: " << Ty << "\n");
484 if (align32(Buf, End)) return true;
486 // Now that the function signature list is set up, reverse it so that we can
487 // remove elements efficiently from the back of the vector.
488 std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end());
490 // This is for future proofing... in the future extra fields may be added that
491 // we don't understand, so we transparently ignore them.
// ParseModule - Parse the top-level Module block spanning [Buf, EndBuf).
//
// The outer block must be of type BytecodeFormat::Module and must cover
// exactly the rest of the buffer.  After reading FirstDerivedTyID a new Module
// is allocated, and each nested block is dispatched to the matching parser
// (constant pool, module global info, function, symbol table).  It is an error
// for declared function signatures to remain unconsumed at the end.
//
// Returns true on error; the error paths visible here delete the module, with
// the exception noted below.
497 bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
501 if (readBlock(Buf, EndBuf, Type, Size)) return true;
502 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) {
503 Error = "Expected Module packet!";
504 return true; // Hrm, not a class?
507 BCR_TRACE(0, "BLOCK BytecodeFormat::Module: {\n");
508 FunctionSignatureList.clear(); // Just in case...
510 // Read into instance variables...
511 if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true;
512 if (align32(Buf, EndBuf)) return true;
513 BCR_TRACE(1, "FirstDerivedTyID = " << FirstDerivedTyID << "\n");
515 TheModule = Mod = new Module();
// Dispatch each nested block to its parser.
517 while (Buf < EndBuf) {
518 const uchar *OldBuf = Buf;
519 if (readBlock(Buf, EndBuf, Type, Size)) { delete Mod; return true;}
521 case BytecodeFormat::ConstantPool:
522 BCR_TRACE(1, "BLOCK BytecodeFormat::ConstantPool: {\n");
523 if (ParseConstantPool(Buf, Buf+Size, ModuleValues, ModuleTypeValues)) {
524 delete Mod; return true;
528 case BytecodeFormat::ModuleGlobalInfo:
529 BCR_TRACE(1, "BLOCK BytecodeFormat::ModuleGlobalInfo: {\n");
531 if (ParseModuleGlobalInfo(Buf, Buf+Size, Mod)) {
532 delete Mod; return true;
536 case BytecodeFormat::Function: {
537 BCR_TRACE(1, "BLOCK BytecodeFormat::Function: {\n");
538 if (ParseMethod(Buf, Buf+Size, Mod)) {
539 delete Mod; return true; // Error parsing function
544 case BytecodeFormat::SymbolTable:
545 BCR_TRACE(1, "BLOCK BytecodeFormat::SymbolTable: {\n");
546 if (ParseSymbolTable(Buf, Buf+Size, &Mod->getSymbolTable())) {
547 delete Mod; return true;
552 Error = "Expected Module Block!";
// NOTE(review): this return neither deletes Mod nor sets Error, unlike the
// neighbouring error paths -- possible leak, confirm.
554 if (OldBuf > Buf) return true; // Wrap around!
557 BCR_TRACE(1, "} end block\n");
558 if (align32(Buf, EndBuf)) { delete Mod; return true; }
561 if (!FunctionSignatureList.empty()) { // Expected more methods!
562 Error = "Function expected, but bytecode stream at end!";
566 BCR_TRACE(0, "} end block\n\n");
// ParseBytecode - Entry point of the parser for an in-memory bytecode image
// spanning [Buf, EndBuf).  Verifies the four-character signature and then
// parses the module.  Returns 0 on failure; for a bad signature Error is set
// to describe the problem.
570 Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
571 LateResolveValues.clear();
573 // Read and check signature...
// The expected value packs the characters 'l','l','v','m' with the first
// character in the least significant byte.
574 if (read(Buf, EndBuf, Sig) ||
575 Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24)) {
576 Error = "Invalid bytecode signature!";
577 return 0; // Invalid signature!
581 if (ParseModule(Buf, EndBuf, Result)) return 0;
// ParseBytecodeBuffer - Parse a module from the Length bytes starting at
// Buffer using a temporary BytecodeParser.  Returns whatever ParseBytecode
// returns (0 on failure); the parser's error string is not made available to
// the caller.
586 Module *ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length) {
587 BytecodeParser Parser;
588 return Parser.ParseBytecode(Buffer, Buffer+Length);
591 // Parse and return a class file...
//
// ParseBytecodeFile - Parse a module from the named file, or from standard
// input when Filename is "-".  A named file is mmap'ed read-only and parsed in
// place; standard input is first accumulated into a heap buffer in 4096-byte
// reads.  If ErrorStr is non-null it receives a description of the failure, or
// the parser's own error string after parsing.
//
// NOTE(review): the fstat-failure and read-failure paths return 0 without
// writing to *ErrorStr -- confirm whether that is intended.
593 Module *ParseBytecodeFile(const std::string &Filename, std::string *ErrorStr) {
597 if (Filename != std::string("-")) { // Read from a file...
598 int FD = open(Filename.c_str(), O_RDONLY);
600 if (ErrorStr) *ErrorStr = "Error opening file!";
604 if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; }
// NOTE(review): st_size is narrowed to int here; very large files would not
// be represented correctly -- confirm whether that matters.
606 int Length = StatBuf.st_size;
608 if (ErrorStr) *ErrorStr = "Error stat'ing file!";
611 uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
// mmap reports failure with the value (void*)-1, not a null pointer.
613 if (Buffer == (uchar*)-1) {
614 if (ErrorStr) *ErrorStr = "Error mmapping file!";
618 BytecodeParser Parser;
619 Result = Parser.ParseBytecode(Buffer, Buffer+Length);
621 munmap((char*)Buffer, Length);
623 if (ErrorStr) *ErrorStr = Parser.getError();
624 } else { // Read from stdin
627 uchar Buffer[4096], *FileData = 0;
// Slurp standard input (descriptor 0) into a growing heap buffer.
628 while ((BlockSize = read(0, Buffer, 4096))) {
629 if (BlockSize == -1) { free(FileData); return 0; }
// NOTE(review): the realloc result is used without a null check.
631 FileData = (uchar*)realloc(FileData, FileSize+BlockSize);
632 memcpy(FileData+FileSize, Buffer, BlockSize);
633 FileSize += BlockSize;
637 if (ErrorStr) *ErrorStr = "Standard Input empty!";
638 free(FileData); return 0;
// NOTE(review): two alternative definitions of Buf follow (an anonymous
// mmap'ed copy, or FileData itself); presumably they are selected by a
// preprocessor conditional on lines not visible here -- confirm.
643 uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
644 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
645 assert((Buf != (uchar*)-1) && "mmap returned error!");
646 memcpy(Buf, FileData, FileSize);
649 uchar *Buf = FileData;
652 BytecodeParser Parser;
653 Result = Parser.ParseBytecode(Buf, Buf+FileSize);
656 munmap((char*)Buf, FileSize); // Free mmap'd data area
658 free(FileData); // Free realloc'd block of memory
661 if (ErrorStr) *ErrorStr = Parser.getError();