1 //===- Reader.cpp - Code to read bytecode files ---------------------------===//
3 // This library implements the functionality defined in llvm/Bytecode/Reader.h
5 // Note that this library should be as fast as possible, reentrant, and
8 // TODO: Make error message outputs be configurable depending on an option?
9 // TODO: Allow passing in an option to ignore the symbol table
11 //===----------------------------------------------------------------------===//
13 #include "ReaderInternals.h"
14 #include "llvm/Bytecode/Reader.h"
15 #include "llvm/Bytecode/Format.h"
16 #include "llvm/GlobalVariable.h"
17 #include "llvm/Module.h"
18 #include "llvm/BasicBlock.h"
19 #include "llvm/ConstantVals.h"
20 #include "llvm/iPHINode.h"
21 #include "llvm/iOther.h"
22 #include <sys/types.h>
// getTypeSlot - Compute the slot number for type Ty and store it in Slot.
// Primitive types use their primitive ID.  Other types are looked up in the
// method-level type list first and then in the module-level type list.
// A `true` return signals that the type was not found.
// NOTE(review): some lines of this function (else/return/closing braces) are
// not visible in this listing -- confirm the control flow against the
// complete source.
32 bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
33 if (Ty->isPrimitiveType()) {
34 Slot = Ty->getPrimitiveID();
36 // Check the method level types first...
37 TypeValuesListTy::iterator I = find(MethodTypeValues.begin(),
38 MethodTypeValues.end(), Ty);
39 if (I != MethodTypeValues.end()) {
// Method-level types are numbered after every module-level type.
40 Slot = FirstDerivedTyID+ModuleTypeValues.size()+
41 (&*I - &MethodTypeValues[0]);
43 I = find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty);
44 if (I == ModuleTypeValues.end()) return true; // Didn't find type!
// Module-level derived types are numbered starting at FirstDerivedTyID.
45 Slot = FirstDerivedTyID + (&*I - &ModuleTypeValues[0]);
48 //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << "\n";
// getType - Translate the type number ID into a Type.  ID is first offered to
// Type::getPrimitiveType(); the visible fallback asks getValue() for entry ID
// of the Type::TypeTy plane (without creating a placeholder) and reports
// failure when that lookup yields null.
// NOTE(review): the statements that return T and that convert D to a Type are
// not visible in this listing -- confirm against the complete source.
52 const Type *BytecodeParser::getType(unsigned ID) {
53 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
56 //cerr << "Looking up Type ID: " << ID << "\n";
58 const Value *D = getValue(Type::TypeTy, ID, false);
59 if (D == 0) return failure<const Type*>(0);
// insertValue - Append Val to the plane of ValueTab selected by the type slot
// of Val's type, growing ValueTab when that plane does not exist yet.
// Returns the index of Val inside its plane, or -1 (via failure<int>) when
// getTypeSlot() cannot find a slot for the type.
// NOTE(review): the declaration of `type` is not visible in this listing.
64 int BytecodeParser::insertValue(Value *Val, std::vector<ValueList> &ValueTab) {
66 if (getTypeSlot(Val->getType(), type)) return failure<int>(-1);
67 assert(type != Type::TypeTyID && "Types should never be insertValue'd!");
// Make sure the table has a plane for this type slot.
69 if (ValueTab.size() <= type)
70 ValueTab.resize(type+1, ValueList());
72 //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
73 // << "] = " << Val << "\n";
74 ValueTab[type].push_back(Val);
// The value just pushed is the last element of its plane.
76 return ValueTab[type].size()-1;
// getValue - Look up value number oNum in the plane that belongs to type Ty.
//   * For the type plane, entries are implicit: primitive types first, then
//     module-level types, then method-level types.
//   * For every other plane, ModuleValues is consulted first and then Values,
//     with the index reduced by the size of the module-level plane.
//   * When nothing is found and Create is true, a placeholder whose kind
//     depends on Ty's primitive ID is allocated and recorded in a
//     late-resolve table; when Create is false, failure is reported.
// NOTE(review): the declarations of `Num` and `d`, and the returns that follow
// the placeholder insertions, are not visible in this listing.  `Num` is
// presumably initialised from oNum -- confirm against the complete source.
79 Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
81 unsigned type; // The type plane it lives in...
83 if (getTypeSlot(Ty, type)) return failure<Value*>(0); // TODO: true
85 if (type == Type::TypeTyID) { // The 'type' plane has implicit values
// Placeholders are never created for types.
86 assert(Create == false);
87 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
88 if (T) return (Value*)T; // Asked for a primitive type...
90 // Otherwise, derived types need offset...
91 Num -= FirstDerivedTyID;
93 // Is it a module level type?
94 if (Num < ModuleTypeValues.size())
95 return (Value*)ModuleTypeValues[Num].get();
97 // Nope, is it a method level type?
98 Num -= ModuleTypeValues.size();
99 if (Num < MethodTypeValues.size())
100 return (Value*)MethodTypeValues[Num].get();
// Non-type planes: module-level values come first in the numbering...
105 if (type < ModuleValues.size()) {
106 if (Num < ModuleValues[type].size())
107 return ModuleValues[type][Num];
108 Num -= ModuleValues[type].size();
// ...followed by the values held in `Values`.
111 if (Values.size() > type && Values[type].size() > Num)
112 return Values[type][Num];
114 if (!Create) return failure<Value*>(0); // Do not create a placeholder?
// Not found: build a placeholder that remembers the original number oNum.
117 switch (Ty->getPrimitiveID()) {
118 case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break;
119 case Type::MethodTyID:
120 cerr << "Creating method pholder! : " << type << ":" << oNum << " "
121 << Ty->getName() << "\n";
122 d = new MethPHolder(Ty, oNum);
// Method placeholders go into the module-level late-resolve table.
// NOTE(review): d is not deleted when this insertion fails -- possible leak.
123 if (insertValue(d, LateResolveModuleValues) ==-1) return failure<Value*>(0);
125 default: d = new DefPHolder(Ty, oNum); break;
128 assert(d != 0 && "How did we not make something?");
// NOTE(review): as above, d is not deleted when this insertion fails.
129 if (insertValue(d, LateResolveValues) == -1) return failure<Value*>(0);
// postResolveValues - Walk every plane of ValTab and resolve the placeholders
// stored there.  For each placeholder (taken from the back of its plane) the
// real value is looked up by the ID number recorded in the placeholder, all
// uses of the placeholder are redirected to it, and the placeholder is
// deleted.  Unresolvable references are reported on cerr.
// NOTE(review): several lines are not visible in this listing: the
// declarations of `Size` and `Error`, the statement that removes the entry
// from DL (without one the while loop could not terminate), the test on
// NewDef, and the final return.  `Error` is assigned `true` here while other
// functions in this file assign strings to `Error`, so it is presumably a
// local flag -- confirm against the complete source.
133 bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
135 for (unsigned ty = 0; ty < ValTab.size(); ++ty) {
136 ValueList &DL = ValTab[ty];
138 while ((Size = DL.size())) {
139 unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
141 Value *D = DL[Size-1];
// Look up the real definition; do not create another placeholder.
144 Value *NewDef = getValue(D->getType(), IDNumber, false);
146 Error = true; // Unresolved thinger
147 cerr << "Unresolvable reference found: <"
148 << D->getType()->getDescription() << ">:" << IDNumber << "!\n";
150 // Fixup all of the uses of this placeholder def...
// NOTE(review): confirm this is not reached with a null NewDef.
151 D->replaceAllUsesWith(NewDef);
153 // Now that all the uses are gone, delete the placeholder...
154 // If we couldn't find a def (error case), then leak a little
155 delete D; // memory, 'cause otherwise we can't remove all uses!
// ParseBasicBlock - Allocate a new BasicBlock into BB and fill it with the
// instructions parsed from [Buf, EndBuf).  Each instruction is registered in
// `Values` before being appended to the block.  Returns failure(true) on a
// parse or insertion error.
// NOTE(review): the rest of the parameter list, the declaration of `Inst`,
// and the statement preceding the first `return failure(true)` are not
// visible in this listing.
163 bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
165 BB = new BasicBlock();
167 while (Buf < EndBuf) {
169 if (ParseInstruction(Buf, EndBuf, Inst)) {
171 return failure(true);
// The half-built block is deleted on these error paths.
// NOTE(review): when insertValue fails, Inst has not yet been added to BB
// (that happens below), so Inst itself may leak -- confirm.
174 if (Inst == 0) { delete BB; return failure(true); }
175 if (insertValue(Inst, Values) == -1) { delete BB; return failure(true); }
177 BB->getInstList().push_back(Inst);
// ParseSymbolTable - Read symbol table planes from [Buf, EndBuf).  Each plane
// starts with an entry count and a type number; each entry is a slot number
// followed by a name.  The value found in that slot of the plane's type is
// given the name via setName().  Returns failure(true) on malformed input or
// when a slot cannot be resolved.
// NOTE(review): the rest of the parameter list (including `ST`), the
// declarations of `slot` and `Name`, and the test on D are not visible in
// this listing.
185 bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf,
187 while (Buf < EndBuf) {
188 // Symtab block header: [num entries][type id number]
189 unsigned NumEntries, Typ;
190 if (read_vbr(Buf, EndBuf, NumEntries) ||
191 read_vbr(Buf, EndBuf, Typ)) return failure(true);
192 const Type *Ty = getType(Typ);
193 if (Ty == 0) return failure(true);
195 BCR_TRACE(3, "Plane Type: '" << Ty << "' with " << NumEntries <<
198 for (unsigned i = 0; i < NumEntries; ++i) {
199 // Symtab entry: [def slot #][name]
201 if (read_vbr(Buf, EndBuf, slot)) return failure(true);
203 if (read(Buf, EndBuf, Name, false)) // Not aligned...
204 return failure(true);
206 Value *D = getValue(Ty, slot, false); // Find mapping...
208 BCR_TRACE(3, "FAILED LOOKUP: Slot #" << slot << "\n");
209 return failure(true);
211 BCR_TRACE(4, "Map: '" << Name << "' to #" << slot << ":" << D;
212 if (!isa<Instruction>(D)) cerr << "\n");
214 D->setName(Name, ST);
// Reading past the end of the block means the input was malformed.
218 if (Buf > EndBuf) return failure(true);
222 // DeclareNewGlobalValue - Patch up forward references to global values in the
223 // form of ConstantPointerRef.
//
// GV is the newly created global and Slot is the slot it was given.  If
// GlobalRefs holds a placeholder registered under (GV's type, Slot), every
// ConstantPointerRef using that placeholder is redirected to GV and the
// placeholder is removed from the module's global list and deleted.
// NOTE(review): the closing lines of this function, including the removal of
// the GlobalRefs entry announced by the last comment, are not visible in this
// listing.
225 void BytecodeParser::DeclareNewGlobalValue(GlobalValue *GV, unsigned Slot) {
226 // Check to see if there is a forward reference to this global variable...
227 // if there is, eliminate it and patch the reference to use the new def'n.
228 GlobalRefsType::iterator I = GlobalRefs.find(make_pair(GV->getType(), Slot));
230 if (I != GlobalRefs.end()) {
231 GlobalVariable *OldGV = I->second; // Get the placeholder...
232 BCR_TRACE(3, "Mutating CPPR Forward Ref!\n");
234 // Loop over all of the uses of the GlobalValue. The only thing they are
235 // allowed to be at this point is ConstantPointerRef's.
// NOTE(review): the assert demands exactly one use, yet the loop below is
// written to handle any number of uses -- confirm which is intended.
236 assert(OldGV->use_size() == 1 && "Only one reference should exist!");
237 while (!OldGV->use_empty()) {
238 User *U = OldGV->use_back(); // Must be a ConstantPointerRef...
239 ConstantPointerRef *CPPR = cast<ConstantPointerRef>(U);
240 assert(CPPR->getValue() == OldGV && "Something isn't happy");
242 BCR_TRACE(4, "Mutating Forward Ref!\n");
244 // Change the const pool reference to point to the real global variable
245 // now. This should drop a use from the OldGV.
246 CPPR->mutateReference(GV);
249 // Remove the old placeholder (OldGV) from the global list of GV's module...
250 GV->getParent()->getGlobalList().remove(OldGV);
251 delete OldGV; // Delete the old placeholder
253 // Remove the map entry for the global now that it has been created...
// ParseMethod - Parse one method block from [Buf, EndBuf).
//
// The method's pointer-to-method type and module-level slot are taken from
// the front of MethodSignatureList.  A new Method is created, its arguments
// are registered in `Values`, and the nested blocks (constant pool, basic
// blocks, symbol table) are parsed in a loop.  Afterwards late-resolved
// placeholders are fixed up, the method is appended to C's function list, and
// the module-level placeholder for the method is replaced by the real Method.
// Returns failure(true) on error; most error exits delete M first.
// NOTE(review): the rest of the parameter list (including `C`), several
// local declarations (isInternal, Type, Size, BB), the switch header, the
// `break`s, and the function's final lines are not visible in this listing.
258 bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
260 // Clear out the local values table...
262 if (MethodSignatureList.empty()) {
263 Error = "Method found, but MethodSignatureList empty!";
264 return failure(true); // Unexpected method!
267 const PointerType *PMTy = MethodSignatureList.front().first; // PtrMeth
268 const MethodType *MTy = dyn_cast<const MethodType>(PMTy->getElementType());
269 if (MTy == 0) return failure(true); // Not ptr to method!
272 if (read_vbr(Buf, EndBuf, isInternal)) return failure(true);
// Consume the signature entry now that its type and slot have been read.
274 unsigned MethSlot = MethodSignatureList.front().second;
275 MethodSignatureList.pop_front();
276 Method *M = new Method(MTy, isInternal != 0);
278 BCR_TRACE(2, "METHOD TYPE: " << MTy << "\n");
// Create one FunctionArgument per parameter type and register each in Values.
280 const MethodType::ParamTypes &Params = MTy->getParamTypes();
281 for (MethodType::ParamTypes::const_iterator It = Params.begin();
282 It != Params.end(); ++It) {
283 FunctionArgument *FA = new FunctionArgument(*It);
// NOTE(review): FA is not yet owned by M when insertValue fails, so it is
// not released by `delete M` -- possible leak; confirm.
284 if (insertValue(FA, Values) == -1) {
285 Error = "Error reading method arguments!\n";
286 delete M; return failure(true);
288 M->getArgumentList().push_back(FA);
// Parse the blocks nested inside the method block.
291 while (Buf < EndBuf) {
293 const uchar *OldBuf = Buf;
294 if (readBlock(Buf, EndBuf, Type, Size)) {
295 Error = "Error reading Method level block!";
296 delete M; return failure(true);
300 case BytecodeFormat::ConstantPool:
301 BCR_TRACE(2, "BLOCK BytecodeFormat::ConstantPool: {\n");
302 if (ParseConstantPool(Buf, Buf+Size, Values, MethodTypeValues)) {
303 delete M; return failure(true);
307 case BytecodeFormat::BasicBlock: {
308 BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n");
// NOTE(review): if ParseBasicBlock succeeds but insertValue fails, BB has
// not yet been added to M and is not deleted here -- possible leak.
310 if (ParseBasicBlock(Buf, Buf+Size, BB) ||
311 insertValue(BB, Values) == -1) {
312 delete M; return failure(true); // Parse error... :(
315 M->getBasicBlocks().push_back(BB);
319 case BytecodeFormat::SymbolTable:
320 BCR_TRACE(2, "BLOCK BytecodeFormat::SymbolTable: {\n");
321 if (ParseSymbolTable(Buf, Buf+Size, M->getSymbolTableSure())) {
322 delete M; return failure(true);
327 BCR_TRACE(2, "BLOCK <unknown>:ignored! {\n");
// NOTE(review): unlike the other error exits in this loop, this return does
// not `delete M` -- possible leak; confirm.
329 if (OldBuf > Buf) return failure(true); // Wrap around!
332 BCR_TRACE(2, "} end block\n");
334 if (align32(Buf, EndBuf)) {
335 Error = "Error aligning Method level block!";
336 delete M; // Malformed bc file, read past end of block.
337 return failure(true);
// Resolve forward references recorded while parsing the method body.
341 if (postResolveValues(LateResolveValues) ||
342 postResolveValues(LateResolveModuleValues)) {
343 Error = "Error resolving method values!";
344 delete M; return failure(true); // Unresolvable references!
// Fetch the module-level placeholder that reserved this method's slot.
// NOTE(review): these checks are asserts only; MethPHolder is dereferenced
// further down regardless -- confirm behaviour in NDEBUG builds.
347 Value *MethPHolder = getValue(PMTy, MethSlot, false);
348 assert(MethPHolder && "Something is broken no placeholder found!");
349 assert(isa<Method>(MethPHolder) && "Not a method?");
351 unsigned type; // Type slot
// The first getTypeSlot call lives inside assert(); the second call (its
// result is ignored) is the one that sets `type` to PMTy's slot for the
// ModuleValues update below.
352 assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
353 getTypeSlot(PMTy, type);
355 C->getFunctionList().push_back(M);
357 // Replace placeholder with the real method pointer...
358 ModuleValues[type][MethSlot] = M;
360 // Clear out method level types...
361 MethodTypeValues.clear();
363 // If anyone is using the placeholder make them use the real method instead
364 MethPHolder->replaceAllUsesWith(M);
366 // We don't need the placeholder anymore!
369 // If the method is empty, we don't need the method argument entries...
371 M->getArgumentList().delete_all();
// Patch any ConstantPointerRef forward references to this method.
373 DeclareNewGlobalValue(M, MethSlot);
// ParseModuleGlobalInfo - Parse the module global info block in [Buf, End).
//
// The block holds two lists, each terminated by Type::VoidTyID:
//   1. global variables, each encoded as a packed VarType word, optionally
//      followed by the slot of its initializer;
//   2. pointer-to-method type numbers for the methods that follow in the
//      bytecode; a placeholder is inserted into ModuleValues for each one and
//      (type, slot) is queued on MethodSignatureList.
// Returns failure(true) on malformed input or when called while
// MethodSignatureList is non-empty.
// NOTE(review): the rest of the parameter list (including `Mod`), some local
// declarations (VarType, InitSlot, TypeSlot), the tail of the GlobalVariable
// constructor call, and the function's final lines are not visible in this
// listing.
378 bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
380 if (!MethodSignatureList.empty()) {
381 Error = "Two ModuleGlobalInfo packets found!";
382 return failure(true); // Two ModuleGlobal blocks?
385 // Read global variables...
387 if (read_vbr(Buf, End, VarType)) return failure(true);
388 while (VarType != Type::VoidTyID) { // List is terminated by Void
389 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer,
390 // bit2 = isInternal, bit3+ = slot#
391 const Type *Ty = getType(VarType >> 3);
// NOTE(review): when Ty is null this branch is taken and the message below
// calls Ty->getDescription() on a null pointer -- confirm and guard.
392 if (!Ty || !Ty->isPointerType()) {
393 Error = "Global not pointer type! Ty = " + Ty->getDescription();
394 return failure(true);
397 const PointerType *PTy = cast<const PointerType>(Ty);
398 const Type *ElTy = PTy->getElementType();
400 Constant *Initializer = 0;
401 if (VarType & 2) { // Does it have an initializer?
402 // Do not improvise... values must have been stored in the constant pool,
403 // which should have been read before now.
406 if (read_vbr(Buf, End, InitSlot)) return failure(true);
408 Value *V = getValue(ElTy, InitSlot, false);
409 if (V == 0) return failure(true);
410 Initializer = cast<Constant>(V);
413 // Create the global variable...
414 GlobalVariable *GV = new GlobalVariable(ElTy, VarType & 1, VarType & 4,
// NOTE(review): GV is not yet in the module's global list when insertValue
// fails and is not deleted here -- possible leak; confirm.
416 int DestSlot = insertValue(GV, ModuleValues);
417 if (DestSlot == -1) return failure(true);
419 Mod->getGlobalList().push_back(GV);
// Patch any forward references that were made to this global's slot.
421 DeclareNewGlobalValue(GV, unsigned(DestSlot));
423 BCR_TRACE(2, "Global Variable of type: " << PTy->getDescription()
424 << " into slot #" << DestSlot << "\n");
426 if (read_vbr(Buf, End, VarType)) return failure(true);
429 // Read the method signatures for all of the methods that are coming, and
430 // create fillers in the Value tables.
431 unsigned MethSignature;
432 if (read_vbr(Buf, End, MethSignature)) return failure(true);
433 while (MethSignature != Type::VoidTyID) { // List is terminated by Void
434 const Type *Ty = getType(MethSignature);
// NOTE(review): as with the globals above, a null Ty reaches
// Ty->getDescription() in the error message -- confirm and guard.
435 if (!Ty || !isa<PointerType>(Ty) ||
436 !isa<MethodType>(cast<PointerType>(Ty)->getElementType())) {
437 Error = "Method not ptr to meth type! Ty = " + Ty->getDescription();
438 return failure(true);
441 // We create methods by passing the underlying MethodType to create...
442 Ty = cast<PointerType>(Ty)->getElementType();
444 // When the ModuleGlobalInfo section is read, we load the type of each
445 // method and the 'ModuleValues' slot that it lands in. We then load a
446 // placeholder into its slot to reserve it. When the method is loaded, this
447 // placeholder is replaced.
449 // Insert the placeholder...
// NOTE(review): Val is not deleted when insertValue fails -- possible leak.
450 Value *Val = new MethPHolder(Ty, 0);
451 if (insertValue(Val, ModuleValues) == -1) return failure(true);
453 // Figure out which entry of its typeslot it went into...
455 if (getTypeSlot(Val->getType(), TypeSlot)) return failure(true);
// The placeholder was just appended, so it is the last entry of its plane.
457 unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
459 // Keep track of this information in a linked list that is emptied as
460 // methods are loaded...
462 MethodSignatureList.push_back(
463 make_pair(cast<const PointerType>(Val->getType()), SlotNo));
464 if (read_vbr(Buf, End, MethSignature)) return failure(true);
465 BCR_TRACE(2, "Method of type: " << Ty << "\n");
468 if (align32(Buf, End)) return failure(true);
470 // This is for future proofing... in the future extra fields may be added that
471 // we don't understand, so we transparently ignore them.
// ParseModule - Parse the top-level module block that must span exactly
// [Buf, EndBuf).  FirstDerivedTyID is read into the parser, a new Module is
// created, and each nested block (constant pool, module global info, method,
// symbol table) is dispatched to its parser.  After the loop, any entry left
// in MethodSignatureList means a declared method was never supplied.
// Returns failure(true) on error.
// NOTE(review): the rest of the parameter list (including `C`), the
// declarations of Type and Size, the switch header, the `break`s, and the
// final return are not visible in this listing.
477 bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
481 if (readBlock(Buf, EndBuf, Type, Size)) return failure(true);
// The module block must cover the remainder of the buffer exactly.
482 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) {
483 Error = "Expected Module packet!";
484 return failure(true); // Hrm, not a class?
487 BCR_TRACE(0, "BLOCK BytecodeFormat::Module: {\n");
488 MethodSignatureList.clear(); // Just in case...
490 // Read into instance variables...
491 if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return failure(true);
492 if (align32(Buf, EndBuf)) return failure(true);
493 BCR_TRACE(1, "FirstDerivedTyID = " << FirstDerivedTyID << "\n");
495 TheModule = C = new Module();
496 while (Buf < EndBuf) {
497 const uchar *OldBuf = Buf;
498 if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return failure(true); }
500 case BytecodeFormat::ConstantPool:
501 BCR_TRACE(1, "BLOCK BytecodeFormat::ConstantPool: {\n");
502 if (ParseConstantPool(Buf, Buf+Size, ModuleValues, ModuleTypeValues)) {
503 delete C; return failure(true);
507 case BytecodeFormat::ModuleGlobalInfo:
508 BCR_TRACE(1, "BLOCK BytecodeFormat::ModuleGlobalInfo: {\n");
510 if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
511 delete C; return failure(true);
515 case BytecodeFormat::Method: {
516 BCR_TRACE(1, "BLOCK BytecodeFormat::Method: {\n");
517 if (ParseMethod(Buf, Buf+Size, C)) {
518 delete C; return failure(true); // Error parsing method
523 case BytecodeFormat::SymbolTable:
524 BCR_TRACE(1, "BLOCK BytecodeFormat::SymbolTable: {\n");
525 if (ParseSymbolTable(Buf, Buf+Size, C->getSymbolTableSure())) {
526 delete C; return failure(true);
531 Error = "Expected Module Block!";
// NOTE(review): unlike the other error exits in this loop, this return does
// not `delete C` -- possible leak; confirm.
533 if (OldBuf > Buf) return failure(true); // Wrap around!
536 BCR_TRACE(1, "} end block\n");
537 if (align32(Buf, EndBuf)) { delete C; return failure(true); }
// NOTE(review): this error exit also returns without `delete C` -- confirm
// whether the caller is expected to release the module.
540 if (!MethodSignatureList.empty()) { // Expected more methods!
541 Error = "Method expected, but bytecode stream at end!";
542 return failure(true);
545 BCR_TRACE(0, "} end block\n\n");
// ParseBytecode - Entry point of the parser for an in-memory buffer
// [Buf, EndBuf).  The late-resolve table is cleared, the four signature bytes
// 'l','l','v','m' (packed with 'l' in the lowest byte) are checked, and the
// rest is handed to ParseModule().  Returns 0 on a bad signature or a
// module parse error.
// NOTE(review): the declarations of `Sig` and `Result` and the successful
// return are not visible in this listing.
549 Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
550 LateResolveValues.clear();
552 // Read and check signature...
553 if (read(Buf, EndBuf, Sig) ||
554 Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24)) {
555 Error = "Invalid bytecode signature!";
556 return failure<Module*>(0); // Invalid signature!
560 if (ParseModule(Buf, EndBuf, Result)) return 0;
// ParseBytecodeBuffer - Parse the Length bytes starting at Buffer with a
// fresh BytecodeParser and return whatever ParseBytecode() returns.  The
// parser's error string is not passed back to the caller by this function.
565 Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
566 BytecodeParser Parser;
567 return Parser.ParseBytecode(Buffer, Buffer+Length);
570 // Parse and return a Module from a bytecode file...
//
// When Filename is "-" the bytecode is read from standard input (file
// descriptor 0) into a growing heap buffer; otherwise the named file is
// opened, mmap'd read-only and parsed in place.  If ErrorStr is non-null it
// receives an error message on the failure paths that set one, and the
// parser's own error string after parsing.
// NOTE(review): several lines are not visible in this listing: declarations
// (Result, StatBuf, FileSize, BlockSize), the conditions guarding some error
// branches, the remaining mmap arguments, what look like preprocessor
// conditionals around the two `Buf` definitions, and the final return.
572 Module *ParseBytecodeFile(const std::string &Filename, std::string *ErrorStr) {
576 if (Filename != std::string("-")) { // Read from a file...
577 int FD = open(Filename.c_str(), O_RDONLY);
579 if (ErrorStr) *ErrorStr = "Error opening file!";
580 return failure<Module*>(0);
// NOTE(review): this failure path does not set *ErrorStr.
583 if (fstat(FD, &StatBuf) == -1) { close(FD); return failure<Module*>(0); }
// NOTE(review): st_size is narrowed to int here -- confirm large files are
// out of scope.
585 int Length = StatBuf.st_size;
587 if (ErrorStr) *ErrorStr = "Error stat'ing file!";
588 close(FD); return failure<Module*>(0);
590 uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
592 if (Buffer == (uchar*)-1) {
593 if (ErrorStr) *ErrorStr = "Error mmapping file!";
594 close(FD); return failure<Module*>(0);
597 BytecodeParser Parser;
598 Result = Parser.ParseBytecode(Buffer, Buffer+Length);
600 munmap((char*)Buffer, Length);
602 if (ErrorStr) *ErrorStr = Parser.getError();
603 } else { // Read from stdin
606 uchar Buffer[4096], *FileData = 0;
// NOTE(review): only 4 bytes are requested per read() although Buffer holds
// 4096 -- presumably sizeof(Buffer) was intended; confirm.
607 while ((BlockSize = read(0, Buffer, 4))) {
// NOTE(review): this failure path does not set *ErrorStr.
608 if (BlockSize == -1) { free(FileData); return failure<Module*>(0); }
// NOTE(review): the realloc result is not checked; on failure the old block
// would be lost and memcpy would write through a null pointer.
610 FileData = (uchar*)realloc(FileData, FileSize+BlockSize);
611 memcpy(FileData+FileSize, Buffer, BlockSize);
612 FileSize += BlockSize;
616 if (ErrorStr) *ErrorStr = "Standard Input empty!";
617 free(FileData); return failure<Module*>(0);
// Two alternative definitions of Buf follow: a private anonymous mapping
// that receives a copy of FileData, or FileData used directly.
// NOTE(review): mmap failure is only caught by assert -- confirm behaviour
// in NDEBUG builds.
622 uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
623 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
624 assert((Buf != (uchar*)-1) && "mmap returned error!");
625 memcpy(Buf, FileData, FileSize);
628 uchar *Buf = FileData;
631 BytecodeParser Parser;
632 Result = Parser.ParseBytecode(Buf, Buf+FileSize);
635 munmap((char*)Buf, FileSize); // Free mmap'd data area
637 free(FileData); // Free realloc'd block of memory
640 if (ErrorStr) *ErrorStr = Parser.getError();