1 //===- Reader.cpp - Code to read bytecode files -----------------------------===
3 // This library implements the functionality defined in llvm/Bytecode/Reader.h
5 // Note that this library should be as fast as possible, reentrant, and
8 // TODO: Make error message outputs be configurable depending on an option?
9 // TODO: Allow passing in an option to ignore the symbol table
11 //===------------------------------------------------------------------------===
13 #include "llvm/Bytecode/Reader.h"
14 #include "llvm/Bytecode/Format.h"
15 #include "llvm/Module.h"
16 #include "llvm/BasicBlock.h"
17 #include "llvm/DerivedTypes.h"
18 #include "llvm/ConstPoolVals.h"
19 #include "llvm/iOther.h"
20 #include "ReaderInternals.h"
21 #include <sys/types.h>
// getTypeSlot - Compute the slot (type plane) number for Ty and store it in
// Slot.  A primitive type uses its primitive ID as the slot.  The type is
// also looked up in TypeMap, and a miss is reported by returning true.
// NOTE(review): the branch structure, the store into Slot for the TypeMap
// case, and the success return are not visible in this excerpt -- confirm
// against the full file.
28 bool BytecodeParser::getTypeSlot(const Type *Ty, unsigned &Slot) {
29 if (Ty->isPrimitiveType()) {
// No table lookup needed here: the primitive ID itself is the slot number.
30 Slot = Ty->getPrimitiveID();
// Look the type up in TypeMap; an unknown type is an error (true).
32 TypeMapType::iterator I = TypeMap.find(Ty);
33 if (I == TypeMap.end()) return true; // Didn't find type!
36 //cerr << "getTypeSlot '" << Ty->getName() << "' = " << Slot << endl;
// getType - Map a type ID to a Type.  The ID is first tried as a primitive
// type ID.  It is also looked up as a value in the Type::TypeTy plane, with
// Create == false so no placeholder is made, and the Type wrapped by that
// ConstPoolType is returned.
// NOTE(review): what happens with T after the primitive lookup, and any null
// check on D before the assert, are not visible in this excerpt -- confirm.
40 const Type *BytecodeParser::getType(unsigned ID) {
41 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID);
44 //cerr << "Looking up Type ID: " << ID << endl;
// Fetch the entry from the type plane without creating a placeholder.
46 const Value *D = getValue(Type::TypeTy, ID, false);
// The entry must be a constant living in the type plane for the cast below
// to be valid.
49 assert(D->getType() == Type::TypeTy &&
50 D->getValueType() == Value::ConstantVal);
// NOTE(review): the second ';' below is an empty statement (harmless).
53 return ((const ConstPoolType*)D)->getValue();;
// insertValue - Append Def to the plane of ValueTab selected by the slot of
// Def's type, growing ValueTab first if that plane does not exist yet.
// Returns true if getTypeSlot cannot produce a slot for Def's type.
//
// When Def is a constant in the type plane, the Type it wraps is also
// registered in TypeMap (unless already present) under the ID it will be
// referenced by: its index in the plane plus FirstDerivedTyID, plus the size
// of the module-level type plane when inserting into the Values table.
// NOTE(review): the declaration of 'type' and the success return are not
// visible in this excerpt.
56 bool BytecodeParser::insertValue(Value *Def, vector<ValueList> &ValueTab) {
58 if (getTypeSlot(Def->getType(), type)) return true;
// Make sure the plane for this type exists before it is indexed.
60 if (ValueTab.size() <= type)
61 ValueTab.resize(type+1, ValueList());
63 //cerr << "insertValue Values[" << type << "][" << ValueTab[type].size()
64 // << "] = " << Def << endl;
// A constant in the type plane defines a derived type: record the Type -> ID
// mapping so that getTypeSlot can find it later.
66 if (type == Type::TypeTyID && Def->getValueType() == Value::ConstantVal) {
67 const Type *Ty = ((const ConstPoolType*)Def)->getValue();
68 unsigned ValueOffset = FirstDerivedTyID;
// NOTE(review): ModuleValues[type] is indexed below with no visible check that
// ModuleValues has a plane at index 'type' -- confirm this cannot be out of
// range.
70 if (&ValueTab == &Values) // Take into consideration module level types
71 ValueOffset += ModuleValues[type].size();
// Only the first definition of a given Type gets a TypeMap entry.
73 if (TypeMap.find(Ty) == TypeMap.end())
74 TypeMap[Ty] = ValueTab[type].size()+ValueOffset;
77 ValueTab[type].push_back(Def);
// getValue - Look up value number oNum in the plane for type Ty.
//
// The lookup order visible here is: implicit primitive types (type plane
// only), then ModuleValues, then Values, with the index reduced by the size
// of the module-level plane in between.  If nothing is found and Create is
// false, 0 is returned.  Otherwise a placeholder is created whose kind depends
// on Ty (BBPHolder for labels, MethPHolder for methods, DefPHolder for
// everything else) and it is queued in LateResolveValues.
// Returns 0 when no slot exists for Ty or when queuing the placeholder fails.
// NOTE(review): the declarations of 'Num' and 'd' and the final return are not
// visible in this excerpt; Num is presumably initialised from oNum -- confirm.
82 Value *BytecodeParser::getValue(const Type *Ty, unsigned oNum, bool Create) {
84 unsigned type; // The type plane it lives in...
86 if (getTypeSlot(Ty, type)) return 0; // TODO: true
88 if (type == Type::TypeTyID) { // The 'type' plane has implicit values
89 const Type *T = Type::getPrimitiveType((Type::PrimitiveID)Num);
90 if (T) return (Value*)T; // Asked for a primitive type...
92 // Otherwise, derived types need offset...
93 Num -= FirstDerivedTyID;
// Module-level values occupy the low indices of each plane; indices past them
// are rebased before the next lookup.
96 if (ModuleValues.size() > type) {
97 if (ModuleValues[type].size() > Num)
98 return ModuleValues[type][Num];
99 Num -= ModuleValues[type].size();
102 if (Values.size() > type && Values[type].size() > Num)
103 return Values[type][Num];
105 if (!Create) return 0; // Do not create a placeholder?
// Not defined yet: build a placeholder that records the original number
// (oNum) so the reference can be patched later.
108 switch (Ty->getPrimitiveID()) {
109 case Type::LabelTyID: d = new BBPHolder(Ty, oNum); break;
110 case Type::MethodTyID:
111 cerr << "Creating method pholder! : " << type << ":" << oNum << " "
112 << Ty->getName() << endl;
113 d = new MethPHolder(Ty, oNum);
// Method placeholders are queued in the module-level late-resolve table.
// NOTE(review): the result of this insertValue call is ignored, and how this
// case leaves the switch is not visible in this excerpt.
114 insertValue(d, LateResolveModuleValues);
116 default: d = new DefPHolder(Ty, oNum); break;
119 assert(d != 0 && "How did we not make something?");
// NOTE(review): on failure the placeholder 'd' does not appear to be freed
// here -- confirm whether this leaks.
120 if (insertValue(d, LateResolveValues)) return 0;
// postResolveValues - Resolve every placeholder queued in ValTab.  For each
// type plane, placeholders are taken from the back of the list; the value
// number stored in the placeholder is looked up again with Create == false,
// all uses of the placeholder are redirected to the value found, and the
// placeholder is deleted.  A failed lookup is reported on cerr and recorded
// in Error.
// NOTE(review): the declarations of Size and Error, the removal of the entry
// from DL, the test on NewDef and the return statement are not visible in
// this excerpt -- presumably Error is the return value; confirm.
124 bool BytecodeParser::postResolveValues(ValueTable &ValTab) {
126 for (unsigned ty = 0; ty < ValTab.size(); ty++) {
127 ValueList &DL = ValTab[ty];
// Drain the plane from the back until it is empty.
129 while ((Size = DL.size())) {
130 unsigned IDNumber = getValueIDNumberFromPlaceHolder(DL[Size-1]);
132 Value *D = DL[Size-1];
// Look the real definition up again; do not create another placeholder.
135 Value *NewDef = getValue(D->getType(), IDNumber, false);
137 Error = true; // Unresolved thinger
138 cerr << "Unresolvable reference found: <" << D->getType()->getName()
139 << ">:" << IDNumber << "!\n";
141 // Fixup all of the uses of this placeholder def...
142 D->replaceAllUsesWith(NewDef);
144 // Now that all the uses are gone, delete the placeholder...
145 // If we couldn't find a def (error case), then leak a little
146 delete D; // memory, 'cause otherwise we can't remove all uses!
// ParseBasicBlock - Read instructions from [Buf, EndBuf) into a newly
// allocated BasicBlock stored in BB.  Each parsed instruction is registered
// in the Values table and appended to the block's instruction list.  On the
// failure paths visible here the block is deleted and true is returned.
// NOTE(review): the rest of the signature, the declaration of Def, the body
// of the ParseInstruction failure branch and the success return are not
// visible in this excerpt.
154 bool BytecodeParser::ParseBasicBlock(const uchar *&Buf, const uchar *EndBuf,
156 BB = new BasicBlock();
158 while (Buf < EndBuf) {
160 if (ParseInstruction(Buf, EndBuf, Def)) {
165 if (Def == 0) { delete BB; return true; }
// NOTE(review): when insertValue fails, Def has not yet been added to BB, so
// deleting BB does not appear to free it -- confirm whether Def leaks.
166 if (insertValue(Def, Values)) { delete BB; return true; }
168 BB->getInstList().push_back(Def);
// ParseSymbolTable - Read symbol table groups from [Buf, EndBuf).  Each group
// has a header giving the entry count and a type ID, followed by that many
// (slot number, name) entries.  Every slot is looked up with getValue using
// Create == false.  Returns true on a read error, an unknown type ID, or a
// slot with no value.
// NOTE(review): the declarations of slot and Name, what is done with D and
// Name after the lookup, and the success return are not visible in this
// excerpt.
174 bool BytecodeParser::ParseSymbolTable(const uchar *&Buf, const uchar *EndBuf) {
175 while (Buf < EndBuf) {
176 // Symtab block header: [num entries][type id number]
177 unsigned NumEntries, Typ;
178 if (read_vbr(Buf, EndBuf, NumEntries) ||
179 read_vbr(Buf, EndBuf, Typ)) return true;
// All entries in this group share the type named by the header.
180 const Type *Ty = getType(Typ);
181 if (Ty == 0) return true;
183 for (unsigned i = 0; i < NumEntries; i++) {
184 // Symtab entry: [def slot #][name]
186 if (read_vbr(Buf, EndBuf, slot)) return true;
188 if (read(Buf, EndBuf, Name, false)) // Not aligned...
// A symbol may only refer to a value that already exists, so no placeholder is
// created here.
191 Value *D = getValue(Ty, slot, false); // Find mapping...
192 if (D == 0) return true;
// ParseMethod - Parse one method block from [Buf, EndBuf).
//
// The method's signature and its reserved ModuleValues slot come from the
// front of MethodSignatureList, which ParseModuleGlobalInfo fills in.  A new
// Method is created with one MethodArgument per parameter type.  The nested
// constant pool, basic block and symbol table blocks are then parsed.  Once
// the queued placeholders are resolved, the method is appended to
// C->getMethodList() and replaces the placeholder in its ModuleValues slot.
// Returns true on the error paths visible here.
// NOTE(review): the rest of the signature, the switch header, the
// declarations of Type, Size and BB, the default case, the disposal of the
// old placeholder and the success return are not visible in this excerpt.
201 bool BytecodeParser::ParseMethod(const uchar *&Buf, const uchar *EndBuf,
203 // Clear out the local values table...
205 if (MethodSignatureList.empty()) return true; // Unexpected method!
// Methods are expected in the order their signatures were declared, so the
// next signature is taken from the front of the list.
207 const MethodType *MTy = MethodSignatureList.front().first;
208 unsigned MethSlot = MethodSignatureList.front().second;
209 MethodSignatureList.pop_front();
210 Method *M = new Method(MTy);
// Create the formal arguments and register each one in the Values table.
212 const MethodType::ParamTypes &Params = MTy->getParamTypes();
213 for (MethodType::ParamTypes::const_iterator It = Params.begin();
214 It != Params.end(); It++) {
215 MethodArgument *MA = new MethodArgument(*It);
// NOTE(review): if insertValue fails, MA has not yet been added to M, so
// deleting M does not appear to free it -- confirm whether MA leaks.
216 if (insertValue(MA, Values)) { delete M; return true; }
217 M->getArgumentList().push_back(MA);
// Parse the nested blocks that make up the method body.
220 while (Buf < EndBuf) {
222 const uchar *OldBuf = Buf;
223 if (readBlock(Buf, EndBuf, Type, Size)) { delete M; return true; }
226 case BytecodeFormat::ConstantPool:
227 if (ParseConstantPool(Buf, Buf+Size, M->getConstantPool(), Values)) {
228 cerr << "Error reading constant pool!\n";
229 delete M; return true;
233 case BytecodeFormat::BasicBlock: {
235 if (ParseBasicBlock(Buf, Buf+Size, BB) ||
236 insertValue(BB, Values)) {
237 cerr << "Error parsing basic block!\n";
238 delete M; return true; // Parse error... :(
241 M->getBasicBlocks().push_back(BB);
245 case BytecodeFormat::SymbolTable:
246 if (ParseSymbolTable(Buf, Buf+Size)) {
247 cerr << "Error reading method symbol table!\n";
248 delete M; return true;
// NOTE(review): unlike the other error paths in this function, this one
// returns without 'delete M' -- looks like a leak; confirm.
254 if (OldBuf > Buf) return true; // Wrap around!
257 if (align32(Buf, EndBuf)) {
258 delete M; // Malformed bc file, read past end of block.
// All blocks are read: patch every forward reference queued while parsing.
263 if (postResolveValues(LateResolveValues) ||
264 postResolveValues(LateResolveModuleValues)) {
265 delete M; return true; // Unresolvable references!
// Fetch the placeholder that was reserved for this method.
268 Value *MethPHolder = getValue(MTy, MethSlot, false);
269 assert(MethPHolder && "Something is broken no placeholder found!");
270 assert(MethPHolder->getValueType() == Value::MethodVal && "Not a method?");
272 unsigned type; // Type slot
// The assert below calls getTypeSlot, which writes 'type'.  The call is
// repeated unconditionally so that 'type' is still set when asserts are
// compiled out.
273 assert(!getTypeSlot(MTy, type) && "How can meth type not exist?");
274 getTypeSlot(MTy, type);
276 C->getMethodList().push_back(M);
278 // Replace placeholder with the real method pointer...
279 ModuleValues[type][MethSlot] = M;
281 // If anyone is using the placeholder make them use the real method instead
282 MethPHolder->replaceAllUsesWith(M);
284 // We don't need the placeholder anymore!
// ParseModuleGlobalInfo - Read the module-global section from [Buf, End).
// The section holds a list of method signature type IDs terminated by
// Type::VoidTyID.  For each signature a MethPHolder is inserted into
// ModuleValues to reserve a slot, and the (method type, slot number) pair is
// appended to MethodSignatureList.  Returns true if a previous list is still
// pending, on a read or alignment error, or on the other error paths visible
// here.
// NOTE(review): the rest of the signature, the handling after the "not meth
// type" diagnostic, the declaration of TypeSlot and the success return are
// not visible in this excerpt.
290 bool BytecodeParser::ParseModuleGlobalInfo(const uchar *&Buf, const uchar *End,
293 if (!MethodSignatureList.empty()) return true; // Two ModuleGlobal blocks?
295 // Read the method signatures for all of the methods that are coming, and
296 // create fillers in the Value tables.
297 unsigned MethSignature;
298 if (read_vbr(Buf, End, MethSignature)) return true;
299 while (MethSignature != Type::VoidTyID) { // List is terminated by Void
300 const Type *Ty = getType(MethSignature);
// Every entry must name a method type; otherwise a diagnostic is printed.
301 if (!Ty || !Ty->isMethodType()) {
302 cerr << "Method not meth type! ";
303 if (Ty) cerr << Ty->getName(); else cerr << MethSignature; cerr << endl;
307 // When the ModuleGlobalInfo section is read, we load the type of each method
308 // and the 'ModuleValues' slot that it lands in. We then load a placeholder
309 // into its slot to reserve it. When the method is loaded, this placeholder
312 // Insert the placeholder...
313 Value *Def = new MethPHolder(Ty, 0);
// NOTE(review): the result of insertValue is ignored here; if it failed, the
// slot computation below would refer to the wrong entry -- confirm.
314 insertValue(Def, ModuleValues);
316 // Figure out which entry of its typeslot it went into...
318 if (getTypeSlot(Def->getType(), TypeSlot)) return true;
// The placeholder was appended, so it sits in the last entry of its plane.
320 unsigned SlotNo = ModuleValues[TypeSlot].size()-1;
322 // Keep track of this information in a linked list that is emptied as
323 // methods are loaded...
325 MethodSignatureList.push_back(make_pair((const MethodType*)Ty, SlotNo));
326 if (read_vbr(Buf, End, MethSignature)) return true;
329 if (align32(Buf, End)) return true;
331 // This is for future proofing... in the future extra fields may be added that
332 // we don't understand, so we transparently ignore them.
// ParseModule - Parse the top-level module block in [Buf, EndBuf).  The outer
// block must be of type BytecodeFormat::Module and must end exactly at
// EndBuf.  FirstDerivedTyID is read from the start of the block, then each
// nested block (module global info, constant pool, method, symbol table) is
// passed to its parser.  On most error paths visible here the module C is
// deleted and true is returned.
// NOTE(review): the rest of the signature, the declarations of Type and Size,
// the creation of C, the switch header and default label, and the statements
// after the final check are not visible in this excerpt.
338 bool BytecodeParser::ParseModule(const uchar *Buf, const uchar *EndBuf,
342 if (readBlock(Buf, EndBuf, Type, Size)) return true;
// The module block must span the whole remaining buffer.
343 if (Type != BytecodeFormat::Module || Buf+Size != EndBuf)
344 return true; // Hrm, not a class?
346 MethodSignatureList.clear(); // Just in case...
348 // Read into instance variables...
349 if (read_vbr(Buf, EndBuf, FirstDerivedTyID)) return true;
350 if (align32(Buf, EndBuf)) return true;
// Dispatch each nested block to its parser.
354 while (Buf < EndBuf) {
355 const uchar *OldBuf = Buf;
356 if (readBlock(Buf, EndBuf, Type, Size)) { delete C; return true; }
358 case BytecodeFormat::ModuleGlobalInfo:
359 if (ParseModuleGlobalInfo(Buf, Buf+Size, C)) {
360 cerr << "Error reading class global info section!\n";
361 delete C; return true;
365 case BytecodeFormat::ConstantPool:
366 if (ParseConstantPool(Buf, Buf+Size, C->getConstantPool(), ModuleValues)) {
367 cerr << "Error reading class constant pool!\n";
368 delete C; return true;
372 case BytecodeFormat::Method: {
373 if (ParseMethod(Buf, Buf+Size, C)) {
374 delete C; return true; // Error parsing method
379 case BytecodeFormat::SymbolTable:
380 if (ParseSymbolTable(Buf, Buf+Size)) {
381 cerr << "Error reading class symbol table!\n";
382 delete C; return true;
387 cerr << "Unknown class block: " << Type << endl;
// NOTE(review): unlike the other error paths in this loop, this one returns
// without 'delete C' -- looks like a leak; confirm.
389 if (OldBuf > Buf) return true; // Wrap around!
392 if (align32(Buf, EndBuf)) { delete C; return true; }
// Every signature declared in the global info section must have been consumed
// by a method block.
395 if (!MethodSignatureList.empty()) // Expected more methods!
// ParseBytecode - Entry point of the parser for a bytecode image held in
// [Buf, EndBuf).  The image must start with a signature word equal to the
// characters 'l','l','v','m' packed with the first 'l' in the lowest byte.
// The rest is handed to ParseModule.  Returns 0 if the signature is wrong or
// the module fails to parse.
// NOTE(review): the declarations of Sig and Result and the success return are
// not visible in this excerpt; presumably Result is returned -- confirm.
400 Module *BytecodeParser::ParseBytecode(const uchar *Buf, const uchar *EndBuf) {
// Start with no pending forward references.
401 LateResolveValues.clear();
403 // Read and check signature...
404 if (read(Buf, EndBuf, Sig) ||
405 Sig != ('l' | ('l' << 8) | ('v' << 16) | 'm' << 24))
406 return 0; // Invalid signature!
409 if (ParseModule(Buf, EndBuf, Result)) return 0;
// ParseBytecodeBuffer - Parse a bytecode image of Length bytes starting at
// Buffer, using a fresh BytecodeParser, and return whatever ParseBytecode
// returns (0 on the failure paths of ParseBytecode).
414 Module *ParseBytecodeBuffer(const uchar *Buffer, unsigned Length) {
415 BytecodeParser Parser;
416 return Parser.ParseBytecode(Buffer, Buffer+Length);
419 // Parse the bytecode file named Filename (or stdin when Filename is "-") and return the resulting Module...
// ParseBytecodeFile - Load a bytecode image and parse it with a fresh
// BytecodeParser.  A Filename other than "-" is opened read-only, sized with
// fstat and mapped with mmap.  For "-" the image is read from file descriptor
// 0 into a buffer grown with realloc.  Returns 0 on the failure paths visible
// here: open/fstat/mmap failure, a read error, or an empty input.
// NOTE(review): the declarations of StatBuf, Result, FileSize and BlockSize,
// the remaining mmap arguments, the preprocessor conditionals that select
// between the two 'Buf' definitions and the two release calls, and the end
// of the function are not visible in this excerpt.
421 Module *ParseBytecodeFile(const string &Filename) {
425 if (Filename != string("-")) { // Read from a file...
// NOTE(review): data() is passed where a NUL-terminated path is required;
// c_str() is the call that guarantees termination on older standard
// libraries -- confirm.
426 int FD = open(Filename.data(), O_RDONLY);
427 if (FD == -1) return 0;
429 if (fstat(FD, &StatBuf) == -1) { close(FD); return 0; }
// NOTE(review): st_size is an off_t; storing it in an int may truncate for
// very large files -- confirm acceptable.
431 int Length = StatBuf.st_size;
432 if (Length == 0) { close(FD); return 0; }
433 uchar *Buffer = (uchar*)mmap(0, Length, PROT_READ,
435 if (Buffer == (uchar*)-1) { close(FD); return 0; }
437 BytecodeParser Parser;
438 Result = Parser.ParseBytecode(Buffer, Buffer+Length);
440 munmap((char*)Buffer, Length);
442 } else { // Read from stdin
// NOTE(review): Buffer holds 4096 bytes but each read below asks for only 4,
// so the loop performs one read and one realloc per 4 bytes of input --
// presumably sizeof(Buffer) was intended; confirm.
445 uchar Buffer[4096], *FileData = 0;
446 while ((BlockSize = read(0, Buffer, 4))) {
447 if (BlockSize == -1) { free(FileData); return 0; }
// NOTE(review): the realloc result is not checked; on failure it would return
// null, the old block would be lost, and the memcpy below would write
// through an invalid pointer.
449 FileData = (uchar*)realloc(FileData, FileSize+BlockSize);
450 memcpy(FileData+FileSize, Buffer, BlockSize);
451 FileSize += BlockSize;
454 if (FileSize == 0) { free(FileData); return 0; }
// Copy the data into an anonymous private mapping.
// NOTE(review): an mmap failure here is only caught by assert, so it goes
// undetected when asserts are compiled out.
458 uchar *Buf = (uchar*)mmap(0, FileSize, PROT_READ|PROT_WRITE,
459 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
460 assert((Buf != (uchar*)-1) && "mmap returned error!");
462 memcpy(Buf, FileData, FileSize);
464 uchar *Buf = FileData;
467 BytecodeParser Parser;
468 Result = Parser.ParseBytecode(Buf, Buf+FileSize);
471 munmap((char*)Buf, FileSize); // Free mmap'd data area
473 free(FileData); // Free realloc'd block of memory