1 //===- LevelRaise.cpp - Code to change LLVM to higher level -----------------=//
3 // This file implements the 'raising' part of the LevelChange API. This is
4 // useful because, in general, it makes the LLVM code terser and easier to
5 // analyze. Note that it is good to run DCE after doing this transformation.
7 // Eliminate silly things in the source that do not affect the level, but do
10 // - getelementptr/load & getelementptr/store are folded into a direct
12 // - Convert this code (for both alloca and malloc):
13 // %reg110 = shl uint %n, ubyte 2 ;;<uint>
14 // %reg108 = alloca ubyte, uint %reg110 ;;<ubyte*>
15 // %cast76 = cast ubyte* %reg108 to uint* ;;<uint*>
16 // To: %cast76 = alloca uint, uint %n
17 // Convert explicit addressing to use getelementptr instruction where possible
20 // Convert explicit addressing on pointers to use getelementptr instruction.
21 // - If a pointer is used by arithmetic operation, insert an array casted
22 // version into the source program, only for the following pointer types:
23 // * Method argument pointers
24 // - Pointers returned by alloca or malloc
25 // - Pointers returned by function calls
26 // - If a pointer is indexed with a value scaled by a constant size equal
27 // to the element size of the array, the expression is replaced with a
28 // getelementptr instruction.
30 //===----------------------------------------------------------------------===//
32 #include "llvm/Transforms/LevelChange.h"
33 #include "llvm/Method.h"
34 #include "llvm/Support/STLExtras.h"
35 #include "llvm/iOther.h"
36 #include "llvm/iMemory.h"
37 #include "llvm/ConstPoolVals.h"
38 #include "llvm/Target/TargetData.h"
42 #include "llvm/Assembly/Writer.h"
// Uncomment the following line to enable peephole debug tracing.
44 //#define DEBUG_PEEPHOLE_INSTS 1
// PRINT_PEEPHOLE(ID, NUM, I) - When DEBUG_PEEPHOLE_INSTS is defined, write the
// tag ID, the slot number NUM and the instruction I to cerr. The alternative
// definition further down expands to nothing.
46 #ifdef DEBUG_PEEPHOLE_INSTS
47 #define PRINT_PEEPHOLE(ID, NUM, I) \
48 cerr << "Inst P/H " << ID << "[" << NUM << "] " << I;
50 #define PRINT_PEEPHOLE(ID, NUM, I)
// PRINT_PEEPHOLE1/2/3 - Print one, two or three instructions under the same
// tag, numbering them from 0. Each expansion is wrapped in do { } while (0)
// so that it can be used as a single statement.
53 #define PRINT_PEEPHOLE1(ID, I1) do { PRINT_PEEPHOLE(ID, 0, I1); } while (0)
54 #define PRINT_PEEPHOLE2(ID, I1, I2) \
55 do { PRINT_PEEPHOLE(ID, 0, I1); PRINT_PEEPHOLE(ID, 1, I2); } while (0)
56 #define PRINT_PEEPHOLE3(ID, I1, I2, I3) \
57 do { PRINT_PEEPHOLE(ID, 0, I1); PRINT_PEEPHOLE(ID, 1, I2); \
58 PRINT_PEEPHOLE(ID, 2, I3); } while (0)
61 // TargetData Hack: Eventually we will have annotations given to us by the
62 // backend so that we know stuff about type size and alignments. For now
63 // though, just use this, because it happens to match the model that GCC uses.
//
// This file-level object is what the code below queries through
// TD.getTypeSize(...) and TD.getStructLayout(...).
65 const TargetData TD("LevelRaise: Should be GCC though!");
68 // losslessCastableTypes - Return true if the types are bitwise equivalent.
69 // This predicate returns true if it is possible to cast from one type to
70 // another without gaining or losing precision, or altering the bits in any way.
//
// Both T1 and T2 must be primitive or pointer types (asserted below). Two
// types with the same primitive ID are always accepted. Beyond that, the cases
// shown here accept the signed/unsigned pairings ubyte/sbyte, ushort/short and
// uint/int, and a pointer T1 paired with a ulong, long or pointer T2.
72 static bool losslessCastableTypes(const Type *T1, const Type *T2) {
// Precondition: only primitive and pointer types are meaningful here.
73 assert(T1->isPrimitiveType() || isa<PointerType>(T1));
74 assert(T2->isPrimitiveType() || isa<PointerType>(T2));
76 if (T1->getPrimitiveID() == T2->getPrimitiveID())
77 return true; // Handles identity cast, and cast of differing pointer types
79 // Now we know that they are two differing primitive or pointer types
// Dispatch on T1 and compare T2 against the one counterpart type that has the
// same width but the opposite signedness.
80 switch (T1->getPrimitiveID()) {
81 case Type::UByteTyID: return T2 == Type::SByteTy;
82 case Type::SByteTyID: return T2 == Type::UByteTy;
83 case Type::UShortTyID: return T2 == Type::ShortTy;
84 case Type::ShortTyID: return T2 == Type::UShortTy;
85 case Type::UIntTyID: return T2 == Type::IntTy;
86 case Type::IntTyID: return T2 == Type::UIntTy;
// A pointer source is accepted when the destination is ulong, long or any
// pointer type.
89 case Type::PointerTyID:
90 return T2 == Type::ULongTy || T2 == Type::LongTy ||
91 T2->getPrimitiveID() == Type::PointerTyID;
93 return false; // Other types have no identity values
98 // isReinterpretingCast - Return true if the cast instruction specified will
99 // cause the operand to be "reinterpreted". A value is reinterpreted if the
100 // cast instruction would cause the underlying bits to change.
//
// Implemented as the negation of losslessCastableTypes applied to the type of
// the cast's source operand (operand 0) and the cast's own result type.
102 static inline bool isReinterpretingCast(const CastInst *CI) {
103 return !losslessCastableTypes(CI->getOperand(0)->getType(), CI->getType());
107 // getPointedToStruct - If the argument is a pointer type, and the pointed to
108 // value is a struct type, return the struct type, else return null.
110 static const StructType *getPointedToStruct(const Type *Ty) {
// PT is tested for null before use, so a non-pointer Ty simply yields 0.
111 const PointerType *PT = dyn_cast<PointerType>(Ty);
// The second dyn_cast filters the pointee type down to struct types.
112 return PT ? dyn_cast<StructType>(PT->getValueType()) : 0;
116 // getStructOffsetType - Return a vector of offsets that are to be used to index
117 // into the specified struct type to get as close as possible to index as we
118 // can. Note that it is possible that we cannot get exactly to Offset, in which
119 // case we update offset to be the offset we actually obtained. The resultant
120 // leaf type is returned.
//
// Ty      - The type to descend into.
// Offset  - In/out: the byte offset wanted on entry, the offset actually
//           reached on return.
// Offsets - Out: one ubyte constant member index is appended for every struct
//           level that is descended through.
122 static const Type *getStructOffsetType(const Type *Ty, unsigned &Offset,
123 vector<ConstPoolVal*> &Offsets) {
// Base case of the recursion: a non-struct type cannot be indexed further.
124 if (!isa<StructType>(Ty)) {
125 Offset = 0; // Return the offset that we were able to achieve
126 return Ty; // Return the leaf type
129 assert(Offset < TD.getTypeSize(Ty) && "Offset not in struct!");
130 const StructType *STy = cast<StructType>(Ty);
131 const StructLayout *SL = TD.getStructLayout(STy);
133 // This loop terminates always on a 0 <= i < MemberOffsets.size()
// Look for the member i whose range [MemberOffsets[i], MemberOffsets[i+1])
// contains Offset.
135 for (i = 0; i < SL->MemberOffsets.size()-1; ++i)
136 if (Offset >= SL->MemberOffsets[i] && Offset < SL->MemberOffsets[i+1])
// NOTE(review): if the search above runs to completion without an early exit,
// i equals MemberOffsets.size()-1 and this assert reads MemberOffsets[i+1] --
// confirm that index is valid when Offset lies inside the last member.
139 assert(Offset >= SL->MemberOffsets[i] && Offset < SL->MemberOffsets[i+1]);
141 // Make sure to save the current index...
142 Offsets.push_back(ConstPoolUInt::get(Type::UByteTy, i));
// SubOffs is the remaining offset measured from the start of member i; the
// recursive call below refines it within that member's type.
144 unsigned SubOffs = Offset - SL->MemberOffsets[i];
145 const Type *LeafTy = getStructOffsetType(STy->getElementTypes()[i], SubOffs,
147 Offset = SL->MemberOffsets[i] + SubOffs; // Offset actually reached
153 // ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
154 // with a value, then remove and delete the original instruction.
//
// BIL - The instruction list that BI iterates over.
// BI  - Iterator designating the instruction to replace.
// V   - The value that takes over every use of that instruction.
156 static void ReplaceInstWithValue(BasicBlock::InstListType &BIL,
157 BasicBlock::iterator &BI, Value *V) {
158 Instruction *I = *BI;
159 // Replaces all of the uses of the instruction with uses of the value
160 I->replaceAllUsesWith(V);
162 // Remove the unnecessary instruction now...
165 // Make sure to propagate a name if there is one already...
// The name is only handed over when V does not already have one of its own.
166 if (I->hasName() && !V->hasName())
167 V->setName(I->getName(), BIL.getParent()->getSymbolTable());
169 // Remove the dead instruction now...
174 // ReplaceInstWithInst - Replace the instruction specified by BI with the
175 // instruction specified by I. The original instruction is deleted and BI is
176 // updated to point to the new instruction.
//
// I must not already belong to a basic block (asserted below).
178 static void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
179 BasicBlock::iterator &BI, Instruction *I) {
180 assert(I->getParent() == 0 &&
181 "ReplaceInstWithInst: Instruction already inserted into basic block!");
183 // Insert the new instruction into the basic block...
// Presumably insert() returns an iterator to the newly inserted instruction,
// so the +1 leaves BI on the old instruction, which is what
// ReplaceInstWithValue reads through *BI -- TODO confirm.
184 BI = BIL.insert(BI, I)+1;
186 // Replace all uses of the old instruction, and delete it.
187 ReplaceInstWithValue(BIL, BI, I);
189 // Reexamine the instruction just inserted next time around the cleanup pass
195 // ExpressionConvertableToType - Return true if it is possible
// to re-express the value V with type Ty, as judged by the rules below:
//   - V must be an instruction whose type is not already Ty;
//   - a cast qualifies when Ty and the cast's result type are losslessly
//     castable;
//   - add and sub qualify when both operands qualify;
//   - shl and shr qualify when the shifted operand (operand 0) qualifies.
196 static bool ExpressionConvertableToType(Value *V, const Type *Ty) {
197 Instruction *I = dyn_cast<Instruction>(V);
198 if (I == 0) return false; // Noninstructions can't convert
199 if (I->getType() == Ty) return false; // Expression already correct type!
201 switch (I->getOpcode()) {
202 case Instruction::Cast:
203 // We can convert the expr if the cast destination type is losslessly
204 // convertible to the requested type.
205 return losslessCastableTypes(Ty, I->getType());
207 case Instruction::Add:
208 case Instruction::Sub:
209 return ExpressionConvertableToType(I->getOperand(0), Ty) &&
210 ExpressionConvertableToType(I->getOperand(1), Ty);
// Only the value being shifted is checked; the shift amount is not.
211 case Instruction::Shl:
212 case Instruction::Shr:
213 return ExpressionConvertableToType(I->getOperand(0), Ty);
// ConvertExpressionToType - Build an instruction of type Ty that stands in for
// the instruction V. V must satisfy ExpressionConvertableToType(V, Ty), which
// is asserted. A cast is rebuilt with Ty as its destination type; add and sub
// have both operands converted recursively; shl and shr have only the shifted
// operand converted and reuse the original shift amount operand.
219 static Instruction *ConvertExpressionToType(Value *V, const Type *Ty) {
220 Instruction *I = cast<Instruction>(V);
221 assert(ExpressionConvertableToType(I, Ty) && "Inst is not convertable!");
222 BasicBlock *BB = I->getParent();
223 BasicBlock::InstListType &BIL = BB->getInstList();
// Take the name off the old instruction so it can be given to the replacement.
224 string Name = I->getName(); if (!Name.empty()) I->setName("");
225 Instruction *Res; // Result of conversion
227 //cerr << endl << endl << "Type:\t" << Ty << "\nInst: " << I << "BB Before: " << BB << endl;
229 switch (I->getOpcode()) {
230 case Instruction::Cast:
231 Res = new CastInst(I->getOperand(0), Ty, Name);
234 case Instruction::Add:
235 case Instruction::Sub:
236 Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(),
237 ConvertExpressionToType(I->getOperand(0), Ty),
238 ConvertExpressionToType(I->getOperand(1), Ty),
242 case Instruction::Shl:
243 case Instruction::Shr:
244 Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(),
245 ConvertExpressionToType(I->getOperand(0), Ty),
246 I->getOperand(1), Name);
250 assert(0 && "Expression convertable, but don't know how to convert?");
// Locate the original instruction inside its own basic block.
254 BasicBlock::iterator It = find(BIL.begin(), BIL.end(), I);
255 assert(It != BIL.end() && "Instruction not in own basic block??");
258 //cerr << "RInst: " << Res << "BB After: " << BB << endl << endl;
265 // DoInsertArrayCast - If the argument value has a pointer type, and if the
266 // argument value is used as an array, insert a cast before the specified
267 // basic block iterator that casts the value to an array pointer. Return the
268 // new cast instruction (in the CastResult var), or null if no cast is inserted.
//
// The boolean result is true only when a new cast instruction was inserted.
// When a user of V is already a cast to a pointer-to-array of V's element
// type, that existing cast is stored in CastResult and false is returned.
// CurMeth is not referenced in the code shown here.
270 static bool DoInsertArrayCast(Method *CurMeth, Value *V, BasicBlock *BB,
271 BasicBlock::iterator &InsertBefore,
272 CastInst *&CastResult) {
273 const PointerType *ThePtrType = dyn_cast<PointerType>(V->getType());
// Non-pointer values are never array candidates.
274 if (!ThePtrType) return false;
275 bool InsertCast = false;
// Inspect every user of V to decide whether a cast is needed or already
// exists.
277 for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
278 Instruction *Inst = cast<Instruction>(*I);
279 switch (Inst->getOpcode()) {
280 default: break; // Not an interesting use...
281 case Instruction::Add: // It's being used as an array index!
282 //case Instruction::Sub:
285 case Instruction::Cast: // There is already a cast instruction!
// Reuse the cast only if it targets a pointer to an array whose element type
// equals the type V points to.
286 if (const PointerType *PT = dyn_cast<const PointerType>(Inst->getType()))
287 if (const ArrayType *AT = dyn_cast<const ArrayType>(PT->getValueType()))
288 if (AT->getElementType() == ThePtrType->getValueType()) {
289 // Cast already exists! Return the existing one!
290 CastResult = cast<CastInst>(Inst);
291 return false; // No changes made to program though...
297 if (!InsertCast) return false; // There is no reason to insert a cast!
// Build the destination type: a pointer to an array of V's element type.
300 const Type *ElTy = ThePtrType->getValueType();
301 const PointerType *DestTy = PointerType::get(ArrayType::get(ElTy));
303 CastResult = new CastInst(V, DestTy);
304 BB->getInstList().insert(InsertBefore, CastResult);
305 //cerr << "Inserted cast: " << CastResult;
306 return true; // Made a change!
310 // DoInsertArrayCasts - Loop over all "incoming" values in the specified method,
311 // inserting a cast for pointer values that are used as arrays. For our
312 // purposes, an incoming value is considered to be
313 // either a method parameter, a value created by alloca or malloc, or a value
314 // returned from a function call. All casts are kept attached to their original
315 // values through the PtrCasts map.
//
// The code shown here only walks the method's argument list; the remaining
// kinds of incoming value are covered by the TODO at the end. M must not be
// an external method (asserted below).
317 static bool DoInsertArrayCasts(Method *M, map<Value*, CastInst*> &PtrCasts) {
318 assert(!M->isExternal() && "Can't handle external methods!");
320 // Insert casts for all arguments to the function...
321 bool Changed = false;
// New casts go into the first basic block of the method, starting at its
// first instruction.
322 BasicBlock *CurBB = M->front();
323 BasicBlock::iterator It = CurBB->begin();
324 for (Method::ArgumentListType::iterator AI = M->getArgumentList().begin(),
325 AE = M->getArgumentList().end(); AI != AE; ++AI) {
// TheCast stays null unless DoInsertArrayCast inserts or finds a cast.
326 CastInst *TheCast = 0;
327 if (DoInsertArrayCast(M, *AI, CurBB, It, TheCast)) {
328 It = CurBB->begin(); // We might have just invalidated the iterator!
329 Changed = true; // Yes we made a change
330 ++It; // Insert next cast AFTER this one...
333 if (TheCast) // Is there a cast associated with this value?
334 PtrCasts[*AI] = TheCast; // Yes, add it to the map...
337 // TODO: insert casts for alloca, malloc, and function call results. Also,
338 // look for pointers that already have casts, to add to the map.
346 // DoEliminatePointerArithmetic - Loop over each incoming pointer variable,
347 // replacing indexing arithmetic with getelementptr calls.
//
// Val is a (pointer value, array cast of that pointer) pair; the caller in
// this file applies this function to each entry of its PtrCasts map.
349 static bool DoEliminatePointerArithmetic(const pair<Value*, CastInst*> &Val) {
350 Value *V = Val.first; // The original pointer
351 CastInst *CV = Val.second; // The array casted version of the pointer...
// Walk the users of the original pointer looking for add instructions.
353 for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
354 Instruction *Inst = cast<Instruction>(*I);
355 if (Inst->getOpcode() != Instruction::Add)
356 continue; // We only care about add instructions
358 BinaryOperator *Add = cast<BinaryOperator>(Inst);
360 // Make sure the array is the first operand of the add expression...
361 if (Add->getOperand(0) != V)
364 // Get the amount added to the pointer value...
365 Value *AddAmount = Add->getOperand(1);
373 // Peephole Malloc instructions: we take a look at the use chain of the
374 // malloc instruction, and try to find out if the following conditions hold:
375 // 1. The malloc is of the form: 'malloc [sbyte], uint <constant>'
376 // 2. The only users of the malloc are cast instructions
377 // 3. Of the cast instructions, there is only one destination pointer type
378 // [RTy] where the size of the pointed to object is equal to the number
379 // of bytes allocated.
381 // If these conditions hold, we convert the malloc to allocate an [RTy]
382 // element. This should be extended in the future to handle arrays. TODO
//
// BB - The basic block that contains the malloc.
// BI - Iterator designating the malloc instruction; it is handed on to
//      ReplaceInstWithInst when the malloc is replaced.
384 static bool PeepholeMallocInst(BasicBlock *BB, BasicBlock::iterator &BI) {
385 MallocInst *MI = cast<MallocInst>(*BI);
386 if (!MI->isArrayAllocation()) return false; // No array allocation?
// Condition 1: the element count must be a constant uint and the allocated
// type must be an array of sbyte.
388 ConstPoolUInt *Amt = dyn_cast<ConstPoolUInt>(MI->getArraySize());
389 if (Amt == 0 || MI->getAllocatedType() != ArrayType::get(Type::SByteTy))
392 // Get the number of bytes allocated...
393 unsigned Size = Amt->getValue();
// ResultTy stays null until a cast destination of matching size is seen.
394 const Type *ResultTy = 0;
396 // Loop over all of the uses of the malloc instruction, inspecting casts.
397 for (Value::use_iterator I = MI->use_begin(), E = MI->use_end();
399 if (!isa<CastInst>(*I)) {
400 //cerr << "\tnon" << *I;
401 return false; // A non cast user?
403 CastInst *CI = cast<CastInst>(*I);
404 //cerr << "\t" << CI;
406 // We only work on casts to pointer types for sure, be conservative
407 if (!isa<PointerType>(CI->getType())) {
408 cerr << "Found cast of malloc value to non pointer type:\n" << CI;
// Condition 3: compare the size of the pointed-to type with the byte count.
412 const Type *DestTy = cast<PointerType>(CI->getType())->getValueType();
413 if (TD.getTypeSize(DestTy) == Size && DestTy != ResultTy) {
414 // Does the size of the allocated type match the number of bytes
418 ResultTy = DestTy; // Keep note of this for future uses...
420 // It's overdefined! We don't know which type to convert to!
426 // If we get this far, we have either found, or not, a type that is cast to
427 // that is of the same size as the malloc instruction.
428 if (!ResultTy) return false;
// Swap the byte-array malloc for a malloc of the discovered type.
430 PRINT_PEEPHOLE1("mall-refine:in ", MI);
431 ReplaceInstWithInst(BB->getInstList(), BI,
432 MI = new MallocInst(PointerType::get(ResultTy)));
433 PRINT_PEEPHOLE1("mall-refine:out", MI);
// PeepholeOptimize - Try to apply one local simplification to the instruction
// designated by BI inside BB. Three shapes are examined: cast instructions,
// malloc instructions (delegated to PeepholeMallocInst), and adds whose second
// operand is a cast. BI is taken by reference and is reassigned by some of the
// rewrites below.
439 static bool PeepholeOptimize(BasicBlock *BB, BasicBlock::iterator &BI) {
440 Instruction *I = *BI;
// Instructions without any users are left alone.
441 if (I->use_size() == 0) return false;
443 if (CastInst *CI = dyn_cast<CastInst>(I)) {
444 Value *Src = CI->getOperand(0);
445 Instruction *SrcI = dyn_cast<Instruction>(Src); // Nonnull if instr source
446 const Type *DestTy = CI->getType();
448 // Check for a cast of the same type as the destination!
449 if (DestTy == Src->getType()) {
450 PRINT_PEEPHOLE1("cast-of-self-ty", CI);
451 CI->replaceAllUsesWith(Src);
// Hand the cast's name to its source when the source is unnamed.
452 if (!Src->hasName() && CI->hasName()) {
453 string Name = CI->getName();
454 CI->setName(""); Src->setName(Name);
459 // Check for a cast of cast, where no size information is lost...
461 if (CastInst *CSrc = dyn_cast<CastInst>(SrcI))
// The two bool results are summed: a total below 2 means at most one of the
// two casts reinterprets its input.
462 if (isReinterpretingCast(CI) + isReinterpretingCast(CSrc) < 2) {
463 // We can only do c-c elimination if, at most, one cast does a
464 // reinterpretation of the input data.
466 // If legal, make this cast refer to the original cast's argument!
468 PRINT_PEEPHOLE2("cast-cast:in ", CI, CSrc);
469 CI->setOperand(0, CSrc->getOperand(0));
470 PRINT_PEEPHOLE1("cast-cast:out", CI);
474 // Check to see if it's a cast of an instruction that does not depend on the
475 // specific type of the operands to do its job.
476 if (SrcI && !isReinterpretingCast(CI) &&
477 ExpressionConvertableToType(SrcI, DestTy)) {
478 PRINT_PEEPHOLE2("EXPR-CONV:in ", CI, SrcI);
479 CI->setOperand(0, ConvertExpressionToType(SrcI, DestTy));
480 BI = BB->begin(); // Rescan basic block. BI might be invalidated.
481 PRINT_PEEPHOLE2("EXPR-CONV:out", CI, CI->getOperand(0));
485 } else if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
486 if (PeepholeMallocInst(BB, BI)) return true;
487 } else if (I->getOpcode() == Instruction::Add &&
488 isa<CastInst>(I->getOperand(1))) {
490 // Peephole optimize the following instructions:
491 // %t1 = cast ulong <const int> to {<...>} *
492 // %t2 = add {<...>} * %SP, %t1 ;; Constant must be 2nd operand
495 // %t1 = cast {<...>}* %SP to int*
496 // %t5 = cast ulong <const int> to int*
497 // %t2 = add int* %t1, %t5 ;; int is same size as field
499 // Into: %t3 = getelementptr {<...>} * %SP, <element indices>
500 // %t2 = cast <eltype> * %t3 to {<...>}*
502 Value *AddOp1 = I->getOperand(0);
503 CastInst *AddOp2 = cast<CastInst>(I->getOperand(1));
// Offset is 0 when the value being cast is not a ConstPoolUInt; a zero offset
// makes the condition further down fail.
504 ConstPoolUInt *OffsetV = dyn_cast<ConstPoolUInt>(AddOp2->getOperand(0));
505 unsigned Offset = OffsetV ? OffsetV->getValue() : 0;
506 Value *SrcPtr; // Of type pointer to struct...
507 const StructType *StructTy;
509 if ((StructTy = getPointedToStruct(AddOp1->getType()))) {
510 SrcPtr = AddOp1; // Handle the first case...
511 } else if (CastInst *AddOp1c = dyn_cast<CastInst>(AddOp1)) {
512 SrcPtr = AddOp1c->getOperand(0); // Handle the second case...
513 StructTy = getPointedToStruct(SrcPtr->getType());
516 // Only proceed if we have detected all of our conditions successfully...
// SrcPtr is only assigned on the two paths above. StructTy is null on every
// path that leaves SrcPtr unset, so the short-circuit on StructTy keeps the
// unset SrcPtr from being read.
517 if (Offset && StructTy && SrcPtr && Offset < TD.getTypeSize(StructTy)) {
518 const StructLayout *SL = TD.getStructLayout(StructTy);
519 vector<ConstPoolVal*> Offsets;
// ActualOffset is passed by reference and comes back as the offset that the
// computed member indices really reach.
520 unsigned ActualOffset = Offset;
521 const Type *ElTy = getStructOffsetType(StructTy, ActualOffset, Offsets);
523 if (getPointedToStruct(AddOp1->getType())) { // case 1
524 PRINT_PEEPHOLE2("add-to-gep1:in", AddOp2, I);
526 PRINT_PEEPHOLE3("add-to-gep2:in", AddOp1, AddOp2, I);
// Insert the getelementptr, then step BI past it (same insert-then-+1 idiom as
// ReplaceInstWithInst).
529 GetElementPtrInst *GEP = new GetElementPtrInst(SrcPtr, Offsets);
530 BI = BB->getInstList().insert(BI, GEP)+1;
// Only offsets that land exactly on the start of an element are supported.
532 assert(Offset-ActualOffset == 0 &&
533 "GEP to middle of element not implemented yet!");
// Replace the add with a cast of the getelementptr result to the add's type.
535 ReplaceInstWithInst(BB->getInstList(), BI,
536 I = new CastInst(GEP, I->getType()));
537 PRINT_PEEPHOLE2("add-to-gep:out", GEP, I);
// DoRaisePass - Walk every basic block of M and offer each instruction
// position to PeepholeOptimize. The caller in this file repeats the pass for
// as long as it returns true.
548 static bool DoRaisePass(Method *M) {
549 bool Changed = false;
550 for (Method::iterator MI = M->begin(), ME = M->end(); MI != ME; ++MI) {
551 BasicBlock *BB = *MI;
552 BasicBlock::InstListType &BIL = BB->getInstList();
// The for statement has no increment clause: BI is passed by reference to
// PeepholeOptimize, which reassigns it in some of its rewrites.
554 for (BasicBlock::iterator BI = BB->begin(); BI != BB->end();) {
555 if (PeepholeOptimize(BB, BI))
565 // RaisePointerReferences::doit - Raise a method representation to a higher
// level: repeat DoRaisePass until it reports no further change, then insert
// array casts for incoming pointer values and rewrite pointer arithmetic on
// them. External methods are skipped and yield false.
568 bool RaisePointerReferences::doit(Method *M) {
569 if (M->isExternal()) return false;
570 bool Changed = false;
// Iterate the peephole pass to a fixed point.
572 while (DoRaisePass(M)) Changed = true;
574 // PtrCasts - Keep a mapping between the pointer values (the key of the
575 // map), and the cast to array pointer (the value) in this map. This is
576 // used when converting pointer math into array addressing.
578 map<Value*, CastInst*> PtrCasts;
580 // Insert casts for all incoming pointer values. Keep track of those casts
581 // and the identified incoming values in the PtrCasts map.
583 Changed |= DoInsertArrayCasts(M, PtrCasts);
585 // Loop over each incoming pointer variable, replacing indexing arithmetic
586 // with getelementptr calls.
// DoEliminatePointerArithmetic is applied to every (pointer, cast) entry of
// PtrCasts and the results are folded into Changed.
588 Changed |= reduce_apply_bool(PtrCasts.begin(), PtrCasts.end(),
589 ptr_fun(DoEliminatePointerArithmetic));