Revert 101465, it broke internal OpenGL testing.
[oota-llvm.git] / lib / CodeGen / IntrinsicLowering.cpp
1 //===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the IntrinsicLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Constants.h"
15 #include "llvm/DerivedTypes.h"
16 #include "llvm/Module.h"
17 #include "llvm/Type.h"
18 #include "llvm/CodeGen/IntrinsicLowering.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/IRBuilder.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Target/TargetData.h"
23 #include "llvm/ADT/SmallVector.h"
24 using namespace llvm;
25
26 template <class ArgIt>
27 static void EnsureFunctionExists(Module &M, const char *Name,
28                                  ArgIt ArgBegin, ArgIt ArgEnd,
29                                  const Type *RetTy) {
30   // Insert a correctly-typed definition now.
31   std::vector<const Type *> ParamTys;
32   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
33     ParamTys.push_back(I->getType());
34   M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
35 }
36
37 static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
38                                     const char *FName,
39                                     const char *DName, const char *LDName) {
40   // Insert definitions for all the floating point types.
41   switch((int)Fn->arg_begin()->getType()->getTypeID()) {
42   case Type::FloatTyID:
43     EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
44                          Type::getFloatTy(M.getContext()));
45     break;
46   case Type::DoubleTyID:
47     EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
48                          Type::getDoubleTy(M.getContext()));
49     break;
50   case Type::X86_FP80TyID:
51   case Type::FP128TyID:
52   case Type::PPC_FP128TyID:
53     EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
54                          Fn->arg_begin()->getType());
55     break;
56   }
57 }
58
59 /// ReplaceCallWith - This function is used when we want to lower an intrinsic
60 /// call to a call of an external function.  This handles hard cases such as
61 /// when there was already a prototype for the external function, and if that
62 /// prototype doesn't match the arguments we expect to pass in.
63 template <class ArgIt>
64 static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
65                                  ArgIt ArgBegin, ArgIt ArgEnd,
66                                  const Type *RetTy) {
67   // If we haven't already looked up this function, check to see if the
68   // program already contains a function with this name.
69   Module *M = CI->getParent()->getParent()->getParent();
70   // Get or insert the definition now.
71   std::vector<const Type *> ParamTys;
72   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
73     ParamTys.push_back((*I)->getType());
74   Constant* FCache = M->getOrInsertFunction(NewFn,
75                                   FunctionType::get(RetTy, ParamTys, false));
76
77   IRBuilder<> Builder(CI->getParent(), CI);
78   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
79   CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
80   NewCI->setName(CI->getName());
81   if (!CI->use_empty())
82     CI->replaceAllUsesWith(NewCI);
83   return NewCI;
84 }
85
86 void IntrinsicLowering::AddPrototypes(Module &M) {
87   LLVMContext &Context = M.getContext();
88   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
89     if (I->isDeclaration() && !I->use_empty())
90       switch (I->getIntrinsicID()) {
91       default: break;
92       case Intrinsic::setjmp:
93         EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
94                              Type::getInt32Ty(M.getContext()));
95         break;
96       case Intrinsic::longjmp:
97         EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
98                              Type::getVoidTy(M.getContext()));
99         break;
100       case Intrinsic::siglongjmp:
101         EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
102                              Type::getVoidTy(M.getContext()));
103         break;
104       case Intrinsic::memcpy:
105         M.getOrInsertFunction("memcpy",
106           Type::getInt8PtrTy(Context),
107                               Type::getInt8PtrTy(Context), 
108                               Type::getInt8PtrTy(Context), 
109                               TD.getIntPtrType(Context), (Type *)0);
110         break;
111       case Intrinsic::memmove:
112         M.getOrInsertFunction("memmove",
113           Type::getInt8PtrTy(Context),
114                               Type::getInt8PtrTy(Context), 
115                               Type::getInt8PtrTy(Context), 
116                               TD.getIntPtrType(Context), (Type *)0);
117         break;
118       case Intrinsic::memset:
119         M.getOrInsertFunction("memset",
120           Type::getInt8PtrTy(Context),
121                               Type::getInt8PtrTy(Context), 
122                               Type::getInt32Ty(M.getContext()), 
123                               TD.getIntPtrType(Context), (Type *)0);
124         break;
125       case Intrinsic::sqrt:
126         EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
127         break;
128       case Intrinsic::sin:
129         EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
130         break;
131       case Intrinsic::cos:
132         EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
133         break;
134       case Intrinsic::pow:
135         EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
136         break;
137       case Intrinsic::log:
138         EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
139         break;
140       case Intrinsic::log2:
141         EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
142         break;
143       case Intrinsic::log10:
144         EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
145         break;
146       case Intrinsic::exp:
147         EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
148         break;
149       case Intrinsic::exp2:
150         EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
151         break;
152       }
153 }
154
155 /// LowerBSWAP - Emit the code to lower bswap of V before the specified
156 /// instruction IP.
157 static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
158   assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
159
160   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
161   
162   IRBuilder<> Builder(IP->getParent(), IP);
163
164   switch(BitSize) {
165   default: llvm_unreachable("Unhandled type size of value to byteswap!");
166   case 16: {
167     Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
168                                     "bswap.2");
169     Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
170                                      "bswap.1");
171     V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
172     break;
173   }
174   case 32: {
175     Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
176                                     "bswap.4");
177     Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
178                                     "bswap.3");
179     Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
180                                      "bswap.2");
181     Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
182                                      "bswap.1");
183     Tmp3 = Builder.CreateAnd(Tmp3,
184                          ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
185                              "bswap.and3");
186     Tmp2 = Builder.CreateAnd(Tmp2,
187                            ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
188                              "bswap.and2");
189     Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
190     Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
191     V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
192     break;
193   }
194   case 64: {
195     Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
196                                     "bswap.8");
197     Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
198                                     "bswap.7");
199     Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
200                                     "bswap.6");
201     Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
202                                     "bswap.5");
203     Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
204                                      "bswap.4");
205     Value* Tmp3 = Builder.CreateLShr(V, 
206                                      ConstantInt::get(V->getType(), 24),
207                                      "bswap.3");
208     Value* Tmp2 = Builder.CreateLShr(V, 
209                                      ConstantInt::get(V->getType(), 40),
210                                      "bswap.2");
211     Value* Tmp1 = Builder.CreateLShr(V, 
212                                      ConstantInt::get(V->getType(), 56),
213                                      "bswap.1");
214     Tmp7 = Builder.CreateAnd(Tmp7,
215                              ConstantInt::get(Type::getInt64Ty(Context),
216                                               0xFF000000000000ULL),
217                              "bswap.and7");
218     Tmp6 = Builder.CreateAnd(Tmp6,
219                              ConstantInt::get(Type::getInt64Ty(Context),
220                                               0xFF0000000000ULL),
221                              "bswap.and6");
222     Tmp5 = Builder.CreateAnd(Tmp5,
223                         ConstantInt::get(Type::getInt64Ty(Context),
224                              0xFF00000000ULL),
225                              "bswap.and5");
226     Tmp4 = Builder.CreateAnd(Tmp4,
227                         ConstantInt::get(Type::getInt64Ty(Context),
228                              0xFF000000ULL),
229                              "bswap.and4");
230     Tmp3 = Builder.CreateAnd(Tmp3,
231                              ConstantInt::get(Type::getInt64Ty(Context),
232                              0xFF0000ULL),
233                              "bswap.and3");
234     Tmp2 = Builder.CreateAnd(Tmp2,
235                              ConstantInt::get(Type::getInt64Ty(Context),
236                              0xFF00ULL),
237                              "bswap.and2");
238     Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
239     Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
240     Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
241     Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
242     Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
243     Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
244     V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
245     break;
246   }
247   }
248   return V;
249 }
250
251 /// LowerCTPOP - Emit the code to lower ctpop of V before the specified
252 /// instruction IP.
253 static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
254   assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
255
256   static const uint64_t MaskValues[6] = {
257     0x5555555555555555ULL, 0x3333333333333333ULL,
258     0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
259     0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
260   };
261
262   IRBuilder<> Builder(IP->getParent(), IP);
263
264   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
265   unsigned WordSize = (BitSize + 63) / 64;
266   Value *Count = ConstantInt::get(V->getType(), 0);
267
268   for (unsigned n = 0; n < WordSize; ++n) {
269     Value *PartValue = V;
270     for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); 
271          i <<= 1, ++ct) {
272       Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
273       Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1");
274       Value *VShift = Builder.CreateLShr(PartValue,
275                                         ConstantInt::get(V->getType(), i),
276                                          "ctpop.sh");
277       Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2");
278       PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
279     }
280     Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
281     if (BitSize > 64) {
282       V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
283                              "ctpop.part.sh");
284       BitSize -= 64;
285     }
286   }
287
288   return Count;
289 }
290
291 /// LowerCTLZ - Emit the code to lower ctlz of V before the specified
292 /// instruction IP.
293 static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
294
295   IRBuilder<> Builder(IP->getParent(), IP);
296
297   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
298   for (unsigned i = 1; i < BitSize; i <<= 1) {
299     Value *ShVal = ConstantInt::get(V->getType(), i);
300     ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
301     V = Builder.CreateOr(V, ShVal, "ctlz.step");
302   }
303
304   V = Builder.CreateNot(V);
305   return LowerCTPOP(Context, V, IP);
306 }
307
308 static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
309                                        const char *Dname,
310                                        const char *LDname) {
311   switch (CI->getOperand(1)->getType()->getTypeID()) {
312   default: llvm_unreachable("Invalid type in intrinsic");
313   case Type::FloatTyID:
314     ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
315                   Type::getFloatTy(CI->getContext()));
316     break;
317   case Type::DoubleTyID:
318     ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
319                   Type::getDoubleTy(CI->getContext()));
320     break;
321   case Type::X86_FP80TyID:
322   case Type::FP128TyID:
323   case Type::PPC_FP128TyID:
324     ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
325                   CI->getOperand(1)->getType());
326     break;
327   }
328 }
329
330 void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
331   IRBuilder<> Builder(CI->getParent(), CI);
332   LLVMContext &Context = CI->getContext();
333
334   const Function *Callee = CI->getCalledFunction();
335   assert(Callee && "Cannot lower an indirect call!");
336
337   switch (Callee->getIntrinsicID()) {
338   case Intrinsic::not_intrinsic:
339     report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
340                       Callee->getName() + "'!");
341   default:
342     report_fatal_error("Code generator does not support intrinsic function '"+
343                       Callee->getName()+"'!");
344
345     // The setjmp/longjmp intrinsics should only exist in the code if it was
346     // never optimized (ie, right out of the CFE), or if it has been hacked on
347     // by the lowerinvoke pass.  In both cases, the right thing to do is to
348     // convert the call to an explicit setjmp or longjmp call.
349   case Intrinsic::setjmp: {
350     Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
351                                Type::getInt32Ty(Context));
352     if (!CI->getType()->isVoidTy())
353       CI->replaceAllUsesWith(V);
354     break;
355   }
356   case Intrinsic::sigsetjmp:
357      if (!CI->getType()->isVoidTy())
358        CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
359      break;
360
361   case Intrinsic::longjmp: {
362     ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
363                     Type::getVoidTy(Context));
364     break;
365   }
366
367   case Intrinsic::siglongjmp: {
368     // Insert the call to abort
369     ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), 
370                     Type::getVoidTy(Context));
371     break;
372   }
373   case Intrinsic::ctpop:
374     CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI));
375     break;
376
377   case Intrinsic::bswap:
378     CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI));
379     break;
380     
381   case Intrinsic::ctlz:
382     CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI));
383     break;
384
385   case Intrinsic::cttz: {
386     // cttz(x) -> ctpop(~X & (X-1))
387     Value *Src = CI->getOperand(1);
388     Value *NotSrc = Builder.CreateNot(Src);
389     NotSrc->setName(Src->getName() + ".not");
390     Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
391     SrcM1 = Builder.CreateSub(Src, SrcM1);
392     Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
393     CI->replaceAllUsesWith(Src);
394     break;
395   }
396
397   case Intrinsic::stacksave:
398   case Intrinsic::stackrestore: {
399     if (!Warned)
400       errs() << "WARNING: this target does not support the llvm.stack"
401              << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
402                "save" : "restore") << " intrinsic.\n";
403     Warned = true;
404     if (Callee->getIntrinsicID() == Intrinsic::stacksave)
405       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
406     break;
407   }
408     
409   case Intrinsic::returnaddress:
410   case Intrinsic::frameaddress:
411     errs() << "WARNING: this target does not support the llvm."
412            << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
413              "return" : "frame") << "address intrinsic.\n";
414     CI->replaceAllUsesWith(ConstantPointerNull::get(
415                                             cast<PointerType>(CI->getType())));
416     break;
417
418   case Intrinsic::prefetch:
419     break;    // Simply strip out prefetches on unsupported architectures
420
421   case Intrinsic::pcmarker:
422     break;    // Simply strip out pcmarker on unsupported architectures
423   case Intrinsic::readcyclecounter: {
424     errs() << "WARNING: this target does not support the llvm.readcyclecoun"
425            << "ter intrinsic.  It is being lowered to a constant 0\n";
426     CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
427     break;
428   }
429
430   case Intrinsic::dbg_declare:
431     break;    // Simply strip out debugging intrinsics
432
433   case Intrinsic::eh_exception:
434   case Intrinsic::eh_selector:
435     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
436     break;
437
438   case Intrinsic::eh_typeid_for:
439     // Return something different to eh_selector.
440     CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
441     break;
442
443   case Intrinsic::var_annotation:
444     break;   // Strip out annotate intrinsic
445     
446   case Intrinsic::memcpy: {
447     const IntegerType *IntPtr = TD.getIntPtrType(Context);
448     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
449                                         /* isSigned */ false);
450     Value *Ops[3];
451     Ops[0] = CI->getOperand(1);
452     Ops[1] = CI->getOperand(2);
453     Ops[2] = Size;
454     ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
455     break;
456   }
457   case Intrinsic::memmove: {
458     const IntegerType *IntPtr = TD.getIntPtrType(Context);
459     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
460                                         /* isSigned */ false);
461     Value *Ops[3];
462     Ops[0] = CI->getOperand(1);
463     Ops[1] = CI->getOperand(2);
464     Ops[2] = Size;
465     ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
466     break;
467   }
468   case Intrinsic::memset: {
469     const IntegerType *IntPtr = TD.getIntPtrType(Context);
470     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
471                                         /* isSigned */ false);
472     Value *Ops[3];
473     Ops[0] = CI->getOperand(1);
474     // Extend the amount to i32.
475     Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context),
476                                    /* isSigned */ false);
477     Ops[2] = Size;
478     ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
479     break;
480   }
481   case Intrinsic::sqrt: {
482     ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
483     break;
484   }
485   case Intrinsic::log: {
486     ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
487     break;
488   }
489   case Intrinsic::log2: {
490     ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
491     break;
492   }
493   case Intrinsic::log10: {
494     ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
495     break;
496   }
497   case Intrinsic::exp: {
498     ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
499     break;
500   }
501   case Intrinsic::exp2: {
502     ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
503     break;
504   }
505   case Intrinsic::pow: {
506     ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
507     break;
508   }
509   case Intrinsic::flt_rounds:
510      // Lower to "round to the nearest"
511      if (!CI->getType()->isVoidTy())
512        CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
513      break;
514   case Intrinsic::invariant_start:
515   case Intrinsic::lifetime_start:
516     // Discard region information.
517     CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
518     break;
519   case Intrinsic::invariant_end:
520   case Intrinsic::lifetime_end:
521     // Discard region information.
522     break;
523   }
524
525   assert(CI->use_empty() &&
526          "Lowering should have eliminated any uses of the intrinsic call!");
527   CI->eraseFromParent();
528 }