Add intrinsics for log, log2, log10, exp, exp2.
[oota-llvm.git] / lib / CodeGen / IntrinsicLowering.cpp
1 //===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the IntrinsicLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Constants.h"
15 #include "llvm/DerivedTypes.h"
16 #include "llvm/Module.h"
17 #include "llvm/Instructions.h"
18 #include "llvm/Type.h"
19 #include "llvm/CodeGen/IntrinsicLowering.h"
20 #include "llvm/Support/Streams.h"
21 #include "llvm/Target/TargetData.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/STLExtras.h"
24 using namespace llvm;
25
26 template <class ArgIt>
27 static void EnsureFunctionExists(Module &M, const char *Name,
28                                  ArgIt ArgBegin, ArgIt ArgEnd,
29                                  const Type *RetTy) {
30   // Insert a correctly-typed definition now.
31   std::vector<const Type *> ParamTys;
32   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
33     ParamTys.push_back(I->getType());
34   M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
35 }
36
37 /// ReplaceCallWith - This function is used when we want to lower an intrinsic
38 /// call to a call of an external function.  This handles hard cases such as
39 /// when there was already a prototype for the external function, and if that
40 /// prototype doesn't match the arguments we expect to pass in.
41 template <class ArgIt>
42 static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
43                                  ArgIt ArgBegin, ArgIt ArgEnd,
44                                  const Type *RetTy, Constant *&FCache) {
45   if (!FCache) {
46     // If we haven't already looked up this function, check to see if the
47     // program already contains a function with this name.
48     Module *M = CI->getParent()->getParent()->getParent();
49     // Get or insert the definition now.
50     std::vector<const Type *> ParamTys;
51     for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
52       ParamTys.push_back((*I)->getType());
53     FCache = M->getOrInsertFunction(NewFn,
54                                     FunctionType::get(RetTy, ParamTys, false));
55   }
56
57   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
58   CallInst *NewCI = CallInst::Create(FCache, Args.begin(), Args.end(),
59                                      CI->getName(), CI);
60   if (!CI->use_empty())
61     CI->replaceAllUsesWith(NewCI);
62   return NewCI;
63 }
64
65 void IntrinsicLowering::AddPrototypes(Module &M) {
66   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
67     if (I->isDeclaration() && !I->use_empty())
68       switch (I->getIntrinsicID()) {
69       default: break;
70       case Intrinsic::setjmp:
71         EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
72                              Type::Int32Ty);
73         break;
74       case Intrinsic::longjmp:
75         EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
76                              Type::VoidTy);
77         break;
78       case Intrinsic::siglongjmp:
79         EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
80                              Type::VoidTy);
81         break;
82       case Intrinsic::memcpy_i32:
83       case Intrinsic::memcpy_i64:
84         M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
85                               PointerType::getUnqual(Type::Int8Ty), 
86                               PointerType::getUnqual(Type::Int8Ty), 
87                               TD.getIntPtrType(), (Type *)0);
88         break;
89       case Intrinsic::memmove_i32:
90       case Intrinsic::memmove_i64:
91         M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
92                               PointerType::getUnqual(Type::Int8Ty), 
93                               PointerType::getUnqual(Type::Int8Ty), 
94                               TD.getIntPtrType(), (Type *)0);
95         break;
96       case Intrinsic::memset_i32:
97       case Intrinsic::memset_i64:
98         M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
99                               PointerType::getUnqual(Type::Int8Ty), 
100                               Type::Int32Ty, 
101                               TD.getIntPtrType(), (Type *)0);
102         break;
103       case Intrinsic::sqrt:
104         switch((int)I->arg_begin()->getType()->getTypeID()) {
105         case Type::FloatTyID:
106           EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(),
107                                Type::FloatTy);
108         case Type::DoubleTyID:
109           EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(),
110                                Type::DoubleTy);
111         case Type::X86_FP80TyID:
112         case Type::FP128TyID:
113         case Type::PPC_FP128TyID:
114           EnsureFunctionExists(M, "sqrtl", I->arg_begin(), I->arg_end(),
115                                I->arg_begin()->getType());
116         }
117         break;
118       case Intrinsic::sin:
119         switch((int)I->arg_begin()->getType()->getTypeID()) {
120         case Type::FloatTyID:
121           EnsureFunctionExists(M, "sinf", I->arg_begin(), I->arg_end(),
122                                Type::FloatTy);
123         case Type::DoubleTyID:
124           EnsureFunctionExists(M, "sin", I->arg_begin(), I->arg_end(),
125                                Type::DoubleTy);
126         case Type::X86_FP80TyID:
127         case Type::FP128TyID:
128         case Type::PPC_FP128TyID:
129           EnsureFunctionExists(M, "sinl", I->arg_begin(), I->arg_end(),
130                                I->arg_begin()->getType());
131         }
132         break;
133       case Intrinsic::cos:
134         switch((int)I->arg_begin()->getType()->getTypeID()) {
135         case Type::FloatTyID:
136           EnsureFunctionExists(M, "cosf", I->arg_begin(), I->arg_end(),
137                                Type::FloatTy);
138         case Type::DoubleTyID:
139           EnsureFunctionExists(M, "cos", I->arg_begin(), I->arg_end(),
140                                Type::DoubleTy);
141         case Type::X86_FP80TyID:
142         case Type::FP128TyID:
143         case Type::PPC_FP128TyID:
144           EnsureFunctionExists(M, "cosl", I->arg_begin(), I->arg_end(),
145                                I->arg_begin()->getType());
146         }
147         break;
148       case Intrinsic::pow:
149         switch((int)I->arg_begin()->getType()->getTypeID()) {
150         case Type::FloatTyID:
151           EnsureFunctionExists(M, "powf", I->arg_begin(), I->arg_end(),
152                                Type::FloatTy);
153         case Type::DoubleTyID:
154           EnsureFunctionExists(M, "pow", I->arg_begin(), I->arg_end(),
155                                Type::DoubleTy);
156         case Type::X86_FP80TyID:
157         case Type::FP128TyID:
158         case Type::PPC_FP128TyID:
159           EnsureFunctionExists(M, "powl", I->arg_begin(), I->arg_end(),
160                                I->arg_begin()->getType());
161         }
162         break;
163       case Intrinsic::log:
164         switch((int)I->arg_begin()->getType()->getTypeID()) {
165         case Type::FloatTyID:
166           EnsureFunctionExists(M, "logf", I->arg_begin(), I->arg_end(),
167                                Type::FloatTy);
168         case Type::DoubleTyID:
169           EnsureFunctionExists(M, "log", I->arg_begin(), I->arg_end(),
170                                Type::DoubleTy);
171         case Type::X86_FP80TyID:
172         case Type::FP128TyID:
173         case Type::PPC_FP128TyID:
174           EnsureFunctionExists(M, "logl", I->arg_begin(), I->arg_end(),
175                                I->arg_begin()->getType());
176         }
177         break;
178       case Intrinsic::log2:
179         switch((int)I->arg_begin()->getType()->getTypeID()) {
180         case Type::FloatTyID:
181           EnsureFunctionExists(M, "log2f", I->arg_begin(), I->arg_end(),
182                                Type::FloatTy);
183         case Type::DoubleTyID:
184           EnsureFunctionExists(M, "log2", I->arg_begin(), I->arg_end(),
185                                Type::DoubleTy);
186         case Type::X86_FP80TyID:
187         case Type::FP128TyID:
188         case Type::PPC_FP128TyID:
189           EnsureFunctionExists(M, "log2l", I->arg_begin(), I->arg_end(),
190                                I->arg_begin()->getType());
191         }
192         break;
193       case Intrinsic::log10:
194         switch((int)I->arg_begin()->getType()->getTypeID()) {
195         case Type::FloatTyID:
196           EnsureFunctionExists(M, "log10f", I->arg_begin(), I->arg_end(),
197                                Type::FloatTy);
198         case Type::DoubleTyID:
199           EnsureFunctionExists(M, "log10", I->arg_begin(), I->arg_end(),
200                                Type::DoubleTy);
201         case Type::X86_FP80TyID:
202         case Type::FP128TyID:
203         case Type::PPC_FP128TyID:
204           EnsureFunctionExists(M, "log10l", I->arg_begin(), I->arg_end(),
205                                I->arg_begin()->getType());
206         }
207         break;
208       case Intrinsic::exp:
209         switch((int)I->arg_begin()->getType()->getTypeID()) {
210         case Type::FloatTyID:
211           EnsureFunctionExists(M, "expf", I->arg_begin(), I->arg_end(),
212                                Type::FloatTy);
213         case Type::DoubleTyID:
214           EnsureFunctionExists(M, "exp", I->arg_begin(), I->arg_end(),
215                                Type::DoubleTy);
216         case Type::X86_FP80TyID:
217         case Type::FP128TyID:
218         case Type::PPC_FP128TyID:
219           EnsureFunctionExists(M, "expl", I->arg_begin(), I->arg_end(),
220                                I->arg_begin()->getType());
221         }
222         break;
223       case Intrinsic::exp2:
224         switch((int)I->arg_begin()->getType()->getTypeID()) {
225         case Type::FloatTyID:
226           EnsureFunctionExists(M, "exp2f", I->arg_begin(), I->arg_end(),
227                                Type::FloatTy);
228         case Type::DoubleTyID:
229           EnsureFunctionExists(M, "exp2", I->arg_begin(), I->arg_end(),
230                                Type::DoubleTy);
231         case Type::X86_FP80TyID:
232         case Type::FP128TyID:
233         case Type::PPC_FP128TyID:
234           EnsureFunctionExists(M, "exp2l", I->arg_begin(), I->arg_end(),
235                                I->arg_begin()->getType());
236         }
237         break;
238       }
239 }
240
241 /// LowerBSWAP - Emit the code to lower bswap of V before the specified
242 /// instruction IP.
243 static Value *LowerBSWAP(Value *V, Instruction *IP) {
244   assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
245
246   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
247   
248   switch(BitSize) {
249   default: assert(0 && "Unhandled type size of value to byteswap!");
250   case 16: {
251     Value *Tmp1 = BinaryOperator::CreateShl(V,
252                                 ConstantInt::get(V->getType(),8),"bswap.2",IP);
253     Value *Tmp2 = BinaryOperator::CreateLShr(V,
254                                 ConstantInt::get(V->getType(),8),"bswap.1",IP);
255     V = BinaryOperator::CreateOr(Tmp1, Tmp2, "bswap.i16", IP);
256     break;
257   }
258   case 32: {
259     Value *Tmp4 = BinaryOperator::CreateShl(V,
260                               ConstantInt::get(V->getType(),24),"bswap.4", IP);
261     Value *Tmp3 = BinaryOperator::CreateShl(V,
262                               ConstantInt::get(V->getType(),8),"bswap.3",IP);
263     Value *Tmp2 = BinaryOperator::CreateLShr(V,
264                               ConstantInt::get(V->getType(),8),"bswap.2",IP);
265     Value *Tmp1 = BinaryOperator::CreateLShr(V,
266                               ConstantInt::get(V->getType(),24),"bswap.1", IP);
267     Tmp3 = BinaryOperator::CreateAnd(Tmp3, 
268                                      ConstantInt::get(Type::Int32Ty, 0xFF0000),
269                                      "bswap.and3", IP);
270     Tmp2 = BinaryOperator::CreateAnd(Tmp2, 
271                                      ConstantInt::get(Type::Int32Ty, 0xFF00),
272                                      "bswap.and2", IP);
273     Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp3, "bswap.or1", IP);
274     Tmp2 = BinaryOperator::CreateOr(Tmp2, Tmp1, "bswap.or2", IP);
275     V = BinaryOperator::CreateOr(Tmp4, Tmp2, "bswap.i32", IP);
276     break;
277   }
278   case 64: {
279     Value *Tmp8 = BinaryOperator::CreateShl(V,
280                               ConstantInt::get(V->getType(),56),"bswap.8", IP);
281     Value *Tmp7 = BinaryOperator::CreateShl(V,
282                               ConstantInt::get(V->getType(),40),"bswap.7", IP);
283     Value *Tmp6 = BinaryOperator::CreateShl(V,
284                               ConstantInt::get(V->getType(),24),"bswap.6", IP);
285     Value *Tmp5 = BinaryOperator::CreateShl(V,
286                               ConstantInt::get(V->getType(),8),"bswap.5", IP);
287     Value* Tmp4 = BinaryOperator::CreateLShr(V,
288                               ConstantInt::get(V->getType(),8),"bswap.4", IP);
289     Value* Tmp3 = BinaryOperator::CreateLShr(V,
290                               ConstantInt::get(V->getType(),24),"bswap.3", IP);
291     Value* Tmp2 = BinaryOperator::CreateLShr(V,
292                               ConstantInt::get(V->getType(),40),"bswap.2", IP);
293     Value* Tmp1 = BinaryOperator::CreateLShr(V,
294                               ConstantInt::get(V->getType(),56),"bswap.1", IP);
295     Tmp7 = BinaryOperator::CreateAnd(Tmp7,
296                              ConstantInt::get(Type::Int64Ty, 
297                                0xFF000000000000ULL),
298                              "bswap.and7", IP);
299     Tmp6 = BinaryOperator::CreateAnd(Tmp6,
300                              ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
301                              "bswap.and6", IP);
302     Tmp5 = BinaryOperator::CreateAnd(Tmp5,
303                              ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
304                              "bswap.and5", IP);
305     Tmp4 = BinaryOperator::CreateAnd(Tmp4,
306                              ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
307                              "bswap.and4", IP);
308     Tmp3 = BinaryOperator::CreateAnd(Tmp3,
309                              ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
310                              "bswap.and3", IP);
311     Tmp2 = BinaryOperator::CreateAnd(Tmp2,
312                              ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
313                              "bswap.and2", IP);
314     Tmp8 = BinaryOperator::CreateOr(Tmp8, Tmp7, "bswap.or1", IP);
315     Tmp6 = BinaryOperator::CreateOr(Tmp6, Tmp5, "bswap.or2", IP);
316     Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp3, "bswap.or3", IP);
317     Tmp2 = BinaryOperator::CreateOr(Tmp2, Tmp1, "bswap.or4", IP);
318     Tmp8 = BinaryOperator::CreateOr(Tmp8, Tmp6, "bswap.or5", IP);
319     Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp2, "bswap.or6", IP);
320     V = BinaryOperator::CreateOr(Tmp8, Tmp4, "bswap.i64", IP);
321     break;
322   }
323   }
324   return V;
325 }
326
327 /// LowerCTPOP - Emit the code to lower ctpop of V before the specified
328 /// instruction IP.
329 static Value *LowerCTPOP(Value *V, Instruction *IP) {
330   assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
331
332   static const uint64_t MaskValues[6] = {
333     0x5555555555555555ULL, 0x3333333333333333ULL,
334     0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
335     0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
336   };
337
338   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
339   unsigned WordSize = (BitSize + 63) / 64;
340   Value *Count = ConstantInt::get(V->getType(), 0);
341
342   for (unsigned n = 0; n < WordSize; ++n) {
343     Value *PartValue = V;
344     for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); 
345          i <<= 1, ++ct) {
346       Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
347       Value *LHS = BinaryOperator::CreateAnd(
348                      PartValue, MaskCst, "cppop.and1", IP);
349       Value *VShift = BinaryOperator::CreateLShr(PartValue,
350                         ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
351       Value *RHS = BinaryOperator::CreateAnd(VShift, MaskCst, "cppop.and2", IP);
352       PartValue = BinaryOperator::CreateAdd(LHS, RHS, "ctpop.step", IP);
353     }
354     Count = BinaryOperator::CreateAdd(PartValue, Count, "ctpop.part", IP);
355     if (BitSize > 64) {
356       V = BinaryOperator::CreateLShr(V, ConstantInt::get(V->getType(), 64), 
357                                      "ctpop.part.sh", IP);
358       BitSize -= 64;
359     }
360   }
361
362   return Count;
363 }
364
365 /// LowerCTLZ - Emit the code to lower ctlz of V before the specified
366 /// instruction IP.
367 static Value *LowerCTLZ(Value *V, Instruction *IP) {
368
369   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
370   for (unsigned i = 1; i < BitSize; i <<= 1) {
371     Value *ShVal = ConstantInt::get(V->getType(), i);
372     ShVal = BinaryOperator::CreateLShr(V, ShVal, "ctlz.sh", IP);
373     V = BinaryOperator::CreateOr(V, ShVal, "ctlz.step", IP);
374   }
375
376   V = BinaryOperator::CreateNot(V, "", IP);
377   return LowerCTPOP(V, IP);
378 }
379
380 /// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes 
381 /// three integer arguments. The first argument is the Value from which the
382 /// bits will be selected. It may be of any bit width. The second and third
383 /// arguments specify a range of bits to select with the second argument 
384 /// specifying the low bit and the third argument specifying the high bit. Both
385 /// must be type i32. The result is the corresponding selected bits from the
386 /// Value in the same width as the Value (first argument). If the low bit index
387 /// is higher than the high bit index then the inverse selection is done and 
388 /// the bits are returned in inverse order. 
389 /// @brief Lowering of llvm.part.select intrinsic.
390 static Instruction *LowerPartSelect(CallInst *CI) {
391   // Make sure we're dealing with a part select intrinsic here
392   Function *F = CI->getCalledFunction();
393   const FunctionType *FT = F->getFunctionType();
394   if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
395       FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
396       !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
397     return CI;
398
399   // Get the intrinsic implementation function by converting all the . to _
400   // in the intrinsic's function name and then reconstructing the function
401   // declaration.
402   std::string Name(F->getName());
403   for (unsigned i = 4; i < Name.length(); ++i)
404     if (Name[i] == '.')
405       Name[i] = '_';
406   Module* M = F->getParent();
407   F = cast<Function>(M->getOrInsertFunction(Name, FT));
408   F->setLinkage(GlobalValue::WeakLinkage);
409
410   // If we haven't defined the impl function yet, do so now
411   if (F->isDeclaration()) {
412
413     // Get the arguments to the function
414     Function::arg_iterator args = F->arg_begin();
415     Value* Val = args++; Val->setName("Val");
416     Value* Lo = args++; Lo->setName("Lo");
417     Value* Hi = args++; Hi->setName("High");
418
419     // We want to select a range of bits here such that [Hi, Lo] is shifted
420     // down to the low bits. However, it is quite possible that Hi is smaller
421     // than Lo in which case the bits have to be reversed. 
422     
423     // Create the blocks we will need for the two cases (forward, reverse)
424     BasicBlock* CurBB   = BasicBlock::Create("entry", F);
425     BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
426     BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
427     BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
428     BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
429     BasicBlock *RsltBlk = BasicBlock::Create("result",  CurBB->getParent());
430
431     // Cast Hi and Lo to the size of Val so the widths are all the same
432     if (Hi->getType() != Val->getType())
433       Hi = CastInst::CreateIntegerCast(Hi, Val->getType(), false, 
434                                          "tmp", CurBB);
435     if (Lo->getType() != Val->getType())
436       Lo = CastInst::CreateIntegerCast(Lo, Val->getType(), false, 
437                                           "tmp", CurBB);
438
439     // Compute a few things that both cases will need, up front.
440     Constant* Zero = ConstantInt::get(Val->getType(), 0);
441     Constant* One = ConstantInt::get(Val->getType(), 1);
442     Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
443
444     // Compare the Hi and Lo bit positions. This is used to determine 
445     // which case we have (forward or reverse)
446     ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
447     BranchInst::Create(RevSize, FwdSize, Cmp, CurBB);
448
449     // First, copmute the number of bits in the forward case.
450     Instruction* FBitSize = 
451       BinaryOperator::CreateSub(Hi, Lo,"fbits", FwdSize);
452     BranchInst::Create(Compute, FwdSize);
453
454     // Second, compute the number of bits in the reverse case.
455     Instruction* RBitSize = 
456       BinaryOperator::CreateSub(Lo, Hi, "rbits", RevSize);
457     BranchInst::Create(Compute, RevSize);
458
459     // Now, compute the bit range. Start by getting the bitsize and the shift
460     // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for 
461     // the number of bits we want in the range. We shift the bits down to the 
462     // least significant bits, apply the mask to zero out unwanted high bits, 
463     // and we have computed the "forward" result. It may still need to be 
464     // reversed.
465
466     // Get the BitSize from one of the two subtractions
467     PHINode *BitSize = PHINode::Create(Val->getType(), "bits", Compute);
468     BitSize->reserveOperandSpace(2);
469     BitSize->addIncoming(FBitSize, FwdSize);
470     BitSize->addIncoming(RBitSize, RevSize);
471
472     // Get the ShiftAmount as the smaller of Hi/Lo
473     PHINode *ShiftAmt = PHINode::Create(Val->getType(), "shiftamt", Compute);
474     ShiftAmt->reserveOperandSpace(2);
475     ShiftAmt->addIncoming(Lo, FwdSize);
476     ShiftAmt->addIncoming(Hi, RevSize);
477
478     // Increment the bit size
479     Instruction *BitSizePlusOne = 
480       BinaryOperator::CreateAdd(BitSize, One, "bits", Compute);
481
482     // Create a Mask to zero out the high order bits.
483     Instruction* Mask = 
484       BinaryOperator::CreateShl(AllOnes, BitSizePlusOne, "mask", Compute);
485     Mask = BinaryOperator::CreateNot(Mask, "mask", Compute);
486
487     // Shift the bits down and apply the mask
488     Instruction* FRes = 
489       BinaryOperator::CreateLShr(Val, ShiftAmt, "fres", Compute);
490     FRes = BinaryOperator::CreateAnd(FRes, Mask, "fres", Compute);
491     BranchInst::Create(Reverse, RsltBlk, Cmp, Compute);
492
493     // In the Reverse block we have the mask already in FRes but we must reverse
494     // it by shifting FRes bits right and putting them in RRes by shifting them 
495     // in from left.
496
497     // First set up our loop counters
498     PHINode *Count = PHINode::Create(Val->getType(), "count", Reverse);
499     Count->reserveOperandSpace(2);
500     Count->addIncoming(BitSizePlusOne, Compute);
501
502     // Next, get the value that we are shifting.
503     PHINode *BitsToShift = PHINode::Create(Val->getType(), "val", Reverse);
504     BitsToShift->reserveOperandSpace(2);
505     BitsToShift->addIncoming(FRes, Compute);
506
507     // Finally, get the result of the last computation
508     PHINode *RRes = PHINode::Create(Val->getType(), "rres", Reverse);
509     RRes->reserveOperandSpace(2);
510     RRes->addIncoming(Zero, Compute);
511
512     // Decrement the counter
513     Instruction *Decr = BinaryOperator::CreateSub(Count, One, "decr", Reverse);
514     Count->addIncoming(Decr, Reverse);
515
516     // Compute the Bit that we want to move
517     Instruction *Bit = 
518       BinaryOperator::CreateAnd(BitsToShift, One, "bit", Reverse);
519
520     // Compute the new value for next iteration.
521     Instruction *NewVal = 
522       BinaryOperator::CreateLShr(BitsToShift, One, "rshift", Reverse);
523     BitsToShift->addIncoming(NewVal, Reverse);
524
525     // Shift the bit into the low bits of the result.
526     Instruction *NewRes = 
527       BinaryOperator::CreateShl(RRes, One, "lshift", Reverse);
528     NewRes = BinaryOperator::CreateOr(NewRes, Bit, "addbit", Reverse);
529     RRes->addIncoming(NewRes, Reverse);
530     
531     // Terminate loop if we've moved all the bits.
532     ICmpInst *Cond = 
533       new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
534     BranchInst::Create(RsltBlk, Reverse, Cond, Reverse);
535
536     // Finally, in the result block, select one of the two results with a PHI
537     // node and return the result;
538     CurBB = RsltBlk;
539     PHINode *BitSelect = PHINode::Create(Val->getType(), "part_select", CurBB);
540     BitSelect->reserveOperandSpace(2);
541     BitSelect->addIncoming(FRes, Compute);
542     BitSelect->addIncoming(NewRes, Reverse);
543     ReturnInst::Create(BitSelect, CurBB);
544   }
545
546   // Return a call to the implementation function
547   Value *Args[] = {
548     CI->getOperand(1),
549     CI->getOperand(2),
550     CI->getOperand(3)
551   };
552   return CallInst::Create(F, Args, array_endof(Args), CI->getName(), CI);
553 }
554
555 /// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes 
556 /// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
557 /// The first two arguments can be any bit width. The result is the same width
558 /// as %Value. The operation replaces bits between %Low and %High with the value
559 /// in %Replacement. If %Replacement is not the same width, it is truncated or
560 /// zero extended as appropriate to fit the bits being replaced. If %Low is
561 /// greater than %High then the inverse set of bits are replaced.
562 /// @brief Lowering of llvm.bit.part.set intrinsic.
563 static Instruction *LowerPartSet(CallInst *CI) {
564   // Make sure we're dealing with a part select intrinsic here
565   Function *F = CI->getCalledFunction();
566   const FunctionType *FT = F->getFunctionType();
567   if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
568       FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
569       !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
570       !FT->getParamType(3)->isInteger())
571     return CI;
572
573   // Get the intrinsic implementation function by converting all the . to _
574   // in the intrinsic's function name and then reconstructing the function
575   // declaration.
576   std::string Name(F->getName());
577   for (unsigned i = 4; i < Name.length(); ++i)
578     if (Name[i] == '.')
579       Name[i] = '_';
580   Module* M = F->getParent();
581   F = cast<Function>(M->getOrInsertFunction(Name, FT));
582   F->setLinkage(GlobalValue::WeakLinkage);
583
584   // If we haven't defined the impl function yet, do so now
585   if (F->isDeclaration()) {
586     // Get the arguments for the function.
587     Function::arg_iterator args = F->arg_begin();
588     Value* Val = args++; Val->setName("Val");
589     Value* Rep = args++; Rep->setName("Rep");
590     Value* Lo  = args++; Lo->setName("Lo");
591     Value* Hi  = args++; Hi->setName("Hi");
592
593     // Get some types we need
594     const IntegerType* ValTy = cast<IntegerType>(Val->getType());
595     const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
596     uint32_t ValBits = ValTy->getBitWidth();
597     uint32_t RepBits = RepTy->getBitWidth();
598
599     // Constant Definitions
600     ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
601     ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
602     ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
603     ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
604     ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
605     ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
606     ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
607
608     // Basic blocks we fill in below.
609     BasicBlock* entry = BasicBlock::Create("entry", F, 0);
610     BasicBlock* large = BasicBlock::Create("large", F, 0);
611     BasicBlock* small = BasicBlock::Create("small", F, 0);
612     BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
613     BasicBlock* result = BasicBlock::Create("result", F, 0);
614
615     // BASIC BLOCK: entry
616     // First, get the number of bits that we're placing as an i32
617     ICmpInst* is_forward = 
618       new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
619     SelectInst* Hi_pn = SelectInst::Create(is_forward, Hi, Lo, "", entry);
620     SelectInst* Lo_pn = SelectInst::Create(is_forward, Lo, Hi, "", entry);
621     BinaryOperator* NumBits = BinaryOperator::CreateSub(Hi_pn, Lo_pn, "",entry);
622     NumBits = BinaryOperator::CreateAdd(NumBits, One, "", entry);
623     // Now, convert Lo and Hi to ValTy bit width
624     if (ValBits > 32) {
625       Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
626     } else if (ValBits < 32) {
627       Lo = new TruncInst(Lo_pn, ValTy, "", entry);
628     }
629     // Determine if the replacement bits are larger than the number of bits we
630     // are replacing and deal with it.
631     ICmpInst* is_large = 
632       new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
633     BranchInst::Create(large, small, is_large, entry);
634
635     // BASIC BLOCK: large
636     Instruction* MaskBits = 
637       BinaryOperator::CreateSub(RepBitWidth, NumBits, "", large);
638     MaskBits = CastInst::CreateIntegerCast(MaskBits, RepMask->getType(), 
639                                            false, "", large);
640     BinaryOperator* Mask1 = 
641       BinaryOperator::CreateLShr(RepMask, MaskBits, "", large);
642     BinaryOperator* Rep2 = BinaryOperator::CreateAnd(Mask1, Rep, "", large);
643     BranchInst::Create(small, large);
644
645     // BASIC BLOCK: small
646     PHINode* Rep3 = PHINode::Create(RepTy, "", small);
647     Rep3->reserveOperandSpace(2);
648     Rep3->addIncoming(Rep2, large);
649     Rep3->addIncoming(Rep, entry);
650     Value* Rep4 = Rep3;
651     if (ValBits > RepBits)
652       Rep4 = new ZExtInst(Rep3, ValTy, "", small);
653     else if (ValBits < RepBits)
654       Rep4 = new TruncInst(Rep3, ValTy, "", small);
655     BranchInst::Create(result, reverse, is_forward, small);
656
657     // BASIC BLOCK: reverse (reverses the bits of the replacement)
658     // Set up our loop counter as a PHI so we can decrement on each iteration.
659     // We will loop for the number of bits in the replacement value.
660     PHINode *Count = PHINode::Create(Type::Int32Ty, "count", reverse);
661     Count->reserveOperandSpace(2);
662     Count->addIncoming(NumBits, small);
663
664     // Get the value that we are shifting bits out of as a PHI because
665     // we'll change this with each iteration.
666     PHINode *BitsToShift = PHINode::Create(Val->getType(), "val", reverse);
667     BitsToShift->reserveOperandSpace(2);
668     BitsToShift->addIncoming(Rep4, small);
669
670     // Get the result of the last computation or zero on first iteration
671     PHINode *RRes = PHINode::Create(Val->getType(), "rres", reverse);
672     RRes->reserveOperandSpace(2);
673     RRes->addIncoming(ValZero, small);
674
675     // Decrement the loop counter by one
676     Instruction *Decr = BinaryOperator::CreateSub(Count, One, "", reverse);
677     Count->addIncoming(Decr, reverse);
678
679     // Get the bit that we want to move into the result
680     Value *Bit = BinaryOperator::CreateAnd(BitsToShift, ValOne, "", reverse);
681
682     // Compute the new value of the bits to shift for the next iteration.
683     Value *NewVal = BinaryOperator::CreateLShr(BitsToShift, ValOne,"", reverse);
684     BitsToShift->addIncoming(NewVal, reverse);
685
686     // Shift the bit we extracted into the low bit of the result.
687     Instruction *NewRes = BinaryOperator::CreateShl(RRes, ValOne, "", reverse);
688     NewRes = BinaryOperator::CreateOr(NewRes, Bit, "", reverse);
689     RRes->addIncoming(NewRes, reverse);
690     
691     // Terminate loop if we've moved all the bits.
692     ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
693     BranchInst::Create(result, reverse, Cond, reverse);
694
695     // BASIC BLOCK: result
696     PHINode *Rplcmnt = PHINode::Create(Val->getType(), "", result);
697     Rplcmnt->reserveOperandSpace(2);
698     Rplcmnt->addIncoming(NewRes, reverse);
699     Rplcmnt->addIncoming(Rep4, small);
700     Value* t0   = CastInst::CreateIntegerCast(NumBits,ValTy,false,"",result);
701     Value* t1   = BinaryOperator::CreateShl(ValMask, Lo, "", result);
702     Value* t2   = BinaryOperator::CreateNot(t1, "", result);
703     Value* t3   = BinaryOperator::CreateShl(t1, t0, "", result);
704     Value* t4   = BinaryOperator::CreateOr(t2, t3, "", result);
705     Value* t5   = BinaryOperator::CreateAnd(t4, Val, "", result);
706     Value* t6   = BinaryOperator::CreateShl(Rplcmnt, Lo, "", result);
707     Value* Rslt = BinaryOperator::CreateOr(t5, t6, "part_set", result);
708     ReturnInst::Create(Rslt, result);
709   }
710
711   // Return a call to the implementation function
712   Value *Args[] = {
713     CI->getOperand(1),
714     CI->getOperand(2),
715     CI->getOperand(3),
716     CI->getOperand(4)
717   };
718   return CallInst::Create(F, Args, array_endof(Args), CI->getName(), CI);
719 }
720
721
722 void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
723   Function *Callee = CI->getCalledFunction();
724   assert(Callee && "Cannot lower an indirect call!");
725
726   switch (Callee->getIntrinsicID()) {
727   case Intrinsic::not_intrinsic:
728     cerr << "Cannot lower a call to a non-intrinsic function '"
729          << Callee->getName() << "'!\n";
730     abort();
731   default:
732     cerr << "Error: Code generator does not support intrinsic function '"
733          << Callee->getName() << "'!\n";
734     abort();
735
736     // The setjmp/longjmp intrinsics should only exist in the code if it was
737     // never optimized (ie, right out of the CFE), or if it has been hacked on
738     // by the lowerinvoke pass.  In both cases, the right thing to do is to
739     // convert the call to an explicit setjmp or longjmp call.
740   case Intrinsic::setjmp: {
741     static Constant *SetjmpFCache = 0;
742     Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
743                                Type::Int32Ty, SetjmpFCache);
744     if (CI->getType() != Type::VoidTy)
745       CI->replaceAllUsesWith(V);
746     break;
747   }
748   case Intrinsic::sigsetjmp:
749      if (CI->getType() != Type::VoidTy)
750        CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
751      break;
752
753   case Intrinsic::longjmp: {
754     static Constant *LongjmpFCache = 0;
755     ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
756                     Type::VoidTy, LongjmpFCache);
757     break;
758   }
759
760   case Intrinsic::siglongjmp: {
761     // Insert the call to abort
762     static Constant *AbortFCache = 0;
763     ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), 
764                     Type::VoidTy, AbortFCache);
765     break;
766   }
767   case Intrinsic::ctpop:
768     CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
769     break;
770
771   case Intrinsic::bswap:
772     CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
773     break;
774     
775   case Intrinsic::ctlz:
776     CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
777     break;
778
779   case Intrinsic::cttz: {
780     // cttz(x) -> ctpop(~X & (X-1))
781     Value *Src = CI->getOperand(1);
782     Value *NotSrc = BinaryOperator::CreateNot(Src, Src->getName()+".not", CI);
783     Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
784     SrcM1 = BinaryOperator::CreateSub(Src, SrcM1, "", CI);
785     Src = LowerCTPOP(BinaryOperator::CreateAnd(NotSrc, SrcM1, "", CI), CI);
786     CI->replaceAllUsesWith(Src);
787     break;
788   }
789
790   case Intrinsic::part_select:
791     CI->replaceAllUsesWith(LowerPartSelect(CI));
792     break;
793
794   case Intrinsic::part_set:
795     CI->replaceAllUsesWith(LowerPartSet(CI));
796     break;
797
798   case Intrinsic::stacksave:
799   case Intrinsic::stackrestore: {
800     static bool Warned = false;
801     if (!Warned)
802       cerr << "WARNING: this target does not support the llvm.stack"
803            << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
804                "save" : "restore") << " intrinsic.\n";
805     Warned = true;
806     if (Callee->getIntrinsicID() == Intrinsic::stacksave)
807       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
808     break;
809   }
810     
811   case Intrinsic::returnaddress:
812   case Intrinsic::frameaddress:
813     cerr << "WARNING: this target does not support the llvm."
814          << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
815              "return" : "frame") << "address intrinsic.\n";
816     CI->replaceAllUsesWith(ConstantPointerNull::get(
817                                             cast<PointerType>(CI->getType())));
818     break;
819
820   case Intrinsic::prefetch:
821     break;    // Simply strip out prefetches on unsupported architectures
822
823   case Intrinsic::pcmarker:
824     break;    // Simply strip out pcmarker on unsupported architectures
825   case Intrinsic::readcyclecounter: {
826     cerr << "WARNING: this target does not support the llvm.readcyclecoun"
827          << "ter intrinsic.  It is being lowered to a constant 0\n";
828     CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
829     break;
830   }
831
832   case Intrinsic::dbg_stoppoint:
833   case Intrinsic::dbg_region_start:
834   case Intrinsic::dbg_region_end:
835   case Intrinsic::dbg_func_start:
836   case Intrinsic::dbg_declare:
837     break;    // Simply strip out debugging intrinsics
838
839   case Intrinsic::eh_exception:
840   case Intrinsic::eh_selector_i32:
841   case Intrinsic::eh_selector_i64:
842     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
843     break;
844
845   case Intrinsic::eh_typeid_for_i32:
846   case Intrinsic::eh_typeid_for_i64:
847     // Return something different to eh_selector.
848     CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
849     break;
850
851   case Intrinsic::var_annotation:
852     break;   // Strip out annotate intrinsic
853     
854   case Intrinsic::memcpy_i32:
855   case Intrinsic::memcpy_i64: {
856     static Constant *MemcpyFCache = 0;
857     Value *Size = CI->getOperand(3);
858     const Type *IntPtr = TD.getIntPtrType();
859     if (Size->getType()->getPrimitiveSizeInBits() <
860         IntPtr->getPrimitiveSizeInBits())
861       Size = new ZExtInst(Size, IntPtr, "", CI);
862     else if (Size->getType()->getPrimitiveSizeInBits() >
863              IntPtr->getPrimitiveSizeInBits())
864       Size = new TruncInst(Size, IntPtr, "", CI);
865     Value *Ops[3];
866     Ops[0] = CI->getOperand(1);
867     Ops[1] = CI->getOperand(2);
868     Ops[2] = Size;
869     ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
870                     MemcpyFCache);
871     break;
872   }
873   case Intrinsic::memmove_i32: 
874   case Intrinsic::memmove_i64: {
875     static Constant *MemmoveFCache = 0;
876     Value *Size = CI->getOperand(3);
877     const Type *IntPtr = TD.getIntPtrType();
878     if (Size->getType()->getPrimitiveSizeInBits() <
879         IntPtr->getPrimitiveSizeInBits())
880       Size = new ZExtInst(Size, IntPtr, "", CI);
881     else if (Size->getType()->getPrimitiveSizeInBits() >
882              IntPtr->getPrimitiveSizeInBits())
883       Size = new TruncInst(Size, IntPtr, "", CI);
884     Value *Ops[3];
885     Ops[0] = CI->getOperand(1);
886     Ops[1] = CI->getOperand(2);
887     Ops[2] = Size;
888     ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
889                     MemmoveFCache);
890     break;
891   }
892   case Intrinsic::memset_i32:
893   case Intrinsic::memset_i64: {
894     static Constant *MemsetFCache = 0;
895     Value *Size = CI->getOperand(3);
896     const Type *IntPtr = TD.getIntPtrType();
897     if (Size->getType()->getPrimitiveSizeInBits() <
898         IntPtr->getPrimitiveSizeInBits())
899       Size = new ZExtInst(Size, IntPtr, "", CI);
900     else if (Size->getType()->getPrimitiveSizeInBits() >
901              IntPtr->getPrimitiveSizeInBits())
902       Size = new TruncInst(Size, IntPtr, "", CI);
903     Value *Ops[3];
904     Ops[0] = CI->getOperand(1);
905     // Extend the amount to i32.
906     Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
907     Ops[2] = Size;
908     ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
909                     MemsetFCache);
910     break;
911   }
912   case Intrinsic::sqrt: {
913     static Constant *sqrtfFCache = 0;
914     static Constant *sqrtFCache = 0;
915     static Constant *sqrtLDCache = 0;
916     switch (CI->getOperand(1)->getType()->getTypeID()) {
917     default: assert(0 && "Invalid type in sqrt"); abort();
918     case Type::FloatTyID:
919       ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
920                     Type::FloatTy, sqrtfFCache);
921       break;
922     case Type::DoubleTyID:
923       ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
924                     Type::DoubleTy, sqrtFCache);
925       break;
926     case Type::X86_FP80TyID:
927     case Type::FP128TyID:
928     case Type::PPC_FP128TyID:
929       ReplaceCallWith("sqrtl", CI, CI->op_begin()+1, CI->op_end(),
930                     CI->getOperand(1)->getType(), sqrtLDCache);
931       break;
932     }
933     break;
934   }
935   case Intrinsic::log: {
936     static Constant *logfFCache = 0;
937     static Constant *logFCache = 0;
938     static Constant *logLDCache = 0;
939     switch (CI->getOperand(1)->getType()->getTypeID()) {
940     default: assert(0 && "Invalid type in log"); abort();
941     case Type::FloatTyID:
942       ReplaceCallWith("logf", CI, CI->op_begin()+1, CI->op_end(),
943                     Type::FloatTy, logfFCache);
944       break;
945     case Type::DoubleTyID:
946       ReplaceCallWith("log", CI, CI->op_begin()+1, CI->op_end(),
947                     Type::DoubleTy, logFCache);
948       break;
949     case Type::X86_FP80TyID:
950     case Type::FP128TyID:
951     case Type::PPC_FP128TyID:
952       ReplaceCallWith("logl", CI, CI->op_begin()+1, CI->op_end(),
953                     CI->getOperand(1)->getType(), logLDCache);
954       break;
955     }
956     break;
957   }
958   case Intrinsic::log2: {
959     static Constant *log2fFCache = 0;
960     static Constant *log2FCache = 0;
961     static Constant *log2LDCache = 0;
962     switch (CI->getOperand(1)->getType()->getTypeID()) {
963     default: assert(0 && "Invalid type in log2"); abort();
964     case Type::FloatTyID:
965       ReplaceCallWith("log2f", CI, CI->op_begin()+1, CI->op_end(),
966                     Type::FloatTy, log2fFCache);
967       break;
968     case Type::DoubleTyID:
969       ReplaceCallWith("log2", CI, CI->op_begin()+1, CI->op_end(),
970                     Type::DoubleTy, log2FCache);
971       break;
972     case Type::X86_FP80TyID:
973     case Type::FP128TyID:
974     case Type::PPC_FP128TyID:
975       ReplaceCallWith("log2l", CI, CI->op_begin()+1, CI->op_end(),
976                     CI->getOperand(1)->getType(), log2LDCache);
977       break;
978     }
979     break;
980   }
981   case Intrinsic::log10: {
982     static Constant *log10fFCache = 0;
983     static Constant *log10FCache = 0;
984     static Constant *log10LDCache = 0;
985     switch (CI->getOperand(1)->getType()->getTypeID()) {
986     default: assert(0 && "Invalid type in log10"); abort();
987     case Type::FloatTyID:
988       ReplaceCallWith("log10f", CI, CI->op_begin()+1, CI->op_end(),
989                     Type::FloatTy, log10fFCache);
990       break;
991     case Type::DoubleTyID:
992       ReplaceCallWith("log10", CI, CI->op_begin()+1, CI->op_end(),
993                     Type::DoubleTy, log10FCache);
994       break;
995     case Type::X86_FP80TyID:
996     case Type::FP128TyID:
997     case Type::PPC_FP128TyID:
998       ReplaceCallWith("log10l", CI, CI->op_begin()+1, CI->op_end(),
999                     CI->getOperand(1)->getType(), log10LDCache);
1000       break;
1001     }
1002     break;
1003   }
1004   case Intrinsic::exp: {
1005     static Constant *expfFCache = 0;
1006     static Constant *expFCache = 0;
1007     static Constant *expLDCache = 0;
1008     switch (CI->getOperand(1)->getType()->getTypeID()) {
1009     default: assert(0 && "Invalid type in exp"); abort();
1010     case Type::FloatTyID:
1011       ReplaceCallWith("expf", CI, CI->op_begin()+1, CI->op_end(),
1012                     Type::FloatTy, expfFCache);
1013       break;
1014     case Type::DoubleTyID:
1015       ReplaceCallWith("exp", CI, CI->op_begin()+1, CI->op_end(),
1016                     Type::DoubleTy, expFCache);
1017       break;
1018     case Type::X86_FP80TyID:
1019     case Type::FP128TyID:
1020     case Type::PPC_FP128TyID:
1021       ReplaceCallWith("expl", CI, CI->op_begin()+1, CI->op_end(),
1022                     CI->getOperand(1)->getType(), expLDCache);
1023       break;
1024     }
1025     break;
1026   }
1027   case Intrinsic::exp2: {
1028     static Constant *exp2fFCache = 0;
1029     static Constant *exp2FCache = 0;
1030     static Constant *exp2LDCache = 0;
1031     switch (CI->getOperand(1)->getType()->getTypeID()) {
1032     default: assert(0 && "Invalid type in exp2"); abort();
1033     case Type::FloatTyID:
1034       ReplaceCallWith("exp2f", CI, CI->op_begin()+1, CI->op_end(),
1035                     Type::FloatTy, exp2fFCache);
1036       break;
1037     case Type::DoubleTyID:
1038       ReplaceCallWith("exp2", CI, CI->op_begin()+1, CI->op_end(),
1039                     Type::DoubleTy, exp2FCache);
1040       break;
1041     case Type::X86_FP80TyID:
1042     case Type::FP128TyID:
1043     case Type::PPC_FP128TyID:
1044       ReplaceCallWith("exp2l", CI, CI->op_begin()+1, CI->op_end(),
1045                     CI->getOperand(1)->getType(), exp2LDCache);
1046       break;
1047     }
1048     break;
1049   }
1050   case Intrinsic::pow: {
1051     static Constant *powfFCache = 0;
1052     static Constant *powFCache = 0;
1053     static Constant *powLDCache = 0;
1054     switch (CI->getOperand(1)->getType()->getTypeID()) {
1055     default: assert(0 && "Invalid type in pow"); abort();
1056     case Type::FloatTyID:
1057       ReplaceCallWith("powf", CI, CI->op_begin()+1, CI->op_end(),
1058                     Type::FloatTy, powfFCache);
1059       break;
1060     case Type::DoubleTyID:
1061       ReplaceCallWith("pow", CI, CI->op_begin()+1, CI->op_end(),
1062                     Type::DoubleTy, powFCache);
1063       break;
1064     case Type::X86_FP80TyID:
1065     case Type::FP128TyID:
1066     case Type::PPC_FP128TyID:
1067       ReplaceCallWith("powl", CI, CI->op_begin()+1, CI->op_end(),
1068                     CI->getOperand(1)->getType(), powLDCache);
1069       break;
1070     }
1071     break;
1072   }
1073   case Intrinsic::flt_rounds:
1074      // Lower to "round to the nearest"
1075      if (CI->getType() != Type::VoidTy)
1076        CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
1077      break;
1078   }
1079
1080   assert(CI->use_empty() &&
1081          "Lowering should have eliminated any uses of the intrinsic call!");
1082   CI->eraseFromParent();
1083 }