lib/CodeGen/IntrinsicLowering.cpp

   1 //===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the IntrinsicLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/Constants.h"
  15 #include "llvm/DerivedTypes.h"
  16 #include "llvm/Module.h"
  17 #include "llvm/Type.h"
  18 #include "llvm/CodeGen/IntrinsicLowering.h"
  19 #include "llvm/Support/IRBuilder.h"
  20 #include "llvm/Support/ErrorHandling.h"
  21 #include "llvm/Target/TargetData.h"
  22 #include "llvm/ADT/SmallVector.h"
  23 using namespace llvm;
  24
  25 template <class ArgIt>
  26 static void EnsureFunctionExists(Module &M, const char *Name,
  27                                  ArgIt ArgBegin, ArgIt ArgEnd,
  28                                  const Type *RetTy) {
  29   // Insert a correctly-typed definition now.
  30   std::vector<const Type *> ParamTys;
  31   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
  32     ParamTys.push_back(I->getType());
  33   M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
  34 }
  35
  36 static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
  37                                     const char *FName,
  38                                     const char *DName, const char *LDName) {
  39   // Insert definitions for all the floating point types.
  40   switch((int)Fn->arg_begin()->getType()->getTypeID()) {
  41   case Type::FloatTyID:
  42     EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
  43                          Type::FloatTy);
  44     break;
  45   case Type::DoubleTyID:
  46     EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
  47                          Type::DoubleTy);
  48     break;
  49   case Type::X86_FP80TyID:
  50   case Type::FP128TyID:
  51   case Type::PPC_FP128TyID:
  52     EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
  53                          Fn->arg_begin()->getType());
  54     break;
  55   }
  56 }
  57
  58 /// ReplaceCallWith - This function is used when we want to lower an intrinsic
  59 /// call to a call of an external function.  This handles hard cases such as
  60 /// when there was already a prototype for the external function, and if that
  61 /// prototype doesn't match the arguments we expect to pass in.
  62 template <class ArgIt>
  63 static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
  64                                  ArgIt ArgBegin, ArgIt ArgEnd,
  65                                  const Type *RetTy) {
  66   // If we haven't already looked up this function, check to see if the
  67   // program already contains a function with this name.
  68   Module *M = CI->getParent()->getParent()->getParent();
  69   // Get or insert the definition now.
  70   std::vector<const Type *> ParamTys;
  71   for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
  72     ParamTys.push_back((*I)->getType());
  73   Constant* FCache = M->getOrInsertFunction(NewFn,
  74                                   FunctionType::get(RetTy, ParamTys, false));
  75
  76   IRBuilder<> Builder(CI->getParent(), CI);
  77   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
  78   CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
  79   NewCI->setName(CI->getName());
  80   if (!CI->use_empty())
  81     CI->replaceAllUsesWith(NewCI);
  82   return NewCI;
  83 }
  84
  85 void IntrinsicLowering::AddPrototypes(Module &M) {
  86   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
  87     if (I->isDeclaration() && !I->use_empty())
  88       switch (I->getIntrinsicID()) {
  89       default: break;
  90       case Intrinsic::setjmp:
  91         EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
  92                              Type::Int32Ty);
  93         break;
  94       case Intrinsic::longjmp:
  95         EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
  96                              Type::VoidTy);
  97         break;
  98       case Intrinsic::siglongjmp:
  99         EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
 100                              Type::VoidTy);
 101         break;
 102       case Intrinsic::memcpy:
 103         M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
 104                               PointerType::getUnqual(Type::Int8Ty),
 105                               PointerType::getUnqual(Type::Int8Ty),
 106                               TD.getIntPtrType(), (Type *)0);
 107         break;
 108       case Intrinsic::memmove:
 109         M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
 110                               PointerType::getUnqual(Type::Int8Ty),
 111                               PointerType::getUnqual(Type::Int8Ty),
 112                               TD.getIntPtrType(), (Type *)0);
 113         break;
 114       case Intrinsic::memset:
 115         M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
 116                               PointerType::getUnqual(Type::Int8Ty),
 117                               Type::Int32Ty,
 118                               TD.getIntPtrType(), (Type *)0);
 119         break;
 120       case Intrinsic::sqrt:
 121         EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
 122         break;
 123       case Intrinsic::sin:
 124         EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
 125         break;
 126       case Intrinsic::cos:
 127         EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
 128         break;
 129       case Intrinsic::pow:
 130         EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
 131         break;
 132       case Intrinsic::log:
 133         EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
 134         break;
 135       case Intrinsic::log2:
 136         EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
 137         break;
 138       case Intrinsic::log10:
 139         EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
 140         break;
 141       case Intrinsic::exp:
 142         EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
 143         break;
 144       case Intrinsic::exp2:
 145         EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
 146         break;
 147       }
 148 }
 149
 150 /// LowerBSWAP - Emit the code to lower bswap of V before the specified
 151 /// instruction IP.
 152 static Value *LowerBSWAP(Value *V, Instruction *IP) {
 153   assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
 154
 155   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
 156
 157   IRBuilder<> Builder(IP->getParent(), IP);
 158
 159   switch(BitSize) {
 160   default: assert(0 && "Unhandled type size of value to byteswap!");
 161   case 16: {
 162     Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
 163                                     "bswap.2");
 164     Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
 165                                      "bswap.1");
 166     V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
 167     break;
 168   }
 169   case 32: {
 170     Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
 171                                     "bswap.4");
 172     Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
 173                                     "bswap.3");
 174     Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
 175                                      "bswap.2");
 176     Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
 177                                      "bswap.1");
 178     Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000),
 179                              "bswap.and3");
 180     Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00),
 181                              "bswap.and2");
 182     Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
 183     Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
 184     V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
 185     break;
 186   }
 187   case 64: {
 188     Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
 189                                     "bswap.8");
 190     Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
 191                                     "bswap.7");
 192     Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
 193                                     "bswap.6");
 194     Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
 195                                     "bswap.5");
 196     Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
 197                                      "bswap.4");
 198     Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24),
 199                                      "bswap.3");
 200     Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40),
 201                                      "bswap.2");
 202     Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56),
 203                                      "bswap.1");
 204     Tmp7 = Builder.CreateAnd(Tmp7,
 205                              ConstantInt::get(Type::Int64Ty,
 206                                               0xFF000000000000ULL),
 207                              "bswap.and7");
 208     Tmp6 = Builder.CreateAnd(Tmp6,
 209                              ConstantInt::get(Type::Int64Ty,
 210                                               0xFF0000000000ULL),
 211                              "bswap.and6");
 212     Tmp5 = Builder.CreateAnd(Tmp5,
 213                              ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
 214                              "bswap.and5");
 215     Tmp4 = Builder.CreateAnd(Tmp4,
 216                              ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
 217                              "bswap.and4");
 218     Tmp3 = Builder.CreateAnd(Tmp3,
 219                              ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
 220                              "bswap.and3");
 221     Tmp2 = Builder.CreateAnd(Tmp2,
 222                              ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
 223                              "bswap.and2");
 224     Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
 225     Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
 226     Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
 227     Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
 228     Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
 229     Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
 230     V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
 231     break;
 232   }
 233   }
 234   return V;
 235 }
 236
 237 /// LowerCTPOP - Emit the code to lower ctpop of V before the specified
 238 /// instruction IP.
 239 static Value *LowerCTPOP(Value *V, Instruction *IP) {
 240   assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
 241
 242   static const uint64_t MaskValues[6] = {
 243     0x5555555555555555ULL, 0x3333333333333333ULL,
 244     0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
 245     0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
 246   };
 247
 248   IRBuilder<> Builder(IP->getParent(), IP);
 249
 250   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
 251   unsigned WordSize = (BitSize + 63) / 64;
 252   Value *Count = ConstantInt::get(V->getType(), 0);
 253
 254   for (unsigned n = 0; n < WordSize; ++n) {
 255     Value *PartValue = V;
 256     for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
 257          i <<= 1, ++ct) {
 258       Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
 259       Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1");
 260       Value *VShift = Builder.CreateLShr(PartValue,
 261                                          ConstantInt::get(V->getType(), i),
 262                                          "ctpop.sh");
 263       Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2");
 264       PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
 265     }
 266     Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
 267     if (BitSize > 64) {
 268       V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
 269                              "ctpop.part.sh");
 270       BitSize -= 64;
 271     }
 272   }
 273
 274   return Count;
 275 }
 276
 277 /// LowerCTLZ - Emit the code to lower ctlz of V before the specified
 278 /// instruction IP.
 279 static Value *LowerCTLZ(Value *V, Instruction *IP) {
 280
 281   IRBuilder<> Builder(IP->getParent(), IP);
 282
 283   unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
 284   for (unsigned i = 1; i < BitSize; i <<= 1) {
 285     Value *ShVal = ConstantInt::get(V->getType(), i);
 286     ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
 287     V = Builder.CreateOr(V, ShVal, "ctlz.step");
 288   }
 289
 290   V = Builder.CreateNot(V);
 291   return LowerCTPOP(V, IP);
 292 }
 293
/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
/// three integer arguments. The first argument is the Value from which the
/// bits will be selected. It may be of any bit width. The second and third
/// arguments specify a range of bits to select with the second argument
/// specifying the low bit and the third argument specifying the high bit. Both
/// must be type i32. The result is the corresponding selected bits from the
/// Value in the same width as the Value (first argument). If the low bit index
/// is higher than the high bit index then the inverse selection is done and
/// the bits are returned in inverse order.
///
/// The lowering works by creating (once per module) a weak helper function
/// whose name is the intrinsic's name with '.' replaced by '_', filling in its
/// body if it has none yet, and emitting a call to it right before CI.  The
/// new call is returned; CI is neither erased nor are its uses rewritten here.
/// If the callee of CI does not look like a part.select declaration, CI itself
/// is returned and nothing is emitted.
/// @brief Lowering of llvm.part.select intrinsic.
static Instruction *LowerPartSelect(CallInst *CI) {
  // No insertion point yet; it is set explicitly before each use below.
  IRBuilder<> Builder(*CI->getParent()->getContext());

  // Make sure we're dealing with a part select intrinsic here: a declaration
  // with an integer result and exactly three integer parameters.
  Function *F = CI->getCalledFunction();
  const FunctionType *FT = F->getFunctionType();
  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
      FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
    return CI;

  // Get the intrinsic implementation function by converting all the . to _
  // in the intrinsic's function name and then reconstructing the function
  // declaration.  Every '.' at index 4 or later is rewritten.
  std::string Name(F->getName());
  for (unsigned i = 4; i < Name.length(); ++i)
    if (Name[i] == '.')
      Name[i] = '_';
  Module* M = F->getParent();
  // From here on F refers to the implementation function, not the intrinsic.
  F = cast<Function>(M->getOrInsertFunction(Name, FT));
  F->setLinkage(GlobalValue::WeakAnyLinkage);

  // If we haven't defined the impl function yet, do so now
  if (F->isDeclaration()) {

    // Get the arguments to the function
    Function::arg_iterator args = F->arg_begin();
    Value* Val = args++; Val->setName("Val");
    Value* Lo = args++; Lo->setName("Lo");
    Value* Hi = args++; Hi->setName("High");

    // We want to select a range of bits here such that [Hi, Lo] is shifted
    // down to the low bits. However, it is quite possible that Hi is smaller
    // than Lo in which case the bits have to be reversed.

    // Create the blocks we will need for the two cases (forward, reverse)
    BasicBlock* CurBB   = BasicBlock::Create("entry", F);
    BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
    BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
    BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
    BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
    BasicBlock *RsltBlk = BasicBlock::Create("result",  CurBB->getParent());

    Builder.SetInsertPoint(CurBB);

    // Cast Hi and Lo to the size of Val so the widths are all the same
    // (treated as unsigned).
    if (Hi->getType() != Val->getType())
      Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false,
                                 "tmp");
    if (Lo->getType() != Val->getType())
      Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false,
                                 "tmp");

    // Compute a few things that both cases will need, up front.
    Constant* Zero = ConstantInt::get(Val->getType(), 0);
    Constant* One = ConstantInt::get(Val->getType(), 1);
    Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());

    // Compare the Hi and Lo bit positions. This is used to determine
    // which case we have (forward or reverse).  Cmp is true for reverse.
    Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less");
    Builder.CreateCondBr(Cmp, RevSize, FwdSize);

    // First, compute the number of bits in the forward case.
    Builder.SetInsertPoint(FwdSize);
    Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits");
    Builder.CreateBr(Compute);

    // Second, compute the number of bits in the reverse case.
    Builder.SetInsertPoint(RevSize);
    Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits");
    Builder.CreateBr(Compute);

    // Now, compute the bit range. Start by getting the bitsize and the shift
    // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
    // the number of bits we want in the range. We shift the bits down to the
    // least significant bits, apply the mask to zero out unwanted high bits,
    // and we have computed the "forward" result. It may still need to be
    // reversed.
    Builder.SetInsertPoint(Compute);

    // Get the BitSize from one of the two subtractions.  This is the distance
    // between the two indices, i.e. one less than the number of selected bits.
    PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits");
    BitSize->reserveOperandSpace(2);
    BitSize->addIncoming(FBitSize, FwdSize);
    BitSize->addIncoming(RBitSize, RevSize);

    // Get the ShiftAmount as the smaller of Hi/Lo
    PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt");
    ShiftAmt->reserveOperandSpace(2);
    ShiftAmt->addIncoming(Lo, FwdSize);
    ShiftAmt->addIncoming(Hi, RevSize);

    // Increment the bit size so that it is the actual count of selected bits.
    Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits");

    // Create a Mask to zero out the high order bits: ~(AllOnes << count).
    // NOTE(review): when the range covers every bit of Val the shift amount
    // equals the bit width -- confirm this case is handled as intended.
    Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask");
    Mask = Builder.CreateNot(Mask, "mask");

    // Shift the bits down and apply the mask
    Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres");
    FRes = Builder.CreateAnd(FRes, Mask, "fres");
    Builder.CreateCondBr(Cmp, Reverse, RsltBlk);

    // In the Reverse block we already have the forward (masked) result in
    // FRes but we must reverse it: on each iteration the low bit of the
    // remaining value is shifted out and pushed into the low end of RRes,
    // whose earlier bits move left.
    Builder.SetInsertPoint(Reverse);

    // First set up our loop counters.  The loop runs once per selected bit.
    PHINode *Count = Builder.CreatePHI(Val->getType(), "count");
    Count->reserveOperandSpace(2);
    Count->addIncoming(BitSizePlusOne, Compute);

    // Next, get the value that we are shifting.
    PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
    BitsToShift->reserveOperandSpace(2);
    BitsToShift->addIncoming(FRes, Compute);

    // Finally, get the result of the last computation (zero on entry).
    PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
    RRes->reserveOperandSpace(2);
    RRes->addIncoming(Zero, Compute);

    // Decrement the counter
    Value *Decr = Builder.CreateSub(Count, One, "decr");
    Count->addIncoming(Decr, Reverse);

    // Compute the Bit that we want to move
    Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit");

    // Compute the new value for next iteration.
    Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift");
    BitsToShift->addIncoming(NewVal, Reverse);

    // Shift the bit into the low bits of the result.
    Value *NewRes = Builder.CreateShl(RRes, One, "lshift");
    NewRes = Builder.CreateOr(NewRes, Bit, "addbit");
    RRes->addIncoming(NewRes, Reverse);

    // Terminate loop if we've moved all the bits.
    Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond");
    Builder.CreateCondBr(Cond, RsltBlk, Reverse);

    // Finally, in the result block, select one of the two results with a PHI
    // node and return the result;
    Builder.SetInsertPoint(RsltBlk);
    PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select");
    BitSelect->reserveOperandSpace(2);
    BitSelect->addIncoming(FRes, Compute);
    BitSelect->addIncoming(NewRes, Reverse);
    Builder.CreateRet(BitSelect);
  }

  // Return a call to the implementation function, inserted just before CI and
  // taking CI's three actual arguments (operands 1..3 of the call).
  Builder.SetInsertPoint(CI->getParent(), CI);
  CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1),
                                        CI->getOperand(2), CI->getOperand(3));
  NewCI->setName(CI->getName());
  return NewCI;
}
 466
/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
/// The first two arguments can be any bit width. The result is the same width
/// as %Value. The operation replaces bits between %Low and %High with the value
/// in %Replacement. If %Replacement is not the same width, it is truncated or
/// zero extended as appropriate to fit the bits being replaced. If %Low is
/// greater than %High then the inverse set of bits are replaced.
///
/// The lowering works by creating (once per module) a weak helper function
/// whose name is the intrinsic's name with '.' replaced by '_', filling in its
/// body if it has none yet, and emitting a call to it right before CI.  The
/// new call is returned; CI is neither erased nor are its uses rewritten here.
/// If the callee of CI does not look like a part.set declaration, CI itself is
/// returned and nothing is emitted.
/// @brief Lowering of llvm.part.set intrinsic.
static Instruction *LowerPartSet(CallInst *CI) {
  // No insertion point yet; it is set explicitly before each use below.
  IRBuilder<> Builder(*CI->getParent()->getContext());

  // Make sure we're dealing with a part set intrinsic here: a declaration
  // with an integer result and exactly four integer parameters.
  Function *F = CI->getCalledFunction();
  const FunctionType *FT = F->getFunctionType();
  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
      !FT->getParamType(3)->isInteger())
    return CI;

  // Get the intrinsic implementation function by converting all the . to _
  // in the intrinsic's function name and then reconstructing the function
  // declaration.  Every '.' at index 4 or later is rewritten.
  std::string Name(F->getName());
  for (unsigned i = 4; i < Name.length(); ++i)
    if (Name[i] == '.')
      Name[i] = '_';
  Module* M = F->getParent();
  // From here on F refers to the implementation function, not the intrinsic.
  F = cast<Function>(M->getOrInsertFunction(Name, FT));
  F->setLinkage(GlobalValue::WeakAnyLinkage);

  // If we haven't defined the impl function yet, do so now
  if (F->isDeclaration()) {
    // Get the arguments for the function.
    Function::arg_iterator args = F->arg_begin();
    Value* Val = args++; Val->setName("Val");
    Value* Rep = args++; Rep->setName("Rep");
    Value* Lo  = args++; Lo->setName("Lo");
    Value* Hi  = args++; Hi->setName("Hi");

    // Get some types we need
    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
    uint32_t RepBits = RepTy->getBitWidth();

    // Constant Definitions.  The i32 constants are used for arithmetic on the
    // bit indices; the Val* ones operate on values of Val's type.
    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);

    // Basic blocks we fill in below.
    BasicBlock* entry = BasicBlock::Create("entry", F, 0);
    BasicBlock* large = BasicBlock::Create("large", F, 0);
    BasicBlock* small = BasicBlock::Create("small", F, 0);
    BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
    BasicBlock* result = BasicBlock::Create("result", F, 0);

    // BASIC BLOCK: entry
    Builder.SetInsertPoint(entry);
    // First, get the number of bits that we're placing as an i32.  Hi_pn and
    // Lo_pn are the larger and the smaller of the two indices respectively,
    // so NumBits = (larger - smaller) + 1.
    Value* is_forward = Builder.CreateICmpULT(Lo, Hi);
    Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo);
    Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi);
    Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn);
    NumBits = Builder.CreateAdd(NumBits, One);
    // Now, convert the low index to ValTy bit width.  From this point on Lo
    // is the smaller index expressed in Val's type.
    Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false);
    // Determine if the replacement bits are larger than the number of bits we
    // are replacing and deal with it.
    Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth);
    Builder.CreateCondBr(is_large, large, small);

    // BASIC BLOCK: large
    // Rep has more bits than the range holds: keep only its low NumBits bits
    // by and-ing with an all-ones mask shifted right by the excess.
    Builder.SetInsertPoint(large);
    Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits);
    MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(),
                                     /* isSigned */ false);
    Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits);
    Value* Rep2 = Builder.CreateAnd(Mask1, Rep);
    Builder.CreateBr(small);

    // BASIC BLOCK: small
    // Merge the (possibly trimmed) replacement and bring it to Val's width.
    Builder.SetInsertPoint(small);
    PHINode* Rep3 = Builder.CreatePHI(RepTy);
    Rep3->reserveOperandSpace(2);
    Rep3->addIncoming(Rep2, large);
    Rep3->addIncoming(Rep, entry);
    Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false);
    Builder.CreateCondBr(is_forward, result, reverse);

    // BASIC BLOCK: reverse (reverses the bits of the replacement)
    Builder.SetInsertPoint(reverse);
    // Set up our loop counter as a PHI so we can decrement on each iteration.
    // We will loop once for each bit being placed (NumBits).
    PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count");
    Count->reserveOperandSpace(2);
    Count->addIncoming(NumBits, small);

    // Get the value that we are shifting bits out of as a PHI because
    // we'll change this with each iteration.
    PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val");
    BitsToShift->reserveOperandSpace(2);
    BitsToShift->addIncoming(Rep4, small);

    // Get the result of the last computation or zero on first iteration
    PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres");
    RRes->reserveOperandSpace(2);
    RRes->addIncoming(ValZero, small);

    // Decrement the loop counter by one
    Value *Decr = Builder.CreateSub(Count, One);
    Count->addIncoming(Decr, reverse);

    // Get the bit that we want to move into the result
    Value *Bit = Builder.CreateAnd(BitsToShift, ValOne);

    // Compute the new value of the bits to shift for the next iteration.
    Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne);
    BitsToShift->addIncoming(NewVal, reverse);

    // Shift the bit we extracted into the low bit of the result.
    Value *NewRes = Builder.CreateShl(RRes, ValOne);
    NewRes = Builder.CreateOr(NewRes, Bit);
    RRes->addIncoming(NewRes, reverse);

    // Terminate loop if we've moved all the bits.
    Value *Cond = Builder.CreateICmpEQ(Decr, Zero);
    Builder.CreateCondBr(Cond, result, reverse);

    // BASIC BLOCK: result
    // Rplcmnt is the replacement in final bit order (reversed or not).
    Builder.SetInsertPoint(result);
    PHINode *Rplcmnt = Builder.CreatePHI(Val->getType());
    Rplcmnt->reserveOperandSpace(2);
    Rplcmnt->addIncoming(NewRes, reverse);
    Rplcmnt->addIncoming(Rep4, small);
    // Build the "keep" mask for Val and splice the replacement in:
    //   t1 = ones from bit Lo upward, t2 = ones below Lo,
    //   t3 = t1 shifted up by NumBits (ones above the range),
    //   t4 = t2 | t3 selects every bit outside the range,
    //   t5 = the untouched bits of Val, t6 = replacement moved to bit Lo.
    Value* t0   = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false);
    Value* t1   = Builder.CreateShl(ValMask, Lo);
    Value* t2   = Builder.CreateNot(t1);
    Value* t3   = Builder.CreateShl(t1, t0);
    Value* t4   = Builder.CreateOr(t2, t3);
    Value* t5   = Builder.CreateAnd(t4, Val);
    Value* t6   = Builder.CreateShl(Rplcmnt, Lo);
    Value* Rslt = Builder.CreateOr(t5, t6, "part_set");
    Builder.CreateRet(Rslt);
  }

  // Return a call to the implementation function, inserted just before CI and
  // taking CI's four actual arguments (operands 1..4 of the call).
  Builder.SetInsertPoint(CI->getParent(), CI);
  CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1),
                                        CI->getOperand(2), CI->getOperand(3),
                                        CI->getOperand(4));
  NewCI->setName(CI->getName());
  return NewCI;
}
 625
 626 static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
 627                                        const char *Dname,
 628                                        const char *LDname) {
 629   switch (CI->getOperand(1)->getType()->getTypeID()) {
 630   default: LLVM_UNREACHABLE( "Invalid type in intrinsic");
 631   case Type::FloatTyID:
 632     ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
 633                   Type::FloatTy);
 634     break;
 635   case Type::DoubleTyID:
 636     ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
 637                   Type::DoubleTy);
 638     break;
 639   case Type::X86_FP80TyID:
 640   case Type::FP128TyID:
 641   case Type::PPC_FP128TyID:
 642     ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
 643                   CI->getOperand(1)->getType());
 644     break;
 645   }
 646 }
 647
 648 void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
 649   IRBuilder<> Builder(CI->getParent(), CI);
 650
 651   Function *Callee = CI->getCalledFunction();
 652   assert(Callee && "Cannot lower an indirect call!");
 653
 654   switch (Callee->getIntrinsicID()) {
 655   case Intrinsic::not_intrinsic:
 656     llvm_report_error("Cannot lower a call to a non-intrinsic function '"+
 657                       Callee->getName() + "'!");
 658   default:
 659     llvm_report_error("Code generator does not support intrinsic function '"+
 660                       Callee->getName()+"'!");
 661
 662     // The setjmp/longjmp intrinsics should only exist in the code if it was
 663     // never optimized (ie, right out of the CFE), or if it has been hacked on
 664     // by the lowerinvoke pass.  In both cases, the right thing to do is to
 665     // convert the call to an explicit setjmp or longjmp call.
 666   case Intrinsic::setjmp: {
 667     Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
 668                                Type::Int32Ty);
 669     if (CI->getType() != Type::VoidTy)
 670       CI->replaceAllUsesWith(V);
 671     break;
 672   }
 673   case Intrinsic::sigsetjmp:
 674      if (CI->getType() != Type::VoidTy)
 675        CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
 676      break;
 677
 678   case Intrinsic::longjmp: {
 679     ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
 680                     Type::VoidTy);
 681     break;
 682   }
 683
 684   case Intrinsic::siglongjmp: {
 685     // Insert the call to abort
 686     ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
 687                     Type::VoidTy);
 688     break;
 689   }
 690   case Intrinsic::ctpop:
 691     CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
 692     break;
 693
 694   case Intrinsic::bswap:
 695     CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
 696     break;
 697
 698   case Intrinsic::ctlz:
 699     CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
 700     break;
 701
 702   case Intrinsic::cttz: {
 703     // cttz(x) -> ctpop(~X & (X-1))
 704     Value *Src = CI->getOperand(1);
 705     Value *NotSrc = Builder.CreateNot(Src);
 706     NotSrc->setName(Src->getName() + ".not");
 707     Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
 708     SrcM1 = Builder.CreateSub(Src, SrcM1);
 709     Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI);
 710     CI->replaceAllUsesWith(Src);
 711     break;
 712   }
 713
 714   case Intrinsic::part_select:
 715     CI->replaceAllUsesWith(LowerPartSelect(CI));
 716     break;
 717
 718   case Intrinsic::part_set:
 719     CI->replaceAllUsesWith(LowerPartSet(CI));
 720     break;
 721
 722   case Intrinsic::stacksave:
 723   case Intrinsic::stackrestore: {
 724     if (!Warned)
 725       cerr << "WARNING: this target does not support the llvm.stack"
 726            << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
 727                "save" : "restore") << " intrinsic.\n";
 728     Warned = true;
 729     if (Callee->getIntrinsicID() == Intrinsic::stacksave)
 730       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
 731     break;
 732   }
 733
 734   case Intrinsic::returnaddress:
 735   case Intrinsic::frameaddress:
 736     cerr << "WARNING: this target does not support the llvm."
 737          << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
 738              "return" : "frame") << "address intrinsic.\n";
 739     CI->replaceAllUsesWith(ConstantPointerNull::get(
 740                                             cast<PointerType>(CI->getType())));
 741     break;
 742
 743   case Intrinsic::prefetch:
 744     break;    // Simply strip out prefetches on unsupported architectures
 745
 746   case Intrinsic::pcmarker:
 747     break;    // Simply strip out pcmarker on unsupported architectures
 748   case Intrinsic::readcyclecounter: {
 749     cerr << "WARNING: this target does not support the llvm.readcyclecoun"
 750          << "ter intrinsic.  It is being lowered to a constant 0\n";
 751     CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
 752     break;
 753   }
 754
 755   case Intrinsic::dbg_stoppoint:
 756   case Intrinsic::dbg_region_start:
 757   case Intrinsic::dbg_region_end:
 758   case Intrinsic::dbg_func_start:
 759   case Intrinsic::dbg_declare:
 760     break;    // Simply strip out debugging intrinsics
 761
 762   case Intrinsic::eh_exception:
 763   case Intrinsic::eh_selector_i32:
 764   case Intrinsic::eh_selector_i64:
 765     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
 766     break;
 767
 768   case Intrinsic::eh_typeid_for_i32:
 769   case Intrinsic::eh_typeid_for_i64:
 770     // Return something different to eh_selector.
 771     CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
 772     break;
 773
 774   case Intrinsic::var_annotation:
 775     break;   // Strip out annotate intrinsic
 776
 777   case Intrinsic::memcpy: {
 778     const IntegerType *IntPtr = TD.getIntPtrType();
 779     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
 780                                         /* isSigned */ false);
 781     Value *Ops[3];
 782     Ops[0] = CI->getOperand(1);
 783     Ops[1] = CI->getOperand(2);
 784     Ops[2] = Size;
 785     ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
 786     break;
 787   }
 788   case Intrinsic::memmove: {
 789     const IntegerType *IntPtr = TD.getIntPtrType();
 790     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
 791                                         /* isSigned */ false);
 792     Value *Ops[3];
 793     Ops[0] = CI->getOperand(1);
 794     Ops[1] = CI->getOperand(2);
 795     Ops[2] = Size;
 796     ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
 797     break;
 798   }
 799   case Intrinsic::memset: {
 800     const IntegerType *IntPtr = TD.getIntPtrType();
 801     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
 802                                         /* isSigned */ false);
 803     Value *Ops[3];
 804     Ops[0] = CI->getOperand(1);
 805     // Extend the amount to i32.
 806     Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty,
 807                                    /* isSigned */ false);
 808     Ops[2] = Size;
 809     ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
 810     break;
 811   }
 812   case Intrinsic::sqrt: {
 813     ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
 814     break;
 815   }
 816   case Intrinsic::log: {
 817     ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
 818     break;
 819   }
 820   case Intrinsic::log2: {
 821     ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
 822     break;
 823   }
 824   case Intrinsic::log10: {
 825     ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
 826     break;
 827   }
 828   case Intrinsic::exp: {
 829     ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
 830     break;
 831   }
 832   case Intrinsic::exp2: {
 833     ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
 834     break;
 835   }
 836   case Intrinsic::pow: {
 837     ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
 838     break;
 839   }
 840   case Intrinsic::flt_rounds:
 841      // Lower to "round to the nearest"
 842      if (CI->getType() != Type::VoidTy)
 843        CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
 844      break;
 845   }
 846
 847   assert(CI->use_empty() &&
 848          "Lowering should have eliminated any uses of the intrinsic call!");
 849   CI->eraseFromParent();
 850 }