1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 // Upgrade the declarations of the SSE4.1 functions whose arguments have
35 // changed their type from v4f32 to v2i64.
36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
// Upgrades an SSE4.1 declaration in place: if the first parameter still has
// the legacy <4 x float> type, the old function is renamed with an ".old"
// suffix and NewFn is set to the current declaration fetched via IID.
// NOTE(review): several original lines (the early return and the final
// return) are elided in this view of the file.
38 // Check whether this is an old version of the function, which received
// Legacy check: the old signature took <4 x float> as its first argument.
40 Type *Arg0Type = F->getFunctionType()->getParamType(0);
41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 // Yes, it's old, replace it with new version.
// Park the stale declaration under a ".old" name so the fresh declaration
// can take the canonical intrinsic name.
45 F->setName(F->getName() + ".old");
46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
51 // arguments have changed their type from i32 to i8.
52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
// Upgrades x86 intrinsics whose trailing immediate-mask operand changed from
// i32 to i8: if the last parameter is still i32, the old declaration is
// renamed ".old" and NewFn receives the current declaration for IID.
// NOTE(review): the early return and final return lines are elided in this
// view of the file.
54 // Check that the last argument is an i32.
55 Type *LastArgType = F->getFunctionType()->getParamType(
56 F->getFunctionType()->getNumParams() - 1);
57 if (!LastArgType->isIntegerTy(32))
60 // Move this function aside and map down.
// Rename so the canonical intrinsic name is free for the new declaration;
// call sites are rewritten later by UpgradeIntrinsicCall.
61 F->setName(F->getName() + ".old");
62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Core detection routine: decides, by name, whether F is a deprecated
// intrinsic declaration. For declarations that map to a new intrinsic it
// builds the replacement declaration into NewFn; for intrinsics whose calls
// are rewritten to plain IR it matches by name only (see the large name
// list below). NOTE(review): many lines (returns, else branches, closing
// braces) are elided in this view, so the control flow between visible
// lines is not fully shown.
67 assert(F && "Illegal to upgrade a non-existent Function.");
69 // Quickly eliminate it, if it's not a candidate.
// All auto-upgraded intrinsics live under the "llvm." prefix; anything
// shorter than "llvm.xxxx" cannot be a candidate.
70 StringRef Name = F->getName();
71 if (Name.size() <= 8 || !Name.startswith("llvm."))
73 Name = Name.substr(5); // Strip off "llvm."
// --- ARM NEON upgrades ---------------------------------------------------
// llvm.arm.neon.vclz.* became the target-independent llvm.ctlz.* (which
// takes an extra i1 "is_zero_undef" operand).
78 if (Name.startswith("arm.neon.vclz")) {
80 F->arg_begin()->getType(),
81 Type::getInt1Ty(F->getContext())
83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
84 // the end of the name. Change name from llvm.arm.neon.vclz.* to
// The replacement declaration is created by hand to keep the mangled name
// exactly "llvm.ctlz.<suffix>".
86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
87 NewFn = Function::Create(fType, F->getLinkage(),
88 "llvm.ctlz." + Name.substr(14), F->getParent());
// llvm.arm.neon.vcnt.* became the generic population-count intrinsic.
91 if (Name.startswith("arm.neon.vcnt")) {
92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
93 F->arg_begin()->getType());
// NEON vector loads: old names lacked the pointer-type suffix; rebuild the
// declaration with a ".p0i8" suffix appended.
96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
97 if (vldRegex.match(Name)) {
98 auto fArgs = F->getFunctionType()->params();
99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
100 // Can't use Intrinsic::getDeclaration here as the return types might
101 // then only be structurally equal.
102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
103 NewFn = Function::Create(fType, F->getLinkage(),
104 "llvm." + Name + ".p0i8", F->getParent());
// NEON vector stores: pick the modern vstN / vstNlane intrinsic using the
// argument count to recover N.
107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
108 if (vstRegex.match(Name)) {
109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
110 Intrinsic::arm_neon_vst2,
111 Intrinsic::arm_neon_vst3,
112 Intrinsic::arm_neon_vst4};
114 static const Intrinsic::ID StoreLaneInts[] = {
115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
116 Intrinsic::arm_neon_vst4lane
// vstN takes N data operands plus pointer/alignment (hence size - 3);
// vstNlane additionally takes a lane index (hence size - 5).
119 auto fArgs = F->getFunctionType()->params();
120 Type *Tys[] = {fArgs[0], fArgs[1]};
121 if (Name.find("lane") == StringRef::npos)
122 NewFn = Intrinsic::getDeclaration(F->getParent(),
123 StoreInts[fArgs.size() - 3], Tys);
125 NewFn = Intrinsic::getDeclaration(F->getParent(),
126 StoreLaneInts[fArgs.size() - 5], Tys);
// --- Generic bit-counting upgrades ---------------------------------------
// One-argument ctlz/cttz gained a second i1 operand; re-declare and let
// UpgradeIntrinsicCall supply the extra argument.
133 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
134 F->setName(Name + ".old");
135 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
136 F->arg_begin()->getType());
139 if (Name.startswith("cttz.") && F->arg_size() == 1) {
140 F->setName(Name + ".old");
141 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
142 F->arg_begin()->getType());
149 // We only need to change the name to match the mangling including the
// llvm.objectsize: only the name mangling changed; re-declare when the
// current mangled name differs from F's name.
151 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
152 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
153 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
154 F->setName(Name + ".old");
155 NewFn = Intrinsic::getDeclaration(F->getParent(),
156 Intrinsic::objectsize, Tys);
// --- x86 intrinsics rewritten entirely at call sites ---------------------
// Every name in this list is expanded to plain IR (compares, shuffles,
// stores, ...) by UpgradeIntrinsicCall; no replacement declaration is
// needed here, only name recognition.
163 if (Name.startswith("x86.sse2.pcmpeq.") ||
164 Name.startswith("x86.sse2.pcmpgt.") ||
165 Name.startswith("x86.avx2.pcmpeq.") ||
166 Name.startswith("x86.avx2.pcmpgt.") ||
167 Name.startswith("x86.avx2.vbroadcast") ||
168 Name.startswith("x86.avx2.pbroadcast") ||
169 Name.startswith("x86.avx.vpermil.") ||
170 Name.startswith("x86.sse41.pmovsx") ||
171 Name == "x86.avx.vinsertf128.pd.256" ||
172 Name == "x86.avx.vinsertf128.ps.256" ||
173 Name == "x86.avx.vinsertf128.si.256" ||
174 Name == "x86.avx2.vinserti128" ||
175 Name == "x86.avx.vextractf128.pd.256" ||
176 Name == "x86.avx.vextractf128.ps.256" ||
177 Name == "x86.avx.vextractf128.si.256" ||
178 Name == "x86.avx2.vextracti128" ||
179 Name == "x86.avx.movnt.dq.256" ||
180 Name == "x86.avx.movnt.pd.256" ||
181 Name == "x86.avx.movnt.ps.256" ||
182 Name == "x86.sse42.crc32.64.8" ||
183 Name == "x86.avx.vbroadcast.ss" ||
184 Name == "x86.avx.vbroadcast.ss.256" ||
185 Name == "x86.avx.vbroadcast.sd.256" ||
186 Name == "x86.sse2.psll.dq" ||
187 Name == "x86.sse2.psrl.dq" ||
188 Name == "x86.avx2.psll.dq" ||
189 Name == "x86.avx2.psrl.dq" ||
190 Name == "x86.sse2.psll.dq.bs" ||
191 Name == "x86.sse2.psrl.dq.bs" ||
192 Name == "x86.avx2.psll.dq.bs" ||
193 Name == "x86.avx2.psrl.dq.bs" ||
194 Name == "x86.sse41.pblendw" ||
195 Name == "x86.sse41.blendpd" ||
196 Name == "x86.sse41.blendps" ||
197 Name == "x86.avx.blend.pd.256" ||
198 Name == "x86.avx.blend.ps.256" ||
199 Name == "x86.avx2.pblendw" ||
200 Name == "x86.avx2.pblendd.128" ||
201 Name == "x86.avx2.pblendd.256" ||
202 Name == "x86.avx2.vbroadcasti128" ||
203 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
207 // SSE4.1 ptest functions may have an old signature.
// Delegate to the v4f32 -> v2i64 signature fixer above.
208 if (Name.startswith("x86.sse41.ptest")) {
209 if (Name == "x86.sse41.ptestc")
210 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
211 if (Name == "x86.sse41.ptestz")
212 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
213 if (Name == "x86.sse41.ptestnzc")
214 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
216 // Several blend and other instructions with masks used the wrong number of
// Delegate to the i32 -> i8 immediate-mask fixer above.
218 if (Name == "x86.sse41.insertps")
219 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
221 if (Name == "x86.sse41.dppd")
222 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
224 if (Name == "x86.sse41.dpps")
225 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
227 if (Name == "x86.sse41.mpsadbw")
228 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
230 if (Name == "x86.avx.dp.ps.256")
231 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
233 if (Name == "x86.avx2.mpsadbw")
234 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
237 // frcz.ss/sd may need to have an argument dropped
// Two-argument vfrcz variants drop their first operand; the new one-arg
// declaration is built here and the call rewritten later.
238 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
239 F->setName(Name + ".old");
240 NewFn = Intrinsic::getDeclaration(F->getParent(),
241 Intrinsic::x86_xop_vfrcz_ss);
244 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
245 F->setName(Name + ".old");
246 NewFn = Intrinsic::getDeclaration(F->getParent(),
247 Intrinsic::x86_xop_vfrcz_sd);
250 // Fix the FMA4 intrinsics to remove the 4
// Pure rename: "llvm.x86.fma4.<op>" -> "llvm.x86.fma.<op>".
251 if (Name.startswith("x86.fma4.")) {
252 F->setName("llvm.x86.fma" + Name.substr(8));
260 // This may not belong here. This function is effectively being overloaded
261 // to both detect an intrinsic which needs upgrading, and to provide the
262 // upgraded form of the intrinsic. We should perhaps have two separate
263 // functions for this.
267 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
// Public wrapper around UpgradeIntrinsicFunction1. In addition to the
// detection/re-declaration work, it refreshes F's attribute list from the
// current intrinsic tables when F still resolves to a known intrinsic ID.
// NOTE(review): the NewFn initialization and the return statement are
// elided in this view of the file.
269 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// An upgrade must produce a genuinely different declaration.
270 assert(F != NewFn && "Intrinsic function upgraded to the same function");
272 // Upgrade intrinsic attributes. This does not change the function.
275 if (Intrinsic::ID id = F->getIntrinsicID())
276 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
280 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// Hook for upgrading deprecated global variables. Currently a no-op
// placeholder; the return (elided in this view) reports whether GV changed.
281 // Nothing to do yet.
285 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
287 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
// Lowers the legacy psll.dq (byte-wise whole-register shift left) intrinsics
// to a bitcast + shufflevector-with-zeros + bitcast sequence. NumLanes is 1
// for the 128-bit SSE2 form and 2 for the 256-bit AVX2 form; Shift (elided
// parameter line) is the byte shift amount per 16-byte lane.
288 Value *Op, unsigned NumLanes,
290 // Each lane is 16 bytes.
291 unsigned NumElts = NumLanes * 16;
293 // Bitcast from a 64-bit element type to a byte element type.
// Work in <NumElts x i8> so the shuffle mask can express byte moves.
294 Op = Builder.CreateBitCast(Op,
295 VectorType::get(Type::getInt8Ty(C), NumElts),
297 // We'll be shuffling in zeroes.
298 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
300 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
301 // we'll just return the zero vector.
303 SmallVector<Constant*, 32> Idxs;
304 // 256-bit version is split into two 16-byte lanes.
305 for (unsigned l = 0; l != NumElts; l += 16)
306 for (unsigned i = 0; i != 16; ++i) {
// Indices >= NumElts select from Op (the second shuffle operand); smaller
// indices select the zero vector, producing the shifted-in zero bytes.
307 unsigned Idx = NumElts + i - Shift;
309 Idx -= NumElts - 16; // end of lane, switch operand.
310 Idxs.push_back(Builder.getInt32(Idx + l));
313 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
316 // Bitcast back to a 64-bit element type.
// Restore the <2*NumLanes x i64> type the intrinsic's callers expect.
317 return Builder.CreateBitCast(Res,
318 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
322 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
324 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
// Mirror of UpgradeX86PSLLDQIntrinsics for the byte-wise whole-register
// shift RIGHT (psrl.dq) intrinsics: bitcast to bytes, shuffle zeros in from
// the high end of each 16-byte lane, bitcast back. NumLanes is 1 (SSE2) or
// 2 (AVX2); Shift (elided parameter line) is the per-lane byte count.
325 Value *Op, unsigned NumLanes,
327 // Each lane is 16 bytes.
328 unsigned NumElts = NumLanes * 16;
330 // Bitcast from a 64-bit element type to a byte element type.
331 Op = Builder.CreateBitCast(Op,
332 VectorType::get(Type::getInt8Ty(C), NumElts),
334 // We'll be shuffling in zeroes.
335 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
337 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
338 // we'll just return the zero vector.
340 SmallVector<Constant*, 32> Idxs;
341 // 256-bit version is split into two 16-byte lanes.
342 for (unsigned l = 0; l != NumElts; l += 16)
343 for (unsigned i = 0; i != 16; ++i) {
// Here Op is the FIRST shuffle operand, so indices < NumElts read the
// original bytes and indices >= NumElts read the zero vector.
344 unsigned Idx = i + Shift;
346 Idx += NumElts - 16; // end of lane, switch operand.
347 Idxs.push_back(Builder.getInt32(Idx + l));
350 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
353 // Bitcast back to a 64-bit element type.
354 return Builder.CreateBitCast(Res,
355 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
359 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
360 // upgraded intrinsic. All argument and return casting must be provided in
361 // order to seamlessly integrate with existing context.
362 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Rewrites a single call to a deprecated intrinsic. Two regimes (separated
// by elided control flow in this view): when NewFn is null-ish the call is
// expanded by name into plain IR (the big if/else-if chain building Rep,
// then RAUW + erase); otherwise the switch at the bottom re-emits the call
// against NewFn with the required argument/return fixups.
363 Function *F = CI->getCalledFunction();
364 LLVMContext &C = CI->getContext();
365 IRBuilder<> Builder(C);
// Insert replacement IR exactly where the old call sits.
366 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
368 assert(F && "Intrinsic call is not direct?");
371 // Get the Function's name.
372 StringRef Name = F->getName();
375 // Upgrade packed integer vector compares intrinsics to compare instructions
376 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
377 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
378 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
380 // need to sign extend since icmp returns vector of i1
381 Rep = Builder.CreateSExt(Rep, CI->getType(), "")
382 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
383 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
384 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
386 // need to sign extend since icmp returns vector of i1
387 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
// Non-temporal vector stores become ordinary stores tagged with
// !nontemporal metadata.
388 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
389 Name == "llvm.x86.avx.movnt.ps.256" ||
390 Name == "llvm.x86.avx.movnt.pd.256") {
// NOTE(review): this Builder shadows the function-level one declared at the
// top; it is re-seated to the same insert point, so behavior matches.
391 IRBuilder<> Builder(C);
392 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
394 Module *M = F->getParent();
395 SmallVector<Metadata *, 1> Elts;
397 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
398 MDNode *Node = MDNode::get(C, Elts);
400 Value *Arg0 = CI->getArgOperand(0);
401 Value *Arg1 = CI->getArgOperand(1);
403 // Convert the type of the pointer to a pointer to the stored type.
404 Value *BC = Builder.CreateBitCast(Arg0,
405 PointerType::getUnqual(Arg1->getType()),
407 StoreInst *SI = Builder.CreateStore(Arg1, BC);
408 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// 256-bit (32-byte) alignment, matching the original vmovnt semantics.
409 SI->setAlignment(32);
412 CI->eraseFromParent();
// XOP vector compares: map the suffix to the element-typed vpcom intrinsic
// and the condition name to an immediate condition code.
414 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
416 if (Name.endswith("ub"))
417 intID = Intrinsic::x86_xop_vpcomub;
418 else if (Name.endswith("uw"))
419 intID = Intrinsic::x86_xop_vpcomuw;
420 else if (Name.endswith("ud"))
421 intID = Intrinsic::x86_xop_vpcomud;
422 else if (Name.endswith("uq"))
423 intID = Intrinsic::x86_xop_vpcomuq;
424 else if (Name.endswith("b"))
425 intID = Intrinsic::x86_xop_vpcomb;
426 else if (Name.endswith("w"))
427 intID = Intrinsic::x86_xop_vpcomw;
428 else if (Name.endswith("d"))
429 intID = Intrinsic::x86_xop_vpcomd;
430 else if (Name.endswith("q"))
431 intID = Intrinsic::x86_xop_vpcomq;
433 llvm_unreachable("Unknown suffix");
435 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// Translate the textual condition into the Imm operand (assignments to Imm
// are elided in this view).
437 if (Name.startswith("lt"))
439 else if (Name.startswith("le"))
441 else if (Name.startswith("gt"))
443 else if (Name.startswith("ge"))
445 else if (Name.startswith("eq"))
447 else if (Name.startswith("ne"))
449 else if (Name.startswith("false"))
451 else if (Name.startswith("true"))
454 llvm_unreachable("Unknown condition");
456 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
458 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
459 Builder.getInt8(Imm)});
// crc32.64.8 never used the high half of its i64 operand: truncate, call
// the 32-bit form, and zero-extend the result back.
460 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
461 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
462 Intrinsic::x86_sse42_crc32_32_8);
463 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
464 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
465 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
466 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
467 // Replace broadcasts with a series of insertelements.
// Load one element through a pointer cast, then insert it into every lane.
468 Type *VecTy = CI->getType();
469 Type *EltTy = VecTy->getVectorElementType();
470 unsigned EltNum = VecTy->getVectorNumElements();
471 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
472 EltTy->getPointerTo());
473 Value *Load = Builder.CreateLoad(EltTy, Cast);
474 Type *I32Ty = Type::getInt32Ty(C);
475 Rep = UndefValue::get(VecTy);
476 for (unsigned I = 0; I < EltNum; ++I)
477 Rep = Builder.CreateInsertElement(Rep, Load,
478 ConstantInt::get(I32Ty, I));
// pmovsx*: shuffle out the low source lanes, then sign-extend to the
// destination element type.
479 } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
480 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
481 VectorType *DstTy = cast<VectorType>(CI->getType());
482 unsigned NumDstElts = DstTy->getNumElements();
484 // Extract a subvector of the first NumDstElts lanes and sign extend.
485 SmallVector<int, 8> ShuffleMask;
486 for (int i = 0; i != (int)NumDstElts; ++i)
487 ShuffleMask.push_back(i);
489 Value *SV = Builder.CreateShuffleVector(
490 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
491 Rep = Builder.CreateSExt(SV, DstTy);
492 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
493 // Replace vbroadcasts with a vector shuffle.
// Load 128 bits as <2 x i64>, then duplicate both lanes via mask {0,1,0,1}.
494 Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
495 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
496 PointerType::getUnqual(VT));
497 Value *Load = Builder.CreateLoad(VT, Op);
498 const int Idxs[4] = { 0, 1, 0, 1 };
499 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
// Element broadcasts become a splat shuffle (all-zero mask).
501 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
502 Name.startswith("llvm.x86.avx2.vbroadcast")) {
503 // Replace vp?broadcasts with a vector shuffle.
504 Value *Op = CI->getArgOperand(0);
505 unsigned NumElts = CI->getType()->getVectorNumElements();
506 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
507 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
508 Constant::getNullValue(MaskTy));
// Whole-register byte shifts: ".dq" forms take the amount in BITS (divide
// by 8); ".dq.bs" forms already take bytes.
509 } else if (Name == "llvm.x86.sse2.psll.dq") {
510 // 128-bit shift left specified in bits.
511 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
512 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
513 Shift / 8); // Shift is in bits.
514 } else if (Name == "llvm.x86.sse2.psrl.dq") {
515 // 128-bit shift right specified in bits.
516 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
517 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
518 Shift / 8); // Shift is in bits.
519 } else if (Name == "llvm.x86.avx2.psll.dq") {
520 // 256-bit shift left specified in bits.
521 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
522 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
523 Shift / 8); // Shift is in bits.
524 } else if (Name == "llvm.x86.avx2.psrl.dq") {
525 // 256-bit shift right specified in bits.
526 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
527 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
528 Shift / 8); // Shift is in bits.
529 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
530 // 128-bit shift left specified in bytes.
531 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
532 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
534 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
535 // 128-bit shift right specified in bytes.
536 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
537 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
539 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
540 // 256-bit shift left specified in bytes.
541 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
542 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
544 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
545 // 256-bit shift right specified in bytes.
546 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
547 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
// Blends: each immediate bit selects the lane from Op1 (bit set) or Op0.
549 } else if (Name == "llvm.x86.sse41.pblendw" ||
550 Name == "llvm.x86.sse41.blendpd" ||
551 Name == "llvm.x86.sse41.blendps" ||
552 Name == "llvm.x86.avx.blend.pd.256" ||
553 Name == "llvm.x86.avx.blend.ps.256" ||
554 Name == "llvm.x86.avx2.pblendw" ||
555 Name == "llvm.x86.avx2.pblendd.128" ||
556 Name == "llvm.x86.avx2.pblendd.256" {
557 Value *Op0 = CI->getArgOperand(0);
558 Value *Op1 = CI->getArgOperand(1);
559 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
560 VectorType *VecTy = cast<VectorType>(CI->getType());
561 unsigned NumElts = VecTy->getNumElements();
563 SmallVector<Constant*, 16> Idxs;
564 for (unsigned i = 0; i != NumElts; ++i) {
// Imm is only 8 bits wide, so the mask repeats every 8 lanes (i % 8).
565 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
566 Idxs.push_back(Builder.getInt32(Idx));
569 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
// 128-bit insert into a 256-bit vector, expressed as two shuffles.
570 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
571 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
572 Name == "llvm.x86.avx.vinsertf128.si.256" ||
573 Name == "llvm.x86.avx2.vinserti128") {
574 Value *Op0 = CI->getArgOperand(0);
575 Value *Op1 = CI->getArgOperand(1);
576 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
577 VectorType *VecTy = cast<VectorType>(CI->getType());
578 unsigned NumElts = VecTy->getNumElements();
580 // Mask off the high bits of the immediate value; hardware ignores those.
583 // Extend the second operand into a vector that is twice as big.
584 Value *UndefV = UndefValue::get(Op1->getType());
585 SmallVector<Constant*, 8> Idxs;
586 for (unsigned i = 0; i != NumElts; ++i) {
587 Idxs.push_back(Builder.getInt32(i));
589 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
591 // Insert the second operand into the first operand.
593 // Note that there is no guarantee that instruction lowering will actually
594 // produce a vinsertf128 instruction for the created shuffles. In
595 // particular, the 0 immediate case involves no lane changes, so it can
596 // be handled as a blend.
598 // Example of shuffle mask for 32-bit elements:
599 // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
600 // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
602 SmallVector<Constant*, 8> Idxs2;
603 // The low half of the result is either the low half of the 1st operand
604 // or the low half of the 2nd operand (the inserted vector).
605 for (unsigned i = 0; i != NumElts / 2; ++i) {
606 unsigned Idx = Imm ? i : (i + NumElts);
607 Idxs2.push_back(Builder.getInt32(Idx));
609 // The high half of the result is either the low half of the 2nd operand
610 // (the inserted vector) or the high half of the 1st operand.
611 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
612 unsigned Idx = Imm ? (i + NumElts / 2) : i;
613 Idxs2.push_back(Builder.getInt32(Idx));
615 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
// 128-bit extract from a 256-bit vector: a single half-selecting shuffle.
616 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
617 Name == "llvm.x86.avx.vextractf128.ps.256" ||
618 Name == "llvm.x86.avx.vextractf128.si.256" ||
619 Name == "llvm.x86.avx2.vextracti128") {
620 Value *Op0 = CI->getArgOperand(0);
621 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
622 VectorType *VecTy = cast<VectorType>(CI->getType());
623 unsigned NumElts = VecTy->getNumElements();
625 // Mask off the high bits of the immediate value; hardware ignores those.
628 // Get indexes for either the high half or low half of the input vector.
629 SmallVector<Constant*, 4> Idxs(NumElts);
630 for (unsigned i = 0; i != NumElts; ++i) {
631 unsigned Idx = Imm ? (i + NumElts) : i;
632 Idxs[i] = Builder.getInt32(Idx);
635 Value *UndefV = UndefValue::get(Op0->getType());
636 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// vpermil with an immediate control becomes a single-operand shuffle whose
// mask is decoded per element width (pd: 1 control bit, ps: 2 bits).
638 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
639 if (Name == "llvm.x86.avx.vpermil.pd.256")
641 else if (Name == "llvm.x86.avx.vpermil.pd")
643 else if (Name == "llvm.x86.avx.vpermil.ps.256")
645 else if (Name == "llvm.x86.avx.vpermil.ps")
648 if (PD256 || PD128 || PS256 || PS128) {
649 Value *Op0 = CI->getArgOperand(0);
650 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
651 SmallVector<Constant*, 8> Idxs;
654 for (unsigned i = 0; i != 2; ++i)
655 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
657 for (unsigned l = 0; l != 4; l+=2)
658 for (unsigned i = 0; i != 2; ++i)
659 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
661 for (unsigned i = 0; i != 4; ++i)
662 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
664 for (unsigned l = 0; l != 8; l+=4)
665 for (unsigned i = 0; i != 4; ++i)
666 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
668 llvm_unreachable("Unexpected function");
670 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
672 llvm_unreachable("Unknown function for CallInst upgrade.");
// Splice the replacement value in and delete the old call.
676 CI->replaceAllUsesWith(Rep);
677 CI->eraseFromParent();
// --- NewFn path: re-emit the call against the upgraded declaration -------
// Preserve the original result name; the old call is renamed ".old" so the
// new call can take the name over.
681 std::string Name = CI->getName();
683 CI->setName(Name + ".old");
685 switch (NewFn->getIntrinsicID()) {
687 llvm_unreachable("Unknown function for CallInst upgrade.");
// NEON loads/stores: arguments are unchanged; just retarget the call.
689 case Intrinsic::arm_neon_vld1:
690 case Intrinsic::arm_neon_vld2:
691 case Intrinsic::arm_neon_vld3:
692 case Intrinsic::arm_neon_vld4:
693 case Intrinsic::arm_neon_vld2lane:
694 case Intrinsic::arm_neon_vld3lane:
695 case Intrinsic::arm_neon_vld4lane:
696 case Intrinsic::arm_neon_vst1:
697 case Intrinsic::arm_neon_vst2:
698 case Intrinsic::arm_neon_vst3:
699 case Intrinsic::arm_neon_vst4:
700 case Intrinsic::arm_neon_vst2lane:
701 case Intrinsic::arm_neon_vst3lane:
702 case Intrinsic::arm_neon_vst4lane: {
703 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
704 CI->arg_operands().end());
705 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
706 CI->eraseFromParent();
// ctlz/cttz gained an i1 "is_zero_undef" operand; pass false to keep the
// old (fully defined) semantics.
710 case Intrinsic::ctlz:
711 case Intrinsic::cttz:
712 assert(CI->getNumArgOperands() == 1 &&
713 "Mismatch between function args and call args");
714 CI->replaceAllUsesWith(Builder.CreateCall(
715 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
716 CI->eraseFromParent();
719 case Intrinsic::objectsize:
720 CI->replaceAllUsesWith(Builder.CreateCall(
721 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
722 CI->eraseFromParent();
725 case Intrinsic::ctpop: {
726 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
727 CI->eraseFromParent();
// vfrcz dropped its first operand: forward only operand 1.
731 case Intrinsic::x86_xop_vfrcz_ss:
732 case Intrinsic::x86_xop_vfrcz_sd:
733 CI->replaceAllUsesWith(
734 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
735 CI->eraseFromParent();
738 case Intrinsic::x86_sse41_ptestc:
739 case Intrinsic::x86_sse41_ptestz:
740 case Intrinsic::x86_sse41_ptestnzc: {
741 // The arguments for these intrinsics used to be v4f32, and changed
742 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
743 // So, the only thing required is a bitcast for both arguments.
744 // First, check the arguments have the old type.
745 Value *Arg0 = CI->getArgOperand(0);
746 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
749 // Old intrinsic, add bitcasts
750 Value *Arg1 = CI->getArgOperand(1);
752 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
754 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
755 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
757 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
758 CI->replaceAllUsesWith(NewCall);
759 CI->eraseFromParent();
763 case Intrinsic::x86_sse41_insertps:
764 case Intrinsic::x86_sse41_dppd:
765 case Intrinsic::x86_sse41_dpps:
766 case Intrinsic::x86_sse41_mpsadbw:
767 case Intrinsic::x86_avx_dp_ps_256:
768 case Intrinsic::x86_avx2_mpsadbw: {
769 // Need to truncate the last argument from i32 to i8 -- this argument models
770 // an inherently 8-bit immediate operand to these x86 instructions.
771 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
772 CI->arg_operands().end());
774 // Replace the last argument with a trunc.
775 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
777 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
778 CI->replaceAllUsesWith(NewCall);
779 CI->eraseFromParent();
785 // This tests each Function to determine if it needs upgrading. When we find
786 // one we are interested in, we then upgrade all calls to reflect the new
788 void llvm::UpgradeCallsToIntrinsic(Function* F) {
// Driver: if F's declaration needs upgrading, rewrite every call site to
// use the replacement and then delete the obsolete declaration.
789 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
791 // Upgrade the function and check if it is a totaly new function.
793 if (UpgradeIntrinsicFunction(F, NewFn)) {
794 // Replace all uses to the old function with the new one if necessary.
// The iterator is advanced (UI++) before UpgradeIntrinsicCall runs because
// rewriting erases the call and would otherwise invalidate UI.
795 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
797 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
798 UpgradeIntrinsicCall(CI, NewFn);
800 // Remove old function, no longer used, from the module.
801 F->eraseFromParent();
805 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
// Converts an instruction's legacy (scalar) !tbaa tag to the struct-path
// aware format: <base node, access node, offset 0[, const flag]>.
806 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
807 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
808 // Check if the tag uses struct-path aware TBAA format.
// Struct-path tags start with an MDNode operand and have >= 3 operands;
// those need no upgrade (the early return here is elided in this view).
809 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// Three operands means the old scalar tag carried a "const" flag.
812 if (MD->getNumOperands() == 3) {
813 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
814 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
815 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
816 Metadata *Elts2[] = {ScalarType, ScalarType,
817 ConstantAsMetadata::get(Constant::getNullValue(
818 Type::getInt64Ty(I->getContext()))),
820 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
822 // Create a MDNode <MD, MD, offset 0>
823 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
824 Type::getInt64Ty(I->getContext())))};
825 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
829 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
// Legalizes a bitcast between pointers in DIFFERENT address spaces (which
// is no longer a valid bitcast) by splitting it into ptrtoint + inttoptr.
// On that path, Temp receives the intermediate ptrtoint instruction (the
// caller must insert it) and the returned inttoptr replaces the bitcast.
830 Instruction *&Temp) {
// Only bitcasts need this treatment (the early return is elided here).
831 if (Opc != Instruction::BitCast)
835 Type *SrcTy = V->getType();
836 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
837 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
838 LLVMContext &Context = V->getContext();
840 // We have no information about target data layout, so we assume that
841 // the maximum pointer size is 64bit.
842 Type *MidTy = Type::getInt64Ty(Context);
843 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
845 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
851 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
// Constant-expression counterpart of UpgradeBitCastInst: a bitcast between
// pointers in different address spaces is rebuilt as a ptrtoint/inttoptr
// constant-expression pair.
852 if (Opc != Instruction::BitCast)
855 Type *SrcTy = C->getType();
856 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
857 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
858 LLVMContext &Context = C->getContext();
860 // We have no information about target data layout, so we assume that
861 // the maximum pointer size is 64bit.
862 Type *MidTy = Type::getInt64Ty(Context);
864 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
871 /// Check the debug info version number, if it is out-dated, drop the debug
872 /// info. Return true if module is modified.
873 bool llvm::UpgradeDebugInfo(Module &M) {
// If the module's debug-metadata version is current, nothing to do (early
// return elided in this view); otherwise strip all debug info and emit a
// diagnostic noting the out-of-date version.
874 unsigned Version = getDebugMetadataVersionFromModule(M);
875 if (Version == DEBUG_METADATA_VERSION)
// StripDebugInfo reports whether the module was actually modified.
878 bool RetCode = StripDebugInfo(M);
880 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
881 M.getContext().diagnose(DiagVersion);
886 void llvm::UpgradeMDStringConstant(std::string &String) {
887 const std::string OldPrefix = "llvm.vectorizer.";
888 if (String == "llvm.vectorizer.unroll") {
889 String = "llvm.loop.interleave.count";
890 } else if (String.find(OldPrefix) == 0) {
891 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");