1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 // Upgrade the declarations of the SSE4.1 functions whose arguments have
35 // changed their type from v4f32 to v2i64.
36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
38 // Check whether this is an old version of the function, which received
40 Type *Arg0Type = F->getFunctionType()->getParamType(0);
41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 // Yes, it's old, replace it with new version.
45 F->setName(F->getName() + ".old");
46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
51 // arguments have changed their type from i32 to i8.
52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54 // Check that the last argument is an i32.
55 Type *LastArgType = F->getFunctionType()->getParamType(
56 F->getFunctionType()->getNumParams() - 1);
57 if (!LastArgType->isIntegerTy(32))
60 // Move this function aside and map down.
61 F->setName(F->getName() + ".old");
62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Examine intrinsic declaration F and decide whether it is an obsolete form
// that must be auto-upgraded.  On a match, NewFn is set to the modern
// replacement declaration (or left null when only the call sites must be
// rewritten by UpgradeIntrinsicCall) and true is returned.
//
// NOTE(review): this chunk appears to be an excerpt -- several early returns,
// `return true;` statements, switch scaffolding and closing braces present in
// upstream LLVM are not visible here.  Confirm control flow against the full
// file; comments below describe only what is visible.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
  Name = Name.substr(5); // Strip off "llvm."

  // ARM NEON count-leading-zeros becomes llvm.ctlz with an extra i1 operand.
  if (Name.startswith("arm.neon.vclz")) {
    F->arg_begin()->getType(),
    Type::getInt1Ty(F->getContext())
    // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    // the end of the name. Change name from llvm.arm.neon.vclz.* to
    FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm.ctlz." + Name.substr(14), F->getParent());
  // ARM NEON population count maps directly onto llvm.ctpop.
  if (Name.startswith("arm.neon.vcnt")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                      F->arg_begin()->getType());
  // ARM NEON vector loads: append the ".p0i8" pointer-type suffix mangling.
  Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vldRegex.match(Name)) {
    auto fArgs = F->getFunctionType()->params();
    SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
    // Can't use Intrinsic::getDeclaration here as the return types might
    // then only be structurally equal.
    FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm." + Name + ".p0i8", F->getParent());
  // ARM NEON vector stores: select the matching vstN / vstNlane intrinsic
  // based on the argument count.
  Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vstRegex.match(Name)) {
    static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                              Intrinsic::arm_neon_vst2,
                                              Intrinsic::arm_neon_vst3,
                                              Intrinsic::arm_neon_vst4};

    static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    // vstN takes ptr + N vectors + align, so fArgs.size()-3 indexes StoreInts;
    // vstNlane has two extra operands, hence fArgs.size()-5.
    if (Name.find("lane") == StringRef::npos)
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreInts[fArgs.size() - 3], Tys);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreLaneInts[fArgs.size() - 5], Tys);
  // ctlz/cttz grew a second i1 "is_zero_undef" operand; single-arg forms
  // are old and get renamed aside.
  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                      F->arg_begin()->getType());
  if (Name.startswith("cttz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                      F->arg_begin()->getType());
  // Mem intrinsics dropped their explicit alignment argument (5-arg forms
  // are old); alignment moves to parameter attributes at the call site.
  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
    F->setName(Name + ".old");
    // Get the types of dest, src, and len.
    ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
  if (Name.startswith("memmove.") && F->arg_size() == 5) {
    F->setName(Name + ".old");
    // Get the types of dest, src, and len.
    ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
  if (Name.startswith("memset.") && F->arg_size() == 5) {
    F->setName(Name + ".old");
    // Get the types of dest and len.
    Type *ParamTypes[2] = {
      F->getFunctionType()->getParamType(0),
      F->getFunctionType()->getParamType(2)
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,

  // We only need to change the name to match the mangling including the
  // address space.
  if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::objectsize, Tys);

  // The x86 intrinsics listed here are upgraded by rewriting the call site
  // only (NewFn stays null); UpgradeIntrinsicCall handles each name.
  if (Name.startswith("x86.sse2.pcmpeq.") ||
      Name.startswith("x86.sse2.pcmpgt.") ||
      Name.startswith("x86.avx2.pcmpeq.") ||
      Name.startswith("x86.avx2.pcmpgt.") ||
      Name.startswith("x86.avx2.vbroadcast") ||
      Name.startswith("x86.avx2.pbroadcast") ||
      Name.startswith("x86.avx.vpermil.") ||
      Name.startswith("x86.sse41.pmovsx") ||
      Name == "x86.avx.vinsertf128.pd.256" ||
      Name == "x86.avx.vinsertf128.ps.256" ||
      Name == "x86.avx.vinsertf128.si.256" ||
      Name == "x86.avx2.vinserti128" ||
      Name == "x86.avx.vextractf128.pd.256" ||
      Name == "x86.avx.vextractf128.ps.256" ||
      Name == "x86.avx.vextractf128.si.256" ||
      Name == "x86.avx2.vextracti128" ||
      Name == "x86.avx.movnt.dq.256" ||
      Name == "x86.avx.movnt.pd.256" ||
      Name == "x86.avx.movnt.ps.256" ||
      Name == "x86.sse42.crc32.64.8" ||
      Name == "x86.avx.vbroadcast.ss" ||
      Name == "x86.avx.vbroadcast.ss.256" ||
      Name == "x86.avx.vbroadcast.sd.256" ||
      Name == "x86.sse2.psll.dq" ||
      Name == "x86.sse2.psrl.dq" ||
      Name == "x86.avx2.psll.dq" ||
      Name == "x86.avx2.psrl.dq" ||
      Name == "x86.sse2.psll.dq.bs" ||
      Name == "x86.sse2.psrl.dq.bs" ||
      Name == "x86.avx2.psll.dq.bs" ||
      Name == "x86.avx2.psrl.dq.bs" ||
      Name == "x86.sse41.pblendw" ||
      Name == "x86.sse41.blendpd" ||
      Name == "x86.sse41.blendps" ||
      Name == "x86.avx.blend.pd.256" ||
      Name == "x86.avx.blend.ps.256" ||
      Name == "x86.avx2.pblendw" ||
      Name == "x86.avx2.pblendd.128" ||
      Name == "x86.avx2.pblendd.256" ||
      Name == "x86.avx2.vbroadcasti128" ||
      Name == "x86.xop.vpcmov" ||
      (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("x86.sse41.ptest")) {
    if (Name == "x86.sse41.ptestc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name == "x86.sse41.ptestz")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name == "x86.sse41.ptestnzc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  // Several blend and other instructions with masks used the wrong number of
  // bits (i32 instead of i8).
  if (Name == "x86.sse41.insertps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
  if (Name == "x86.sse41.dppd")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
  if (Name == "x86.sse41.dpps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
  if (Name == "x86.sse41.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
  if (Name == "x86.avx.dp.ps.256")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
  if (Name == "x86.avx2.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,

  // frcz.ss/sd may need to have an argument dropped
  if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
  if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
  // Fix the FMA4 intrinsics to remove the 4
  if (Name.startswith("x86.fma4.")) {
    F->setName("llvm.x86.fma" + Name.substr(8));

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
298 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
300 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
301 assert(F != NewFn && "Intrinsic function upgraded to the same function");
303 // Upgrade intrinsic attributes. This does not change the function.
306 if (Intrinsic::ID id = F->getIntrinsicID())
307 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
311 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
312 // Nothing to do yet.
316 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
318 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
319 Value *Op, unsigned NumLanes,
321 // Each lane is 16 bytes.
322 unsigned NumElts = NumLanes * 16;
324 // Bitcast from a 64-bit element type to a byte element type.
325 Op = Builder.CreateBitCast(Op,
326 VectorType::get(Type::getInt8Ty(C), NumElts),
328 // We'll be shuffling in zeroes.
329 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
331 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
332 // we'll just return the zero vector.
334 SmallVector<Constant*, 32> Idxs;
335 // 256-bit version is split into two 16-byte lanes.
336 for (unsigned l = 0; l != NumElts; l += 16)
337 for (unsigned i = 0; i != 16; ++i) {
338 unsigned Idx = NumElts + i - Shift;
340 Idx -= NumElts - 16; // end of lane, switch operand.
341 Idxs.push_back(Builder.getInt32(Idx + l));
344 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
347 // Bitcast back to a 64-bit element type.
348 return Builder.CreateBitCast(Res,
349 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
353 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
355 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
356 Value *Op, unsigned NumLanes,
358 // Each lane is 16 bytes.
359 unsigned NumElts = NumLanes * 16;
361 // Bitcast from a 64-bit element type to a byte element type.
362 Op = Builder.CreateBitCast(Op,
363 VectorType::get(Type::getInt8Ty(C), NumElts),
365 // We'll be shuffling in zeroes.
366 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
368 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
369 // we'll just return the zero vector.
371 SmallVector<Constant*, 32> Idxs;
372 // 256-bit version is split into two 16-byte lanes.
373 for (unsigned l = 0; l != NumElts; l += 16)
374 for (unsigned i = 0; i != 16; ++i) {
375 unsigned Idx = i + Shift;
377 Idx += NumElts - 16; // end of lane, switch operand.
378 Idxs.push_back(Builder.getInt32(Idx + l));
381 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
384 // Bitcast back to a 64-bit element type.
385 return Builder.CreateBitCast(Res,
386 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
//
// When NewFn is null the old call is replaced with equivalent plain IR
// (compares, shuffles, stores, ...); otherwise the call is redirected at
// NewFn with arguments adjusted as each case requires.
//
// NOTE(review): this chunk appears to be an excerpt -- `Value *Rep` and
// several assignments, `return`s, `break`s and closing braces present in
// upstream LLVM are not visible here.  Confirm control flow against the
// full file; comments below describe only what is visible.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  // Get the Function's name.
  StringRef Name = F->getName();

  // Upgrade packed integer vector compares intrinsics to compare instructions
  if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
      Name.startswith("llvm.x86.avx2.pcmpeq.")) {
    Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
    // need to sign extend since icmp returns vector of i1
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
             Name.startswith("llvm.x86.avx2.pcmpgt.")) {
    Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
    // need to sign extend since icmp returns vector of i1
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
             Name == "llvm.x86.avx.movnt.ps.256" ||
             Name == "llvm.x86.avx.movnt.pd.256") {
    // Non-temporal moves become an ordinary store tagged with !nontemporal.
    IRBuilder<> Builder(C);
    Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

    Module *M = F->getParent();
    SmallVector<Metadata *, 1> Elts;
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Arg0,
                                      PointerType::getUnqual(Arg1->getType()),
    StoreInst *SI = Builder.CreateStore(Arg1, BC);
    SI->setMetadata(M->getMDKindID("nontemporal"), Node);
    SI->setAlignment(32);

    CI->eraseFromParent();
  } else if (Name.startswith("llvm.x86.xop.vpcom")) {
    // XOP vector compares: decode the element-type suffix into an intrinsic
    // ID and the condition prefix into an immediate.
    if (Name.endswith("ub"))
      intID = Intrinsic::x86_xop_vpcomub;
    else if (Name.endswith("uw"))
      intID = Intrinsic::x86_xop_vpcomuw;
    else if (Name.endswith("ud"))
      intID = Intrinsic::x86_xop_vpcomud;
    else if (Name.endswith("uq"))
      intID = Intrinsic::x86_xop_vpcomuq;
    else if (Name.endswith("b"))
      intID = Intrinsic::x86_xop_vpcomb;
    else if (Name.endswith("w"))
      intID = Intrinsic::x86_xop_vpcomw;
    else if (Name.endswith("d"))
      intID = Intrinsic::x86_xop_vpcomd;
    else if (Name.endswith("q"))
      intID = Intrinsic::x86_xop_vpcomq;
      llvm_unreachable("Unknown suffix");

    Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"

    if (Name.startswith("lt"))
    else if (Name.startswith("le"))
    else if (Name.startswith("gt"))
    else if (Name.startswith("ge"))
    else if (Name.startswith("eq"))
    else if (Name.startswith("ne"))
    else if (Name.startswith("false"))
    else if (Name.startswith("true"))
      llvm_unreachable("Unknown condition");

    Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
        Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   Builder.getInt8(Imm)});
  } else if (Name == "llvm.x86.xop.vpcmov") {
    // vpcmov(a, b, sel) == (a & sel) | (b & ~sel), expressed in plain IR.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Sel = CI->getArgOperand(2);
    unsigned NumElts = CI->getType()->getVectorNumElements();
    Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
    Value *NotSel = Builder.CreateXor(Sel, MinusOne);
    Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
    Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
    Rep = Builder.CreateOr(Sel0, Sel1);
  } else if (Name == "llvm.x86.sse42.crc32.64.8") {
    // The 64.8 variant is the 32.8 one with the i64 operand truncated and
    // the result zero-extended back.
    Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::x86_sse42_crc32_32_8);
    Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
    // Replace broadcasts with a series of insertelements.
    Type *VecTy = CI->getType();
    Type *EltTy = VecTy->getVectorElementType();
    unsigned EltNum = VecTy->getVectorNumElements();
    Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                        EltTy->getPointerTo());
    Value *Load = Builder.CreateLoad(EltTy, Cast);
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = UndefValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load,
                                        ConstantInt::get(I32Ty, I));
  } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
    VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
    VectorType *DstTy = cast<VectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();

    // Extract a subvector of the first NumDstElts lanes and sign extend.
    SmallVector<int, 8> ShuffleMask;
    for (int i = 0; i != (int)NumDstElts; ++i)
      ShuffleMask.push_back(i);

    Value *SV = Builder.CreateShuffleVector(
        CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
    Rep = Builder.CreateSExt(SV, DstTy);
  } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
    // Replace vbroadcasts with a vector shuffle.
    Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
    Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                          PointerType::getUnqual(VT));
    Value *Load = Builder.CreateLoad(VT, Op);
    const int Idxs[4] = { 0, 1, 0, 1 };
    Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
  } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
             Name.startswith("llvm.x86.avx2.vbroadcast")) {
    // Replace vp?broadcasts with a vector shuffle.
    Value *Op = CI->getArgOperand(0);
    unsigned NumElts = CI->getType()->getVectorNumElements();
    Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
    Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                      Constant::getNullValue(MaskTy));
  } else if (Name == "llvm.x86.sse2.psll.dq") {
    // 128-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.sse2.psrl.dq") {
    // 128-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.avx2.psll.dq") {
    // 256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.avx2.psrl.dq") {
    // 256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
    // 128-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
  } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
    // 128-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
  } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
    // 256-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
  } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
    // 256-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
  } else if (Name == "llvm.x86.sse41.pblendw" ||
             Name == "llvm.x86.sse41.blendpd" ||
             Name == "llvm.x86.sse41.blendps" ||
             Name == "llvm.x86.avx.blend.pd.256" ||
             Name == "llvm.x86.avx.blend.ps.256" ||
             Name == "llvm.x86.avx2.pblendw" ||
             Name == "llvm.x86.avx2.pblendd.128" ||
             Name == "llvm.x86.avx2.pblendd.256") {
    // Immediate blends become a two-operand shuffle: bit i of the immediate
    // selects element i from Op1 (index i+NumElts) instead of Op0.
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<Constant*, 16> Idxs;
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
      Idxs.push_back(Builder.getInt32(Idx));

    Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
  } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
             Name == "llvm.x86.avx.vinsertf128.ps.256" ||
             Name == "llvm.x86.avx.vinsertf128.si.256" ||
             Name == "llvm.x86.avx2.vinserti128") {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    // Mask off the high bits of the immediate value; hardware ignores those.

    // Extend the second operand into a vector that is twice as big.
    Value *UndefV = UndefValue::get(Op1->getType());
    SmallVector<Constant*, 8> Idxs;
    for (unsigned i = 0; i != NumElts; ++i) {
      Idxs.push_back(Builder.getInt32(i));

    Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

    // Insert the second operand into the first operand.

    // Note that there is no guarantee that instruction lowering will actually
    // produce a vinsertf128 instruction for the created shuffles. In
    // particular, the 0 immediate case involves no lane changes, so it can
    // be handled as a blend.

    // Example of shuffle mask for 32-bit elements:
    // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
    // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

    SmallVector<Constant*, 8> Idxs2;
    // The low half of the result is either the low half of the 1st operand
    // or the low half of the 2nd operand (the inserted vector).
    for (unsigned i = 0; i != NumElts / 2; ++i) {
      unsigned Idx = Imm ? i : (i + NumElts);
      Idxs2.push_back(Builder.getInt32(Idx));
    // The high half of the result is either the low half of the 2nd operand
    // (the inserted vector) or the high half of the 1st operand.
    for (unsigned i = NumElts / 2; i != NumElts; ++i) {
      unsigned Idx = Imm ? (i + NumElts / 2) : i;
      Idxs2.push_back(Builder.getInt32(Idx));
    Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
  } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
             Name == "llvm.x86.avx.vextractf128.ps.256" ||
             Name == "llvm.x86.avx.vextractf128.si.256" ||
             Name == "llvm.x86.avx2.vextracti128") {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    // Mask off the high bits of the immediate value; hardware ignores those.

    // Get indexes for either the high half or low half of the input vector.
    SmallVector<Constant*, 4> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned Idx = Imm ? (i + NumElts) : i;
      Idxs[i] = Builder.getInt32(Idx);

    Value *UndefV = UndefValue::get(Op0->getType());
    Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    // vpermil immediates become single-operand shuffles; the mask layout
    // depends on element width and vector width.
    bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
    if (Name == "llvm.x86.avx.vpermil.pd.256")
    else if (Name == "llvm.x86.avx.vpermil.pd")
    else if (Name == "llvm.x86.avx.vpermil.ps.256")
    else if (Name == "llvm.x86.avx.vpermil.ps")
    if (PD256 || PD128 || PS256 || PS128) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      SmallVector<Constant*, 8> Idxs;

        for (unsigned i = 0; i != 2; ++i)
          Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
        for (unsigned l = 0; l != 4; l+=2)
          for (unsigned i = 0; i != 2; ++i)
            Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
        for (unsigned i = 0; i != 4; ++i)
          Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
        for (unsigned l = 0; l != 8; l+=4)
          for (unsigned i = 0; i != 4; ++i)
            Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
        llvm_unreachable("Unexpected function");

      Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
      llvm_unreachable("Unknown function for CallInst upgrade.");

    CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();

  std::string Name = CI->getName();
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // Arguments are unchanged; only the declaration (mangling) differs.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // New forms take an extra i1 "is_zero_undef" operand; pass false to
    // preserve the old defined-at-zero behavior.
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
        CI->getArgOperand(0),
        CI->getArgOperand(1),
        CI->getArgOperand(2),
    auto *MemCI = cast<MemIntrinsic>(Builder.CreateCall(NewFn, Args, Name));

    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getZExtValue());

    // Memcpy/Memmove also support source alignment.
    if (auto *MemTransferI = dyn_cast<MemTransferInst>(MemCI))
      MemTransferI->setSrcAlignment(Align->getZExtValue());
    CI->replaceAllUsesWith(MemCI);
    CI->eraseFromParent();

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // Old form took two operands; the new form only keeps the second.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
851 // This tests each Function to determine if it needs upgrading. When we find
852 // one we are interested in, we then upgrade all calls to reflect the new
854 void llvm::UpgradeCallsToIntrinsic(Function* F) {
855 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
857 // Upgrade the function and check if it is a totaly new function.
859 if (UpgradeIntrinsicFunction(F, NewFn)) {
860 // Replace all uses to the old function with the new one if necessary.
861 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
863 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
864 UpgradeIntrinsicCall(CI, NewFn);
866 // Remove old function, no longer used, from the module.
867 F->eraseFromParent();
871 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
872 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
873 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
874 // Check if the tag uses struct-path aware TBAA format.
875 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
878 if (MD->getNumOperands() == 3) {
879 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
880 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
881 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
882 Metadata *Elts2[] = {ScalarType, ScalarType,
883 ConstantAsMetadata::get(Constant::getNullValue(
884 Type::getInt64Ty(I->getContext()))),
886 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
888 // Create a MDNode <MD, MD, offset 0>
889 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
890 Type::getInt64Ty(I->getContext())))};
891 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
895 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
896 Instruction *&Temp) {
897 if (Opc != Instruction::BitCast)
901 Type *SrcTy = V->getType();
902 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
903 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
904 LLVMContext &Context = V->getContext();
906 // We have no information about target data layout, so we assume that
907 // the maximum pointer size is 64bit.
908 Type *MidTy = Type::getInt64Ty(Context);
909 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
911 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
917 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
918 if (Opc != Instruction::BitCast)
921 Type *SrcTy = C->getType();
922 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
923 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
924 LLVMContext &Context = C->getContext();
926 // We have no information about target data layout, so we assume that
927 // the maximum pointer size is 64bit.
928 Type *MidTy = Type::getInt64Ty(Context);
930 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
937 /// Check the debug info version number, if it is out-dated, drop the debug
938 /// info. Return true if module is modified.
939 bool llvm::UpgradeDebugInfo(Module &M) {
940 unsigned Version = getDebugMetadataVersionFromModule(M);
941 if (Version == DEBUG_METADATA_VERSION)
944 bool RetCode = StripDebugInfo(M);
946 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
947 M.getContext().diagnose(DiagVersion);
952 void llvm::UpgradeMDStringConstant(std::string &String) {
953 const std::string OldPrefix = "llvm.vectorizer.";
954 if (String == "llvm.vectorizer.unroll") {
955 String = "llvm.loop.interleave.count";
956 } else if (String.find(OldPrefix) == 0) {
957 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");