1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 // Upgrade the declarations of the SSE4.1 functions whose arguments have
35 // changed their type from v4f32 to v2i64.
36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
38 // Check whether this is an old version of the function, which received
40 Type *Arg0Type = F->getFunctionType()->getParamType(0);
41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 // Yes, it's old, replace it with new version.
45 F->setName(F->getName() + ".old");
46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
51 // arguments have changed their type from i32 to i8.
52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54 // Check that the last argument is an i32.
55 Type *LastArgType = F->getFunctionType()->getParamType(
56 F->getFunctionType()->getNumParams() - 1);
57 if (!LastArgType->isIntegerTy(32))
60 // Move this function aside and map down.
61 F->setName(F->getName() + ".old");
62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Core of the declaration-level upgrade: decide whether F is a deprecated
// intrinsic declaration that must be auto-upgraded.  Returns true when an
// upgrade is required.  For most intrinsics the replacement declaration is
// returned in NewFn; intrinsics that are expanded to plain IR at each call
// site leave NewFn null and are handled entirely in UpgradeIntrinsicCall.
//
// NOTE(review): several statements of this routine (returns, closing braces,
// some declarations) appear to have been lost in extraction — compare against
// the upstream LLVM implementation before relying on the exact control flow.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
  Name = Name.substr(5); // Strip off "llvm."

  // ARM NEON count-leading-zeros becomes the generic llvm.ctlz intrinsic.
  // The two expressions below are the replacement's argument types: the
  // vector operand plus the i1 "is_zero_undef" flag.
  if (Name.startswith("arm.neon.vclz")) {
    F->arg_begin()->getType(),
    Type::getInt1Ty(F->getContext())
    // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    // the end of the name. Change name from llvm.arm.neon.vclz.* to
    // llvm.ctlz.* manually instead.
    FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm.ctlz." + Name.substr(14), F->getParent());

  // ARM NEON population count becomes the generic llvm.ctpop intrinsic.
  if (Name.startswith("arm.neon.vcnt")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                      F->arg_begin()->getType());

  // NEON vector loads: re-declare with the ".p0i8" pointer-mangling suffix.
  Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vldRegex.match(Name)) {
    auto fArgs = F->getFunctionType()->params();
    SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
    // Can't use Intrinsic::getDeclaration here as the return types might
    // then only be structurally equal.
    FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm." + Name + ".p0i8", F->getParent());

  // NEON vector stores: choose the replacement intrinsic by operand count
  // (plain stores take N+1 operands, lane stores N+3 — hence the -3 / -5
  // index computations below).
  Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
  if (vstRegex.match(Name)) {
    static Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                        Intrinsic::arm_neon_vst2,
                                        Intrinsic::arm_neon_vst3,
                                        Intrinsic::arm_neon_vst4};

    static Intrinsic::ID StoreLaneInts[] = {Intrinsic::arm_neon_vst2lane,
                                            Intrinsic::arm_neon_vst3lane,
                                            Intrinsic::arm_neon_vst4lane};

    auto fArgs = F->getFunctionType()->params();
    Type *Tys[] = {fArgs[0], fArgs[1]};
    if (Name.find("lane") == StringRef::npos)
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreInts[fArgs.size() - 3], Tys);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        StoreLaneInts[fArgs.size() - 5], Tys);

  // llvm.ctlz/llvm.cttz grew a second "is_zero_undef" argument; a one-arg
  // declaration is the old form and must be re-declared.
  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                      F->arg_begin()->getType());

  if (Name.startswith("cttz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                      F->arg_begin()->getType());

  // We only need to change the name to match the mangling including the
  // address space.
  if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::objectsize, Tys);

  // Every name in this chain is expanded to plain IR at the call site by
  // UpgradeIntrinsicCall; no replacement declaration (NewFn) is needed.
  if (Name.startswith("x86.sse2.pcmpeq.") ||
      Name.startswith("x86.sse2.pcmpgt.") ||
      Name.startswith("x86.avx2.pcmpeq.") ||
      Name.startswith("x86.avx2.pcmpgt.") ||
      Name.startswith("x86.avx2.vbroadcast") ||
      Name.startswith("x86.avx2.pbroadcast") ||
      Name.startswith("x86.avx.vpermil.") ||
      Name.startswith("x86.sse41.pmovsx") ||
      Name == "x86.avx.vinsertf128.pd.256" ||
      Name == "x86.avx.vinsertf128.ps.256" ||
      Name == "x86.avx.vinsertf128.si.256" ||
      Name == "x86.avx2.vinserti128" ||
      Name == "x86.avx.vextractf128.pd.256" ||
      Name == "x86.avx.vextractf128.ps.256" ||
      Name == "x86.avx.vextractf128.si.256" ||
      Name == "x86.avx2.vextracti128" ||
      Name == "x86.avx.movnt.dq.256" ||
      Name == "x86.avx.movnt.pd.256" ||
      Name == "x86.avx.movnt.ps.256" ||
      Name == "x86.sse42.crc32.64.8" ||
      Name == "x86.avx.vbroadcast.ss" ||
      Name == "x86.avx.vbroadcast.ss.256" ||
      Name == "x86.avx.vbroadcast.sd.256" ||
      Name == "x86.sse2.psll.dq" ||
      Name == "x86.sse2.psrl.dq" ||
      Name == "x86.avx2.psll.dq" ||
      Name == "x86.avx2.psrl.dq" ||
      Name == "x86.sse2.psll.dq.bs" ||
      Name == "x86.sse2.psrl.dq.bs" ||
      Name == "x86.avx2.psll.dq.bs" ||
      Name == "x86.avx2.psrl.dq.bs" ||
      Name == "x86.sse41.pblendw" ||
      Name == "x86.sse41.blendpd" ||
      Name == "x86.sse41.blendps" ||
      Name == "x86.avx.blend.pd.256" ||
      Name == "x86.avx.blend.ps.256" ||
      Name == "x86.avx2.pblendw" ||
      Name == "x86.avx2.pblendd.128" ||
      Name == "x86.avx2.pblendd.256" ||
      Name == "x86.avx2.vbroadcasti128" ||
      (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("x86.sse41.ptest")) {
    if (Name == "x86.sse41.ptestc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name == "x86.sse41.ptestz")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name == "x86.sse41.ptestnzc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);

  // Several blend and other instructions with masks used the wrong number of
  // bits (i32 instead of i8) for their immediate argument.
  if (Name == "x86.sse41.insertps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
  if (Name == "x86.sse41.dppd")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
  if (Name == "x86.sse41.dpps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
  if (Name == "x86.sse41.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
  if (Name == "x86.avx.dp.ps.256")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
  if (Name == "x86.avx2.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,

  // frcz.ss/sd may need to have an argument dropped
  if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
  if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);

  // Fix the FMA4 intrinsics to remove the 4
  if (Name.startswith("x86.fma4.")) {
    F->setName("llvm.x86.fma" + Name.substr(8));
259 // This may not belong here. This function is effectively being overloaded
260 // to both detect an intrinsic which needs upgrading, and to provide the
261 // upgraded form of the intrinsic. We should perhaps have two separate
262 // functions for this.
266 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
268 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
269 assert(F != NewFn && "Intrinsic function upgraded to the same function");
271 // Upgrade intrinsic attributes. This does not change the function.
274 if (Intrinsic::ID id = F->getIntrinsicID())
275 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
279 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
280 // Nothing to do yet.
284 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
286 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
287 Value *Op, unsigned NumLanes,
289 // Each lane is 16 bytes.
290 unsigned NumElts = NumLanes * 16;
292 // Bitcast from a 64-bit element type to a byte element type.
293 Op = Builder.CreateBitCast(Op,
294 VectorType::get(Type::getInt8Ty(C), NumElts),
296 // We'll be shuffling in zeroes.
297 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
299 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
300 // we'll just return the zero vector.
302 SmallVector<Constant*, 32> Idxs;
303 // 256-bit version is split into two 16-byte lanes.
304 for (unsigned l = 0; l != NumElts; l += 16)
305 for (unsigned i = 0; i != 16; ++i) {
306 unsigned Idx = NumElts + i - Shift;
308 Idx -= NumElts - 16; // end of lane, switch operand.
309 Idxs.push_back(Builder.getInt32(Idx + l));
312 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
315 // Bitcast back to a 64-bit element type.
316 return Builder.CreateBitCast(Res,
317 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
321 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
323 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
324 Value *Op, unsigned NumLanes,
326 // Each lane is 16 bytes.
327 unsigned NumElts = NumLanes * 16;
329 // Bitcast from a 64-bit element type to a byte element type.
330 Op = Builder.CreateBitCast(Op,
331 VectorType::get(Type::getInt8Ty(C), NumElts),
333 // We'll be shuffling in zeroes.
334 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
336 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
337 // we'll just return the zero vector.
339 SmallVector<Constant*, 32> Idxs;
340 // 256-bit version is split into two 16-byte lanes.
341 for (unsigned l = 0; l != NumElts; l += 16)
342 for (unsigned i = 0; i != 16; ++i) {
343 unsigned Idx = i + Shift;
345 Idx += NumElts - 16; // end of lane, switch operand.
346 Idxs.push_back(Builder.getInt32(Idx + l));
349 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
352 // Bitcast back to a 64-bit element type.
353 return Builder.CreateBitCast(Res,
354 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to
// the upgraded intrinsic. All argument and return casting must be provided
// in order to seamlessly integrate with existing context.
//
// When NewFn is null the first half of the routine expands the deprecated
// intrinsic directly to plain IR (compares, shuffles, loads/stores, ...) and
// replaces CI with the computed value Rep; otherwise the switch at the end
// rewrites CI into a call of NewFn with suitably adapted arguments.
//
// NOTE(review): this routine has lost lines in extraction (missing
// declarations such as `Value *Rep`, missing `if (!NewFn)` guard, `return`s,
// assignments in the vpcom condition chain, and closing braces).  Compare
// against the upstream LLVM implementation before relying on it.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI);

  assert(F && "Intrinsic call is not direct?");

  // Get the Function's name.
  StringRef Name = F->getName();

  // Upgrade packed integer vector compares intrinsics to compare instructions
  if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
      Name.startswith("llvm.x86.avx2.pcmpeq.")) {
    Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
    // need to sign extend since icmp returns vector of i1
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
             Name.startswith("llvm.x86.avx2.pcmpgt.")) {
    Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
    // need to sign extend since icmp returns vector of i1
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
             Name == "llvm.x86.avx.movnt.ps.256" ||
             Name == "llvm.x86.avx.movnt.pd.256") {
    // Non-temporal moves become an ordinary store tagged with !nontemporal
    // metadata.
    IRBuilder<> Builder(C);
    Builder.SetInsertPoint(CI->getParent(), CI);

    Module *M = F->getParent();
    SmallVector<Metadata *, 1> Elts;
    ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Arg0,
                                      PointerType::getUnqual(Arg1->getType()),
    StoreInst *SI = Builder.CreateStore(Arg1, BC);
    SI->setMetadata(M->getMDKindID("nontemporal"), Node);
    SI->setAlignment(32);

    CI->eraseFromParent();
  } else if (Name.startswith("llvm.x86.xop.vpcom")) {
    // Map the typed compare (vpcomub, vpcomw, ...) to its immediate-taking
    // generic form; the comparison predicate becomes the Imm operand below.
    if (Name.endswith("ub"))
      intID = Intrinsic::x86_xop_vpcomub;
    else if (Name.endswith("uw"))
      intID = Intrinsic::x86_xop_vpcomuw;
    else if (Name.endswith("ud"))
      intID = Intrinsic::x86_xop_vpcomud;
    else if (Name.endswith("uq"))
      intID = Intrinsic::x86_xop_vpcomuq;
    else if (Name.endswith("b"))
      intID = Intrinsic::x86_xop_vpcomb;
    else if (Name.endswith("w"))
      intID = Intrinsic::x86_xop_vpcomw;
    else if (Name.endswith("d"))
      intID = Intrinsic::x86_xop_vpcomd;
    else if (Name.endswith("q"))
      intID = Intrinsic::x86_xop_vpcomq;
      llvm_unreachable("Unknown suffix");

    Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
    // Translate the textual condition into the immediate encoding (the
    // assignments to Imm are among the elided lines).
    if (Name.startswith("lt"))
    else if (Name.startswith("le"))
    else if (Name.startswith("gt"))
    else if (Name.startswith("ge"))
    else if (Name.startswith("eq"))
    else if (Name.startswith("ne"))
    else if (Name.startswith("false"))
    else if (Name.startswith("true"))
    llvm_unreachable("Unknown condition");

    Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
    Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                               Builder.getInt8(Imm)});
  } else if (Name == "llvm.x86.sse42.crc32.64.8") {
    // crc32.64.8 is equivalent to the 32-bit form with the i64 accumulator
    // truncated going in and zero-extended coming out.
    Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::x86_sse42_crc32_32_8);
    Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
    // Replace broadcasts with a series of insertelements.
    Type *VecTy = CI->getType();
    Type *EltTy = VecTy->getVectorElementType();
    unsigned EltNum = VecTy->getVectorNumElements();
    Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                        EltTy->getPointerTo());
    Value *Load = Builder.CreateLoad(EltTy, Cast);
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = UndefValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load,
                                        ConstantInt::get(I32Ty, I));
  } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
    VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
    VectorType *DstTy = cast<VectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();

    // Extract a subvector of the first NumDstElts lanes and sign extend.
    SmallVector<int, 8> ShuffleMask;
    for (int i = 0; i != (int)NumDstElts; ++i)
      ShuffleMask.push_back(i);

    Value *SV = Builder.CreateShuffleVector(
        CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
    Rep = Builder.CreateSExt(SV, DstTy);
  } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
    // Replace vbroadcasts with a vector shuffle.
    Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
    Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                          PointerType::getUnqual(VT));
    Value *Load = Builder.CreateLoad(VT, Op);
    const int Idxs[4] = { 0, 1, 0, 1 };
    Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
  } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
             Name.startswith("llvm.x86.avx2.vbroadcast")) {
    // Replace vp?broadcasts with a vector shuffle.
    Value *Op = CI->getArgOperand(0);
    unsigned NumElts = CI->getType()->getVectorNumElements();
    Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
    Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                      Constant::getNullValue(MaskTy));
  } else if (Name == "llvm.x86.sse2.psll.dq") {
    // 128-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.sse2.psrl.dq") {
    // 128-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.avx2.psll.dq") {
    // 256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.avx2.psrl.dq") {
    // 256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
    // 128-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
  } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
    // 128-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
  } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
    // 256-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
  } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
    // 256-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
  } else if (Name == "llvm.x86.sse41.pblendw" ||
             Name == "llvm.x86.sse41.blendpd" ||
             Name == "llvm.x86.sse41.blendps" ||
             Name == "llvm.x86.avx.blend.pd.256" ||
             Name == "llvm.x86.avx.blend.ps.256" ||
             Name == "llvm.x86.avx2.pblendw" ||
             Name == "llvm.x86.avx2.pblendd.128" ||
             Name == "llvm.x86.avx2.pblendd.256") {
    // Blends become a two-operand shuffle; each mask bit selects between the
    // corresponding elements of Op0 and Op1.
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    SmallVector<Constant*, 16> Idxs;
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
      Idxs.push_back(Builder.getInt32(Idx));

    Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
  } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
             Name == "llvm.x86.avx.vinsertf128.ps.256" ||
             Name == "llvm.x86.avx.vinsertf128.si.256" ||
             Name == "llvm.x86.avx2.vinserti128") {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    // Mask off the high bits of the immediate value; hardware ignores those.

    // Extend the second operand into a vector that is twice as big.
    Value *UndefV = UndefValue::get(Op1->getType());
    SmallVector<Constant*, 8> Idxs;
    for (unsigned i = 0; i != NumElts; ++i) {
      Idxs.push_back(Builder.getInt32(i));

    Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

    // Insert the second operand into the first operand.

    // Note that there is no guarantee that instruction lowering will actually
    // produce a vinsertf128 instruction for the created shuffles. In
    // particular, the 0 immediate case involves no lane changes, so it can
    // be handled as a blend.

    // Example of shuffle mask for 32-bit elements:
    // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >

    SmallVector<Constant*, 8> Idxs2;
    // The low half of the result is either the low half of the 1st operand
    // or the low half of the 2nd operand (the inserted vector).
    for (unsigned i = 0; i != NumElts / 2; ++i) {
      unsigned Idx = Imm ? i : (i + NumElts);
      Idxs2.push_back(Builder.getInt32(Idx));
    // The high half of the result is either the low half of the 2nd operand
    // (the inserted vector) or the high half of the 1st operand.
    for (unsigned i = NumElts / 2; i != NumElts; ++i) {
      unsigned Idx = Imm ? (i + NumElts / 2) : i;
      Idxs2.push_back(Builder.getInt32(Idx));
    Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
  } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
             Name == "llvm.x86.avx.vextractf128.ps.256" ||
             Name == "llvm.x86.avx.vextractf128.si.256" ||
             Name == "llvm.x86.avx2.vextracti128") {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();

    // Mask off the high bits of the immediate value; hardware ignores those.

    // Get indexes for either the high half or low half of the input vector.
    SmallVector<Constant*, 4> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned Idx = Imm ? (i + NumElts) : i;
      Idxs[i] = Builder.getInt32(Idx);

    Value *UndefV = UndefValue::get(Op0->getType());
    Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    // vpermil: per-lane element permutation driven by the immediate.
    bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
    if (Name == "llvm.x86.avx.vpermil.pd.256")
    else if (Name == "llvm.x86.avx.vpermil.pd")
    else if (Name == "llvm.x86.avx.vpermil.ps.256")
    else if (Name == "llvm.x86.avx.vpermil.ps")
    if (PD256 || PD128 || PS256 || PS128) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      SmallVector<Constant*, 8> Idxs;

      // pd: one mask bit per f64 element; ps: two mask bits per f32 element.
      // The 256-bit forms repeat the selection within each 128-bit lane (l).
      for (unsigned i = 0; i != 2; ++i)
        Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
      for (unsigned l = 0; l != 4; l+=2)
        for (unsigned i = 0; i != 2; ++i)
          Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
      for (unsigned i = 0; i != 4; ++i)
        Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
      for (unsigned l = 0; l != 8; l+=4)
        for (unsigned i = 0; i != 4; ++i)
          Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
      llvm_unreachable("Unexpected function");

      Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
      llvm_unreachable("Unknown function for CallInst upgrade.");

  // Replace the original call with the computed expansion and delete it.
  CI->replaceAllUsesWith(Rep);
  CI->eraseFromParent();

  // From here on NewFn is non-null: rewrite CI into a call of the upgraded
  // declaration, renaming the old result so the new call can take its name.
  std::string Name = CI->getName();
  CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // Arguments carry over unchanged; only the callee changes.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // The new forms take a second "is_zero_undef" argument; preserve the
    // old semantics by passing false.
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The old two-argument form drops its first argument.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
784 // This tests each Function to determine if it needs upgrading. When we find
785 // one we are interested in, we then upgrade all calls to reflect the new
787 void llvm::UpgradeCallsToIntrinsic(Function* F) {
788 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
790 // Upgrade the function and check if it is a totaly new function.
792 if (UpgradeIntrinsicFunction(F, NewFn)) {
793 // Replace all uses to the old function with the new one if necessary.
794 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
796 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
797 UpgradeIntrinsicCall(CI, NewFn);
799 // Remove old function, no longer used, from the module.
800 F->eraseFromParent();
804 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
805 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
806 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
807 // Check if the tag uses struct-path aware TBAA format.
808 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
811 if (MD->getNumOperands() == 3) {
812 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
813 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
814 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
815 Metadata *Elts2[] = {ScalarType, ScalarType,
816 ConstantAsMetadata::get(Constant::getNullValue(
817 Type::getInt64Ty(I->getContext()))),
819 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
821 // Create a MDNode <MD, MD, offset 0>
822 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
823 Type::getInt64Ty(I->getContext())))};
824 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
828 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
829 Instruction *&Temp) {
830 if (Opc != Instruction::BitCast)
834 Type *SrcTy = V->getType();
835 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
836 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
837 LLVMContext &Context = V->getContext();
839 // We have no information about target data layout, so we assume that
840 // the maximum pointer size is 64bit.
841 Type *MidTy = Type::getInt64Ty(Context);
842 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
844 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
850 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
851 if (Opc != Instruction::BitCast)
854 Type *SrcTy = C->getType();
855 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
856 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
857 LLVMContext &Context = C->getContext();
859 // We have no information about target data layout, so we assume that
860 // the maximum pointer size is 64bit.
861 Type *MidTy = Type::getInt64Ty(Context);
863 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
870 /// Check the debug info version number, if it is out-dated, drop the debug
871 /// info. Return true if module is modified.
872 bool llvm::UpgradeDebugInfo(Module &M) {
873 unsigned Version = getDebugMetadataVersionFromModule(M);
874 if (Version == DEBUG_METADATA_VERSION)
877 bool RetCode = StripDebugInfo(M);
879 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
880 M.getContext().diagnose(DiagVersion);
885 void llvm::UpgradeMDStringConstant(std::string &String) {
886 const std::string OldPrefix = "llvm.vectorizer.";
887 if (String == "llvm.vectorizer.unroll") {
888 String = "llvm.loop.interleave.count";
889 } else if (String.find(OldPrefix) == 0) {
890 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");