1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
// Returns true (and sets NewFn) when F uses the old v4f32 signature.
// NOTE(review): this listing is elided — the NewFn out-parameter on the
// signature line, the early "return false" after the type check, and the
// trailing "return true;" / closing brace are not visible; confirm against
// the upstream file before editing.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
// (elided: early return when the first parameter is not <4 x float>)
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
// Rename the stale declaration out of the way so the new intrinsic
// declaration can take the canonical name.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
// Returns true (and sets NewFn) when the last parameter is still i32.
// NOTE(review): listing is elided — the out-parameter, early "return false",
// and final "return true;" / brace are not visible here; confirm upstream.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
// (elided: early return when the last argument is already i8)
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65 // Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit
66 // immediates have changed their type from i32 to i8.
// Unlike UpgradeX86IntrinsicsWith8BitMask, the immediate here is always
// parameter index 2 rather than the last parameter.
// NOTE(review): listing is elided — the out-parameter, early "return false",
// and final "return true;" / brace are not visible here; confirm upstream.
67 static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID,
69 // Check that the last argument is an i32.
70 Type *LastArgType = F->getFunctionType()->getParamType(2);
// (elided: early return when the immediate is already i8)
71 if (!LastArgType->isIntegerTy(32))
74 // Move this function aside and map down.
75 F->setName(F->getName() + ".old");
76 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Detects whether F is a deprecated intrinsic declaration and, where a new
// declaration is needed, creates it in NewFn. Dispatches on the intrinsic
// name (with the "llvm." prefix stripped).
// NOTE(review): this listing is heavily elided — early returns, the "args"
// array declaration for the vclz case, several "return true;" statements,
// closing braces, and whole branches are not visible; confirm against the
// upstream file before editing.
80 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
81 assert(F && "Illegal to upgrade a non-existent Function.");
83 // Quickly eliminate it, if it's not a candidate.
84 StringRef Name = F->getName();
85 if (Name.size() <= 8 || !Name.startswith("llvm."))
87 Name = Name.substr(5); // Strip off "llvm."
// --- ARM NEON upgrades: vclz -> llvm.ctlz, vcnt -> llvm.ctpop ---
92 if (Name.startswith("arm.neon.vclz")) {
94 F->arg_begin()->getType(),
95 Type::getInt1Ty(F->getContext())
97 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
98 // the end of the name. Change name from llvm.arm.neon.vclz.* to
100 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
101 NewFn = Function::Create(fType, F->getLinkage(),
102 "llvm.ctlz." + Name.substr(14), F->getParent());
105 if (Name.startswith("arm.neon.vcnt")) {
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
107 F->arg_begin()->getType());
// --- ctlz/cttz gained a second is_zero_undef argument; old one-arg
// declarations are renamed ".old" and replaced. ---
113 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
114 F->setName(Name + ".old");
115 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
116 F->arg_begin()->getType());
119 if (Name.startswith("cttz.") && F->arg_size() == 1) {
120 F->setName(Name + ".old");
121 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
122 F->arg_begin()->getType());
// --- Debug intrinsics gained an extra DIExpression argument. ---
128 if (Name.startswith("dbg.declare") && F->arg_size() == 2) {
129 F->setName(Name + ".old");
130 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare);
133 if (Name.startswith("dbg.value") && F->arg_size() == 3) {
134 F->setName(Name + ".old");
135 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
142 // We only need to change the name to match the mangling including the
144 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
145 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
146 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
147 F->setName(Name + ".old");
148 NewFn = Intrinsic::getDeclaration(F->getParent(),
149 Intrinsic::objectsize, Tys);
// --- X86 intrinsics replaced entirely by IR in UpgradeIntrinsicCall;
// NewFn presumably stays null for these (elided) so callers rewrite the
// call sites directly. ---
156 if (Name.startswith("x86.sse2.pcmpeq.") ||
157 Name.startswith("x86.sse2.pcmpgt.") ||
158 Name.startswith("x86.avx2.pcmpeq.") ||
159 Name.startswith("x86.avx2.pcmpgt.") ||
160 Name.startswith("x86.avx.vpermil.") ||
161 Name == "x86.avx.vinsertf128.pd.256" ||
162 Name == "x86.avx.vinsertf128.ps.256" ||
163 Name == "x86.avx.vinsertf128.si.256" ||
164 Name == "x86.avx.vextractf128.pd.256" ||
165 Name == "x86.avx.vextractf128.ps.256" ||
166 Name == "x86.avx.vextractf128.si.256" ||
167 Name == "x86.avx.movnt.dq.256" ||
168 Name == "x86.avx.movnt.pd.256" ||
169 Name == "x86.avx.movnt.ps.256" ||
170 Name == "x86.sse42.crc32.64.8" ||
171 Name == "x86.avx.vbroadcast.ss" ||
172 Name == "x86.avx.vbroadcast.ss.256" ||
173 Name == "x86.avx.vbroadcast.sd.256" ||
174 Name == "x86.sse2.psll.dq" ||
175 Name == "x86.sse2.psrl.dq" ||
176 Name == "x86.avx2.psll.dq" ||
177 Name == "x86.avx2.psrl.dq" ||
178 Name == "x86.sse2.psll.dq.bs" ||
179 Name == "x86.sse2.psrl.dq.bs" ||
180 Name == "x86.avx2.psll.dq.bs" ||
181 Name == "x86.avx2.psrl.dq.bs" ||
182 Name == "x86.sse41.pblendw" ||
183 Name == "x86.sse41.blendpd" ||
184 Name == "x86.sse41.blendps" ||
185 Name == "x86.avx.blend.pd.256" ||
186 Name == "x86.avx.blend.ps.256" ||
187 Name == "x86.avx2.pblendw" ||
188 Name == "x86.avx2.pblendd.128" ||
189 Name == "x86.avx2.pblendd.256" ||
190 Name == "x86.avx2.vbroadcasti128" ||
191 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
195 // SSE4.1 ptest functions may have an old signature.
196 if (Name.startswith("x86.sse41.ptest")) {
197 if (Name == "x86.sse41.ptestc")
198 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
199 if (Name == "x86.sse41.ptestz")
200 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
201 if (Name == "x86.sse41.ptestnzc")
202 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
204 // Several blend and other instructions with masks used the wrong number of
206 if (Name == "x86.sse41.insertps")
207 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
209 if (Name == "x86.sse41.dppd")
210 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
212 if (Name == "x86.sse41.dpps")
213 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
215 if (Name == "x86.sse41.mpsadbw")
216 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
218 if (Name == "x86.avx.dp.ps.256")
219 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
221 if (Name == "x86.avx2.mpsadbw")
222 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
// --- AVX-512 masked compares: 512/256/128-bit, signed (cmp) and
// unsigned (ucmp), for each element width b/w/d/q. ---
225 if (Name == "x86.avx512.mask.cmp.ps.512")
226 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
228 if (Name == "x86.avx512.mask.cmp.pd.512")
229 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
232 if (Name == "x86.avx512.mask.cmp.b.512")
233 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
235 if (Name == "x86.avx512.mask.cmp.w.512")
236 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_512,
238 if (Name == "x86.avx512.mask.cmp.d.512")
239 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_512,
241 if (Name == "x86.avx512.mask.cmp.q.512")
242 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_512,
244 if (Name == "x86.avx512.mask.ucmp.b.512")
245 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_512,
247 if (Name == "x86.avx512.mask.ucmp.w.512")
248 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_512,
250 if (Name == "x86.avx512.mask.ucmp.d.512")
251 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_512,
253 if (Name == "x86.avx512.mask.ucmp.q.512")
254 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_512,
257 if (Name == "x86.avx512.mask.cmp.b.256")
258 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_256,
260 if (Name == "x86.avx512.mask.cmp.w.256")
261 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_256,
263 if (Name == "x86.avx512.mask.cmp.d.256")
264 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_256,
266 if (Name == "x86.avx512.mask.cmp.q.256")
267 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_256,
269 if (Name == "x86.avx512.mask.ucmp.b.256")
270 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_256,
272 if (Name == "x86.avx512.mask.ucmp.w.256")
273 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_256,
275 if (Name == "x86.avx512.mask.ucmp.d.256")
276 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_256,
278 if (Name == "x86.avx512.mask.ucmp.q.256")
279 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_256,
282 if (Name == "x86.avx512.mask.cmp.b.128")
283 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_128,
285 if (Name == "x86.avx512.mask.cmp.w.128")
286 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_128,
288 if (Name == "x86.avx512.mask.cmp.d.128")
289 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_128,
291 if (Name == "x86.avx512.mask.cmp.q.128")
292 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_128,
294 if (Name == "x86.avx512.mask.ucmp.b.128")
295 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_128,
297 if (Name == "x86.avx512.mask.ucmp.w.128")
298 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_128,
300 if (Name == "x86.avx512.mask.ucmp.d.128")
301 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_128,
303 if (Name == "x86.avx512.mask.ucmp.q.128")
304 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_128,
307 // frcz.ss/sd may need to have an argument dropped
308 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
309 F->setName(Name + ".old");
310 NewFn = Intrinsic::getDeclaration(F->getParent(),
311 Intrinsic::x86_xop_vfrcz_ss);
314 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
315 F->setName(Name + ".old");
316 NewFn = Intrinsic::getDeclaration(F->getParent(),
317 Intrinsic::x86_xop_vfrcz_sd);
320 // Fix the FMA4 intrinsics to remove the 4
// Pure rename: llvm.x86.fma4.* -> llvm.x86.fma.* (no signature change).
321 if (Name.startswith("x86.fma4.")) {
322 F->setName("llvm.x86.fma" + Name.substr(8));
330 // This may not belong here. This function is effectively being overloaded
331 // to both detect an intrinsic which needs upgrading, and to provide the
332 // upgraded form of the intrinsic. We should perhaps have two separate
333 // functions for this.
// Public wrapper: runs the name-based upgrade check, then refreshes the
// declaration's attributes from the intrinsic table so they are current
// even when the function itself did not need upgrading.
// NOTE(review): the trailing "return Upgraded;" and closing braces are
// elided from this listing; confirm against the upstream file.
337 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
339 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
341 // Upgrade intrinsic attributes. This does not change the function.
344 if (unsigned id = F->getIntrinsicID())
345 F->setAttributes(Intrinsic::getAttributes(F->getContext(),
// Hook for upgrading deprecated global variables; currently a no-op.
// (elided: the "return false;" and closing brace are not visible here.)
350 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
351 // Nothing to do yet.
// Returns operand Elt of DbgNode as an MDNode, or null when DbgNode is
// null, the index is out of range (elided early return), or the operand
// is not an MDNode.
355 static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
356 if (!DbgNode || Elt >= DbgNode->getNumOperands())
358 return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt));
// Extracts the complex expression from an old-style debug-variable operand,
// synthesizing an empty DIExpression when none is attached, and wraps the
// result for use as a call argument.
// NOTE(review): the guard between the extraction and the
// DIB.createExpression() fallback is elided from this listing (presumably
// "if (!Expr.isExpression())" or similar); confirm upstream.
361 static MetadataAsValue *getExpression(Value *VarOperand, Function *F) {
362 // Old-style DIVariables have an optional expression as the 8th element.
363 DIExpression Expr(getNodeField(
364 cast<MDNode>(cast<MetadataAsValue>(VarOperand)->getMetadata()), 8));
366 DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
367 Expr = DIB.createExpression();
369 return MetadataAsValue::get(F->getContext(), Expr);
372 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to an equivalent byte shuffle (shift is in BYTES here; callers divide
// bit counts by 8 first). Shifts of 16 or more yield the zero vector.
// NOTE(review): the "if (Shift < 16)" guard mentioned in the comments and
// the Shift parameter declaration are elided from this listing; confirm
// against the upstream file.
374 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
375 Value *Op, unsigned NumLanes,
377 // Each lane is 16 bytes.
378 unsigned NumElts = NumLanes * 16;
380 // Bitcast from a 64-bit element type to a byte element type.
381 Op = Builder.CreateBitCast(Op,
382 VectorType::get(Type::getInt8Ty(C), NumElts),
384 // We'll be shuffling in zeroes.
385 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
387 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
388 // we'll just return the zero vector.
390 SmallVector<Constant*, 32> Idxs;
391 // 256-bit version is split into two 16-byte lanes.
392 for (unsigned l = 0; l != NumElts; l += 16)
393 for (unsigned i = 0; i != 16; ++i) {
// Indices >= NumElts select from the second shuffle operand (Op);
// smaller indices select the zero vector, implementing the left shift.
394 unsigned Idx = NumElts + i - Shift;
396 Idx -= NumElts - 16; // end of lane, switch operand.
397 Idxs.push_back(Builder.getInt32(Idx + l));
400 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
403 // Bitcast back to a 64-bit element type.
404 return Builder.CreateBitCast(Res,
405 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
409 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to an equivalent byte shuffle — mirror image of the PSLLDQ helper above,
// shifting bytes right instead of left (shift is in BYTES).
// NOTE(review): the "if (Shift < 16)" guard and the Shift parameter
// declaration are elided from this listing; confirm upstream.
411 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
412 Value *Op, unsigned NumLanes,
414 // Each lane is 16 bytes.
415 unsigned NumElts = NumLanes * 16;
417 // Bitcast from a 64-bit element type to a byte element type.
418 Op = Builder.CreateBitCast(Op,
419 VectorType::get(Type::getInt8Ty(C), NumElts),
421 // We'll be shuffling in zeroes.
422 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
424 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
425 // we'll just return the zero vector.
427 SmallVector<Constant*, 32> Idxs;
428 // 256-bit version is split into two 16-byte lanes.
429 for (unsigned l = 0; l != NumElts; l += 16)
430 for (unsigned i = 0; i != 16; ++i) {
// Here Op is the FIRST shuffle operand, so indices past the lane end
// spill into the zero vector (second operand) — a right shift.
431 unsigned Idx = i + Shift;
433 Idx += NumElts - 16; // end of lane, switch operand.
434 Idxs.push_back(Builder.getInt32(Idx + l));
437 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
440 // Bitcast back to a 64-bit element type.
441 return Builder.CreateBitCast(Res,
442 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
446 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
447 // upgraded intrinsic. All argument and return casting must be provided in
448 // order to seamlessly integrate with existing context.
// Two regimes: when NewFn is null (elided guard, presumably "if (!NewFn)")
// the old call is replaced with equivalent plain IR accumulated in Rep;
// otherwise the call is re-emitted against NewFn with fixed-up arguments.
// NOTE(review): this listing is heavily elided — declarations of Rep,
// intID, Imm, NewCI, BC0/BC1, several "return;" / "break;" statements and
// closing braces are not visible; confirm against the upstream file before
// editing.
449 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
450 Function *F = CI->getCalledFunction();
451 LLVMContext &C = CI->getContext();
452 IRBuilder<> Builder(C);
453 Builder.SetInsertPoint(CI->getParent(), CI);
455 assert(F && "Intrinsic call is not direct?");
458 // Get the Function's name.
459 StringRef Name = F->getName();
462 // Upgrade packed integer vector compares intrinsics to compare instructions
463 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
464 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
465 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
467 // need to sign extend since icmp returns vector of i1
468 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
469 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
470 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
471 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
473 // need to sign extend since icmp returns vector of i1
474 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
475 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
476 Name == "llvm.x86.avx.movnt.ps.256" ||
477 Name == "llvm.x86.avx.movnt.pd.256") {
// Non-temporal moves become an ordinary aligned store tagged with
// !nontemporal metadata. Note this branch shadows the outer Builder.
478 IRBuilder<> Builder(C);
479 Builder.SetInsertPoint(CI->getParent(), CI);
481 Module *M = F->getParent();
482 SmallVector<Metadata *, 1> Elts;
484 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
485 MDNode *Node = MDNode::get(C, Elts);
487 Value *Arg0 = CI->getArgOperand(0);
488 Value *Arg1 = CI->getArgOperand(1);
490 // Convert the type of the pointer to a pointer to the stored type.
491 Value *BC = Builder.CreateBitCast(Arg0,
492 PointerType::getUnqual(Arg1->getType()),
494 StoreInst *SI = Builder.CreateStore(Arg1, BC);
495 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
496 SI->setAlignment(16);
// Store replaces the call entirely (the intrinsic returned void), so
// this branch erases CI and exits early rather than falling through to
// the Rep-based replacement below.
499 CI->eraseFromParent();
501 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
// Decode the element-type/signedness suffix into the target intrinsic.
// Order matters: the two-character unsigned suffixes must be tested
// before the single-character signed ones.
503 if (Name.endswith("ub"))
504 intID = Intrinsic::x86_xop_vpcomub;
505 else if (Name.endswith("uw"))
506 intID = Intrinsic::x86_xop_vpcomuw;
507 else if (Name.endswith("ud"))
508 intID = Intrinsic::x86_xop_vpcomud;
509 else if (Name.endswith("uq"))
510 intID = Intrinsic::x86_xop_vpcomuq;
511 else if (Name.endswith("b"))
512 intID = Intrinsic::x86_xop_vpcomb;
513 else if (Name.endswith("w"))
514 intID = Intrinsic::x86_xop_vpcomw;
515 else if (Name.endswith("d"))
516 intID = Intrinsic::x86_xop_vpcomd;
517 else if (Name.endswith("q"))
518 intID = Intrinsic::x86_xop_vpcomq;
520 llvm_unreachable("Unknown suffix");
522 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// Map the condition mnemonic to the hardware immediate (elided: the Imm
// declaration and the numeric assignment on each branch).
524 if (Name.startswith("lt"))
526 else if (Name.startswith("le"))
528 else if (Name.startswith("gt"))
530 else if (Name.startswith("ge"))
532 else if (Name.startswith("eq"))
534 else if (Name.startswith("ne"))
536 else if (Name.startswith("false"))
538 else if (Name.startswith("true"))
541 llvm_unreachable("Unknown condition");
543 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
544 Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
545 CI->getArgOperand(1), Builder.getInt8(Imm));
546 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
547 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
548 Intrinsic::x86_sse42_crc32_32_8);
549 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
550 Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
551 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
552 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
553 // Replace broadcasts with a series of insertelements.
554 Type *VecTy = CI->getType();
555 Type *EltTy = VecTy->getVectorElementType();
556 unsigned EltNum = VecTy->getVectorNumElements();
557 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
558 EltTy->getPointerTo());
559 Value *Load = Builder.CreateLoad(Cast);
560 Type *I32Ty = Type::getInt32Ty(C);
561 Rep = UndefValue::get(VecTy);
562 for (unsigned I = 0; I < EltNum; ++I)
563 Rep = Builder.CreateInsertElement(Rep, Load,
564 ConstantInt::get(I32Ty, I));
565 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
566 // Replace vbroadcasts with a vector shuffle.
567 Value *Op = Builder.CreatePointerCast(
568 CI->getArgOperand(0),
569 PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
570 Value *Load = Builder.CreateLoad(Op);
// Mask {0,1,0,1} duplicates the loaded 128-bit value into both halves.
571 int Idxs[4] = { 0, 1, 0, 1 };
572 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
574 } else if (Name == "llvm.x86.sse2.psll.dq") {
575 // 128-bit shift left specified in bits.
576 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
577 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
578 Shift / 8); // Shift is in bits.
579 } else if (Name == "llvm.x86.sse2.psrl.dq") {
580 // 128-bit shift right specified in bits.
581 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
582 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
583 Shift / 8); // Shift is in bits.
584 } else if (Name == "llvm.x86.avx2.psll.dq") {
585 // 256-bit shift left specified in bits.
586 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
587 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
588 Shift / 8); // Shift is in bits.
589 } else if (Name == "llvm.x86.avx2.psrl.dq") {
590 // 256-bit shift right specified in bits.
591 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
592 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
593 Shift / 8); // Shift is in bits.
594 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
595 // 128-bit shift left specified in bytes.
596 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
597 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
599 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
600 // 128-bit shift right specified in bytes.
601 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
602 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
604 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
605 // 256-bit shift left specified in bytes.
606 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
607 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
609 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
610 // 256-bit shift right specified in bytes.
611 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
612 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
614 } else if (Name == "llvm.x86.sse41.pblendw" ||
615 Name == "llvm.x86.sse41.blendpd" ||
616 Name == "llvm.x86.sse41.blendps" ||
617 Name == "llvm.x86.avx.blend.pd.256" ||
618 Name == "llvm.x86.avx.blend.ps.256" ||
619 Name == "llvm.x86.avx2.pblendw" ||
620 Name == "llvm.x86.avx2.pblendd.128" ||
621 Name == "llvm.x86.avx2.pblendd.256") {
// Immediate-mask blends lower to a two-operand shufflevector: bit i%8 of
// the immediate picks element i from Op1 (index i+NumElts) or Op0.
622 Value *Op0 = CI->getArgOperand(0);
623 Value *Op1 = CI->getArgOperand(1);
624 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
625 VectorType *VecTy = cast<VectorType>(CI->getType());
626 unsigned NumElts = VecTy->getNumElements();
628 SmallVector<Constant*, 16> Idxs;
629 for (unsigned i = 0; i != NumElts; ++i) {
630 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
631 Idxs.push_back(Builder.getInt32(Idx));
634 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
635 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
636 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
637 Name == "llvm.x86.avx.vinsertf128.si.256") {
638 Value *Op0 = CI->getArgOperand(0);
639 Value *Op1 = CI->getArgOperand(1);
640 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
641 VectorType *VecTy = cast<VectorType>(CI->getType());
642 unsigned NumElts = VecTy->getNumElements();
644 // Mask off the high bits of the immediate value; hardware ignores those.
// (elided: the "Imm &= 1;" masking statement itself is not visible.)
647 // Extend the second operand into a vector that is twice as big.
648 Value *UndefV = UndefValue::get(Op1->getType());
649 SmallVector<Constant*, 8> Idxs;
650 for (unsigned i = 0; i != NumElts; ++i) {
651 Idxs.push_back(Builder.getInt32(i));
653 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
655 // Insert the second operand into the first operand.
657 // Note that there is no guarantee that instruction lowering will actually
658 // produce a vinsertf128 instruction for the created shuffles. In
659 // particular, the 0 immediate case involves no lane changes, so it can
660 // be handled as a blend.
662 // Example of shuffle mask for 32-bit elements:
663 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
664 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
666 SmallVector<Constant*, 8> Idxs2;
667 // The low half of the result is either the low half of the 1st operand
668 // or the low half of the 2nd operand (the inserted vector).
669 for (unsigned i = 0; i != NumElts / 2; ++i) {
670 unsigned Idx = Imm ? i : (i + NumElts);
671 Idxs2.push_back(Builder.getInt32(Idx));
673 // The high half of the result is either the low half of the 2nd operand
674 // (the inserted vector) or the high half of the 1st operand.
675 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
676 unsigned Idx = Imm ? (i + NumElts / 2) : i;
677 Idxs2.push_back(Builder.getInt32(Idx));
679 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
680 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
681 Name == "llvm.x86.avx.vextractf128.ps.256" ||
682 Name == "llvm.x86.avx.vextractf128.si.256") {
683 Value *Op0 = CI->getArgOperand(0);
684 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
685 VectorType *VecTy = cast<VectorType>(CI->getType());
686 unsigned NumElts = VecTy->getNumElements();
688 // Mask off the high bits of the immediate value; hardware ignores those.
// (elided: the "Imm &= 1;" masking statement itself is not visible.)
691 // Get indexes for either the high half or low half of the input vector.
692 SmallVector<Constant*, 4> Idxs(NumElts);
693 for (unsigned i = 0; i != NumElts; ++i) {
694 unsigned Idx = Imm ? (i + NumElts) : i;
695 Idxs[i] = Builder.getInt32(Idx);
698 Value *UndefV = UndefValue::get(Op0->getType());
699 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// --- vpermil* fall-through: classify by name, then build the in-lane
// permutation mask one flavor at a time. (elided: the flag assignments
// and the if/else-if chain over the four flags.) ---
701 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
702 if (Name == "llvm.x86.avx.vpermil.pd.256")
704 else if (Name == "llvm.x86.avx.vpermil.pd")
706 else if (Name == "llvm.x86.avx.vpermil.ps.256")
708 else if (Name == "llvm.x86.avx.vpermil.ps")
711 if (PD256 || PD128 || PS256 || PS128) {
712 Value *Op0 = CI->getArgOperand(0);
713 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
714 SmallVector<Constant*, 8> Idxs;
717 for (unsigned i = 0; i != 2; ++i)
718 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
720 for (unsigned l = 0; l != 4; l+=2)
721 for (unsigned i = 0; i != 2; ++i)
722 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
724 for (unsigned i = 0; i != 4; ++i)
725 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
727 for (unsigned l = 0; l != 8; l+=4)
728 for (unsigned i = 0; i != 4; ++i)
729 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
731 llvm_unreachable("Unexpected function");
733 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
735 llvm_unreachable("Unknown function for CallInst upgrade.");
// The Rep-based path: splice the replacement value in for the old call.
739 CI->replaceAllUsesWith(Rep);
740 CI->eraseFromParent();
// --- NewFn path: re-emit the call against the upgraded declaration. The
// old call's name is parked on ".old" so the new call can reuse it. ---
744 std::string Name = CI->getName().str();
746 CI->setName(Name + ".old");
748 switch (NewFn->getIntrinsicID()) {
750 llvm_unreachable("Unknown function for CallInst upgrade.");
752 // Upgrade debug intrinsics to use an additional DIExpression argument.
753 case Intrinsic::dbg_declare: {
755 Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
756 getExpression(CI->getArgOperand(1), F), Name);
757 NewCI->setDebugLoc(CI->getDebugLoc());
758 CI->replaceAllUsesWith(NewCI);
759 CI->eraseFromParent();
762 case Intrinsic::dbg_value: {
763 auto NewCI = Builder.CreateCall4(
764 NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
765 getExpression(CI->getArgOperand(2), F), Name);
766 NewCI->setDebugLoc(CI->getDebugLoc());
767 CI->replaceAllUsesWith(NewCI);
768 CI->eraseFromParent();
771 case Intrinsic::ctlz:
772 case Intrinsic::cttz:
// Old single-argument form: append is_zero_undef = false to preserve
// the original (defined-at-zero) semantics.
773 assert(CI->getNumArgOperands() == 1 &&
774 "Mismatch between function args and call args");
775 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
776 Builder.getFalse(), Name));
777 CI->eraseFromParent();
780 case Intrinsic::objectsize:
781 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
782 CI->getArgOperand(0),
783 CI->getArgOperand(1),
785 CI->eraseFromParent();
788 case Intrinsic::ctpop: {
789 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
790 CI->eraseFromParent();
794 case Intrinsic::x86_xop_vfrcz_ss:
795 case Intrinsic::x86_xop_vfrcz_sd:
// Drop the extra first argument the old two-argument form carried.
796 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
798 CI->eraseFromParent();
801 case Intrinsic::x86_sse41_ptestc:
802 case Intrinsic::x86_sse41_ptestz:
803 case Intrinsic::x86_sse41_ptestnzc: {
804 // The arguments for these intrinsics used to be v4f32, and changed
805 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
806 // So, the only thing required is a bitcast for both arguments.
807 // First, check the arguments have the old type.
808 Value *Arg0 = CI->getArgOperand(0);
809 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
812 // Old intrinsic, add bitcasts
813 Value *Arg1 = CI->getArgOperand(1);
816 Builder.CreateBitCast(Arg0,
817 VectorType::get(Type::getInt64Ty(C), 2),
820 Builder.CreateBitCast(Arg1,
821 VectorType::get(Type::getInt64Ty(C), 2),
824 CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
825 CI->replaceAllUsesWith(NewCall);
826 CI->eraseFromParent();
830 case Intrinsic::x86_sse41_insertps:
831 case Intrinsic::x86_sse41_dppd:
832 case Intrinsic::x86_sse41_dpps:
833 case Intrinsic::x86_sse41_mpsadbw:
834 case Intrinsic::x86_avx_dp_ps_256:
835 case Intrinsic::x86_avx2_mpsadbw: {
836 // Need to truncate the last argument from i32 to i8 -- this argument models
837 // an inherently 8-bit immediate operand to these x86 instructions.
838 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
839 CI->arg_operands().end());
841 // Replace the last argument with a trunc.
842 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
844 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
845 CI->replaceAllUsesWith(NewCall);
846 CI->eraseFromParent();
849 case Intrinsic::x86_avx512_mask_cmp_ps_512:
850 case Intrinsic::x86_avx512_mask_cmp_pd_512: {
851 // Need to truncate the last argument from i32 to i8 -- this argument models
852 // an inherently 8-bit immediate operand to these x86 instructions.
853 SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
854 CI->arg_operands().end());
856 // Replace the last argument with a trunc.
// Unlike the block above, the immediate is argument index 2 here (it is
// not the final argument for the masked-compare signature).
857 Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
859 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
860 CI->replaceAllUsesWith(NewCall);
861 CI->eraseFromParent();
867 // This tests each Function to determine if it needs upgrading. When we find
868 // one we are interested in, we then upgrade all calls to reflect the new
// function signature, then erase the old declaration.
// NOTE(review): the NewFn declaration and closing braces are elided from
// this listing; confirm against the upstream file.
870 void llvm::UpgradeCallsToIntrinsic(Function* F) {
871 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
873 // Upgrade the function and check if it is a totally new function.
875 if (UpgradeIntrinsicFunction(F, NewFn)) {
877 // Replace all uses to the old function with the new one if necessary.
// Post-increment inside dyn_cast advances UI before UpgradeIntrinsicCall
// erases the call instruction, keeping the iterator valid.
878 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
880 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
881 UpgradeIntrinsicCall(CI, NewFn);
883 // Remove old function, no longer used, from the module.
884 F->eraseFromParent();
// Rewrites an instruction's scalar-format !tbaa metadata into the
// struct-path-aware format <base, access, offset[, const]>. Already
// struct-path tags (first operand is itself an MDNode) are left alone.
// NOTE(review): early returns and closing braces are elided from this
// listing; confirm against the upstream file.
889 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
890 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
891 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
892 // Check if the tag uses struct-path aware TBAA format.
893 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// Three operands means the old tag carried the "is constant" flag.
896 if (MD->getNumOperands() == 3) {
897 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
898 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
899 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
900 Metadata *Elts2[] = {ScalarType, ScalarType,
901 ConstantAsMetadata::get(Constant::getNullValue(
902 Type::getInt64Ty(I->getContext()))),
904 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
906 // Create a MDNode <MD, MD, offset 0>
907 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
908 Type::getInt64Ty(I->getContext())))};
909 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Legalizes an old bitcast between pointers in different address spaces by
// splitting it into ptrtoint + inttoptr (bitcast across address spaces is
// no longer valid IR). Returns the inttoptr and hands the intermediate
// ptrtoint back through Temp; returns null (elided early returns) when no
// rewrite is needed.
913 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
914 Instruction *&Temp) {
915 if (Opc != Instruction::BitCast)
919 Type *SrcTy = V->getType();
920 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
921 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
922 LLVMContext &Context = V->getContext();
924 // We have no information about target data layout, so we assume that
925 // the maximum pointer size is 64bit.
926 Type *MidTy = Type::getInt64Ty(Context);
927 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
929 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression twin of UpgradeBitCastInst: rewrites a cross-address-
// space bitcast constant expression as getIntToPtr(getPtrToInt(C)).
// Returns null (elided early returns) when no rewrite is needed.
935 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
936 if (Opc != Instruction::BitCast)
939 Type *SrcTy = C->getType();
940 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
941 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
942 LLVMContext &Context = C->getContext();
944 // We have no information about target data layout, so we assume that
945 // the maximum pointer size is 64bit.
946 Type *MidTy = Type::getInt64Ty(Context);
948 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
955 /// Check the debug info version number, if it is out-dated, drop the debug
956 /// info. Return true if module is modified.
// Emits a diagnostic so the user learns why their debug info disappeared.
// NOTE(review): the early "return false" on version match and the trailing
// "return RetCode;" are elided from this listing; confirm upstream.
957 bool llvm::UpgradeDebugInfo(Module &M) {
958 unsigned Version = getDebugMetadataVersionFromModule(M);
959 if (Version == DEBUG_METADATA_VERSION)
961 bool RetCode = StripDebugInfo(M);
964 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
965 M.getContext().diagnose(DiagVersion);
// Renames legacy "llvm.vectorizer.*" loop-metadata strings to their modern
// "llvm.loop.*" equivalents, in place. "unroll" is special-cased because it
// was renamed to "interleave.count" rather than prefix-substituted.
// NOTE(review): this definition continues past the end of the visible
// listing (closing braces not shown).
970 void llvm::UpgradeMDStringConstant(std::string &String) {
971 const std::string OldPrefix = "llvm.vectorizer.";
972 if (String == "llvm.vectorizer.unroll") {
973 String = "llvm.loop.interleave.count";
974 } else if (String.find(OldPrefix) == 0) {
975 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");