1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
// Returns true when F is an old-signature declaration; in that case F is
// renamed with a ".old" suffix and NewFn is set to the new declaration so the
// call sites can be rewritten by UpgradeIntrinsicCall.
// NOTE(review): this listing elides interior lines (the NewFn out-parameter,
// the early "return false;" and the closing brace) — confirm against the
// complete file before modifying.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
// Old versions took v4f32 operands; the check below detects that signature.
37 // Check whether this is an old version of the function, which received
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
// Rename the stale declaration out of the way, then fetch the replacement.
43 // Yes, it's old, replace it with new version.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
// Returns true (and sets NewFn / renames F with ".old") when the declaration
// still carries the old i32 immediate in its final parameter position.
// NOTE(review): return statements and the closing brace are elided in this
// listing — verify against the complete file.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
// The immediate is always the last parameter for these intrinsics.
53 // Check that the last argument is an i32.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65 // Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit
66 // immediates have changed their type from i32 to i8.
// Same pattern as UpgradeX86IntrinsicsWith8BitMask, except the immediate is
// hard-coded at parameter index 2 rather than "the last parameter" (the
// masked cmp intrinsics carry a trailing mask operand after the immediate).
// NOTE(review): returns and closing brace are elided in this listing.
67 static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID,
69 // Check that the last argument is an i32.
70 Type *LastArgType = F->getFunctionType()->getParamType(2);
71 if (!LastArgType->isIntegerTy(32))
74 // Move this function aside and map down.
75 F->setName(F->getName() + ".old");
76 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Detect whether F is a deprecated intrinsic declaration and, if so, arrange
// for its upgrade. On a match it either (a) sets NewFn to the replacement
// declaration (renaming F with ".old"), or (b) leaves NewFn null for
// intrinsics that are rewritten entirely at the call site by
// UpgradeIntrinsicCall. Returns true iff an upgrade is needed.
// NOTE(review): many interior lines (returns, braces, a NewFn=nullptr reset,
// the "bool Renamed" plumbing) are elided from this listing — diff against
// the complete file before editing.
80 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
81 assert(F && "Illegal to upgrade a non-existent Function.");
// Cheap reject: every upgradable intrinsic starts with the "llvm." prefix.
83 // Quickly eliminate it, if it's not a candidate.
84 StringRef Name = F->getName();
85 if (Name.size() <= 8 || !Name.startswith("llvm."))
87 Name = Name.substr(5); // Strip off "llvm."
// --- ARM NEON upgrades: vclz -> llvm.ctlz, vcnt -> llvm.ctpop. ---
92 if (Name.startswith("arm.neon.vclz")) {
94 F->arg_begin()->getType(),
95 Type::getInt1Ty(F->getContext())
97 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
98 // the end of the name. Change name from llvm.arm.neon.vclz.* to
100 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
101 NewFn = Function::Create(fType, F->getLinkage(),
102 "llvm.ctlz." + Name.substr(14), F->getParent());
105 if (Name.startswith("arm.neon.vcnt")) {
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
107 F->arg_begin()->getType());
// --- ctlz/cttz grew a second is_zero_undef argument; single-arg forms are
// old and get renamed/redeclared here, rewired in UpgradeIntrinsicCall. ---
113 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
114 F->setName(Name + ".old");
115 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
116 F->arg_begin()->getType());
119 if (Name.startswith("cttz.") && F->arg_size() == 1) {
120 F->setName(Name + ".old");
121 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
122 F->arg_begin()->getType());
// --- Debug intrinsics gained an extra DIExpression operand; old arities
// (dbg.declare/2, dbg.value/3) are redirected to the new declarations. ---
128 if (Name.startswith("dbg.declare") && F->arg_size() == 2) {
129 F->setName(Name + ".old");
130 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare);
133 if (Name.startswith("dbg.value") && F->arg_size() == 3) {
134 F->setName(Name + ".old");
135 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
// --- objectsize: only the mangled name changed; re-mangle if stale. ---
142 // We only need to change the name to match the mangling including the
144 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
145 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
146 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
147 F->setName(Name + ".old");
148 NewFn = Intrinsic::getDeclaration(F->getParent(),
149 Intrinsic::objectsize, Tys);
// --- Removed x86 intrinsics rewritten entirely at the call site: no NewFn
// is produced; UpgradeIntrinsicCall emits equivalent IR (compares, shuffles,
// nontemporal stores, ...) for each name listed below. ---
156 if (Name.startswith("x86.sse2.pcmpeq.") ||
157 Name.startswith("x86.sse2.pcmpgt.") ||
158 Name.startswith("x86.avx2.pcmpeq.") ||
159 Name.startswith("x86.avx2.pcmpgt.") ||
160 Name.startswith("x86.avx.vpermil.") ||
161 Name == "x86.avx.vinsertf128.pd.256" ||
162 Name == "x86.avx.vinsertf128.ps.256" ||
163 Name == "x86.avx.vinsertf128.si.256" ||
164 Name == "x86.avx2.vinserti128" ||
165 Name == "x86.avx.vextractf128.pd.256" ||
166 Name == "x86.avx.vextractf128.ps.256" ||
167 Name == "x86.avx.vextractf128.si.256" ||
168 Name == "x86.avx2.vextracti128" ||
169 Name == "x86.avx.movnt.dq.256" ||
170 Name == "x86.avx.movnt.pd.256" ||
171 Name == "x86.avx.movnt.ps.256" ||
172 Name == "x86.sse42.crc32.64.8" ||
173 Name == "x86.avx.vbroadcast.ss" ||
174 Name == "x86.avx.vbroadcast.ss.256" ||
175 Name == "x86.avx.vbroadcast.sd.256" ||
176 Name == "x86.sse2.psll.dq" ||
177 Name == "x86.sse2.psrl.dq" ||
178 Name == "x86.avx2.psll.dq" ||
179 Name == "x86.avx2.psrl.dq" ||
180 Name == "x86.sse2.psll.dq.bs" ||
181 Name == "x86.sse2.psrl.dq.bs" ||
182 Name == "x86.avx2.psll.dq.bs" ||
183 Name == "x86.avx2.psrl.dq.bs" ||
184 Name == "x86.sse41.pblendw" ||
185 Name == "x86.sse41.blendpd" ||
186 Name == "x86.sse41.blendps" ||
187 Name == "x86.avx.blend.pd.256" ||
188 Name == "x86.avx.blend.ps.256" ||
189 Name == "x86.avx2.pblendw" ||
190 Name == "x86.avx2.pblendd.128" ||
191 Name == "x86.avx2.pblendd.256" ||
192 Name == "x86.avx2.vbroadcasti128" ||
193 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
// --- SSE4.1 ptest: v4f32 -> v2i64 signature change. ---
197 // SSE4.1 ptest functions may have an old signature.
198 if (Name.startswith("x86.sse41.ptest")) {
199 if (Name == "x86.sse41.ptestc")
200 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
201 if (Name == "x86.sse41.ptestz")
202 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
203 if (Name == "x86.sse41.ptestnzc")
204 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
// --- 8-bit immediate operands that used to be declared i32. ---
206 // Several blend and other instructions with masks used the wrong number of
208 if (Name == "x86.sse41.insertps")
209 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
211 if (Name == "x86.sse41.dppd")
212 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
214 if (Name == "x86.sse41.dpps")
215 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
217 if (Name == "x86.sse41.mpsadbw")
218 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
220 if (Name == "x86.avx.dp.ps.256")
221 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
223 if (Name == "x86.avx2.mpsadbw")
224 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
// --- AVX-512 cmp/ucmp immediates (i32 -> i8), all widths: 512/256/128. ---
227 if (Name == "x86.avx512.mask.cmp.ps.512")
228 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
230 if (Name == "x86.avx512.mask.cmp.pd.512")
231 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
234 if (Name == "x86.avx512.mask.cmp.b.512")
235 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
237 if (Name == "x86.avx512.mask.cmp.w.512")
238 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_512,
240 if (Name == "x86.avx512.mask.cmp.d.512")
241 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_512,
243 if (Name == "x86.avx512.mask.cmp.q.512")
244 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_512,
246 if (Name == "x86.avx512.mask.ucmp.b.512")
247 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_512,
249 if (Name == "x86.avx512.mask.ucmp.w.512")
250 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_512,
252 if (Name == "x86.avx512.mask.ucmp.d.512")
253 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_512,
255 if (Name == "x86.avx512.mask.ucmp.q.512")
256 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_512,
259 if (Name == "x86.avx512.mask.cmp.b.256")
260 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_256,
262 if (Name == "x86.avx512.mask.cmp.w.256")
263 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_256,
265 if (Name == "x86.avx512.mask.cmp.d.256")
266 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_256,
268 if (Name == "x86.avx512.mask.cmp.q.256")
269 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_256,
271 if (Name == "x86.avx512.mask.ucmp.b.256")
272 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_256,
274 if (Name == "x86.avx512.mask.ucmp.w.256")
275 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_256,
277 if (Name == "x86.avx512.mask.ucmp.d.256")
278 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_256,
280 if (Name == "x86.avx512.mask.ucmp.q.256")
281 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_256,
284 if (Name == "x86.avx512.mask.cmp.b.128")
285 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_128,
287 if (Name == "x86.avx512.mask.cmp.w.128")
288 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_128,
290 if (Name == "x86.avx512.mask.cmp.d.128")
291 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_128,
293 if (Name == "x86.avx512.mask.cmp.q.128")
294 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_128,
296 if (Name == "x86.avx512.mask.ucmp.b.128")
297 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_128,
299 if (Name == "x86.avx512.mask.ucmp.w.128")
300 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_128,
302 if (Name == "x86.avx512.mask.ucmp.d.128")
303 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_128,
305 if (Name == "x86.avx512.mask.ucmp.q.128")
306 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_128,
// --- XOP vfrcz: old two-arg form drops its first argument at the call. ---
309 // frcz.ss/sd may need to have an argument dropped
310 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
311 F->setName(Name + ".old");
312 NewFn = Intrinsic::getDeclaration(F->getParent(),
313 Intrinsic::x86_xop_vfrcz_ss);
316 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
317 F->setName(Name + ".old");
318 NewFn = Intrinsic::getDeclaration(F->getParent(),
319 Intrinsic::x86_xop_vfrcz_sd);
// --- FMA4: pure rename, llvm.x86.fma4.* -> llvm.x86.fma.*. ---
322 // Fix the FMA4 intrinsics to remove the 4
323 if (Name.startswith("x86.fma4.")) {
324 F->setName("llvm.x86.fma" + Name.substr(8));
332 // This may not belong here. This function is effectively being overloaded
333 // to both detect an intrinsic which needs upgrading, and to provide the
334 // upgraded form of the intrinsic. We should perhaps have two separate
335 // functions for this.
// Public entry point: delegates the detection/declaration work to
// UpgradeIntrinsicFunction1, then refreshes the intrinsic's attribute list
// so it matches the current definition. Returns whatever the helper decided.
// NOTE(review): the "return Upgraded;" and closing brace are elided in this
// listing.
339 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
341 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// Re-derive attributes even when no upgrade occurred; this is idempotent
// and does not change the function's body or signature.
343 // Upgrade intrinsic attributes. This does not change the function.
346 if (unsigned id = F->getIntrinsicID())
347 F->setAttributes(Intrinsic::getAttributes(F->getContext(),
// Placeholder for global-variable auto-upgrades; currently there are none,
// so this always reports "not modified" (return elided in this listing).
352 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
353 // Nothing to do yet.
// Fetch operand Elt of DbgNode as an MDNode. Null-safe: a null node or an
// out-of-range index yields null (the early-return line is elided here), as
// does an operand that is not itself an MDNode (dyn_cast_or_null).
357 static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) {
358 if (!DbgNode || Elt >= DbgNode->getNumOperands())
360 return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt));
// Build the DIExpression argument required by the new-style dbg.declare /
// dbg.value intrinsics from an old-style variable operand. Reads the
// optional expression stored at index 8 of the old DIVariable node; when it
// is absent an empty expression is created via DIBuilder instead.
// NOTE(review): the validity check between lines 366 and 369 (deciding when
// to synthesize the empty expression) is elided in this listing.
363 static MetadataAsValue *getExpression(Value *VarOperand, Function *F) {
364 // Old-style DIVariables have an optional expression as the 8th element.
365 DIExpression Expr(getNodeField(
366 cast<MDNode>(cast<MetadataAsValue>(VarOperand)->getMetadata()), 8));
368 DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false);
369 Expr = DIB.createExpression();
// Wrap the metadata so it can be passed as a call argument.
371 return MetadataAsValue::get(F->getContext(), Expr);
374 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to an equivalent shufflevector that shifts zero bytes in from the low end
// of each 16-byte lane. Op is the input vector (i64 elements), NumLanes is
// 1 (SSE, 128-bit) or 2 (AVX2, 256-bit), Shift is the byte count.
// NOTE(review): the Shift parameter declaration, the "Shift < 16" guard and
// several braces are elided in this listing.
376 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
377 Value *Op, unsigned NumLanes,
379 // Each lane is 16 bytes.
380 unsigned NumElts = NumLanes * 16;
// Work in bytes: <NumElts x i8> view of the input.
382 // Bitcast from a 64-bit element type to a byte element type.
383 Op = Builder.CreateBitCast(Op,
384 VectorType::get(Type::getInt8Ty(C), NumElts),
386 // We'll be shuffling in zeroes.
387 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
389 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
390 // we'll just return the zero vector.
392 SmallVector<Constant*, 32> Idxs;
393 // 256-bit version is split into two 16-byte lanes.
394 for (unsigned l = 0; l != NumElts; l += 16)
395 for (unsigned i = 0; i != 16; ++i) {
// Indices >= NumElts select from Op (the second shuffle operand); smaller
// indices select the zero vector, producing the shifted-in zero bytes.
396 unsigned Idx = NumElts + i - Shift;
398 Idx -= NumElts - 16; // end of lane, switch operand.
399 Idxs.push_back(Builder.getInt32(Idx + l));
402 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
// Restore the intrinsic's declared <2*NumLanes x i64> result type.
405 // Bitcast back to a 64-bit element type.
406 return Builder.CreateBitCast(Res,
407 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
411 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to an equivalent shufflevector that shifts zero bytes in from the high end
// of each 16-byte lane — the mirror image of UpgradeX86PSLLDQIntrinsics.
// NOTE(review): the Shift parameter declaration, the "Shift < 16" guard and
// several braces are elided in this listing.
413 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
414 Value *Op, unsigned NumLanes,
416 // Each lane is 16 bytes.
417 unsigned NumElts = NumLanes * 16;
// Work in bytes: <NumElts x i8> view of the input.
419 // Bitcast from a 64-bit element type to a byte element type.
420 Op = Builder.CreateBitCast(Op,
421 VectorType::get(Type::getInt8Ty(C), NumElts),
423 // We'll be shuffling in zeroes.
424 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
426 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
427 // we'll just return the zero vector.
429 SmallVector<Constant*, 32> Idxs;
430 // 256-bit version is split into two 16-byte lanes.
431 for (unsigned l = 0; l != NumElts; l += 16)
432 for (unsigned i = 0; i != 16; ++i) {
// Here Op is the FIRST shuffle operand, so small indices read the input and
// indices pushed past the lane boundary read zeros from the second operand.
433 unsigned Idx = i + Shift;
435 Idx += NumElts - 16; // end of lane, switch operand.
436 Idxs.push_back(Builder.getInt32(Idx + l));
439 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
// Restore the intrinsic's declared <2*NumLanes x i64> result type.
442 // Bitcast back to a 64-bit element type.
443 return Builder.CreateBitCast(Res,
444 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
448 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
449 // upgraded intrinsic. All argument and return casting must be provided in
450 // order to seamlessly integrate with existing context.
// Two regimes, keyed on NewFn (set by UpgradeIntrinsicFunction1):
//  * NewFn == null: the intrinsic was removed entirely; a replacement value
//    Rep is built inline from ordinary IR and RAUW'd over CI.
//  * NewFn != null: CI is re-emitted as a call to NewFn with arguments
//    adjusted (extra operand, truncated immediate, bitcasts, ...).
// NOTE(review): this listing elides many interior lines — the "if (!NewFn)"
// split, variable declarations (Rep, Imm, intID, BC0/BC1), several else
// branches, breaks and braces. Treat the structure below as a sampled
// outline and diff against the complete file before editing.
451 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
452 Function *F = CI->getCalledFunction();
453 LLVMContext &C = CI->getContext();
454 IRBuilder<> Builder(C);
455 Builder.SetInsertPoint(CI->getParent(), CI);
457 assert(F && "Intrinsic call is not direct?");
460 // Get the Function's name.
461 StringRef Name = F->getName();
// ===== Regime 1: NewFn is null — synthesize replacement IR (Rep). =====
// pcmpeq/pcmpgt become icmp + sext (icmp yields <N x i1>).
464 // Upgrade packed integer vector compares intrinsics to compare instructions
465 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
466 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
467 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
469 // need to sign extend since icmp returns vector of i1
470 Rep = Builder.CreateSExt(Rep, CI->getType(), "")
471 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
472 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
473 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
475 // need to sign extend since icmp returns vector of i1
476 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
// movnt becomes an aligned store tagged with !nontemporal metadata.
477 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
478 Name == "llvm.x86.avx.movnt.ps.256" ||
479 Name == "llvm.x86.avx.movnt.pd.256") {
480 IRBuilder<> Builder(C);
481 Builder.SetInsertPoint(CI->getParent(), CI);
483 Module *M = F->getParent();
484 SmallVector<Metadata *, 1> Elts;
486 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
487 MDNode *Node = MDNode::get(C, Elts);
489 Value *Arg0 = CI->getArgOperand(0);
490 Value *Arg1 = CI->getArgOperand(1);
492 // Convert the type of the pointer to a pointer to the stored type.
493 Value *BC = Builder.CreateBitCast(Arg0,
494 PointerType::getUnqual(Arg1->getType()),
496 StoreInst *SI = Builder.CreateStore(Arg1, BC);
497 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
498 SI->setAlignment(16);
// movnt returns void, so the call is erased directly with no RAUW.
501 CI->eraseFromParent();
// XOP vpcom<cond><type>: decode element type from the suffix and the
// condition from the name, then call the generic vpcom with an i8 immediate.
503 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
505 if (Name.endswith("ub"))
506 intID = Intrinsic::x86_xop_vpcomub;
507 else if (Name.endswith("uw"))
508 intID = Intrinsic::x86_xop_vpcomuw;
509 else if (Name.endswith("ud"))
510 intID = Intrinsic::x86_xop_vpcomud;
511 else if (Name.endswith("uq"))
512 intID = Intrinsic::x86_xop_vpcomuq;
513 else if (Name.endswith("b"))
514 intID = Intrinsic::x86_xop_vpcomb;
515 else if (Name.endswith("w"))
516 intID = Intrinsic::x86_xop_vpcomw;
517 else if (Name.endswith("d"))
518 intID = Intrinsic::x86_xop_vpcomd;
519 else if (Name.endswith("q"))
520 intID = Intrinsic::x86_xop_vpcomq;
522 llvm_unreachable("Unknown suffix");
524 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// The Imm assignments for each condition are elided in this listing.
526 if (Name.startswith("lt"))
528 else if (Name.startswith("le"))
530 else if (Name.startswith("gt"))
532 else if (Name.startswith("ge"))
534 else if (Name.startswith("eq"))
536 else if (Name.startswith("ne"))
538 else if (Name.startswith("false"))
540 else if (Name.startswith("true"))
543 llvm_unreachable("Unknown condition");
545 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
546 Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
547 CI->getArgOperand(1), Builder.getInt8(Imm));
// crc32.64.8 is emulated with the 32-bit variant plus trunc/zext.
548 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
549 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
550 Intrinsic::x86_sse42_crc32_32_8);
551 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
552 Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
553 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
554 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
555 // Replace broadcasts with a series of insertelements.
556 Type *VecTy = CI->getType();
557 Type *EltTy = VecTy->getVectorElementType();
558 unsigned EltNum = VecTy->getVectorNumElements();
559 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
560 EltTy->getPointerTo());
561 Value *Load = Builder.CreateLoad(Cast);
562 Type *I32Ty = Type::getInt32Ty(C);
563 Rep = UndefValue::get(VecTy);
564 for (unsigned I = 0; I < EltNum; ++I)
565 Rep = Builder.CreateInsertElement(Rep, Load,
566 ConstantInt::get(I32Ty, I));
567 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
568 // Replace vbroadcasts with a vector shuffle.
569 Value *Op = Builder.CreatePointerCast(
570 CI->getArgOperand(0),
571 PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
572 Value *Load = Builder.CreateLoad(Op);
573 const int Idxs[4] = { 0, 1, 0, 1 };
574 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
// psll.dq/psrl.dq: byte-shift helpers defined above. The plain forms take a
// bit count (divided by 8); the ".bs" forms take a byte count directly.
576 } else if (Name == "llvm.x86.sse2.psll.dq") {
577 // 128-bit shift left specified in bits.
578 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
579 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
580 Shift / 8); // Shift is in bits.
581 } else if (Name == "llvm.x86.sse2.psrl.dq") {
582 // 128-bit shift right specified in bits.
583 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
584 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
585 Shift / 8); // Shift is in bits.
586 } else if (Name == "llvm.x86.avx2.psll.dq") {
587 // 256-bit shift left specified in bits.
588 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
589 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
590 Shift / 8); // Shift is in bits.
591 } else if (Name == "llvm.x86.avx2.psrl.dq") {
592 // 256-bit shift right specified in bits.
593 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
594 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
595 Shift / 8); // Shift is in bits.
596 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
597 // 128-bit shift left specified in bytes.
598 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
599 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
601 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
602 // 128-bit shift right specified in bytes.
603 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
604 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
606 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
607 // 256-bit shift left specified in bytes.
608 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
609 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
611 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
612 // 256-bit shift right specified in bytes.
613 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
614 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
// blend family: the immediate selects per-element between Op0 and Op1, so a
// shufflevector with indices i (Op0) or i+NumElts (Op1) is exact.
616 } else if (Name == "llvm.x86.sse41.pblendw" ||
617 Name == "llvm.x86.sse41.blendpd" ||
618 Name == "llvm.x86.sse41.blendps" ||
619 Name == "llvm.x86.avx.blend.pd.256" ||
620 Name == "llvm.x86.avx.blend.ps.256" ||
621 Name == "llvm.x86.avx2.pblendw" ||
622 Name == "llvm.x86.avx2.pblendd.128" ||
623 Name == "llvm.x86.avx2.pblendd.256") {
624 Value *Op0 = CI->getArgOperand(0);
625 Value *Op1 = CI->getArgOperand(1);
626 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
627 VectorType *VecTy = cast<VectorType>(CI->getType());
628 unsigned NumElts = VecTy->getNumElements();
630 SmallVector<Constant*, 16> Idxs;
631 for (unsigned i = 0; i != NumElts; ++i) {
632 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
633 Idxs.push_back(Builder.getInt32(Idx));
636 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
// vinsertf128/vinserti128: widen Op1 to 256 bits, then blend halves.
637 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
638 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
639 Name == "llvm.x86.avx.vinsertf128.si.256" ||
640 Name == "llvm.x86.avx2.vinserti128") {
641 Value *Op0 = CI->getArgOperand(0);
642 Value *Op1 = CI->getArgOperand(1);
643 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
644 VectorType *VecTy = cast<VectorType>(CI->getType());
645 unsigned NumElts = VecTy->getNumElements();
647 // Mask off the high bits of the immediate value; hardware ignores those.
650 // Extend the second operand into a vector that is twice as big.
651 Value *UndefV = UndefValue::get(Op1->getType());
652 SmallVector<Constant*, 8> Idxs;
653 for (unsigned i = 0; i != NumElts; ++i) {
654 Idxs.push_back(Builder.getInt32(i));
656 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
658 // Insert the second operand into the first operand.
660 // Note that there is no guarantee that instruction lowering will actually
661 // produce a vinsertf128 instruction for the created shuffles. In
662 // particular, the 0 immediate case involves no lane changes, so it can
663 // be handled as a blend.
665 // Example of shuffle mask for 32-bit elements:
666 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
667 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
669 SmallVector<Constant*, 8> Idxs2;
670 // The low half of the result is either the low half of the 1st operand
671 // or the low half of the 2nd operand (the inserted vector).
672 for (unsigned i = 0; i != NumElts / 2; ++i) {
673 unsigned Idx = Imm ? i : (i + NumElts);
674 Idxs2.push_back(Builder.getInt32(Idx));
676 // The high half of the result is either the low half of the 2nd operand
677 // (the inserted vector) or the high half of the 1st operand.
678 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
679 unsigned Idx = Imm ? (i + NumElts / 2) : i;
680 Idxs2.push_back(Builder.getInt32(Idx));
682 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
// vextractf128/vextracti128: pick high or low half via shuffle on undef.
683 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
684 Name == "llvm.x86.avx.vextractf128.ps.256" ||
685 Name == "llvm.x86.avx.vextractf128.si.256" ||
686 Name == "llvm.x86.avx2.vextracti128") {
687 Value *Op0 = CI->getArgOperand(0);
688 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
689 VectorType *VecTy = cast<VectorType>(CI->getType());
690 unsigned NumElts = VecTy->getNumElements();
692 // Mask off the high bits of the immediate value; hardware ignores those.
695 // Get indexes for either the high half or low half of the input vector.
696 SmallVector<Constant*, 4> Idxs(NumElts);
697 for (unsigned i = 0; i != NumElts; ++i) {
698 unsigned Idx = Imm ? (i + NumElts) : i;
699 Idxs[i] = Builder.getInt32(Idx);
702 Value *UndefV = UndefValue::get(Op0->getType());
703 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// vpermil (with constant control): becomes a single-input shuffle; the mask
// construction differs per element type/width (the flag assignments between
// the if/else-if lines are elided in this listing).
705 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
706 if (Name == "llvm.x86.avx.vpermil.pd.256")
708 else if (Name == "llvm.x86.avx.vpermil.pd")
710 else if (Name == "llvm.x86.avx.vpermil.ps.256")
712 else if (Name == "llvm.x86.avx.vpermil.ps")
715 if (PD256 || PD128 || PS256 || PS128) {
716 Value *Op0 = CI->getArgOperand(0);
717 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
718 SmallVector<Constant*, 8> Idxs;
721 for (unsigned i = 0; i != 2; ++i)
722 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
724 for (unsigned l = 0; l != 4; l+=2)
725 for (unsigned i = 0; i != 2; ++i)
726 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
728 for (unsigned i = 0; i != 4; ++i)
729 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
731 for (unsigned l = 0; l != 8; l+=4)
732 for (unsigned i = 0; i != 4; ++i)
733 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
735 llvm_unreachable("Unexpected function");
737 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
739 llvm_unreachable("Unknown function for CallInst upgrade.");
// Replace the old call's uses with the synthesized value and delete it.
743 CI->replaceAllUsesWith(Rep);
744 CI->eraseFromParent();
// ===== Regime 2: NewFn is set — re-emit CI as a call to NewFn. =====
// Preserve the result name on the new call; the old call gets ".old".
748 std::string Name = CI->getName();
750 CI->setName(Name + ".old");
752 switch (NewFn->getIntrinsicID()) {
754 llvm_unreachable("Unknown function for CallInst upgrade.");
756 // Upgrade debug intrinsics to use an additional DIExpression argument.
757 case Intrinsic::dbg_declare: {
759 Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
760 getExpression(CI->getArgOperand(1), F), Name);
761 NewCI->setDebugLoc(CI->getDebugLoc());
762 CI->replaceAllUsesWith(NewCI);
763 CI->eraseFromParent();
766 case Intrinsic::dbg_value: {
767 auto NewCI = Builder.CreateCall4(
768 NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
769 getExpression(CI->getArgOperand(2), F), Name);
770 NewCI->setDebugLoc(CI->getDebugLoc());
771 CI->replaceAllUsesWith(NewCI);
772 CI->eraseFromParent();
// ctlz/cttz: append the new is_zero_undef flag as false (old semantics:
// zero input was defined behavior).
775 case Intrinsic::ctlz:
776 case Intrinsic::cttz:
777 assert(CI->getNumArgOperands() == 1 &&
778 "Mismatch between function args and call args");
779 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
780 Builder.getFalse(), Name));
781 CI->eraseFromParent();
783 case Intrinsic::objectsize:
785 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
786 CI->getArgOperand(0),
787 CI->getArgOperand(1),
789 CI->eraseFromParent();
792 case Intrinsic::ctpop: {
793 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
794 CI->eraseFromParent();
// XOP vfrcz: old two-arg form; only the second operand survives.
798 case Intrinsic::x86_xop_vfrcz_ss:
799 case Intrinsic::x86_xop_vfrcz_sd:
800 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
802 CI->eraseFromParent();
805 case Intrinsic::x86_sse41_ptestc:
806 case Intrinsic::x86_sse41_ptestz:
807 case Intrinsic::x86_sse41_ptestnzc: {
808 // The arguments for these intrinsics used to be v4f32, and changed
809 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
810 // So, the only thing required is a bitcast for both arguments.
811 // First, check the arguments have the old type.
812 Value *Arg0 = CI->getArgOperand(0);
813 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
816 // Old intrinsic, add bitcasts
817 Value *Arg1 = CI->getArgOperand(1);
820 Builder.CreateBitCast(Arg0,
821 VectorType::get(Type::getInt64Ty(C), 2),
824 Builder.CreateBitCast(Arg1,
825 VectorType::get(Type::getInt64Ty(C), 2),
828 CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
829 CI->replaceAllUsesWith(NewCall);
830 CI->eraseFromParent();
// 8-bit immediates that were declared i32: truncate the last argument.
834 case Intrinsic::x86_sse41_insertps:
835 case Intrinsic::x86_sse41_dppd:
836 case Intrinsic::x86_sse41_dpps:
837 case Intrinsic::x86_sse41_mpsadbw:
838 case Intrinsic::x86_avx_dp_ps_256:
839 case Intrinsic::x86_avx2_mpsadbw: {
840 // Need to truncate the last argument from i32 to i8 -- this argument models
841 // an inherently 8-bit immediate operand to these x86 instructions.
842 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
843 CI->arg_operands().end());
845 // Replace the last argument with a trunc.
846 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
848 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
849 CI->replaceAllUsesWith(NewCall);
850 CI->eraseFromParent();
// AVX-512 cmp: the immediate sits at index 2 (a mask operand follows it).
853 case Intrinsic::x86_avx512_mask_cmp_ps_512:
854 case Intrinsic::x86_avx512_mask_cmp_pd_512: {
855 // Need to truncate the last argument from i32 to i8 -- this argument models
856 // an inherently 8-bit immediate operand to these x86 instructions.
857 SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
858 CI->arg_operands().end());
860 // Replace the last argument with a trunc.
861 Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
863 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
864 CI->replaceAllUsesWith(NewCall);
865 CI->eraseFromParent();
871 // This tests each Function to determine if it needs upgrading. When we find
872 // one we are interested in, we then upgrade all calls to reflect the new
// function signature, then erase the now-dead old declaration.
// NOTE(review): the NewFn declaration and loop/if braces are elided in this
// listing.
874 void llvm::UpgradeCallsToIntrinsic(Function* F) {
875 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
877 // Upgrade the function and check if it is a totally new function.
879 if (UpgradeIntrinsicFunction(F, NewFn)) {
// Iterate with manual post-increment because UpgradeIntrinsicCall erases
// the call (the current use) from under the iterator.
881 // Replace all uses to the old function with the new one if necessary.
882 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
884 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
885 UpgradeIntrinsicCall(CI, NewFn);
887 // Remove old function, no longer used, from the module.
888 F->eraseFromParent();
// Convert an instruction's scalar-format TBAA tag to the struct-path aware
// format: <base, access, offset 0[, const]>. Tags already in the new format
// (first operand is an MDNode) are returned untouched.
// NOTE(review): the early return after the format check, the "const" flag
// element of Elts2, and the else keyword are elided in this listing.
893 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
894 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
895 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
896 // Check if the tag uses struct-path aware TBAA format.
897 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// Three operands in the old scalar format means the constant flag is set.
900 if (MD->getNumOperands() == 3) {
901 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
902 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
903 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
904 Metadata *Elts2[] = {ScalarType, ScalarType,
905 ConstantAsMetadata::get(Constant::getNullValue(
906 Type::getInt64Ty(I->getContext()))),
908 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
910 // Create a MDNode <MD, MD, offset 0>
911 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
912 Type::getInt64Ty(I->getContext())))};
913 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Legalize an old IR bitcast between pointers of different address spaces
// (no longer a valid bitcast): rebuild it as ptrtoint + inttoptr through
// i64. Temp receives the intermediate ptrtoint instruction (caller must
// insert it); the returned inttoptr is the replacement cast. For any other
// opcode/type combination the fallthrough (elided here) applies.
917 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
918 Instruction *&Temp) {
919 if (Opc != Instruction::BitCast)
923 Type *SrcTy = V->getType();
924 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
925 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
926 LLVMContext &Context = V->getContext();
928 // We have no information about target data layout, so we assume that
929 // the maximum pointer size is 64bit.
930 Type *MidTy = Type::getInt64Ty(Context);
931 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
933 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression twin of UpgradeBitCastInst: rewrites an old
// cross-address-space bitcast ConstantExpr as ptrtoint + inttoptr through
// i64. Non-bitcast opcodes and same-address-space casts fall through (the
// fallthrough return is elided in this listing).
939 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
940 if (Opc != Instruction::BitCast)
943 Type *SrcTy = C->getType();
944 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
945 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
946 LLVMContext &Context = C->getContext();
948 // We have no information about target data layout, so we assume that
949 // the maximum pointer size is 64bit.
950 Type *MidTy = Type::getInt64Ty(Context);
952 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
959 /// Check the debug info version number, if it is out-dated, drop the debug
960 /// info. Return true if module is modified.
// Current-version modules return unmodified; otherwise all debug info is
// stripped and a diagnostic is emitted so the user knows why it vanished.
// NOTE(review): the early return and final "return RetCode;" are elided in
// this listing.
961 bool llvm::UpgradeDebugInfo(Module &M) {
962 unsigned Version = getDebugMetadataVersionFromModule(M);
963 if (Version == DEBUG_METADATA_VERSION)
965 bool RetCode = StripDebugInfo(M);
968 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
969 M.getContext().diagnose(DiagVersion);
// Rename legacy loop-metadata strings in place: "llvm.vectorizer.unroll"
// becomes "llvm.loop.interleave.count", and any other "llvm.vectorizer.*"
// string has its prefix replaced with "llvm.loop.vectorize.". Strings
// without the old prefix are left untouched.
// NOTE(review): the function's closing brace lies past the end of this
// listing.
974 void llvm::UpgradeMDStringConstant(std::string &String) {
975 const std::string OldPrefix = "llvm.vectorizer.";
// Special case first: unroll maps to a differently-named key, not just a
// prefix swap.
976 if (String == "llvm.vectorizer.unroll") {
977 String = "llvm.loop.interleave.count";
978 } else if (String.find(OldPrefix) == 0) {
979 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");