//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
66 assert(F && "Illegal to upgrade a non-existent Function.");
68 // Quickly eliminate it, if it's not a candidate.
69 StringRef Name = F->getName();
70 if (Name.size() <= 8 || !Name.startswith("llvm."))
72 Name = Name.substr(5); // Strip off "llvm."
77 if (Name.startswith("arm.neon.vclz")) {
79 F->arg_begin()->getType(),
80 Type::getInt1Ty(F->getContext())
82 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
83 // the end of the name. Change name from llvm.arm.neon.vclz.* to
85 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
86 NewFn = Function::Create(fType, F->getLinkage(),
87 "llvm.ctlz." + Name.substr(14), F->getParent());
90 if (Name.startswith("arm.neon.vcnt")) {
91 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
92 F->arg_begin()->getType());
98 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
99 F->setName(Name + ".old");
100 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
101 F->arg_begin()->getType());
104 if (Name.startswith("cttz.") && F->arg_size() == 1) {
105 F->setName(Name + ".old");
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
107 F->arg_begin()->getType());
114 // We only need to change the name to match the mangling including the
116 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
117 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
118 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
119 F->setName(Name + ".old");
120 NewFn = Intrinsic::getDeclaration(F->getParent(),
121 Intrinsic::objectsize, Tys);
128 if (Name.startswith("x86.sse2.pcmpeq.") ||
129 Name.startswith("x86.sse2.pcmpgt.") ||
130 Name.startswith("x86.avx2.pcmpeq.") ||
131 Name.startswith("x86.avx2.pcmpgt.") ||
132 Name.startswith("x86.avx2.vbroadcast") ||
133 Name.startswith("x86.avx2.pbroadcast") ||
134 Name.startswith("x86.avx.vpermil.") ||
135 Name.startswith("x86.sse41.pmovsx") ||
136 Name == "x86.avx.vinsertf128.pd.256" ||
137 Name == "x86.avx.vinsertf128.ps.256" ||
138 Name == "x86.avx.vinsertf128.si.256" ||
139 Name == "x86.avx2.vinserti128" ||
140 Name == "x86.avx.vextractf128.pd.256" ||
141 Name == "x86.avx.vextractf128.ps.256" ||
142 Name == "x86.avx.vextractf128.si.256" ||
143 Name == "x86.avx2.vextracti128" ||
144 Name == "x86.avx.movnt.dq.256" ||
145 Name == "x86.avx.movnt.pd.256" ||
146 Name == "x86.avx.movnt.ps.256" ||
147 Name == "x86.sse42.crc32.64.8" ||
148 Name == "x86.avx.vbroadcast.ss" ||
149 Name == "x86.avx.vbroadcast.ss.256" ||
150 Name == "x86.avx.vbroadcast.sd.256" ||
151 Name == "x86.sse2.psll.dq" ||
152 Name == "x86.sse2.psrl.dq" ||
153 Name == "x86.avx2.psll.dq" ||
154 Name == "x86.avx2.psrl.dq" ||
155 Name == "x86.sse2.psll.dq.bs" ||
156 Name == "x86.sse2.psrl.dq.bs" ||
157 Name == "x86.avx2.psll.dq.bs" ||
158 Name == "x86.avx2.psrl.dq.bs" ||
159 Name == "x86.sse41.pblendw" ||
160 Name == "x86.sse41.blendpd" ||
161 Name == "x86.sse41.blendps" ||
162 Name == "x86.avx.blend.pd.256" ||
163 Name == "x86.avx.blend.ps.256" ||
164 Name == "x86.avx2.pblendw" ||
165 Name == "x86.avx2.pblendd.128" ||
166 Name == "x86.avx2.pblendd.256" ||
167 Name == "x86.avx2.vbroadcasti128" ||
168 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
172 // SSE4.1 ptest functions may have an old signature.
173 if (Name.startswith("x86.sse41.ptest")) {
174 if (Name == "x86.sse41.ptestc")
175 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
176 if (Name == "x86.sse41.ptestz")
177 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
178 if (Name == "x86.sse41.ptestnzc")
179 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
181 // Several blend and other instructions with masks used the wrong number of
183 if (Name == "x86.sse41.insertps")
184 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
186 if (Name == "x86.sse41.dppd")
187 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
189 if (Name == "x86.sse41.dpps")
190 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
192 if (Name == "x86.sse41.mpsadbw")
193 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
195 if (Name == "x86.avx.dp.ps.256")
196 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
198 if (Name == "x86.avx2.mpsadbw")
199 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
202 // frcz.ss/sd may need to have an argument dropped
203 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
204 F->setName(Name + ".old");
205 NewFn = Intrinsic::getDeclaration(F->getParent(),
206 Intrinsic::x86_xop_vfrcz_ss);
209 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
210 F->setName(Name + ".old");
211 NewFn = Intrinsic::getDeclaration(F->getParent(),
212 Intrinsic::x86_xop_vfrcz_sd);
215 // Fix the FMA4 intrinsics to remove the 4
216 if (Name.startswith("x86.fma4.")) {
217 F->setName("llvm.x86.fma" + Name.substr(8));
225 // This may not belong here. This function is effectively being overloaded
226 // to both detect an intrinsic which needs upgrading, and to provide the
227 // upgraded form of the intrinsic. We should perhaps have two separate
228 // functions for this.
232 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
234 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
235 assert(F != NewFn && "Intrinsic function upgraded to the same function");
237 // Upgrade intrinsic attributes. This does not change the function.
240 if (Intrinsic::ID id = F->getIntrinsicID())
241 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
245 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
246 // Nothing to do yet.
250 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
252 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
253 Value *Op, unsigned NumLanes,
255 // Each lane is 16 bytes.
256 unsigned NumElts = NumLanes * 16;
258 // Bitcast from a 64-bit element type to a byte element type.
259 Op = Builder.CreateBitCast(Op,
260 VectorType::get(Type::getInt8Ty(C), NumElts),
262 // We'll be shuffling in zeroes.
263 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
265 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
266 // we'll just return the zero vector.
268 SmallVector<Constant*, 32> Idxs;
269 // 256-bit version is split into two 16-byte lanes.
270 for (unsigned l = 0; l != NumElts; l += 16)
271 for (unsigned i = 0; i != 16; ++i) {
272 unsigned Idx = NumElts + i - Shift;
274 Idx -= NumElts - 16; // end of lane, switch operand.
275 Idxs.push_back(Builder.getInt32(Idx + l));
278 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
281 // Bitcast back to a 64-bit element type.
282 return Builder.CreateBitCast(Res,
283 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
287 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
289 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
290 Value *Op, unsigned NumLanes,
292 // Each lane is 16 bytes.
293 unsigned NumElts = NumLanes * 16;
295 // Bitcast from a 64-bit element type to a byte element type.
296 Op = Builder.CreateBitCast(Op,
297 VectorType::get(Type::getInt8Ty(C), NumElts),
299 // We'll be shuffling in zeroes.
300 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
302 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
303 // we'll just return the zero vector.
305 SmallVector<Constant*, 32> Idxs;
306 // 256-bit version is split into two 16-byte lanes.
307 for (unsigned l = 0; l != NumElts; l += 16)
308 for (unsigned i = 0; i != 16; ++i) {
309 unsigned Idx = i + Shift;
311 Idx += NumElts - 16; // end of lane, switch operand.
312 Idxs.push_back(Builder.getInt32(Idx + l));
315 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
318 // Bitcast back to a 64-bit element type.
319 return Builder.CreateBitCast(Res,
320 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
324 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
325 // upgraded intrinsic. All argument and return casting must be provided in
326 // order to seamlessly integrate with existing context.
327 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
328 Function *F = CI->getCalledFunction();
329 LLVMContext &C = CI->getContext();
330 IRBuilder<> Builder(C);
331 Builder.SetInsertPoint(CI->getParent(), CI);
333 assert(F && "Intrinsic call is not direct?");
336 // Get the Function's name.
337 StringRef Name = F->getName();
340 // Upgrade packed integer vector compares intrinsics to compare instructions
341 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
342 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
343 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
345 // need to sign extend since icmp returns vector of i1
346 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
347 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
348 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
349 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
351 // need to sign extend since icmp returns vector of i1
352 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
353 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
354 Name == "llvm.x86.avx.movnt.ps.256" ||
355 Name == "llvm.x86.avx.movnt.pd.256") {
356 IRBuilder<> Builder(C);
357 Builder.SetInsertPoint(CI->getParent(), CI);
359 Module *M = F->getParent();
360 SmallVector<Metadata *, 1> Elts;
362 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
363 MDNode *Node = MDNode::get(C, Elts);
365 Value *Arg0 = CI->getArgOperand(0);
366 Value *Arg1 = CI->getArgOperand(1);
368 // Convert the type of the pointer to a pointer to the stored type.
369 Value *BC = Builder.CreateBitCast(Arg0,
370 PointerType::getUnqual(Arg1->getType()),
372 StoreInst *SI = Builder.CreateStore(Arg1, BC);
373 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
374 SI->setAlignment(32);
377 CI->eraseFromParent();
379 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
381 if (Name.endswith("ub"))
382 intID = Intrinsic::x86_xop_vpcomub;
383 else if (Name.endswith("uw"))
384 intID = Intrinsic::x86_xop_vpcomuw;
385 else if (Name.endswith("ud"))
386 intID = Intrinsic::x86_xop_vpcomud;
387 else if (Name.endswith("uq"))
388 intID = Intrinsic::x86_xop_vpcomuq;
389 else if (Name.endswith("b"))
390 intID = Intrinsic::x86_xop_vpcomb;
391 else if (Name.endswith("w"))
392 intID = Intrinsic::x86_xop_vpcomw;
393 else if (Name.endswith("d"))
394 intID = Intrinsic::x86_xop_vpcomd;
395 else if (Name.endswith("q"))
396 intID = Intrinsic::x86_xop_vpcomq;
398 llvm_unreachable("Unknown suffix");
400 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
402 if (Name.startswith("lt"))
404 else if (Name.startswith("le"))
406 else if (Name.startswith("gt"))
408 else if (Name.startswith("ge"))
410 else if (Name.startswith("eq"))
412 else if (Name.startswith("ne"))
414 else if (Name.startswith("false"))
416 else if (Name.startswith("true"))
419 llvm_unreachable("Unknown condition");
421 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
423 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
424 Builder.getInt8(Imm)});
425 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
426 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
427 Intrinsic::x86_sse42_crc32_32_8);
428 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
429 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
430 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
431 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
432 // Replace broadcasts with a series of insertelements.
433 Type *VecTy = CI->getType();
434 Type *EltTy = VecTy->getVectorElementType();
435 unsigned EltNum = VecTy->getVectorNumElements();
436 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
437 EltTy->getPointerTo());
438 Value *Load = Builder.CreateLoad(EltTy, Cast);
439 Type *I32Ty = Type::getInt32Ty(C);
440 Rep = UndefValue::get(VecTy);
441 for (unsigned I = 0; I < EltNum; ++I)
442 Rep = Builder.CreateInsertElement(Rep, Load,
443 ConstantInt::get(I32Ty, I));
444 } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
445 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
446 VectorType *DstTy = cast<VectorType>(CI->getType());
447 unsigned NumDstElts = DstTy->getNumElements();
449 // Extract a subvector of the first NumDstElts lanes and sign extend.
450 SmallVector<int, 8> ShuffleMask;
451 for (int i = 0; i != (int)NumDstElts; ++i)
452 ShuffleMask.push_back(i);
454 Value *SV = Builder.CreateShuffleVector(
455 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
456 Rep = Builder.CreateSExt(SV, DstTy);
457 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
458 // Replace vbroadcasts with a vector shuffle.
459 Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
460 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
461 PointerType::getUnqual(VT));
462 Value *Load = Builder.CreateLoad(VT, Op);
463 const int Idxs[4] = { 0, 1, 0, 1 };
464 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
466 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
467 Name.startswith("llvm.x86.avx2.vbroadcast")) {
468 // Replace vp?broadcasts with a vector shuffle.
469 Value *Op = CI->getArgOperand(0);
470 unsigned NumElts = CI->getType()->getVectorNumElements();
471 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
472 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
473 Constant::getNullValue(MaskTy));
474 } else if (Name == "llvm.x86.sse2.psll.dq") {
475 // 128-bit shift left specified in bits.
476 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
477 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
478 Shift / 8); // Shift is in bits.
479 } else if (Name == "llvm.x86.sse2.psrl.dq") {
480 // 128-bit shift right specified in bits.
481 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
482 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
483 Shift / 8); // Shift is in bits.
484 } else if (Name == "llvm.x86.avx2.psll.dq") {
485 // 256-bit shift left specified in bits.
486 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
487 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
488 Shift / 8); // Shift is in bits.
489 } else if (Name == "llvm.x86.avx2.psrl.dq") {
490 // 256-bit shift right specified in bits.
491 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
492 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
493 Shift / 8); // Shift is in bits.
494 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
495 // 128-bit shift left specified in bytes.
496 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
497 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
499 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
500 // 128-bit shift right specified in bytes.
501 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
502 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
504 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
505 // 256-bit shift left specified in bytes.
506 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
507 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
509 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
510 // 256-bit shift right specified in bytes.
511 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
512 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
514 } else if (Name == "llvm.x86.sse41.pblendw" ||
515 Name == "llvm.x86.sse41.blendpd" ||
516 Name == "llvm.x86.sse41.blendps" ||
517 Name == "llvm.x86.avx.blend.pd.256" ||
518 Name == "llvm.x86.avx.blend.ps.256" ||
519 Name == "llvm.x86.avx2.pblendw" ||
520 Name == "llvm.x86.avx2.pblendd.128" ||
521 Name == "llvm.x86.avx2.pblendd.256") {
522 Value *Op0 = CI->getArgOperand(0);
523 Value *Op1 = CI->getArgOperand(1);
524 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
525 VectorType *VecTy = cast<VectorType>(CI->getType());
526 unsigned NumElts = VecTy->getNumElements();
528 SmallVector<Constant*, 16> Idxs;
529 for (unsigned i = 0; i != NumElts; ++i) {
530 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
531 Idxs.push_back(Builder.getInt32(Idx));
534 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
535 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
536 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
537 Name == "llvm.x86.avx.vinsertf128.si.256" ||
538 Name == "llvm.x86.avx2.vinserti128") {
539 Value *Op0 = CI->getArgOperand(0);
540 Value *Op1 = CI->getArgOperand(1);
541 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
542 VectorType *VecTy = cast<VectorType>(CI->getType());
543 unsigned NumElts = VecTy->getNumElements();
545 // Mask off the high bits of the immediate value; hardware ignores those.
548 // Extend the second operand into a vector that is twice as big.
549 Value *UndefV = UndefValue::get(Op1->getType());
550 SmallVector<Constant*, 8> Idxs;
551 for (unsigned i = 0; i != NumElts; ++i) {
552 Idxs.push_back(Builder.getInt32(i));
554 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
556 // Insert the second operand into the first operand.
558 // Note that there is no guarantee that instruction lowering will actually
559 // produce a vinsertf128 instruction for the created shuffles. In
560 // particular, the 0 immediate case involves no lane changes, so it can
561 // be handled as a blend.
563 // Example of shuffle mask for 32-bit elements:
564 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
565 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
567 SmallVector<Constant*, 8> Idxs2;
568 // The low half of the result is either the low half of the 1st operand
569 // or the low half of the 2nd operand (the inserted vector).
570 for (unsigned i = 0; i != NumElts / 2; ++i) {
571 unsigned Idx = Imm ? i : (i + NumElts);
572 Idxs2.push_back(Builder.getInt32(Idx));
574 // The high half of the result is either the low half of the 2nd operand
575 // (the inserted vector) or the high half of the 1st operand.
576 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
577 unsigned Idx = Imm ? (i + NumElts / 2) : i;
578 Idxs2.push_back(Builder.getInt32(Idx));
580 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
581 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
582 Name == "llvm.x86.avx.vextractf128.ps.256" ||
583 Name == "llvm.x86.avx.vextractf128.si.256" ||
584 Name == "llvm.x86.avx2.vextracti128") {
585 Value *Op0 = CI->getArgOperand(0);
586 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
587 VectorType *VecTy = cast<VectorType>(CI->getType());
588 unsigned NumElts = VecTy->getNumElements();
590 // Mask off the high bits of the immediate value; hardware ignores those.
593 // Get indexes for either the high half or low half of the input vector.
594 SmallVector<Constant*, 4> Idxs(NumElts);
595 for (unsigned i = 0; i != NumElts; ++i) {
596 unsigned Idx = Imm ? (i + NumElts) : i;
597 Idxs[i] = Builder.getInt32(Idx);
600 Value *UndefV = UndefValue::get(Op0->getType());
601 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
603 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
604 if (Name == "llvm.x86.avx.vpermil.pd.256")
606 else if (Name == "llvm.x86.avx.vpermil.pd")
608 else if (Name == "llvm.x86.avx.vpermil.ps.256")
610 else if (Name == "llvm.x86.avx.vpermil.ps")
613 if (PD256 || PD128 || PS256 || PS128) {
614 Value *Op0 = CI->getArgOperand(0);
615 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
616 SmallVector<Constant*, 8> Idxs;
619 for (unsigned i = 0; i != 2; ++i)
620 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
622 for (unsigned l = 0; l != 4; l+=2)
623 for (unsigned i = 0; i != 2; ++i)
624 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
626 for (unsigned i = 0; i != 4; ++i)
627 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
629 for (unsigned l = 0; l != 8; l+=4)
630 for (unsigned i = 0; i != 4; ++i)
631 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
633 llvm_unreachable("Unexpected function");
635 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
637 llvm_unreachable("Unknown function for CallInst upgrade.");
641 CI->replaceAllUsesWith(Rep);
642 CI->eraseFromParent();
646 std::string Name = CI->getName();
648 CI->setName(Name + ".old");
650 switch (NewFn->getIntrinsicID()) {
652 llvm_unreachable("Unknown function for CallInst upgrade.");
654 case Intrinsic::ctlz:
655 case Intrinsic::cttz:
656 assert(CI->getNumArgOperands() == 1 &&
657 "Mismatch between function args and call args");
658 CI->replaceAllUsesWith(Builder.CreateCall(
659 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
660 CI->eraseFromParent();
663 case Intrinsic::objectsize:
664 CI->replaceAllUsesWith(Builder.CreateCall(
665 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
666 CI->eraseFromParent();
669 case Intrinsic::ctpop: {
670 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
671 CI->eraseFromParent();
675 case Intrinsic::x86_xop_vfrcz_ss:
676 case Intrinsic::x86_xop_vfrcz_sd:
677 CI->replaceAllUsesWith(
678 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
679 CI->eraseFromParent();
682 case Intrinsic::x86_sse41_ptestc:
683 case Intrinsic::x86_sse41_ptestz:
684 case Intrinsic::x86_sse41_ptestnzc: {
685 // The arguments for these intrinsics used to be v4f32, and changed
686 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
687 // So, the only thing required is a bitcast for both arguments.
688 // First, check the arguments have the old type.
689 Value *Arg0 = CI->getArgOperand(0);
690 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
693 // Old intrinsic, add bitcasts
694 Value *Arg1 = CI->getArgOperand(1);
696 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
698 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
699 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
701 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
702 CI->replaceAllUsesWith(NewCall);
703 CI->eraseFromParent();
707 case Intrinsic::x86_sse41_insertps:
708 case Intrinsic::x86_sse41_dppd:
709 case Intrinsic::x86_sse41_dpps:
710 case Intrinsic::x86_sse41_mpsadbw:
711 case Intrinsic::x86_avx_dp_ps_256:
712 case Intrinsic::x86_avx2_mpsadbw: {
713 // Need to truncate the last argument from i32 to i8 -- this argument models
714 // an inherently 8-bit immediate operand to these x86 instructions.
715 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
716 CI->arg_operands().end());
718 // Replace the last argument with a trunc.
719 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
721 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
722 CI->replaceAllUsesWith(NewCall);
723 CI->eraseFromParent();
729 // This tests each Function to determine if it needs upgrading. When we find
730 // one we are interested in, we then upgrade all calls to reflect the new
732 void llvm::UpgradeCallsToIntrinsic(Function* F) {
733 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
735 // Upgrade the function and check if it is a totaly new function.
737 if (UpgradeIntrinsicFunction(F, NewFn)) {
738 // Replace all uses to the old function with the new one if necessary.
739 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
741 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
742 UpgradeIntrinsicCall(CI, NewFn);
744 // Remove old function, no longer used, from the module.
745 F->eraseFromParent();
749 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
750 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
751 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
752 // Check if the tag uses struct-path aware TBAA format.
753 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
756 if (MD->getNumOperands() == 3) {
757 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
758 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
759 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
760 Metadata *Elts2[] = {ScalarType, ScalarType,
761 ConstantAsMetadata::get(Constant::getNullValue(
762 Type::getInt64Ty(I->getContext()))),
764 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
766 // Create a MDNode <MD, MD, offset 0>
767 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
768 Type::getInt64Ty(I->getContext())))};
769 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
773 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
774 Instruction *&Temp) {
775 if (Opc != Instruction::BitCast)
779 Type *SrcTy = V->getType();
780 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
781 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
782 LLVMContext &Context = V->getContext();
784 // We have no information about target data layout, so we assume that
785 // the maximum pointer size is 64bit.
786 Type *MidTy = Type::getInt64Ty(Context);
787 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
789 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
795 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
796 if (Opc != Instruction::BitCast)
799 Type *SrcTy = C->getType();
800 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
801 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
802 LLVMContext &Context = C->getContext();
804 // We have no information about target data layout, so we assume that
805 // the maximum pointer size is 64bit.
806 Type *MidTy = Type::getInt64Ty(Context);
808 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
815 /// Check the debug info version number, if it is out-dated, drop the debug
816 /// info. Return true if module is modified.
817 bool llvm::UpgradeDebugInfo(Module &M) {
818 unsigned Version = getDebugMetadataVersionFromModule(M);
819 if (Version == DEBUG_METADATA_VERSION)
822 bool RetCode = StripDebugInfo(M);
824 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
825 M.getContext().diagnose(DiagVersion);
830 void llvm::UpgradeMDStringConstant(std::string &String) {
831 const std::string OldPrefix = "llvm.vectorizer.";
832 if (String == "llvm.vectorizer.unroll") {
833 String = "llvm.loop.interleave.count";
834 } else if (String.find(OldPrefix) == 0) {
835 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");