1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
// NOTE(review): every line of this listing starts with a line number and the
// numbering skips inside this definition, so the end of the parameter list,
// the body of the `if`, and the return statements are not shown here --
// confirm them against the complete file.
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
//
// F is the declaration to inspect and IID the intrinsic it should map to.
// NewFn is assigned below; its declaration is on a line missing from this
// listing.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
// v4f32 (<4 x float>) arguments. Only parameter 0 is examined.
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
// The stale declaration is renamed by appending ".old", then the declaration
// of IID is fetched from the same module into NewFn.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// NOTE(review): the embedded line numbering skips inside this definition; the
// end of the parameter list, the body of the `if`, and the return statements
// are not shown here -- confirm them against the complete file.
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
//
// F is the declaration to inspect and IID the intrinsic it should map to.
// NewFn is assigned below; its declaration is on a line missing from this
// listing.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
// The index of the last parameter is computed as getNumParams() - 1.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
// "Aside" means appending ".old" to the name; NewFn then receives the
// declaration of IID from the same module.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// NOTE(review): the embedded line numbering skips inside this definition; the
// end of the parameter list, the body of the `if`, and the return statements
// are not shown here -- confirm them against the complete file.
65 // Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit
66 // immediates have changed their type from i32 to i8.
//
// F is the declaration to inspect and IID the intrinsic it should map to.
// NewFn is assigned below; its declaration is on a line missing from this
// listing.
67 static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID,
69 // Check that the last argument is an i32.
// NOTE(review): the code reads the parameter at fixed index 2 rather than
// computing the last index -- confirm index 2 is the immediate for every
// intrinsic routed here.
70 Type *LastArgType = F->getFunctionType()->getParamType(2);
71 if (!LastArgType->isIntegerTy(32))
74 // Move this function aside and map down.
// "Aside" means appending ".old" to the name; NewFn then receives the
// declaration of IID from the same module.
75 F->setName(F->getName() + ".old");
76 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Inspects the declaration F by name (and, for some names, by argument count
// or parameter type) to decide whether it is an out-of-date intrinsic, and
// where a replacement declaration exists stores it in NewFn.
//
// NOTE(review): the embedded line numbering skips throughout this definition.
// Return statements, closing braces, several argument continuations and the
// bodies of some `if`s are not shown, so the value returned on each path
// cannot be read from this listing -- confirm against the complete file.
80 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
81 assert(F && "Illegal to upgrade a non-existent Function.");
83 // Quickly eliminate it, if it's not a candidate.
// Candidates must be longer than 8 characters and start with "llvm.".
84 StringRef Name = F->getName();
85 if (Name.size() <= 8 || !Name.startswith("llvm."))
87 Name = Name.substr(5); // Strip off "llvm."
// From here on Name no longer carries the "llvm." prefix.
// arm.neon.vclz.*: build a two-parameter function type (the original argument
// type plus an i1) and create "llvm.ctlz.<suffix>" directly. substr(14) drops
// "arm.neon.vclz." from Name.
92 if (Name.startswith("arm.neon.vclz")) {
94 F->arg_begin()->getType(),
95 Type::getInt1Ty(F->getContext())
97 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
98 // the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.* (see the name built below).
100 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
101 NewFn = Function::Create(fType, F->getLinkage(),
102 "llvm.ctlz." + Name.substr(14), F->getParent());
// arm.neon.vcnt.* maps to the ctpop intrinsic overloaded on the first
// argument's type.
105 if (Name.startswith("arm.neon.vcnt")) {
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
107 F->arg_begin()->getType());
// One-argument ctlz./cttz. declarations are renamed and redeclared through
// Intrinsic::getDeclaration. Note that the new name is built from Name, which
// has already had "llvm." stripped.
113 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
114 F->setName(Name + ".old");
115 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
116 F->arg_begin()->getType());
119 if (Name.startswith("cttz.") && F->arg_size() == 1) {
120 F->setName(Name + ".old");
121 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
122 F->arg_begin()->getType());
129 // We only need to change the name to match the mangling including the
// (the rest of this comment is on a line missing from this listing)
// Two-argument objectsize.*: redeclare only when the current name differs
// from the name Intrinsic::getName produces for {return type, first argument
// type}.
131 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
132 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
133 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
134 F->setName(Name + ".old");
135 NewFn = Intrinsic::getDeclaration(F->getParent(),
136 Intrinsic::objectsize, Tys);
// Intrinsics recognised purely by name (plus a two-argument check for
// x86.xop.vpcom*). UpgradeIntrinsicCall matches these same names with the
// "llvm." prefix and rewrites the calls.
// NOTE(review): the body of this `if` is on lines missing from this listing.
143 if (Name.startswith("x86.sse2.pcmpeq.") ||
144 Name.startswith("x86.sse2.pcmpgt.") ||
145 Name.startswith("x86.avx2.pcmpeq.") ||
146 Name.startswith("x86.avx2.pcmpgt.") ||
147 Name.startswith("x86.avx.vpermil.") ||
148 Name == "x86.avx.vinsertf128.pd.256" ||
149 Name == "x86.avx.vinsertf128.ps.256" ||
150 Name == "x86.avx.vinsertf128.si.256" ||
151 Name == "x86.avx2.vinserti128" ||
152 Name == "x86.avx.vextractf128.pd.256" ||
153 Name == "x86.avx.vextractf128.ps.256" ||
154 Name == "x86.avx.vextractf128.si.256" ||
155 Name == "x86.avx2.vextracti128" ||
156 Name == "x86.avx.movnt.dq.256" ||
157 Name == "x86.avx.movnt.pd.256" ||
158 Name == "x86.avx.movnt.ps.256" ||
159 Name == "x86.sse42.crc32.64.8" ||
160 Name == "x86.avx.vbroadcast.ss" ||
161 Name == "x86.avx.vbroadcast.ss.256" ||
162 Name == "x86.avx.vbroadcast.sd.256" ||
163 Name == "x86.sse2.psll.dq" ||
164 Name == "x86.sse2.psrl.dq" ||
165 Name == "x86.avx2.psll.dq" ||
166 Name == "x86.avx2.psrl.dq" ||
167 Name == "x86.sse2.psll.dq.bs" ||
168 Name == "x86.sse2.psrl.dq.bs" ||
169 Name == "x86.avx2.psll.dq.bs" ||
170 Name == "x86.avx2.psrl.dq.bs" ||
171 Name == "x86.sse41.pblendw" ||
172 Name == "x86.sse41.blendpd" ||
173 Name == "x86.sse41.blendps" ||
174 Name == "x86.avx.blend.pd.256" ||
175 Name == "x86.avx.blend.ps.256" ||
176 Name == "x86.avx2.pblendw" ||
177 Name == "x86.avx2.pblendd.128" ||
178 Name == "x86.avx2.pblendd.256" ||
179 Name == "x86.avx2.vbroadcasti128" ||
180 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
184 // SSE4.1 ptest functions may have an old signature.
// Each exact name is delegated to UpgradeSSE41Function with its intrinsic ID.
185 if (Name.startswith("x86.sse41.ptest")) {
186 if (Name == "x86.sse41.ptestc")
187 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
188 if (Name == "x86.sse41.ptestz")
189 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
190 if (Name == "x86.sse41.ptestnzc")
191 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
193 // Several blend and other instructions with masks used the wrong number of
// (the rest of this comment is on a line missing from this listing)
// Each name below is delegated to UpgradeX86IntrinsicsWith8BitMask; the final
// argument of every call is on a line missing from this listing.
195 if (Name == "x86.sse41.insertps")
196 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
198 if (Name == "x86.sse41.dppd")
199 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
201 if (Name == "x86.sse41.dpps")
202 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
204 if (Name == "x86.sse41.mpsadbw")
205 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
207 if (Name == "x86.avx.dp.ps.256")
208 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
210 if (Name == "x86.avx2.mpsadbw")
211 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
// AVX-512 masked compares, 512-bit forms: signed (cmp) then unsigned (ucmp),
// for b/w/d/q element widths. All are delegated to UpgradeAVX512CmpIntrinsic.
213 if (Name == "x86.avx512.mask.cmp.b.512")
214 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
216 if (Name == "x86.avx512.mask.cmp.w.512")
217 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_512,
219 if (Name == "x86.avx512.mask.cmp.d.512")
220 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_512,
222 if (Name == "x86.avx512.mask.cmp.q.512")
223 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_512,
225 if (Name == "x86.avx512.mask.ucmp.b.512")
226 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_512,
228 if (Name == "x86.avx512.mask.ucmp.w.512")
229 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_512,
231 if (Name == "x86.avx512.mask.ucmp.d.512")
232 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_512,
234 if (Name == "x86.avx512.mask.ucmp.q.512")
235 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_512,
// Same set for the 256-bit forms.
238 if (Name == "x86.avx512.mask.cmp.b.256")
239 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_256,
241 if (Name == "x86.avx512.mask.cmp.w.256")
242 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_256,
244 if (Name == "x86.avx512.mask.cmp.d.256")
245 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_256,
247 if (Name == "x86.avx512.mask.cmp.q.256")
248 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_256,
250 if (Name == "x86.avx512.mask.ucmp.b.256")
251 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_256,
253 if (Name == "x86.avx512.mask.ucmp.w.256")
254 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_256,
256 if (Name == "x86.avx512.mask.ucmp.d.256")
257 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_256,
259 if (Name == "x86.avx512.mask.ucmp.q.256")
260 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_256,
// Same set for the 128-bit forms.
263 if (Name == "x86.avx512.mask.cmp.b.128")
264 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_128,
266 if (Name == "x86.avx512.mask.cmp.w.128")
267 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_128,
269 if (Name == "x86.avx512.mask.cmp.d.128")
270 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_128,
272 if (Name == "x86.avx512.mask.cmp.q.128")
273 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_128,
275 if (Name == "x86.avx512.mask.ucmp.b.128")
276 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_128,
278 if (Name == "x86.avx512.mask.ucmp.w.128")
279 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_128,
281 if (Name == "x86.avx512.mask.ucmp.d.128")
282 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_128,
284 if (Name == "x86.avx512.mask.ucmp.q.128")
285 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_128,
288 // frcz.ss/sd may need to have an argument dropped
// Only the two-argument declarations are renamed and redeclared.
289 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
290 F->setName(Name + ".old");
291 NewFn = Intrinsic::getDeclaration(F->getParent(),
292 Intrinsic::x86_xop_vfrcz_ss);
295 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
296 F->setName(Name + ".old");
297 NewFn = Intrinsic::getDeclaration(F->getParent(),
298 Intrinsic::x86_xop_vfrcz_sd);
301 // Fix the FMA4 intrinsics to remove the 4
// substr(8) drops "x86.fma4" from Name, so the declaration is renamed in
// place to "llvm.x86.fma" followed by the remaining suffix.
302 if (Name.startswith("x86.fma4.")) {
303 F->setName("llvm.x86.fma" + Name.substr(8));
311 // This may not belong here. This function is effectively being overloaded
312 // to both detect an intrinsic which needs upgrading, and to provide the
313 // upgraded form of the intrinsic. We should perhaps have two separate
314 // functions for this.
// Public entry point: runs UpgradeIntrinsicFunction1 on F and then refreshes
// the attributes of a function that has an intrinsic ID.
//
// NOTE(review): the embedded line numbering skips inside this definition. The
// lines between the comment and the `if` below, the tail of the
// getAttributes call, and the return statement are not shown -- confirm them
// against the complete file.
318 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
320 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
322 // Upgrade intrinsic attributes. This does not change the function.
// A zero intrinsic ID skips the attribute update.
325 if (unsigned id = F->getIntrinsicID())
326 F->setAttributes(Intrinsic::getAttributes(F->getContext(),
// Hook for upgrading a global variable; per the comment below there is
// currently no upgrade logic.
// NOTE(review): the return statement and closing brace are on lines missing
// from this listing.
331 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
332 // Nothing to do yet.
336 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// into a byte-vector shuffle (see the CreateShuffleVector call below).
//
// Builder emits the new instructions, C supplies the types, Op is the value
// being shifted and NumLanes the number of 16-byte lanes in it. Shift is used
// below but declared on a line missing from this listing.
//
// NOTE(review): the embedded line numbering skips inside this definition. The
// end of the parameter list, the trailing arguments of both bitcasts, the
// guard around the shuffle and the condition before the lane adjustment are
// not shown -- confirm them against the complete file.
338 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
339 Value *Op, unsigned NumLanes,
341 // Each lane is 16 bytes.
342 unsigned NumElts = NumLanes * 16;
344 // Bitcast from a 64-bit element type to a byte element type.
345 Op = Builder.CreateBitCast(Op,
346 VectorType::get(Type::getInt8Ty(C), NumElts),
348 // We'll be shuffling in zeroes.
349 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
351 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
352 // we'll just return the zero vector.
354 SmallVector<Constant*, 32> Idxs;
355 // 256-bit version is split into two 16-byte lanes.
// l is the byte offset of the current lane, i the byte within the lane.
356 for (unsigned l = 0; l != NumElts; l += 16)
357 for (unsigned i = 0; i != 16; ++i) {
358 unsigned Idx = NumElts + i - Shift;
360 Idx -= NumElts - 16; // end of lane, switch operand.
361 Idxs.push_back(Builder.getInt32(Idx + l));
// The zero vector is the first shuffle operand and Op the second.
364 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
367 // Bitcast back to a 64-bit element type.
368 return Builder.CreateBitCast(Res,
369 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
373 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// into a byte-vector shuffle (see the CreateShuffleVector call below).
//
// Builder emits the new instructions, C supplies the types, Op is the value
// being shifted and NumLanes the number of 16-byte lanes in it. Shift is used
// below but declared on a line missing from this listing.
//
// NOTE(review): the embedded line numbering skips inside this definition. The
// end of the parameter list, the trailing arguments of both bitcasts, the
// guard around the shuffle and the condition before the lane adjustment are
// not shown -- confirm them against the complete file.
375 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
376 Value *Op, unsigned NumLanes,
378 // Each lane is 16 bytes.
379 unsigned NumElts = NumLanes * 16;
381 // Bitcast from a 64-bit element type to a byte element type.
382 Op = Builder.CreateBitCast(Op,
383 VectorType::get(Type::getInt8Ty(C), NumElts),
385 // We'll be shuffling in zeroes.
386 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
388 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
389 // we'll just return the zero vector.
391 SmallVector<Constant*, 32> Idxs;
392 // 256-bit version is split into two 16-byte lanes.
// l is the byte offset of the current lane, i the byte within the lane.
393 for (unsigned l = 0; l != NumElts; l += 16)
394 for (unsigned i = 0; i != 16; ++i) {
395 unsigned Idx = i + Shift;
397 Idx += NumElts - 16; // end of lane, switch operand.
398 Idxs.push_back(Builder.getInt32(Idx + l));
// Op is the first shuffle operand and the zero vector the second (the
// reverse of the operand order used by the PSLLDQ helper).
401 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
404 // Bitcast back to a 64-bit element type.
405 return Builder.CreateBitCast(Res,
406 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
410 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
411 // upgraded intrinsic. All argument and return casting must be provided in
412 // order to seamlessly integrate with existing context.
//
// CI is the call to rewrite and NewFn the replacement declaration. The first
// part of the body is a chain keyed on the callee's name that builds a
// replacement value Rep with ordinary IR; the second part is a switch on
// NewFn's intrinsic ID that re-emits the call against NewFn. In both parts
// the old call is erased once its uses have been redirected.
//
// NOTE(review): the embedded line numbering skips throughout this definition.
// The condition choosing between the two parts, the declarations of Rep, Imm
// and intID, several trailing call arguments, and the return/break statements
// are not shown -- confirm them against the complete file.
413 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
414 Function *F = CI->getCalledFunction();
415 LLVMContext &C = CI->getContext();
416 IRBuilder<> Builder(C);
// New instructions are inserted immediately before the old call.
417 Builder.SetInsertPoint(CI->getParent(), CI);
419 assert(F && "Intrinsic call is not direct?");
422 // Get the Function's name.
423 StringRef Name = F->getName();
426 // Upgrade packed integer vector compares intrinsics to compare instructions
427 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
428 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
429 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
431 // need to sign extend since icmp returns vector of i1
432 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
433 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
434 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
435 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
437 // need to sign extend since icmp returns vector of i1
438 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
439 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
440 Name == "llvm.x86.avx.movnt.ps.256" ||
441 Name == "llvm.x86.avx.movnt.pd.256") {
// Non-temporal stores become a plain store tagged with "nontemporal"
// metadata. This local Builder shadows the outer one and has the same
// insertion point.
442 IRBuilder<> Builder(C);
443 Builder.SetInsertPoint(CI->getParent(), CI);
445 Module *M = F->getParent();
446 SmallVector<Metadata *, 1> Elts;
448 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
449 MDNode *Node = MDNode::get(C, Elts);
451 Value *Arg0 = CI->getArgOperand(0);
452 Value *Arg1 = CI->getArgOperand(1);
454 // Convert the type of the pointer to a pointer to the stored type.
455 Value *BC = Builder.CreateBitCast(Arg0,
456 PointerType::getUnqual(Arg1->getType()),
458 StoreInst *SI = Builder.CreateStore(Arg1, BC);
459 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// NOTE(review): the matched names are all ".256" forms; confirm that a
// 16-byte alignment is what is intended for these stores.
460 SI->setAlignment(16);
// This branch erases the call itself; no replaceAllUsesWith is visible here.
463 CI->eraseFromParent();
465 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
// Pick the vpcom intrinsic from the name's suffix. The two-letter unsigned
// suffixes are tested first because such names also end in b/w/d/q.
467 if (Name.endswith("ub"))
468 intID = Intrinsic::x86_xop_vpcomub;
469 else if (Name.endswith("uw"))
470 intID = Intrinsic::x86_xop_vpcomuw;
471 else if (Name.endswith("ud"))
472 intID = Intrinsic::x86_xop_vpcomud;
473 else if (Name.endswith("uq"))
474 intID = Intrinsic::x86_xop_vpcomuq;
475 else if (Name.endswith("b"))
476 intID = Intrinsic::x86_xop_vpcomb;
477 else if (Name.endswith("w"))
478 intID = Intrinsic::x86_xop_vpcomw;
479 else if (Name.endswith("d"))
480 intID = Intrinsic::x86_xop_vpcomd;
481 else if (Name.endswith("q"))
482 intID = Intrinsic::x86_xop_vpcomq;
484 llvm_unreachable("Unknown suffix");
486 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// The condition mnemonic now leads Name. The value assigned to Imm for each
// mnemonic is on lines missing from this listing.
488 if (Name.startswith("lt"))
490 else if (Name.startswith("le"))
492 else if (Name.startswith("gt"))
494 else if (Name.startswith("ge"))
496 else if (Name.startswith("eq"))
498 else if (Name.startswith("ne"))
500 else if (Name.startswith("false"))
502 else if (Name.startswith("true"))
505 llvm_unreachable("Unknown condition");
// Call the three-operand vpcom intrinsic with Imm passed as an i8.
507 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
508 Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
509 CI->getArgOperand(1), Builder.getInt8(Imm));
510 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
// Truncate operand 0 to i32, call crc32.32.8, then zero-extend the result
// back to the original call's type.
511 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
512 Intrinsic::x86_sse42_crc32_32_8);
513 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
514 Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
515 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
516 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
517 // Replace broadcasts with a series of insertelements.
// The scalar is loaded once through operand 0 (cast to a pointer to the
// element type) and inserted into every lane of an undef vector.
518 Type *VecTy = CI->getType();
519 Type *EltTy = VecTy->getVectorElementType();
520 unsigned EltNum = VecTy->getVectorNumElements();
521 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
522 EltTy->getPointerTo());
523 Value *Load = Builder.CreateLoad(Cast);
524 Type *I32Ty = Type::getInt32Ty(C);
525 Rep = UndefValue::get(VecTy);
526 for (unsigned I = 0; I < EltNum; ++I)
527 Rep = Builder.CreateInsertElement(Rep, Load,
528 ConstantInt::get(I32Ty, I));
529 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
530 // Replace vbroadcasts with a vector shuffle.
// Load a <2 x i64> through operand 0 and shuffle it with indices {0,1,0,1};
// the mask argument of the shuffle call is on a missing line.
531 Value *Op = Builder.CreatePointerCast(
532 CI->getArgOperand(0),
533 PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
534 Value *Load = Builder.CreateLoad(Op);
535 const int Idxs[4] = { 0, 1, 0, 1 };
536 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
// Whole-register byte shifts. The forms without ".bs" take the shift in bits
// and divide by 8; the ".bs" forms take bytes (their final call argument is
// on a missing line). The lane count passed is 1 for SSE2 and 2 for AVX2.
538 } else if (Name == "llvm.x86.sse2.psll.dq") {
539 // 128-bit shift left specified in bits.
540 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
541 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
542 Shift / 8); // Shift is in bits.
543 } else if (Name == "llvm.x86.sse2.psrl.dq") {
544 // 128-bit shift right specified in bits.
545 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
546 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
547 Shift / 8); // Shift is in bits.
548 } else if (Name == "llvm.x86.avx2.psll.dq") {
549 // 256-bit shift left specified in bits.
550 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
551 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
552 Shift / 8); // Shift is in bits.
553 } else if (Name == "llvm.x86.avx2.psrl.dq") {
554 // 256-bit shift right specified in bits.
555 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
556 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
557 Shift / 8); // Shift is in bits.
558 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
559 // 128-bit shift left specified in bytes.
560 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
561 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
563 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
564 // 128-bit shift right specified in bytes.
565 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
566 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
568 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
569 // 256-bit shift left specified in bytes.
570 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
571 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
573 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
574 // 256-bit shift right specified in bytes.
575 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
576 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
578 } else if (Name == "llvm.x86.sse41.pblendw" ||
579 Name == "llvm.x86.sse41.blendpd" ||
580 Name == "llvm.x86.sse41.blendps" ||
581 Name == "llvm.x86.avx.blend.pd.256" ||
582 Name == "llvm.x86.avx.blend.ps.256" ||
583 Name == "llvm.x86.avx2.pblendw" ||
584 Name == "llvm.x86.avx2.pblendd.128" ||
585 Name == "llvm.x86.avx2.pblendd.256") {
// Blends become a two-operand shuffle: bit (i % 8) of the immediate selects
// lane i of Op1 (index i + NumElts) when set, otherwise lane i of Op0.
586 Value *Op0 = CI->getArgOperand(0);
587 Value *Op1 = CI->getArgOperand(1);
588 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
589 VectorType *VecTy = cast<VectorType>(CI->getType());
590 unsigned NumElts = VecTy->getNumElements();
592 SmallVector<Constant*, 16> Idxs;
593 for (unsigned i = 0; i != NumElts; ++i) {
594 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
595 Idxs.push_back(Builder.getInt32(Idx));
598 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
599 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
600 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
601 Name == "llvm.x86.avx.vinsertf128.si.256" ||
602 Name == "llvm.x86.avx2.vinserti128") {
603 Value *Op0 = CI->getArgOperand(0);
604 Value *Op1 = CI->getArgOperand(1);
605 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
606 VectorType *VecTy = cast<VectorType>(CI->getType());
607 unsigned NumElts = VecTy->getNumElements();
609 // Mask off the high bits of the immediate value; hardware ignores those.
// NOTE(review): the masking statement itself is on a line missing from this
// listing.
612 // Extend the second operand into a vector that is twice as big.
613 Value *UndefV = UndefValue::get(Op1->getType());
614 SmallVector<Constant*, 8> Idxs;
615 for (unsigned i = 0; i != NumElts; ++i) {
616 Idxs.push_back(Builder.getInt32(i));
618 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
620 // Insert the second operand into the first operand.
622 // Note that there is no guarantee that instruction lowering will actually
623 // produce a vinsertf128 instruction for the created shuffles. In
624 // particular, the 0 immediate case involves no lane changes, so it can
625 // be handled as a blend.
627 // Example of shuffle mask for 32-bit elements:
628 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
629 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
631 SmallVector<Constant*, 8> Idxs2;
632 // The low half of the result is either the low half of the 1st operand
633 // or the low half of the 2nd operand (the inserted vector).
634 for (unsigned i = 0; i != NumElts / 2; ++i) {
635 unsigned Idx = Imm ? i : (i + NumElts);
636 Idxs2.push_back(Builder.getInt32(Idx));
638 // The high half of the result is either the low half of the 2nd operand
639 // (the inserted vector) or the high half of the 1st operand.
640 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
641 unsigned Idx = Imm ? (i + NumElts / 2) : i;
642 Idxs2.push_back(Builder.getInt32(Idx));
// Second shuffle: Op0 combined with the widened Op1 built above.
644 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
645 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
646 Name == "llvm.x86.avx.vextractf128.ps.256" ||
647 Name == "llvm.x86.avx.vextractf128.si.256" ||
648 Name == "llvm.x86.avx2.vextracti128") {
649 Value *Op0 = CI->getArgOperand(0);
650 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
// NumElts is the element count of the call's result type, not of Op0.
651 VectorType *VecTy = cast<VectorType>(CI->getType());
652 unsigned NumElts = VecTy->getNumElements();
654 // Mask off the high bits of the immediate value; hardware ignores those.
// NOTE(review): the masking statement itself is on a line missing from this
// listing.
657 // Get indexes for either the high half or low half of the input vector.
658 SmallVector<Constant*, 4> Idxs(NumElts);
659 for (unsigned i = 0; i != NumElts; ++i) {
660 unsigned Idx = Imm ? (i + NumElts) : i;
661 Idxs[i] = Builder.getInt32(Idx);
664 Value *UndefV = UndefValue::get(Op0->getType());
665 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// vpermil forms: the flag set for each name, and the conditions selecting
// among the four index-building loops below, are on lines missing from this
// listing. Each loop extracts per-element selectors from the immediate and
// the result is a shuffle of Op0 with itself.
667 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
668 if (Name == "llvm.x86.avx.vpermil.pd.256")
670 else if (Name == "llvm.x86.avx.vpermil.pd")
672 else if (Name == "llvm.x86.avx.vpermil.ps.256")
674 else if (Name == "llvm.x86.avx.vpermil.ps")
677 if (PD256 || PD128 || PS256 || PS128) {
678 Value *Op0 = CI->getArgOperand(0);
679 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
680 SmallVector<Constant*, 8> Idxs;
// Two elements, one selector bit each.
683 for (unsigned i = 0; i != 2; ++i)
684 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
// Four elements in two groups of two, one selector bit each, offset by l.
686 for (unsigned l = 0; l != 4; l+=2)
687 for (unsigned i = 0; i != 2; ++i)
688 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
// Four elements, two selector bits each.
690 for (unsigned i = 0; i != 4; ++i)
691 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
// Eight elements in two groups of four; both groups read the same two-bit
// selectors and differ only by the offset l.
693 for (unsigned l = 0; l != 8; l+=4)
694 for (unsigned i = 0; i != 4; ++i)
695 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
697 llvm_unreachable("Unexpected function");
699 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
701 llvm_unreachable("Unknown function for CallInst upgrade.");
// Common tail of the name-based chain: redirect all uses to Rep and drop the
// old call.
705 CI->replaceAllUsesWith(Rep);
706 CI->eraseFromParent();
// From here the call is re-emitted against NewFn. The old call's name is
// copied so the replacement can reuse it; the old call gets an ".old" suffix
// (the condition guarding that rename is on a missing line).
710 std::string Name = CI->getName();
712 CI->setName(Name + ".old");
714 switch (NewFn->getIntrinsicID()) {
716 llvm_unreachable("Unknown function for CallInst upgrade.");
718 case Intrinsic::ctlz:
719 case Intrinsic::cttz:
// The old one-operand form gains a constant false second operand.
720 assert(CI->getNumArgOperands() == 1 &&
721 "Mismatch between function args and call args");
722 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
723 Builder.getFalse(), Name));
724 CI->eraseFromParent();
727 case Intrinsic::objectsize:
// Same two operands, now passed to NewFn.
728 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
729 CI->getArgOperand(0),
730 CI->getArgOperand(1),
732 CI->eraseFromParent();
735 case Intrinsic::ctpop: {
736 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
737 CI->eraseFromParent();
741 case Intrinsic::x86_xop_vfrcz_ss:
742 case Intrinsic::x86_xop_vfrcz_sd:
// Only operand 1 of the old call is forwarded; operand 0 is dropped.
743 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
745 CI->eraseFromParent();
748 case Intrinsic::x86_sse41_ptestc:
749 case Intrinsic::x86_sse41_ptestz:
750 case Intrinsic::x86_sse41_ptestnzc: {
751 // The arguments for these intrinsics used to be v4f32, and changed
752 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
753 // So, the only thing required is a bitcast for both arguments.
754 // First, check the arguments have the old type.
755 Value *Arg0 = CI->getArgOperand(0);
756 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
759 // Old intrinsic, add bitcasts
760 Value *Arg1 = CI->getArgOperand(1);
762 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
764 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
765 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
767 Type *Ty[] = {NewVecTy, NewVecTy};
768 CallInst *NewCall = Builder.CreateCall2(
769 FunctionType::get(CI->getType(), Ty, false), NewFn, BC0, BC1, Name);
770 CI->replaceAllUsesWith(NewCall);
771 CI->eraseFromParent();
775 case Intrinsic::x86_sse41_insertps:
776 case Intrinsic::x86_sse41_dppd:
777 case Intrinsic::x86_sse41_dpps:
778 case Intrinsic::x86_sse41_mpsadbw:
779 case Intrinsic::x86_avx_dp_ps_256:
780 case Intrinsic::x86_avx2_mpsadbw: {
781 // Need to truncate the last argument from i32 to i8 -- this argument models
782 // an inherently 8-bit immediate operand to these x86 instructions.
783 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
784 CI->arg_operands().end());
786 // Replace the last argument with a trunc.
787 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
789 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
790 CI->replaceAllUsesWith(NewCall);
791 CI->eraseFromParent();
797 // This tests each Function to determine if it needs upgrading. When we find
798 // one we are interested in, we then upgrade all calls to reflect the new
// (the rest of this comment is on a line missing from this listing)
//
// NOTE(review): the embedded line numbering skips inside this definition. The
// declaration of NewFn, the loop condition, and any guard around the final
// erase are not shown -- confirm them against the complete file.
800 void llvm::UpgradeCallsToIntrinsic(Function* F) {
801 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
803 // Upgrade the function and check if it is a totally new function.
805 if (UpgradeIntrinsicFunction(F, NewFn)) {
807 // Replace all uses to the old function with the new one if necessary.
// The iterator is advanced inside the dyn_cast expression, before
// UpgradeIntrinsicCall runs on the call it pointed at.
808 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
810 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
811 UpgradeIntrinsicCall(CI, NewFn);
813 // Remove old function, no longer used, from the module.
814 F->eraseFromParent();
// Rewrites the TBAA metadata attached to I into a three- or four-operand tag
// built from the existing node, unless the existing tag is already detected
// as struct-path aware.
//
// NOTE(review): the embedded line numbering skips inside this definition. The
// body of the first `if`, the last element of Elts2, and the `else` that
// presumably separates the two construction paths are not shown -- confirm
// them against the complete file.
819 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
820 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
821 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
822 // Check if the tag uses struct-path aware TBAA format.
// Detection rule: operand 0 is itself an MDNode and there are at least three
// operands.
823 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// A three-operand tag: rebuild the scalar type node from operands 0 and 1.
826 if (MD->getNumOperands() == 3) {
827 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
828 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
829 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
830 Metadata *Elts2[] = {ScalarType, ScalarType,
831 ConstantAsMetadata::get(Constant::getNullValue(
832 Type::getInt64Ty(I->getContext()))),
834 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
836 // Create a MDNode <MD, MD, offset 0>
837 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
838 Type::getInt64Ty(I->getContext())))};
839 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// For a BitCast opcode whose source and destination are pointer (or
// pointer-vector) types in different address spaces, builds a ptrtoint to i64
// followed by an inttoptr to DestTy. The ptrtoint is handed back through Temp
// and the inttoptr is the return value. Neither Create call is given an
// insertion point.
//
// NOTE(review): the embedded line numbering skips inside this definition; the
// value returned when the opcode is not BitCast, or when the address spaces
// match, is on lines missing from this listing.
843 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
844 Instruction *&Temp) {
845 if (Opc != Instruction::BitCast)
849 Type *SrcTy = V->getType();
850 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
851 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
852 LLVMContext &Context = V->getContext();
854 // We have no information about target data layout, so we assume that
855 // the maximum pointer size is 64bit.
856 Type *MidTy = Type::getInt64Ty(Context);
857 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
859 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of UpgradeBitCastInst: for a BitCast opcode
// between pointer (or pointer-vector) types in different address spaces,
// returns an inttoptr constant expression wrapping a ptrtoint of C to i64.
//
// NOTE(review): the embedded line numbering skips inside this definition. The
// second argument of getIntToPtr and the value returned on the other paths
// are on lines missing from this listing.
865 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
866 if (Opc != Instruction::BitCast)
869 Type *SrcTy = C->getType();
870 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
871 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
872 LLVMContext &Context = C->getContext();
874 // We have no information about target data layout, so we assume that
875 // the maximum pointer size is 64bit.
876 Type *MidTy = Type::getInt64Ty(Context);
878 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
885 /// Check the debug info version number, if it is out-dated, drop the debug
886 /// info. Return true if module is modified.
///
/// The module's version is compared against DEBUG_METADATA_VERSION. On a
/// mismatch StripDebugInfo is run and a DiagnosticInfoDebugMetadataVersion
/// carrying the found version is reported through the module's context.
///
/// NOTE(review): the embedded line numbering skips inside this definition;
/// the return statements and any condition guarding the diagnostic are on
/// lines missing from this listing.
887 bool llvm::UpgradeDebugInfo(Module &M) {
888 unsigned Version = getDebugMetadataVersionFromModule(M);
889 if (Version == DEBUG_METADATA_VERSION)
892 bool RetCode = StripDebugInfo(M);
894 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
895 M.getContext().diagnose(DiagVersion);
// Rewrites an old "llvm.vectorizer.*" metadata string in place.
// The exact string "llvm.vectorizer.unroll" becomes
// "llvm.loop.interleave.count"; any other string that begins with
// "llvm.vectorizer." has that prefix replaced by "llvm.loop.vectorize.".
// Strings without the prefix are left untouched by the code shown.
// NOTE(review): the closing braces are on lines missing from this listing.
900 void llvm::UpgradeMDStringConstant(std::string &String) {
901 const std::string OldPrefix = "llvm.vectorizer.";
// The special case is tested first because it also starts with OldPrefix.
902 if (String == "llvm.vectorizer.unroll") {
903 String = "llvm.loop.interleave.count";
// find(...) == 0 is true only when the prefix sits at the very start.
904 } else if (String.find(OldPrefix) == 0) {
905 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");