1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
// Upgrade an SSE4.1 intrinsic declaration whose first argument was v4f32 in
// old IR but is v2i64 in the current definition. Returns true (via the elided
// tail) when an upgrade was performed and NewFn points at the new declaration.
// NOTE(review): original line numbering has gaps here — the `Function *&NewFn)`
// signature tail and the early/final `return` lines are not visible in this view.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
// Old form took <4 x float>; if arg 0 is not v4f32 this is already current.
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
// Rename the old declaration out of the way so getDeclaration can create
// the properly-typed replacement under the canonical intrinsic name.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
// Upgrade x86 intrinsics whose trailing immediate-mask operand was declared
// as i32 in old IR but is i8 in the current definition. Detection key: the
// last parameter is still i32. NOTE(review): gaps in the original numbering —
// the `Function *&NewFn)` tail and the return statements are elided from view.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
// Already i8 (or something else) -> nothing to upgrade.
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
// Rename old decl so the canonical name is free for the new declaration.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Detect whether F is a deprecated intrinsic declaration and, when the
// upgrade requires a new declaration, produce it in NewFn. Intrinsics that
// are upgraded purely at the call site (the big name list below) leave NewFn
// null; UpgradeIntrinsicCall does the per-call rewrite. NOTE(review): the
// original numbering has gaps throughout — most `return` statements, some
// closing braces, and several branch bodies are elided from this view.
65 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
66 assert(F && "Illegal to upgrade a non-existent Function.");
68 // Quickly eliminate it, if it's not a candidate.
// Only names of the form "llvm.<something>" can be auto-upgraded.
69 StringRef Name = F->getName();
70 if (Name.size() <= 8 || !Name.startswith("llvm."))
72 Name = Name.substr(5); // Strip off "llvm."
// ARM NEON vclz -> generic llvm.ctlz (second i1 arg = is_zero_undef).
77 if (Name.startswith("arm.neon.vclz")) {
79 F->arg_begin()->getType(),
80 Type::getInt1Ty(F->getContext())
82 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
83 // the end of the name. Change name from llvm.arm.neon.vclz.* to
85 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
86 NewFn = Function::Create(fType, F->getLinkage(),
87 "llvm.ctlz." + Name.substr(14), F->getParent());
// ARM NEON vcnt -> generic llvm.ctpop.
90 if (Name.startswith("arm.neon.vcnt")) {
91 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
92 F->arg_begin()->getType());
// One-arg ctlz/cttz predate the is_zero_undef flag; remap to the two-arg form.
98 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
99 F->setName(Name + ".old");
100 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
101 F->arg_begin()->getType());
104 if (Name.startswith("cttz.") && F->arg_size() == 1) {
105 F->setName(Name + ".old");
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
107 F->arg_begin()->getType());
114 // We only need to change the name to match the mangling including the
// objectsize: re-mangle the name to include both return and pointer types.
116 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
117 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
118 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
119 F->setName(Name + ".old");
120 NewFn = Intrinsic::getDeclaration(F->getParent(),
121 Intrinsic::objectsize, Tys);
// These x86 intrinsics are expanded at the call site (NewFn stays null);
// the corresponding rewrites live in UpgradeIntrinsicCall.
128 if (Name.startswith("x86.sse2.pcmpeq.") ||
129 Name.startswith("x86.sse2.pcmpgt.") ||
130 Name.startswith("x86.avx2.pcmpeq.") ||
131 Name.startswith("x86.avx2.pcmpgt.") ||
132 Name.startswith("x86.avx.vpermil.") ||
133 Name == "x86.avx.vinsertf128.pd.256" ||
134 Name == "x86.avx.vinsertf128.ps.256" ||
135 Name == "x86.avx.vinsertf128.si.256" ||
136 Name == "x86.avx2.vinserti128" ||
137 Name == "x86.avx.vextractf128.pd.256" ||
138 Name == "x86.avx.vextractf128.ps.256" ||
139 Name == "x86.avx.vextractf128.si.256" ||
140 Name == "x86.avx2.vextracti128" ||
141 Name == "x86.avx.movnt.dq.256" ||
142 Name == "x86.avx.movnt.pd.256" ||
143 Name == "x86.avx.movnt.ps.256" ||
144 Name == "x86.sse42.crc32.64.8" ||
145 Name == "x86.avx.vbroadcast.ss" ||
146 Name == "x86.avx.vbroadcast.ss.256" ||
147 Name == "x86.avx.vbroadcast.sd.256" ||
148 Name == "x86.sse2.psll.dq" ||
149 Name == "x86.sse2.psrl.dq" ||
150 Name == "x86.avx2.psll.dq" ||
151 Name == "x86.avx2.psrl.dq" ||
152 Name == "x86.sse2.psll.dq.bs" ||
153 Name == "x86.sse2.psrl.dq.bs" ||
154 Name == "x86.avx2.psll.dq.bs" ||
155 Name == "x86.avx2.psrl.dq.bs" ||
156 Name == "x86.sse41.pblendw" ||
157 Name == "x86.sse41.blendpd" ||
158 Name == "x86.sse41.blendps" ||
159 Name == "x86.avx.blend.pd.256" ||
160 Name == "x86.avx.blend.ps.256" ||
161 Name == "x86.avx2.pblendw" ||
162 Name == "x86.avx2.pblendd.128" ||
163 Name == "x86.avx2.pblendd.256" ||
164 Name == "x86.avx2.vbroadcasti128" ||
165 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
169 // SSE4.1 ptest functions may have an old signature.
170 if (Name.startswith("x86.sse41.ptest")) {
171 if (Name == "x86.sse41.ptestc")
172 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
173 if (Name == "x86.sse41.ptestz")
174 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
175 if (Name == "x86.sse41.ptestnzc")
176 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
178 // Several blend and other instructions with masks used the wrong number of
// i32 immediate masks narrowed to i8 — see UpgradeX86IntrinsicsWith8BitMask.
180 if (Name == "x86.sse41.insertps")
181 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
183 if (Name == "x86.sse41.dppd")
184 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
186 if (Name == "x86.sse41.dpps")
187 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
189 if (Name == "x86.sse41.mpsadbw")
190 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
192 if (Name == "x86.avx.dp.ps.256")
193 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
195 if (Name == "x86.avx2.mpsadbw")
196 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
199 // frcz.ss/sd may need to have an argument dropped
// Two-arg vfrcz.ss/sd are old; the current intrinsic takes one operand.
200 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
201 F->setName(Name + ".old");
202 NewFn = Intrinsic::getDeclaration(F->getParent(),
203 Intrinsic::x86_xop_vfrcz_ss);
206 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
207 F->setName(Name + ".old");
208 NewFn = Intrinsic::getDeclaration(F->getParent(),
209 Intrinsic::x86_xop_vfrcz_sd);
212 // Fix the FMA4 intrinsics to remove the 4
// "llvm.x86.fma4.*" -> "llvm.x86.fma.*" (rename only; same signature).
213 if (Name.startswith("x86.fma4.")) {
214 F->setName("llvm.x86.fma" + Name.substr(8));
222 // This may not belong here. This function is effectively being overloaded
223 // to both detect an intrinsic which needs upgrading, and to provide the
224 // upgraded form of the intrinsic. We should perhaps have two separate
225 // functions for this.
// Public entry point: run the upgrade detection, then refresh the intrinsic's
// attribute list so even non-upgraded declarations carry current attributes.
// NOTE(review): the return statement is elided from this view (numbering gap).
229 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
231 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
233 // Upgrade intrinsic attributes. This does not change the function.
// id != 0 means F is a recognized intrinsic; re-derive its attribute set.
236 if (unsigned id = F->getIntrinsicID())
237 F->setAttributes(Intrinsic::getAttributes(F->getContext(),
// Hook for upgrading deprecated global variables; currently a no-op.
// NOTE(review): the `return false;` / closing brace are elided from this view.
242 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
243 // Nothing to do yet.
247 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// Expand an SSE2/AVX2 PSLLDQ (byte shift left) intrinsic into a bitcast +
// shufflevector-with-zero sequence. NumLanes is 1 for 128-bit, 2 for 256-bit;
// Shift is in bytes. NOTE(review): numbering gaps — the `if (Shift < 16)`
// guard and some closing braces implied by the comments are not visible here.
249 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
250 Value *Op, unsigned NumLanes,
252 // Each lane is 16 bytes.
253 unsigned NumElts = NumLanes * 16;
255 // Bitcast from a 64-bit element type to a byte element type.
256 Op = Builder.CreateBitCast(Op,
257 VectorType::get(Type::getInt8Ty(C), NumElts),
259 // We'll be shuffling in zeroes.
260 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
262 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
263 // we'll just return the zero vector.
265 SmallVector<Constant*, 32> Idxs;
266 // 256-bit version is split into two 16-byte lanes.
267 for (unsigned l = 0; l != NumElts; l += 16)
268 for (unsigned i = 0; i != 16; ++i) {
// Indices >= NumElts select from the second shuffle operand (Op).
269 unsigned Idx = NumElts + i - Shift;
271 Idx -= NumElts - 16; // end of lane, switch operand.
272 Idxs.push_back(Builder.getInt32(Idx + l));
275 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
278 // Bitcast back to a 64-bit element type.
279 return Builder.CreateBitCast(Res,
280 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
284 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// Expand an SSE2/AVX2 PSRLDQ (byte shift right) intrinsic, mirror of the
// PSLLDQ expansion above but with Op as the first shuffle operand and the
// index direction reversed. Shift is in bytes. NOTE(review): numbering gaps —
// the `if (Shift < 16)` guard and some braces are not visible in this view.
286 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
287 Value *Op, unsigned NumLanes,
289 // Each lane is 16 bytes.
290 unsigned NumElts = NumLanes * 16;
292 // Bitcast from a 64-bit element type to a byte element type.
293 Op = Builder.CreateBitCast(Op,
294 VectorType::get(Type::getInt8Ty(C), NumElts),
296 // We'll be shuffling in zeroes.
297 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
299 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
300 // we'll just return the zero vector.
302 SmallVector<Constant*, 32> Idxs;
303 // 256-bit version is split into two 16-byte lanes.
304 for (unsigned l = 0; l != NumElts; l += 16)
305 for (unsigned i = 0; i != 16; ++i) {
// Indices >= 16 within a lane spill into the zero vector (second operand).
306 unsigned Idx = i + Shift;
308 Idx += NumElts - 16; // end of lane, switch operand.
309 Idxs.push_back(Builder.getInt32(Idx + l));
312 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
315 // Bitcast back to a 64-bit element type.
316 return Builder.CreateBitCast(Res,
317 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
321 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
322 // upgraded intrinsic. All argument and return casting must be provided in
323 // order to seamlessly integrate with existing context.
// Rewrite one call to a deprecated intrinsic. Two regimes:
//  * NewFn == nullptr (presumably — the guard is elided from this view): the
//    old intrinsic is expanded inline into generic IR (Rep) which replaces CI.
//  * NewFn != nullptr: the call is re-emitted against the new declaration,
//    with argument/return casts as needed (the switch near the end).
// NOTE(review): the original numbering has many gaps — declarations of Rep,
// several closing braces, `return` statements, and some assignments (e.g. the
// Imm values in the vpcom condition chain) are not visible in this view.
324 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
325 Function *F = CI->getCalledFunction();
326 LLVMContext &C = CI->getContext();
327 IRBuilder<> Builder(C);
328 Builder.SetInsertPoint(CI->getParent(), CI);
330 assert(F && "Intrinsic call is not direct?");
333 // Get the Function's name.
334 StringRef Name = F->getName();
337 // Upgrade packed integer vector compares intrinsics to compare instructions
338 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
339 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
340 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
342 // need to sign extend since icmp returns vector of i1
343 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
344 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
345 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
346 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
348 // need to sign extend since icmp returns vector of i1
349 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
// movnt: non-temporal store — becomes a plain aligned store tagged with
// !nontemporal metadata.
350 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
351 Name == "llvm.x86.avx.movnt.ps.256" ||
352 Name == "llvm.x86.avx.movnt.pd.256") {
353 IRBuilder<> Builder(C);
354 Builder.SetInsertPoint(CI->getParent(), CI);
356 Module *M = F->getParent();
357 SmallVector<Metadata *, 1> Elts;
359 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
360 MDNode *Node = MDNode::get(C, Elts);
362 Value *Arg0 = CI->getArgOperand(0);
363 Value *Arg1 = CI->getArgOperand(1);
365 // Convert the type of the pointer to a pointer to the stored type.
366 Value *BC = Builder.CreateBitCast(Arg0,
367 PointerType::getUnqual(Arg1->getType()),
369 StoreInst *SI = Builder.CreateStore(Arg1, BC);
370 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
371 SI->setAlignment(16);
// Store is void: erase the call directly (no Rep replacement needed).
374 CI->eraseFromParent();
// vpcom*: the condition was encoded in the name; fold it into an i8
// immediate operand of the generic per-type vpcom intrinsic.
376 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
378 if (Name.endswith("ub"))
379 intID = Intrinsic::x86_xop_vpcomub;
380 else if (Name.endswith("uw"))
381 intID = Intrinsic::x86_xop_vpcomuw;
382 else if (Name.endswith("ud"))
383 intID = Intrinsic::x86_xop_vpcomud;
384 else if (Name.endswith("uq"))
385 intID = Intrinsic::x86_xop_vpcomuq;
386 else if (Name.endswith("b"))
387 intID = Intrinsic::x86_xop_vpcomb;
388 else if (Name.endswith("w"))
389 intID = Intrinsic::x86_xop_vpcomw;
390 else if (Name.endswith("d"))
391 intID = Intrinsic::x86_xop_vpcomd;
392 else if (Name.endswith("q"))
393 intID = Intrinsic::x86_xop_vpcomq;
395 llvm_unreachable("Unknown suffix");
397 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// Map the condition-name prefix to an immediate (Imm assignments elided).
399 if (Name.startswith("lt"))
401 else if (Name.startswith("le"))
403 else if (Name.startswith("gt"))
405 else if (Name.startswith("ge"))
407 else if (Name.startswith("eq"))
409 else if (Name.startswith("ne"))
411 else if (Name.startswith("false"))
413 else if (Name.startswith("true"))
416 llvm_unreachable("Unknown condition");
418 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
419 Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
420 CI->getArgOperand(1), Builder.getInt8(Imm));
// crc32.64.8 with i64 operands: truncate to i32, call the 32-bit form,
// zero-extend the result back.
421 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
422 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
423 Intrinsic::x86_sse42_crc32_32_8);
424 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
425 Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
426 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
427 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
428 // Replace broadcasts with a series of insertelements.
429 Type *VecTy = CI->getType();
430 Type *EltTy = VecTy->getVectorElementType();
431 unsigned EltNum = VecTy->getVectorNumElements();
432 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
433 EltTy->getPointerTo());
434 Value *Load = Builder.CreateLoad(Cast);
435 Type *I32Ty = Type::getInt32Ty(C);
436 Rep = UndefValue::get(VecTy);
437 for (unsigned I = 0; I < EltNum; ++I)
438 Rep = Builder.CreateInsertElement(Rep, Load,
439 ConstantInt::get(I32Ty, I));
440 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
441 // Replace vbroadcasts with a vector shuffle.
442 Value *Op = Builder.CreatePointerCast(
443 CI->getArgOperand(0),
444 PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
445 Value *Load = Builder.CreateLoad(Op);
// Duplicate the loaded 128-bit half into both 128-bit lanes.
446 const int Idxs[4] = { 0, 1, 0, 1 };
447 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
// Byte-shift intrinsics: ".dq" variants take the shift in bits, ".dq.bs"
// variants take it in bytes; all expand via the helpers above.
449 } else if (Name == "llvm.x86.sse2.psll.dq") {
450 // 128-bit shift left specified in bits.
451 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
452 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
453 Shift / 8); // Shift is in bits.
454 } else if (Name == "llvm.x86.sse2.psrl.dq") {
455 // 128-bit shift right specified in bits.
456 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
457 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
458 Shift / 8); // Shift is in bits.
459 } else if (Name == "llvm.x86.avx2.psll.dq") {
460 // 256-bit shift left specified in bits.
461 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
462 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
463 Shift / 8); // Shift is in bits.
464 } else if (Name == "llvm.x86.avx2.psrl.dq") {
465 // 256-bit shift right specified in bits.
466 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
467 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
468 Shift / 8); // Shift is in bits.
469 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
470 // 128-bit shift left specified in bytes.
471 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
472 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
474 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
475 // 128-bit shift right specified in bytes.
476 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
477 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
479 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
480 // 256-bit shift left specified in bytes.
481 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
482 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
484 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
485 // 256-bit shift right specified in bytes.
486 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
487 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
// blend: each mask bit selects between corresponding elements of Op0/Op1;
// expressed as a two-source shufflevector.
489 } else if (Name == "llvm.x86.sse41.pblendw" ||
490 Name == "llvm.x86.sse41.blendpd" ||
491 Name == "llvm.x86.sse41.blendps" ||
492 Name == "llvm.x86.avx.blend.pd.256" ||
493 Name == "llvm.x86.avx.blend.ps.256" ||
494 Name == "llvm.x86.avx2.pblendw" ||
495 Name == "llvm.x86.avx2.pblendd.128" ||
496 Name == "llvm.x86.avx2.pblendd.256") {
497 Value *Op0 = CI->getArgOperand(0);
498 Value *Op1 = CI->getArgOperand(1);
499 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
500 VectorType *VecTy = cast<VectorType>(CI->getType());
501 unsigned NumElts = VecTy->getNumElements();
503 SmallVector<Constant*, 16> Idxs;
504 for (unsigned i = 0; i != NumElts; ++i) {
// i%8: the 8-bit immediate repeats per 8 elements (e.g. 16-element pblendw).
505 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
506 Idxs.push_back(Builder.getInt32(Idx));
509 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
510 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
511 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
512 Name == "llvm.x86.avx.vinsertf128.si.256" ||
513 Name == "llvm.x86.avx2.vinserti128") {
514 Value *Op0 = CI->getArgOperand(0);
515 Value *Op1 = CI->getArgOperand(1);
516 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
517 VectorType *VecTy = cast<VectorType>(CI->getType());
518 unsigned NumElts = VecTy->getNumElements();
520 // Mask off the high bits of the immediate value; hardware ignores those.
523 // Extend the second operand into a vector that is twice as big.
524 Value *UndefV = UndefValue::get(Op1->getType());
525 SmallVector<Constant*, 8> Idxs;
526 for (unsigned i = 0; i != NumElts; ++i) {
527 Idxs.push_back(Builder.getInt32(i));
529 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
531 // Insert the second operand into the first operand.
533 // Note that there is no guarantee that instruction lowering will actually
534 // produce a vinsertf128 instruction for the created shuffles. In
535 // particular, the 0 immediate case involves no lane changes, so it can
536 // be handled as a blend.
538 // Example of shuffle mask for 32-bit elements:
539 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
540 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
542 SmallVector<Constant*, 8> Idxs2;
543 // The low half of the result is either the low half of the 1st operand
544 // or the low half of the 2nd operand (the inserted vector).
545 for (unsigned i = 0; i != NumElts / 2; ++i) {
546 unsigned Idx = Imm ? i : (i + NumElts);
547 Idxs2.push_back(Builder.getInt32(Idx));
549 // The high half of the result is either the low half of the 2nd operand
550 // (the inserted vector) or the high half of the 1st operand.
551 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
552 unsigned Idx = Imm ? (i + NumElts / 2) : i;
553 Idxs2.push_back(Builder.getInt32(Idx));
555 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
556 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
557 Name == "llvm.x86.avx.vextractf128.ps.256" ||
558 Name == "llvm.x86.avx.vextractf128.si.256" ||
559 Name == "llvm.x86.avx2.vextracti128") {
560 Value *Op0 = CI->getArgOperand(0);
561 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
562 VectorType *VecTy = cast<VectorType>(CI->getType());
563 unsigned NumElts = VecTy->getNumElements();
565 // Mask off the high bits of the immediate value; hardware ignores those.
568 // Get indexes for either the high half or low half of the input vector.
569 SmallVector<Constant*, 4> Idxs(NumElts);
570 for (unsigned i = 0; i != NumElts; ++i) {
571 unsigned Idx = Imm ? (i + NumElts) : i;
572 Idxs[i] = Builder.getInt32(Idx);
575 Value *UndefV = UndefValue::get(Op0->getType());
576 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// vpermil: in-lane permute driven by immediate; mask shape depends on
// element size (pd: 1 bit/elt, ps: 2 bits/elt) and vector width.
578 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
579 if (Name == "llvm.x86.avx.vpermil.pd.256")
581 else if (Name == "llvm.x86.avx.vpermil.pd")
583 else if (Name == "llvm.x86.avx.vpermil.ps.256")
585 else if (Name == "llvm.x86.avx.vpermil.ps")
588 if (PD256 || PD128 || PS256 || PS128) {
589 Value *Op0 = CI->getArgOperand(0);
590 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
591 SmallVector<Constant*, 8> Idxs;
594 for (unsigned i = 0; i != 2; ++i)
595 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
597 for (unsigned l = 0; l != 4; l+=2)
598 for (unsigned i = 0; i != 2; ++i)
599 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
601 for (unsigned i = 0; i != 4; ++i)
602 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
604 for (unsigned l = 0; l != 8; l+=4)
605 for (unsigned i = 0; i != 4; ++i)
606 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
608 llvm_unreachable("Unexpected function");
610 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
612 llvm_unreachable("Unknown function for CallInst upgrade.");
// Inline-expansion path complete: splice Rep in for the old call.
616 CI->replaceAllUsesWith(Rep);
617 CI->eraseFromParent();
// NewFn path: preserve the call's name on the new call, retire the old one.
621 std::string Name = CI->getName();
623 CI->setName(Name + ".old");
625 switch (NewFn->getIntrinsicID()) {
627 llvm_unreachable("Unknown function for CallInst upgrade.");
629 case Intrinsic::ctlz:
630 case Intrinsic::cttz:
// Old one-arg form; new form appends is_zero_undef=false (conservative).
631 assert(CI->getNumArgOperands() == 1 &&
632 "Mismatch between function args and call args");
633 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
634 Builder.getFalse(), Name));
635 CI->eraseFromParent();
638 case Intrinsic::objectsize:
639 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
640 CI->getArgOperand(0),
641 CI->getArgOperand(1),
643 CI->eraseFromParent();
646 case Intrinsic::ctpop: {
647 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
648 CI->eraseFromParent();
652 case Intrinsic::x86_xop_vfrcz_ss:
653 case Intrinsic::x86_xop_vfrcz_sd:
// Old form had an extra leading operand; keep only operand 1.
654 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
656 CI->eraseFromParent();
659 case Intrinsic::x86_sse41_ptestc:
660 case Intrinsic::x86_sse41_ptestz:
661 case Intrinsic::x86_sse41_ptestnzc: {
662 // The arguments for these intrinsics used to be v4f32, and changed
663 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
664 // So, the only thing required is a bitcast for both arguments.
665 // First, check the arguments have the old type.
666 Value *Arg0 = CI->getArgOperand(0);
667 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
670 // Old intrinsic, add bitcasts
671 Value *Arg1 = CI->getArgOperand(1);
673 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
675 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
676 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
678 Type *Ty[] = {NewVecTy, NewVecTy};
679 CallInst *NewCall = Builder.CreateCall2(
680 FunctionType::get(CI->getType(), Ty, false), NewFn, BC0, BC1, Name);
681 CI->replaceAllUsesWith(NewCall);
682 CI->eraseFromParent();
686 case Intrinsic::x86_sse41_insertps:
687 case Intrinsic::x86_sse41_dppd:
688 case Intrinsic::x86_sse41_dpps:
689 case Intrinsic::x86_sse41_mpsadbw:
690 case Intrinsic::x86_avx_dp_ps_256:
691 case Intrinsic::x86_avx2_mpsadbw: {
692 // Need to truncate the last argument from i32 to i8 -- this argument models
693 // an inherently 8-bit immediate operand to these x86 instructions.
694 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
695 CI->arg_operands().end());
697 // Replace the last argument with a trunc.
698 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
700 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
701 CI->replaceAllUsesWith(NewCall);
702 CI->eraseFromParent();
708 // This tests each Function to determine if it needs upgrading. When we find
709 // one we are interested in, we then upgrade all calls to reflect the new
// If F is an upgradable intrinsic, rewrite every call site to the new form
// and then delete the obsolete declaration from the module.
// NOTE(review): the user_iterator increment/loop tail and some braces are
// elided from this view; the iterator is advanced inside the dyn_cast line
// (*UI++) so erasure of the call does not invalidate the loop.
711 void llvm::UpgradeCallsToIntrinsic(Function* F) {
712 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
714 // Upgrade the function and check if it is a totaly new function.
716 if (UpgradeIntrinsicFunction(F, NewFn)) {
718 // Replace all uses to the old function with the new one if necessary.
719 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
721 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
722 UpgradeIntrinsicCall(CI, NewFn);
724 // Remove old function, no longer used, from the module.
725 F->eraseFromParent();
// Convert an instruction's legacy scalar TBAA tag to the struct-path aware
// format: <base, access, offset 0[, const]>. A tag whose first operand is
// already an MDNode with >= 3 operands is assumed current and left alone.
730 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
731 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
732 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
733 // Check if the tag uses struct-path aware TBAA format.
734 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// Old 3-operand form carried a "const" flag as the third operand.
737 if (MD->getNumOperands() == 3) {
738 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
739 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
740 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
741 Metadata *Elts2[] = {ScalarType, ScalarType,
742 ConstantAsMetadata::get(Constant::getNullValue(
743 Type::getInt64Ty(I->getContext()))),
745 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
747 // Create a MDNode <MD, MD, offset 0>
748 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
749 Type::getInt64Ty(I->getContext())))};
750 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Legalize an illegal bitcast between pointers of different address spaces by
// splitting it into ptrtoint (returned via Temp) + inttoptr. Returns the new
// cast instruction, or (via the elided fall-through) presumably null when the
// original bitcast is fine as-is.
754 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
755 Instruction *&Temp) {
756 if (Opc != Instruction::BitCast)
// Only addrspace-changing pointer(-vector) casts need the rewrite.
760 Type *SrcTy = V->getType();
761 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
762 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
763 LLVMContext &Context = V->getContext();
765 // We have no information about target data layout, so we assume that
766 // the maximum pointer size is 64bit.
767 Type *MidTy = Type::getInt64Ty(Context);
// Caller receives the intermediate ptrtoint through Temp for insertion.
768 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
770 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of UpgradeBitCastInst: rewrite an illegal
// cross-address-space bitcast constant as ptrtoint + inttoptr constant exprs.
776 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
777 if (Opc != Instruction::BitCast)
780 Type *SrcTy = C->getType();
781 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
782 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
783 LLVMContext &Context = C->getContext();
785 // We have no information about target data layout, so we assume that
786 // the maximum pointer size is 64bit.
787 Type *MidTy = Type::getInt64Ty(Context);
789 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
796 /// Check the debug info version number, if it is out-dated, drop the debug
797 /// info. Return true if module is modified.
798 bool llvm::UpgradeDebugInfo(Module &M) {
// Current version: nothing to strip (early return elided from this view).
799 unsigned Version = getDebugMetadataVersionFromModule(M);
800 if (Version == DEBUG_METADATA_VERSION)
// Outdated debug info is dropped entirely, and a diagnostic is emitted so
// the user knows why their debug info disappeared.
803 bool RetCode = StripDebugInfo(M);
805 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
806 M.getContext().diagnose(DiagVersion);
// Rename legacy "llvm.vectorizer.*" loop-metadata strings in place:
// "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count", and any
// other "llvm.vectorizer.X" becomes "llvm.loop.vectorize.X".
811 void llvm::UpgradeMDStringConstant(std::string &String) {
812 const std::string OldPrefix = "llvm.vectorizer.";
813 if (String == "llvm.vectorizer.unroll") {
814 String = "llvm.loop.interleave.count";
815 } else if (String.find(OldPrefix) == 0) {
// Prefix replacement keeps the suffix (e.g. ".width", ".enable") intact.
816 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");