Remove x86_sse42_crc32_64_8 intrinsic. It has no functional difference from x86_sse42...
[oota-llvm.git] / lib / IR / AutoUpgrade.cpp
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the auto-upgrade helper functions
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/AutoUpgrade.h"
15 #include "llvm/IR/Constants.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instruction.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/CFG.h"
23 #include "llvm/Support/CallSite.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include <cstring>
26 using namespace llvm;
27
28 // Upgrade the declarations of the SSE4.1 functions whose arguments have
29 // changed their type from v4f32 to v2i64.
30 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
31                                  Function *&NewFn) {
32   // Check whether this is an old version of the function, which received
33   // v4f32 arguments.
34   Type *Arg0Type = F->getFunctionType()->getParamType(0);
35   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
36     return false;
37
38   // Yes, it's old, replace it with new version.
39   F->setName(F->getName() + ".old");
40   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
41   return true;
42 }
43
44 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
45   assert(F && "Illegal to upgrade a non-existent Function.");
46
47   // Quickly eliminate it, if it's not a candidate.
48   StringRef Name = F->getName();
49   if (Name.size() <= 8 || !Name.startswith("llvm."))
50     return false;
51   Name = Name.substr(5); // Strip off "llvm."
52
53   switch (Name[0]) {
54   default: break;
55   case 'a': {
56     if (Name.startswith("arm.neon.vclz")) {
57       Type* args[2] = {
58         F->arg_begin()->getType(),
59         Type::getInt1Ty(F->getContext())
60       };
61       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
62       // the end of the name. Change name from llvm.arm.neon.vclz.* to
63       //  llvm.ctlz.*
64       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
65       NewFn = Function::Create(fType, F->getLinkage(),
66                                "llvm.ctlz." + Name.substr(14), F->getParent());
67       return true;
68     }
69     if (Name.startswith("arm.neon.vcnt")) {
70       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
71                                         F->arg_begin()->getType());
72       return true;
73     }
74     break;
75   }
76   case 'c': {
77     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
78       F->setName(Name + ".old");
79       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
80                                         F->arg_begin()->getType());
81       return true;
82     }
83     if (Name.startswith("cttz.") && F->arg_size() == 1) {
84       F->setName(Name + ".old");
85       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
86                                         F->arg_begin()->getType());
87       return true;
88     }
89     break;
90   }
91   case 'o':
92     // We only need to change the name to match the mangling including the
93     // address space.
94     if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
95       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
96       if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
97         F->setName(Name + ".old");
98         NewFn = Intrinsic::getDeclaration(F->getParent(),
99                                           Intrinsic::objectsize, Tys);
100         return true;
101       }
102     }
103     break;
104
105   case 'x': {
106     if (Name.startswith("x86.sse2.pcmpeq.") ||
107         Name.startswith("x86.sse2.pcmpgt.") ||
108         Name.startswith("x86.avx2.pcmpeq.") ||
109         Name.startswith("x86.avx2.pcmpgt.") ||
110         Name.startswith("x86.avx.vpermil.") ||
111         Name == "x86.avx.movnt.dq.256" ||
112         Name == "x86.avx.movnt.pd.256" ||
113         Name == "x86.avx.movnt.ps.256" ||
114         Name == "x86.sse42.crc32.64.8" ||
115         (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
116       NewFn = 0;
117       return true;
118     }
119     // SSE4.1 ptest functions may have an old signature.
120     if (Name.startswith("x86.sse41.ptest")) {
121       if (Name == "x86.sse41.ptestc")
122         return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
123       if (Name == "x86.sse41.ptestz")
124         return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
125       if (Name == "x86.sse41.ptestnzc")
126         return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
127     }
128     // frcz.ss/sd may need to have an argument dropped
129     if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
130       F->setName(Name + ".old");
131       NewFn = Intrinsic::getDeclaration(F->getParent(),
132                                         Intrinsic::x86_xop_vfrcz_ss);
133       return true;
134     }
135     if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
136       F->setName(Name + ".old");
137       NewFn = Intrinsic::getDeclaration(F->getParent(),
138                                         Intrinsic::x86_xop_vfrcz_sd);
139       return true;
140     }
141     // Fix the FMA4 intrinsics to remove the 4
142     if (Name.startswith("x86.fma4.")) {
143       F->setName("llvm.x86.fma" + Name.substr(8));
144       NewFn = F;
145       return true;
146     }
147     break;
148   }
149   }
150
151   //  This may not belong here. This function is effectively being overloaded
152   //  to both detect an intrinsic which needs upgrading, and to provide the
153   //  upgraded form of the intrinsic. We should perhaps have two separate
154   //  functions for this.
155   return false;
156 }
157
158 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
159   NewFn = 0;
160   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
161
162   // Upgrade intrinsic attributes.  This does not change the function.
163   if (NewFn)
164     F = NewFn;
165   if (unsigned id = F->getIntrinsicID())
166     F->setAttributes(Intrinsic::getAttributes(F->getContext(),
167                                               (Intrinsic::ID)id));
168   return Upgraded;
169 }
170
171 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
172   // Nothing to do yet.
173   return false;
174 }
175
176 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
177 // upgraded intrinsic. All argument and return casting must be provided in
178 // order to seamlessly integrate with existing context.
179 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
180   Function *F = CI->getCalledFunction();
181   LLVMContext &C = CI->getContext();
182   IRBuilder<> Builder(C);
183   Builder.SetInsertPoint(CI->getParent(), CI);
184
185   assert(F && "Intrinsic call is not direct?");
186
187   if (!NewFn) {
188     // Get the Function's name.
189     StringRef Name = F->getName();
190
191     Value *Rep;
192     // Upgrade packed integer vector compares intrinsics to compare instructions
193     if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
194         Name.startswith("llvm.x86.avx2.pcmpeq.")) {
195       Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
196                                  "pcmpeq");
197       // need to sign extend since icmp returns vector of i1
198       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
199     } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
200                Name.startswith("llvm.x86.avx2.pcmpgt.")) {
201       Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
202                                   "pcmpgt");
203       // need to sign extend since icmp returns vector of i1
204       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
205     } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
206                Name == "llvm.x86.avx.movnt.ps.256" ||
207                Name == "llvm.x86.avx.movnt.pd.256") {
208       IRBuilder<> Builder(C);
209       Builder.SetInsertPoint(CI->getParent(), CI);
210
211       Module *M = F->getParent();
212       SmallVector<Value *, 1> Elts;
213       Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
214       MDNode *Node = MDNode::get(C, Elts);
215
216       Value *Arg0 = CI->getArgOperand(0);
217       Value *Arg1 = CI->getArgOperand(1);
218
219       // Convert the type of the pointer to a pointer to the stored type.
220       Value *BC = Builder.CreateBitCast(Arg0,
221                                         PointerType::getUnqual(Arg1->getType()),
222                                         "cast");
223       StoreInst *SI = Builder.CreateStore(Arg1, BC);
224       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
225       SI->setAlignment(16);
226
227       // Remove intrinsic.
228       CI->eraseFromParent();
229       return;
230     } else if (Name.startswith("llvm.x86.xop.vpcom")) {
231       Intrinsic::ID intID;
232       if (Name.endswith("ub"))
233         intID = Intrinsic::x86_xop_vpcomub;
234       else if (Name.endswith("uw"))
235         intID = Intrinsic::x86_xop_vpcomuw;
236       else if (Name.endswith("ud"))
237         intID = Intrinsic::x86_xop_vpcomud;
238       else if (Name.endswith("uq"))
239         intID = Intrinsic::x86_xop_vpcomuq;
240       else if (Name.endswith("b"))
241         intID = Intrinsic::x86_xop_vpcomb;
242       else if (Name.endswith("w"))
243         intID = Intrinsic::x86_xop_vpcomw;
244       else if (Name.endswith("d"))
245         intID = Intrinsic::x86_xop_vpcomd;
246       else if (Name.endswith("q"))
247         intID = Intrinsic::x86_xop_vpcomq;
248       else
249         llvm_unreachable("Unknown suffix");
250
251       Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
252       unsigned Imm;
253       if (Name.startswith("lt"))
254         Imm = 0;
255       else if (Name.startswith("le"))
256         Imm = 1;
257       else if (Name.startswith("gt"))
258         Imm = 2;
259       else if (Name.startswith("ge"))
260         Imm = 3;
261       else if (Name.startswith("eq"))
262         Imm = 4;
263       else if (Name.startswith("ne"))
264         Imm = 5;
265       else if (Name.startswith("true"))
266         Imm = 6;
267       else if (Name.startswith("false"))
268         Imm = 7;
269       else
270         llvm_unreachable("Unknown condition");
271
272       Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
273       Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
274                                 CI->getArgOperand(1), Builder.getInt8(Imm));
275     } else if (Name == "llvm.x86.sse42.crc32.64.8") {
276       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
277                                                Intrinsic::x86_sse42_crc32_32_8);
278       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
279       Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
280       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
281     } else {
282       bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
283       if (Name == "llvm.x86.avx.vpermil.pd.256")
284         PD256 = true;
285       else if (Name == "llvm.x86.avx.vpermil.pd")
286         PD128 = true;
287       else if (Name == "llvm.x86.avx.vpermil.ps.256")
288         PS256 = true;
289       else if (Name == "llvm.x86.avx.vpermil.ps")
290         PS128 = true;
291
292       if (PD256 || PD128 || PS256 || PS128) {
293         Value *Op0 = CI->getArgOperand(0);
294         unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
295         SmallVector<Constant*, 8> Idxs;
296
297         if (PD128)
298           for (unsigned i = 0; i != 2; ++i)
299             Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
300         else if (PD256)
301           for (unsigned l = 0; l != 4; l+=2)
302             for (unsigned i = 0; i != 2; ++i)
303               Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
304         else if (PS128)
305           for (unsigned i = 0; i != 4; ++i)
306             Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
307         else if (PS256)
308           for (unsigned l = 0; l != 8; l+=4)
309             for (unsigned i = 0; i != 4; ++i)
310               Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
311         else
312           llvm_unreachable("Unexpected function");
313
314         Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
315       } else {
316         llvm_unreachable("Unknown function for CallInst upgrade.");
317       }
318     }
319
320     CI->replaceAllUsesWith(Rep);
321     CI->eraseFromParent();
322     return;
323   }
324
325   std::string Name = CI->getName().str();
326   CI->setName(Name + ".old");
327
328   switch (NewFn->getIntrinsicID()) {
329   default:
330     llvm_unreachable("Unknown function for CallInst upgrade.");
331
332   case Intrinsic::ctlz:
333   case Intrinsic::cttz:
334     assert(CI->getNumArgOperands() == 1 &&
335            "Mismatch between function args and call args");
336     CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
337                                                Builder.getFalse(), Name));
338     CI->eraseFromParent();
339     return;
340
341   case Intrinsic::objectsize:
342     CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
343                                                CI->getArgOperand(0),
344                                                CI->getArgOperand(1),
345                                                Name));
346     CI->eraseFromParent();
347     return;
348
349   case Intrinsic::arm_neon_vclz: {
350     // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
351     CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
352                                                Builder.getFalse(),
353                                                "llvm.ctlz." + Name.substr(14)));
354     CI->eraseFromParent();
355     return;
356   }
357   case Intrinsic::ctpop: {
358     CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
359     CI->eraseFromParent();
360     return;
361   }
362
363   case Intrinsic::x86_xop_vfrcz_ss:
364   case Intrinsic::x86_xop_vfrcz_sd:
365     CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
366                                               Name));
367     CI->eraseFromParent();
368     return;
369
370   case Intrinsic::x86_sse41_ptestc:
371   case Intrinsic::x86_sse41_ptestz:
372   case Intrinsic::x86_sse41_ptestnzc: {
373     // The arguments for these intrinsics used to be v4f32, and changed
374     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
375     // So, the only thing required is a bitcast for both arguments.
376     // First, check the arguments have the old type.
377     Value *Arg0 = CI->getArgOperand(0);
378     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
379       return;
380
381     // Old intrinsic, add bitcasts
382     Value *Arg1 = CI->getArgOperand(1);
383
384     Value *BC0 =
385       Builder.CreateBitCast(Arg0,
386                             VectorType::get(Type::getInt64Ty(C), 2),
387                             "cast");
388     Value *BC1 =
389       Builder.CreateBitCast(Arg1,
390                             VectorType::get(Type::getInt64Ty(C), 2),
391                             "cast");
392
393     CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
394     CI->replaceAllUsesWith(NewCall);
395     CI->eraseFromParent();
396     return;
397   }
398   }
399 }
400
401 // This tests each Function to determine if it needs upgrading. When we find
402 // one we are interested in, we then upgrade all calls to reflect the new
403 // function.
404 void llvm::UpgradeCallsToIntrinsic(Function* F) {
405   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
406
407   // Upgrade the function and check if it is a totaly new function.
408   Function *NewFn;
409   if (UpgradeIntrinsicFunction(F, NewFn)) {
410     if (NewFn != F) {
411       // Replace all uses to the old function with the new one if necessary.
412       for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
413            UI != UE; ) {
414         if (CallInst *CI = dyn_cast<CallInst>(*UI++))
415           UpgradeIntrinsicCall(CI, NewFn);
416       }
417       // Remove old function, no longer used, from the module.
418       F->eraseFromParent();
419     }
420   }
421 }
422
423 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
424   MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
425   assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
426   // Check if the tag uses struct-path aware TBAA format.
427   if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
428     return;
429
430   if (MD->getNumOperands() == 3) {
431     Value *Elts[] = {
432       MD->getOperand(0),
433       MD->getOperand(1)
434     };
435     MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
436     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
437     Value *Elts2[] = {
438       ScalarType, ScalarType,
439       Constant::getNullValue(Type::getInt64Ty(I->getContext())),
440       MD->getOperand(2)
441     };
442     I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
443   } else {
444     // Create a MDNode <MD, MD, offset 0>
445     Value *Elts[] = {MD, MD,
446       Constant::getNullValue(Type::getInt64Ty(I->getContext()))};
447     I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
448   }
449 }