Remove NEON vmovn intrinsic, replacing it with vector truncate operations.
[oota-llvm.git] / lib / VMCore / AutoUpgrade.cpp
index 4f4daeba4d37042847dc717a162c88dcf3532dc2..c99433965338dde780c045c53cdb2a64dd8802ee 100644 (file)
@@ -78,6 +78,57 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         NewFn = F;
         return true;
       }
+    } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+      if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
+            Name.compare(14, 5, "vaddl", 5) == 0 ||
+            Name.compare(14, 5, "vsubl", 5) == 0) &&
+           (Name.compare(19, 2, "s.", 2) == 0 ||
+            Name.compare(19, 2, "u.", 2) == 0)) ||
+
+          ((Name.compare(14, 5, "vaddw", 5) == 0 ||
+            Name.compare(14, 5, "vsubw", 5) == 0) &&
+           (Name.compare(19, 2, "s.", 2) == 0 ||
+            Name.compare(19, 2, "u.", 2) == 0)) ||
+
+          (Name.compare(14, 6, "vmovn.", 6) == 0)) {
+
+        // Calls to these are transformed into IR without intrinsics.
+        NewFn = 0;
+        return true;
+      }
+      // Old versions of NEON ld/st intrinsics are missing alignment arguments.
+      bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
+      bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
+      if (isVLd || isVSt) {
+        unsigned NumVecs = Name.at(17) - '0';
+        if (NumVecs == 0 || NumVecs > 4)
+          return false;
+        bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
+        if (!isLaneOp && Name.at(18) != '.')
+          return false;
+        unsigned ExpectedArgs = 2; // for the address and alignment
+        if (isVSt || isLaneOp)
+          ExpectedArgs += NumVecs;
+        if (isLaneOp)
+          ExpectedArgs += 1; // for the lane number
+        unsigned NumP = FTy->getNumParams();
+        if (NumP != ExpectedArgs - 1)
+          return false;
+
+        // Change the name of the old (bad) intrinsic, because 
+        // its type is incorrect, but we cannot overload that name.
+        F->setName("");
+
+        // One argument is missing: add the alignment argument.
+        std::vector<const Type*> NewParams;
+        for (unsigned p = 0; p < NumP; ++p)
+          NewParams.push_back(FTy->getParamType(p));
+        NewParams.push_back(Type::getInt32Ty(F->getContext()));
+        FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
+                                                 NewParams, false);
+        NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
+        return true;
+      }
     }
     break;
   case 'b':
@@ -182,7 +233,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         NewFnName = "llvm.memset.p0i8.i64";
     }
     if (NewFnName) {
-      const FunctionType *FTy = F->getFunctionType();
       NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, 
                                             FTy->getReturnType(),
                                             FTy->getParamType(0),
@@ -320,6 +370,52 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
+    // Get the Function's name.
+    const std::string& Name = F->getName();
+
+    // Upgrade ARM NEON intrinsics.
+    if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+      Instruction *NewI;
+      if (Name.compare(14, 7, "vmovls.", 7) == 0) {
+        NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
+                            "upgraded." + CI->getName(), CI);
+      } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
+        NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
+                            "upgraded." + CI->getName(), CI);
+
+      } else if (Name.compare(14, 4, "vadd", 4) == 0 ||
+                 Name.compare(14, 4, "vsub", 4) == 0) {
+        // Extend one (vaddw/vsubw) or both (vaddl/vsubl) operands.
+        Value *V0 = CI->getArgOperand(0);
+        Value *V1 = CI->getArgOperand(1);
+        if (Name.at(19) == 's') {
+          if (Name.at(18) == 'l')
+            V0 = new SExtInst(CI->getArgOperand(0), CI->getType(), "", CI);
+          V1 = new SExtInst(CI->getArgOperand(1), CI->getType(), "", CI);
+        } else {
+          assert(Name.at(19) == 'u' && "unexpected vadd/vsub intrinsic");
+          if (Name.at(18) == 'l')
+            V0 = new ZExtInst(CI->getArgOperand(0), CI->getType(), "", CI);
+          V1 = new ZExtInst(CI->getArgOperand(1), CI->getType(), "", CI);
+        }
+        if (Name.compare(14, 4, "vadd", 4) == 0)
+          NewI = BinaryOperator::CreateAdd(V0, V1,"upgraded."+CI->getName(),CI);
+        else
+          NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
+
+      } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
+        NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
+                             "upgraded." + CI->getName(), CI);
+      } else {
+        llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
+      }
+      // Replace any uses of the old CallInst.
+      if (!CI->use_empty())
+        CI->replaceAllUsesWith(NewI);
+      CI->eraseFromParent();
+      return;
+    }
+
     bool isLoadH = false, isLoadL = false, isMovL = false;
     bool isMovSD = false, isShufPD = false;
     bool isUnpckhPD = false, isUnpcklPD = false;
@@ -549,6 +645,39 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
 
   switch (NewFn->getIntrinsicID()) {
   default:  llvm_unreachable("Unknown function for CallInst upgrade.");
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    // Add a default alignment argument of 1.
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
+    Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
+                                       CI->getName(), CI);
+    NewCI->setTailCall(CI->isTailCall());
+    NewCI->setCallingConv(CI->getCallingConv());
+
+    //  Handle any uses of the old CallInst.
+    if (!CI->use_empty())
+      //  Replace all uses of the old call with the new cast which has the 
+      //  correct type.
+      CI->replaceAllUsesWith(NewCI);
+    
+    //  Clean up the old call now that it has been completely upgraded.
+    CI->eraseFromParent();
+    break;
+  }        
+
   case Intrinsic::x86_mmx_psll_d:
   case Intrinsic::x86_mmx_psll_q:
   case Intrinsic::x86_mmx_psll_w: