Remove NEON vmovn intrinsic, replacing it with vector truncate operations.

author Bob Wilson <bob.wilson@apple.com>

Mon, 30 Aug 2010 20:02:30 +0000 (20:02 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Mon, 30 Aug 2010 20:02:30 +0000 (20:02 +0000)
author Bob Wilson <bob.wilson@apple.com>
Mon, 30 Aug 2010 20:02:30 +0000 (20:02 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Mon, 30 Aug 2010 20:02:30 +0000 (20:02 +0000)
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td

index 2528eaa973896bd2348ed16b724506f3a3032cfd..fc302e64d5017f96794b6fa60976e39618d67e39 100644 (file)
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -303,7 +303,6 @@ def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
  def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
  
  // Narrowing and Lengthening Vector Moves.
-def int_arm_neon_vmovn : Neon_1Arg_Narrow_Intrinsic;
  def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
  def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
  def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 741df7e9d4a3ccd995770370d9f4628f59e56c36..113291176cb84fd90813f66a66bd12d5ec5992ea 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -956,6 +956,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
          (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
          [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
  
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType TyD, ValueType TyQ, SDNode OpNode>
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
+        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
+        [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+
  // Narrow 2-register intrinsics.
  class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -1579,6 +1588,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
  }
  
  
+// Neon Narrowing 2-register vector operations,
+//   source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                    bits<5> op11_7, bit op6, bit op4, 
+                    InstrItinClass itin, string OpcodeStr, string Dt,
+                    SDNode OpNode> {
+  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+                   itin, OpcodeStr, !strconcat(Dt, "16"),
+                   v8i8, v8i16, OpNode>;
+  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+                   itin, OpcodeStr, !strconcat(Dt, "32"),
+                   v4i16, v4i32, OpNode>;
+  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+                   itin, OpcodeStr, !strconcat(Dt, "64"),
+                   v2i32, v2i64, OpNode>;
+}
+
  // Neon Narrowing 2-register vector intrinsics,
  //   source operand element sizes of 16, 32 and 64 bits:
  multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -3221,8 +3247,8 @@ def  VDUPfqf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
                      [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
  
  //   VMOVN    : Vector Narrowing Move
-defm VMOVN    : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
-                            "vmovn", "i", int_arm_neon_vmovn>;
+defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+                         "vmovn", "i", trunc>;
  //   VQMOVN   : Vector Saturating Narrowing Move
  defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                              "vqmovn", "s", int_arm_neon_vqmovns>;
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp

index 62a46259ac259e57676d5c1c261a7b4de793f2bd..c99433965338dde780c045c53cdb2a64dd8802ee 100644 (file)
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -88,7 +88,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
            ((Name.compare(14, 5, "vaddw", 5) == 0 ||
              Name.compare(14, 5, "vsubw", 5) == 0) &&
             (Name.compare(19, 2, "s.", 2) == 0 ||
-            Name.compare(19, 2, "u.", 2) == 0))) {
+            Name.compare(19, 2, "u.", 2) == 0)) ||
+
+          (Name.compare(14, 6, "vmovn.", 6) == 0)) {
  
          // Calls to these are transformed into IR without intrinsics.
          NewFn = 0;
@@ -401,6 +403,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
          else
            NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
  
+      } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
+        NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
+                             "upgraded." + CI->getName(), CI);
        } else {
          llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
        }
diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll

index d6d03d0970f56b1b68982ac73cc79e67810954ba..eafa94bc8ec53dd3c47852545fccffcf69c63a12 100644 (file)
--- a/test/Bitcode/neon-intrinsics.ll
+++ b/test/Bitcode/neon-intrinsics.ll
@@ -76,6 +76,20 @@
  ; CHECK: zext <4 x i16>
  ; CHECK-NEXT: sub <4 x i32>
  
+; vmovn should be auto-upgraded to trunc
+
+; CHECK: vmovni16
+; CHECK-NOT: arm.neon.vmovn.v8i8
+; CHECK: trunc <8 x i16>
+
+; CHECK: vmovni32
+; CHECK-NOT: arm.neon.vmovn.v4i16
+; CHECK: trunc <4 x i32>
+
+; CHECK: vmovni64
+; CHECK-NOT: arm.neon.vmovn.v2i32
+; CHECK: trunc <2 x i64>
+
  ; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1)
  
  ; CHECK: vld1i8
diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc

index bc01eb540e6c2621b85d1ab3fa8a4104d15398c3..0441aa16698aaf943236790ed5434eb00765f913 100644 (file)

Binary files a/test/Bitcode/neon-intrinsics.ll.bc and b/test/Bitcode/neon-intrinsics.ll.bc differ
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll

index f863bf8a092c73f071aee886de27a7eba4309e0c..8cd94576b0c2142385351b7a26618d20636dca1b 100644 (file)
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -240,7 +240,7 @@ define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
  ;CHECK: vmovni16:
  ;CHECK: vmovn.i16
         %tmp1 = load <8 x i16>* %A
-       %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+       %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
         ret <8 x i8> %tmp2
  }
  
@@ -248,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
  ;CHECK: vmovni32:
  ;CHECK: vmovn.i32
         %tmp1 = load <4 x i32>* %A
-       %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+       %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
         ret <4 x i16> %tmp2
  }
  
@@ -256,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
  ;CHECK: vmovni64:
  ;CHECK: vmovn.i64
         %tmp1 = load <2 x i64>* %A
-       %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+       %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
         ret <2 x i32> %tmp2
  }
  
-declare <8 x i8>  @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
-
  define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
  ;CHECK: vqmovns16:
  ;CHECK: vqmovn.s16
author	Bob Wilson <bob.wilson@apple.com>
	Mon, 30 Aug 2010 20:02:30 +0000 (20:02 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Mon, 30 Aug 2010 20:02:30 +0000 (20:02 +0000)
include/llvm/IntrinsicsARM.td		patch | blob | history
lib/Target/ARM/ARMInstrNEON.td		patch | blob | history
lib/VMCore/AutoUpgrade.cpp		patch | blob | history
test/Bitcode/neon-intrinsics.ll		patch | blob | history
test/Bitcode/neon-intrinsics.ll.bc		patch | blob | history
test/CodeGen/ARM/vmov.ll		patch | blob | history