Use vAny type to get rid of Neon intrinsics that differed only in whether the overloaded vector types allowed floating-point or integer vector elements.

author Bob Wilson <bob.wilson@apple.com>

Tue, 11 Aug 2009 05:39:44 +0000 (05:39 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Tue, 11 Aug 2009 05:39:44 +0000 (05:39 +0000)
author Bob Wilson <bob.wilson@apple.com>
Tue, 11 Aug 2009 05:39:44 +0000 (05:39 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Tue, 11 Aug 2009 05:39:44 +0000 (05:39 +0000)
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td

index d86dd087107ccf2c6c46092e2e9bc1c4c977ce65..7b7208276383bff1b5e88ff39b66eafd18537445 100644 (file)
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -27,53 +27,42 @@ let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
  
    // The following classes do not correspond directly to GCC builtins.
    class Neon_1Arg_Intrinsic
-    : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-  class Neon_1Arg_Float_Intrinsic
-    : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+    : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
    class Neon_1Arg_Narrow_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
    class Neon_1Arg_Long_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMTruncatedElementVectorType<0>], [IntrNoMem]>;
    class Neon_2Arg_Intrinsic
-    : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class Neon_2Arg_Float_Intrinsic
-    : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class Neon_2Arg_Vector_Intrinsic
      : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                  [IntrNoMem]>;
    class Neon_2Arg_Narrow_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMExtendedElementVectorType<0>,
                   LLVMExtendedElementVectorType<0>],
                  [IntrNoMem]>;
    class Neon_2Arg_Long_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMTruncatedElementVectorType<0>,
                   LLVMTruncatedElementVectorType<0>],
                  [IntrNoMem]>;
    class Neon_2Arg_Wide_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMTruncatedElementVectorType<0>],
                  [IntrNoMem]>;
    class Neon_3Arg_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                  [IntrNoMem]>;
    class Neon_3Arg_Long_Intrinsic
-    : Intrinsic<[llvm_anyint_ty],
+    : Intrinsic<[llvm_anyvector_ty],
                  [LLVMMatchType<0>,
                   LLVMTruncatedElementVectorType<0>,
                   LLVMTruncatedElementVectorType<0>],
                  [IntrNoMem]>;
    class Neon_2Result_Intrinsic
-    : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
-                [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-  class Neon_2Result_Float_Intrinsic
-    : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>],
+    : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                  [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
    class Neon_CvtFxToFP_Intrinsic
      : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -141,18 +130,16 @@ let Properties = [IntrNoMem, Commutative] in {
    // Vector Maximum.
    def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
    def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
-  def int_arm_neon_vmaxf : Neon_2Arg_Float_Intrinsic;
  
    // Vector Minimum.
    def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
    def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
-  def int_arm_neon_vminf : Neon_2Arg_Float_Intrinsic;
  
    // Vector Reciprocal Step.
-  def int_arm_neon_vrecps : Neon_2Arg_Float_Intrinsic;
+  def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
  
    // Vector Reciprocal Square Root Step.
-  def int_arm_neon_vrsqrts : Neon_2Arg_Float_Intrinsic;
+  def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
  }
  
  // Vector Subtract.
@@ -186,7 +173,6 @@ let TargetPrefix = "arm" in {
  // Vector Absolute Differences.
  def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
  def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vabdf : Neon_2Arg_Float_Intrinsic;
  def int_arm_neon_vabdls : Neon_2Arg_Long_Intrinsic;
  def int_arm_neon_vabdlu : Neon_2Arg_Long_Intrinsic;
  
@@ -197,16 +183,16 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
  def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
  
  // Vector Pairwise Add.
-def int_arm_neon_vpadd : Neon_2Arg_Vector_Intrinsic;
+def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
  
  // Vector Pairwise Add Long.
  // Note: This is different than the other "long" NEON intrinsics because
  // the result vector has half as many elements as the source vector.
  // The source and destination vector types must be specified separately.
  let TargetPrefix = "arm" in {
-  def int_arm_neon_vpaddls : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty],
+  def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
                                         [IntrNoMem]>;
-  def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty],
+  def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
                                         [IntrNoMem]>;
  }
  
@@ -214,21 +200,19 @@ let TargetPrefix = "arm" in {
  // Note: This is similar to vpaddl but the destination vector also appears
  // as the first argument.
  let TargetPrefix = "arm" in {
-  def int_arm_neon_vpadals : Intrinsic<[llvm_anyint_ty],
-                                       [LLVMMatchType<0>, llvm_anyint_ty],
+  def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
+                                       [LLVMMatchType<0>, llvm_anyvector_ty],
                                         [IntrNoMem]>;
-  def int_arm_neon_vpadalu : Intrinsic<[llvm_anyint_ty],
-                                       [LLVMMatchType<0>, llvm_anyint_ty],
+  def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
+                                       [LLVMMatchType<0>, llvm_anyvector_ty],
                                         [IntrNoMem]>;
  }
  
  // Vector Pairwise Maximum and Minimum.
  def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
  def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpmaxf : Neon_2Arg_Float_Intrinsic;
  def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
  def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpminf : Neon_2Arg_Float_Intrinsic;
  
  // Vector Shifts:
  //
@@ -283,7 +267,6 @@ def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
  
  // Vector Absolute Value and Saturating Absolute Value.
  def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
-def int_arm_neon_vabsf : Neon_1Arg_Float_Intrinsic;
  def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;
  
  // Vector Saturating Negate.
@@ -298,11 +281,9 @@ def int_arm_neon_vcnt : Neon_1Arg_Intrinsic;
  
  // Vector Reciprocal Estimate.
  def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrecpef : Neon_1Arg_Float_Intrinsic;
  
  // Vector Reciprocal Square Root Estimate.
  def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrsqrtef : Neon_1Arg_Float_Intrinsic;
  
  // Vector Conversions Between Floating-point and Fixed-point.
  def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
@@ -331,68 +312,41 @@ def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
  def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
  
  // Vector Transpose.
-def int_arm_neon_vtrni : Neon_2Result_Intrinsic;
-def int_arm_neon_vtrnf : Neon_2Result_Float_Intrinsic;
+def int_arm_neon_vtrn : Neon_2Result_Intrinsic;
  
  // Vector Interleave (vzip).
-def int_arm_neon_vzipi : Neon_2Result_Intrinsic;
-def int_arm_neon_vzipf : Neon_2Result_Float_Intrinsic;
+def int_arm_neon_vzip : Neon_2Result_Intrinsic;
  
  // Vector Deinterleave (vuzp).
-def int_arm_neon_vuzpi : Neon_2Result_Intrinsic;
-def int_arm_neon_vuzpf : Neon_2Result_Float_Intrinsic;
+def int_arm_neon_vuzp : Neon_2Result_Intrinsic;
  
  let TargetPrefix = "arm" in {
  
    // De-interleaving vector loads from N-element structures.
-  def int_arm_neon_vld1i : Intrinsic<[llvm_anyint_ty],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld1f : Intrinsic<[llvm_anyfloat_ty],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld2i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld2f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld3i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>,
-                                      LLVMMatchType<0>],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld3f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>,
-                                      LLVMMatchType<0>],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld4i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>,
-                                      LLVMMatchType<0>, LLVMMatchType<0>],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
-  def int_arm_neon_vld4f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>,
-                                      LLVMMatchType<0>, LLVMMatchType<0>],
-                                     [llvm_ptr_ty], [IntrReadArgMem]>;
+  def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
+                                    [llvm_ptr_ty], [IntrReadArgMem]>;
+  def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                                    [llvm_ptr_ty], [IntrReadArgMem]>;
+  def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                     LLVMMatchType<0>],
+                                    [llvm_ptr_ty], [IntrReadArgMem]>;
+  def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+                                     LLVMMatchType<0>, LLVMMatchType<0>],
+                                    [llvm_ptr_ty], [IntrReadArgMem]>;
  
    // Interleaving vector stores from N-element structures.
-  def int_arm_neon_vst1i : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyint_ty],
-                                     [IntrWriteArgMem]>;
-  def int_arm_neon_vst1f : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyfloat_ty],
+  def int_arm_neon_vst1 : Intrinsic<[llvm_void_ty],
+                                    [llvm_ptr_ty, llvm_anyvector_ty],
+                                    [IntrWriteArgMem]>;
+  def int_arm_neon_vst2 : Intrinsic<[llvm_void_ty],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<0>], [IntrWriteArgMem]>;
+  def int_arm_neon_vst3 : Intrinsic<[llvm_void_ty],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<0>, LLVMMatchType<0>],
                                       [IntrWriteArgMem]>;
-  def int_arm_neon_vst2i : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyint_ty,
-                                      LLVMMatchType<0>], [IntrWriteArgMem]>;
-  def int_arm_neon_vst2f : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyfloat_ty,
-                                      LLVMMatchType<0>], [IntrWriteArgMem]>;
-  def int_arm_neon_vst3i : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyint_ty,
-                                      LLVMMatchType<0>, LLVMMatchType<0>],
-                                      [IntrWriteArgMem]>;
-  def int_arm_neon_vst3f : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyfloat_ty,
-                                      LLVMMatchType<0>, LLVMMatchType<0>],
-                                      [IntrWriteArgMem]>;
-  def int_arm_neon_vst4i : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyint_ty,
-                                      LLVMMatchType<0>, LLVMMatchType<0>,
-                                      LLVMMatchType<0>], [IntrWriteArgMem]>;
-  def int_arm_neon_vst4f : Intrinsic<[llvm_void_ty],
-                                     [llvm_ptr_ty, llvm_anyfloat_ty,
-                                      LLVMMatchType<0>, LLVMMatchType<0>,
-                                      LLVMMatchType<0>], [IntrWriteArgMem]>;
+  def int_arm_neon_vst4 : Intrinsic<[llvm_void_ty],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<0>, LLVMMatchType<0>,
+                                     LLVMMatchType<0>], [IntrWriteArgMem]>;
  }
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp

index 1096e8eb01cf00cfc68d87a42865bc43811ac5e8..a927da2a380678e9b0adf3e6602e9dafa7dfaf00 100644 (file)
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1466,8 +1466,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
      switch (IntNo) {
      default: break;
  
-    case Intrinsic::arm_neon_vtrni:
-    case Intrinsic::arm_neon_vtrnf:
+    case Intrinsic::arm_neon_vtrn:
        switch (VT.getSimpleVT()) {
        default: return NULL;
        case EVT::v8i8:  Opc = ARM::VTRNd8; break;
@@ -1482,8 +1481,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
        return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
                                     N->getOperand(2));
  
-    case Intrinsic::arm_neon_vuzpi:
-    case Intrinsic::arm_neon_vuzpf:
+    case Intrinsic::arm_neon_vuzp:
        switch (VT.getSimpleVT()) {
        default: return NULL;
        case EVT::v8i8:  Opc = ARM::VUZPd8; break;
@@ -1498,8 +1496,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
        return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
                                     N->getOperand(2));
  
-    case Intrinsic::arm_neon_vzipi:
-    case Intrinsic::arm_neon_vzipf:
+    case Intrinsic::arm_neon_vzip:
        switch (VT.getSimpleVT()) {
        default: return NULL;
        case EVT::v8i8:  Opc = ARM::VZIPd8; break;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index 61722d44faed340f35e074f8ab166b8adf82da82..1a662d9d872371d6c5e7da19ebde4ee6d4cf48c6 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1360,23 +1360,17 @@ SDValue
  ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntNo) {
-  case Intrinsic::arm_neon_vld2i:
-  case Intrinsic::arm_neon_vld2f:
+  case Intrinsic::arm_neon_vld2:
      return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
-  case Intrinsic::arm_neon_vld3i:
-  case Intrinsic::arm_neon_vld3f:
+  case Intrinsic::arm_neon_vld3:
      return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
-  case Intrinsic::arm_neon_vld4i:
-  case Intrinsic::arm_neon_vld4f:
+  case Intrinsic::arm_neon_vld4:
      return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
-  case Intrinsic::arm_neon_vst2i:
-  case Intrinsic::arm_neon_vst2f:
+  case Intrinsic::arm_neon_vst2:
      return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2);
-  case Intrinsic::arm_neon_vst3i:
-  case Intrinsic::arm_neon_vst3f:
+  case Intrinsic::arm_neon_vst3:
      return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3);
-  case Intrinsic::arm_neon_vst4i:
-  case Intrinsic::arm_neon_vst4f:
+  case Intrinsic::arm_neon_vst4:
      return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4);
    default: return SDValue();    // Don't custom lower most intrinsics.
    }
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 1ed3a619a0f5c7b2fa7b99a5fda3572c3366ed0b..53283e84ead47e35ffe6d00f8e3f58d8864dbf7c 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -173,17 +173,17 @@ class VLD1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
            !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"),
            [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
  
-def  VLD1d8   : VLD1D<"vld1.8",  v8i8,  int_arm_neon_vld1i>;
-def  VLD1d16  : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1i>;
-def  VLD1d32  : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1i>;
-def  VLD1df   : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1f>;
-def  VLD1d64  : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1i>;
-
-def  VLD1q8   : VLD1Q<"vld1.8",  v16i8, int_arm_neon_vld1i>;
-def  VLD1q16  : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1i>;
-def  VLD1q32  : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1i>;
-def  VLD1qf   : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1f>;
-def  VLD1q64  : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1i>;
+def  VLD1d8   : VLD1D<"vld1.8",  v8i8,  int_arm_neon_vld1>;
+def  VLD1d16  : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1>;
+def  VLD1d32  : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1>;
+def  VLD1df   : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1>;
+def  VLD1d64  : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1>;
+
+def  VLD1q8   : VLD1Q<"vld1.8",  v16i8, int_arm_neon_vld1>;
+def  VLD1q16  : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1>;
+def  VLD1q32  : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1>;
+def  VLD1qf   : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1>;
+def  VLD1q64  : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1>;
  
  //   VLD2     : Vector Load (multiple 2-element structures)
  class VLD2D<string OpcodeStr>
@@ -228,17 +228,17 @@ class VST1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
            !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"),
            [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
  
-def  VST1d8   : VST1D<"vst1.8",  v8i8,  int_arm_neon_vst1i>;
-def  VST1d16  : VST1D<"vst1.16", v4i16, int_arm_neon_vst1i>;
-def  VST1d32  : VST1D<"vst1.32", v2i32, int_arm_neon_vst1i>;
-def  VST1df   : VST1D<"vst1.32", v2f32, int_arm_neon_vst1f>;
-def  VST1d64  : VST1D<"vst1.64", v1i64, int_arm_neon_vst1i>;
+def  VST1d8   : VST1D<"vst1.8",  v8i8,  int_arm_neon_vst1>;
+def  VST1d16  : VST1D<"vst1.16", v4i16, int_arm_neon_vst1>;
+def  VST1d32  : VST1D<"vst1.32", v2i32, int_arm_neon_vst1>;
+def  VST1df   : VST1D<"vst1.32", v2f32, int_arm_neon_vst1>;
+def  VST1d64  : VST1D<"vst1.64", v1i64, int_arm_neon_vst1>;
  
-def  VST1q8   : VST1Q<"vst1.8",  v16i8, int_arm_neon_vst1i>;
-def  VST1q16  : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1i>;
-def  VST1q32  : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
-def  VST1qf   : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
-def  VST1q64  : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
+def  VST1q8   : VST1Q<"vst1.8",  v16i8, int_arm_neon_vst1>;
+def  VST1q16  : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1>;
+def  VST1q32  : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1>;
+def  VST1qf   : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1>;
+def  VST1q64  : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1>;
  
  //   VST2     : Vector Store (multiple 2-element structures)
  class VST2D<string OpcodeStr>
@@ -1223,9 +1223,9 @@ def  VBSLq    : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
  defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>;
  defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>;
  def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32,
-                        int_arm_neon_vabdf, 0>;
+                        int_arm_neon_vabds, 0>;
  def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32,
-                        int_arm_neon_vabdf, 0>;
+                        int_arm_neon_vabds, 0>;
  
  //   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
  defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>;
@@ -1245,17 +1245,17 @@ defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
  defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>;
  defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>;
  def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32,
-                        int_arm_neon_vmaxf, 1>;
+                        int_arm_neon_vmaxs, 1>;
  def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32,
-                        int_arm_neon_vmaxf, 1>;
+                        int_arm_neon_vmaxs, 1>;
  
  //   VMIN     : Vector Minimum
  defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>;
  defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>;
  def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32,
-                        int_arm_neon_vminf, 1>;
+                        int_arm_neon_vmins, 1>;
  def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
-                        int_arm_neon_vminf, 1>;
+                        int_arm_neon_vmins, 1>;
  
  // Vector Pairwise Operations.
  
@@ -1295,7 +1295,7 @@ def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
  def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
                          int_arm_neon_vpmaxu, 0>;
  def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
-                        int_arm_neon_vpmaxf, 0>;
+                        int_arm_neon_vpmaxs, 0>;
  
  //   VPMIN    : Vector Pairwise Minimum
  def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
@@ -1311,7 +1311,7 @@ def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
  def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
                          int_arm_neon_vpminu, 0>;
  def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
-                        int_arm_neon_vpminf, 0>;
+                        int_arm_neon_vpmins, 0>;
  
  // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
  
@@ -1321,9 +1321,9 @@ def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
  def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
                          v4i32, v4i32, int_arm_neon_vrecpe>;
  def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
-                        v2f32, v2f32, int_arm_neon_vrecpef>;
+                        v2f32, v2f32, int_arm_neon_vrecpe>;
  def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
-                        v4f32, v4f32, int_arm_neon_vrecpef>;
+                        v4f32, v4f32, int_arm_neon_vrecpe>;
  
  //   VRECPS   : Vector Reciprocal Step
  def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
@@ -1337,9 +1337,9 @@ def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
  def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
                          v4i32, v4i32, int_arm_neon_vrsqrte>;
  def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
-                        v2f32, v2f32, int_arm_neon_vrsqrtef>;
+                        v2f32, v2f32, int_arm_neon_vrsqrte>;
  def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
-                        v4f32, v4f32, int_arm_neon_vrsqrtef>;
+                        v4f32, v4f32, int_arm_neon_vrsqrte>;
  
  //   VRSQRTS  : Vector Reciprocal Square Root Step
  def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
@@ -1480,9 +1480,9 @@ defm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;
  defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s",
                             int_arm_neon_vabs>;
  def  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
-                        v2f32, v2f32, int_arm_neon_vabsf>;
+                        v2f32, v2f32, int_arm_neon_vabs>;
  def  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
-                        v4f32, v4f32, int_arm_neon_vabsf>;
+                        v4f32, v4f32, int_arm_neon_vabs>;
  
  //   VQABS    : Vector Saturating Absolute Value
  defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
@@ -2017,7 +2017,7 @@ def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
  // Vector Absolute used for single-precision FP
  let neverHasSideEffects = 1 in
  def  VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
-                           v2f32, v2f32, int_arm_neon_vabsf>;
+                           v2f32, v2f32, int_arm_neon_vabs>;
  def : N2VDIntsPat<fabs, VABSfd_sfp>;
  
  // Vector Negate used for single-precision FP
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll

index c0497f9134e49083c0f300e1b90f57df7f30f631..e764840154859d08e38156ec05f2ddb6e433849b 100644 (file)
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -59,7 +59,7 @@ define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
  ;CHECK: vabd.f32
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
  }
  
@@ -122,7 +122,7 @@ define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
  ;CHECK: vabd.f32
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x float> %tmp3
  }
  
@@ -134,7 +134,7 @@ declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
  declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
  declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
  
  declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
  declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -144,4 +144,4 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
  declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
  declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
  
-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll

index a7979eec52a5619fea0664aef0a223806343b5e5..1195f087ef4d60d0ad5667ab228e59c009e317b5 100644 (file)
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -28,7 +28,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
  ;CHECK: vabsf32:
  ;CHECK: vabs.f32
         %tmp1 = load <2 x float>* %A
-       %tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+       %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
         ret <2 x float> %tmp2
  }
  
@@ -60,17 +60,17 @@ define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
  ;CHECK: vabsQf32:
  ;CHECK: vabs.f32
         %tmp1 = load <4 x float>* %A
-       %tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+       %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
         ret <4 x float> %tmp2
  }
  
  declare <8 x i8>  @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
  declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
  declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
  
  declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
  declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
  declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
  
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll

index d5191338c9a0e79d2b1e06f88e36fd8344d05931..81f1bdec9ee5fb1b2c41690438f6a5b8c0151eca 100644 (file)
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -3,81 +3,81 @@
  define <8 x i8> @vld1i8(i8* %A) nounwind {
  ;CHECK: vld1i8:
  ;CHECK: vld1.8
-       %tmp1 = call <8 x i8> @llvm.arm.neon.vld1i.v8i8(i8* %A)
+       %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
         ret <8 x i8> %tmp1
  }
  
  define <4 x i16> @vld1i16(i16* %A) nounwind {
  ;CHECK: vld1i16:
  ;CHECK: vld1.16
-       %tmp1 = call <4 x i16> @llvm.arm.neon.vld1i.v4i16(i16* %A)
+       %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
         ret <4 x i16> %tmp1
  }
  
  define <2 x i32> @vld1i32(i32* %A) nounwind {
  ;CHECK: vld1i32:
  ;CHECK: vld1.32
-       %tmp1 = call <2 x i32> @llvm.arm.neon.vld1i.v2i32(i32* %A)
+       %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
         ret <2 x i32> %tmp1
  }
  
  define <2 x float> @vld1f(float* %A) nounwind {
  ;CHECK: vld1f:
  ;CHECK: vld1.32
-       %tmp1 = call <2 x float> @llvm.arm.neon.vld1f.v2f32(float* %A)
+       %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
         ret <2 x float> %tmp1
  }
  
  define <1 x i64> @vld1i64(i64* %A) nounwind {
  ;CHECK: vld1i64:
  ;CHECK: vld1.64
-       %tmp1 = call <1 x i64> @llvm.arm.neon.vld1i.v1i64(i64* %A)
+       %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
         ret <1 x i64> %tmp1
  }
  
  define <16 x i8> @vld1Qi8(i8* %A) nounwind {
  ;CHECK: vld1Qi8:
  ;CHECK: vld1.8
-       %tmp1 = call <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8* %A)
+       %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
         ret <16 x i8> %tmp1
  }
  
  define <8 x i16> @vld1Qi16(i16* %A) nounwind {
  ;CHECK: vld1Qi16:
  ;CHECK: vld1.16
-       %tmp1 = call <8 x i16> @llvm.arm.neon.vld1i.v8i16(i16* %A)
+       %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
         ret <8 x i16> %tmp1
  }
  
  define <4 x i32> @vld1Qi32(i32* %A) nounwind {
  ;CHECK: vld1Qi32:
  ;CHECK: vld1.32
-       %tmp1 = call <4 x i32> @llvm.arm.neon.vld1i.v4i32(i32* %A)
+       %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
         ret <4 x i32> %tmp1
  }
  
  define <4 x float> @vld1Qf(float* %A) nounwind {
  ;CHECK: vld1Qf:
  ;CHECK: vld1.32
-       %tmp1 = call <4 x float> @llvm.arm.neon.vld1f.v4f32(float* %A)
+       %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
         ret <4 x float> %tmp1
  }
  
  define <2 x i64> @vld1Qi64(i64* %A) nounwind {
  ;CHECK: vld1Qi64:
  ;CHECK: vld1.64
-       %tmp1 = call <2 x i64> @llvm.arm.neon.vld1i.v2i64(i64* %A)
+       %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
         ret <2 x i64> %tmp1
  }
  
-declare <8 x i8>  @llvm.arm.neon.vld1i.v8i8(i8*) nounwind readonly
-declare <4 x i16> @llvm.arm.neon.vld1i.v4i16(i8*) nounwind readonly
-declare <2 x i32> @llvm.arm.neon.vld1i.v2i32(i8*) nounwind readonly
-declare <2 x float> @llvm.arm.neon.vld1f.v2f32(i8*) nounwind readonly
-declare <1 x i64> @llvm.arm.neon.vld1i.v1i64(i8*) nounwind readonly
+declare <8 x i8>  @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
  
-declare <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8*) nounwind readonly
-declare <8 x i16> @llvm.arm.neon.vld1i.v8i16(i8*) nounwind readonly
-declare <4 x i32> @llvm.arm.neon.vld1i.v4i32(i8*) nounwind readonly
-declare <4 x float> @llvm.arm.neon.vld1f.v4f32(i8*) nounwind readonly
-declare <2 x i64> @llvm.arm.neon.vld1i.v2i64(i8*) nounwind readonly
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll

index 2c16ac19d8d0ad7e705e31a95f1681bb4a8beabd..168b62b9ab4d36da6241a903cf1ccc605d1bd6a7 100644 (file)
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -8,7 +8,7 @@
  define <8 x i8> @vld2i8(i8* %A) nounwind {
  ;CHECK: vld2i8:
  ;CHECK: vld2.8
-       %tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8* %A)
+       %tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 1
          %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
  define <4 x i16> @vld2i16(i16* %A) nounwind {
  ;CHECK: vld2i16:
  ;CHECK: vld2.16
-       %tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2i.v4i16(i16* %A)
+       %tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i16* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 1
          %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
  define <2 x i32> @vld2i32(i32* %A) nounwind {
  ;CHECK: vld2i32:
  ;CHECK: vld2.32
-       %tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2i.v2i32(i32* %A)
+       %tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i32* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 1
          %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@ define <2 x i32> @vld2i32(i32* %A) nounwind {
  define <2 x float> @vld2f(float* %A) nounwind {
  ;CHECK: vld2f:
  ;CHECK: vld2.32
-       %tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2f.v2f32(float* %A)
+       %tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(float* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 1
          %tmp4 = add <2 x float> %tmp2, %tmp3
         ret <2 x float> %tmp4
  }
  
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll

index 49665f6de33f787173aeafa2d6a8b9b678d4f9c6..5e528c04560df5958a6d71e90c09b3b4d1a21779 100644 (file)
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -8,7 +8,7 @@
  define <8 x i8> @vld3i8(i8* %A) nounwind {
  ;CHECK: vld3i8:
  ;CHECK: vld3.8
-       %tmp1 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3i.v8i8(i8* %A)
+       %tmp1 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 2
          %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind {
  define <4 x i16> @vld3i16(i16* %A) nounwind {
  ;CHECK: vld3i16:
  ;CHECK: vld3.16
-       %tmp1 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3i.v4i16(i16* %A)
+       %tmp1 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i16* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 2
          %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
  define <2 x i32> @vld3i32(i32* %A) nounwind {
  ;CHECK: vld3i32:
  ;CHECK: vld3.32
-       %tmp1 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3i.v2i32(i32* %A)
+       %tmp1 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i32* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 2
          %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@ define <2 x i32> @vld3i32(i32* %A) nounwind {
  define <2 x float> @vld3f(float* %A) nounwind {
  ;CHECK: vld3f:
  ;CHECK: vld3.32
-       %tmp1 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3f.v2f32(float* %A)
+       %tmp1 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(float* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 2
          %tmp4 = add <2 x float> %tmp2, %tmp3
         ret <2 x float> %tmp4
  }
  
-declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll

index a0f41cfa67c40f40e4e6fc9b330005cd0afd8ea5..48125be51424ae3763a84345aa7bc3714054b88b 100644 (file)
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -8,7 +8,7 @@
  define <8 x i8> @vld4i8(i8* %A) nounwind {
  ;CHECK: vld4i8:
  ;CHECK: vld4.8
-       %tmp1 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4i.v8i8(i8* %A)
+       %tmp1 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 2
          %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
  define <4 x i16> @vld4i16(i16* %A) nounwind {
  ;CHECK: vld4i16:
  ;CHECK: vld4.16
-       %tmp1 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4i.v4i16(i16* %A)
+       %tmp1 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i16* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 2
          %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
  define <2 x i32> @vld4i32(i32* %A) nounwind {
  ;CHECK: vld4i32:
  ;CHECK: vld4.32
-       %tmp1 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4i.v2i32(i32* %A)
+       %tmp1 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i32* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 2
          %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
  define <2 x float> @vld4f(float* %A) nounwind {
  ;CHECK: vld4f:
  ;CHECK: vld4.32
-       %tmp1 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4f.v2f32(float* %A)
+       %tmp1 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(float* %A)
          %tmp2 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 0
          %tmp3 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 2
          %tmp4 = add <2 x float> %tmp2, %tmp3
         ret <2 x float> %tmp4
  }
  
-declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vmax.ll b/test/CodeGen/ARM/vmax.ll

index 60322f85d399fb484d1893b68dd93468bc82fa9a..65f607671c7f1e0c005d113704a2dbe92f23cbeb 100644 (file)
--- a/test/CodeGen/ARM/vmax.ll
+++ b/test/CodeGen/ARM/vmax.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x float> @llvm.arm.neon.vmaxf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
  }
  
@@ -101,7 +101,7 @@ define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call <4 x float> @llvm.arm.neon.vmaxf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x float> %tmp3
  }
  
@@ -113,7 +113,7 @@ declare <8 x i8>  @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
  declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
  declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vmaxf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
  
  declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
  declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +123,4 @@ declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind read
  declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
  declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
  
-declare <4 x float> @llvm.arm.neon.vmaxf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmin.ll b/test/CodeGen/ARM/vmin.ll

index a6936937c7b6c7e21cfcf1d991759a5fb1f2e20e..08a3f090991657dcbf61e671b65f73e879f813a1 100644 (file)
--- a/test/CodeGen/ARM/vmin.ll
+++ b/test/CodeGen/ARM/vmin.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x float> @llvm.arm.neon.vminf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
  }
  
@@ -101,7 +101,7 @@ define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call <4 x float> @llvm.arm.neon.vminf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x float> %tmp3
  }
  
@@ -113,7 +113,7 @@ declare <8 x i8>  @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
  declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
  declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vminf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
  
  declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
  declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +123,4 @@ declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind read
  declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
  declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
  
-declare <4 x float> @llvm.arm.neon.vminf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpmax.ll b/test/CodeGen/ARM/vpmax.ll

index 9878ca8c7ba650892fa0ab04d98fff008c047f9c..90ae70ff94bb11b9bd5df64295ad173cb28c8016 100644 (file)
--- a/test/CodeGen/ARM/vpmax.ll
+++ b/test/CodeGen/ARM/vpmax.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
  }
  
@@ -64,4 +64,4 @@ declare <8 x i8>  @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readno
  declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
  declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vpmaxf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpmin.ll b/test/CodeGen/ARM/vpmin.ll

index 7b5348baa544304bd50c0a74e945017033ce16c7..0f982f4610adc4ab5d3798f47848b9e15d1416ef 100644 (file)
--- a/test/CodeGen/ARM/vpmin.ll
+++ b/test/CodeGen/ARM/vpmin.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call <2 x float> @llvm.arm.neon.vpminf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
  }
  
@@ -64,4 +64,4 @@ declare <8 x i8>  @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readno
  declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
  declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vpminf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrecpe.ll b/test/CodeGen/ARM/vrecpe.ll

index 79cb595bc8401d20ead49ae7571e4a4e83dd609b..622725bce3bce324b3e9efc5aa3fbc23d753746e 100644 (file)
--- a/test/CodeGen/ARM/vrecpe.ll
+++ b/test/CodeGen/ARM/vrecpe.ll
@@ -16,18 +16,18 @@ define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
  
  define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
         %tmp1 = load <2 x float>* %A
-       %tmp2 = call <2 x float> @llvm.arm.neon.vrecpef.v2f32(<2 x float> %tmp1)
+       %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
         ret <2 x float> %tmp2
  }
  
  define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
         %tmp1 = load <4 x float>* %A
-       %tmp2 = call <4 x float> @llvm.arm.neon.vrecpef.v4f32(<4 x float> %tmp1)
+       %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
         ret <4 x float> %tmp2
  }
  
  declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
  declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vrecpef.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrecpef.v4f32(<4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrsqrte.ll b/test/CodeGen/ARM/vrsqrte.ll

index 10529f61b56f2912d742300c12bc75c182aa8e49..4f119775248727651e849bf485ce973046694c31 100644 (file)
--- a/test/CodeGen/ARM/vrsqrte.ll
+++ b/test/CodeGen/ARM/vrsqrte.ll
@@ -16,18 +16,18 @@ define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
  
  define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
         %tmp1 = load <2 x float>* %A
-       %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrtef.v2f32(<2 x float> %tmp1)
+       %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
         ret <2 x float> %tmp2
  }
  
  define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
         %tmp1 = load <4 x float>* %A
-       %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrtef.v4f32(<4 x float> %tmp1)
+       %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
         ret <4 x float> %tmp2
  }
  
  declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
  declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
  
-declare <2 x float> @llvm.arm.neon.vrsqrtef.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrsqrtef.v4f32(<4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll

index d84f75882f3ecf962f4fd799296832fb2189af98..8fbae12a032688d8d191c5130ea00ed7591e7f52 100644 (file)
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vst1i8:
  ;CHECK: vst1.8
         %tmp1 = load <8 x i8>* %B
-       call void @llvm.arm.neon.vst1i.v8i8(i8* %A, <8 x i8> %tmp1)
+       call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
         ret void
  }
  
@@ -12,7 +12,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vst1i16:
  ;CHECK: vst1.16
         %tmp1 = load <4 x i16>* %B
-       call void @llvm.arm.neon.vst1i.v4i16(i16* %A, <4 x i16> %tmp1)
+       call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
         ret void
  }
  
@@ -20,7 +20,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vst1i32:
  ;CHECK: vst1.32
         %tmp1 = load <2 x i32>* %B
-       call void @llvm.arm.neon.vst1i.v2i32(i32* %A, <2 x i32> %tmp1)
+       call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
         ret void
  }
  
@@ -28,7 +28,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
  ;CHECK: vst1f:
  ;CHECK: vst1.32
         %tmp1 = load <2 x float>* %B
-       call void @llvm.arm.neon.vst1f.v2f32(float* %A, <2 x float> %tmp1)
+       call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
         ret void
  }
  
@@ -36,7 +36,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
  ;CHECK: vst1i64:
  ;CHECK: vst1.64
         %tmp1 = load <1 x i64>* %B
-       call void @llvm.arm.neon.vst1i.v1i64(i64* %A, <1 x i64> %tmp1)
+       call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
         ret void
  }
  
@@ -44,7 +44,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
  ;CHECK: vst1Qi8:
  ;CHECK: vst1.8
         %tmp1 = load <16 x i8>* %B
-       call void @llvm.arm.neon.vst1i.v16i8(i8* %A, <16 x i8> %tmp1)
+       call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
         ret void
  }
  
@@ -52,7 +52,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
  ;CHECK: vst1Qi16:
  ;CHECK: vst1.16
         %tmp1 = load <8 x i16>* %B
-       call void @llvm.arm.neon.vst1i.v8i16(i16* %A, <8 x i16> %tmp1)
+       call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
         ret void
  }
  
@@ -60,7 +60,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
  ;CHECK: vst1Qi32:
  ;CHECK: vst1.32
         %tmp1 = load <4 x i32>* %B
-       call void @llvm.arm.neon.vst1i.v4i32(i32* %A, <4 x i32> %tmp1)
+       call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
         ret void
  }
  
@@ -68,7 +68,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
  ;CHECK: vst1Qf:
  ;CHECK: vst1.32
         %tmp1 = load <4 x float>* %B
-       call void @llvm.arm.neon.vst1f.v4f32(float* %A, <4 x float> %tmp1)
+       call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
         ret void
  }
  
@@ -76,18 +76,18 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
  ;CHECK: vst1Qi64:
  ;CHECK: vst1.64
         %tmp1 = load <2 x i64>* %B
-       call void @llvm.arm.neon.vst1i.v2i64(i64* %A, <2 x i64> %tmp1)
+       call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
         ret void
  }
  
-declare void @llvm.arm.neon.vst1i.v8i8(i8*, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst1i.v4i16(i8*, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst1i.v2i32(i8*, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst1f.v2f32(i8*, <2 x float>) nounwind
-declare void @llvm.arm.neon.vst1i.v1i64(i8*, <1 x i64>) nounwind
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
  
-declare void @llvm.arm.neon.vst1i.v16i8(i8*, <16 x i8>) nounwind
-declare void @llvm.arm.neon.vst1i.v8i16(i8*, <8 x i16>) nounwind
-declare void @llvm.arm.neon.vst1i.v4i32(i8*, <4 x i32>) nounwind
-declare void @llvm.arm.neon.vst1f.v4f32(i8*, <4 x float>) nounwind
-declare void @llvm.arm.neon.vst1i.v2i64(i8*, <2 x i64>) nounwind
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll

index f8f34f4aae30493154a3f251e82ea215275caff0..3e2d028e7264dc5ba565db30692f0c59e9f21469 100644 (file)
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vst2i8:
  ;CHECK: vst2.8
         %tmp1 = load <8 x i8>* %B
-       call void @llvm.arm.neon.vst2i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+       call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
         ret void
  }
  
@@ -12,7 +12,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vst2i16:
  ;CHECK: vst2.16
         %tmp1 = load <4 x i16>* %B
-       call void @llvm.arm.neon.vst2i.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+       call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
         ret void
  }
  
@@ -20,7 +20,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vst2i32:
  ;CHECK: vst2.32
         %tmp1 = load <2 x i32>* %B
-       call void @llvm.arm.neon.vst2i.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+       call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
         ret void
  }
  
@@ -28,11 +28,11 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
  ;CHECK: vst2f:
  ;CHECK: vst2.32
         %tmp1 = load <2 x float>* %B
-       call void @llvm.arm.neon.vst2f.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+       call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
         ret void
  }
  
-declare void @llvm.arm.neon.vst2i.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst2i.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst2i.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst2f.v2f32(i8*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll

index c1a6ce86b4c1d5f5d48006f4c89ee10dc7006e06..0a47efae202b43fd8b2ddab5939b61215c89da8f 100644 (file)
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -4,7 +4,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vst3i8:
  ;CHECK: vst3.8
         %tmp1 = load <8 x i8>* %B
-       call void @llvm.arm.neon.vst3i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+       call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
         ret void
  }
  
@@ -12,7 +12,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vst3i16:
  ;CHECK: vst3.16
         %tmp1 = load <4 x i16>* %B
-       call void @llvm.arm.neon.vst3i.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+       call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
         ret void
  }
  
@@ -20,7 +20,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vst3i32:
  ;CHECK: vst3.32
         %tmp1 = load <2 x i32>* %B
-       call void @llvm.arm.neon.vst3i.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+       call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
         ret void
  }
  
@@ -28,11 +28,11 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
  ;CHECK: vst3f:
  ;CHECK: vst3.32
         %tmp1 = load <2 x float>* %B
-       call void @llvm.arm.neon.vst3f.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+       call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
         ret void
  }
  
-declare void @llvm.arm.neon.vst3i.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst3i.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst3i.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst3f.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll

index 1d6f109a72810d4089277164b4389f1dd5c418b7..fa745ebc133f47c0b09cd15a809a91948287a343 100644 (file)
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vst4i8:
  ;CHECK: vst4.8
         %tmp1 = load <8 x i8>* %B
-       call void @llvm.arm.neon.vst4i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+       call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
         ret void
  }
  
@@ -12,7 +12,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vst4i16:
  ;CHECK: vst4.16
         %tmp1 = load <4 x i16>* %B
-       call void @llvm.arm.neon.vst4i.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+       call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
         ret void
  }
  
@@ -20,7 +20,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vst4i32:
  ;CHECK: vst4.32
         %tmp1 = load <2 x i32>* %B
-       call void @llvm.arm.neon.vst4i.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+       call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
         ret void
  }
  
@@ -28,11 +28,11 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
  ;CHECK: vst4f:
  ;CHECK: vst4.32
         %tmp1 = load <2 x float>* %B
-       call void @llvm.arm.neon.vst4f.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+       call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
         ret void
  }
  
-declare void @llvm.arm.neon.vst4i.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst4i.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst4i.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst4f.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll

index 205052cdd1652cdf9d1c999c9edd1a9ff313c727..36a05617055a65b6074c7117b70b4c99fcf483f4 100644 (file)
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vtrn.8
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-       %tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrni.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrn.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 1
          %tmp6 = add <8 x i8> %tmp4, %tmp5
@@ -27,7 +27,7 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vtrn.16
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-       %tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrni.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrn.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 1
          %tmp6 = add <4 x i16> %tmp4, %tmp5
@@ -39,7 +39,7 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vtrn.32
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-       %tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrni.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrn.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 1
          %tmp6 = add <2 x i32> %tmp4, %tmp5
@@ -51,7 +51,7 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
  ;CHECK: vtrn.32
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrnf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrn.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 1
          %tmp6 = add <2 x float> %tmp4, %tmp5
@@ -63,7 +63,7 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  ;CHECK: vtrn.8
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-       %tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrni.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrn.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 1
          %tmp6 = add <16 x i8> %tmp4, %tmp5
@@ -75,7 +75,7 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  ;CHECK: vtrn.16
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-       %tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrni.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrn.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 1
          %tmp6 = add <8 x i16> %tmp4, %tmp5
@@ -87,7 +87,7 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  ;CHECK: vtrn.32
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-       %tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrni.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrn.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 1
          %tmp6 = add <4 x i32> %tmp4, %tmp5
@@ -99,19 +99,19 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
  ;CHECK: vtrn.32
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrnf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrn.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 1
          %tmp6 = add <4 x float> %tmp4, %tmp5
         ret <4 x float> %tmp6
  }
  
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrni.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrni.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrni.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrnf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrn.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrn.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrn.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrn.v2f32(<2 x float>, <2 x float>) nounwind readnone
  
-declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrni.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrni.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrni.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrnf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrn.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrn.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrn.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrn.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll

index 508ae1470665a696885497390f07188c2c05064d..883e0722abc4f48c94fc4a8cb8ab094edbceb453 100644 (file)
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vuzp.8
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-       %tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzpi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 1
          %tmp6 = add <8 x i8> %tmp4, %tmp5
@@ -27,7 +27,7 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vuzp.16
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-       %tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzpi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 1
          %tmp6 = add <4 x i16> %tmp4, %tmp5
@@ -39,7 +39,7 @@ define <2 x i32> @vuzpi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vuzp.32
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-       %tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzpi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 1
          %tmp6 = add <2 x i32> %tmp4, %tmp5
@@ -51,7 +51,7 @@ define <2 x float> @vuzpf(<2 x float>* %A, <2 x float>* %B) nounwind {
  ;CHECK: vuzp.32
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzpf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 1
          %tmp6 = add <2 x float> %tmp4, %tmp5
@@ -63,7 +63,7 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  ;CHECK: vuzp.8
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-       %tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzpi.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 1
          %tmp6 = add <16 x i8> %tmp4, %tmp5
@@ -75,7 +75,7 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  ;CHECK: vuzp.16
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-       %tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzpi.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 1
          %tmp6 = add <8 x i16> %tmp4, %tmp5
@@ -87,7 +87,7 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  ;CHECK: vuzp.32
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-       %tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzpi.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 1
          %tmp6 = add <4 x i32> %tmp4, %tmp5
@@ -99,19 +99,19 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
  ;CHECK: vuzp.32
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzpf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 1
          %tmp6 = add <4 x float> %tmp4, %tmp5
         ret <4 x float> %tmp6
  }
  
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzpi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzpi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzpi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzpf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzp.v2f32(<2 x float>, <2 x float>) nounwind readnone
  
-declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzpi.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzpi.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzpi.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzpf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzp.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll

index ede5ab6209f25c06930ab9d41ef9a3280d7c8454..0485b30954b80625b6877077c92892f14d6cc556 100644 (file)
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  ;CHECK: vzip.8
         %tmp1 = load <8 x i8>* %A
         %tmp2 = load <8 x i8>* %B
-       %tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzipi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzip.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 1
          %tmp6 = add <8 x i8> %tmp4, %tmp5
@@ -27,7 +27,7 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
  ;CHECK: vzip.16
         %tmp1 = load <4 x i16>* %A
         %tmp2 = load <4 x i16>* %B
-       %tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzipi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzip.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 1
          %tmp6 = add <4 x i16> %tmp4, %tmp5
@@ -39,7 +39,7 @@ define <2 x i32> @vzipi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
  ;CHECK: vzip.32
         %tmp1 = load <2 x i32>* %A
         %tmp2 = load <2 x i32>* %B
-       %tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzipi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzip.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 1
          %tmp6 = add <2 x i32> %tmp4, %tmp5
@@ -51,7 +51,7 @@ define <2 x float> @vzipf(<2 x float>* %A, <2 x float>* %B) nounwind {
  ;CHECK: vzip.32
         %tmp1 = load <2 x float>* %A
         %tmp2 = load <2 x float>* %B
-       %tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzipf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzip.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 1
          %tmp6 = add <2 x float> %tmp4, %tmp5
@@ -63,7 +63,7 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
  ;CHECK: vzip.8
         %tmp1 = load <16 x i8>* %A
         %tmp2 = load <16 x i8>* %B
-       %tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzipi.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzip.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 1
          %tmp6 = add <16 x i8> %tmp4, %tmp5
@@ -75,7 +75,7 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
  ;CHECK: vzip.16
         %tmp1 = load <8 x i16>* %A
         %tmp2 = load <8 x i16>* %B
-       %tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzipi.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzip.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 1
          %tmp6 = add <8 x i16> %tmp4, %tmp5
@@ -87,7 +87,7 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
  ;CHECK: vzip.32
         %tmp1 = load <4 x i32>* %A
         %tmp2 = load <4 x i32>* %B
-       %tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzipi.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzip.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 1
          %tmp6 = add <4 x i32> %tmp4, %tmp5
@@ -99,19 +99,19 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
  ;CHECK: vzip.32
         %tmp1 = load <4 x float>* %A
         %tmp2 = load <4 x float>* %B
-       %tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzipf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+       %tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzip.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
          %tmp4 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 0
          %tmp5 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 1
          %tmp6 = add <4 x float> %tmp4, %tmp5
         ret <4 x float> %tmp6
  }
  
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzipi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzipi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzipi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzipf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzip.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzip.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzip.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzip.v2f32(<2 x float>, <2 x float>) nounwind readnone
  
-declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzipi.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzipi.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzipi.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzipf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzip.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzip.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzip.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzip.v4f32(<4 x float>, <4 x float>) nounwind readnone
author	Bob Wilson <bob.wilson@apple.com>
	Tue, 11 Aug 2009 05:39:44 +0000 (05:39 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Tue, 11 Aug 2009 05:39:44 +0000 (05:39 +0000)
include/llvm/IntrinsicsARM.td		patch \| blob \| history
lib/Target/ARM/ARMISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
test/CodeGen/ARM/vabd.ll		patch \| blob \| history
test/CodeGen/ARM/vabs.ll		patch \| blob \| history
test/CodeGen/ARM/vld1.ll		patch \| blob \| history
test/CodeGen/ARM/vld2.ll		patch \| blob \| history
test/CodeGen/ARM/vld3.ll		patch \| blob \| history
test/CodeGen/ARM/vld4.ll		patch \| blob \| history
test/CodeGen/ARM/vmax.ll		patch \| blob \| history
test/CodeGen/ARM/vmin.ll		patch \| blob \| history
test/CodeGen/ARM/vpmax.ll		patch \| blob \| history
test/CodeGen/ARM/vpmin.ll		patch \| blob \| history
test/CodeGen/ARM/vrecpe.ll		patch \| blob \| history
test/CodeGen/ARM/vrsqrte.ll		patch \| blob \| history
test/CodeGen/ARM/vst1.ll		patch \| blob \| history
test/CodeGen/ARM/vst2.ll		patch \| blob \| history
test/CodeGen/ARM/vst3.ll		patch \| blob \| history
test/CodeGen/ARM/vst4.ll		patch \| blob \| history
test/CodeGen/ARM/vtrn.ll		patch \| blob \| history
test/CodeGen/ARM/vuzp.ll		patch \| blob \| history
test/CodeGen/ARM/vzip.ll		patch \| blob \| history