updated patch for the ARM fused multiply add/sub

author Sebastian Pop <spop@codeaurora.org>

Mon, 5 Mar 2012 17:39:52 +0000 (17:39 +0000)

committer Sebastian Pop <spop@codeaurora.org>

Mon, 5 Mar 2012 17:39:52 +0000 (17:39 +0000)
author Sebastian Pop <spop@codeaurora.org>
Mon, 5 Mar 2012 17:39:52 +0000 (17:39 +0000)
committer Sebastian Pop <spop@codeaurora.org>
Mon, 5 Mar 2012 17:39:52 +0000 (17:39 +0000)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td

index 16af8cfbe2cb67c05a76ed8075a3b1b0d209c11a..b05fe629b746004abf49d185391c84621dca1f88 100644 (file)
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -38,9 +38,9 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
  def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
                                     "Enable NEON instructions",
                                     [FeatureVFP3]>;
-def FeatureNEONVFP4 : SubtargetFeature<"neon-vfpv4", "HasNEONVFPv4", "true",
-                                       "Enable NEON-VFP4 instructions",
-                                       [FeatureVFP4, FeatureNEON]>;
+def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true",
+                                   "Enable Advanced SIMD2 instructions",
+                                   [FeatureNEON]>;
  def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
                                       "Enable Thumb2 instructions">;
  def FeatureNoARM  : SubtargetFeature<"noarm", "NoARM", "true",
@@ -76,6 +76,8 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
  def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
                                          "true",
                                          "Use NEON for single precision FP">;
+// Allow FP contractions (e.g. fused multiply-add/sub) unless NoExcessFPPrecision is set
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
  
  // Disable 32-bit to 16-bit narrowing for experimentation.
  def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp

index 9d8c97a8323b71c9c5c8f368cd10f21f152671d4..4ec19ccb42ec191f2583a0a69f109a5cfe5a6fef 100644 (file)
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -732,10 +732,10 @@ void ARMAsmPrinter::emitAttributes() {
    if (Subtarget->hasNEON() && emitFPU) {
      /* NEON is not exactly a VFP architecture, but GAS emit one of
       * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
-    if (Subtarget->hasNEONVFP4())
+    if (Subtarget->hasNEON2())
        AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
      else
-     AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
      /* If emitted for NEON, omit from VFP below, since you can have both
       * NEON and VFP in build attributes but only one .fpu */
      emitFPU = false;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index 6f510ba665d96315453b122ed3a42132ec94b34f..0b1406e657aa35f1a9fb8798aec3056dff314b1c 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -184,9 +184,9 @@ def HasVFP4          : Predicate<"Subtarget->hasVFP4()">,
  def NoVFP4            : Predicate<"!Subtarget->hasVFP4()">;
  def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                   AssemblerPredicate<"FeatureNEON">;
-def HasNEONVFP4      : Predicate<"Subtarget->hasNEONVFP4()">,
-                                 AssemblerPredicate<"FeatureNEONVFP4">;
-def NoNEONVFP4       : Predicate<"!Subtarget->hasNEONVFP4()">;
+def HasNEON2         : Predicate<"Subtarget->hasNEON2()">,
+                                 AssemblerPredicate<"FeatureNEON2">;
+def NoNEON2          : Predicate<"!Subtarget->hasNEON2()">;
  def HasFP16          : Predicate<"Subtarget->hasFP16()">,
                                   AssemblerPredicate<"FeatureFP16">;
  def HasDivide        : Predicate<"Subtarget->hasDivide()">,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 387e16d1462db1cf5c3954d663452e0a5d251023..17fe80851c04aa16776110b6394c22566f010ea4 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4060,10 +4060,10 @@ defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
-                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+                Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
  def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                            v4f32, fmul_su, fadd_mlx>,
-                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+                Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
  defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                                IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
  def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4118,10 +4118,10 @@ defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
  def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
-                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+                Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
  def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                            v4f32, fmul_su, fsub_mlx>,
-                Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>;
+                Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
  defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                                IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
  def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4174,19 +4174,19 @@ defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
  // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
  def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                            v2f32, fmul_su, fadd_mlx>,
-                Requires<[HasNEONVFP4]>;
+                Requires<[HasNEON2,FPContractions]>;
  
  def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                            v4f32, fmul_su, fadd_mlx>,
-                Requires<[HasNEONVFP4]>;
+                Requires<[HasNEON2,FPContractions]>;
  
  //   Fused Vector Multiply Subtract (floating-point)
  def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                            v2f32, fmul_su, fsub_mlx>,
-                Requires<[HasNEONVFP4]>;
+                Requires<[HasNEON2,FPContractions]>;
  def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                            v4f32, fmul_su, fsub_mlx>,
-                Requires<[HasNEONVFP4]>;
+                Requires<[HasNEON2,FPContractions]>;
  
  // Vector Subtract Operations.
  
@@ -5541,13 +5541,13 @@ def : N3VSPat<fadd, VADDfd>;
  def : N3VSPat<fsub, VSUBfd>;
  def : N3VSPat<fmul, VMULfd>;
  def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
-      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
  def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
-      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>;
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
  def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
-      Requires<[HasNEONVFP4, UseNEONForFP]>;
+      Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
  def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
-      Requires<[HasNEONVFP4, UseNEONForFP]>;
+      Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
  def : N2VSPat<fabs, VABSfd>;
  def : N2VSPat<fneg, VNEGfd>;
  def : N3VSPat<NEONfmax, VMAXfd>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index bf32b49640f936a901c8d2141b18aa32dee7feec..aa10af756d723d08bfd7e146f031f32367c71dd3 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1030,7 +1030,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
                   [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
                                            (f64 DPR:$Ddin)))]>,
                RegConstraint<"$Ddin = $Dd">,
-              Requires<[HasVFP4]>;
+              Requires<[HasVFP4,FPContractions]>;
  
  def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1038,17 +1038,17 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
                    [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
                                             SPR:$Sdin))]>,
                RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasVFP4,DontUseNEONForFP]> {
+              Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
    // Some single precision VFP instructions may be executed on both NEON and
    // VFP pipelines.
  }
  
  def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
            (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
-          Requires<[HasVFP4]>;
+          Requires<[HasVFP4,FPContractions]>;
  def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
            (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
-          Requires<[HasVFP4,DontUseNEONForFP]>;
+          Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
  
  def VFMSD : ADbI<0b11101, 0b10, 1, 0,
                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1056,7 +1056,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
                   [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
                                            (f64 DPR:$Ddin)))]>,
                RegConstraint<"$Ddin = $Dd">,
-              Requires<[HasVFP4]>;
+              Requires<[HasVFP4,FPContractions]>;
  
  def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
                    (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1064,17 +1064,17 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
                    [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                             SPR:$Sdin))]>,
                RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasVFP4,DontUseNEONForFP]> {
+              Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
    // Some single precision VFP instructions may be executed on both NEON and
    // VFP pipelines.
  }
  
  def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
            (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
-          Requires<[HasVFP4]>;
+          Requires<[HasVFP4,FPContractions]>;
  def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
            (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
-          Requires<[HasVFP4,DontUseNEONForFP]>;
+          Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
  
  def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
                    (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1082,7 +1082,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
                    [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
                                            (f64 DPR:$Ddin)))]>,
                  RegConstraint<"$Ddin = $Dd">,
-                Requires<[HasVFP4]>;
+                Requires<[HasVFP4,FPContractions]>;
  
  def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
                    (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1090,17 +1090,17 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
                    [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                             SPR:$Sdin))]>,
                  RegConstraint<"$Sdin = $Sd">,
-                Requires<[HasVFP4,DontUseNEONForFP]> {
+                Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
    // Some single precision VFP instructions may be executed on both NEON and
    // VFP pipelines.
  }
  
  def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
            (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
-          Requires<[HasVFP4]>;
+          Requires<[HasVFP4,FPContractions]>;
  def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
            (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
-          Requires<[HasVFP4,DontUseNEONForFP]>;
+          Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
  
  def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
                    (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1108,24 +1108,24 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
                    [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
                                             (f64 DPR:$Ddin)))]>,
                 RegConstraint<"$Ddin = $Dd">,
-               Requires<[HasVFP4]>;
+               Requires<[HasVFP4,FPContractions]>;
  
  def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
                    (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                    IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
               [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
                           RegConstraint<"$Sdin = $Sd">,
-                  Requires<[HasVFP4,DontUseNEONForFP]> {
+                  Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> {
    // Some single precision VFP instructions may be executed on both NEON and
    // VFP pipelines.
  }
  
  def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
            (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
-          Requires<[HasVFP4]>;
+          Requires<[HasVFP4,FPContractions]>;
  def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
            (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
-          Requires<[HasVFP4,DontUseNEONForFP]>;
+          Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>;
  
  //===----------------------------------------------------------------------===//
  // FP Conditional moves.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp

index 64b2b620eb4eba20f07476caf6dc06017793eeb2..1bd6f1cbdf53ab3308f6172e49701bdd507ac6a9 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -49,7 +49,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
    , HasVFPv3(false)
    , HasVFPv4(false)
    , HasNEON(false)
-  , HasNEONVFPv4(false)
+  , HasNEON2(false)
    , UseNEONForSinglePrecisionFP(false)
    , SlowFPVMLx(false)
    , HasVMLxForwarding(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h

index c94795f753f0dc88570bfc6d233f867cf8bf0294..3d9c03d5dd2275c8feeda75047c9d2086cfd6a13 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -51,7 +51,7 @@ protected:
    bool HasVFPv3;
    bool HasVFPv4;
    bool HasNEON;
-  bool HasNEONVFPv4;
+  bool HasNEON2;
  
    /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
    /// specified. Use the method useNEONForSinglePrecisionFP() to
@@ -205,7 +205,7 @@ protected:
    bool hasVFP3() const { return HasVFPv3; }
    bool hasVFP4() const { return HasVFPv4; }
    bool hasNEON() const { return HasNEON;  }
-  bool hasNEONVFP4() const { return HasNEONVFPv4;  }
+  bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4);  }
    bool useNEONForSinglePrecisionFP() const {
      return hasNEON() && UseNEONForSinglePrecisionFP; }
  
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll

index d7b8ed165b4c5e57b43425ba543a6bea1a5407a7..40e8bb2f00fe20a17e8283372cbe5dd7893738a8 100644 (file)
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=arm -mattr=+neon-vfpv4 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
  ; Check generated fused MAC and MLS.
  
-define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readnone noinline {
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
  ;CHECK: fusedMACTest1:
  ;CHECK: vfma.f64
    %1 = fmul double %d1, %d2
@@ -9,7 +9,7 @@ define double @fusedMACTest1(double %d1, double %d2, double %d3) nounwind readno
    ret double %2
  }
  
-define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone noinline {
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
  ;CHECK: fusedMACTest2:
  ;CHECK: vfma.f32
    %1 = fmul float %f1, %f2
@@ -17,7 +17,7 @@ define float @fusedMACTest2(float %f1, float %f2, float %f3) nounwind readnone n
    ret float %2
  }
  
-define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readnone noinline {
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
  ;CHECK: fusedMACTest3:
  ;CHECK: vfms.f64
    %1 = fmul double %d2, %d3
@@ -25,7 +25,7 @@ define double @fusedMACTest3(double %d1, double %d2, double %d3) nounwind readno
    ret double %2
  }
  
-define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone noinline {
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
  ;CHECK: fusedMACTest4:
  ;CHECK: vfms.f32
    %1 = fmul float %f2, %f3
@@ -33,7 +33,7 @@ define float @fusedMACTest4(float %f1, float %f2, float %f3) nounwind readnone n
    ret float %2
  }
  
-define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readnone noinline {
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
  ;CHECK: fusedMACTest5:
  ;CHECK: vfnma.f64
    %1 = fmul double %d1, %d2
@@ -42,7 +42,7 @@ define double @fusedMACTest5(double %d1, double %d2, double %d3) nounwind readno
    ret double %3
  }
  
-define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
  ;CHECK: fusedMACTest6:
  ;CHECK: vfnma.f32
    %1 = fmul float %f1, %f2
@@ -51,7 +51,7 @@ define float @fusedMACTest6(float %f1, float %f2, float %f3) nounwind {
    ret float %3
  }
  
-define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
  ;CHECK: fusedMACTest7:
  ;CHECK: vfnms.f64
    %1 = fmul double %d1, %d2
@@ -59,10 +59,42 @@ define double @fusedMACTest7(double %d1, double %d2, double %d3) nounwind {
    ret double %2
  }
  
-define float @fusedMACTest8(float %f1, float %f2, float %f3) nounwind {
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
  ;CHECK: fusedMACTest8:
  ;CHECK: vfnms.f32
    %1 = fmul float %f1, %f2
    %2 = fsub float %1, %f3
    ret float %2
  }
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+  %mul = fmul <2 x float> %a, %b
+  %add = fadd <2 x float> %mul, %a
+  ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+  %mul = fmul <2 x float> %a, %b
+  %sub = fsub <2 x float> %a, %mul
+  ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %a
+  ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+  %mul = fmul <4 x float> %a, %b
+  %sub = fsub <4 x float> %a, %mul
+  ret <4 x float> %sub
+}
author	Sebastian Pop <spop@codeaurora.org>
	Mon, 5 Mar 2012 17:39:52 +0000 (17:39 +0000)
committer	Sebastian Pop <spop@codeaurora.org>
	Mon, 5 Mar 2012 17:39:52 +0000 (17:39 +0000)
lib/Target/ARM/ARM.td		patch \| blob \| history
lib/Target/ARM/ARMAsmPrinter.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
lib/Target/ARM/ARMInstrNEON.td		patch \| blob \| history
lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.cpp		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.h		patch \| blob \| history
test/CodeGen/ARM/fusedMAC.ll		patch \| blob \| history