Fix the order of the operands in the llvm.fma intrinsic patterns for ARM,

author Lang Hames <lhames@gmail.com>

Fri, 27 Apr 2012 18:51:24 +0000 (18:51 +0000)

committer Lang Hames <lhames@gmail.com>

Fri, 27 Apr 2012 18:51:24 +0000 (18:51 +0000)
author Lang Hames <lhames@gmail.com>
Fri, 27 Apr 2012 18:51:24 +0000 (18:51 +0000)
committer Lang Hames <lhames@gmail.com>
Fri, 27 Apr 2012 18:51:24 +0000 (18:51 +0000)
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 31dd843fe898096c7b1bc8d739fc77b241de1f6a..6132989b636715c90051f5e845a87d3d59599238 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4136,16 +4136,16 @@ def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                  Requires<[HasVFP4,UseFusedMAC]>;
  
  // Match @llvm.fma.* intrinsics
-def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)),
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
            (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
            Requires<[HasVFP4]>;
-def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)),
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
            (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
            Requires<[HasVFP4]>;
-def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)),
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
            (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasVFP4]>;
-def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)),
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
            (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasVFP4]>;
  
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index f447e304b4960be133e07558fe28692f6fe3b96e..dccbffa4c9fdb577ec510a50f993523e65a8f57b 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1089,10 +1089,11 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
            Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
  
  // Match @llvm.fma.* intrinsics
-def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)),
+// (fma x, y, z) -> (vfma z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
            (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)),
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
            (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
  
@@ -1123,18 +1124,18 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
            Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
  
  // Match @llvm.fma.* intrinsics
-// (fma (fneg x), y, z) -> (vfms x, y, z)
-def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)),
+// (fma (fneg x), y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
            (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)),
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
            (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
-// (fneg (fma x, (fneg y), z) -> (vfms x, y, z)
-def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))),
+// (fma x, (fneg y), z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
            (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))),
+def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
            (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
  
@@ -1165,18 +1166,18 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
            Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
  
  // Match @llvm.fma.* intrinsics
-// (fneg (fma x, y, z)) -> (vfnma x, y, z)
-def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))),
+// (fneg (fma x, y, z)) -> (vfnma z, x, y)
+def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
            (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))),
+def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
            (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
-// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z)
-def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))),
+// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
            (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))),
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
            (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
  
@@ -1206,18 +1207,18 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
            Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
  
  // Match @llvm.fma.* intrinsics
-// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z)
-def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))),
+// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
            (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))),
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
            (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
-// (fma x, (fneg y), z) -> (vnfms x, y, z)
-def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)),
+// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
            (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
        Requires<[HasVFP4]>;
-def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)),
+def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
            (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;
  
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll

index 802d1b8b3932b45f92361d1e3affa38beb469460..1ad7ce18ea1eda182c170ad72d99d8be15332c10 100644 (file)
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -138,8 +138,7 @@ entry:
  ; CHECK: vfms.f64
    %tmp1 = fsub double -0.0, %b
    %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
-  %tmp3 = fsub double -0.0, %tmp2
-  ret double %tmp3
+  ret double %tmp2
  }
  
  define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -158,7 +157,8 @@ entry:
  ; CHECK: vfnms.f64
    %tmp1 = fsub double -0.0, %b
    %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
-  ret double %tmp2
+  %tmp3 = fsub double -0.0, %tmp2
+  ret double %tmp3
  }
  
  define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
author	Lang Hames <lhames@gmail.com>
	Fri, 27 Apr 2012 18:51:24 +0000 (18:51 +0000)
committer	Lang Hames <lhames@gmail.com>
	Fri, 27 Apr 2012 18:51:24 +0000 (18:51 +0000)
lib/Target/ARM/ARMInstrNEON.td		patch | blob | history
lib/Target/ARM/ARMInstrVFP.td		patch | blob | history
test/CodeGen/ARM/fusedMAC.ll		patch | blob | history