AArch64/PowerPC/SystemZ/X86: This patch fixes the interface, usage, and all target implementations of isFMAFasterThanMulAndAdd (renamed to isFMAFasterThanFMulAndFAdd)

author Stephen Lin <stephenwlin@gmail.com>

Tue, 9 Jul 2013 18:16:56 +0000 (18:16 +0000)

committer Stephen Lin <stephenwlin@gmail.com>

Tue, 9 Jul 2013 18:16:56 +0000 (18:16 +0000)
author Stephen Lin <stephenwlin@gmail.com>
Tue, 9 Jul 2013 18:16:56 +0000 (18:16 +0000)
committer Stephen Lin <stephenwlin@gmail.com>
Tue, 9 Jul 2013 18:16:56 +0000 (18:16 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 70b285ea75e91404604677471d681ce09816404c..d1c98f66d23d52d9790581bbebd64f16f5cbcaaa 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1213,11 +1213,16 @@ public:
      return false;
    }
  
-  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-  /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-  /// is expanded to mul + add.
-  virtual bool isFMAFasterThanMulAndAdd(EVT) const {
+  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+  /// expanded to FMAs when this method returns true, otherwise fmuladd is
+  /// expanded to fmul + fadd.
+  ///
+  /// NOTE: This may be called before legalization on types for which FMAs are
+  /// not legal, but should return true if those types will eventually legalize
+  /// to types that support FMAs. After legalization, it will only be called on
+  /// types that support FMAs (via Legal or Custom actions).
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
      return false;
    }
  
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 9eb63e2c7113537d11e05a5cc11df06a09269292..98806551e4d70b9e25cb28a43e00270b0fb97203 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6084,8 +6084,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
    // FADD -> FMA combines:
    if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
         DAG.getTarget().Options.UnsafeFPMath) &&
-      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
-      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
  
      // fold (fadd (fmul x, y), z) -> (fma x, y, z)
      if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
@@ -6161,8 +6161,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    // FSUB -> FMA combines:
    if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
         DAG.getTarget().Options.UnsafeFPMath) &&
-      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
-      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
  
      // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
      if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index dcde5ac3b89a89777bb5ed3e384c8bfad5a23041..2a1ded04b483847e1f0df34f0e96eddcf261ca7f 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4922,7 +4922,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::fmuladd: {
      EVT VT = TLI->getValueType(I.getType());
      if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI->isFMAFasterThanMulAndAdd(VT)) {
+        TLI->isFMAFasterThanFMulAndFAdd(VT)) {
        setValue(&I, DAG.getNode(ISD::FMA, sdl,
                                 getValue(I.getArgOperand(0)).getValueType(),
                                 getValue(I.getArgOperand(0)),
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 84051d40ecae0916bceea34c14c15b0aa78d6fbf..1fa1edba190b31d37b788c8bfdb2e0e8ca8ee05d 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2798,6 +2798,27 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
    return SDValue();
  }
  
+bool
+AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f16:
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  case MVT::f128:
+    return false;
+  default:
+    break;
+  }
+
+  return false;
+}
+
  AArch64TargetLowering::ConstraintType
  AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
    if (Constraint.size() == 1) {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index 901a9bec28a5c2f5ae3fbf8924741c16501a3036..320346e60b738a20afd40a8c055ba1e95def35ad 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -229,11 +229,11 @@ public:
  
    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  
-  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-  /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-  /// is expanded to mul + add.
-  virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+  /// expanded to FMAs when this method returns true, otherwise fmuladd is
+  /// expanded to fmul + fadd.
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
  
    ConstraintType getConstraintType(const std::string &Constraint) const;
  
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index cf41c02e749e2b1274ae52ab9881ac9a4af5078a..812f096cdd677351c679e7d8cbd763363e91a615 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7809,18 +7809,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
    return true;
  }
  
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
    if (!VT.isSimple())
      return false;
  
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f32:
    case MVT::f64:
-  case MVT::v4f32:
      return true;
    default:
      break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 4801a415ca454d025cb981d21c14a9c103243579..776ad2a75ff4d050330bb92bf5f039197cdb4cf7 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -459,11 +459,11 @@ namespace llvm {
      /// relative to software emulation.
      virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
  
-    /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-    /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-    /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-    /// is expanded to mul + add.
-    virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+    /// expanded to FMAs when this method returns true, otherwise fmuladd is
+    /// expanded to fmul + fadd.
+    virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
  
    private:
      SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp

index b1abc2c3c101a7243897a1749456f771175af074..d344134b24f634313ea7afd7c60205a70b80e7f6 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -255,6 +255,26 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
    MaxStoresPerMemsetOptSize = 0;
  }
  
+bool
+SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  case MVT::f128:
+    return false;
+  default:
+    break;
+  }
+
+  return false;
+}
+
  bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
    // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
    return Imm.isZero() || Imm.isNegZero();
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h

index 4ddfcbbda0531f252aab230423ca4c62d9306ec6..88e1fa7d7463f95e99a0c759e71779a22bf4dc08 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -129,9 +129,7 @@ public:
    virtual EVT getSetCCResultType(LLVMContext &, EVT) const {
      return MVT::i32;
    }
-  virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE {
-    return true;
-  }
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE;
    virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
    virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a680ac09b5f02d12e5cd6d632d7a03588f7b1ce6..f00df3543a8bdf2f14c8665d90409042efa6108e 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12966,6 +12966,27 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
    return false;
  }
  
+bool
+X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+    return false;
+
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  default:
+    break;
+  }
+
+  return false;
+}
+
  bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
    // i16 instructions are longer (0x66 prefix) and potentially slower.
    return !(VT1 == MVT::i32 && VT2 == MVT::i16);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 0e5e822e6bfaea4b8568a068ed6e5283638488d7..8317824b84d71015f905bbe3ed86b3e297646137 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -646,11 +646,11 @@ namespace llvm {
      virtual bool isZExtFree(EVT VT1, EVT VT2) const;
      virtual bool isZExtFree(SDValue Val, EVT VT2) const;
  
-    /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-    /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-    /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-    /// is expanded to mul + add.
-    virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+    /// expanded to FMAs when this method returns true, otherwise fmuladd is
+    /// expanded to fmul + fadd.
+    virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
  
      /// isNarrowingProfitable - Return true if it's profitable to narrow
      /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll

index 39db9be15771b4cd35988ae7d957ce2dfe3ae6df..f372c43159b75680b66cbe463bf0815bfec2ce22 100644 (file)
--- a/test/CodeGen/AArch64/fp-dp3.ll
+++ b/test/CodeGen/AArch64/fp-dp3.ll
@@ -1,102 +1,136 @@
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST
  
  declare float @llvm.fma.f32(float, float, float)
  declare double @llvm.fma.f64(double, double, double)
  
  define float @test_fmadd(float %a, float %b, float %c) {
  ; CHECK: test_fmadd:
+; CHECK-NOFAST: test_fmadd:
    %val = call float @llvm.fma.f32(float %a, float %b, float %c)
  ; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %val
  }
  
  define float @test_fmsub(float %a, float %b, float %c) {
  ; CHECK: test_fmsub:
+; CHECK-NOFAST: test_fmsub:
    %nega = fsub float -0.0, %a
    %val = call float @llvm.fma.f32(float %nega, float %b, float %c)
  ; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %val
  }
  
  define float @test_fnmadd(float %a, float %b, float %c) {
  ; CHECK: test_fnmadd:
+; CHECK-NOFAST: test_fnmadd:
    %negc = fsub float -0.0, %c
    %val = call float @llvm.fma.f32(float %a, float %b, float %negc)
  ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %val
  }
  
  define float @test_fnmsub(float %a, float %b, float %c) {
  ; CHECK: test_fnmsub:
+; CHECK-NOFAST: test_fnmsub:
    %nega = fsub float -0.0, %a
    %negc = fsub float -0.0, %c
    %val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
  ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %val
  }
  
  define double @testd_fmadd(double %a, double %b, double %c) {
  ; CHECK: testd_fmadd:
+; CHECK-NOFAST: testd_fmadd:
    %val = call double @llvm.fma.f64(double %a, double %b, double %c)
  ; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
    ret double %val
  }
  
  define double @testd_fmsub(double %a, double %b, double %c) {
  ; CHECK: testd_fmsub:
+; CHECK-NOFAST: testd_fmsub:
    %nega = fsub double -0.0, %a
    %val = call double @llvm.fma.f64(double %nega, double %b, double %c)
  ; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
    ret double %val
  }
  
  define double @testd_fnmadd(double %a, double %b, double %c) {
  ; CHECK: testd_fnmadd:
+; CHECK-NOFAST: testd_fnmadd:
    %negc = fsub double -0.0, %c
    %val = call double @llvm.fma.f64(double %a, double %b, double %negc)
  ; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
    ret double %val
  }
  
  define double @testd_fnmsub(double %a, double %b, double %c) {
  ; CHECK: testd_fnmsub:
+; CHECK-NOFAST: testd_fnmsub:
    %nega = fsub double -0.0, %a
    %negc = fsub double -0.0, %c
    %val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
  ; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
    ret double %val
  }
  
  define float @test_fmadd_unfused(float %a, float %b, float %c) {
  ; CHECK: test_fmadd_unfused:
+; CHECK-NOFAST: test_fmadd_unfused:
    %prod = fmul float %b, %c
    %sum = fadd float %a, %prod
  ; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %sum
  }
  
  define float @test_fmsub_unfused(float %a, float %b, float %c) {
  ; CHECK: test_fmsub_unfused:
+; CHECK-NOFAST: test_fmsub_unfused:
    %prod = fmul float %b, %c
    %diff = fsub float %a, %prod
  ; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %diff
  }
  
  define float @test_fnmadd_unfused(float %a, float %b, float %c) {
  ; CHECK: test_fnmadd_unfused:
+; CHECK-NOFAST: test_fnmadd_unfused:
    %nega = fsub float -0.0, %a
    %prod = fmul float %b, %c
    %sum = fadd float %nega, %prod
  ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %sum
  }
  
  define float @test_fnmsub_unfused(float %a, float %b, float %c) {
  ; CHECK: test_fnmsub_unfused:
+; CHECK-NOFAST: test_fnmsub_unfused:
    %nega = fsub float -0.0, %a
    %prod = fmul float %b, %c
    %diff = fsub float %nega, %prod
  ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fneg {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
    ret float %diff
  }
diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll

index 446151b8ffacdf31cff3a01940eca57ee7eb19d7..a398f7bff613828f48cabd526eedd90b4a6ce4d9 100644 (file)
--- a/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -219,3 +219,29 @@ define void @test_frem(float %float, double %double, fp128 %fp128) {
  
    ret void
  }
+
+declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
+
+define void @test_fma(fp128 %fp128) {
+; CHECK: test_fma:
+
+  %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+  store fp128 %fmafp128, fp128* @varfp128
+; CHECK: bl fmal
+
+  ret void
+}
+
+declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
+
+define void @test_fmuladd(fp128 %fp128) {
+; CHECK: test_fmuladd:
+
+  %fmuladdfp128 = call fp128 @llvm.fmuladd.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+  store fp128 %fmuladdfp128, fp128* @varfp128
+; CHECK-NOT: bl fmal
+; CHECK: bl __multf3
+; CHECK: bl __addtf3
+
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/vec_fmuladd.ll b/test/CodeGen/PowerPC/vec_fmuladd.ll

new file mode 100644 (file)

index 0000000..b1bc377
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_fmuladd.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float> %val, <2 x float>, <2 x float>)
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float> %val, <4 x float>, <4 x float>)
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float> %val, <8 x float>, <8 x float>)
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double> %val, <2 x double>, <2 x double>)
+declare <4 x double> @llvm.fmuladd.v4f64(<4 x double> %val, <4 x double>, <4 x double>)
+
+define <2 x float> @v2f32_fmuladd(<2 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <2 x float> @llvm.fmuladd.v2f32 (<2 x float> %x, <2 x float> %x, <2 x float> %x)
+  ret <2 x float> %fmuladd
+}
+; fmuladd (<2 x float>) is promoted to fmuladd (<4 x float>)
+; CHECK: v2f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_fmuladd(<4 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <4 x float> @llvm.fmuladd.v4f32 (<4 x float> %x, <4 x float> %x, <4 x float> %x)
+  ret <4 x float> %fmuladd
+}
+; CHECK: v4f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_fmuladd(<8 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <8 x float> @llvm.fmuladd.v8f32 (<8 x float> %x, <8 x float> %x, <8 x float> %x)
+  ret <8 x float> %fmuladd
+}
+; CHECK: v8f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_fmuladd(<2 x double> %x) nounwind readnone {
+entry:
+  %fmuladd = call <2 x double> @llvm.fmuladd.v2f64 (<2 x double> %x, <2 x double> %x, <2 x double> %x)
+  ret <2 x double> %fmuladd
+}
+; CHECK: v2f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_fmuladd(<4 x double> %x) nounwind readnone {
+entry:
+  %fmuladd = call <4 x double> @llvm.fmuladd.v4f64 (<4 x double> %x, <4 x double> %x, <4 x double> %x)
+  ret <4 x double> %fmuladd
+}
+; CHECK: v4f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
diff --git a/test/CodeGen/X86/extended-fma-contraction.ll b/test/CodeGen/X86/extended-fma-contraction.ll

new file mode 100644 (file)

index 0000000..ef2c22b
--- /dev/null
+++ b/test/CodeGen/X86/extended-fma-contraction.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
+
+; CHECK: fmafunc
+define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
+
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
+  %ret = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
+  ret <3 x float> %ret
+}
+
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/fma_patterns_wide.ll b/test/CodeGen/X86/fma_patterns_wide.ll

new file mode 100644 (file)

index 0000000..d84e5a0
--- /dev/null
+++ b/test/CodeGen/X86/fma_patterns_wide.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
+
+; CHECK: test_x86_fmadd_ps_y_wide
+; CHECK: vfmadd213ps
+; CHECK: vfmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps_y_wide
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fadd <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps_y_wide
+; CHECK: vfmsub213ps
+; CHECK: vfmsub213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps_y_wide
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps_y_wide
+; CHECK: vfnmadd213ps
+; CHECK: vfnmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ps_y_wide
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fnmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %a2, %x
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps_y_wide
+; CHECK: vfnmsub213ps
+; CHECK: vfnmsub213ps
+; CHECK: ret
+define <16 x float> @test_x86_fnmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+  %res = fsub <16 x float> %y, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmadd_pd_y_wide
+; CHECK: vfmadd213pd
+; CHECK: vfmadd213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_pd_y_wide
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmadd_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fadd <8 x double> %x, %a2
+  ret <8 x double> %res
+}
+
+; CHECK: test_x86_fmsub_pd_y_wide
+; CHECK: vfmsub213pd
+; CHECK: vfmsub213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_pd_y_wide
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmsub_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fsub <8 x double> %x, %a2
+  ret <8 x double> %res
+}
diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll

index d93f33ba0e581f75144b0e074c27866b81939e48..7ee0fbaf59cdee186b4659ac757814fa1fd18786 100644 (file)
--- a/test/CodeGen/X86/wide-fma-contraction.ll
+++ b/test/CodeGen/X86/wide-fma-contraction.ll
@@ -1,7 +1,9 @@
  ; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
  
  ; CHECK: fmafunc
  define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+
  ; CHECK-NOT: vmulps
  ; CHECK-NOT: vaddps
  ; CHECK: vfmaddps
@@ -10,11 +12,17 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
  ; CHECK: vfmaddps
  ; CHECK-NOT: vmulps
  ; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
    %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
    ret <16 x float> %ret
  }
  
  declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
-
-
-
author	Stephen Lin <stephenwlin@gmail.com>
	Tue, 9 Jul 2013 18:16:56 +0000 (18:16 +0000)
committer	Stephen Lin <stephenwlin@gmail.com>
	Tue, 9 Jul 2013 18:16:56 +0000 (18:16 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
lib/Target/SystemZ/SystemZISelLowering.cpp		patch \| blob \| history
lib/Target/SystemZ/SystemZISelLowering.h		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/AArch64/fp-dp3.ll		patch \| blob \| history
test/CodeGen/AArch64/illegal-float-ops.ll		patch \| blob \| history
test/CodeGen/PowerPC/vec_fmuladd.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/extended-fma-contraction.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/fma_patterns_wide.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/wide-fma-contraction.ll		patch \| blob \| history