[mips] For the FP64A ABI, odd-numbered double-precision moves must not use mtc1/mfc1.

author Daniel Sanders <daniel.sanders@imgtec.com>

Mon, 14 Jul 2014 13:08:14 +0000 (13:08 +0000)

committer Daniel Sanders <daniel.sanders@imgtec.com>

Mon, 14 Jul 2014 13:08:14 +0000 (13:08 +0000)
author Daniel Sanders <daniel.sanders@imgtec.com>
Mon, 14 Jul 2014 13:08:14 +0000 (13:08 +0000)
committer Daniel Sanders <daniel.sanders@imgtec.com>
Mon, 14 Jul 2014 13:08:14 +0000 (13:08 +0000)
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp

index a3306686fc4389fc6018d0f9fdb58a3451653454..bc896be4e1de287db2703693cedb0ff5c411cff8 100644 (file)
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -137,12 +137,12 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
    return MachinePointerInfo(E);
  }
  
-int MipsFunctionInfo::getBuildPairF64_FI(const TargetRegisterClass *RC) {
-  if (BuildPairF64_FI == -1) {
-    BuildPairF64_FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
-        RC->getAlignment(), false);
+int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
+  if (MoveF64ViaSpillFI == -1) {
+    MoveF64ViaSpillFI = MF.getFrameInfo()->CreateStackObject(
+        RC->getSize(), RC->getAlignment(), false);
    }
-  return BuildPairF64_FI;
+  return MoveF64ViaSpillFI;
  }
  
  void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h

index a667d43724c17110c763bd369f29e339d621d8b8..61260e5781596c212e44da977994f55ff4982095 100644 (file)
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -55,7 +55,7 @@ public:
    MipsFunctionInfo(MachineFunction &MF)
        : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
          VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false),
-        BuildPairF64_FI(-1) {}
+        MoveF64ViaSpillFI(-1) {}
  
    ~MipsFunctionInfo();
  
@@ -97,7 +97,7 @@ public:
    void setSaveS2() { SaveS2 = true; }
    bool hasSaveS2() const { return SaveS2; }
  
-  int getBuildPairF64_FI(const TargetRegisterClass *RC);
+  int getMoveF64ViaSpillFI(const TargetRegisterClass *RC);
  
    std::map<const char *, const llvm::Mips16HardFloatInfo::FuncSignature *>
    StubsNeeded;
@@ -141,7 +141,7 @@ private:
  
    /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the
    /// O32 FPXX ABI is enabled. -1 is used to denote invalid index.
-  int BuildPairF64_FI;
+  int MoveF64ViaSpillFI;
  
    /// MipsCallEntry maps.
    StringMap<const MipsCallEntry *> ExternalCallEntries;
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp

index f2276f19afa160df8a742bd1fcf24d8a2bfeefeb..d0a17cd834a01dd2b7a3794ddb0cc6bf7e39b5e9 100644 (file)
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -66,6 +66,8 @@ private:
                       unsigned MFLoOpc);
    bool expandBuildPairF64(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, bool FP64) const;
+  bool expandExtractElementF64(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator I, bool FP64) const;
  
    MachineFunction &MF;
    MachineRegisterInfo &MRI;
@@ -118,6 +120,14 @@ bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
      if (expandBuildPairF64(MBB, I, true))
        MBB.erase(I);
      return false;
+  case Mips::ExtractElementF64:
+    if (expandExtractElementF64(MBB, I, false))
+      MBB.erase(I);
+    return false;
+  case Mips::ExtractElementF64_64:
+    if (expandExtractElementF64(MBB, I, true))
+      MBB.erase(I);
+    return false;
    case TargetOpcode::COPY:
      if (!expandCopy(MBB, I))
        return false;
@@ -269,9 +279,10 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
  }
  
  /// This method expands the same instruction that MipsSEInstrInfo::
-/// expandBuildPairF64 does, for the case when ABI is fpxx and mthc1 is
-/// not available. It is implemented here because frame indexes are
-/// eliminated before MipsSEInstrInfo::expandBuildPairF64 is called.
+/// expandBuildPairF64 does, for the case when ABI is fpxx and mthc1 is not
+/// available and the case where the ABI is FP64A. It is implemented here
+/// because frame indexes are eliminated before MipsSEInstrInfo::
+/// expandBuildPairF64 is called.
  bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator I,
                                        bool FP64) const {
@@ -280,10 +291,18 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
    //
    // The case where dmtc1 is available doesn't need to be handled here
    // because it never creates a BuildPairF64 node.
+  //
+  // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence
+  // for odd-numbered double-precision values (because the lower 32 bits are
+  // transferred with mtc1, which is redirected to the upper half of the even
+  // register). Unfortunately, we have to make this decision before register
+  // allocation, so for now we use a spill/reload sequence for all
+  // double-precision values regardless of whether the register is odd or even.
  
    const TargetMachine &TM = MF.getTarget();
-  if (TM.getSubtarget<MipsSubtarget>().isABI_FPXX()
-      && !TM.getSubtarget<MipsSubtarget>().hasMTHC1()) {
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+  if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
+      (FP64 && !Subtarget.useOddSPReg())) {
      const MipsSEInstrInfo &TII =
        *static_cast<const MipsSEInstrInfo*>(TM.getInstrInfo());
      const MipsRegisterInfo &TRI =
@@ -294,13 +313,18 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
      unsigned HiReg = I->getOperand(2).getReg();
  
      // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
-    // the cases where mthc1 is not available).
-    assert(!TM.getSubtarget<MipsSubtarget>().isFP64bit());
+    // the cases where mthc1 is not available). 64-bit architectures and
+    // MIPS32r2 or later can use FGR64 though.
+    assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() ||
+           !Subtarget.isFP64bit());
  
      const TargetRegisterClass *RC = &Mips::GPR32RegClass;
-    const TargetRegisterClass *RC2 = &Mips::AFGR64RegClass;
+    const TargetRegisterClass *RC2 =
+        FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
  
-    int FI = MF.getInfo<MipsFunctionInfo>()->getBuildPairF64_FI(RC2);
+    // We re-use the same spill slot each time so that the stack frame doesn't
+    // grow too much in functions with a large number of moves.
+    int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC2);
      TII.storeRegToStack(MBB, I, LoReg, I->getOperand(1).isKill(), FI, RC, &TRI,
                          0);
      TII.storeRegToStack(MBB, I, HiReg, I->getOperand(2).isKill(), FI, RC, &TRI,
@@ -312,6 +336,62 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
    return false;
  }
  
+/// This method expands the same instruction that MipsSEInstrInfo::
+/// expandExtractElementF64 does, for the case when ABI is fpxx and mfhc1 is not
+/// available and the case where the ABI is FP64A. It is implemented here
+/// because frame indexes are eliminated before MipsSEInstrInfo::
+/// expandExtractElementF64 is called.
+bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator I,
+                                           bool FP64) const {
+  // For fpxx and when mfhc1 is not available, use:
+  //   spill via sdc1 + reload via lw
+  //
+  // The case where dmfc1 is available doesn't need to be handled here
+  // because it never creates an ExtractElementF64 node.
+  //
+  // The FP64A ABI (fp64 with nooddspreg) must also use a spill/reload sequence
+  // for odd-numbered double-precision values (because the lower 32 bits are
+  // transferred with mfc1, which is redirected to the upper half of the even
+  // register). Unfortunately, we have to make this decision before register
+  // allocation, so for now we use a spill/reload sequence for all
+  // double-precision values regardless of whether the register is odd or even.
+
+  const TargetMachine &TM = MF.getTarget();
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+  if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
+      (FP64 && !Subtarget.useOddSPReg())) {
+    const MipsSEInstrInfo &TII =
+        *static_cast<const MipsSEInstrInfo *>(TM.getInstrInfo());
+    const MipsRegisterInfo &TRI =
+        *static_cast<const MipsRegisterInfo *>(TM.getRegisterInfo());
+
+    unsigned DstReg = I->getOperand(0).getReg();
+    unsigned SrcReg = I->getOperand(1).getReg();
+    unsigned N = I->getOperand(2).getImm();
+
+    // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
+    // the cases where mfhc1 is not available). 64-bit architectures and
+    // MIPS32r2 or later can use FGR64 though.
+    assert(Subtarget.isGP64bit() || Subtarget.hasMTHC1() ||
+           !Subtarget.isFP64bit());
+
+    const TargetRegisterClass *RC =
+        FP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+    const TargetRegisterClass *RC2 = &Mips::GPR32RegClass;
+
+    // We re-use the same spill slot each time so that the stack frame doesn't
+    // grow too much in functions with a large number of moves.
+    int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
+    TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, &TRI,
+                        0);
+    TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &TRI, N * 4);
+    return true;
+  }
+
+  return false;
+}
+
  MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
      : MipsFrameLowering(STI, STI.stackAlignment()) {}
  
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp

index d242659d0764af202483b718a87efe8aa884255b..26764611c502cc52a6cc02eff1ae7ec657615aa3 100644 (file)
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -512,6 +512,7 @@ void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
  void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator I,
                                                bool FP64) const {
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
    unsigned DstReg = I->getOperand(0).getReg();
    unsigned SrcReg = I->getOperand(1).getReg();
    unsigned N = I->getOperand(2).getImm();
@@ -521,7 +522,15 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
    unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo;
    unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
  
-  if (SubIdx == Mips::sub_hi && TM.getSubtarget<MipsSubtarget>().hasMTHC1()) {
+  // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload
+  // in MipsSEFrameLowering.cpp.
+  assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2()));
+
+  // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload
+  // in MipsSEFrameLowering.cpp.
+  assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg()));
+
+  if (SubIdx == Mips::sub_hi && Subtarget.hasMTHC1()) {
      // FIXME: Strictly speaking MFHC1 only reads the top 32-bits however, we
      //        claim to read the whole 64-bits as part of a white lie used to
      //        temporarily work around a widespread bug in the -mfp64 support.
@@ -543,6 +552,7 @@ void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
  void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           bool FP64) const {
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
    unsigned DstReg = I->getOperand(0).getReg();
    unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
    const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
@@ -564,10 +574,18 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
    // The case where dmtc1 is available doesn't need to be handled here
    // because it never creates a BuildPairF64 node.
  
+  // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload
+  // in MipsSEFrameLowering.cpp.
+  assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2()));
+
+  // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload
+  // in MipsSEFrameLowering.cpp.
+  assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg()));
+
    BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
      .addReg(LoReg);
  
-  if (TM.getSubtarget<MipsSubtarget>().hasMTHC1()) {
+  if (Subtarget.hasMTHC1()) {
      // FIXME: The .addReg(DstReg) is a white lie used to temporarily work
      //        around a widespread bug in the -mfp64 support.
      //        The problem is that none of the 32-bit fpu ops mention the fact
@@ -582,7 +600,7 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
      BuildMI(MBB, I, dl, get(FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg)
          .addReg(DstReg)
          .addReg(HiReg);
-  } else if (TM.getSubtarget<MipsSubtarget>().isABI_FPXX())
+  } else if (Subtarget.isABI_FPXX())
      llvm_unreachable("BuildPairF64 not expanded in frame lowering code!");
    else
      BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp

index 0254d4da4d24243091c1af86bbfd9515f85b9231..902735d7810f88a4b061be37123f6d2bf2ceff24 100644 (file)
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -153,9 +153,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
                         false);
  
    if (!isABI_O32() && !useOddSPReg())
-    report_fatal_error("-mattr=+nooddspreg is not currently permitted for a "
-                       "the O32 ABI.",
-                       false);
+    report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false);
  
    if (IsFPXX && (isABI_N32() || isABI_N64()))
      report_fatal_error("FPXX is not permitted for the N32/N64 ABI's.", false);
diff --git a/test/CodeGen/Mips/fp64a.ll b/test/CodeGen/Mips/fp64a.ll

new file mode 100644 (file)

index 0000000..5c2c873
--- /dev/null
+++ b/test/CodeGen/Mips/fp64a.ll
@@ -0,0 +1,197 @@
+; Test that the FP64A ABI performs double precision moves via a spill/reload.
+; The requirement is really that odd-numbered double precision registers do not
+; use mfc1/mtc1 to move the bottom 32-bits (because the hardware will redirect
+; this to the top 32-bits of the even register) but we have to make the decision
+; before register allocation so we do this for all double-precision values.
+
+; We don't test MIPS32r1 since support for 64-bit coprocessors (such as a 64-bit
+; FPU) on a 32-bit architecture was added in MIPS32r2.
+; FIXME: We currently don't test that attempting to use FP64 on MIPS32r1 is an
+;        error either. This is because a large number of CodeGen tests are
+;        incorrectly using this case. We should fix those test cases then add
+;        this check here.
+
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-BE
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-BE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-NO-FP64A-LE
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=fp64,nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=32R2-FP64A-LE
+
+; RUN: llc -march=mips64 -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
+; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=fp64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NO-FP64A
+; RUN: not llc -march=mips64el -mcpu=mips64 -mattr=fp64,nooddspreg < %s 2>&1 | FileCheck %s -check-prefix=64-FP64A
+
+; 64-FP64A: LLVM ERROR: -mattr=+nooddspreg requires the O32 ABI.
+
+declare double @dbl();
+
+define double @call1(double %d, ...) {
+  ret double %d
+
+; ALL-LABEL:            call1:
+
+; 32R2-NO-FP64A-LE-NOT:     addiu   $sp, $sp
+; 32R2-NO-FP64A-LE:         mtc1    $4, $f0
+; 32R2-NO-FP64A-LE:         mthc1   $5, $f0
+
+; 32R2-NO-FP64A-BE-NOT:     addiu   $sp, $sp
+; 32R2-NO-FP64A-BE:         mtc1    $5, $f0
+; 32R2-NO-FP64A-BE:         mthc1   $4, $f0
+
+; 32R2-FP64A-LE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:            sw      $4, 0($sp)
+; 32R2-FP64A-LE:            sw      $5, 4($sp)
+; 32R2-FP64A-LE:            ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:            sw      $5, 0($sp)
+; 32R2-FP64A-BE:            sw      $4, 4($sp)
+; 32R2-FP64A-BE:            ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A:              daddiu  $sp, $sp, -64
+; 64-NO-FP64A:              mov.d   $f0, $f12
+}
+
+define double @call2(i32 %i, double %d) {
+  ret double %d
+
+; ALL-LABEL:        call2:
+
+; 32R2-NO-FP64A-LE:     mtc1    $6, $f0
+; 32R2-NO-FP64A-LE:     mthc1   $7, $f0
+
+; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
+; 32R2-NO-FP64A-BE:     mthc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:        sw      $6, 0($sp)
+; 32R2-FP64A-LE:        sw      $7, 4($sp)
+; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:        sw      $7, 0($sp)
+; 32R2-FP64A-BE:        sw      $6, 4($sp)
+; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A-NOT:      daddiu  $sp, $sp
+; 64-NO-FP64A:          mov.d   $f0, $f13
+}
+
+define double @call3(float %f1, float %f2, double %d) {
+  ret double %d
+
+; ALL-LABEL:        call3:
+
+; 32R2-NO-FP64A-LE:     mtc1    $6, $f0
+; 32R2-NO-FP64A-LE:     mthc1   $7, $f0
+
+; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
+; 32R2-NO-FP64A-BE:     mthc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:        sw      $6, 0($sp)
+; 32R2-FP64A-LE:        sw      $7, 4($sp)
+; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:        sw      $7, 0($sp)
+; 32R2-FP64A-BE:        sw      $6, 4($sp)
+; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A-NOT:      daddiu  $sp, $sp
+; 64-NO-FP64A:          mov.d   $f0, $f14
+}
+
+define double @call4(float %f, double %d, ...) {
+  ret double %d
+
+; ALL-LABEL:        call4:
+
+; 32R2-NO-FP64A-LE:     mtc1    $6, $f0
+; 32R2-NO-FP64A-LE:     mthc1   $7, $f0
+
+; 32R2-NO-FP64A-BE:     mtc1    $7, $f0
+; 32R2-NO-FP64A-BE:     mthc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:        sw      $6, 0($sp)
+; 32R2-FP64A-LE:        sw      $7, 4($sp)
+; 32R2-FP64A-LE:        ldc1    $f0, 0($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:        sw      $7, 0($sp)
+; 32R2-FP64A-BE:        sw      $6, 4($sp)
+; 32R2-FP64A-BE:        ldc1    $f0, 0($sp)
+
+; 64-NO-FP64A:          daddiu  $sp, $sp, -48
+; 64-NO-FP64A:          mov.d   $f0, $f13
+}
+
+define double @call5(double %a, double %b, ...) {
+  %1 = fsub double %a, %b
+  ret double %1
+
+; ALL-LABEL:            call5:
+
+; 32R2-NO-FP64A-LE-DAG:     mtc1    $4, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-LE-DAG:     mthc1   $5, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-LE-DAG:     mtc1    $6, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-LE-DAG:     mthc1   $7, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-LE:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-NO-FP64A-BE-DAG:     mtc1    $5, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-BE-DAG:     mthc1   $4, $[[T0:f[0-9]+]]
+; 32R2-NO-FP64A-BE-DAG:     mtc1    $7, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-BE-DAG:     mthc1   $6, $[[T1:f[0-9]+]]
+; 32R2-NO-FP64A-BE:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-FP64A-LE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-LE:            sw      $6, 0($sp)
+; 32R2-FP64A-LE:            sw      $7, 4($sp)
+; 32R2-FP64A-LE:            ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A-LE:            sw      $4, 0($sp)
+; 32R2-FP64A-LE:            sw      $5, 4($sp)
+; 32R2-FP64A-LE:            ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A-LE:            sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-FP64A-BE:            addiu   $sp, $sp, -8
+; 32R2-FP64A-BE:            sw      $7, 0($sp)
+; 32R2-FP64A-BE:            sw      $6, 4($sp)
+; 32R2-FP64A-BE:            ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32R2-FP64A-BE:            sw      $5, 0($sp)
+; 32R2-FP64A-BE:            sw      $4, 4($sp)
+; 32R2-FP64A-BE:            ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32R2-FP64A-BE:            sub.d   $f0, $[[T0]], $[[T1]]
+
+; 64-NO-FP64A:              sub.d   $f0, $f12, $f13
+}
+
+define double @move_from(double %d) {
+  %1 = call double @dbl()
+  %2 = call double @call2(i32 0, double %1)
+  ret double %2
+
+; ALL-LABEL:        move_from:
+
+; 32R2-NO-FP64A-LE-DAG: mfc1    $6, $f0
+; 32R2-NO-FP64A-LE-DAG: mfhc1   $7, $f0
+
+; 32R2-NO-FP64A-BE-DAG: mfc1    $7, $f0
+; 32R2-NO-FP64A-BE-DAG: mfhc1   $6, $f0
+
+; 32R2-FP64A-LE:        addiu   $sp, $sp, -32
+; 32R2-FP64A-LE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-LE:        lw      $6, 16($sp)
+; FIXME: This store is redundant
+; 32R2-FP64A-LE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-LE:        lw      $7, 20($sp)
+
+; 32R2-FP64A-BE:        addiu   $sp, $sp, -32
+; 32R2-FP64A-BE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-BE:        lw      $6, 20($sp)
+; FIXME: This store is redundant
+; 32R2-FP64A-BE:        sdc1    $f0, 16($sp)
+; 32R2-FP64A-BE:        lw      $7, 16($sp)
+
+; 64-NO-FP64A:          mov.d   $f13, $f0
+}
diff --git a/test/CodeGen/Mips/fpxx.ll b/test/CodeGen/Mips/fpxx.ll

index fb75e36059743d767fa101dfd599e79ad0d54f55..7e2ed22e2d805aeb573c9840dd9e23785f247ad0 100644 (file)
--- a/test/CodeGen/Mips/fpxx.ll
+++ b/test/CodeGen/Mips/fpxx.ll
@@ -11,38 +11,39 @@
  ; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=64-FPXX
  
  ; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-NOFPXX
-; RUN-TOOD: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX
  
  ; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-NOFPXX
-; RUN-TOOD: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX
  
+declare double @dbl();
  
-; 4-FPXX:    LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's.
-; 64-FPXX:    LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's.
+; 4-FPXX:  LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's.
+; 64-FPXX: LLVM ERROR: FPXX is not permitted for the N32/N64 ABI's.
  
  define double @test1(double %d, ...) {
    ret double %d
  
  ; ALL-LABEL: test1:
  
-; 32-NOFPXX:    mtc1    $4, $f0
-; 32-NOFPXX:    mtc1    $5, $f1
+; 32-NOFPXX:     mtc1    $4, $f0
+; 32-NOFPXX:     mtc1    $5, $f1
  
  ; 32-FPXX:       addiu   $sp, $sp, -8
  ; 32-FPXX:       sw      $4, 0($sp)
  ; 32-FPXX:       sw      $5, 4($sp)
  ; 32-FPXX:       ldc1    $f0, 0($sp)
  
-; 32R2-NOFPXX:    mtc1    $4, $f0
-; 32R2-NOFPXX:    mthc1   $5, $f0
+; 32R2-NOFPXX:   mtc1    $4, $f0
+; 32R2-NOFPXX:   mthc1   $5, $f0
  
-; 32R2-FPXX:    mtc1    $4, $f0
-; 32R2-FPXX:    mthc1   $5, $f0
+; 32R2-FPXX:     mtc1    $4, $f0
+; 32R2-FPXX:     mthc1   $5, $f0
  
  ; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used.
-; 4-NOFPXX:    mov.d   $f0, $f12
+; 4-NOFPXX:      mov.d   $f0, $f12
  
-; 64-NOFPXX:    mov.d   $f0, $f12
+; 64-NOFPXX:     mov.d   $f0, $f12
  }
  
  define double @test2(i32 %i, double %d) {
@@ -50,23 +51,23 @@ define double @test2(i32 %i, double %d) {
  
  ; ALL-LABEL: test2:
  
-; 32-NOFPXX:    mtc1    $6, $f0
-; 32-NOFPXX:    mtc1    $7, $f1
+; 32-NOFPXX:     mtc1    $6, $f0
+; 32-NOFPXX:     mtc1    $7, $f1
  
  ; 32-FPXX:       addiu   $sp, $sp, -8
  ; 32-FPXX:       sw      $6, 0($sp)
  ; 32-FPXX:       sw      $7, 4($sp)
  ; 32-FPXX:       ldc1    $f0, 0($sp)
  
-; 32R2-NOFPXX:    mtc1    $6, $f0
-; 32R2-NOFPXX:    mthc1   $7, $f0
+; 32R2-NOFPXX:   mtc1    $6, $f0
+; 32R2-NOFPXX:   mthc1   $7, $f0
  
-; 32R2-FPXX:    mtc1    $6, $f0
-; 32R2-FPXX:    mthc1   $7, $f0
+; 32R2-FPXX:     mtc1    $6, $f0
+; 32R2-FPXX:     mthc1   $7, $f0
  
-; 4-NOFPXX:    mov.d   $f0, $f13
+; 4-NOFPXX:      mov.d   $f0, $f13
  
-; 64-NOFPXX:    mov.d   $f0, $f13
+; 64-NOFPXX:     mov.d   $f0, $f13
  }
  
  define double @test3(float %f1, float %f2, double %d) {
@@ -74,23 +75,23 @@ define double @test3(float %f1, float %f2, double %d) {
  
  ; ALL-LABEL: test3:
  
-; 32-NOFPXX:    mtc1    $6, $f0
-; 32-NOFPXX:    mtc1    $7, $f1
+; 32-NOFPXX:     mtc1    $6, $f0
+; 32-NOFPXX:     mtc1    $7, $f1
  
  ; 32-FPXX:       addiu   $sp, $sp, -8
  ; 32-FPXX:       sw      $6, 0($sp)
  ; 32-FPXX:       sw      $7, 4($sp)
  ; 32-FPXX:       ldc1    $f0, 0($sp)
  
-; 32R2-NOFPXX:    mtc1    $6, $f0
-; 32R2-NOFPXX:    mthc1   $7, $f0
+; 32R2-NOFPXX:   mtc1    $6, $f0
+; 32R2-NOFPXX:   mthc1   $7, $f0
  
-; 32R2-FPXX:    mtc1    $6, $f0
-; 32R2-FPXX:    mthc1   $7, $f0
+; 32R2-FPXX:     mtc1    $6, $f0
+; 32R2-FPXX:     mthc1   $7, $f0
  
-; 4-NOFPXX:    mov.d   $f0, $f14
+; 4-NOFPXX:      mov.d   $f0, $f14
  
-; 64-NOFPXX:    mov.d   $f0, $f14
+; 64-NOFPXX:     mov.d   $f0, $f14
  }
  
  define double @test4(float %f, double %d, ...) {
@@ -98,23 +99,23 @@ define double @test4(float %f, double %d, ...) {
  
  ; ALL-LABEL: test4:
  
-; 32-NOFPXX:    mtc1    $6, $f0
-; 32-NOFPXX:    mtc1    $7, $f1
+; 32-NOFPXX:     mtc1    $6, $f0
+; 32-NOFPXX:     mtc1    $7, $f1
  
  ; 32-FPXX:       addiu   $sp, $sp, -8
  ; 32-FPXX:       sw      $6, 0($sp)
  ; 32-FPXX:       sw      $7, 4($sp)
  ; 32-FPXX:       ldc1    $f0, 0($sp)
  
-; 32R2-NOFPXX:    mtc1    $6, $f0
-; 32R2-NOFPXX:    mthc1   $7, $f0
+; 32R2-NOFPXX:   mtc1    $6, $f0
+; 32R2-NOFPXX:   mthc1   $7, $f0
  
-; 32R2-FPXX:    mtc1    $6, $f0
-; 32R2-FPXX:    mthc1   $7, $f0
+; 32R2-FPXX:     mtc1    $6, $f0
+; 32R2-FPXX:     mthc1   $7, $f0
  
-; 4-NOFPXX:    mov.d   $f0, $f13
+; 4-NOFPXX:      mov.d   $f0, $f13
  
-; 64-NOFPXX:    mov.d   $f0, $f13
+; 64-NOFPXX:     mov.d   $f0, $f13
  }
  
  define double @test5() {
@@ -122,21 +123,99 @@ define double @test5() {
  
  ; ALL-LABEL: test5:
  
-; 32-NOFPXX:    mtc1    $zero, $f0
-; 32-NOFPXX:    mtc1    $zero, $f1
+; 32-NOFPXX:     mtc1    $zero, $f0
+; 32-NOFPXX:     mtc1    $zero, $f1
  
-; 32-FPXX:    addiu   $sp, $sp, -8
-; 32-FPXX:    sw      $zero, 0($sp)
-; 32-FPXX:    sw      $zero, 4($sp)
-; 32-FPXX:    ldc1    $f0, 0($sp)
+; 32-FPXX:       addiu   $sp, $sp, -8
+; 32-FPXX:       sw      $zero, 0($sp)
+; 32-FPXX:       sw      $zero, 4($sp)
+; 32-FPXX:       ldc1    $f0, 0($sp)
+
+; 32R2-NOFPXX:   mtc1    $zero, $f0
+; 32R2-NOFPXX:   mthc1   $zero, $f0
+
+; 32R2-FPXX:     mtc1    $zero, $f0
+; 32R2-FPXX:     mthc1   $zero, $f0
+
+; 4-NOFPXX:      dmtc1 $zero, $f0
  
-; 32R2-NOFPXX:    mtc1    $zero, $f0
-; 32R2-NOFPXX:    mthc1   $zero, $f0
+; 64-NOFPXX:     dmtc1 $zero, $f0
+}
+
+define double @test6(double %a, double %b, ...) {
+  %1 = fsub double %a, %b
+  ret double %1
+
+; ALL-LABEL:     test6:
+
+; 32-NOFPXX-DAG:     mtc1    $4, $[[T0:f[0-9]+]]
+; 32-NOFPXX-DAG:     mtc1    $5, ${{f[0-9]*[13579]}}
+; 32-NOFPXX-DAG:     mtc1    $6, $[[T1:f[0-9]+]]
+; 32-NOFPXX-DAG:     mtc1    $7, ${{f[0-9]*[13579]}}
+; 32-NOFPXX:         sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32-FPXX:           addiu   $sp, $sp, -8
+; 32-FPXX:           sw      $6, 0($sp)
+; 32-FPXX:           sw      $7, 4($sp)
+; 32-FPXX:           ldc1    $[[T1:f[0-9]+]], 0($sp)
+; 32-FPXX:           sw      $4, 0($sp)
+; 32-FPXX:           sw      $5, 4($sp)
+; 32-FPXX:           ldc1    $[[T0:f[0-9]+]], 0($sp)
+; 32-FPXX:           sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-NOFPXX-DAG:   mtc1    $4, $[[T0:f[0-9]+]]
+; 32R2-NOFPXX-DAG:   mthc1   $5, $[[T0]]
+; 32R2-NOFPXX-DAG:   mtc1    $6, $[[T1:f[0-9]+]]
+; 32R2-NOFPXX-DAG:   mthc1   $7, $[[T1]]
+; 32R2-NOFPXX:       sub.d   $f0, $[[T0]], $[[T1]]
+
+; 32R2-FPXX-DAG:     mtc1    $4, $[[T0:f[0-9]+]]
+; 32R2-FPXX-DAG:     mthc1   $5, $[[T0]]
+; 32R2-FPXX-DAG:     mtc1    $6, $[[T1:f[0-9]+]]
+; 32R2-FPXX-DAG:     mthc1   $7, $[[T1]]
+; 32R2-FPXX:         sub.d   $f0, $[[T0]], $[[T1]]
  
-; 32R2-FPXX:    mtc1    $zero, $f0
-; 32R2-FPXX:    mthc1   $zero, $f0
+; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used.
+; 4-NOFPXX:          sub.d   $f0, $f12, $f13
  
-; 4-NOFPXX:    dmtc1 $zero, $f0
+; floats/doubles are not passed in integer registers for n64, so dmtc1 is not used.
+; 64-NOFPXX:         sub.d   $f0, $f12, $f13
+}
  
-; 64-NOFPXX:    dmtc1 $zero, $f0
+define double @move_from1(double %d) {
+  %1 = call double @dbl()
+  %2 = call double @test2(i32 0, double %1)
+  ret double %2
+
+; ALL-LABEL:   move_from1:
+
+; 32-NOFPXX-DAG:   mfc1    $6, $f0
+; 32-NOFPXX-DAG:   mfc1    $7, $f1
+
+; 32-FPXX:         addiu   $sp, $sp, -32
+; 32-FPXX:         sdc1    $f0, 16($sp)
+; 32-FPXX:         lw      $6, 16($sp)
+; FIXME: This store is redundant
+; 32-FPXX:         sdc1    $f0, 16($sp)
+; 32-FPXX:         lw      $7, 20($sp)
+
+; 32R2-NOFPXX-DAG: mfc1    $6, $f0
+; 32R2-NOFPXX-DAG: mfhc1   $7, $f0
+
+; 32R2-FPXX-DAG:   mfc1    $6, $f0
+; 32R2-FPXX-DAG:   mfhc1   $7, $f0
+
+; floats/doubles are not passed in integer registers for n64, so dmfc1 is not used.
+; We can't use inline assembly to force a copy either because trying to force
+; a copy to a GPR this way fails with "couldn't allocate input reg for
+; constraint 'r'". It therefore seems impossible to test the generation of dmfc1
+; in a simple test.
+; 4-NOFPXX:        mov.d   $f13, $f0
+
+; floats/doubles are not passed in integer registers for n64, so dmfc1 is not used.
+; We can't use inline assembly to force a copy either because trying to force
+; a copy to a GPR this way fails with "couldn't allocate input reg for
+; constraint 'r'". It therefore seems impossible to test the generation of dmfc1
+; in a simple test.
+; 64-NOFPXX:       mov.d   $f13, $f0
  }
author	Daniel Sanders <daniel.sanders@imgtec.com>
	Mon, 14 Jul 2014 13:08:14 +0000 (13:08 +0000)
committer	Daniel Sanders <daniel.sanders@imgtec.com>
	Mon, 14 Jul 2014 13:08:14 +0000 (13:08 +0000)
lib/Target/Mips/MipsMachineFunction.cpp		patch \| blob \| history
lib/Target/Mips/MipsMachineFunction.h		patch \| blob \| history
lib/Target/Mips/MipsSEFrameLowering.cpp		patch \| blob \| history
lib/Target/Mips/MipsSEInstrInfo.cpp		patch \| blob \| history
lib/Target/Mips/MipsSubtarget.cpp		patch \| blob \| history
test/CodeGen/Mips/fp64a.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/Mips/fpxx.ll		patch \| blob \| history