X86: Don't emit SAHF/LAHF for 64-bit targets unless explicitly supported

[oota-llvm.git] / lib / Target / X86 / X86InstrInfo.cpp
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index aa635fc634ae923145d4ac49fcac2185a1c55316..ebe329064c50a5affe4379e4260db16b09e0803c 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3517,23 +3517,23 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
    bool IsIntrinOpcode;
    isFMA3(Opc, &IsIntrinOpcode);
  
-  unsigned GroupsNum;
+  size_t GroupsNum;
    const unsigned (*OpcodeGroups)[3];
    if (IsIntrinOpcode) {
-    GroupsNum = sizeof(IntrinOpcodeGroups) / sizeof(IntrinOpcodeGroups[0]);
+    GroupsNum = array_lengthof(IntrinOpcodeGroups);
      OpcodeGroups = IntrinOpcodeGroups;
    } else {
-    GroupsNum = sizeof(RegularOpcodeGroups) / sizeof(RegularOpcodeGroups[0]);
+    GroupsNum = array_lengthof(RegularOpcodeGroups);
      OpcodeGroups = RegularOpcodeGroups;
    }
  
    const unsigned *FoundOpcodesGroup = nullptr;
-  unsigned FormIndex;
+  size_t FormIndex;
  
    // Look for the input opcode in the corresponding opcodes table.
-  unsigned GroupIndex = 0;
-  for (; GroupIndex < GroupsNum && !FoundOpcodesGroup; GroupIndex++) {
-    for (FormIndex = 0; FormIndex < FormsNum; FormIndex++) {
+  for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup;
+         ++GroupIndex) {
+    for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) {
        if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
          FoundOpcodesGroup = OpcodeGroups[GroupIndex];
          break;
@@ -4385,7 +4385,32 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    int Reg = FromEFLAGS ? DestReg : SrcReg;
    bool is32 = X86::GR32RegClass.contains(Reg);
    bool is64 = X86::GR64RegClass.contains(Reg);
+
    if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
+    int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
+    int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
+    int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
+    int Pop = is64 ? X86::POP64r : X86::POP32r;
+    int PopF = is64 ? X86::POPF64 : X86::POPF32;
+    int AX = is64 ? X86::RAX : X86::EAX;
+
+    if (!Subtarget.hasLAHFSAHF()) {
+      assert(is64 && "Not having LAHF/SAHF only happens on 64-bit.");
+      // Moving EFLAGS to / from another register requires a push and a pop.
+      // Notice that we have to adjust the stack if we don't want to clobber the
+      // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
+      if (FromEFLAGS) {
+        BuildMI(MBB, MI, DL, get(PushF));
+        BuildMI(MBB, MI, DL, get(Pop), DestReg);
+      }
+      if (ToEFLAGS) {
+        BuildMI(MBB, MI, DL, get(Push))
+            .addReg(SrcReg, getKillRegState(KillSrc));
+        BuildMI(MBB, MI, DL, get(PopF));
+      }
+      return;
+    }
+
      // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
      // inefficient. Instead:
      //   - Save the overflow flag OF into AL using SETO, and restore it using a
@@ -4407,14 +4432,20 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
      // Notice that we have to adjust the stack if we don't want to clobber the
      // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
  
-    int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
-    int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
-    int Pop = is64 ? X86::POP64r : X86::POP32r;
-    int AX = is64 ? X86::RAX : X86::EAX;
  
-    bool AXDead = (Reg == AX) ||
-                  (MachineBasicBlock::LQR_Dead ==
-                   MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+    bool AXDead = (Reg == AX);
+    // FIXME: The above could figure out that AX is dead in more cases with:
+    //          || (MachineBasicBlock::LQR_Dead ==
+    //            MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+    //
+    //        Unfortunately this is slightly broken, see PR24535 and the likely
+    //        related PR25033, PR24991, PR24992, PR25201. These issues seem to
+    //        showcase sub-register / super-register confusion: a previous kill
+    //        of AH but no kill of AL leads computeRegisterLiveness to
+    //        erroneously conclude that AX is dead.
+    //
+    //        Once fixed, also update cmpxchg-clobber-flags.ll and
+    //        peephole-na-phys-copy-folding.ll.
  
      if (!AXDead)
        BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
@@ -5867,6 +5898,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
      case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
      case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
      case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+    case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+    case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+    case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+    case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+    case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+    case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
        return false;
      default:
        return true;
@@ -5882,6 +5919,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
      case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
      case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
      case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+    case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+    case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+    case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+    case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+    case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+    case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
        return false;
      default:
        return true;
@@ -6703,16 +6746,16 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
  // domains, but they require a bit more work than just switching opcodes.
  
  static const uint16_t *lookup(unsigned opcode, unsigned domain) {
-  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
-    if (ReplaceableInstrs[i][domain-1] == opcode)
-      return ReplaceableInstrs[i];
+  for (const uint16_t (&Row)[3] : ReplaceableInstrs)
+    if (Row[domain-1] == opcode)
+      return Row;
    return nullptr;
  }
  
  static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
-  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
-    if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
-      return ReplaceableInstrsAVX2[i];
+  for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2)
+    if (Row[domain-1] == opcode)
+      return Row;
    return nullptr;
  }