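[X86] Fix a bug in X86's peephole optimization: do not fold a partial-register load (MOVSS/MOVSD) from a stack slot when the load's destination register is wider than the loaded value.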

author Akira Hatanaka <ahatanaka@apple.com>

Mon, 15 Sep 2014 18:23:52 +0000 (18:23 +0000)

committer Akira Hatanaka <ahatanaka@apple.com>

Mon, 15 Sep 2014 18:23:52 +0000 (18:23 +0000)
author Akira Hatanaka <ahatanaka@apple.com>
Mon, 15 Sep 2014 18:23:52 +0000 (18:23 +0000)
committer Akira Hatanaka <ahatanaka@apple.com>
Mon, 15 Sep 2014 18:23:52 +0000 (18:23 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index f14179603eb6ecdff6bcf57686e3bbe8d3a4bf9a..614b84c392cabbb9ef1f1b9fa511159b0ddcd21d 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -4423,6 +4423,25 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
    return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
  }
  
+static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
+                                  const MachineFunction &MF) {
+  unsigned Opc = LoadMI.getOpcode();
+  unsigned RegSize =
+      MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
+
+  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+    // These instructions only load 32 bits, we can't fold them if the
+    // destination register is wider than 32 bits (4 bytes).
+    return true;
+
+  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+    // These instructions only load 64 bits, we can't fold them if the
+    // destination register is wider than 64 bits (8 bytes).
+    return true;
+
+  return false;
+}
+
  MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                    MachineInstr *MI,
                                             const SmallVectorImpl<unsigned> &Ops,
@@ -4430,8 +4449,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    // If loading from a FrameIndex, fold directly from the FrameIndex.
    unsigned NumOps = LoadMI->getDesc().getNumOperands();
    int FrameIndex;
-  if (isLoadFromStackSlot(LoadMI, FrameIndex))
+  if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
+    if (isPartialRegisterLoad(*LoadMI, MF))
+      return nullptr;
      return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+  }
  
    // Check switch flag
    if (NoFusing) return nullptr;
@@ -4542,19 +4564,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      break;
    }
    default: {
-    if ((LoadMI->getOpcode() == X86::MOVSSrm ||
-         LoadMI->getOpcode() == X86::VMOVSSrm) &&
-        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
-          > 4)
-      // These instructions only load 32 bits, we can't fold them if the
-      // destination register is wider than 32 bits (4 bytes).
-      return nullptr;
-    if ((LoadMI->getOpcode() == X86::MOVSDrm ||
-         LoadMI->getOpcode() == X86::VMOVSDrm) &&
-        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
-          > 8)
-      // These instructions only load 64 bits, we can't fold them if the
-      // destination register is wider than 64 bits (8 bytes).
+    if (isPartialRegisterLoad(*LoadMI, MF))
        return nullptr;
  
      // Folding a normal load. Just copy the load's address operands.
diff --git a/test/CodeGen/X86/peephole-fold-movsd.ll b/test/CodeGen/X86/peephole-fold-movsd.ll

new file mode 100644 (file)

index 0000000..cb0dfce
--- /dev/null
+++ b/test/CodeGen/X86/peephole-fold-movsd.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+;
+; Check that x86's peephole optimization doesn't fold a 64-bit load (movsd) into
+; addpd.
+; rdar://problem/18236850
+
+%struct.S1 = type { double, double }
+
+@g = common global %struct.S1 zeroinitializer, align 8
+
+declare void @foo3(%struct.S1*)
+
+; CHECK: movsd (%rsp), [[R0:%xmm[0-9]+]]
+; CHECK: addpd [[R0]], %xmm{{[0-9]+}}
+
+define void @foo1(double %a.coerce0, double %a.coerce1, double %b.coerce0, double %b.coerce1) {
+  %1 = alloca <2 x double>, align 16
+  %tmpcast = bitcast <2 x double>* %1 to %struct.S1*
+  call void @foo3(%struct.S1* %tmpcast) #2
+  %p2 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 0
+  %2 = load double* %p2, align 16
+  %p3 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 1
+  %3 = load double* %p3, align 8
+  %4 = insertelement <2 x double> undef, double %2, i32 0
+  %5 = insertelement <2 x double> %4, double 0.000000e+00, i32 1
+  %6 = insertelement <2 x double> undef, double %3, i32 1
+  %7 = insertelement <2 x double> %6, double 1.000000e+00, i32 0
+  %8 = fadd <2 x double> %5, %7
+  store <2 x double> %8, <2 x double>* bitcast (%struct.S1* @g to <2 x double>*), align 16
+  ret void
+}
author	Akira Hatanaka <ahatanaka@apple.com>
	Mon, 15 Sep 2014 18:23:52 +0000 (18:23 +0000)
committer	Akira Hatanaka <ahatanaka@apple.com>
	Mon, 15 Sep 2014 18:23:52 +0000 (18:23 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
test/CodeGen/X86/peephole-fold-movsd.ll	[new file with mode: 0644]	patch | blob