It's not legal to fold a load from a narrower stack slot into a wider instruction...

author Evan Cheng <evan.cheng@apple.com>

Fri, 11 Sep 2009 00:39:26 +0000 (00:39 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 11 Sep 2009 00:39:26 +0000 (00:39 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 11 Sep 2009 00:39:26 +0000 (00:39 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 11 Sep 2009 00:39:26 +0000 (00:39 +0000)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 1a7f06cbe3a1dec3fce76727464aca63e82a5b52..46e63b2d3bd951e332ffb4b1c8eebc17beadf092 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2162,7 +2162,7 @@ MachineInstr*
  X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI, unsigned i,
                                      const SmallVectorImpl<MachineOperand> &MOs,
-                                    unsigned Align) const {
+                                    unsigned Size, unsigned Align) const {
    const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
    bool isTwoAddrFold = false;
    unsigned NumOps = MI->getDesc().getNumOperands();
@@ -2202,13 +2202,28 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
        OpcodeTablePtr->find((unsigned*)MI->getOpcode());
      if (I != OpcodeTablePtr->end()) {
+      unsigned Opcode = I->second.first;
        unsigned MinAlign = I->second.second;
        if (Align < MinAlign)
          return NULL;
+      if (Size) {
+        unsigned RCSize =  MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+        if (Size < RCSize) {
+          // Check if it's safe to fold the load. If the size of the object is
+          // narrower than the load width, then it's not.
+          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+            return NULL;
+          // If this is a 64-bit load but the spill slot is 32 bits, then we can
+          // do a 32-bit load, which is implicitly zero-extended. This is likely
+          // due to LiveIntervalAnalysis remat'ing a load from a stack slot.
+          Opcode = X86::MOV32rm;
+        }
+      }
+
        if (isTwoAddrFold)
-        NewMI = FuseTwoAddrInst(MF, I->second.first, MOs, MI, *this);
+        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
        else
-        NewMI = FuseInst(MF, I->second.first, i, MOs, MI, *this);
+        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
        return NewMI;
      }
    }
@@ -2228,16 +2243,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    if (NoFusing) return NULL;
  
    const MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned Size = MFI->getObjectSize(FrameIndex);
    unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
    if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
      unsigned NewOpc = 0;
+    unsigned RCSize = 0;
      switch (MI->getOpcode()) {
      default: return NULL;
-    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
      }
+    // Check if it's safe to fold the load. If the size of the object is
+    // narrower than the load width, then it's not.
+    if (Size < RCSize)
+      return NULL;
      // Change to CMPXXri r, 0 first.
      MI->setDesc(get(NewOpc));
      MI->getOperand(1).ChangeToImmediate(0);
@@ -2246,7 +2267,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
  
    SmallVector<MachineOperand,4> MOs;
    MOs.push_back(MachineOperand::CreateFI(FrameIndex));
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
  }
  
  MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -2318,7 +2339,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
      for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
        MOs.push_back(LoadMI->getOperand(i));
    }
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
  }
  
  
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h

index fd4984448884bc67af1c1378e7c7ed0e0ef338f5..aff3603fd97efe631ee129d42a561626525ef0bc 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -603,7 +603,7 @@ private:
                                       MachineInstr* MI,
                                       unsigned OpNum,
                                       const SmallVectorImpl<MachineOperand> &MOs,
-                                     unsigned Alignment) const;
+                                     unsigned Size, unsigned Alignment) const;
  };
  
  } // End llvm namespace
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll

new file mode 100644 (file)

index 0000000..df52949
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+; It's not legal to fold a load from 32-bit stack slot into a 64-bit
+; instruction. If done, the instruction does a 64-bit load and that's not
+; safe. This can happen when a subreg_to_reg 0 has been coalesced. One
+; exception is when the instruction that folds the load is a move, then we
+; can simply turn it into a 32-bit load from the stack slot.
+; rdar://7170444
+
+%struct.ComplexType = type { i32 }
+
+define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp {
+entry:
+; CHECK: _t:
+; CHECK: movl 16(%rbp),
+; CHECK: movl 16(%rbp),
+  %0 = zext i32 %argumentsLength to i64           ; <i64> [#uses=1]
+  %1 = zext i32 %clientPort to i64                ; <i64> [#uses=1]
+  %2 = inttoptr i64 %1 to %struct.ComplexType*    ; <%struct.ComplexType*> [#uses=1]
+  %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID)
+          to label %invcont unwind label %lpad    ; <i8*> [#uses=1]
+
+invcont:                                          ; preds = %entry
+  %4 = add i32 %requestID, %pluginID              ; <i32> [#uses=0]
+  %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef)
+          to label %invcont1 unwind label %lpad   ; <i8> [#uses=0]
+
+invcont1:                                         ; preds = %invcont
+  %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+  %7 = load i32* %6, align 4                      ; <i32> [#uses=1]
+  invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
+          to label %invcont2 unwind label %lpad
+
+invcont2:                                         ; preds = %invcont1
+  ret i32 0
+
+lpad:                                             ; preds = %invcont1, %invcont, %entry
+  %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0]
+  unreachable
+}
+
+declare i32 @vm_deallocate(i32, i64, i64)
+
+declare i8* @pluginInstance(i8*, i32)
+
+declare zeroext i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*)
+
+declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32)
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 11 Sep 2009 00:39:26 +0000 (00:39 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 11 Sep 2009 00:39:26 +0000 (00:39 +0000)
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
lib/Target/X86/X86InstrInfo.h		patch \| blob \| history
test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll	[new file with mode: 0644]	patch \| blob