From d2b1fb27df44151e153c19055ad1bd4b415b1e9d Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Fri, 22 Feb 2008 05:18:04 +0000
Subject: [PATCH] Copy MMX values from/to memory with GPRs on x86-32 instead of
 with MMX registers.  This horribleness is apparently done by GCC to avoid
 having to insert emms in places that really should have it.  This is the
 second half of rdar://5741668.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47474 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 26 +++++++++++++++++++++++++-
 test/CodeGen/X86/mmx-copy-gprs.ll  |  5 +++--
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 96a58c11ef7..a2e0036e38d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5897,7 +5897,31 @@ static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
                           St->isVolatile(), St->getAlignment());
     }
     
-    // TODO: 2 32-bit copies.
+    // Otherwise, lower to two 32-bit copies.
+    SDOperand LoAddr = Ld->getBasePtr();
+    SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+                                   DAG.getConstant(MVT::i32, 4));
+    
+    SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
+                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                                 Ld->isVolatile(), Ld->getAlignment());
+    SDOperand HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
+                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+                                 Ld->isVolatile(), 
+                                 MinAlign(Ld->getAlignment(), 4));
+    
+    LoAddr = St->getBasePtr();
+    HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+                         DAG.getConstant(MVT::i32, 4));
+    
+    SDOperand LoSt = DAG.getStore(LoLd.getValue(1), LoLd, LoAddr,
+                        St->getSrcValue(), St->getSrcValueOffset(),
+                        St->isVolatile(), St->getAlignment());
+    SDOperand HiSt = DAG.getStore(HiLd.getValue(1), HiLd, HiAddr,
+                                  St->getSrcValue(), St->getSrcValueOffset()+4,
+                                  St->isVolatile(), 
+                                  MinAlign(St->getAlignment(), 4));
+    return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
   }
   return SDOperand();
 }
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 8cf36e05a83..da17a04a466 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
+; RUN: llvm-as < %s | llc -march=x86 | grep {movl.*4(%eax),}
 
 ; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
 ; increases the places that need to use emms.
@@ -6,9 +7,9 @@
 ; rdar://5741668
 target triple = "x86_64-apple-darwin8"
 
-define i32 @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind  {
+define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind  {
 entry:
 	%tmp1 = load <1 x i64>* %y, align 8		; <<1 x i64>> [#uses=1]
 	store <1 x i64> %tmp1, <1 x i64>* %x, align 8
-	ret i32 undef
+	ret void
 }
-- 
2.34.1