From 0b18e59336dcb18e036540dd667fed8d17c3cc09 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Tue, 17 Mar 2009 19:36:00 +0000
Subject: [PATCH] Fix codegen to compute the size of an allocation by
 multiplying the size by the array amount as an i32 value instead of promoting
 from i32 to i64 then doing the multiply.  Not doing this broke wrap-around
 assumptions that the optimizers (validly) made.  The ultimate real fix for
 this is to introduce i64 version of alloca and remove mallocinst.

This fixes PR3829


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@67093 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../SelectionDAG/SelectionDAGBuild.cpp        | 25 +++++++++++++------
 test/CodeGen/X86/x86-64-malloc.ll             | 10 ++++++++
 2 files changed, 27 insertions(+), 8 deletions(-)
 create mode 100644 test/CodeGen/X86/x86-64-malloc.ll
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 0dbc35b2248..855f9a443b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -2763,6 +2763,13 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
              I.getAlignment());
 
   SDValue AllocSize = getValue(I.getArraySize());
+  
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
+                          AllocSize,
+                          DAG.getConstant(TySize, AllocSize.getValueType()));
+  
+  
+  
   MVT IntPtr = TLI.getPointerTy();
   if (IntPtr.bitsLT(AllocSize.getValueType()))
     AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
@@ -2771,9 +2778,6 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
     AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
                             IntPtr, AllocSize);
 
-  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, AllocSize,
-                          DAG.getIntPtrConstant(TySize));
-
   // Handle alignment.  If the requested alignment is less than or equal to
   // the stack alignment, ignore it.  If the size is greater than or equal to
   // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
@@ -5425,6 +5429,16 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
 void SelectionDAGLowering::visitMalloc(MallocInst &I) {
   SDValue Src = getValue(I.getOperand(0));
 
+  // Scale up by the type size in the original i32 type width.  Various
+  // mid-level optimizers may make assumptions about demanded bits etc from the
+  // i32-ness of the optimizer: we do not want to promote to i64 and then
+  // multiply on 64-bit targets.
+  // FIXME: Malloc inst should go away: PR715.
+  uint64_t ElementSize = TD->getTypePaddedSize(I.getType()->getElementType());
+  if (ElementSize != 1)
+    Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
+                      Src, DAG.getConstant(ElementSize, Src.getValueType()));
+  
   MVT IntPtr = TLI.getPointerTy();
 
   if (IntPtr.bitsLT(Src.getValueType()))
@@ -5432,11 +5446,6 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) {
   else if (IntPtr.bitsGT(Src.getValueType()))
     Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src);
 
-  // Scale the source by the type size.
-  uint64_t ElementSize = TD->getTypePaddedSize(I.getType()->getElementType());
-  Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(),
-                    Src, DAG.getIntPtrConstant(ElementSize));
-
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
   Entry.Node = Src;
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
new file mode 100644
index 00000000000..4beb5c21aca
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-malloc.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {shll.*3, %edi}
+; PR3829
+; The generated code should multiply by 3 (sizeof i8*) as an i32,
+; not as an i64!
+
+define i8** @test(i32 %sz) {
+	%sub = add i32 %sz, 536870911		; <i32> [#uses=1]
+	%call = malloc i8*, i32 %sub		; <i8**> [#uses=1]
+	ret i8** %call
+}
-- 
2.34.1