Fix a number of byval / memcpy / memset related codegen issues.

author Evan Cheng <evan.cheng@apple.com>

Thu, 21 Aug 2008 21:00:15 +0000 (21:00 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Thu, 21 Aug 2008 21:00:15 +0000 (21:00 +0000)
author Evan Cheng <evan.cheng@apple.com>
Thu, 21 Aug 2008 21:00:15 +0000 (21:00 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Thu, 21 Aug 2008 21:00:15 +0000 (21:00 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index fdc461894d6a4db16dc504e510941872a811c79a..a6bffe285f7dd35d1ba2d7c22eee7b3583ab9366 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -796,8 +796,14 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  /// that contain SSE vectors are placed at 16-byte boundaries while the rest
  /// are at 4-byte boundaries.
  unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
-  if (Subtarget->is64Bit())
-    return getTargetData()->getABITypeAlignment(Ty);
+  if (Subtarget->is64Bit()) {
+    // Max of 8 and alignment of type.
+    unsigned TyAlign = getTargetData()->getABITypeAlignment(Ty);
+    if (TyAlign > 8)
+      return TyAlign;
+    return 8;
+  }
+
    unsigned Align = 4;
    if (Subtarget->hasSSE1())
      getMaxByValAlign(Ty, Align);
@@ -5014,16 +5020,16 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
  
  SDValue
  X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
-                                           SDValue Chain,
-                                           SDValue Dst, SDValue Src,
-                                           SDValue Size, unsigned Align,
+                                        SDValue Chain,
+                                        SDValue Dst, SDValue Src,
+                                        SDValue Size, unsigned Align,
                                          const Value *DstSV, uint64_t DstSVOff) {
    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  
    /// If not DWORD aligned or size is more than the threshold, call the library.
    /// The libc version is likely to be faster for these cases. It can use the
    /// address value and run time information about the CPU.
-  if ((Align & 3) == 0 ||
+  if ((Align & 3) != 0 ||
        !ConstantSize ||
        ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
      SDValue InFlag(0, 0);
@@ -5065,27 +5071,27 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
  
      // If the value is a constant, then we can potentially use larger sets.
      switch (Align & 3) {
-      case 2:   // WORD aligned
-        AVT = MVT::i16;
-        ValReg = X86::AX;
-        Val = (Val << 8) | Val;
-        break;
-      case 0:  // DWORD aligned
-        AVT = MVT::i32;
-        ValReg = X86::EAX;
-        Val = (Val << 8)  | Val;
-        Val = (Val << 16) | Val;
-        if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
-          AVT = MVT::i64;
-          ValReg = X86::RAX;
-          Val = (Val << 32) | Val;
-        }
-        break;
-      default:  // Byte aligned
-        AVT = MVT::i8;
-        ValReg = X86::AL;
-        Count = DAG.getIntPtrConstant(SizeVal);
-        break;
+    case 2:   // WORD aligned
+      AVT = MVT::i16;
+      ValReg = X86::AX;
+      Val = (Val << 8) | Val;
+      break;
+    case 0:  // DWORD aligned
+      AVT = MVT::i32;
+      ValReg = X86::EAX;
+      Val = (Val << 8)  | Val;
+      Val = (Val << 16) | Val;
+      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
+        AVT = MVT::i64;
+        ValReg = X86::RAX;
+        Val = (Val << 32) | Val;
+      }
+      break;
+    default:  // Byte aligned
+      AVT = MVT::i8;
+      ValReg = X86::AL;
+      Count = DAG.getIntPtrConstant(SizeVal);
+      break;
      }
  
      if (AVT.bitsGT(MVT::i8)) {
@@ -5153,13 +5159,11 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
  
  SDValue
  X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
-                                           SDValue Chain,
-                                           SDValue Dst, SDValue Src,
-                                           SDValue Size, unsigned Align,
-                                           bool AlwaysInline,
-                                           const Value *DstSV, uint64_t DstSVOff,
-                                           const Value *SrcSV, uint64_t SrcSVOff){
-  
+                                      SDValue Chain, SDValue Dst, SDValue Src,
+                                      SDValue Size, unsigned Align,
+                                      bool AlwaysInline,
+                                      const Value *DstSV, uint64_t DstSVOff,
+                                      const Value *SrcSV, uint64_t SrcSVOff) {  
    // This requires the copy size to be a constant, preferably
    // within a subtarget-specific limit.
    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
@@ -5169,21 +5173,19 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
    if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
      return SDValue();
  
-  MVT AVT;
-  unsigned BytesLeft = 0;
-  if (Align >= 8 && Subtarget->is64Bit())
+  /// If not DWORD aligned, call the library.
+  if ((Align & 3) != 0)
+    return SDValue();
+
+  // DWORD aligned
+  MVT AVT = MVT::i32;
+  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
      AVT = MVT::i64;
-  else if (Align >= 4)
-    AVT = MVT::i32;
-  else if (Align >= 2)
-    AVT = MVT::i16;
-  else
-    AVT = MVT::i8;
  
    unsigned UBytes = AVT.getSizeInBits() / 8;
    unsigned CountVal = SizeVal / UBytes;
    SDValue Count = DAG.getIntPtrConstant(CountVal);
-  BytesLeft = SizeVal % UBytes;
+  unsigned BytesLeft = SizeVal % UBytes;
  
    SDValue InFlag(0, 0);
    Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll

index 59364c1f6d65305806ac0c81d058d8bb710c2b64..56bb21caf3ca9889dcd4b19b43697c70ef084880 100644 (file)
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
  
  @A = global [32 x i32] zeroinitializer
  @B = global [32 x i32] zeroinitializer
  
  declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
  
-define void @main() {
+define void @main() nounwind {
    ; dword copy
    call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
                             i8* bitcast ([32 x i32]* @B to i8*),
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll

index 074bab4c0a9ee408ee180f3487dc231c4175efd8..707a4c5d2785dc80b3498bd2015012d554f9fca8 100644 (file)
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
  
  %struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
@@ -7,7 +7,7 @@
                     i32, i32, i32, i32, i32, i32, i32, i32,
                     i32 }
  
-define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) {
+define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind {
  entry:
          %d = alloca %struct.s, align 16
          %tmp = getelementptr %struct.s* %d, i32 0, i32 0
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll

index d2fa9e289e79f5da6c5770b0debc9137bd240e8b..5576c361ae1638ff7dd4aeb07e5adda035fdd926 100644 (file)
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl   | count 2
  
  %struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
@@ -13,7 +13,7 @@
  
  
  define void @g(i16 signext  %a1, i16 signext  %a2, i16 signext  %a3,
-        i16 signext  %a4, i16 signext  %a5, i16 signext  %a6) {
+        i16 signext  %a4, i16 signext  %a5, i16 signext  %a6) nounwind {
  entry:
          %a = alloca %struct.s, align 16
          %tmp = getelementptr %struct.s* %a, i32 0, i32 0
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll

index fd9c197bbfd96d47804f03995998ee616fcdde48..c6f4588dd45df3dba962a22b5c3a87041eaa1578 100644 (file)
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
  ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl   | count 2
  
  %struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll

new file mode 100644 (file)

index 0000000..2ad665c
--- /dev/null
+++ b/test/CodeGen/X86/memset-2.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep rep
+; RUN: llvm-as < %s | llc -march=x86 | grep memset
+
+declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+
+define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
+entry:
+       br label %bb8.i.i.i.i
+
+bb8.i.i.i.i:           ; preds = %bb8.i.i.i.i, %entry
+       icmp eq i32 0, 0                ; <i1>:0 [#uses=1]
+       br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
+
+bb32.i.i.i:            ; preds = %bb61.i.i.i
+       ptrtoint i8* %tail.0.i.i.i to i32               ; <i32>:1 [#uses=1]
+       sub i32 0, %1           ; <i32>:2 [#uses=1]
+       icmp sgt i32 %2, 19             ; <i1>:3 [#uses=1]
+       br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
+
+bb34.i.i.i:            ; preds = %bb32.i.i.i
+       load i32* null, align 4         ; <i32>:4 [#uses=1]
+       icmp eq i32 %4, 101010256               ; <i1>:5 [#uses=1]
+       br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
+
+bb8.i11.i.i.i:         ; preds = %bb8.i11.i.i.i, %bb34.i.i.i
+       icmp eq i32 0, 0                ; <i1>:6 [#uses=1]
+       br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
+
+cli_dbgmsg.exit49.i:           ; preds = %bb8.i11.i.i.i
+       icmp eq [32768 x i8]* null, null                ; <i1>:7 [#uses=1]
+       br i1 %7, label %bb1.i28.i, label %bb8.i.i
+
+bb61.i.i.i:            ; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
+       %tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0           ; <i8*> [#uses=2]
+       load i8* %tail.0.i.i.i, align 1         ; <i8>:8 [#uses=1]
+       icmp eq i8 %8, 80               ; <i1>:9 [#uses=1]
+       br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
+
+bb1.i28.i:             ; preds = %cli_dbgmsg.exit49.i
+       call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
+       unreachable
+
+bb8.i.i:               ; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
+       br label %bb8.i.i
+}
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll

index 7045c0faf0b9b4269965052b5fe4be7aab32ed3f..d76d4d47924635dad3970b625708d3ea53a6df4a 100644 (file)
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -1,12 +1,10 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep stosb
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10
  
-target triple = "i386-apple-darwin9"
-        %struct.S = type { [80 x i8] }
-
-define %struct.S* @bork() {
+define void @bork() nounwind {
  entry:
-        call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 1 )
-        ret %struct.S* null
+        call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
+        ret void
  }
  
  declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
author	Evan Cheng <evan.cheng@apple.com>
	Thu, 21 Aug 2008 21:00:15 +0000 (21:00 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Thu, 21 Aug 2008 21:00:15 +0000 (21:00 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/2004-02-12-Memcpy.ll		patch | blob | history
test/CodeGen/X86/byval3.ll		patch | blob | history
test/CodeGen/X86/byval4.ll		patch | blob | history
test/CodeGen/X86/byval5.ll		patch | blob | history
test/CodeGen/X86/memset-2.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/X86/memset64-on-x86-32.ll		patch | blob | history