From: Evan Cheng Date: Wed, 23 Jan 2008 23:17:41 +0000 (+0000) Subject: Let each target decide byval alignment. For X86, it's 4-byte unless the aggregate... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=29286502628867b31872ead2f2527592480f0970;p=oota-llvm.git Let each target decide byval alignment. For X86, it's 4-byte unless the aggregate contains SSE vector(s). For x86-64, it's max of 8 or alignment of the type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@46286 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f55c2d0da2b..55147172fc4 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -405,6 +405,10 @@ public: return VT == MVT::iPTR ? PointerTy : VT; } + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate + /// function arguments in the caller parameter area. + virtual unsigned getByValTypeAlignment(const Type *Ty) const; + /// getRegisterType - Return the type of registers that this ValueType will /// eventually require. MVT::ValueType getRegisterType(MVT::ValueType VT) const { @@ -433,7 +437,7 @@ public: } assert(0 && "Unsupported extended type!"); } - + /// hasTargetDAGCombine - If true, the target has custom DAG combine /// transformations that it can perform for the specified node. bool hasTargetDAGCombine(ISD::NodeType NT) const { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5c1ce69eb3a..9140539e382 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -672,6 +672,46 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) allowUnalignedMemoryAccesses = true; // x86 supports it! } +/// getMaxByValAlign - Helper for getByValTypeAlignment to determine +/// the desired ByVal argument alignment. 
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+  if (MaxAlign == 16) // Already at the largest value this helper assigns.
+    return;
+  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    if (VTy->getBitWidth() == 128)
+      MaxAlign = 16;
+    else if (VTy->getBitWidth() == 64)
+      if (MaxAlign < 8)
+        MaxAlign = 8;
+  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    unsigned EltAlign = 0;
+    getMaxByValAlign(ATy->getElementType(), EltAlign);
+    if (EltAlign > MaxAlign)
+      MaxAlign = EltAlign;
+  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      unsigned EltAlign = 0;
+      getMaxByValAlign(STy->getElementType(i), EltAlign);
+      if (EltAlign > MaxAlign)
+        MaxAlign = EltAlign;
+      if (MaxAlign == 16) // No later member can raise it further; stop.
+        break;
+    }
+  }
+  return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. On 64-bit targets
+/// this is the type's ABI alignment; otherwise it is 4 bytes, raised to
+/// 8 or 16 when the aggregate contains a 64-bit or 128-bit vector.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+  if (Subtarget->is64Bit())
+    return getTargetData()->getABITypeAlignment(Ty);
+  unsigned Align = 4;
+  getMaxByValAlign(Ty, Align);
+  return Align;
+}
 
 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
 /// jumptable.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index acdc579f7a4..9df3a2e9fcd 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -322,6 +322,12 @@ namespace llvm {
     /// getStackPtrReg - Return the stack pointer register we are using: either
     /// ESP or RSP.
     unsigned getStackPtrReg() const { return X86StackPtr; }
+
+    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+    /// function arguments in the caller parameter area. On 64-bit targets
+    /// this is the type's ABI alignment; otherwise it is 4 bytes, raised to
+    /// 8 or 16 when the aggregate contains a 64-bit or 128-bit vector.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll index 9ce635b562d..591749f768e 100644 --- a/test/CodeGen/X86/byval4.ll +++ b/test/CodeGen/X86/byval4.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2 -; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsw | count 2 +; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2 +; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 %struct.s = type { i16, i16, i16, i16, i16, i16 } diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll index 95bf3f4239d..4965d166666 100644 --- a/test/CodeGen/X86/byval5.ll +++ b/test/CodeGen/X86/byval5.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2 -; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsb | count 2 +; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2 +; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 %struct.s = type { i8, i8, i8, i8, i8, i8 } diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll new file mode 100644 index 00000000000..47269d21d93 --- /dev/null +++ b/test/CodeGen/X86/byval6.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep add | not grep 16 + + %struct.W = type { x86_fp80, x86_fp80 } +@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32 +@.cpx = internal constant %struct.W { x86_fp80 0xK4001E000000000000000, x86_fp80 0xK40028000000000000000 } + +define i32 @main() nounwind { +entry: + tail call void (i32, ...)* @bar( i32 3, %struct.W* byval @.cpx ) nounwind + tail call void (i32, ...)* @baz( i32 3, %struct.W* byval @B ) nounwind + ret i32 undef +} + +declare void @bar(i32, ...) + +declare void @baz(i32, ...) 
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll new file mode 100644 index 00000000000..54210c8036d --- /dev/null +++ b/test/CodeGen/X86/byval7.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 16 + + %struct.S = type { <2 x i64> } + +define i32 @main() nounwind { +entry: + %s = alloca %struct.S ; <%struct.S*> [#uses=2] + %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1] + store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16 + call void @t( i32 1, %struct.S* byval %s ) nounwind + ret i32 0 +} + +declare void @t(i32, %struct.S* byval )