From: Chad Rosier Date: Thu, 6 Dec 2012 01:34:31 +0000 (+0000) Subject: [arm fast-isel] Make the fast-isel implementation of memcpy respect alignment. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=c9758b13668013dea491a08b4f0c9256263927c2;p=oota-llvm.git [arm fast-isel] Make the fast-isel implementation of memcpy respect alignment. rdar://12821569 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169460 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 23d372cceae..21bb1476440 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -186,7 +186,8 @@ class ARMFastISel : public FastISel { bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); - bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); + bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, + unsigned Alignment); unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); @@ -2422,21 +2423,30 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { } bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, - uint64_t Len) { + uint64_t Len, unsigned Alignment) { // Make sure we don't bloat code by inlining very large memcpy's. if (!ARMIsMemCpySmall(Len)) return false; - // We don't care about alignment here since we just emit integer accesses. while (Len) { MVT VT; - if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else { - assert(Len == 1); - VT = MVT::i8; + if (!Alignment || Alignment >= 4) { + if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert (Len == 1 && "Expected a length of 1!"); + VT = MVT::i8; + } + } else { + // Bound based on alignment. + if (Len >= 2 && Alignment == 2) + VT = MVT::i16; + else { + assert (Alignment == 1 && "Expected an alignment of 1!"); + VT = MVT::i8; + } } bool RV; @@ -2515,7 +2525,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { if (!ARMComputeAddress(MTI.getRawDest(), Dest) || !ARMComputeAddress(MTI.getRawSource(), Src)) return false; - if (ARMTryEmitSmallMemCpy(Dest, Src, Len)) + unsigned Alignment = MTI.getAlignment(); + if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment)) return true; } } diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index b73fceff6cd..7d38cc2a7f8 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -35,7 +35,7 @@ define void @t1() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false) ret void } @@ -73,7 +73,7 @@ define void @t2() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false) ret void } @@ -125,6 +125,7 @@ define void @t4() nounwind ssp { ; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr +; THUMB: t4 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr ; THUMB: ldr r0, [r0] @@ -135,8 +136,98 @@ define void @t4() nounwind ssp { ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false) ret void } declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define void @t5() nounwind ssp { +; ARM: t5 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: ldrh r1, [r0, #16] +; ARM: strh r1, [r0, #4] +; ARM: ldrh r1, [r0, #18] +; ARM: strh r1, [r0, #6] +; ARM: ldrh r1, [r0, #20] +; ARM: strh r1, [r0, #8] +; ARM: ldrh r1, [r0, #22] +; ARM: strh r1, [r0, #10] +; ARM: ldrh r1, [r0, #24] +; ARM: strh r1, [r0, #12] +; ARM: bx lr +; THUMB: t5 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: ldrh r1, [r0, #16] +; THUMB: strh r1, [r0, #4] +; THUMB: ldrh r1, [r0, #18] +; THUMB: strh r1, [r0, #6] +; THUMB: ldrh r1, [r0, #20] +; THUMB: strh r1, [r0, #8] +; THUMB: ldrh r1, [r0, #22] +; THUMB: strh r1, [r0, #10] +; THUMB: ldrh r1, [r0, #24] +; THUMB: strh r1, [r0, #12] +; THUMB: bx lr + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false) + ret void +} + +define void @t6() nounwind ssp { +; ARM: t6 +; ARM: movw r0, :lower16:L_temp$non_lazy_ptr +; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: ldr r0, [r0] +; ARM: ldrb r1, [r0, #16] +; ARM: strb r1, [r0, #4] +; ARM: ldrb r1, [r0, #17] +; ARM: strb r1, [r0, #5] +; ARM: ldrb r1, [r0, #18] +; ARM: strb r1, [r0, #6] +; ARM: ldrb r1, [r0, #19] +; ARM: strb r1, [r0, #7] +; ARM: ldrb r1, [r0, #20] +; ARM: strb r1, [r0, #8] +; ARM: ldrb r1, [r0, #21] +; ARM: strb r1, [r0, #9] +; ARM: ldrb r1, [r0, #22] +; ARM: strb r1, [r0, #10] +; ARM: ldrb r1, [r0, #23] +; ARM: strb r1, [r0, #11] +; ARM: ldrb r1, [r0, #24] +; ARM: strb r1, [r0, #12] +; ARM: ldrb r1, [r0, #25] +; ARM: strb r1, [r0, #13] +; ARM: bx lr +; THUMB: t6 +; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr +; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: ldr r0, [r0] +; THUMB: ldrb r1, [r0, #16] +; THUMB: strb r1, [r0, #4] +; THUMB: ldrb r1, [r0, #17] +; THUMB: strb r1, [r0, #5] +; THUMB: ldrb r1, [r0, #18] +; THUMB: strb r1, [r0, #6] +; THUMB: ldrb r1, [r0, #19] +; THUMB: strb r1, [r0, #7] +; THUMB: ldrb r1, [r0, #20] +; THUMB: strb r1, [r0, #8] +; THUMB: ldrb r1, [r0, #21] +; THUMB: strb r1, [r0, #9] +; THUMB: ldrb r1, [r0, #22] +; THUMB: strb r1, [r0, #10] +; THUMB: ldrb r1, [r0, #23] +; THUMB: strb r1, [r0, #11] +; THUMB: ldrb r1, [r0, #24] +; THUMB: strb r1, [r0, #12] +; THUMB: ldrb r1, [r0, #25] +; THUMB: strb r1, [r0, #13] +; THUMB: bx lr + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + ret void +}