From d5561bb1f0fe5c67af59d79a6961ebf4b3c7b953 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 3 Apr 2014 15:10:35 +0000 Subject: [PATCH] ARM: tell LLVM about zext properties of ldrexb/ldrexh Implementing this via ComputeMaskedBits has two advantages: + It actually works. DAGISel doesn't deal with the chains properly in the previous pattern-based solution, so they never trigger. + The information can be used in other DAG combines, as well as the trivial "get rid of truncs". For example if the trunc is in a different basic block. rdar://problem/16227836 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205540 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 14 ++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 8 -------- test/CodeGen/ARM/atomic-ops-v8.ll | 22 ++++++++++------------ test/CodeGen/ARM/ldaex-stlex.ll | 12 ++++++++---- test/CodeGen/ARM/ldstrex.ll | 28 ++++++++++++++++++++++++---- 5 files changed, 56 insertions(+), 28 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 310d845db11..2ebad8e0e43 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9964,6 +9964,20 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, KnownOne &= KnownOneRHS; return; } + case ISD::INTRINSIC_W_CHAIN: { + ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); + Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); + switch (IntID) { + default: return; + case Intrinsic::arm_ldaex: + case Intrinsic::arm_ldrex: { + EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + return; + } + } + } } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1f09c9f07d2..75a109e2c3c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4476,19 +4476,11 @@ def CLREX : AXI<(outs), (ins), 
MiscFrm, NoItinerary, "clrex", let Inst{31-0} = 0b11110101011111111111000000011111; } -def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff), - (LDREXB addr_offset_none:$addr)>; -def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), - (LDREXH addr_offset_none:$addr)>; def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), (STREXB GPR:$Rt, addr_offset_none:$addr)>; def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (STREXH GPR:$Rt, addr_offset_none:$addr)>; -def : ARMPat<(and (ldaex_1 addr_offset_none:$addr), 0xff), - (LDAEXB addr_offset_none:$addr)>; -def : ARMPat<(and (ldaex_2 addr_offset_none:$addr), 0xffff), - (LDAEXH addr_offset_none:$addr)>; def : ARMPat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), (STLEXB GPR:$Rt, addr_offset_none:$addr)>; def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll index 00f9006c155..7922e222030 100644 --- a/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -805,8 +805,8 @@ define i8 @test_atomic_load_umin_i8(i8 zeroext %offset) nounwind { ; CHECK: ldrexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: uxtb r[[OLDX]], r[[OLD]] -; CHECK-NEXT: cmp r[[OLDX]], r0 +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it ls ; CHECK: movls r[[NEW]], r[[OLD]] ; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] @@ -831,8 +831,8 @@ define i16 @test_atomic_load_umin_i16(i16 zeroext %offset) nounwind { ; CHECK: ldaexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: uxth r[[OLDX]], r[[OLD]] -; CHECK-NEXT: cmp r[[OLDX]], r0 +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it ls ; CHECK: movls r[[NEW]], r[[OLD]] ; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] @@ -919,8 +919,8 @@ define i8 @test_atomic_load_umax_i8(i8 zeroext %offset) nounwind { ; CHECK: ldaexb r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]] -; CHECK-NEXT: cmp r[[OLDX]], r0 +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi ; CHECK: movhi r[[NEW]], r[[OLD]] ; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] @@ -945,8 +945,8 @@ define i16 @test_atomic_load_umax_i16(i16 zeroext %offset) nounwind { ; CHECK: ldrexh r[[OLD:[0-9]+]], {{.*}}[[ADDR]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]] -; CHECK-NEXT: cmp r[[OLDX]], r0 +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 ; Thumb mode: it hi ; CHECK: movhi r[[NEW]], r[[OLD]] ; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], {{.*}}[[ADDR]] @@ -1033,8 +1033,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind ; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. -; CHECK-NEXT: uxtb r[[OLDX:[0-9]+]], r[[OLD]] -; CHECK-NEXT: cmp r[[OLDX]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. @@ -1060,8 +1059,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw ; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] ; r0 below is a reasonable guess but could change: it certainly comes into the ; function there. 
-; CHECK-NEXT: uxth r[[OLDX:[0-9]+]], r[[OLD]] -; CHECK-NEXT: cmp r[[OLDX]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: BB#2: ; As above, r1 is a reasonable guess. diff --git a/test/CodeGen/ARM/ldaex-stlex.ll b/test/CodeGen/ARM/ldaex-stlex.ll index f309f77fd5f..bfdfea33156 100644 --- a/test/CodeGen/ARM/ldaex-stlex.ll +++ b/test/CodeGen/ARM/ldaex-stlex.ll @@ -34,17 +34,21 @@ declare i32 @llvm.arm.stlexd(i32, i32, i8*) nounwind ; CHECK-LABEL: test_load_i8: ; CHECK: ldaexb r0, [r0] ; CHECK-NOT: uxtb -define i32 @test_load_i8(i8* %addr) { +; CHECK-NOT: and +define zeroext i8 @test_load_i8(i8* %addr) { %val = call i32 @llvm.arm.ldaex.p0i8(i8* %addr) - ret i32 %val + %val8 = trunc i32 %val to i8 + ret i8 %val8 } ; CHECK-LABEL: test_load_i16: ; CHECK: ldaexh r0, [r0] ; CHECK-NOT: uxth -define i32 @test_load_i16(i16* %addr) { +; CHECK-NOT: and +define zeroext i16 @test_load_i16(i16* %addr) { %val = call i32 @llvm.arm.ldaex.p0i16(i16* %addr) - ret i32 %val + %val16 = trunc i32 %val to i16 + ret i16 %val16 } ; CHECK-LABEL: test_load_i32: diff --git a/test/CodeGen/ARM/ldstrex.ll b/test/CodeGen/ARM/ldstrex.ll index 5eaae53da99..a40e255e83e 100644 --- a/test/CodeGen/ARM/ldstrex.ll +++ b/test/CodeGen/ARM/ldstrex.ll @@ -36,17 +36,21 @@ declare i32 @llvm.arm.strexd(i32, i32, i8*) nounwind ; CHECK-LABEL: test_load_i8: ; CHECK: ldrexb r0, [r0] ; CHECK-NOT: uxtb -define i32 @test_load_i8(i8* %addr) { +; CHECK-NOT: and +define zeroext i8 @test_load_i8(i8* %addr) { %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr) - ret i32 %val + %val8 = trunc i32 %val to i8 + ret i8 %val8 } ; CHECK-LABEL: test_load_i16: ; CHECK: ldrexh r0, [r0] ; CHECK-NOT: uxth -define i32 @test_load_i16(i16* %addr) { +; CHECK-NOT: and +define zeroext i16 @test_load_i16(i16* %addr) { %val = call i32 @llvm.arm.ldrex.p0i16(i16* %addr) - ret i32 %val + %val16 = trunc i32 %val to i16 + ret i16 %val16 } ; CHECK-LABEL: test_load_i32: @@ -137,3 +141,19 @@ define void @excl_addrmode() 
{ ret void } + +; LLVM should know, even across basic blocks, that ldrex is setting the high +; bits of its i32 to 0. There should be no zero-extend operation. +define zeroext i8 @test_cross_block_zext_i8(i1 %tst, i8* %addr) { +; CHECK: test_cross_block_zext_i8: +; CHECK-NOT: uxtb +; CHECK-NOT: and +; CHECK: bx lr + %val = call i32 @llvm.arm.ldrex.p0i8(i8* %addr) + br i1 %tst, label %end, label %mid +mid: + ret i8 42 +end: + %val8 = trunc i32 %val to i8 + ret i8 %val8 +} -- 2.34.1