From 46d6fd2908f4e4d92bacc35d5e2274280bb970d4 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka <juergen@apple.com> Date: Wed, 17 Sep 2014 19:51:38 +0000 Subject: [PATCH] [FastISel][AArch64] Fold mul into add/sub and logical operations. Try to fold the multiply into the add/sub or logical operations (when possible). This is related to rdar://problem/18369687. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217978 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 88 +++++++++++++++++----- test/CodeGen/AArch64/fast-isel-logic-op.ll | 48 ++++++++++++ 2 files changed, 117 insertions(+), 19 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index f9037a31b04..f4d6665e835 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -949,8 +949,13 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) std::swap(LHS, RHS); + // Canonicalize mul by power of 2 to the RHS. + if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) + if (isMulPowOf2(LHS)) + std::swap(LHS, RHS); + // Canonicalize shift immediate to the RHS. - if (UseAdd && isValueAvailable(LHS)) + if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) if (isa<ConstantInt>(SI->getOperand(1))) if (SI->getOpcode() == Instruction::Shl || @@ -980,7 +985,8 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, return ResultReg; // Only extend the RHS within the instruction if there is a valid extend type. 
- if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) { + if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && + isValueAvailable(RHS)) { if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { @@ -1000,8 +1006,28 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, ExtendType, 0, SetFlags, WantResult); } + // Check if the mul can be folded into the instruction. + if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (isMulPowOf2(RHS)) { + const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); + const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); + + if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) + if (C->getValue().isPowerOf2()) + std::swap(MulLHS, MulRHS); + + assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); + uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); + unsigned RHSReg = getRegForValue(MulLHS); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(MulLHS); + return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, + AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + } + // Check if the shift can be folded into the instruction. - if (isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1296,12 +1322,16 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (isa<Constant>(LHS) && !isa<Constant>(RHS)) std::swap(LHS, RHS); + // Canonicalize mul by power-of-2 to the RHS. + if (LHS->hasOneUse() && isValueAvailable(LHS)) + if (isMulPowOf2(LHS)) + std::swap(LHS, RHS); + // Canonicalize shift immediate to the RHS. 
- if (isValueAvailable(LHS)) - if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) + if (LHS->hasOneUse() && isValueAvailable(LHS)) + if (const auto *SI = dyn_cast<ShlOperator>(LHS)) if (isa<ConstantInt>(SI->getOperand(1))) - if (SI->getOpcode() == Instruction::Shl) - std::swap(LHS, RHS); + std::swap(LHS, RHS); unsigned LHSReg = getRegForValue(LHS); if (!LHSReg) @@ -1316,19 +1346,39 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (ResultReg) return ResultReg; + // Check if the mul can be folded into the instruction. + if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (isMulPowOf2(RHS)) { + const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); + const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); + + if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) + if (C->getValue().isPowerOf2()) + std::swap(MulLHS, MulRHS); + + assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); + uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); + + unsigned RHSReg = getRegForValue(MulLHS); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(MulLHS); + return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + } + // Check if the shift can be folded into the instruction. 
- if (isValueAvailable(RHS)) - if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) - if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) - if (SI->getOpcode() == Instruction::Shl) { - uint64_t ShiftVal = C->getZExtValue(); - unsigned RHSReg = getRegForValue(SI->getOperand(0)); - if (!RHSReg) - return 0; - bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); - } + if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (const auto *SI = dyn_cast<ShlOperator>(RHS)) + if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { + uint64_t ShiftVal = C->getZExtValue(); + unsigned RHSReg = getRegForValue(SI->getOperand(0)); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); + return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll index 152fff808ad..2c7486e4cf8 100644 --- a/test/CodeGen/AArch64/fast-isel-logic-op.ll +++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll @@ -108,6 +108,22 @@ define i64 @and_rs_i64(i64 %a, i64 %b) { ret i64 %2 } +define i32 @and_mul_i32(i32 %a, i32 %b) { +; CHECK-LABEL: and_mul_i32 +; CHECK: and w0, w0, w1, lsl #2 + %1 = mul i32 %b, 4 + %2 = and i32 %a, %1 + ret i32 %2 +} + +define i64 @and_mul_i64(i64 %a, i64 %b) { +; CHECK-LABEL: and_mul_i64 +; CHECK: and x0, x0, x1, lsl #2 + %1 = mul i64 %b, 4 + %2 = and i64 %a, %1 + ret i64 %2 +} + ; OR define zeroext i1 @or_rr_i1(i1 signext %a, i1 signext %b) { ; CHECK-LABEL: or_rr_i1 @@ -210,6 +226,22 @@ define i64 @or_rs_i64(i64 %a, i64 %b) { ret i64 %2 } +define i32 @or_mul_i32(i32 %a, i32 %b) { +; CHECK-LABEL: or_mul_i32 +; CHECK: orr w0, w0, w1, lsl #2 + %1 = mul i32 %b, 4 + %2 = or i32 %a, %1 + ret i32 %2 +} + +define i64 @or_mul_i64(i64 %a, i64 %b) { +; CHECK-LABEL: or_mul_i64 +; CHECK: orr x0, x0, x1, lsl #2 + %1 = mul i64 %b, 4 + %2 = 
or i64 %a, %1 + ret i64 %2 +} + ; XOR define zeroext i1 @xor_rr_i1(i1 signext %a, i1 signext %b) { ; CHECK-LABEL: xor_rr_i1 @@ -312,3 +344,19 @@ define i64 @xor_rs_i64(i64 %a, i64 %b) { ret i64 %2 } +define i32 @xor_mul_i32(i32 %a, i32 %b) { +; CHECK-LABEL: xor_mul_i32 +; CHECK: eor w0, w0, w1, lsl #2 + %1 = mul i32 %b, 4 + %2 = xor i32 %a, %1 + ret i32 %2 +} + +define i64 @xor_mul_i64(i64 %a, i64 %b) { +; CHECK-LABEL: xor_mul_i64 +; CHECK: eor x0, x0, x1, lsl #2 + %1 = mul i64 %b, 4 + %2 = xor i64 %a, %1 + ret i64 %2 +} + -- 2.34.1