From 5461af97bcfb6f27be51cad2118cbebb87f1c78e Mon Sep 17 00:00:00 2001
From: Juergen Ributzka <juergen@apple.com>
Date: Wed, 17 Sep 2014 19:19:31 +0000
Subject: [PATCH] [FastISel][AArch64] Fold mul into the address computation of
 memory operations.

Teach 'computeAddress' to also fold multiplies by a power-of-2 constant
into the address computation (when possible).

This fixes rdar://problem/18369443.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217977 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64FastISel.cpp     | 70 +++++++++++++++++++
 .../AArch64/fast-isel-addressing-modes.ll  | 41 +++++++++++
 2 files changed, 111 insertions(+)

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index da69735c8f1..f9037a31b04 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -425,6 +425,19 @@ unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 }
 
+/// \brief Check if the multiply is by a power-of-2 constant.
+static bool isMulPowOf2(const Value *I) {
+  if (const auto *MI = dyn_cast<MulOperator>(I)) {
+    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
+      if (C->getValue().isPowerOf2())
+        return true;
+    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
+      if (C->getValue().isPowerOf2())
+        return true;
+  }
+  return false;
+}
+
 // Computes the address to get to an object.
 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 {
@@ -589,7 +602,64 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
       return true;
     }
     break;
+  case Instruction::Mul: {
+    if (Addr.getOffsetReg())
+      break;
+
+    if (!isMulPowOf2(U))
+      break;
+
+    const Value *LHS = U->getOperand(0);
+    const Value *RHS = U->getOperand(1);
+
+    // Canonicalize power-of-2 value to the RHS.
+    if (const auto *C = dyn_cast<ConstantInt>(LHS))
+      if (C->getValue().isPowerOf2())
+        std::swap(LHS, RHS);
+
+    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
+    const auto *C = cast<ConstantInt>(RHS);
+    unsigned Val = C->getValue().logBase2();
+    if (Val < 1 || Val > 3)
+      break;
+
+    uint64_t NumBytes = 0;
+    if (Ty && Ty->isSized()) {
+      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+      NumBytes = NumBits / 8;
+      if (!isPowerOf2_64(NumBits))
+        NumBytes = 0;
+    }
+
+    if (NumBytes != (1ULL << Val))
+      break;
+
+    Addr.setShift(Val);
+    Addr.setExtendType(AArch64_AM::LSL);
+
+    if (const auto *I = dyn_cast<Instruction>(LHS))
+      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+        U = I;
+
+    if (const auto *ZE = dyn_cast<ZExtInst>(U))
+      if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+        Addr.setExtendType(AArch64_AM::UXTW);
+        LHS = U->getOperand(0);
+      }
+
+    if (const auto *SE = dyn_cast<SExtInst>(U))
+      if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
+        Addr.setExtendType(AArch64_AM::SXTW);
+        LHS = U->getOperand(0);
+      }
+
+    unsigned Reg = getRegForValue(LHS);
+    if (!Reg)
+      return false;
+    Addr.setOffsetReg(Reg);
+    return true;
+  }
   } // end switch
 
   if (Addr.getReg()) {
     if (!Addr.getOffsetReg()) {
diff --git a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index e2444fb27ef..21fc6645009 100644
--- a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -339,6 +339,16 @@ define i32 @load_shift_offreg_1(i64 %a) {
   ret i32 %3
 }
 
+define i32 @load_mul_offreg_1(i64 %a) {
+; CHECK-LABEL: load_mul_offreg_1
+; CHECK: lsl [[REG:x[0-9]+]], x0, #2
+; CHECK: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = mul i64 %a, 4
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
 ; Load Base Register + Scaled Register Offset
 define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_shift_offreg_1
@@ -405,6 +415,15 @@ define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
   ret i32 %5
 }
 
+define i32 @load_breg_mul_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_mul_offreg_1
+; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = mul i64 %a, 4
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
 ; Load Base Register + Scaled Register Offset + Sign/Zero extension
 define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_zext_shift_offreg_1
@@ -429,6 +448,17 @@ define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
 
+define i32 @load_breg_zext_mul_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_mul_offreg_1
+; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 4
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
 define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
 ; CHECK-LABEL: load_breg_sext_shift_offreg_1
 ; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
@@ -451,6 +481,17 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
 
+define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_mul_offreg_1
+; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 4
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
 ; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
 define i64 @load_sext_shift_offreg_imm1(i32 %a) {
 ; CHECK-LABEL: load_sext_shift_offreg_imm1
-- 
2.34.1
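
To make the fold's legality rule concrete outside of the FastISel plumbing,
here is a minimal standalone C++ sketch of the check that computeAddress now
performs for multiplies. The helper name foldableShift and the sample values
are illustrative only; they are not part of the patch or of any LLVM API.

#include <cassert>
#include <cstdint>

// Returns the shift amount k for an "lsl #k" scaled addressing mode, or -1
// if a multiply by MulConst feeding an access of AccessBytes cannot fold.
int foldableShift(uint64_t MulConst, uint64_t AccessBytes) {
  // The multiplier must be a non-zero power of two (cf. isMulPowOf2).
  if (MulConst == 0 || (MulConst & (MulConst - 1)) != 0)
    return -1;
  int Val = __builtin_ctzll(MulConst); // logBase2 (GCC/Clang builtin)
  // The patch only folds shifts of 1-3, i.e. 2-, 4-, or 8-byte scales.
  if (Val < 1 || Val > 3)
    return -1;
  // The scale must match the access size, e.g. lsl #2 for a 4-byte load.
  if (AccessBytes != (1ULL << Val))
    return -1;
  return Val;
}

int main() {
  assert(foldableShift(4, 4) == 2);   // mul by 4 + i32 load -> lsl #2
  assert(foldableShift(8, 4) == -1);  // scale does not match access size
  assert(foldableShift(16, 8) == -1); // shift amount 4 is out of range
  assert(foldableShift(6, 4) == -1);  // multiplier is not a power of two
  return 0;
}

This mirrors the bail-out conditions in the new Instruction::Mul case: a
non-power-of-2 multiplier, a shift amount outside [1, 3], or a scale that
does not match the access width all leave the multiply as a separate
instruction instead of folding it into the addressing mode.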