From: Juergen Ributzka Date: Fri, 29 Aug 2014 00:19:21 +0000 (+0000) Subject: [FastISel][AArch64] Don't fold instructions that are not in the same basic block. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=cf45151b2c859e29f07ac1f77f69ac00f2ced4fc [FastISel][AArch64] Don't fold instructions that are not in the same basic block. This fix checks first if the instruction to be folded (e.g. sign-/zero-extend, or shift) is in the same machine basic block as the instruction we are folding into. Not doing so can result in incorrect code, because the value might not be live-out of the basic block, where the value is defined. This fixes rdar://problem/18169495. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216700 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index df294bdc149..179fe4dd7ff 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -134,6 +134,7 @@ private: // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadStoreTypeLegal(Type *Ty, MVT &VT); + bool isValueAvailable(const Value *V) const; bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); bool ComputeCallAddress(const Value *V, Address &Addr); bool SimplifyAddress(Address &Addr, MVT VT); @@ -679,6 +680,17 @@ bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) { return false; } +bool AArch64FastISel::isValueAvailable(const Value *V) const { + if (!isa<Instruction>(V)) + return true; + + const auto *I = cast<Instruction>(V); + if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) + return true; + + return false; +} + bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) { unsigned ScaleFactor; switch (VT.SimpleTy) { @@ -853,7 +865,7 @@ unsigned AArch64FastISel::emitAddsSubs(bool UseAdds, MVT RetVT, std::swap(LHS, RHS); // Canonicalize shift immediate to the RHS. 
- if (UseAdds) + if (UseAdds && isValueAvailable(LHS)) if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) if (isa<ConstantInt>(SI->getOperand(1))) if (SI->getOpcode() == Instruction::Shl || @@ -883,7 +895,7 @@ unsigned AArch64FastISel::emitAddsSubs(bool UseAdds, MVT RetVT, return ResultReg; // Only extend the RHS within the instruction if there is a valid extend type. - if (ExtendType != AArch64_AM::InvalidShiftExtend) { + if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { @@ -904,26 +916,27 @@ unsigned AArch64FastISel::emitAddsSubs(bool UseAdds, MVT RetVT, } // Check if the shift can be folded into the instruction. - if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { - if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { - AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; - switch (SI->getOpcode()) { - default: break; - case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; - case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; - case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; - } - uint64_t ShiftVal = C->getZExtValue(); - if (ShiftType != AArch64_AM::InvalidShiftExtend) { - unsigned RHSReg = getRegForValue(SI->getOperand(0)); - if (!RHSReg) - return 0; - bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftType, ShiftVal, WantResult); + if (isValueAvailable(RHS)) + if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { + if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { + AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; + switch (SI->getOpcode()) { + default: break; + case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; + case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; + case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; + } + uint64_t ShiftVal = C->getZExtValue(); 
+ if (ShiftType != AArch64_AM::InvalidShiftExtend) { + unsigned RHSReg = getRegForValue(SI->getOperand(0)); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); + return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, WantResult); + } } } - } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) return 0; @@ -3281,17 +3294,19 @@ bool AArch64FastISel::SelectShift(const Instruction *I) { uint64_t ShiftVal = C->getZExtValue(); MVT SrcVT = RetVT; bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true; - const Value * Op0 = I->getOperand(0); + const Value *Op0 = I->getOperand(0); if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { MVT TmpVT; - if (isLoadStoreTypeLegal(ZExt->getSrcTy(), TmpVT)) { + if (isValueAvailable(ZExt) && + isLoadStoreTypeLegal(ZExt->getSrcTy(), TmpVT)) { SrcVT = TmpVT; IsZExt = true; Op0 = ZExt->getOperand(0); } } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { MVT TmpVT; - if (isLoadStoreTypeLegal(SExt->getSrcTy(), TmpVT)) { + if (isValueAvailable(SExt) && + isLoadStoreTypeLegal(SExt->getSrcTy(), TmpVT)) { SrcVT = TmpVT; IsZExt = false; Op0 = SExt->getOperand(0); diff --git a/test/CodeGen/AArch64/fast-isel-folding.ll b/test/CodeGen/AArch64/fast-isel-folding.ll new file mode 100644 index 00000000000..6b524ff2c09 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-folding.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel-abort -verify-machineinstrs < %s + +; Test that we don't fold the shift. +define i64 @fold_shift_test(i64 %a, i1 %c) { + %1 = sub i64 %a, 8 + %2 = ashr i64 %1, 3 + br i1 %c, label %bb1, label %bb2 +bb1: + %3 = icmp ult i64 0, %2 + br i1 %3, label %bb2, label %bb3 +bb2: + ret i64 1 +bb3: + ret i64 2 +} + +; Test that we don't fold the sign-extend. 
+define i64 @fold_sext_test1(i32 %a, i1 %c) { + %1 = sub i32 %a, 8 + %2 = sext i32 %1 to i64 + br i1 %c, label %bb1, label %bb2 +bb1: + %3 = icmp ult i64 0, %2 + br i1 %3, label %bb2, label %bb3 +bb2: + ret i64 1 +bb3: + ret i64 2 +} + +; Test that we don't fold the sign-extend. +define i64 @fold_sext_test2(i32 %a, i1 %c) { + %1 = sub i32 %a, 8 + %2 = sext i32 %1 to i64 + br i1 %c, label %bb1, label %bb2 +bb1: + %3 = shl i64 %2, 4 + ret i64 %3 +bb2: + ret i64 %2 +} + +; Test that we clear the kill flag. +define i32 @fold_kill_test(i32 %a) { + %1 = sub i32 %a, 8 + %2 = shl i32 %1, 3 + %3 = icmp ult i32 0, %2 + br i1 %3, label %bb1, label %bb2 +bb1: + ret i32 %2 +bb2: + %4 = add i32 %2, 4 + ret i32 %4 +}