X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64FastISel.cpp;h=cb0fa958a5a9f84ce4eac546ef95850643fe768c;hb=4b6f00ad1816ec3dd15fc83090e970e83728dc22;hp=619dfd66d1c44a0188566626cca081ce3bf12c7b;hpb=488f228a4f240bfa1153f9cc96dab3d7861d02b2;p=oota-llvm.git diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 619dfd66d1c..cb0fa958a5a 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -133,6 +133,7 @@ private: bool selectShift(const Instruction *I); bool selectBitCast(const Instruction *I); bool selectFRem(const Instruction *I); + bool selectSDiv(const Instruction *I); // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); @@ -425,6 +426,19 @@ unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); } +/// \brief Check if the multiply is by a power-of-2 constant. +static bool isMulPowOf2(const Value *I) { + if (const auto *MI = dyn_cast(I)) { + if (const auto *C = dyn_cast(MI->getOperand(0))) + if (C->getValue().isPowerOf2()) + return true; + if (const auto *C = dyn_cast(MI->getOperand(1))) + if (C->getValue().isPowerOf2()) + return true; + } + return false; +} + // Computes the address to get to an object. bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) { @@ -582,6 +596,29 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) if (SE->getOperand(0)->getType()->isIntegerTy(32)) Addr.setExtendType(AArch64_AM::SXTW); + if (const auto *AI = dyn_cast(U)) + if (AI->getOpcode() == Instruction::And) { + const Value *LHS = AI->getOperand(0); + const Value *RHS = AI->getOperand(1); + + if (const auto *C = dyn_cast(LHS)) + if (C->getValue() == 0xffffffff) + std::swap(LHS, RHS); + + if (const auto *C = cast(RHS)) + if (C->getValue() == 0xffffffff) { + Addr.setExtendType(AArch64_AM::UXTW); + unsigned Reg = getRegForValue(LHS); + if (!Reg) + return false; + bool RegIsKill = hasTrivialKill(LHS); + Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, + AArch64::sub_32); + Addr.setOffsetReg(Reg); + return true; + } + } + unsigned Reg = getRegForValue(U->getOperand(0)); if (!Reg) return false; @@ -589,7 +626,95 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) return true; } break; + case Instruction::Mul: { + if (Addr.getOffsetReg()) + break; + + if (!isMulPowOf2(U)) + break; + + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + // Canonicalize power-of-2 value to the RHS. 
+ if (const auto *C = dyn_cast(LHS)) + if (C->getValue().isPowerOf2()) + std::swap(LHS, RHS); + + assert(isa(RHS) && "Expected an ConstantInt."); + const auto *C = cast(RHS); + unsigned Val = C->getValue().logBase2(); + if (Val < 1 || Val > 3) + break; + + uint64_t NumBytes = 0; + if (Ty && Ty->isSized()) { + uint64_t NumBits = DL.getTypeSizeInBits(Ty); + NumBytes = NumBits / 8; + if (!isPowerOf2_64(NumBits)) + NumBytes = 0; + } + + if (NumBytes != (1ULL << Val)) + break; + + Addr.setShift(Val); + Addr.setExtendType(AArch64_AM::LSL); + + if (const auto *I = dyn_cast(LHS)) + if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) + U = I; + + if (const auto *ZE = dyn_cast(U)) + if (ZE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::UXTW); + LHS = U->getOperand(0); + } + + if (const auto *SE = dyn_cast(U)) + if (SE->getOperand(0)->getType()->isIntegerTy(32)) { + Addr.setExtendType(AArch64_AM::SXTW); + LHS = U->getOperand(0); + } + + unsigned Reg = getRegForValue(LHS); + if (!Reg) + return false; + Addr.setOffsetReg(Reg); + return true; } + case Instruction::And: { + if (Addr.getOffsetReg()) + break; + + if (DL.getTypeSizeInBits(Ty) != 8) + break; + + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + if (const auto *C = dyn_cast(LHS)) + if (C->getValue() == 0xffffffff) + std::swap(LHS, RHS); + + if (const auto *C = cast(RHS)) + if (C->getValue() == 0xffffffff) { + Addr.setShift(0); + Addr.setExtendType(AArch64_AM::LSL); + Addr.setExtendType(AArch64_AM::UXTW); + + unsigned Reg = getRegForValue(LHS); + if (!Reg) + return false; + bool RegIsKill = hasTrivialKill(LHS); + Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, + AArch64::sub_32); + Addr.setOffsetReg(Reg); + return true; + } + break; + } + } // end switch if (Addr.getReg()) { if (!Addr.getOffsetReg()) { @@ -792,11 +917,21 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // Since the offset is too large for the load/store instruction get the // reg+offset into a register. if (ImmediateOffsetNeedsLowering) { - unsigned ResultReg = 0; - if (Addr.getReg()) - ResultReg = fastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), - /*IsKill=*/false, Offset, MVT::i64); - else + unsigned ResultReg; + if (Addr.getReg()) { + // Try to fold the immediate into the add instruction. + if (Offset < 0) + ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(), + /*IsKill=*/false, -Offset); + else + ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(), + /*IsKill=*/false, Offset); + if (!ResultReg) { + unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); + ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(), + /*IsKill=*/false, ImmReg, /*IsKill=*/true); + } + } else ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); if (!ResultReg) @@ -879,8 +1014,13 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (UseAdd && isa(LHS) && !isa(RHS)) std::swap(LHS, RHS); + // Canonicalize mul by power of 2 to the RHS. + if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) + if (isMulPowOf2(LHS)) + std::swap(LHS, RHS); + // Canonicalize shift immediate to the RHS. 
- if (UseAdd && isValueAvailable(LHS)) + if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) if (const auto *SI = dyn_cast(LHS)) if (isa(SI->getOperand(1))) if (SI->getOpcode() == Instruction::Shl || @@ -910,7 +1050,8 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, return ResultReg; // Only extend the RHS within the instruction if there is a valid extend type. - if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) { + if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && + isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) if (const auto *C = dyn_cast(SI->getOperand(1))) if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { @@ -930,8 +1071,28 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, ExtendType, 0, SetFlags, WantResult); } + // Check if the mul can be folded into the instruction. + if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (isMulPowOf2(RHS)) { + const Value *MulLHS = cast(RHS)->getOperand(0); + const Value *MulRHS = cast(RHS)->getOperand(1); + + if (const auto *C = dyn_cast(MulLHS)) + if (C->getValue().isPowerOf2()) + std::swap(MulLHS, MulRHS); + + assert(isa(MulRHS) && "Expected a ConstantInt."); + uint64_t ShiftVal = cast(MulRHS)->getValue().logBase2(); + unsigned RHSReg = getRegForValue(MulLHS); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(MulLHS); + return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, + AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + } + // Check if the shift can be folded into the instruction. - if (isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) if (const auto *SI = dyn_cast(RHS)) { if (const auto *C = dyn_cast(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1226,12 +1387,16 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (isa(LHS) && !isa(RHS)) std::swap(LHS, RHS); + // Canonicalize mul by power-of-2 to the RHS. + if (LHS->hasOneUse() && isValueAvailable(LHS)) + if (isMulPowOf2(LHS)) + std::swap(LHS, RHS); + // Canonicalize shift immediate to the RHS. - if (isValueAvailable(LHS)) - if (const auto *SI = dyn_cast(LHS)) + if (LHS->hasOneUse() && isValueAvailable(LHS)) + if (const auto *SI = dyn_cast(LHS)) if (isa(SI->getOperand(1))) - if (SI->getOpcode() == Instruction::Shl) - std::swap(LHS, RHS); + std::swap(LHS, RHS); unsigned LHSReg = getRegForValue(LHS); if (!LHSReg) @@ -1246,19 +1411,39 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (ResultReg) return ResultReg; + // Check if the mul can be folded into the instruction. + if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (isMulPowOf2(RHS)) { + const Value *MulLHS = cast(RHS)->getOperand(0); + const Value *MulRHS = cast(RHS)->getOperand(1); + + if (const auto *C = dyn_cast(MulLHS)) + if (C->getValue().isPowerOf2()) + std::swap(MulLHS, MulRHS); + + assert(isa(MulRHS) && "Expected a ConstantInt."); + uint64_t ShiftVal = cast(MulRHS)->getValue().logBase2(); + + unsigned RHSReg = getRegForValue(MulLHS); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(MulLHS); + return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + } + // Check if the shift can be folded into the instruction. 
- if (isValueAvailable(RHS)) - if (const auto *SI = dyn_cast(RHS)) - if (const auto *C = dyn_cast(SI->getOperand(1))) - if (SI->getOpcode() == Instruction::Shl) { - uint64_t ShiftVal = C->getZExtValue(); - unsigned RHSReg = getRegForValue(SI->getOperand(0)); - if (!RHSReg) - return 0; - bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); - } + if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (const auto *SI = dyn_cast(RHS)) + if (const auto *C = dyn_cast(SI->getOperand(1))) { + uint64_t ShiftVal = C->getZExtValue(); + unsigned RHSReg = getRegForValue(SI->getOperand(0)); + if (!RHSReg) + return 0; + bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); + return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1673,6 +1858,32 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { } } +/// \brief Check if the comparison against zero and the following branch can be +/// folded into a single instruction (CBZ or CBNZ). +static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) { + CmpInst::Predicate Predicate = CI->getPredicate(); + if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE)) + return false; + + Type *Ty = CI->getOperand(0)->getType(); + if (!Ty->isIntegerTy()) + return false; + + unsigned BW = cast(Ty)->getBitWidth(); + if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64) + return false; + + if (const auto *C = dyn_cast(CI->getOperand(0))) + if (C->isNullValue()) + return true; + + if (const auto *C = dyn_cast(CI->getOperand(1))) + if (C->isNullValue()) + return true; + + return false; +} + bool AArch64FastISel::selectBranch(const Instruction *I) { const BranchInst *BI = cast(I); if (BI->isUnconditional()) { @@ -1686,16 +1897,117 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - // We may not handle every CC for now. - CC = getCompareCC(CI->getPredicate()); - if (CC == AArch64CC::AL) - return false; + if (CI->hasOneUse() && isValueAvailable(CI)) { + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + switch (Predicate) { + default: + break; + case CmpInst::FCMP_FALSE: + fastEmitBranch(FBB, DbgLoc); + return true; + case CmpInst::FCMP_TRUE: + fastEmitBranch(TBB, DbgLoc); + return true; + } + + // Try to take advantage of fallthrough opportunities. + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } + + // Try to optimize comparisons against zero. + if (canFoldZeroCheckIntoBranch(CI)) { + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // Canonicalize zero values to the RHS. 
+ if (const auto *C = dyn_cast(LHS)) + if (C->isNullValue()) + std::swap(LHS, RHS); + + int TestBit = -1; + if (const auto *AI = dyn_cast(LHS)) + if (AI->getOpcode() == Instruction::And) { + const Value *AndLHS = AI->getOperand(0); + const Value *AndRHS = AI->getOperand(1); + + if (const auto *C = dyn_cast(AndLHS)) + if (C->getValue().isPowerOf2()) + std::swap(AndLHS, AndRHS); + + if (const auto *C = dyn_cast(AndRHS)) + if (C->getValue().isPowerOf2()) { + TestBit = C->getValue().logBase2(); + LHS = AndLHS; + } + } + + static const unsigned OpcTable[2][2][2] = { + { {AArch64::CBZW, AArch64::CBZX }, + {AArch64::CBNZW, AArch64::CBNZX} }, + { {AArch64::TBZW, AArch64::TBZX }, + {AArch64::TBNZW, AArch64::TBNZX} } + }; + bool IsBitTest = TestBit != -1; + bool IsCmpNE = Predicate == CmpInst::ICMP_NE; + bool Is64Bit = LHS->getType()->isIntegerTy(64); + unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; + + unsigned SrcReg = getRegForValue(LHS); + if (!SrcReg) + return false; + bool SrcIsKill = hasTrivialKill(LHS); + + // Emit the combined compare and branch instruction. + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + if (IsBitTest) + MIB.addImm(TestBit); + MIB.addMBB(TBB); + + // Obtain the branch weight and add the TrueBB to the successor list. + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TBB, BranchWeight); + + fastEmitBranch(FBB, DbgLoc); + return true; + } // Emit the cmp. if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; + // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch + // instruction. + CC = getCompareCC(Predicate); + AArch64CC::CondCode ExtraCC = AArch64CC::AL; + switch (Predicate) { + default: + break; + case CmpInst::FCMP_UEQ: + ExtraCC = AArch64CC::EQ; + CC = AArch64CC::VS; + break; + case CmpInst::FCMP_ONE: + ExtraCC = AArch64CC::MI; + CC = AArch64CC::GT; + break; + } + assert((CC != AArch64CC::AL) && "Unexpected condition code."); + + // Emit the extra branch for FCMP_UEQ and FCMP_ONE. + if (ExtraCC != AArch64CC::AL) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(ExtraCC) + .addMBB(TBB); + } + // Emit the branch. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) .addImm(CC) @@ -1713,8 +2025,8 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { } } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { MVT SrcVT; - if (TI->hasOneUse() && TI->getParent() == I->getParent() && - (isTypeSupported(TI->getOperand(0)->getType(), SrcVT))) { + if (TI->hasOneUse() && isValueAvailable(TI) && + isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { unsigned CondReg = getRegForValue(TI->getOperand(0)); if (!CondReg) return false; @@ -1749,8 +2061,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { fastEmitBranch(FBB, DbgLoc); return true; } - } else if (const ConstantInt *CI = - dyn_cast(BI->getCondition())) { + } else if (const auto *CI = dyn_cast(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) @@ -2096,8 +2407,7 @@ bool AArch64FastISel::fastLowerArguments() { if (CC != CallingConv::C) return false; - // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and - // FPR each. 
+ // Only handle simple cases of up to 8 GPR and FPR each. unsigned GPRCnt = 0; unsigned FPRCnt = 0; unsigned Idx = 0; @@ -2111,32 +2421,34 @@ bool AArch64FastISel::fastLowerArguments() { return false; Type *ArgTy = Arg.getType(); - if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + if (ArgTy->isStructTy() || ArgTy->isArrayTy()) return false; EVT ArgVT = TLI.getValueType(ArgTy); - if (!ArgVT.isSimple()) return false; - switch (ArgVT.getSimpleVT().SimpleTy) { - default: return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: + if (!ArgVT.isSimple()) + return false; + + MVT VT = ArgVT.getSimpleVT().SimpleTy; + if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) + return false; + + if (VT.isVector() && + (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) + return false; + + if (VT >= MVT::i1 && VT <= MVT::i64) ++GPRCnt; - break; - case MVT::f16: - case MVT::f32: - case MVT::f64: + else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || + VT.is128BitVector()) ++FPRCnt; - break; - } + else + return false; if (GPRCnt > 8 || FPRCnt > 8) return false; } - static const MCPhysReg Registers[5][8] = { + static const MCPhysReg Registers[6][8] = { { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, AArch64::W5, AArch64::W6, AArch64::W7 }, { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, @@ -2146,7 +2458,9 @@ bool AArch64FastISel::fastLowerArguments() { { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, AArch64::S5, AArch64::S6, AArch64::S7 }, { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, - AArch64::D5, AArch64::D6, AArch64::D7 } + AArch64::D5, AArch64::D6, AArch64::D7 }, + { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7 } }; unsigned GPRIdx = 0; @@ -2154,29 +2468,28 @@ bool AArch64FastISel::fastLowerArguments() { for (auto const &Arg : F->args()) { MVT VT = TLI.getSimpleValueType(Arg.getType()); unsigned SrcReg; - const TargetRegisterClass *RC = nullptr; - switch (VT.SimpleTy) { - default: llvm_unreachable("Unexpected value type."); - case MVT::i1: - case MVT::i8: - case MVT::i16: VT = MVT::i32; // fall-through - case MVT::i32: - SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break; - case MVT::i64: - SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break; - case MVT::f16: - SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break; - case MVT::f32: - SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break; - case MVT::f64: - SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break; - } - - // Skip unused arguments. 
- if (Arg.use_empty()) { - updateValueMap(&Arg, 0); - continue; - } + const TargetRegisterClass *RC; + if (VT >= MVT::i1 && VT <= MVT::i32) { + SrcReg = Registers[0][GPRIdx++]; + RC = &AArch64::GPR32RegClass; + VT = MVT::i32; + } else if (VT == MVT::i64) { + SrcReg = Registers[1][GPRIdx++]; + RC = &AArch64::GPR64RegClass; + } else if (VT == MVT::f16) { + SrcReg = Registers[2][FPRIdx++]; + RC = &AArch64::FPR16RegClass; + } else if (VT == MVT::f32) { + SrcReg = Registers[3][FPRIdx++]; + RC = &AArch64::FPR32RegClass; + } else if ((VT == MVT::f64) || VT.is64BitVector()) { + SrcReg = Registers[4][FPRIdx++]; + RC = &AArch64::FPR64RegClass; + } else if (VT.is128BitVector()) { + SrcReg = Registers[5][FPRIdx++]; + RC = &AArch64::FPR128RegClass; + } else + llvm_unreachable("Unexpected value type."); unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. @@ -2532,7 +2845,7 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, } // Check if both instructions are in the same basic block. - if (II->getParent() != I->getParent()) + if (!isValueAvailable(II)) return false; // Make sure nothing is in the way @@ -3525,15 +3838,54 @@ bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { } bool AArch64FastISel::selectMul(const Instruction *I) { - EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); - if (!SrcEVT.isSimple()) + MVT VT; + if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - // Must be simple value type. Don't handle vectors. - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; + if (VT.isVector()) + return selectBinaryOp(I, ISD::MUL); + + const Value *Src0 = I->getOperand(0); + const Value *Src1 = I->getOperand(1); + if (const auto *C = dyn_cast(Src0)) + if (C->getValue().isPowerOf2()) + std::swap(Src0, Src1); + + // Try to simplify to a shift instruction. 
+ if (const auto *C = dyn_cast(Src1)) + if (C->getValue().isPowerOf2()) { + uint64_t ShiftVal = C->getValue().logBase2(); + MVT SrcVT = VT; + bool IsZExt = true; + if (const auto *ZExt = dyn_cast(Src0)) { + MVT VT; + if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { + SrcVT = VT; + IsZExt = true; + Src0 = ZExt->getOperand(0); + } + } else if (const auto *SExt = dyn_cast(Src0)) { + MVT VT; + if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { + SrcVT = VT; + IsZExt = false; + Src0 = SExt->getOperand(0); + } + } + + unsigned Src0Reg = getRegForValue(Src0); + if (!Src0Reg) + return false; + bool Src0IsKill = hasTrivialKill(Src0); + + unsigned ResultReg = + emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); + + if (ResultReg) { + updateValueMap(I, ResultReg); + return true; + } + } unsigned Src0Reg = getRegForValue(I->getOperand(0)); if (!Src0Reg) @@ -3545,8 +3897,7 @@ bool AArch64FastISel::selectMul(const Instruction *I) { return false; bool Src1IsKill = hasTrivialKill(I->getOperand(1)); - unsigned ResultReg = - emitMul_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); + unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); if (!ResultReg) return false; @@ -3718,6 +4069,75 @@ bool AArch64FastISel::selectFRem(const Instruction *I) { return true; } +bool AArch64FastISel::selectSDiv(const Instruction *I) { + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + if (!isa(I->getOperand(1))) + return selectBinaryOp(I, ISD::SDIV); + + const APInt &C = cast(I->getOperand(1))->getValue(); + if ((VT != MVT::i32 && VT != MVT::i64) || !C || + !(C.isPowerOf2() || (-C).isPowerOf2())) + return selectBinaryOp(I, ISD::SDIV); + + unsigned Lg2 = C.countTrailingZeros(); + unsigned Src0Reg = getRegForValue(I->getOperand(0)); + if (!Src0Reg) + return false; + bool Src0IsKill = hasTrivialKill(I->getOperand(0)); + + if (cast(I)->isExact()) { + unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); + if (!ResultReg) + return false; + updateValueMap(I, ResultReg); + return true; + } + + unsigned Pow2MinusOne = (1 << Lg2) - 1; + unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg, + /*IsKill=*/false, Pow2MinusOne); + if (!AddReg) + return false; + + // (Src0 < 0) ? Pow2 - 1 : 0; + if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) + return false; + + unsigned SelectOpc; + const TargetRegisterClass *RC; + if (VT == MVT::i64) { + SelectOpc = AArch64::CSELXr; + RC = &AArch64::GPR64RegClass; + } else { + SelectOpc = AArch64::CSELWr; + RC = &AArch64::GPR32RegClass; + } + unsigned SelectReg = + fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, + Src0IsKill, AArch64CC::LT); + if (!SelectReg) + return false; + + // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also + // negate the result. + unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; + unsigned ResultReg; + if (C.isNegative()) + ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, + SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); + else + ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); + + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: @@ -3726,9 +4146,9 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { case Instruction::Sub: return selectAddSub(I); case Instruction::Mul: - if (!selectBinaryOp(I, ISD::MUL)) - return selectMul(I); - return true; + return selectMul(I); + case Instruction::SDiv: + return selectSDiv(I); case Instruction::SRem: if (!selectBinaryOp(I, ISD::SREM)) return selectRem(I, ISD::SREM);
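
[Reviewer note, not part of the patch] The new Mul case in computeAddress only folds "base + index * 2^k" into a register-offset access when log2 of the multiplier is between 1 and 3 and matches the size of the access (e.g. ldr x0, [x1, x2, lsl #3] for an i64 load). The sketch below restates that legality check in standalone form; the helper name canFoldMulIntoAddress is invented for illustration, and __builtin_ctzll (GCC/Clang) stands in for APInt::logBase2().

    #include <cstdint>

    // Sketch only: restates the check the Mul case performs before using a
    // scaled register offset. A fold is legal when the multiplier is 2^k with
    // k in 1..3 and the access is exactly 2^k bytes wide.
    static bool canFoldMulIntoAddress(uint64_t MulConst, uint64_t AccessBytes) {
      if (MulConst == 0 || (MulConst & (MulConst - 1)) != 0)
        return false;                              // multiplier is not a power of two
      unsigned Shift = __builtin_ctzll(MulConst);  // stands in for APInt::logBase2()
      if (Shift < 1 || Shift > 3)
        return false;                              // the patch only folds lsl #1..#3
      return AccessBytes == (1ULL << Shift);       // scale must equal the access size
    }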
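
[Reviewer note, not part of the patch] Once canFoldZeroCheckIntoBranch accepts a compare, selectBranch picks a combined compare-and-branch opcode: CBZ/CBNZ for a plain equality test against zero, TBZ/TBNZ when the compared value is an 'and' with a power-of-two mask, with W/X variants chosen by operand width. The sketch below restates that three-way table; CbzKind and pickCompareAndBranch are invented names for illustration only.

    // Sketch only: the opcode choice behind the patch's OpcTable, e.g.
    //   icmp eq i64 %x, 0                      -> cbz  x0, <target>
    //   %a = and i32 %x, 4; icmp ne i32 %a, 0  -> tbnz w0, #2, <target>
    enum class CbzKind { CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX };

    static CbzKind pickCompareAndBranch(bool IsCmpNE, bool Is64Bit, int TestBit) {
      static const CbzKind Table[2][2][2] = {
          {{CbzKind::CBZW, CbzKind::CBZX}, {CbzKind::CBNZW, CbzKind::CBNZX}},
          {{CbzKind::TBZW, CbzKind::TBZX}, {CbzKind::TBNZW, CbzKind::TBNZX}}};
      bool IsBitTest = TestBit != -1;  // a power-of-two AND mask supplies the bit
      return Table[IsBitTest][IsCmpNE][Is64Bit];
    }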
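
[Reviewer note, not part of the patch] selectSDiv handles a signed divide by +/-2^k without an SDIV instruction: add 2^k-1 to the dividend, use a CSEL on "dividend < 0" to pick the biased or original value, arithmetic-shift right by k, and negate via a SUB from the zero register when the divisor is negative. The snippet below restates the arithmetic only (it is not the emitter code); sdivByPow2 is a made-up name, __builtin_ctzll stands in for APInt::countTrailingZeros(), and an arithmetic ">>" on signed values is assumed.

    #include <cassert>
    #include <cstdint>

    // Sketch only: the identity behind the ADD / CSEL / ASR (+ optional negate)
    // sequence selectSDiv emits for a power-of-two divisor.
    static int64_t sdivByPow2(int64_t X, int64_t Divisor) {
      uint64_t Abs = Divisor < 0 ? 0 - (uint64_t)Divisor : (uint64_t)Divisor;
      assert(Abs != 0 && (Abs & (Abs - 1)) == 0 && "divisor must be +/-2^k");
      unsigned Lg2 = __builtin_ctzll(Abs);           // countTrailingZeros()
      int64_t Biased = X + ((int64_t(1) << Lg2) - 1);
      int64_t Sel = (X < 0) ? Biased : X;            // CSEL ..., lt
      int64_t Quot = Sel >> Lg2;                     // ASR #Lg2, rounds toward zero
      return Divisor < 0 ? -Quot : Quot;             // SUB from xzr/wzr if negative
    }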