//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64CallingConvention.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
return Base.Reg;
}
void setOffsetReg(unsigned Reg) {
- assert(isRegBase() && "Invalid offset register access!");
OffsetReg = Reg;
}
unsigned getOffsetReg() const {
- assert(isRegBase() && "Invalid offset register access!");
return OffsetReg;
}
void setFI(unsigned FI) {
bool selectBitCast(const Instruction *I);
bool selectFRem(const Instruction *I);
bool selectSDiv(const Instruction *I);
+ bool selectGetElementPtr(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
unsigned Alignment);
bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
const Value *Cond);
+ bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
+ bool optimizeSelect(const SelectInst *SI);
+ std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
// Emit helper routines.
unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
bool WantResult = true);
// Emit functions.
+ bool emitCompareAndBranch(const BranchInst *BI);
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
- bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- MachineMemOperand *MMO = nullptr);
+ unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
+ MachineMemOperand *MMO = nullptr);
bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO = nullptr);
unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
bool SetFlags = false, bool WantResult = true,
bool IsZExt = false);
+ unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
bool SetFlags = false, bool WantResult = true,
bool IsZExt = false);
#include "AArch64GenCallingConv.inc"
+/// \brief Check if the sign-/zero-extend will be a noop.
+static bool isIntExtFree(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
+ "Unexpected value type.");
+ bool IsZExt = isa<ZExtInst>(I);
+
+ if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
+ if (LI->hasOneUse())
+ return true;
+
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
+ if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
+ return true;
+
+ return false;
+}
+
+/// \brief Determine the implicit scale factor that is applied by a memory
+/// operation for a given value type.
+static unsigned getImplicitScaleFactor(MVT VT) {
+ switch (VT.SimpleTy) {
+ default:
+ return 0; // invalid
+ case MVT::i1: // fall-through
+ case MVT::i8:
+ return 1;
+ case MVT::i16:
+ return 2;
+ case MVT::i32: // fall-through
+ case MVT::f32:
+ return 4;
+ case MVT::i64: // fall-through
+ case MVT::f64:
+ return 8;
+ }
+}
+
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
return CC_AArch64_WebKit_JS;
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
+ // For the MachO large code model materialize the FP constant in code.
+ if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+ unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+ const TargetRegisterClass *RC = Is64Bit ?
+ &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
+ .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(TmpReg, getKillRegState(true));
+
+ return ResultReg;
+ }
+
// Materialize via constant pool. MachineConstantPool wants an explicit
// alignment.
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
std::swap(LHS, RHS);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
+ Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
return computeAddress(LHS, Addr, Ty);
}
break;
}
- case Instruction::Shl:
+ case Instruction::Sub: {
+ // Subs of constants are common and easy enough.
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
+ return computeAddress(LHS, Addr, Ty);
+ }
+ break;
+ }
+ case Instruction::Shl: {
if (Addr.getOffsetReg())
break;
- if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
- unsigned Val = CI->getZExtValue();
- if (Val < 1 || Val > 3)
- break;
+ const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!CI)
+ break;
- uint64_t NumBytes = 0;
- if (Ty && Ty->isSized()) {
- uint64_t NumBits = DL.getTypeSizeInBits(Ty);
- NumBytes = NumBits / 8;
- if (!isPowerOf2_64(NumBits))
- NumBytes = 0;
- }
+ unsigned Val = CI->getZExtValue();
+ if (Val < 1 || Val > 3)
+ break;
- if (NumBytes != (1ULL << Val))
- break;
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1ULL << Val))
+ break;
- Addr.setShift(Val);
- Addr.setExtendType(AArch64_AM::LSL);
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
- if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- U = I;
+ const Value *Src = U->getOperand(0);
+ if (const auto *I = dyn_cast<Instruction>(Src))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ Src = I;
- if (const auto *ZE = dyn_cast<ZExtInst>(U))
- if (ZE->getOperand(0)->getType()->isIntegerTy(32))
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
+ }
- if (const auto *SE = dyn_cast<SExtInst>(U))
- if (SE->getOperand(0)->getType()->isIntegerTy(32))
- Addr.setExtendType(AArch64_AM::SXTW);
-
- if (const auto *AI = dyn_cast<BinaryOperator>(U))
- if (AI->getOpcode() == Instruction::And) {
- const Value *LHS = AI->getOperand(0);
- const Value *RHS = AI->getOperand(1);
-
- if (const auto *C = dyn_cast<ConstantInt>(LHS))
- if (C->getValue() == 0xffffffff)
- std::swap(LHS, RHS);
-
- if (const auto *C = cast<ConstantInt>(RHS))
- if (C->getValue() == 0xffffffff) {
- Addr.setExtendType(AArch64_AM::UXTW);
- unsigned Reg = getRegForValue(LHS);
- if (!Reg)
- return false;
- bool RegIsKill = hasTrivialKill(LHS);
- Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
- AArch64::sub_32);
- Addr.setOffsetReg(Reg);
- return true;
- }
- }
+ if (const auto *AI = dyn_cast<BinaryOperator>(Src))
+ if (AI->getOpcode() == Instruction::And) {
+ const Value *LHS = AI->getOperand(0);
+ const Value *RHS = AI->getOperand(1);
- unsigned Reg = getRegForValue(U->getOperand(0));
- if (!Reg)
- return false;
- Addr.setOffsetReg(Reg);
- return true;
- }
- break;
+ if (const auto *C = dyn_cast<ConstantInt>(LHS))
+ if (C->getValue() == 0xffffffff)
+ std::swap(LHS, RHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 0xffffffff) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ unsigned Reg = getRegForValue(LHS);
+ if (!Reg)
+ return false;
+ bool RegIsKill = hasTrivialKill(LHS);
+ Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
+ AArch64::sub_32);
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ }
+
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
case Instruction::Mul: {
if (Addr.getOffsetReg())
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
- if (const auto *I = dyn_cast<Instruction>(LHS))
+ const Value *Src = LHS;
+ if (const auto *I = dyn_cast<Instruction>(Src))
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- U = I;
+ Src = I;
- if (const auto *ZE = dyn_cast<ZExtInst>(U))
- if (ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::UXTW);
- LHS = U->getOperand(0);
+ Src = ZE->getOperand(0);
}
-
- if (const auto *SE = dyn_cast<SExtInst>(U))
- if (SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
Addr.setExtendType(AArch64_AM::SXTW);
- LHS = U->getOperand(0);
+ Src = SE->getOperand(0);
}
+ }
- unsigned Reg = getRegForValue(LHS);
+ unsigned Reg = getRegForValue(Src);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
if (Addr.getOffsetReg())
break;
- if (DL.getTypeSizeInBits(Ty) != 8)
+ if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
break;
const Value *LHS = U->getOperand(0);
if (C->getValue() == 0xffffffff)
std::swap(LHS, RHS);
- if (const auto *C = cast<ConstantInt>(RHS))
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue() == 0xffffffff) {
Addr.setShift(0);
Addr.setExtendType(AArch64_AM::LSL);
}
break;
}
- } // end switch
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ if (!Addr.getReg() || Addr.getOffsetReg())
+ break;
- if (Addr.getReg()) {
- if (!Addr.getOffsetReg()) {
- unsigned Reg = getRegForValue(Obj);
- if (!Reg)
- return false;
- Addr.setOffsetReg(Reg);
- return true;
+ const Value *Src = nullptr;
+ // Fold the zext or sext when it won't become a noop.
+ if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
+ if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::UXTW);
+ Src = ZE->getOperand(0);
+ }
+ } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
+ if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
+ Addr.setExtendType(AArch64_AM::SXTW);
+ Src = SE->getOperand(0);
+ }
}
- return false;
+
+ if (!Src)
+ break;
+
+ Addr.setShift(0);
+ unsigned Reg = getRegForValue(Src);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
}
+ } // end switch
- unsigned Reg = getRegForValue(Obj);
- if (!Reg)
- return false;
- Addr.setReg(Reg);
- return true;
+ if (Addr.isRegBase() && !Addr.getReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setReg(Reg);
+ return true;
+ }
+
+ if (!Addr.getOffsetReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+
+ return false;
}
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
}
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
- unsigned ScaleFactor;
- switch (VT.SimpleTy) {
- default: return false;
- case MVT::i1: // fall-through
- case MVT::i8: ScaleFactor = 1; break;
- case MVT::i16: ScaleFactor = 2; break;
- case MVT::i32: // fall-through
- case MVT::f32: ScaleFactor = 4; break;
- case MVT::i64: // fall-through
- case MVT::f64: ScaleFactor = 8; break;
- }
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ return false;
bool ImmediateOffsetNeedsLowering = false;
bool RegisterOffsetNeedsLowering = false;
// Cannot encode an offset register and an immediate offset in the same
// instruction. Fold the immediate offset into the load/store instruction and
// emit an additonal add to take care of the offset register.
- if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
- Addr.getOffsetReg())
+ if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
RegisterOffsetNeedsLowering = true;
// Cannot encode zero register as base.
// If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
- if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
+ if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
+ {
unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
if (ImmediateOffsetNeedsLowering) {
- unsigned ResultReg = 0;
+ unsigned ResultReg;
if (Addr.getReg())
- ResultReg = fastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
- /*IsKill=*/false, Offset, MVT::i64);
+ // Try to fold the immediate into the add instruction.
+ ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
else
ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
MIB.addReg(Addr.getOffsetReg());
MIB.addImm(IsSigned);
MIB.addImm(Addr.getShift() != 0);
- } else {
- MIB.addReg(Addr.getReg());
- MIB.addImm(Offset);
- }
+ } else
+ MIB.addReg(Addr.getReg()).addImm(Offset);
}
if (MMO)
RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
// Canonicalize immediates to the RHS first.
- if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+ if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
// Canonicalize mul by power of 2 to the RHS.
else
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
WantResult);
- }
+ } else if (const auto *C = dyn_cast<Constant>(RHS))
+ if (C->isNullValue())
+ ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
+ WantResult);
+
if (ResultReg)
return ResultReg;
IsZExt);
}
+/// \brief This method is a wrapper to simplify add emission.
+///
+/// First try to emit an add with an immediate operand using emitAddSub_ri. If
+/// that fails, then try to materialize the immediate into a register and use
+/// emitAddSub_rr instead.
+unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
+ int64_t Imm) {
+ unsigned ResultReg;
+ if (Imm < 0)
+ ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
+ else
+ ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
+
+ if (ResultReg)
+ return ResultReg;
+
+ unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
+ if (!CReg)
+ return 0;
+
+ ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
+ return ResultReg;
+}
+
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
bool SetFlags, bool WantResult, bool IsZExt) {
return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
}
-bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- MachineMemOperand *MMO) {
+unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
+ bool WantZExt, MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
if (!simplifyAddress(Addr, VT))
- return false;
+ return 0;
- unsigned ScaleFactor;
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type.");
- case MVT::i1: // fall-through
- case MVT::i8: ScaleFactor = 1; break;
- case MVT::i16: ScaleFactor = 2; break;
- case MVT::i32: // fall-through
- case MVT::f32: ScaleFactor = 4; break;
- case MVT::i64: // fall-through
- case MVT::f64: ScaleFactor = 8; break;
- }
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ llvm_unreachable("Unexpected value type.");
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
ScaleFactor = 1;
}
- static const unsigned OpcTable[4][6] = {
- { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
- AArch64::LDURSi, AArch64::LDURDi },
- { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
- AArch64::LDRSui, AArch64::LDRDui },
- { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
- AArch64::LDRSroX, AArch64::LDRDroX },
- { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
- AArch64::LDRSroW, AArch64::LDRDroW }
+ static const unsigned GPOpcTable[2][8][4] = {
+ // Sign-extend.
+ { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
+ AArch64::LDURXi },
+ { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
+ AArch64::LDRXui },
+ { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
+ AArch64::LDRXroW },
+ { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
+ AArch64::LDRXroW }
+ },
+ // Zero-extend.
+ { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
+ AArch64::LDURXi },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
+ AArch64::LDRXui },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
+ AArch64::LDRXroX },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
+ AArch64::LDRXroW }
+ }
+ };
+
+ static const unsigned FPOpcTable[4][2] = {
+ { AArch64::LDURSi, AArch64::LDURDi },
+ { AArch64::LDRSui, AArch64::LDRDui },
+ { AArch64::LDRSroX, AArch64::LDRDroX },
+ { AArch64::LDRSroW, AArch64::LDRDroW }
};
unsigned Opc;
const TargetRegisterClass *RC;
- bool VTIsi1 = false;
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
Addr.getExtendType() == AArch64_AM::SXTW)
Idx++;
+ bool IsRet64Bit = RetVT == MVT::i64;
switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type.");
- case MVT::i1: VTIsi1 = true; // Intentional fall-through.
- case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
- case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
- case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
- case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
- case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
- case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
+ default:
+ llvm_unreachable("Unexpected value type.");
+ case MVT::i1: // Intentional fall-through.
+ case MVT::i8:
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
+ break;
+ case MVT::i16:
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
+ break;
+ case MVT::i32:
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
+ RC = (IsRet64Bit && !WantZExt) ?
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
+ break;
+ case MVT::i64:
+ Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
+ RC = &AArch64::GPR64RegClass;
+ break;
+ case MVT::f32:
+ Opc = FPOpcTable[Idx][0];
+ RC = &AArch64::FPR32RegClass;
+ break;
+ case MVT::f64:
+ Opc = FPOpcTable[Idx][1];
+ RC = &AArch64::FPR64RegClass;
+ break;
}
// Create the base instruction, then add the operands.
- ResultReg = createResultReg(RC);
+ unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling.
- if (VTIsi1) {
+ if (VT == MVT::i1) {
unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
ResultReg = ANDReg;
}
- return true;
+
+ // For zero-extending loads to 64bit we emit a 32bit load and then convert
+ // the 32bit reg to a 64bit reg.
+ if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
+ unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
+ .addImm(0)
+ .addReg(ResultReg, getKillRegState(true))
+ .addImm(AArch64::sub_32);
+ ResultReg = Reg64;
+ }
+ return ResultReg;
}
bool AArch64FastISel::selectAddSub(const Instruction *I) {
if (!computeAddress(I->getOperand(0), Addr, I->getType()))
return false;
- unsigned ResultReg;
- if (!emitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
+ // Fold the following sign-/zero-extend into the load instruction.
+ bool WantZExt = true;
+ MVT RetVT = VT;
+ const Value *IntExtVal = nullptr;
+ if (I->hasOneUse()) {
+ if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
+ if (isTypeSupported(ZE->getType(), RetVT))
+ IntExtVal = ZE;
+ else
+ RetVT = VT;
+ } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
+ if (isTypeSupported(SE->getType(), RetVT))
+ IntExtVal = SE;
+ else
+ RetVT = VT;
+ WantZExt = false;
+ }
+ }
+
+ unsigned ResultReg =
+ emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
+ if (!ResultReg)
return false;
+ // There are a few different cases we have to handle, because the load or the
+ // sign-/zero-extend might not be selected by FastISel if we fall-back to
+ // SelectionDAG. There is also an ordering issue when both instructions are in
+ // different basic blocks.
+ // 1.) The load instruction is selected by FastISel, but the integer extend
+ // not. This usually happens when the integer extend is in a different
+ // basic block and SelectionDAG took over for that basic block.
+ // 2.) The load instruction is selected before the integer extend. This only
+ // happens when the integer extend is in a different basic block.
+ // 3.) The load instruction is selected by SelectionDAG and the integer extend
+ // by FastISel. This happens if there are instructions between the load
+ // and the integer extend that couldn't be selected by FastISel.
+ if (IntExtVal) {
+ // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
+ // could select it. Emit a copy to subreg if necessary. FastISel will remove
+ // it when it selects the integer extend.
+ unsigned Reg = lookUpRegForValue(IntExtVal);
+ if (!Reg) {
+ if (RetVT == MVT::i64 && VT <= MVT::i32) {
+ if (WantZExt) {
+ // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
+ std::prev(FuncInfo.InsertPt)->eraseFromParent();
+ ResultReg = std::prev(FuncInfo.InsertPt)->getOperand(0).getReg();
+ } else
+ ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
+ /*IsKill=*/true,
+ AArch64::sub_32);
+ }
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // The integer extend has already been emitted - delete all the instructions
+ // that have been emitted by the integer extend lowering code and use the
+ // result from the load instruction directly.
+ while (Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI)
+ break;
+ Reg = 0;
+ for (auto &Opnd : MI->uses()) {
+ if (Opnd.isReg()) {
+ Reg = Opnd.getReg();
+ break;
+ }
+ }
+ MI->eraseFromParent();
+ }
+ updateValueMap(IntExtVal, ResultReg);
+ return true;
+ }
+
updateValueMap(I, ResultReg);
return true;
}
if (!simplifyAddress(Addr, VT))
return false;
- unsigned ScaleFactor;
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type.");
- case MVT::i1: // fall-through
- case MVT::i8: ScaleFactor = 1; break;
- case MVT::i16: ScaleFactor = 2; break;
- case MVT::i32: // fall-through
- case MVT::f32: ScaleFactor = 4; break;
- case MVT::i64: // fall-through
- case MVT::f64: ScaleFactor = 8; break;
- }
+ unsigned ScaleFactor = getImplicitScaleFactor(VT);
+ if (!ScaleFactor)
+ llvm_unreachable("Unexpected value type.");
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
ScaleFactor = 1;
}
-
static const unsigned OpcTable[4][6] = {
{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
AArch64::STURSi, AArch64::STURDi },
AArch64::STRSroX, AArch64::STRDroX },
{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
AArch64::STRSroW, AArch64::STRDroW }
-
};
unsigned Opc;
}
}
-/// \brief Check if the comparison against zero and the following branch can be
-/// folded into a single instruction (CBZ or CBNZ).
-static bool canFoldZeroCheckIntoBranch(const CmpInst *CI) {
- CmpInst::Predicate Predicate = CI->getPredicate();
- if ((Predicate != CmpInst::ICMP_EQ) && (Predicate != CmpInst::ICMP_NE))
+/// \brief Try to emit a combined compare-and-branch instruction.
+bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
+ assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
+ const CmpInst *CI = cast<CmpInst>(BI->getCondition());
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ MVT VT;
+ if (!isTypeSupported(LHS->getType(), VT))
return false;
- Type *Ty = CI->getOperand(0)->getType();
- if (!Ty->isIntegerTy())
+ unsigned BW = VT.getSizeInBits();
+ if (BW > 64)
return false;
- unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
- if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Try to take advantage of fallthrough opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ int TestBit = -1;
+ bool IsCmpNE;
+ switch (Predicate) {
+ default:
return false;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
+ std::swap(LHS, RHS);
- if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(0)))
- if (C->isNullValue())
- return true;
+ if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
+ return false;
- if (const auto *C = dyn_cast<ConstantInt>(CI->getOperand(1)))
- if (C->isNullValue())
- return true;
+ if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
+ if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
+ const Value *AndLHS = AI->getOperand(0);
+ const Value *AndRHS = AI->getOperand(1);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
+ if (C->getValue().isPowerOf2())
+ std::swap(AndLHS, AndRHS);
+
+ if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
+ if (C->getValue().isPowerOf2()) {
+ TestBit = C->getValue().logBase2();
+ LHS = AndLHS;
+ }
+ }
+
+ if (VT == MVT::i1)
+ TestBit = 0;
+
+ IsCmpNE = Predicate == CmpInst::ICMP_NE;
+ break;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SGE:
+ if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
+ return false;
+
+ TestBit = BW - 1;
+ IsCmpNE = Predicate == CmpInst::ICMP_SLT;
+ break;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SLE:
+ if (!isa<ConstantInt>(RHS))
+ return false;
+
+ if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
+ return false;
+
+ TestBit = BW - 1;
+ IsCmpNE = Predicate == CmpInst::ICMP_SLE;
+ break;
+ } // end switch
+
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::CBZW, AArch64::CBZX },
+ {AArch64::CBNZW, AArch64::CBNZX} },
+ { {AArch64::TBZW, AArch64::TBZX },
+ {AArch64::TBNZW, AArch64::TBNZX} }
+ };
+
+ bool IsBitTest = TestBit != -1;
+ bool Is64Bit = BW == 64;
+ if (TestBit < 32 && TestBit >= 0)
+ Is64Bit = false;
+
+ unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
+ const MCInstrDesc &II = TII.get(Opc);
+
+ unsigned SrcReg = getRegForValue(LHS);
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(LHS);
+
+ if (BW == 64 && !Is64Bit)
+ SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
+ AArch64::sub_32);
+
+ if ((BW < 32) && !IsBitTest)
+ SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
+
+ // Emit the combined compare and branch instruction.
+ SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ if (IsBitTest)
+ MIB.addImm(TestBit);
+ MIB.addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+ fastEmitBranch(FBB, DbgLoc);
- return false;
+ return true;
}
bool AArch64FastISel::selectBranch(const Instruction *I) {
return true;
}
+ // Try to emit a combined compare-and-branch first.
+ if (emitCompareAndBranch(BI))
+ return true;
+
// Try to take advantage of fallthrough opportunities.
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
- // Try to optimize comparisons against zero.
- if (canFoldZeroCheckIntoBranch(CI)) {
- const Value *LHS = CI->getOperand(0);
- const Value *RHS = CI->getOperand(1);
-
- // Canonicalize zero values to the RHS.
- if (const auto *C = dyn_cast<ConstantInt>(LHS))
- if (C->isNullValue())
- std::swap(LHS, RHS);
-
- int TestBit = -1;
- if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
- if (AI->getOpcode() == Instruction::And) {
- const Value *AndLHS = AI->getOperand(0);
- const Value *AndRHS = AI->getOperand(1);
-
- if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
- if (C->getValue().isPowerOf2())
- std::swap(AndLHS, AndRHS);
-
- if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
- if (C->getValue().isPowerOf2()) {
- TestBit = C->getValue().logBase2();
- LHS = AndLHS;
- }
- }
-
- static const unsigned OpcTable[2][2][2] = {
- { {AArch64::CBZW, AArch64::CBZX },
- {AArch64::CBNZW, AArch64::CBNZX} },
- { {AArch64::TBZW, AArch64::TBZX },
- {AArch64::TBNZW, AArch64::TBNZX} }
- };
- bool IsBitTest = TestBit != -1;
- bool IsCmpNE = Predicate == CmpInst::ICMP_NE;
- bool Is64Bit = LHS->getType()->isIntegerTy(64);
- unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
-
- unsigned SrcReg = getRegForValue(LHS);
- if (!SrcReg)
- return false;
- bool SrcIsKill = hasTrivialKill(LHS);
-
- // Emit the combined compare and branch instruction.
- MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- if (IsBitTest)
- MIB.addImm(TestBit);
- MIB.addMBB(TBB);
-
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
- return true;
- }
-
// Emit the cmp.
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
return true;
}
-bool AArch64FastISel::selectSelect(const Instruction *I) {
- const SelectInst *SI = cast<SelectInst>(I);
+/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
+/// value.
+bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
+ if (!SI->getType()->isIntegerTy(1))
+ return false;
- EVT DestEVT = TLI.getValueType(SI->getType(), true);
- if (!DestEVT.isSimple())
+ const Value *Src1Val, *Src2Val;
+ unsigned Opc = 0;
+ bool NeedExtraOp = false;
+ if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
+ if (CI->isOne()) {
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getFalseValue();
+ Opc = AArch64::ORRWrr;
+ } else {
+ assert(CI->isZero());
+ Src1Val = SI->getFalseValue();
+ Src2Val = SI->getCondition();
+ Opc = AArch64::BICWrr;
+ }
+ } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
+ if (CI->isOne()) {
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getTrueValue();
+ Opc = AArch64::ORRWrr;
+ NeedExtraOp = true;
+ } else {
+ assert(CI->isZero());
+ Src1Val = SI->getCondition();
+ Src2Val = SI->getTrueValue();
+ Opc = AArch64::ANDWrr;
+ }
+ }
+
+ if (!Opc)
return false;
- MVT DestVT = DestEVT.getSimpleVT();
- if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
- DestVT != MVT::f64)
+ unsigned Src1Reg = getRegForValue(Src1Val);
+ if (!Src1Reg)
return false;
+ bool Src1IsKill = hasTrivialKill(Src1Val);
- unsigned SelectOpc;
- const TargetRegisterClass *RC = nullptr;
- switch (DestVT.SimpleTy) {
- default: return false;
+ unsigned Src2Reg = getRegForValue(Src2Val);
+ if (!Src2Reg)
+ return false;
+ bool Src2IsKill = hasTrivialKill(Src2Val);
+
+ if (NeedExtraOp) {
+ Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
+ Src1IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+ Src1IsKill, Src2Reg, Src2IsKill);
+ updateValueMap(SI, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::selectSelect(const Instruction *I) {
+ assert(isa<SelectInst>(I) && "Expected a select instruction.");
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
+
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
case MVT::i32:
- SelectOpc = AArch64::CSELWr; RC = &AArch64::GPR32RegClass; break;
+ Opc = AArch64::CSELWr;
+ RC = &AArch64::GPR32RegClass;
+ break;
case MVT::i64:
- SelectOpc = AArch64::CSELXr; RC = &AArch64::GPR64RegClass; break;
+ Opc = AArch64::CSELXr;
+ RC = &AArch64::GPR64RegClass;
+ break;
case MVT::f32:
- SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
+ Opc = AArch64::FCSELSrrr;
+ RC = &AArch64::FPR32RegClass;
+ break;
case MVT::f64:
- SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
+ Opc = AArch64::FCSELDrrr;
+ RC = &AArch64::FPR64RegClass;
+ break;
}
+ const SelectInst *SI = cast<SelectInst>(I);
const Value *Cond = SI->getCondition();
- bool NeedTest = true;
AArch64CC::CondCode CC = AArch64CC::NE;
- if (foldXALUIntrinsic(CC, I, Cond))
- NeedTest = false;
+ AArch64CC::CondCode ExtraCC = AArch64CC::AL;
- unsigned CondReg = getRegForValue(Cond);
- if (!CondReg)
- return false;
- bool CondIsKill = hasTrivialKill(Cond);
+ if (optimizeSelect(SI))
+ return true;
- if (NeedTest) {
- unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
+ // Try to pickup the flags, so we don't have to emit another compare.
+ if (foldXALUIntrinsic(CC, I, Cond)) {
+ // Fake request the condition to force emission of the XALU intrinsic.
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
+ isValueAvailable(Cond)) {
+ const auto *Cmp = cast<CmpInst>(Cond);
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
+ const Value *FoldSelect = nullptr;
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_FALSE:
+ FoldSelect = SI->getFalseValue();
+ break;
+ case CmpInst::FCMP_TRUE:
+ FoldSelect = SI->getTrueValue();
+ break;
+ }
+
+ if (FoldSelect) {
+ unsigned SrcReg = getRegForValue(FoldSelect);
+ if (!SrcReg)
+ return false;
+ unsigned UseReg = lookUpRegForValue(SI);
+ if (UseReg)
+ MRI.clearKillFlags(UseReg);
+
+ updateValueMap(I, SrcReg);
+ return true;
+ }
+
+ // Emit the cmp.
+ if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
+ return false;
+
+ // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
+ CC = getCompareCC(Predicate);
+ switch (Predicate) {
+ default:
+ break;
+ case CmpInst::FCMP_UEQ:
+ ExtraCC = AArch64CC::EQ;
+ CC = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_ONE:
+ ExtraCC = AArch64CC::MI;
+ CC = AArch64CC::GT;
+ break;
+ }
+ assert((CC != AArch64CC::AL) && "Unexpected condition code.");
+ } else {
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ // Emit a TST instruction (ANDS wzr, reg, #imm).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+ AArch64::WZR)
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
}
- unsigned TrueReg = getRegForValue(SI->getTrueValue());
- bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
+ unsigned Src1Reg = getRegForValue(SI->getTrueValue());
+ bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
- unsigned FalseReg = getRegForValue(SI->getFalseValue());
- bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
+ unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+ bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
- if (!TrueReg || !FalseReg)
+ if (!Src1Reg || !Src2Reg)
return false;
- unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
- FalseReg, FalseIsKill, CC);
+ if (ExtraCC != AArch64CC::AL) {
+ Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+ Src2IsKill, ExtraCC);
+ Src2IsKill = true;
+ }
+ unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+ Src2IsKill, CC);
updateValueMap(I, ResultReg);
return true;
}
}
}
- bool RV;
- unsigned ResultReg;
- RV = emitLoad(VT, ResultReg, Src);
- if (!RV)
+ unsigned ResultReg = emitLoad(VT, VT, Src);
+ if (!ResultReg)
return false;
- RV = emitStore(VT, ResultReg, Dest);
- if (!RV)
+ if (!emitStore(VT, ResultReg, Dest))
return false;
int64_t Size = VT.getSizeInBits() / 8;
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return false;
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ // Simplify multiplies.
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::smul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2)
+ IID = Intrinsic::sadd_with_overflow;
+ break;
+ case Intrinsic::umul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2)
+ IID = Intrinsic::uadd_with_overflow;
+ break;
+ }
+
AArch64CC::CondCode TmpCC;
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
- case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
- case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
+ switch (IID) {
+ default:
+ return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ TmpCC = AArch64CC::VS;
+ break;
+ case Intrinsic::uadd_with_overflow:
+ TmpCC = AArch64CC::HS;
+ break;
+ case Intrinsic::usub_with_overflow:
+ TmpCC = AArch64CC::LO;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ TmpCC = AArch64CC::NE;
+ break;
}
// Check if both instructions are in the same basic block.
updateValueMap(II, CLI.ResultReg);
return true;
}
+ case Intrinsic::fabs: {
+ MVT VT;
+ if (!isTypeLegal(II->getType(), VT))
+ return false;
+
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default:
+ return false;
+ case MVT::f32:
+ Opc = AArch64::FABSSr;
+ break;
+ case MVT::f64:
+ Opc = AArch64::FABSDr;
+ break;
+ }
+ unsigned SrcReg = getRegForValue(II->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcRegIsKill));
+ updateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::trap: {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
+ // Simplify multiplies.
+ unsigned IID = II->getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::smul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2) {
+ IID = Intrinsic::sadd_with_overflow;
+ RHS = LHS;
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ if (C->getValue() == 2) {
+ IID = Intrinsic::uadd_with_overflow;
+ RHS = LHS;
+ }
+ break;
+ }
+
unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
AArch64CC::CondCode CC = AArch64CC::Invalid;
- switch (II->getIntrinsicID()) {
+ switch (IID) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
/*IsKill=*/true, getInvertedCondCode(CC));
+ (void)ResultReg2;
assert((ResultReg1 + 1) == ResultReg2 &&
"Nonconsecutive result registers.");
updateValueMap(II, ResultReg1, 2);
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
bool Op0IsKill, uint64_t Shift,
- bool IsZext) {
+ bool IsZExt) {
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
"Unexpected source/return type pair.");
- assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
- SrcVT == MVT::i64) && "Unexpected source value type.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
RetVT == MVT::i64) && "Unexpected return value type.");
unsigned RegSize = Is64Bit ? 64 : 32;
unsigned DstBits = RetVT.getSizeInBits();
unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
// Don't deal with undefined shifts.
if (Shift >= DstBits)
{AArch64::SBFMWri, AArch64::SBFMXri},
{AArch64::UBFMWri, AArch64::UBFMXri}
};
- unsigned Opc = OpcTable[IsZext][Is64Bit];
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
unsigned TmpReg = MRI.createVirtualRegister(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
bool IsZExt) {
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
"Unexpected source/return type pair.");
- assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
- SrcVT == MVT::i64) && "Unexpected source value type.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
RetVT == MVT::i64) && "Unexpected return value type.");
unsigned RegSize = Is64Bit ? 64 : 32;
unsigned DstBits = RetVT.getSizeInBits();
unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
// Don't deal with undefined shifts.
if (Shift >= DstBits)
{AArch64::UBFMWri, AArch64::UBFMXri}
};
unsigned Opc = OpcTable[IsZExt][Is64Bit];
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
unsigned TmpReg = MRI.createVirtualRegister(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
bool IsZExt) {
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
"Unexpected source/return type pair.");
- assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
- SrcVT == MVT::i64) && "Unexpected source value type.");
+ assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+ "Unexpected source value type.");
assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
RetVT == MVT::i64) && "Unexpected return value type.");
unsigned RegSize = Is64Bit ? 64 : 32;
unsigned DstBits = RetVT.getSizeInBits();
unsigned SrcBits = SrcVT.getSizeInBits();
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+ // Just emit a copy for "zero" shifts.
+ if (Shift == 0) {
+ if (RetVT == SrcVT) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ return ResultReg;
+ } else
+ return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ }
// Don't deal with undefined shifts.
if (Shift >= DstBits)
{AArch64::UBFMWri, AArch64::UBFMXri}
};
unsigned Opc = OpcTable[IsZExt][Is64Bit];
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
unsigned TmpReg = MRI.createVirtualRegister(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
}
-bool AArch64FastISel::selectIntExt(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Src = I->getOperand(0);
- Type *SrcTy = Src->getType();
+static bool isZExtLoad(const MachineInstr *LI) {
+ switch (LI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRWroX:
+ case AArch64::LDRBBroW:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRWroW:
+ return true;
+ }
+}
- unsigned SrcReg = getRegForValue(Src);
- if (!SrcReg)
+static bool isSExtLoad(const MachineInstr *LI) {
+ switch (LI->getOpcode()) {
+ default:
return false;
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSWi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWui:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSWroW:
+ return true;
+ }
+}
- EVT SrcEVT = TLI.getValueType(SrcTy, true);
- EVT DestEVT = TLI.getValueType(DestTy, true);
- if (!SrcEVT.isSimple())
+bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
+ MVT SrcVT) {
+ const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
+ if (!LI || !LI->hasOneUse())
return false;
- if (!DestEVT.isSimple())
+
+ // Check if the load instruction has already been selected.
+ unsigned Reg = lookUpRegForValue(LI);
+ if (!Reg)
return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = 0;
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI)
+ return false;
+
+ // Check if the correct load instruction has been emitted - SelectionDAG might
+ // have emitted a zero-extending load, but we need a sign-extending load.
+ bool IsZExt = isa<ZExtInst>(I);
+ const auto *LoadMI = MI;
+ if (LoadMI->getOpcode() == TargetOpcode::COPY &&
+ LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
+ unsigned LoadReg = MI->getOperand(1).getReg();
+ LoadMI = MRI.getUniqueVRegDef(LoadReg);
+ assert(LoadMI && "Expected valid instruction");
+ }
+ if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
+ return false;
+
+ // Nothing to be done.
+ if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ if (IsZExt) {
+ unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
+ .addImm(0)
+ .addReg(Reg, getKillRegState(true))
+ .addImm(AArch64::sub_32);
+ Reg = Reg64;
+ } else {
+ assert((MI->getOpcode() == TargetOpcode::COPY &&
+ MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
+ "Expected copy instruction");
+ Reg = MI->getOperand(1).getReg();
+ MI->eraseFromParent();
+ }
+ updateValueMap(I, Reg);
+ return true;
+}
+
+bool AArch64FastISel::selectIntExt(const Instruction *I) {
+ assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ "Unexpected integer extend instruction.");
+ MVT RetVT;
+ MVT SrcVT;
+ if (!isTypeSupported(I->getType(), RetVT))
+ return false;
+
+ if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
+ return false;
+
+ // Try to optimize already sign-/zero-extended values from load instructions.
+ if (optimizeIntExtLoad(I, RetVT, SrcVT))
+ return true;
+
+ unsigned SrcReg = getRegForValue(I->getOperand(0));
+ if (!SrcReg)
+ return false;
+ bool SrcIsKill = hasTrivialKill(I->getOperand(0));
+ // Try to optimize already sign-/zero-extended values from function arguments.
bool IsZExt = isa<ZExtInst>(I);
- // Check if it is an argument and if it is already zero/sign-extended.
- if (const auto *Arg = dyn_cast<Argument>(Src)) {
+ if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
- if (DestVT == MVT::i64) {
- ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+ if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBREG_TO_REG), ResultReg)
- .addImm(0)
- .addReg(SrcReg)
- .addImm(AArch64::sub_32);
- } else
- ResultReg = SrcReg;
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(AArch64::sub_32);
+ SrcReg = ResultReg;
+ }
+ // Conservatively clear all kill flags from all uses, because we are
+ // replacing a sign-/zero-extend instruction at IR level with a nop at MI
+ // level. The result of the instruction at IR level might have been
+ // trivially dead, which is now not longer true.
+ unsigned UseReg = lookUpRegForValue(I);
+ if (UseReg)
+ MRI.clearKillFlags(UseReg);
+
+ updateValueMap(I, SrcReg);
+ return true;
}
}
- if (!ResultReg)
- ResultReg = emitIntExt(SrcVT, SrcReg, DestVT, IsZExt);
-
+ unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
if (!ResultReg)
return false;
MVT SrcVT = VT;
bool IsZExt = true;
if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
- MVT VT;
- if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
- SrcVT = VT;
- IsZExt = true;
- Src0 = ZExt->getOperand(0);
+ if (!isIntExtFree(ZExt)) {
+ MVT VT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
+ SrcVT = VT;
+ IsZExt = true;
+ Src0 = ZExt->getOperand(0);
+ }
}
} else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
- MVT VT;
- if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
- SrcVT = VT;
- IsZExt = false;
- Src0 = SExt->getOperand(0);
+ if (!isIntExtFree(SExt)) {
+ MVT VT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
+ SrcVT = VT;
+ IsZExt = false;
+ Src0 = SExt->getOperand(0);
+ }
}
}
bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
- MVT TmpVT;
- if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
- SrcVT = TmpVT;
- IsZExt = true;
- Op0 = ZExt->getOperand(0);
+ if (!isIntExtFree(ZExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = true;
+ Op0 = ZExt->getOperand(0);
+ }
}
} else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
- MVT TmpVT;
- if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
- SrcVT = TmpVT;
- IsZExt = false;
- Op0 = SExt->getOperand(0);
+ if (!isIntExtFree(SExt)) {
+ MVT TmpVT;
+ if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = false;
+ Op0 = SExt->getOperand(0);
+ }
}
}
return true;
}
- unsigned Pow2MinusOne = (1 << Lg2) - 1;
- unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
- /*IsKill=*/false, Pow2MinusOne);
+ int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
+ unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
if (!AddReg)
return false;
return true;
}
+/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
+/// have to duplicate it for AArch64, because otherwise we would fail during the
+/// sign-extend emission.
+std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy();
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+ IdxNIsKill = true;
+ } else if (IdxVT.bitsGT(PtrVT))
+ llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+/// This is mostly a copy of the existing FastISel GEP code, but we have to
+/// duplicate it for AArch64, because otherwise we would bail out even for
+/// simple cases. This is because the standard fastEmit functions don't cover
+/// MUL at all and ADD is lowered very inefficientily.
+bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (!N)
+ return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ Type *Ty = I->getOperand(0)->getType();
+ MVT VT = TLI.getPointerTy();
+ for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) {
+ const Value *Idx = *OI;
+ if (auto *StTy = dyn_cast<StructType>(Ty)) {
+ unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ // N = N + Offset
+ if (Field)
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+ Ty = StTy->getElementType(Field);
+ } else {
+ Ty = cast<SequentialType>(Ty)->getElementType();
+ // If this is a constant subscript, handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ // N = N + Offset
+ TotalOffs +=
+ DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ continue;
+ }
+ if (TotalOffs) {
+ N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ if (!N)
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (!IdxN)
+ return false;
+
+ if (ElementSize != 1) {
+ unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
+ if (!C)
+ return false;
+ IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
+ if (!IdxN)
+ return false;
+ IdxNIsKill = true;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N)
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
+ if (!N)
+ return false;
+ }
+ updateValueMap(I, N);
+ return true;
+}
+
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
case Instruction::FPToUI:
return selectFPToInt(I, /*Signed=*/false);
case Instruction::ZExt:
- if (!selectCast(I, ISD::ZERO_EXTEND))
- return selectIntExt(I);
- return true;
case Instruction::SExt:
- if (!selectCast(I, ISD::SIGN_EXTEND))
- return selectIntExt(I);
- return true;
+ return selectIntExt(I);
case Instruction::Trunc:
if (!selectCast(I, ISD::TRUNCATE))
return selectTrunc(I);
return selectRet(I);
case Instruction::FRem:
return selectFRem(I);
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
}
// fall-back to target-independent instruction selection.