static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
-AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
- : TargetLowering(TM) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
+ const AArch64Subtarget &STI)
+ : TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
}
// Compute derived properties from the register classes
- computeRegisterProperties();
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- // f16 is storage-only, so we promote operations to f32 if we know this is
- // valid, and ignore them otherwise. The operations not mentioned here will
- // fail to select, but this is not a major problem as no source language
- // should be emitting native f16 operations yet.
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ // f16 is a storage-only type, always promote it to f32.
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
+ for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
// Enable TBZ/TBNZ
MaskAndBranchFoldingIsLegal = true;
+ EnableExtLdPromotion = true;
setMinFunctionAlignment(2);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+ // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
+ // -> v8f16 conversions.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+ // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
+ // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
- MVT Ty = RoundingVecTypes[I];
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
return MVT::i64;
}
-unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
- // FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
- // and the offset has to be a multiple of the related size in bytes.
- return 4095;
-}
-
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+ case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+ case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+ case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+ case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+ case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
MachineFunction::iterator It = MBB;
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
+ RHS = DAG.getConstant(C, dl, VT);
}
break;
}
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, RHS.getValueType()),
+ DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
}
}
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
return Cmp;
}
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
- bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// For a 32 bit multiply with overflow check we want the instruction
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
// On AArch64 the upper 32 bits are always zero extended for a 32 bit
// operation. We need to clear out the upper 32 bits, because we used a
// widening multiply that wrote all 64 bits. In the end this should be a
// check we have to arithmetic shift right the 32nd bit of the result by
// 31 bits. Then we compare the result to the upper 32 bits.
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
- DAG.getConstant(31, MVT::i64));
+ DAG.getConstant(31, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
// pattern:
// (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
- DAG.getConstant(32, MVT::i64));
+ DAG.getConstant(32, DL, MVT::i64));
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
- DAG.getConstant(63, MVT::i64));
+ DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
- DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+ DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
- DAG.getConstant(-1ULL, Other.getValueType()));
+ DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
+ SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, MVT::i32);
- SDValue FVal = DAG.getConstant(0, MVT::i32);
+ SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
- Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal,
+ SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+ Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
// Prefetch operands are:
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
- DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
+ DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
if (Op.getOperand(0).getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getOperand(0).getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+ }
+
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
SDLoc(Op)).first;
}
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
- return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ ISD::FP_ROUND, dl, MVT::f16,
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+ DAG.getIntPtrConstant(0, dl));
+ }
+
// i128 conversions are libcalls.
if (Op.getOperand(0).getValueType() == MVT::i128)
return SDValue();
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
- DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
+ SDLoc dl(N);
unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
- Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+ Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ return DAG.getNode(ISD::BUILD_VECTOR, dl,
MVT::getVectorVT(TruncVT, NumElts), Ops);
}
llvm_unreachable("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
+ case CallingConv::GHC:
+ return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
if (!Subtarget->isTargetDarwin())
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
- std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
-
- // Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- ValVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- ValVT = MVT::i16;
+ if (Ins[i].isOrigArg()) {
+ std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+ CurArgIdx = Ins[i].getOrigArgIndex();
+ // Get type of the original argument.
+ EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ValVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ValVT = MVT::i16;
+ }
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
- unsigned FirstVariadicGPR =
- CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
MachinePointerInfo::getStack(i * 8), false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getConstant(8, DL, getPointerTy()));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
- unsigned FirstVariadicFPR =
- CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
MachinePointerInfo::getStack(i * 16), false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ DAG.getConstant(16, DL, getPointerTy()));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
+ true),
+ DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
if (IsTailCall) {
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
- SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(LocMemOffset);
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
- DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
+ DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVol = */ false,
- /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false, /*AlwaysInline = */ false,
+ /*isTailCall = */ false,
+ DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
- Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
? RoundUpToAlignment(NumBytes, 16)
: 0;
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(CalleePopBytes, true),
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
/*isInvariant=*/ true, 8);
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
- DAG.getConstant(GN->getOffset(), PtrVT));
+ DAG.getConstant(GN->getOffset(), DL, PtrVT));
return GlobalAddr;
}
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+ const uint32_t *Mask =
+ Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
+/// is a function pointer to carry out the resolution.
///
-/// Thus, the ideal call sequence on AArch64 is:
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
-///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
-///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
-
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, DL, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+ SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
- return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
- CCVal, Overflow);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Overflow);
}
if (LHS.getValueType().isInteger()) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+ Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+ Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
// becomes redundant. This would also increase register pressure.
uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, MVT::i64), Dest);
+ DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
// becomes redundant. This would also increase register pressure.
uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
- DAG.getConstant(Mask, MVT::i64), Dest);
+ DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
SDValue CCVal;
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
if (SrcVT == MVT::f32 && VT == MVT::f64)
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
+ DAG.getIntPtrConstant(0, DL));
else
// FIXME: Src type is different, bail out for now. Can VT really be a
// vector type?
EVT VecVT;
EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
+ uint64_t EltMask;
+ SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+ EltMask = 0x80000000ULL;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
// We want to materialize a mask with the the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
+ EltMask = 0;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
llvm_unreachable("Invalid type for copysign!");
}
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+ SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
- SDValue VecVal;
- if (VT == MVT::i32) {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
- VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
- VecVal);
- } else {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- }
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
- SDValue TVal = DAG.getConstant(1, VT);
- SDValue FVal = DAG.getConstant(0, VT);
+ SDValue TVal = DAG.getConstant(1, dl, VT);
+ SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
changeFPCCToAArch64CC(CC, CC1, CC2);
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
}
return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
}
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue CC = Op->getOperand(0);
- SDValue TVal = Op->getOperand(1);
- SDValue FVal = Op->getOperand(2);
- SDLoc DL(Op);
-
- unsigned Opc = CC.getOpcode();
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
- // instruction.
- if (CC.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- AArch64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
- CCVal, Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+ SDValue RHS, SDValue TVal,
+ SDValue FVal, SDLoc dl,
SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
- RHS = DAG.getConstant(0, LHS.getValueType());
+ RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
// Try to match this select into a max/min operation, which have dedicated
// opcode in the instruction set.
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
return CS1;
}
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TVal = Op.getOperand(2);
+ SDValue FVal = Op.getOperand(3);
+ SDLoc DL(Op);
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CCVal = Op->getOperand(0);
+ SDValue TVal = Op->getOperand(1);
+ SDValue FVal = Op->getOperand(2);
+ SDLoc DL(Op);
+
+ unsigned Opc = CCVal.getOpcode();
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+ // instruction.
+ if (CCVal.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+ return SDValue();
+
+ AArch64CC::CondCode OFCC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
+ SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+
+ return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
+
+ // Lower it the same way as we would lower a SELECT_CC node.
+ ISD::CondCode CC;
+ SDValue LHS, RHS;
+ if (CCVal.getOpcode() == ISD::SETCC) {
+ LHS = CCVal.getOperand(0);
+ RHS = CCVal.getOperand(1);
+ CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+ } else {
+ LHS = CCVal;
+ RHS = DAG.getConstant(0, DL, CCVal.getValueType());
+ CC = ISD::SETNE;
+ }
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getConstant(8, DL, getPointerTy()));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, getPointerTy()));
+ DAG.getConstant(GPRSize, DL, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, 8), false, false, 8));
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, getPointerTy()));
+ DAG.getConstant(16, DL, getPointerTy()));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, getPointerTy()));
+ DAG.getConstant(FPRSize, DL, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, 16), false, false, 8));
// int __gr_offs at offset 24
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+ DAG.getConstant(24, DL, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL,
+ DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, 24), false,
false, 4));
// int __vr_offs at offset 28
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+ DAG.getConstant(28, DL, getPointerTy()));
+ MemOps.push_back(DAG.getStore(Chain, DL,
+ DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, 28), false,
false, 4));
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
// pointer.
+ SDLoc DL(Op);
unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
- Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
- 8, false, false, MachinePointerInfo(DestSV),
+ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getConstant(VaListSize, DL, MVT::i32),
+ 8, false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
if (Align > 8) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, getPointerTy()));
+ DAG.getConstant(Align - 1, DL, getPointerTy()));
VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, getPointerTy()));
+ DAG.getConstant(-(int64_t)Align, DL, getPointerTy()));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, getPointerTy()));
+ DAG.getConstant(ArgSize, DL, getPointerTy()));
// Store the incremented VAList to the legalized pointer
SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
false, false, 0);
MachinePointerInfo(), false, false, false, 0);
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, getPointerTy());
+ SDValue Offset = DAG.getConstant(8, DL, getPointerTy());
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo(), false, false, false, 0);
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
+ DAG.getConstant(VTBits, dl, MVT::i64));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue TrueValHi = Opc == ISD::SRA
? DAG.getNode(Opc, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i64))
- : DAG.getConstant(0, VT);
+ DAG.getConstant(VTBits - 1, dl,
+ MVT::i64))
+ : DAG.getConstant(0, dl, VT);
SDValue Hi =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
+ DAG.getConstant(VTBits, dl, MVT::i64));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
SDValue Hi =
DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
// AArch64 shifts of larger than register sizes are wrapped rather than
// clamped, so we can't just emit "lo << a" if a is too big.
- SDValue TrueValLo = DAG.getConstant(0, VT);
+ SDValue TrueValLo = DAG.getConstant(0, dl, VT);
SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
- const std::string &Constraint, MVT VT) const {
+ const TargetRegisterInfo *TRI, const std::string &Constraint,
+ MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
}
// All assembler immediates are 64-bit integers.
- Result = DAG.getTargetConstant(CVal, MVT::i64);
+ Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
- V64Reg, DAG.getConstant(0, MVT::i32));
+ V64Reg, DAG.getConstant(0, DL, MVT::i32));
}
/// getExtFactor - Determine the adjustment factor for the position when
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(NumSrcElts, MVT::i64));
+ DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
- DAG.getConstant(NumSrcElts, MVT::i64));
+ DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
- VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
+ VEXTSrc2,
+ DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
VT.getVectorNumElements() / 2);
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueType().getSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
- else if (EltTy == MVT::i16)
+ else if (EltTy == MVT::i16 || EltTy == MVT::f16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64);
+ SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
- TBLMask.push_back(DAG.getConstant(Offset, MVT::i32));
+ TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
// &TBLMask[0], IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32),
+ V1Cst, V2Cst,
DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
makeArrayRef(TBLMask.data(), IndexLen)));
}
} else if (VT.getSizeInBits() == 64)
V1 = WidenVector(V1, DAG);
- return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
+ return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
}
if (isREVMask(ShuffleMask, VT, 64))
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->getOpcode() == ISD::UNDEF &&
isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
- DAG.getConstant(Imm, MVT::i32));
+ DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
- SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64);
+ SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
- SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
+ SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
SDValue ResultSLI =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1));
+ DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
+ Shift.getOperand(1));
DEBUG(dbgs() << "aarch64-lower: transformed: \n");
DEBUG(N->dump(&DAG));
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
if (Lane.getOpcode() == ISD::Constant) {
APInt LowBits(EltTy.getSizeInBits(),
cast<ConstantSDNode>(Lane)->getZExtValue());
- Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+ Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
}
Ops.push_back(Lane);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
if (VT.getSizeInBits() == 128) {
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// Support the V64 version via subregister insertion.
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(264, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(272, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
VT.getSizeInBits() == 128) {
CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
- DAG.getConstant(CnstVal, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(16, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(24, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(8, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(264, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
+ DAG.getConstant(CnstVal, dl, MVT::i32),
+ DAG.getConstant(272, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ EVT EltTy = VT.getVectorElementType();
+ assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
+ MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
MachineSDNode *N =
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, MVT::i32));
+ DAG.getTargetConstant(SubIdx, dl, MVT::i32));
Vec = SDValue(N, 0);
++i;
}
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
case ISD::SHL:
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
- return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32),
+ DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
+ MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
- return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note, there is not a shift right register
SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
+ DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
+ NegShift);
return NegShiftLeft;
}
AArch64CC::CondCode CC, bool NoNans, EVT VT,
SDLoc dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
+ assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ "function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
+ EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
- return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(),
- dl, DAG);
+ SDValue Cmp =
+ EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
+ return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
SDValue Cmp =
- EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
+ EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
- EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
+ EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
- Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2);
+ Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
+ Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
+
if (ShouldInvert)
return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return NumBits1 > NumBits2;
}
+/// Check if it is profitable to hoist instruction in then/else to if.
+/// Not profitable if I and it's user can form a FMA instruction
+/// because we prefer FMSUB/FMADD.
+bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (I->getOpcode() != Instruction::FMul)
+ return true;
+
+ if (I->getNumUses() != 1)
+ return true;
+
+ Instruction *User = I->user_back();
+
+ if (User &&
+ !(User->getOpcode() == Instruction::FSub ||
+ User->getOpcode() == Instruction::FAdd))
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ EVT VT = getValueType(User->getOperand(0)->getType());
+
+ if (isFMAFasterThanFMulAndFAdd(VT) &&
+ isOperationLegalOrCustom(ISD::FMA, VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath))
+ return false;
+
+ return true;
+}
+
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
VT1.getSizeInBits() <= 32);
}
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
+ if (isa<FPExtInst>(Ext))
+ return false;
+
+ // Vector types are next free.
+ if (Ext->getType()->isVectorTy())
+ return false;
+
+ for (const Use &U : Ext->uses()) {
+ // The extension is free if we can fold it with a left shift in an
+ // addressing mode or an arithmetic operation: add, sub, and cmp.
+
+ // Is there a shift?
+ const Instruction *Instr = cast<Instruction>(U.getUser());
+
+ // Is this a constant shift?
+ switch (Instr->getOpcode()) {
+ case Instruction::Shl:
+ if (!isa<ConstantInt>(Instr->getOperand(1)))
+ return false;
+ break;
+ case Instruction::GetElementPtr: {
+ gep_type_iterator GTI = gep_type_begin(Instr);
+ std::advance(GTI, U.getOperandNo());
+ Type *IdxTy = *GTI;
+ // This extension will end up with a shift because of the scaling factor.
+ // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+ // Get the shift amount based on the scaling factor:
+ // log2(sizeof(IdxTy)) - log2(8).
+ uint64_t ShiftAmt =
+ countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ // Is the constant foldable in the shift of the addressing mode?
+ // I.e., shift amount is between 1 and 4 inclusive.
+ if (ShiftAmt == 0 || ShiftAmt > 4)
+ return false;
+ break;
+ }
+ case Instruction::Trunc:
+ // Check if this is a noop.
+ // trunc(sext ty1 to ty2) to ty1.
+ if (Instr->getType() == Ext->getOperand(0)->getType())
+ continue;
+ // FALL THROUGH.
+ default:
+ return false;
+ }
+
+ // At this point we can use the bfm family, so this extension is free
+ // for that use.
+ }
+ return true;
+}
+
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
bool Fast;
const Function *F = MF.getFunction();
if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
- return Size >= 8 ? MVT::i64 : MVT::i32;
+ if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ return MVT::i64;
+
+ if (Size >= 4 &&
+ (memOpAlign(SrcAlign, DstAlign, 4) ||
+ (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ return MVT::i32;
+
+ return MVT::Other;
}
// 12-bit optionally shifted immediates are legal for adds.
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
- return (Shift < 3) ? true : false;
+ return Shift < 3;
}
// Generate SUBS and CSEL for integer abs.
N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
N0.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- N0.getOperand(0), DAG.getConstant(0, VT));
+ N0.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
- DAG.getConstant(AArch64CC::PL, MVT::i32),
+ DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
SDValue(Cmp.getNode(), 1));
}
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
- SDValue Zero = DAG.getConstant(0, VT);
- SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
// Divide by pow2.
SDValue SRA =
- DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+ DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Created)
Created->push_back(SRA.getNode());
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
APInt Value = C->getAPIntValue();
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (Value.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
APInt VM1 = Value - 1;
if (VM1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal,
N->getOperand(0));
}
// (mul x, 2^N - 1) => (sub (shl x, N), x)
APInt VP1 = Value + 1;
if (VP1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal,
N->getOperand(0));
}
} else {
- // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
- APInt VNM1 = -Value - 1;
- if (VNM1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNM1.logBase2(), MVT::i64));
- SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
- }
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
APInt VNP1 = -Value + 1;
if (VNP1.isPowerOf2()) {
SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), DL, MVT::i64));
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
ShiftedVal);
}
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt VNM1 = -Value - 1;
+ if (VNM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(VNM1.logBase2(), DL, MVT::i64));
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add);
+ }
}
}
return SDValue();
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
- DAG.getConstant(ShiftRHS, MVT::i64));
+ DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
SDLoc dl(N);
unsigned NumElements = VT.getVectorNumElements();
if (idx) {
- SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
+ SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
} else {
- SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32);
+ SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
Source, SubReg),
0);
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // Optimize concat_vectors of truncated vectors, where the intermediate
+ // type is illegal, to avoid said illegality, e.g.,
+ // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+ // (v2i16 (truncate (v2i64)))))
+ // ->
+ // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+ // (v4i32 (bitcast (v2i64))),
+ // <0, 2, 4, 6>)))
+ // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+ // on both input and result type, so we might generate worse code.
+ // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+ if (N->getNumOperands() == 2 &&
+ N0->getOpcode() == ISD::TRUNCATE &&
+ N1->getOpcode() == ISD::TRUNCATE) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N10 = N1->getOperand(0);
+ EVT N00VT = N00.getValueType();
+
+ if (N00VT == N10.getValueType() &&
+ (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+ N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+ MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+ SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+ for (size_t i = 0; i < Mask.size(); ++i)
+ Mask[i] = i * 2;
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getVectorShuffle(
+ MidVT, dl,
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
- DAG.getConstant(0, MVT::i64));
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
+ DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
+ if (N1->getOpcode() != ISD::BITCAST)
return SDValue();
- SDValue RHS = Op1->getOperand(0);
+ SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+ RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
unsigned NumElems = NarrowTy.getVectorNumElements();
MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+ SDLoc dl(N);
SDValue NewDUP;
if (IsDUPLANE)
- NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
+ NewDUP = DAG.getNode(N.getOpcode(), dl, NewDUPVT, N.getOperand(0),
N.getOperand(1));
else
- NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
+ NewDUP = DAG.getNode(AArch64ISD::DUP, dl, NewDUPVT, N.getOperand(0));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
- NewDUP, DAG.getConstant(NumElems, MVT::i64));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, NewDUP,
+ DAG.getConstant(NumElems, dl, MVT::i64));
}
static bool isEssentiallyExtractSubvector(SDValue N) {
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
- AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32);
+ AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
+ MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
CCVal, DAG, dl);
EVT VT = Op->getValueType(0);
- LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
+ LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
break;
}
- if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(ShiftAmount, MVT::i32));
+ if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
+ SDLoc dl(N);
+ return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(-ShiftAmount, dl, MVT::i32));
+ } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
+ SDLoc dl(N);
+ return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(ShiftAmount, dl, MVT::i32));
+ }
return SDValue();
}
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+ SelectionDAG &DAG) {
+ SDLoc dl(N);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
+ DAG.getNode(Opc, dl,
+ N->getOperand(1).getSimpleValueType(),
+ N->getOperand(1)),
+ DAG.getConstant(0, dl, MVT::i64));
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
break;
+ case Intrinsic::aarch64_neon_saddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+ case Intrinsic::aarch64_neon_uaddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+ case Intrinsic::aarch64_neon_sminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+ case Intrinsic::aarch64_neon_uminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+ case Intrinsic::aarch64_neon_smaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_umaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getConstant(InNVT.getVectorNumElements(), MVT::i64));
+ DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
unsigned Offset = EltOffset;
while (--NumVecElts) {
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, MVT::i64));
+ DAG.getConstant(Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), Alignment);
return SDValue();
// Cyclone has bad performance on unaligned 16B stores when crossing line and
- // page boundries. We want to split such stores.
+ // page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
return SDValue();
// Don't split at Oz.
MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
+ bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (IsMinSize)
return SDValue();
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getConstant(0, MVT::i64));
+ DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getConstant(NumElts, MVT::i64));
+ DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->isVolatile(), S->isNonTemporal(), S->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(8, MVT::i64));
+ DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
S->getAlignment());
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
+ // Finally, check that the vector doesn't depend on the load.
+ // Again, this would create a cycle.
+ // The load depending on the vector is fine, as that's the case for the
+ // LD1*post we'll eventually generate anyway.
+ if (LoadSDN->isPredecessorOf(Vector.getNode()))
+ continue;
+
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
LoadSDN->getMemOperand());
// Update the uses.
- std::vector<SDValue> NewResults;
+ SmallVector<SDValue, 2> NewResults;
NewResults.push_back(SDValue(LD, 0)); // The result of load
NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
DCI.CombineTo(LD, NewResults);
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
- if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+ if (N0.getOpcode() != ISD::SETCC)
return SDValue();
+ // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+ // scalar SetCCResultType. We also don't expect vectors, because we assume
+ // that selects fed by vector SETCCs are canonicalized to VSELECT.
+ assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+ "Scalar-SETCC feeding SELECT has unexpected result type!");
+
// If NumMaskElts == 0, the comparison is larger than select result. The
// largest real NEON comparison is 64-bits per lane, which means the result is
// at most 32-bits and an illegal vector. Just bail out for now.
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
+ // Also bail out if the vector CCVT isn't the same size as ResVT.
+ // This can happen if the SETCC operand size doesn't divide the ResVT size
+ // (e.g., f64 vs v3f32).
+ if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
+ return SDValue();
+
+ // Make sure we didn't create illegal types, if we're not supposed to.
+ assert(DCI.isBeforeLegalize() ||
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::SELECT:
- return performSelectCombine(N, DAG);
+ return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::STORE:
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
- DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+ DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128;
+ return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {