MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the TargetLowering object.
- static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
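// With ZeroOrOneBooleanContent, the i8 produced by setcc is known to be
// exactly 0 or 1, so widening a boolean needs no extra masking.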
// Multiply-high is lowered to a two-result operation; plain multiply
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
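// For instance, when dagcombine sees both (mul x, y) and (mulhu x, y) it can
// fold them into a single (umul_lohi x, y), which selects to one MUL that
// produces both halves of the product.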
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
- MVT VT = IntVTs[i];
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
- MVT VT = IntVTs[i];
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
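// ATOMIC_LOAD_SUB, for example, is custom because x86 has LOCK XADD but no
// subtracting equivalent; the lowering negates the operand and reuses the
// add form.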
// ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(VT.getVectorNumElements()))
- continue;
- // Do not attempt to custom lower non-128-bit vectors
- if (!VT.is128BitVector())
- continue;
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-128-bit vectors
- if (!VT.is128BitVector())
- continue;
-
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v2i64);
setOperationAction(ISD::OR, VT, Promote);
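// Promoting a bitwise op is just a pair of bitcasts: the operation is
// identical for every 128-bit element layout, so doing it as v2i64 lets a
// single PAND/POR/PXOR pattern cover all of these types.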
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-256-bit vectors
- if (!VT.is256BitVector())
- continue;
-
+ for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v4i64);
setOperationAction(ISD::OR, VT, Promote);
setOperationAction(ISD::MSTORE, VT, Legal);
}
}
- for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-512-bit vectors.
- if (!VT.is512BitVector())
- continue;
-
+ for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Custom);
}
- for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
- const MVT VT = (MVT::SimpleValueType)i;
-
- const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
-
- // Do not attempt to promote non-512-bit vectors.
- if (!VT.is512BitVector())
- continue;
-
- if (EltSize < 32) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Legal);
- }
+ for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
}
}
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ if (VT == MVT::i64 && !Subtarget->is64Bit())
+ continue;
// Add/Sub/Mul with overflow operations are custom lowered.
- MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
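// These lower to X86 arithmetic nodes that also produce EFLAGS, so the
// overflow bit can be consumed directly by SETO/JO rather than recomputed
// from the result value.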
return true;
}
-/// Android provides a fixed TLS slot for the SafeStack pointer.
-/// See the definition of TLS_SLOT_SAFESTACK in
-/// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
-bool X86TargetLowering::getSafeStackPointerLocation(unsigned &AddressSpace,
- unsigned &Offset) const {
+Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (!Subtarget->isTargetAndroid())
- return false;
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+ // Android provides a fixed TLS slot for the SafeStack pointer. See the
+ // definition of TLS_SLOT_SAFESTACK in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ unsigned AddressSpace, Offset;
if (Subtarget->is64Bit()) {
// %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
Offset = 0x48;
AddressSpace = getTargetMachine().getCodeModel() == CodeModel::Kernel ? 256 : 257;
} else {
// %gs:0x24 on i386
Offset = 0x24;
AddressSpace = 256;
}
- return true;
+
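+ // The returned location is a plain IR constant; for 64-bit non-kernel
+ // code it is inttoptr (i32 0x48 to i8* addrspace(257)*), i.e. %fs:0x48.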
+ return ConstantExpr::getIntToPtr(
+ ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
+ Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
// Determine the extraction length from the part of the
// lower half that isn't zeroable.
int Len = HalfSize;
- for (; Len >= 0; --Len)
+ for (; Len > 0; --Len)
if (!Zeroable[Len - 1])
break;
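+ // The Len > 0 bound keeps the Zeroable[Len - 1] access in range; if the
+ // whole lower half is zeroable, Len reaches 0 and the assert below fires.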
assert(Len > 0 && "Zeroable shuffle mask");
M = M % Size;
// All mask elements must be in the lower half.
- if (M > HalfSize)
+ if (M >= HalfSize)
return SDValue();
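+ // (Element indices are zero-based, so M == HalfSize already refers to the
+ // upper half and must be rejected as well.)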
if (Idx < 0 || (Src == V && Idx == (M - i))) {
EVT ExtVT;
switch (VT.SimpleTy) {
default:
- assert(false && "Expected a vector of i1 elements");
- break;
+ llvm_unreachable("Expected a vector of i1 elements");
case MVT::v2i1:
ExtVT = MVT::v2i64;
break;
EltNo);
}
-/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
-/// special and don't usually play with other vector types, it's better to
-/// handle them early to be sure we emit efficient code by avoiding
-/// store-load conversions.
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
- if (N->getValueType(0) != MVT::x86mmx ||
- N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
- N->getOperand(0)->getValueType(0) != MVT::v2i32)
- return SDValue();
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
- SDValue V = N->getOperand(0);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
- return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
- N->getValueType(0), V.getOperand(0));
+ // Detect bitcasts between i32 to x86mmx low word. Since MMX types are
+ // special and don't usually play with other vector types, it's better to
+ // handle them early to be sure we emit efficient code by avoiding
+ // store-load conversions.
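+ // For example, (x86mmx (bitcast (v2i32 build_vector X, 0))) becomes
+ // (MMX_MOVW2D X): a single MOVD that also zeroes the high word.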
+ if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getValueType() == MVT::v2i32 &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue N00 = N0->getOperand(0);
+ if (N0.getConstantOperandVal(1) == 0 && N00.getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
+ }
+
+ // Convert a bitcasted integer logic operation that has one bitcasted
+ // floating-point operand and one constant operand into a floating-point
+ // logic operation. This may create a load of the constant, but that is
+ // cheaper than materializing the constant in an integer register and
+ // transferring it to an SSE register, or transferring the SSE operand to
+ // an integer register and back.
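+ // For example, with SSE2 this rewrites
+ //   (f64 (bitcast (and (i64 (bitcast X)), C)))
+ // as (FAND X, (bitcast C)), the kind of mask fabs-style code produces.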
+ unsigned FPOpcode;
+ switch (N0.getOpcode()) {
+ case ISD::AND: FPOpcode = X86ISD::FAND; break;
+ case ISD::OR: FPOpcode = X86ISD::FOR; break;
+ case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+ default: return SDValue();
+ }
+ if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
+ (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getOperand(0).getValueType() == VT) {
+ SDValue N000 = N0.getOperand(0).getOperand(0);
+ SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
+ return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+ }
return SDValue();
}
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
- case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
Attribute::MinSize);
return OptSize && !VT.isVector();
}
+
+void X86TargetLowering::markInRegArguments(SelectionDAG &DAG,
+ TargetLowering::ArgListTy& Args) const {
+ // The MCU psABI requires some arguments to be passed in-register.
+ // For regular calls, the inreg arguments are marked by the front-end.
+ // However, for compiler generated library calls, we have to patch this
+ // up here.
+ if (!Subtarget->isTargetMCU() || !Args.size())
+ return;
+
+ unsigned FreeRegs = 3;
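+ // The psABI hands out at most three 32-bit argument registers (nominally
+ // EAX, EDX and ECX); any argument needing more than two of them stays on
+ // the stack, which is what the SizeInRegs checks below enforce.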
+ for (auto &Arg : Args) {
+ // For library functions, we do not expect any fancy types.
+ unsigned Size = DAG.getDataLayout().getTypeSizeInBits(Arg.Ty);
+ unsigned SizeInRegs = (Size + 31) / 32;
+ if (SizeInRegs > 2 || SizeInRegs > FreeRegs)
+ continue;
+
+ Arg.isInReg = true;
+ FreeRegs -= SizeInRegs;
+ if (!FreeRegs)
+ break;
+ }
+}