setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
- maxStoresPerMemset = 16;
+ maxStoresPerMemset = 8;
maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
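+ // These limits bound how many store operations SelectionDAG may emit when it
+ // expands a small, fixed-size @llvm.memcpy / @llvm.memset / @llvm.memmove
+ // inline instead of calling the library routine; e.g. with
+ // maxStoresPerMemcpy = 4, a 16-byte memcpy may become at most four stores
+ // (fewer if getOptimalMemOpType picks a wider type) rather than a call.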
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
// The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
return false;
case MVT::i8:
case MVT::i16:
- case MVT::i32:
+ case MVT::i32: {
// Unaligned access can use (for example) LDRB, LDRH, LDR
- return AllowsUnaligned;
+ if (AllowsUnaligned) {
+ if (Fast)
+ *Fast = Subtarget->hasV7Ops();
+ return true;
+ }
+ return false;
+ }
case MVT::f64:
- case MVT::v2f64:
+ case MVT::v2f64: {
// For any little-endian targets with NEON, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses
- return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
+ if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+ return false;
+ }
}
}
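+ // Callers can now distinguish "allowed" from "fast": query the hook with the
+ // optional Fast out-parameter and only pick the wide access when both hold,
+ // as getOptimalMemOpType does below, e.g.
+ //   bool Fast;
+ //   if (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast)
+ //     ...use a 128-bit vld1.i8/vst1.i8...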
// See if we can use NEON instructions for this...
if (IsZeroVal &&
- !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) &&
- Subtarget->hasNEON()) {
- if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
- return MVT::v4i32;
- } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
- return MVT::v2i32;
+ Subtarget->hasNEON() &&
+ !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+ bool Fast;
+ if (Size >= 16 &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
+ return MVT::v2f64;
+ } else if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
+ return MVT::f64;
}
}
// Lowering to i32/i16 if the size permits.
- if (Size >= 4) {
+ if (Size >= 4)
return MVT::i32;
- } else if (Size >= 2) {
+ else if (Size >= 2)
return MVT::i16;
- }
// Let the target-independent logic figure it out.
return MVT::Other;
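+ // E.g. a 16-byte, 4-byte-aligned copy on a little-endian v7/NEON subtarget
+ // with unaligned accesses enabled (SCTLR.A clear) now gets MVT::v2f64 via
+ // the fast-unaligned path above, where it previously fell through to
+ // MVT::i32.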
return false;
}
+bool ARMTargetLowering::isIntImmLegal(const APInt &Imm, EVT VT) const {
+ if (VT.getSizeInBits() > 32)
+ return false;
+
+ int32_t ImmVal = Imm.getSExtValue();
+ if (!Subtarget->isThumb()) {
+ return (ImmVal >= 0 && ImmVal < 65536) ||
+ (ARM_AM::getSOImmVal(ImmVal) != -1) ||
+ (ARM_AM::getSOImmVal(~ImmVal) != -1);
+ } else if (Subtarget->isThumb2()) {
+ return (ImmVal >= 0 && ImmVal < 65536) ||
+ (ARM_AM::getT2SOImmVal(ImmVal) != -1) ||
+ (ARM_AM::getT2SOImmVal(~ImmVal) != -1);
+ } else /*Thumb1*/ {
+ return (ImmVal >= 0 && ImmVal < 256);
+ }
+}
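+// Some examples for the predicate above, in ARM mode: 0x1234 is legal as a
+// plain 16-bit value, 0x00AB0000 is legal as an 8-bit value rotated by an
+// even amount (so_imm), and 0xFFFFFF00 is legal because its complement 0xFF
+// is; 0x12345678 is rejected and would still need a movw/movt pair or a
+// constant-pool load.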
+
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.