//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
 AssemblerPredicate<"FeatureNEON", "neon">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
+def IsCyclone : Predicate<"Subtarget->isCyclone()">;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
SDTCisVT<4, i32>]>;
+def SDT_AArch64CCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
+def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
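+// A sketch of the intended operand layout shared by the CCMP/CCMN/FCCMP
+// nodes defined below: (LHS, RHS, NZCV immediate, condition code, incoming
+// flags) -> new NZCV value as an i32.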
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
[SDTCisFP<0>,
SDTCisSameAs<0, 1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// No results; one operand: the variable being accessed.
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
+def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
+def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
+
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
+def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
+
+def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
+def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
-def HINT : HintI<"hint">;
+def HINT : HintI<"hint">;
def : InstAlias<"nop", (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe", (HINT 0b010)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
- // As far as LLVM is concerned this writes to the system's exclusive monitors.
+// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
-def DMB : CRmSystemI<barrier_op, 0b101, "dmb">;
-def DSB : CRmSystemI<barrier_op, 0b100, "dsb">;
-def ISB : CRmSystemI<barrier_op, 0b110, "isb">;
+// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
+// model patterns with sufficiently fine granularity.
+let mayLoad = ?, mayStore = ? in {
+def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
+ [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
+
+def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
+ [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
+
+def ISB : CRmSystemI<barrier_op, 0b110, "isb",
+ [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
+}
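+
+// For example (assuming the standard barrier operand encodings, where
+// CRm=0b1011 is "ish"): "call void @llvm.aarch64.dmb(i32 11)" selects to
+// "dmb ish" through the pattern above.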
+
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
}]>;
def trunc_imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(i64 i64imm_32bit:$src),
(SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
+// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+ N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
+}]>;
+
+
+def : Pat<(f32 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
+def : Pat<(f64 fpimm:$in),
+ (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
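+// For example, materializing f32 1.0 (bit pattern 0x3f800000) under this
+// scheme becomes roughly "mov w8, #0x3f800000; fmov s0, w8" (register
+// choices here are illustrative only).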
+
+
// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
// Add/subtract
-defm ADD : AddSub<0, "add", add>;
-defm SUB : AddSub<1, "sub">;
+defm ADD : AddSub<0, "add", "sub", add>;
+defm SUB : AddSub<1, "sub", "add">;
def : InstAlias<"mov $dst, $src",
(ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
-defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
-defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;
+defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
+defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
+ (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
+ (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
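+// Both -(a * b) and (-a) * b thus map onto MSUB with a zero accumulator,
+// which disassembles as MNEG: "mneg w0, w1, w2" == "msub w0, w1, w2, wzr".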
} // AddedComplexity = 7
let AddedComplexity = 5 in {
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
+// v8.1 atomic CAS
+defm CAS : CompareAndSwap<0, 0, "">;
+defm CASA : CompareAndSwap<1, 0, "a">;
+defm CASL : CompareAndSwap<0, 1, "l">;
+defm CASAL : CompareAndSwap<1, 1, "al">;
+
+// v8.1 atomic CASP
+defm CASP : CompareAndSwapPair<0, 0, "">;
+defm CASPA : CompareAndSwapPair<1, 0, "a">;
+defm CASPL : CompareAndSwapPair<0, 1, "l">;
+defm CASPAL : CompareAndSwapPair<1, 1, "al">;
+
+// v8.1 atomic SWP
+defm SWP : Swap<0, 0, "">;
+defm SWPA : Swap<1, 0, "a">;
+defm SWPL : Swap<0, 1, "l">;
+defm SWPAL : Swap<1, 1, "al">;
+
+// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
+defm LDADD : LDOPregister<0b000, "add", 0, 0, "">;
+defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">;
+defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">;
+defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
+
+defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">;
+defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">;
+defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">;
+defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
+
+defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">;
+defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">;
+defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">;
+defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
+
+defm LDSET : LDOPregister<0b011, "set", 0, 0, "">;
+defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">;
+defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">;
+defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
+
+defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">;
+defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">;
+defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">;
+defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
+
+defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">;
+defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">;
+defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">;
+defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
+
+defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">;
+defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">;
+defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">;
+defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
+
+defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">;
+defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">;
+defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">;
+defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
+
+// v8.1 atomic ST<OP>(register): aliases for "LD<OP>(register)" with Rt=XZR
+defm : STOPregister<"stadd","LDADD">; // STADDx
+defm : STOPregister<"stclr","LDCLR">; // STCLRx
+defm : STOPregister<"steor","LDEOR">; // STEORx
+defm : STOPregister<"stset","LDSET">; // STSETx
+defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
+defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
+defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
+defm : STOPregister<"stumin","LDUMIN">;// STUMINx
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//
// (immediate)
-defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>;
-defm AND : LogicalImm<0b00, "and", and>;
-defm EOR : LogicalImm<0b10, "eor", xor>;
-defm ORR : LogicalImm<0b01, "orr", or>;
+defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
+defm AND : LogicalImm<0b00, "and", and, "bic">;
+defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
+defm ORR : LogicalImm<0b01, "orr", or, "orn">;
// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
defm BIC : LogicalReg<0b00, 1, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON : LogicalReg<0b10, 1, "eon",
- BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
+ BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
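+// Note: (not (xor a, b)) is algebraically identical to (xor a, (not b));
+// EON still computes Rd = Rn EOR NOT(Rm), only the matched DAG shape changes.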
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
defm ORN : LogicalReg<0b01, 1, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm CLS : OneOperandData<0b101, "cls">;
defm CLZ : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit">;
+
+def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
+def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;
+
def REV16Wr : OneWRegData<0b001, "rev16",
UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
def REV32Xr : OneXRegData<0b010, "rev32",
UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
+def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
+
// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
+let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
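// The "ror" alias works because rotating a register right is an EXTR of the
// register with itself, e.g. "ror w0, w1, #3" == "extr w0, w1, w1, #3".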
//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 31 ? 31 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
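
// Worked example: "lsl w0, w1, #3" becomes UBFM with
// immr = (32 - 3) & 0x1f = 29 (i32shift_a) and imms = 31 - 3 = 28
// (i32shift_b), i.e. "ubfm w0, w1, #29, #28".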
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
-// Conditionally set flags instructions.
+// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
-let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel, []>;
-} // neverHasSideEffects = 1
+} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
[(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
+
defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
}
defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
// anyext -> zext
defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
}
defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
+// Match stores from lane 0 to the appropriate subreg's store.
+multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
+ ValueType VecTy, ValueType STy,
+ SubRegIndex SubRegIdx,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
+}
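+// e.g. a store of lane 0 of a v4f32 through this multiclass becomes a plain
+// "str s0, [x0, x1]" of the ssub subregister rather than "st1 {v0.s}[0]".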
+
+let AddedComplexity = 19 in {
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro16, store, v8i16, i16, hsub, STRHroW, STRHroX>;
+ defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>;
+ defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>;
+ defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>;
+}
+
//---
// (unsigned immediate)
defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
def : Pat<(store (v2i32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
def : Pat<(store (v2i64 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(store (f128 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
def : Pat<(store (v2i32 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4f16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v2f64 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8f16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
// unscaled i64 truncating stores
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//---
// (immediate post-indexed)
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
+let Predicates = [HasV8_1a] in {
+ // v8.1a "Limited Order Region" extension load-acquire instructions
+ def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
+ def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
+ def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
+ def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
+
+ // v8.1a "Limited Order Region" extension store-release instructions
+ def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
+ def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
+ def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
+ def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
+}
+
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
defm FMOV : UnscaledConversion<"fmov">;
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+ Requires<[NoZCZ]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+ Requires<[NoZCZ]>;
+}
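+// Keeping these rematerializable means the register allocator can re-emit
+// "fmov s0, wzr" / "fmov d0, xzr" at a use instead of spilling an FP zero.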
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
defm FCVT : FPConversion<"fcvt">;
-def : Pat<(f32_to_f16 FPR32:$Rn),
- (i32 (COPY_TO_REGCLASS
- (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
- GPR32))>;
-
-def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
- [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
-
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp">;
+defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
+def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
+ (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
+ (ABSv8i8 V64:$src)>;
+def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))),
+ (v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))),
+ (ABSv4i16 V64:$src)>;
+def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))),
+ (v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))),
+ (ABSv2i32 V64:$src)>;
+def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))),
+ (v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))),
+ (ABSv16i8 V128:$src)>;
+def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))),
+ (v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))),
+ (ABSv8i16 V128:$src)>;
+def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))),
+ (v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))),
+ (ABSv4i32 V128:$src)>;
+def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))),
+ (v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))),
+ (ABSv2i64 V128:$src)>;
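+// These recognize the branchless abs idiom: with m = x >>s (bits - 1)
+// (arithmetic shift), abs(x) == (x + m) ^ m, so the whole expression folds
+// to a single ABS.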
+
defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
(i64 2))))),
(FCVTLv4i32 V128:$Rn)>;
+def : Pat<(v4f32 (fextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (fextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
+ (i64 4))))),
+ (FCVTLv8i16 V128:$Rn)>;
+
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (fround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
+def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
+def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
+def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
+def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
+ TriOpFrag<(add node:$LHS, (sabsdiff node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", sabsdiff>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+ TriOpFrag<(add node:$LHS, (uabsdiff node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", uabsdiff>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+ (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+ (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+ (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+ (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+ (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+ (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+ (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+ (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+ (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+ (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+ (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+ (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+ (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+ (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+ (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+ (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+let Predicates = [HasV8_1a] in {
+ defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
+ defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
+ def : Pat<(i32 (int_aarch64_neon_sqadd
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(i32 (int_aarch64_neon_sqsub
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+}
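+// i.e. the v8.1a scalar forms fuse a saturating accumulate (sqadd/sqsub)
+// with the rounding doubling multiply-high (sqrdmulh) into one instruction.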
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- int_aarch64_neon_sabd>;
+ sabsdiff>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- int_aarch64_neon_sabd>;
+ sabsdiff>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- int_aarch64_neon_uabd>;
+ uabsdiff>;
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_aarch64_neon_uabd>;
+ uabsdiff>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+// Additional patterns for SMULL and UMULL
+multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
+ SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
+defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
+ UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
+
+// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
+multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
+ Instruction INST8B, Instruction INST4H, Instruction INST2S> {
+ def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+ (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+ (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
+ def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+ (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
+}
+
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
+ SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+defm : Neon_mulacc_widen_patterns<
+ TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
+ UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
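+// e.g. a widening multiply-accumulate "acc[i] += (i32)a[i] * (i32)b[i]"
+// over v4i16 inputs now selects directly to "smlal v0.4s, v1.4h, v2.4h".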
+
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
// We use EXT to handle extract_subvector to copy the upper 64-bits of a
// 128-bit vector.
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------
-def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
-def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
-def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
-def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
-def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
-def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
-def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
+def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
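+// The unconstrained imm5 bits are don't-cares: DUP (general) decodes only
+// the low one-hot field (xxxx1=B, xxx10=H, xx100=S, x1000=D), so the
+// encodings above no longer force the remaining bits to zero.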
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
(v2f64 (DUPv2i64lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
(i64 0)))>;
+def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v4f16 (DUPv4i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
+ (v8f16 (DUPv8i16lane
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
+ (i64 0)))>;
+
+def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
+def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
+ (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
(DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
+ return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
+def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi16lane
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0)),
+ dsub)>;
+
+def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
+ (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
+ (INSvi16lane
+ V128:$Rn, VectorIndexH:$imm,
+ (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
+ (i64 0))>;
+
def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
(f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
dsub)>;
}
+defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, possibly fed by an INS if the lane number is
-// anything other than zero.
+// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+ (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (EXTRACT_SUBREG
- (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexD:$idx),
- dsub))>;
+ (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (EXTRACT_SUBREG
- (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexS:$idx),
- ssub))>;
+ (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
+def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
+ (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
+def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v16i8, v8i8>;
// If the high lanes are undef, though, we can just ignore them:
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
+// Patterns for across-vector intrinsics that have a node equivalent
+// returning a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fall back to the explicit patterns, consuming the vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as smov
// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
}
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
-
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
+}
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
-}
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
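+
+// (None of the across-lanes instructions have a 2S form, hence the pairwise
+// ADDP/SMAXP/SMINP/UMAXP/UMINP fallbacks above for v2i32.)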
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
dsub))>;
}
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
-def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
-def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
-def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
-def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
// AdvSIMD indexed element
//----------------------------------------------------------------------------
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
}
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
+defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
+ int_aarch64_neon_sqsub>;
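+// SQRDMLAH/SQRDMLSH above are the ARMv8.1-A signed saturating rounding
+// doubling multiply accumulate/subtract instructions; the indexed forms read
+// one multiplicand from a vector lane, e.g. (illustrative):
+//   sqrdmlah v0.4s, v1.4s, v2.s[1]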
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
0),
dsub)),
0),
- ssub)))>, Requires<[NotForCodeSize]>;
+ ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
0),
dsub)),
0),
- dsub)))>, Requires<[NotForCodeSize]>;
-
+ dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
-let mayLoad = 1, neverHasSideEffects = 1 in {
+let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
(LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv1d GPR64sp:$Rn)>;
+def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
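+// e.g. (illustrative) the v4f16 splat-of-load above selects to:
+//   ld1r { v0.4h }, [x0]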
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;
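+// e.g. (illustrative) a load into lane 2 of a v4f16 selects to:
+//   ld1 { v0.h }[2], [x0]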
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
-let AddedComplexity = 15 in
+let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
-let AddedComplexity = 15 in
+let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;
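+// e.g. (illustrative) storing lane 1 of a v4f16 selects to:
+//   st1 { v0.h }[1], [x0]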
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
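+// The post-indexed forms also advance the base register by the transfer
+// size, e.g. (illustrative):
+//   st1 { v0.h }[1], [x0], #2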
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//
+// Natural vector casts (64 bit)
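+// NVCAST reinterprets the bits already sitting in an FPR as a different
+// vector type of the same overall width, so each pattern below is selected
+// as a plain reuse of the source register; no instruction is emitted.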
+def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+// Natural vector casts (128 bit)
+def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
+
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
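+// For the FPR<->FPR bitconverts below, big-endian needs a lane reversal
+// whenever the two vector types disagree on element size: the REV width is
+// the larger of the two element sizes and it operates on lanes of the
+// smaller one (e.g. v2i32 <-> v4i16 uses REV32v4i16). Types with the same
+// element size need no REV at all.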
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
 (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
(v1i64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
(v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
+ (v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
(v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
(v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
}
+let Predicates = [IsLE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
+ (v4f16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
+def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
+ (v4f16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
+ (v4f16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
+ (v4f16 (REV64v4i16 FPR64:$src))>;
+}
+
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
}
let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
(f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
(f64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
+ (v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
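+// f128 has no REV of its own: the 64-bit halves are swapped with EXT #8 and
+// REV64 then fixes the element order within each half.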
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
(f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
(v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
(v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
(REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
(v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
 (v2i64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
(v2i64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
 (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
(v4i32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
 (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
 (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
(v8i16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
+ (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
+def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
+ (v8f16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
+ (v8f16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
+ (v8f16 (REV32v8i16 FPR128:$src))>;
}
let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
 (REV64v16i8 FPR128:$src), (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
(v16i8 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
}
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
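+// Widening a 64-bit vector into the low half of an undef 128-bit vector is
+// free: the value already lives in the dsub subregister, so normally no
+// instruction is emitted.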
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;