IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; // NOTE(review): NoNEONVFP4 presumably disables this unfused v2f32 "vmla" wherever the HasNEONVFP4-gated VFMAfd applies - confirm
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; // NOTE(review): NoNEONVFP4 presumably disables this unfused v4f32 "vmla" wherever the HasNEONVFP4-gated VFMAfq applies - confirm
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; // NOTE(review): NoNEONVFP4 presumably disables this unfused v2f32 "vmls" wherever the HasNEONVFP4-gated VFMSfd applies - confirm
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, NoNEONVFP4]>; // NOTE(review): NoNEONVFP4 presumably disables this unfused v4f32 "vmls" wherever the HasNEONVFP4-gated VFMSfq applies - confirm
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
"vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+// VFMA : Fused Vector Multiply-Accumulate (floating-point)
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>, // NOTE(review): same fmul_su/fadd_mlx fragments as the unfused VMLAfd, so a separate multiply and add would presumably select the fused "vfma" - confirm the rounding difference is acceptable
+ Requires<[HasNEONVFP4]>; // gated on HasNEONVFP4 only; unlike VMLAfd there is no UseFPVMLx requirement
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>, // NOTE(review): v4f32 counterpart of VFMAfd; reuses the fmul_su/fadd_mlx fragments of the unfused VMLAfq - confirm selecting a fused op for them is intended
+ Requires<[HasNEONVFP4]>; // gated on HasNEONVFP4 only; unlike VMLAfq there is no UseFPVMLx requirement
+
+// VFMS : Fused Vector Multiply-Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>, // NOTE(review): same fmul_su/fsub_mlx fragments as the unfused VMLSfd, so a separate multiply and subtract would presumably select the fused "vfms" - confirm
+ Requires<[HasNEONVFP4]>; // gated on HasNEONVFP4 only; unlike VMLSfd there is no UseFPVMLx requirement
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>, // NOTE(review): v4f32 counterpart of VFMSfd; reuses the fmul_su/fsub_mlx fragments of the unfused VMLSfq - confirm selecting a fused op for them is intended
+ Requires<[HasNEONVFP4]>; // gated on HasNEONVFP4 only; unlike VMLSfq there is no UseFPVMLx requirement
+
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; // fmul+fadd -> VMLAfd pattern now additionally requires NoNEONVFP4
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEONVFP4]>; // fmul+fsub -> VMLSfd pattern now additionally requires NoNEONVFP4
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasNEONVFP4, UseNEONForFP]>; // NOTE(review): maps plain fmul+fadd onto VFMAfd and drops the UseFPVMLx gate the VMLAfd pattern has - confirm both are intended
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasNEONVFP4, UseNEONForFP]>; // NOTE(review): maps plain fmul+fsub onto VFMSfd and drops the UseFPVMLx gate the VMLSfd pattern has - confirm both are intended
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;