1 //===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes FMA (Fused Multiply-Add) instructions.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // FMA3 - Intel 3 operand Fused Multiply-Add instructions
16 //===----------------------------------------------------------------------===//
18 let Constraints = "$src1 = $dst" in {
// fma3p_rm - packed FMA3 forms that carry no instruction-selection pattern.
//
// Expands to four records for one opcode: `r`/`m` operate on VR128 and
// `rY`/`mY` on VR256. The `m*` variants take the third source from memory
// (f128mem / f256mem) instead of a register.
//
//   opc       - opcode byte forwarded to the FMA3 class.
//   OpcodeStr - mnemonic string.
//               NOTE(review): no use of OpcodeStr is visible in this listing;
//               presumably it is concatenated in front of the operand string
//               of each def - confirm against the complete file.
//
// Every record has an empty pattern list and is flagged neverHasSideEffects.
// The multiclass sits inside the enclosing `let Constraints = "$src1 = $dst"`.
19 multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
20 let neverHasSideEffects = 1 in {
// 128-bit form, third source in a register.
21 def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
22 (ins VR128:$src1, VR128:$src2, VR128:$src3),
24 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
// 128-bit form, third source is an f128mem operand.
26 def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
27 (ins VR128:$src1, VR128:$src2, f128mem:$src3),
29 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
// 256-bit form, third source in a register.
30 def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
31 (ins VR256:$src1, VR256:$src2, VR256:$src3),
33 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
// 256-bit form, third source is an f256mem operand.
35 def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
36 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
38 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
39 } // neverHasSideEffects = 1
42 // Intrinsic for 213 pattern
//
// fma3p_rm_int - packed FMA3 forms that carry selection patterns.
//
// For each of the register/memory and 128/256-bit combinations two records
// are produced: a `*_Int` record whose pattern matches an intrinsic, and a
// plain record whose pattern matches an SDNode wrapped in a value type.
//
//   opc                   - opcode byte forwarded to the FMA3 class.
//   OpcodeStr             - mnemonic string.
//                           NOTE(review): its use is not visible in this
//                           listing; presumably prepended to the operand
//                           string - confirm.
//   MemFrag128/MemFrag256 - load fragments applied to `addr:$src3` in the
//                           memory-form patterns.
//   Int128/Int256         - intrinsics matched by the `*_Int` records.
//   Op213                 - SDNode matched by the non-`_Int` records.
//   OpVT128/OpVT256       - value types wrapped around the Op213 node.
//
// In every visible pattern the operands are passed as ($src2, $src1, $src3),
// i.e. the first two `ins` operands are swapped relative to the operand list.
43 multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
44 PatFrag MemFrag128, PatFrag MemFrag256,
45 Intrinsic Int128, Intrinsic Int256, SDNode Op213,
46 ValueType OpVT128, ValueType OpVT256> {
// 128-bit register form matched from the Int128 intrinsic.
47 def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
48 (ins VR128:$src1, VR128:$src2, VR128:$src3),
50 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
51 [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
// 128-bit register form matched from the Op213 node typed as OpVT128.
54 def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
55 (ins VR128:$src1, VR128:$src2, VR128:$src3),
57 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
58 [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2,
59 VR128:$src1, VR128:$src3)))]>;
// 128-bit memory form: Int128 with the third operand loaded via MemFrag128.
61 def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
62 (ins VR128:$src1, VR128:$src2, f128mem:$src3),
64 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
65 [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
66 (MemFrag128 addr:$src3)))]>;
// 128-bit memory form: Op213 with the third operand loaded via MemFrag128.
68 def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
69 (ins VR128:$src1, VR128:$src2, f128mem:$src3),
71 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
72 [(set VR128:$dst, (OpVT128 (Op213 VR128:$src2, VR128:$src1,
73 (MemFrag128 addr:$src3))))]>;
// 256-bit register form matched from the Int256 intrinsic.
76 def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
77 (ins VR256:$src1, VR256:$src2, VR256:$src3),
79 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
80 [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
// 256-bit register form matched from the Op213 node typed as OpVT256.
83 def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
84 (ins VR256:$src1, VR256:$src2, VR256:$src3),
86 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
87 [(set VR256:$dst, (OpVT256 (Op213 VR256:$src2, VR256:$src1,
// 256-bit memory form: Int256 with the third operand loaded via MemFrag256.
90 def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
91 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
93 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
94 [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
95 (MemFrag256 addr:$src3)))]>;
// 256-bit memory form: Op213 with the third operand loaded via MemFrag256.
97 def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
98 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
100 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
102 (OpVT256 (Op213 VR256:$src2, VR256:$src1,
103 (MemFrag256 addr:$src3))))]>;
105 } // Constraints = "$src1 = $dst"
// fma3p_forms - instantiate the three operand-order variants (132, 213, 231)
// of one packed FMA3 operation.
//
//   opc132/opc213/opc231  - opcode byte for each variant.
//   OpcodeStr             - base mnemonic, e.g. "vfmadd".
//   PackTy                - type suffix appended after the variant digits,
//                           e.g. "ps" or "pd".
//   MemFrag128/MemFrag256 - load fragments forwarded to the 213 variant.
//   Int128/Int256         - intrinsics forwarded to the 213 variant.
//   Op                    - SDNode forwarded to the 213 variant.
//   OpTy128/OpTy256       - value types forwarded to the 213 variant.
//
// Only the 213 variant goes through fma3p_rm_int and therefore receives the
// intrinsic/SDNode arguments; 132 and 231 go through fma3p_rm, which takes
// only the opcode and mnemonic.
107 multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
108 string OpcodeStr, string PackTy,
109 PatFrag MemFrag128, PatFrag MemFrag256,
110 Intrinsic Int128, Intrinsic Int256, SDNode Op,
111 ValueType OpTy128, ValueType OpTy256> {
// Mnemonic is OpcodeStr + "213" + PackTy.
112 defm r213 : fma3p_rm_int <opc213, !strconcat(OpcodeStr,
113 !strconcat("213", PackTy)), MemFrag128, MemFrag256,
114 Int128, Int256, Op, OpTy128, OpTy256>;
// Mnemonic is OpcodeStr + "132" + PackTy.
115 defm r132 : fma3p_rm <opc132,
116 !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
// Mnemonic is OpcodeStr + "231" + PackTy.
117 defm r231 : fma3p_rm <opc231,
118 !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
121 // Fused Multiply-Add
//
// Packed single-precision FMA3 instantiations, all in the SSEPackedSingle
// execution domain. Each defm passes, in order: the 132/213/231 opcode bytes,
// the base mnemonic, the "ps" suffix, the 128- and 256-bit load fragments,
// the 128- and 256-bit intrinsics and the SDNode for the 213 variant.
// NOTE(review): the trailing value-type arguments of each defm and the
// closing brace of this `let` are not visible in this listing - confirm
// against the complete file.
122 let ExeDomain = SSEPackedSingle in {
123 defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
124 memopv8f32, int_x86_fma_vfmadd_ps,
125 int_x86_fma_vfmadd_ps_256, X86Fmadd,
127 defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
128 memopv8f32, int_x86_fma_vfmsub_ps,
129 int_x86_fma_vfmsub_ps_256, X86Fmsub,
131 defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
132 memopv4f32, memopv8f32,
133 int_x86_fma_vfmaddsub_ps,
134 int_x86_fma_vfmaddsub_ps_256, X86Fmaddsub,
// The 256-bit intrinsic must be the vfmsubadd one, like the 128-bit argument
// and the VFMSUBADDPD / VFMSUBADDPS4 instantiations elsewhere in this file.
// Passing int_x86_fma_vfmaddsub_ps_256 here attached the vfmaddsub intrinsic
// to a vfmsubadd instruction and left vfmsubadd_ps_256 without an FMA3 pattern.
136 defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
137 memopv4f32, memopv8f32,
138 int_x86_fma_vfmsubadd_ps,
139 int_x86_fma_vfmsubadd_ps_256, X86Fmsubadd,
// Packed double-precision counterparts of the fused multiply-add group, in
// the SSEPackedDouble execution domain. They reuse the same opcode bytes and
// SDNodes as the "ps" instantiations, with the "pd" suffix, the v2f64/v4f64
// load fragments and the *_pd intrinsics. The defms whose final line is
// visible here (VFMADDSUBPD, VFMSUBADDPD) pass v2f64/v4f64 as value types and
// are tagged VEX_W.
// NOTE(review): the final line of VFMADDPD and VFMSUBPD is not visible in
// this listing; presumably it matches the other two - confirm.
143 let ExeDomain = SSEPackedDouble in {
144 defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
145 memopv4f64, int_x86_fma_vfmadd_pd,
146 int_x86_fma_vfmadd_pd_256, X86Fmadd, v2f64,
148 defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
149 memopv4f64, int_x86_fma_vfmsub_pd,
150 int_x86_fma_vfmsub_pd_256, X86Fmsub, v2f64,
152 defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
153 memopv2f64, memopv4f64,
154 int_x86_fma_vfmaddsub_pd,
155 int_x86_fma_vfmaddsub_pd_256, X86Fmaddsub,
156 v2f64, v4f64>, VEX_W;
157 defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
158 memopv2f64, memopv4f64,
159 int_x86_fma_vfmsubadd_pd,
160 int_x86_fma_vfmsubadd_pd_256, X86Fmsubadd,
161 v2f64, v4f64>, VEX_W;
164 // Fused Negative Multiply-Add
//
// Packed single-precision vfnmadd / vfnmsub instantiations in the
// SSEPackedSingle execution domain. Arguments follow the fma3p_forms order:
// 132/213/231 opcode bytes, base mnemonic, "ps" suffix, load fragments,
// intrinsics, SDNode, then value types (only v4f32 is visible here).
// NOTE(review): the last line of each defm is not visible in this listing.
165 let ExeDomain = SSEPackedSingle in {
166 defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
167 memopv8f32, int_x86_fma_vfnmadd_ps,
168 int_x86_fma_vfnmadd_ps_256, X86Fnmadd, v4f32,
170 defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
171 memopv8f32, int_x86_fma_vfnmsub_ps,
172 int_x86_fma_vfnmsub_ps_256, X86Fnmsub, v4f32,
// Packed double-precision vfnmadd / vfnmsub instantiations in the
// SSEPackedDouble execution domain, using the same opcode bytes and SDNodes
// as the "ps" versions with the *_pd intrinsics.
// NOTE(review): the last line of each defm, and one argument line of
// VFNMSUBPD (between "pd" and memopv4f64), are not visible in this listing -
// confirm against the complete file.
175 let ExeDomain = SSEPackedDouble in {
176 defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
177 memopv4f64, int_x86_fma_vfnmadd_pd,
178 int_x86_fma_vfnmadd_pd_256, X86Fnmadd, v2f64,
180 defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
182 memopv4f64, int_x86_fma_vfnmsub_pd,
183 int_x86_fma_vfnmsub_pd_256, X86Fnmsub, v2f64,
187 let Constraints = "$src1 = $dst" in {
// fma3s_rm - scalar FMA3 forms that carry no instruction-selection pattern.
//
// Produces a register form `r` and a memory form `m` whose operands live in
// register class RC; `m` reads the third source from an x86memop operand.
//
//   opc       - opcode byte forwarded to the FMA3 class.
//   OpcodeStr - mnemonic, concatenated in front of the operand string.
//   x86memop  - memory operand type used for $src3 in the `m` form.
//   RC        - register class of $dst and the register sources.
//               NOTE(review): RC is used below but its declaration in the
//               parameter list is not visible in this listing - confirm.
//
// Both records have an empty pattern list and are flagged
// neverHasSideEffects. The multiclass sits inside the enclosing
// `let Constraints = "$src1 = $dst"`.
188 multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
190 let neverHasSideEffects = 1 in {
// Register third source.
191 def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
192 (ins RC:$src1, RC:$src2, RC:$src3),
193 !strconcat(OpcodeStr,
194 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
// Memory third source.
196 def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
197 (ins RC:$src1, RC:$src2, x86memop:$src3),
198 !strconcat(OpcodeStr,
199 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
200 } // neverHasSideEffects = 1
// fma3s_rm_int - scalar FMA3 forms that carry selection patterns.
//
// Produces four records:
//   r_Int / m_Int - operate on VR128 and match the IntId intrinsic.
//   r             - operates on RC and matches OpNode typed as OpVT.
//   m             - operates on RC with a memory third source; its pattern
//                   list is empty.
//
//   opc       - opcode byte forwarded to the FMA3 class.
//   OpcodeStr - mnemonic, concatenated in front of the operand string.
//   memop     - memory operand type used for $src3 in the memory forms.
//   mem_cpat  - complex pattern matched against $src3 in m_Int.
//   IntId     - intrinsic matched by the `*_Int` records.
//   RC        - register class used by the non-intrinsic records.
//   OpNode    - SDNode matched by `r`.
//   OpVT      - value type wrapped around the OpNode result.
//
// The visible patterns pass their operands as ($src2, $src1, $src3). The
// multiclass sits inside the enclosing `let Constraints = "$src1 = $dst"`.
203 multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop,
204 ComplexPattern mem_cpat, Intrinsic IntId,
205 RegisterClass RC, SDNode OpNode, ValueType OpVT> {
// Intrinsic form, all operands in VR128.
206 def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
207 (ins VR128:$src1, VR128:$src2, VR128:$src3),
208 !strconcat(OpcodeStr,
209 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
210 [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1,
// Intrinsic form with the third operand matched through mem_cpat.
212 def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
213 (ins VR128:$src1, VR128:$src2, memop:$src3),
214 !strconcat(OpcodeStr,
215 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
217 (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>;
// SDNode form on the scalar register class RC.
218 def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
219 (ins RC:$src1, RC:$src2, RC:$src3),
220 !strconcat(OpcodeStr,
221 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
223 (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
// Memory form on RC; defined without a selection pattern.
225 def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
226 (ins RC:$src1, RC:$src2, memop:$src3),
227 !strconcat(OpcodeStr,
228 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
230 } // Constraints = "$src1 = $dst"
// fma3s_forms - instantiate the scalar single ("ss") and double ("sd")
// versions of the 132, 213 and 231 variants of one FMA3 operation.
//
//   opc132/opc213/opc231 - opcode byte for each variant.
//   OpStr                - base mnemonic; the variant digits and the
//                          "ss"/"sd" suffix are appended with !strconcat.
//   IntF32/IntF64        - intrinsics forwarded to the 213 variants.
//   OpNode               - SDNode forwarded to the 213 variants.
//                          NOTE(review): OpNode is used below but its
//                          declaration in the parameter list is not visible
//                          in this listing - confirm.
//
// The 132 and 231 variants go through fma3s_rm (no patterns) with
// f32mem/FR32 for "ss" and f64mem/FR64 for "sd". The 213 variants go through
// fma3s_rm_int with ssmem/sse_load_f32/f32 or sdmem/sse_load_f64/f64.
// SDr213 is tagged VEX_W.
// NOTE(review): the SDr132 and SDr231 defms end with a trailing comma here;
// whatever follows is not visible in this listing.
232 multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
233 string OpStr, Intrinsic IntF32, Intrinsic IntF64,
235 defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>;
236 defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>;
237 defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>,
239 defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>,
241 defm SSr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213ss"), ssmem,
242 sse_load_f32, IntF32, FR32, OpNode, f32>;
243 defm SDr213 : fma3s_rm_int <opc213, !strconcat(OpStr, "213sd"), sdmem,
244 sse_load_f64, IntF64, FR64, OpNode, f64>, VEX_W;
// Scalar FMA3 instantiations. Each defm supplies the 132/213/231 opcode
// bytes, the base mnemonic, the "ss" and "sd" intrinsics and the SDNode used
// by the 213 variant, and is tagged VEX_LIG.
247 defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
248 int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG;
249 defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss,
250 int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG;
252 defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss,
253 int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG;
254 defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss,
255 int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG;
258 //===----------------------------------------------------------------------===//
259 // FMA4 - AMD 4 operand Fused Multiply-Add instructions
260 //===----------------------------------------------------------------------===//
// fma4s - scalar FMA4 instruction forms for one opcode.
//
// Unlike the FMA3 multiclasses, $dst is a separate operand printed alongside
// the three sources, and the patterns pass ($src1, $src2, $src3) in `ins`
// order.
//
//   opc       - opcode byte forwarded to the FMA4 class.
//   OpcodeStr - mnemonic, concatenated in front of the operand string.
//   memop     - memory operand type used by the `rm` and `mr` forms.
//   mem_cpat  - complex pattern matched against the memory operand.
//   Int       - intrinsic matched by the rr/rm/mr patterns.
//
// Records:
//   rr     - all sources in VR128; tagged VEX_W, MemOp4.
//   rm     - $src3 from memory; tagged VEX_W, MemOp4.
//   mr     - $src2 from memory; no VEX_W/MemOp4 tag.
//   rr_REV - isCodeGenOnly register form with an empty pattern list.
263 multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop,
264 ComplexPattern mem_cpat, Intrinsic Int> {
265 def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
266 (ins VR128:$src1, VR128:$src2, VR128:$src3),
267 !strconcat(OpcodeStr,
268 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
270 (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
// Third source read from memory.
271 def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
272 (ins VR128:$src1, VR128:$src2, memop:$src3),
273 !strconcat(OpcodeStr,
274 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
276 (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4;
// Second source read from memory.
277 def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
278 (ins VR128:$src1, memop:$src2, VR128:$src3),
279 !strconcat(OpcodeStr,
280 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
282 (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
// Pattern-less register form, marked isCodeGenOnly.
284 let isCodeGenOnly = 1 in
285 def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
286 (ins VR128:$src1, VR128:$src2, VR128:$src3),
287 !strconcat(OpcodeStr,
288 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
// fma4p - packed FMA4 instruction forms for one opcode, in 128-bit (VR128)
// and 256-bit (VR256, `Y` suffix) widths.
//
//   opc                   - opcode byte forwarded to the FMA4 class.
//   OpcodeStr             - mnemonic, concatenated in front of the operand
//                           string.
//   Int128/Int256         - intrinsics matched by the 128/256-bit patterns.
//   ld_frag128/ld_frag256 - load fragments applied to the memory operand's
//                           address in the rm/mr patterns.
//
// Records per width:
//   rr / rrY         - all sources in registers; tagged VEX_W, MemOp4.
//   rm / rmY         - $src3 loaded from memory; tagged VEX_W, MemOp4.
//   mr / mrY         - $src2 loaded from memory; no VEX_W/MemOp4 tag.
//   rr_REV / rrY_REV - isCodeGenOnly register forms with empty patterns.
//
// Patterns pass ($src1, $src2, $src3) in `ins` order.
291 multiclass fma4p<bits<8> opc, string OpcodeStr,
292 Intrinsic Int128, Intrinsic Int256,
293 PatFrag ld_frag128, PatFrag ld_frag256> {
// ---- 128-bit forms ----
294 def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
295 (ins VR128:$src1, VR128:$src2, VR128:$src3),
296 !strconcat(OpcodeStr,
297 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
299 (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
300 def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
301 (ins VR128:$src1, VR128:$src2, f128mem:$src3),
302 !strconcat(OpcodeStr,
303 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
304 [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2,
305 (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
306 def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
307 (ins VR128:$src1, f128mem:$src2, VR128:$src3),
308 !strconcat(OpcodeStr,
309 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
311 (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
// ---- 256-bit forms ----
312 def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
313 (ins VR256:$src1, VR256:$src2, VR256:$src3),
314 !strconcat(OpcodeStr,
315 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
317 (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4;
318 def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
319 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
320 !strconcat(OpcodeStr,
321 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
322 [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2,
323 (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4;
324 def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
325 (ins VR256:$src1, f256mem:$src2, VR256:$src3),
326 !strconcat(OpcodeStr,
327 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
329 (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
// ---- Pattern-less register forms, marked isCodeGenOnly ----
331 let isCodeGenOnly = 1 in {
332 def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
333 (ins VR128:$src1, VR128:$src2, VR128:$src3),
334 !strconcat(OpcodeStr,
335 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
336 def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
337 (ins VR256:$src1, VR256:$src2, VR256:$src3),
338 !strconcat(OpcodeStr,
339 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
340 } // isCodeGenOnly = 1
// FMA4 instantiations, all guarded by the HasFMA4 predicate.
//
// Scalar records use fma4s with (opcode, mnemonic, memory operand, load
// complex pattern, intrinsic): ssmem/sse_load_f32 for "ss" and
// sdmem/sse_load_f64 for "sd". Packed records use fma4p with (opcode,
// mnemonic, 128-bit intrinsic, 256-bit intrinsic, 128-bit load fragment,
// 256-bit load fragment).
//
// The same int_x86_fma_* intrinsics that appear in the FMA3 definitions
// earlier in this file are matched here as well.
// NOTE(review): the closing brace of this `let` is not visible in this
// listing.
343 let Predicates = [HasFMA4] in {
// vfmadd
345 defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
346 int_x86_fma_vfmadd_ss>;
347 defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
348 int_x86_fma_vfmadd_sd>;
349 defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps,
350 int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>;
351 defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd,
352 int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>;
// vfmsub
353 defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
354 int_x86_fma_vfmsub_ss>;
355 defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
356 int_x86_fma_vfmsub_sd>;
357 defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps,
358 int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>;
359 defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd,
360 int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>;
// vfnmadd
361 defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
362 int_x86_fma_vfnmadd_ss>;
363 defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
364 int_x86_fma_vfnmadd_sd>;
365 defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps,
366 int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>;
367 defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd,
368 int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>;
// vfnmsub
369 defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
370 int_x86_fma_vfnmsub_ss>;
371 defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
372 int_x86_fma_vfnmsub_sd>;
373 defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps,
374 int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>;
375 defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd,
376 int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>;
// vfmaddsub / vfmsubadd (packed only)
377 defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps,
378 int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
379 defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd,
380 int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
381 defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps,
382 int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
383 defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd,
384 int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>;