Fix for PR 1505 (and 1489). Rewrite X87 register
[oota-llvm.git] / lib / Target / X86 / X86InstrFPStack.td
1 //==- X86InstrFPStack.td - Describe the X86 Instruction Set -------*- C++ -*-=//
2 // 
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
7 // 
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the X86 x87 FPU instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
13 //
14 //===----------------------------------------------------------------------===//
15
16 //===----------------------------------------------------------------------===//
17 // FPStack specific DAG Nodes.
18 //===----------------------------------------------------------------------===//
19
20 def SDTX86FpGet   : SDTypeProfile<1, 0, [SDTCisFP<0>]>;  // Type profile of X86fpget.  NOTE(review): the two integers are presumably the result and operand counts - confirm against SDTypeProfile's definition.
21 def SDTX86FpSet   : SDTypeProfile<0, 1, [SDTCisFP<0>]>;  // Type profile of X86fpset.
22 def SDTX86Fld     : SDTypeProfile<1, 2, [SDTCisFP<0>,
23                                          SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;  // Type profile of X86fld.
24 def SDTX86Fst     : SDTypeProfile<0, 3, [SDTCisFP<0>,
25                                          SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;  // Type profile of X86fst.
26 def SDTX86Fild    : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
27                                          SDTCisVT<2, OtherVT>]>;  // Shared by X86fild and X86fildflag.
28 def SDTX86FpToIMem: SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;  // Shared by the three X86fp_to_i{16,32,64}mem nodes.
29
30 def X86fpget   : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet,
31                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;  // Matched by FpGETRESULT32/64.
32 def X86fpset   : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet,
33                         [SDNPHasChain, SDNPOutFlag]>;  // Matched by FpSETRESULT32/64.
34 def X86fld     : SDNode<"X86ISD::FLD",      SDTX86Fld,
35                         [SDNPHasChain]>;  // NOTE(review): no instruction pattern in this section uses it; presumably matched by standalone patterns elsewhere - confirm.
36 def X86fst     : SDNode<"X86ISD::FST",      SDTX86Fst,
37                         [SDNPHasChain, SDNPInFlag]>;  // NOTE(review): no instruction pattern in this section uses it; presumably matched elsewhere - confirm.
38 def X86fild    : SDNode<"X86ISD::FILD",     SDTX86Fild,
39                         [SDNPHasChain]>;  // Matched by the FpILD* loads and folded into the FpI{ADD,MUL,SUB,SUBR,DIV,DIVR}* patterns.
40 def X86fildflag: SDNode<"X86ISD::FILD_FLAG",SDTX86Fild,
41                         [SDNPHasChain, SDNPOutFlag]>;  // Same profile as X86fild plus SDNPOutFlag.  NOTE(review): not used by any pattern in this section - confirm where it is matched.
42 def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
43                         [SDNPHasChain]>;  // Matched by FP{32,64}_TO_INT16_IN_MEM and FpISTT16m{32,64}.
44 def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
45                         [SDNPHasChain]>;  // Matched by FP{32,64}_TO_INT32_IN_MEM and FpISTT32m{32,64}.
46 def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
47                         [SDNPHasChain]>;  // Matched by FP{32,64}_TO_INT64_IN_MEM and FpISTT64m{32,64}.
48
49 //===----------------------------------------------------------------------===//
50 // FPStack pattern fragments
51 //===----------------------------------------------------------------------===//
52
53 def fpimm0 : PatLeaf<(fpimm), [{
54   return N->isExactlyValue(+0.0);
55 }]>;  // fpimm0: an fpimm accepted only when it is exactly +0.0; used by FpLD032/FpLD064.
56
57 def fpimmneg0 : PatLeaf<(fpimm), [{
58   return N->isExactlyValue(-0.0);
59 }]>;  // fpimmneg0: an fpimm accepted only when isExactlyValue(-0.0) holds; no pattern in this section uses it.
60
61 def fpimm1 : PatLeaf<(fpimm), [{
62   return N->isExactlyValue(+1.0);
63 }]>;  // fpimm1: an fpimm accepted only when it is exactly +1.0; used by FpLD132/FpLD164.
64
65 def fpimmneg1 : PatLeaf<(fpimm), [{
66   return N->isExactlyValue(-1.0);
67 }]>;  // fpimmneg1: an fpimm accepted only when it is exactly -1.0; no pattern in this section uses it.
68
69 def extloadf64f32  : PatFrag<(ops node:$ptr), (f64 (extloadf32 node:$ptr))>;  // An extloadf32 of $ptr typed as f64.
70
71 // Some 'special' instructions: opcode-0 Pseudo records that store an RFP32/RFP64 value to i16/i32/i64 memory, one per (source class, integer width) pair.
72 let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
73   def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
74                             (ops i16mem:$dst, RFP32:$src),
75                             "#FP32_TO_INT16_IN_MEM PSEUDO!",
76                             [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
77   def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
78                             (ops i32mem:$dst, RFP32:$src),
79                             "#FP32_TO_INT32_IN_MEM PSEUDO!",
80                             [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
81   def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
82                             (ops i64mem:$dst, RFP32:$src),
83                             "#FP32_TO_INT64_IN_MEM PSEUDO!",
84                             [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
85   def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
86                             (ops i16mem:$dst, RFP64:$src),
87                             "#FP64_TO_INT16_IN_MEM PSEUDO!",
88                             [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
89   def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
90                             (ops i32mem:$dst, RFP64:$src),
91                             "#FP64_TO_INT32_IN_MEM PSEUDO!",
92                             [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
93   def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
94                             (ops i64mem:$dst, RFP64:$src),
95                             "#FP64_TO_INT64_IN_MEM PSEUDO!",
96                             [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
97 }  // end usesCustomDAGSchedInserter.  The FpISTT* records later in the file match the same X86fp_to_i*mem nodes under Requires<[HasSSE3]>.
98
99 let isTerminator = 1 in  // FP_REG_KILL is flagged as a terminator instruction...
100   let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in  // ...and lists FP0 through FP6 in Defs (FP7 is not listed).
101     def FP_REG_KILL  : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;  // Opcode-0 Pseudo with no operands and no selection pattern.
102
103 // All FP Stack operations are represented with two instructions here.  The
104 // first instruction, generated by the instruction selector, uses "RFP"
105 // registers: a traditional register file to reference floating point values.
106 // These instructions are all pseudo instructions and use the "Fp" prefix.
107 // The second instruction is defined with FPI, which is the actual instruction
108 // emitted by the assembler.  The FP stackifier pass converts one to the other
109 // after register allocation occurs.
110 //
111 // Note that the FpI instruction should have instruction selection info (e.g.
112 // a pattern) and the FPI instruction should have emission info (e.g. opcode
113 // encoding and asm printing info).
114
115 // FPI - Floating Point Instruction template.  Forwards opcode, format, operands and asm string to I<> with an empty pattern list.
116 class FPI<bits<8> o, Format F, dag ops, string asm> : I<o, F, ops, asm, []> {}
117
118 // FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
119 class FpI_<dag ops, FPFormat fp, list<dag> pattern>
120   : X86Inst<0, Pseudo, NoImm, ops, ""> {  // Opcode 0, Pseudo format, NoImm, empty asm string.
121   let FPForm = fp; let FPFormBits = FPForm.Value;  // Store the FP format and copy its Value field into FPFormBits.
122   let Pattern = pattern;  // The selection pattern is supplied by each def.
123 }
124
125 // Random Pseudo Instructions.  These derive from FpI_ directly, so they carry no Requires<[FPStack]> predicate.
126 def FpGETRESULT32 : FpI_<(ops RFP32:$dst), SpecialFP,
127                    [(set RFP32:$dst, X86fpget)]>;                    // FPR = ST(0)
128
129 def FpGETRESULT64 : FpI_<(ops RFP64:$dst), SpecialFP,
130                    [(set RFP64:$dst, X86fpget)]>;                    // FPR = ST(0)
131
132 let noResults = 1 in {  // The SETRESULT forms have only a source operand.
133   def FpSETRESULT32 : FpI_<(ops RFP32:$src), SpecialFP,
134                         [(X86fpset RFP32:$src)]>, Imp<[], [ST0]>;   // ST(0) = FPR.  NOTE(review): Imp<[], [ST0]> presumably marks ST0 as an implicit def - confirm against Imp's definition.
135
136   def FpSETRESULT64 : FpI_<(ops RFP64:$src), SpecialFP,
137                         [(X86fpset RFP64:$src)]>, Imp<[], [ST0]>;   // ST(0) = FPR
138 }
139 // FpI - Floating Point Pseudo Instruction template. Predicated on FPStack.
140 class FpI<dag ops, FPFormat fp, list<dag> pattern> :
141   FpI_<ops, fp, pattern>, Requires<[FPStack]>;  // Identical to FpI_ apart from the added FPStack requirement.
142
143 // Register copies.  Just copies, the 64->32 version does not truncate.  The name suffix reads <source width><destination width>; all four are SpecialFP with no selection pattern.
144 def FpMOV3232       : FpI<(ops RFP32:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2
145 def FpMOV3264       : FpI<(ops RFP64:$dst, RFP32:$src), SpecialFP, []>; // f1 = fmov f2
146 def FpMOV6432       : FpI<(ops RFP32:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2
147 def FpMOV6464       : FpI<(ops RFP64:$dst, RFP64:$src), SpecialFP, []>; // f1 = fmov f2
148
149 // Arithmetic
150 // Add, Sub, Mul, Div.  Register-register TwoArgFP pseudos selected from fadd/fsub/fmul/fdiv, once for RFP32 and once for RFP64.
151 def FpADD32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
152                 [(set RFP32:$dst, (fadd RFP32:$src1, RFP32:$src2))]>;
153 def FpSUB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
154                 [(set RFP32:$dst, (fsub RFP32:$src1, RFP32:$src2))]>;
155 def FpMUL32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
156                 [(set RFP32:$dst, (fmul RFP32:$src1, RFP32:$src2))]>;
157 def FpDIV32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), TwoArgFP,
158                 [(set RFP32:$dst, (fdiv RFP32:$src1, RFP32:$src2))]>;
159 def FpADD64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
160                 [(set RFP64:$dst, (fadd RFP64:$src1, RFP64:$src2))]>;
161 def FpSUB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
162                 [(set RFP64:$dst, (fsub RFP64:$src1, RFP64:$src2))]>;
163 def FpMUL64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
164                 [(set RFP64:$dst, (fmul RFP64:$src1, RFP64:$src2))]>;
165 def FpDIV64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), TwoArgFP,
166                 [(set RFP64:$dst, (fdiv RFP64:$src1, RFP64:$src2))]>;
167
168 class FPST0rInst<bits<8> o, string asm>
169   : FPI<o, AddRegFrm, (ops RST:$op), asm>, D8;  // One RST operand, AddRegFrm, D8 mix-in; used by the "<op> $op" records (FADDST0r, FSUBST0r, ...).
170 class FPrST0Inst<bits<8> o, string asm>
171   : FPI<o, AddRegFrm, (ops RST:$op), asm>, DC;  // Same shape with the DC mix-in; used by the records whose asm names both %st(0) and $op (FADDrST0, ...).
172 class FPrST0PInst<bits<8> o, string asm>
173   : FPI<o, AddRegFrm, (ops RST:$op), asm>, DE;  // Same shape with the DE mix-in; used by the "p"-suffixed records (FADDPrST0, ...).
174
175 // Binary Ops with a memory source.  OneArgFPRW pseudos whose $src2 is a folded loadf32/loadf64; in the SUBR/DIVR forms the load is the left-hand operand.
176 def FpADD32m  : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
177                     [(set RFP32:$dst, (fadd RFP32:$src1, (loadf32 addr:$src2)))]>;
178                 // ST(0) = ST(0) + [mem32]
179 def FpADD64m  : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
180                     [(set RFP64:$dst, (fadd RFP64:$src1, (loadf64 addr:$src2)))]>;
181                 // ST(0) = ST(0) + [mem64]
182 def FpMUL32m  : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
183                     [(set RFP32:$dst, (fmul RFP32:$src1, (loadf32 addr:$src2)))]>;
184                 // ST(0) = ST(0) * [mem32]
185 def FpMUL64m  : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
186                     [(set RFP64:$dst, (fmul RFP64:$src1, (loadf64 addr:$src2)))]>;
187                 // ST(0) = ST(0) * [mem64]
188 def FpSUB32m  : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
189                     [(set RFP32:$dst, (fsub RFP32:$src1, (loadf32 addr:$src2)))]>;
190                 // ST(0) = ST(0) - [mem32]
191 def FpSUB64m  : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
192                     [(set RFP64:$dst, (fsub RFP64:$src1, (loadf64 addr:$src2)))]>;
193                 // ST(0) = ST(0) - [mem64]
194 def FpSUBR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
195                     [(set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1))]>;
196                 // ST(0) = [mem32] - ST(0)
197 def FpSUBR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
198                     [(set RFP64:$dst, (fsub (loadf64 addr:$src2), RFP64:$src1))]>;
199                 // ST(0) = [mem64] - ST(0)
200 def FpDIV32m  : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
201                     [(set RFP32:$dst, (fdiv RFP32:$src1, (loadf32 addr:$src2)))]>;
202                 // ST(0) = ST(0) / [mem32]
203 def FpDIV64m  : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
204                     [(set RFP64:$dst, (fdiv RFP64:$src1, (loadf64 addr:$src2)))]>;
205                 // ST(0) = ST(0) / [mem64]
206 def FpDIVR32m : FpI<(ops RFP32:$dst, RFP32:$src1, f32mem:$src2), OneArgFPRW,
207                     [(set RFP32:$dst, (fdiv (loadf32 addr:$src2), RFP32:$src1))]>;
208                 // ST(0) = [mem32] / ST(0)
209 def FpDIVR64m : FpI<(ops RFP64:$dst, RFP64:$src1, f64mem:$src2), OneArgFPRW,
210                     [(set RFP64:$dst, (fdiv (loadf64 addr:$src2), RFP64:$src1))]>;
211                 // ST(0) = [mem64] / ST(0)
212
213
214 def FADD32m  : FPI<0xD8, MRM0m, (ops f32mem:$src), "fadd{s} $src">;  // Real (FPI) memory-source forms: opcode 0xD8 for f32mem, 0xDC for f64mem; the MRMnm format differs per operation.
215 def FADD64m  : FPI<0xDC, MRM0m, (ops f64mem:$src), "fadd{l} $src">;
216 def FMUL32m  : FPI<0xD8, MRM1m, (ops f32mem:$src), "fmul{s} $src">;
217 def FMUL64m  : FPI<0xDC, MRM1m, (ops f64mem:$src), "fmul{l} $src">;
218 def FSUB32m  : FPI<0xD8, MRM4m, (ops f32mem:$src), "fsub{s} $src">;
219 def FSUB64m  : FPI<0xDC, MRM4m, (ops f64mem:$src), "fsub{l} $src">;
220 def FSUBR32m : FPI<0xD8, MRM5m, (ops f32mem:$src), "fsubr{s} $src">;
221 def FSUBR64m : FPI<0xDC, MRM5m, (ops f64mem:$src), "fsubr{l} $src">;
222 def FDIV32m  : FPI<0xD8, MRM6m, (ops f32mem:$src), "fdiv{s} $src">;
223 def FDIV64m  : FPI<0xDC, MRM6m, (ops f64mem:$src), "fdiv{l} $src">;
224 def FDIVR32m : FPI<0xD8, MRM7m, (ops f32mem:$src), "fdivr{s} $src">;
225 def FDIVR64m : FPI<0xDC, MRM7m, (ops f64mem:$src), "fdivr{l} $src">;
226
227 def FpIADD16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,  // RFP32 group: arithmetic against an integer loaded from memory via X86fild (i16 or i32).
228                     [(set RFP32:$dst, (fadd RFP32:$src1,
229                                      (X86fild addr:$src2, i16)))]>;
230                 // ST(0) = ST(0) + [mem16int]
231 def FpIADD32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
232                     [(set RFP32:$dst, (fadd RFP32:$src1,
233                                      (X86fild addr:$src2, i32)))]>;
234                 // ST(0) = ST(0) + [mem32int]
235 def FpIMUL16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
236                     [(set RFP32:$dst, (fmul RFP32:$src1,
237                                      (X86fild addr:$src2, i16)))]>;
238                 // ST(0) = ST(0) * [mem16int]
239 def FpIMUL32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
240                     [(set RFP32:$dst, (fmul RFP32:$src1,
241                                      (X86fild addr:$src2, i32)))]>;
242                 // ST(0) = ST(0) * [mem32int]
243 def FpISUB16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
244                     [(set RFP32:$dst, (fsub RFP32:$src1,
245                                      (X86fild addr:$src2, i16)))]>;
246                 // ST(0) = ST(0) - [mem16int]
247 def FpISUB32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
248                     [(set RFP32:$dst, (fsub RFP32:$src1,
249                                      (X86fild addr:$src2, i32)))]>;
250                 // ST(0) = ST(0) - [mem32int]
251 def FpISUBR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
252                      [(set RFP32:$dst, (fsub (X86fild addr:$src2, i16),
253                                       RFP32:$src1))]>;
254                 // ST(0) = [mem16int] - ST(0)
255 def FpISUBR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
256                      [(set RFP32:$dst, (fsub (X86fild addr:$src2, i32),
257                                       RFP32:$src1))]>;
258                 // ST(0) = [mem32int] - ST(0)
259 def FpIDIV16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
260                     [(set RFP32:$dst, (fdiv RFP32:$src1,
261                                      (X86fild addr:$src2, i16)))]>;
262                 // ST(0) = ST(0) / [mem16int]
263 def FpIDIV32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
264                     [(set RFP32:$dst, (fdiv RFP32:$src1,
265                                      (X86fild addr:$src2, i32)))]>;
266                 // ST(0) = ST(0) / [mem32int]
267 def FpIDIVR16m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i16mem:$src2), OneArgFPRW,
268                      [(set RFP32:$dst, (fdiv (X86fild addr:$src2, i16),
269                                       RFP32:$src1))]>;
270                 // ST(0) = [mem16int] / ST(0)
271 def FpIDIVR32m32 : FpI<(ops RFP32:$dst, RFP32:$src1, i32mem:$src2), OneArgFPRW,
272                      [(set RFP32:$dst, (fdiv (X86fild addr:$src2, i32),
273                                       RFP32:$src1))]>;
274                 // ST(0) = [mem32int] / ST(0)
275
276 def FpIADD16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,  // RFP64 group: same twelve integer-memory operations as the RFP32 records, on RFP64 registers.
277                     [(set RFP64:$dst, (fadd RFP64:$src1,
278                                      (X86fild addr:$src2, i16)))]>;
279                 // ST(0) = ST(0) + [mem16int]
280 def FpIADD32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
281                     [(set RFP64:$dst, (fadd RFP64:$src1,
282                                      (X86fild addr:$src2, i32)))]>;
283                 // ST(0) = ST(0) + [mem32int]
284 def FpIMUL16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
285                     [(set RFP64:$dst, (fmul RFP64:$src1,
286                                      (X86fild addr:$src2, i16)))]>;
287                 // ST(0) = ST(0) * [mem16int]
288 def FpIMUL32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
289                     [(set RFP64:$dst, (fmul RFP64:$src1,
290                                      (X86fild addr:$src2, i32)))]>;
291                 // ST(0) = ST(0) * [mem32int]
292 def FpISUB16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
293                     [(set RFP64:$dst, (fsub RFP64:$src1,
294                                      (X86fild addr:$src2, i16)))]>;
295                 // ST(0) = ST(0) - [mem16int]
296 def FpISUB32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
297                     [(set RFP64:$dst, (fsub RFP64:$src1,
298                                      (X86fild addr:$src2, i32)))]>;
299                 // ST(0) = ST(0) - [mem32int]
300 def FpISUBR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
301                      [(set RFP64:$dst, (fsub (X86fild addr:$src2, i16),
302                                       RFP64:$src1))]>;
303                 // ST(0) = [mem16int] - ST(0)
304 def FpISUBR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
305                      [(set RFP64:$dst, (fsub (X86fild addr:$src2, i32),
306                                       RFP64:$src1))]>;
307                 // ST(0) = [mem32int] - ST(0)
308 def FpIDIV16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
309                     [(set RFP64:$dst, (fdiv RFP64:$src1,
310                                      (X86fild addr:$src2, i16)))]>;
311                 // ST(0) = ST(0) / [mem16int]
312 def FpIDIV32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
313                     [(set RFP64:$dst, (fdiv RFP64:$src1,
314                                      (X86fild addr:$src2, i32)))]>;
315                 // ST(0) = ST(0) / [mem32int]
316 def FpIDIVR16m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i16mem:$src2), OneArgFPRW,
317                      [(set RFP64:$dst, (fdiv (X86fild addr:$src2, i16),
318                                       RFP64:$src1))]>;
319                 // ST(0) = [mem16int] / ST(0)
320 def FpIDIVR32m64 : FpI<(ops RFP64:$dst, RFP64:$src1, i32mem:$src2), OneArgFPRW,
321                      [(set RFP64:$dst, (fdiv (X86fild addr:$src2, i32),
322                                       RFP64:$src1))]>;
323                 // ST(0) = [mem32int] / ST(0)
324
325 def FIADD16m  : FPI<0xDE, MRM0m, (ops i16mem:$src), "fiadd{s} $src">;  // Real (FPI) integer-memory forms: opcode 0xDE for i16mem, 0xDA for i32mem; the MRMnm format differs per operation.
326 def FIADD32m  : FPI<0xDA, MRM0m, (ops i32mem:$src), "fiadd{l} $src">;
327 def FIMUL16m  : FPI<0xDE, MRM1m, (ops i16mem:$src), "fimul{s} $src">;
328 def FIMUL32m  : FPI<0xDA, MRM1m, (ops i32mem:$src), "fimul{l} $src">;
329 def FISUB16m  : FPI<0xDE, MRM4m, (ops i16mem:$src), "fisub{s} $src">;
330 def FISUB32m  : FPI<0xDA, MRM4m, (ops i32mem:$src), "fisub{l} $src">;
331 def FISUBR16m : FPI<0xDE, MRM5m, (ops i16mem:$src), "fisubr{s} $src">;
332 def FISUBR32m : FPI<0xDA, MRM5m, (ops i32mem:$src), "fisubr{l} $src">;
333 def FIDIV16m  : FPI<0xDE, MRM6m, (ops i16mem:$src), "fidiv{s} $src">;
334 def FIDIV32m  : FPI<0xDA, MRM6m, (ops i32mem:$src), "fidiv{l} $src">;
335 def FIDIVR16m : FPI<0xDE, MRM7m, (ops i16mem:$src), "fidivr{s} $src">;
336 def FIDIVR32m : FPI<0xDA, MRM7m, (ops i32mem:$src), "fidivr{l} $src">;
337
338 // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
339 // of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
340 // we have to put some 'r's in and take them out of weird places.  Each operation below is a triple (FPST0rInst, FPrST0Inst, FPrST0PInst) sharing one opcode byte.  NOTE(review): the {a|b} groups in the asm strings look like per-dialect alternatives - confirm.
341 def FADDST0r   : FPST0rInst <0xC0, "fadd $op">;
342 def FADDrST0   : FPrST0Inst <0xC0, "fadd {%st(0), $op|$op, %ST(0)}">;
343 def FADDPrST0  : FPrST0PInst<0xC0, "faddp $op">;
344 def FSUBRST0r  : FPST0rInst <0xE8, "fsubr $op">;
345 def FSUBrST0   : FPrST0Inst <0xE8, "fsub{r} {%st(0), $op|$op, %ST(0)}">;
346 def FSUBPrST0  : FPrST0PInst<0xE8, "fsub{r}p $op">;
347 def FSUBST0r   : FPST0rInst <0xE0, "fsub $op">;
348 def FSUBRrST0  : FPrST0Inst <0xE0, "fsub{|r} {%st(0), $op|$op, %ST(0)}">;
349 def FSUBRPrST0 : FPrST0PInst<0xE0, "fsub{|r}p $op">;
350 def FMULST0r   : FPST0rInst <0xC8, "fmul $op">;
351 def FMULrST0   : FPrST0Inst <0xC8, "fmul {%st(0), $op|$op, %ST(0)}">;
352 def FMULPrST0  : FPrST0PInst<0xC8, "fmulp $op">;
353 def FDIVRST0r  : FPST0rInst <0xF8, "fdivr $op">;
354 def FDIVrST0   : FPrST0Inst <0xF8, "fdiv{r} {%st(0), $op|$op, %ST(0)}">;
355 def FDIVPrST0  : FPrST0PInst<0xF8, "fdiv{r}p $op">;
356 def FDIVST0r   : FPST0rInst <0xF0, "fdiv $op">;
357 def FDIVRrST0  : FPrST0Inst <0xF0, "fdiv{|r} {%st(0), $op|$op, %ST(0)}">;
358 def FDIVRPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p $op">;
359
360 // Unary operations.  OneArgFPRW pseudos selected from fneg/fabs/fsqrt/fsin/fcos; FpTST32/64 are OneArgFP, take only a source operand and have no pattern.
361 def FpCHS32  : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
362                  [(set RFP32:$dst, (fneg RFP32:$src))]>;
363 def FpABS32  : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
364                  [(set RFP32:$dst, (fabs RFP32:$src))]>;
365 def FpSQRT32 : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
366                  [(set RFP32:$dst, (fsqrt RFP32:$src))]>;
367 def FpSIN32  : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
368                  [(set RFP32:$dst, (fsin RFP32:$src))]>;
369 def FpCOS32  : FpI<(ops RFP32:$dst, RFP32:$src), OneArgFPRW,
370                  [(set RFP32:$dst, (fcos RFP32:$src))]>;
371 def FpTST32  : FpI<(ops RFP32:$src), OneArgFP,
372                  []>;
373
374 def FpCHS64  : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
375                  [(set RFP64:$dst, (fneg RFP64:$src))]>;
376 def FpABS64  : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
377                  [(set RFP64:$dst, (fabs RFP64:$src))]>;
378 def FpSQRT64 : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
379                  [(set RFP64:$dst, (fsqrt RFP64:$src))]>;
380 def FpSIN64  : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
381                  [(set RFP64:$dst, (fsin RFP64:$src))]>;
382 def FpCOS64  : FpI<(ops RFP64:$dst, RFP64:$src), OneArgFPRW,
383                  [(set RFP64:$dst, (fcos RFP64:$src))]>;
384 def FpTST64  : FpI<(ops RFP64:$src), OneArgFP,
385                  []>;
386
387 def FCHS  : FPI<0xE0, RawFrm, (ops), "fchs">, D9;  // Real (FPI) unary forms: RawFrm, no explicit operands, D9 mix-in.
388 def FABS  : FPI<0xE1, RawFrm, (ops), "fabs">, D9;
389 def FSQRT : FPI<0xFA, RawFrm, (ops), "fsqrt">, D9;
390 def FSIN  : FPI<0xFE, RawFrm, (ops), "fsin">, D9;
391 def FCOS  : FPI<0xFF, RawFrm, (ops), "fcos">, D9;
392 def FTST  : FPI<0xE4, RawFrm, (ops), "ftst">, D9;
393
394
395 // Floating point cmovs.  CondMovFP pseudos selected from X86cmov, one per condition code for RFP32 and for RFP64.  The NB and NBE names match X86_COND_AE and X86_COND_A respectively.
396 let isTwoAddress = 1 in {
397   def FpCMOVB32  : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
398                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
399                                       X86_COND_B))]>;
400   def FpCMOVBE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
401                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
402                                       X86_COND_BE))]>;
403   def FpCMOVE32  : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
404                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
405                                       X86_COND_E))]>;
406   def FpCMOVP32  : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
407                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
408                                       X86_COND_P))]>;
409   def FpCMOVNB32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
410                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
411                                       X86_COND_AE))]>;
412   def FpCMOVNBE32: FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
413                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
414                                       X86_COND_A))]>;
415   def FpCMOVNE32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
416                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
417                                       X86_COND_NE))]>;
418   def FpCMOVNP32 : FpI<(ops RFP32:$dst, RFP32:$src1, RFP32:$src2), CondMovFP,
419                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
420                                       X86_COND_NP))]>;
421
422   def FpCMOVB64  : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
423                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
424                                       X86_COND_B))]>;
425   def FpCMOVBE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
426                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
427                                       X86_COND_BE))]>;
428   def FpCMOVE64  : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
429                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
430                                       X86_COND_E))]>;
431   def FpCMOVP64  : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
432                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
433                                       X86_COND_P))]>;
434   def FpCMOVNB64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
435                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
436                                       X86_COND_AE))]>;
437   def FpCMOVNBE64: FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
438                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
439                                       X86_COND_A))]>;
440   def FpCMOVNE64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
441                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
442                                       X86_COND_NE))]>;
443   def FpCMOVNP64 : FpI<(ops RFP64:$dst, RFP64:$src1, RFP64:$src2), CondMovFP,
444                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
445                                       X86_COND_NP))]>;
446 }
447
448 def FCMOVB  : FPI<0xC0, AddRegFrm, (ops RST:$op),  // Real (FPI) cmov forms: DA mix-in for B/BE/E/P, DB mix-in for NB/NBE/NE/NP, with matching opcode bytes pairwise.
449                   "fcmovb {$op, %st(0)|%ST(0), $op}">, DA;
450 def FCMOVBE : FPI<0xD0, AddRegFrm, (ops RST:$op),
451                   "fcmovbe {$op, %st(0)|%ST(0), $op}">, DA;
452 def FCMOVE  : FPI<0xC8, AddRegFrm, (ops RST:$op),
453                   "fcmove {$op, %st(0)|%ST(0), $op}">, DA;
454 def FCMOVP  : FPI<0xD8, AddRegFrm, (ops RST:$op),
455                   "fcmovu  {$op, %st(0)|%ST(0), $op}">, DA;  // The P record prints the mnemonic "fcmovu".
456 def FCMOVNB : FPI<0xC0, AddRegFrm, (ops RST:$op),
457                   "fcmovnb {$op, %st(0)|%ST(0), $op}">, DB;
458 def FCMOVNBE  : FPI<0xD0, AddRegFrm, (ops RST:$op),
459                   "fcmovnbe {$op, %st(0)|%ST(0), $op}">, DB;
460 def FCMOVNE : FPI<0xC8, AddRegFrm, (ops RST:$op),
461                   "fcmovne {$op, %st(0)|%ST(0), $op}">, DB;
462 def FCMOVNP : FPI<0xD8, AddRegFrm, (ops RST:$op),
463                   "fcmovnu {$op, %st(0)|%ST(0), $op}">, DB;  // The NP record prints the mnemonic "fcmovnu".
464
465 // Floating point loads & stores.  Loads are ZeroArgFP pseudos: FP loads match loadf32/loadf64; integer loads match X86fild with the integer memory type as its second operand.
466 def FpLD32m  : FpI<(ops RFP32:$dst, f32mem:$src), ZeroArgFP,
467                    [(set RFP32:$dst, (loadf32 addr:$src))]>;
468 def FpLD64m  : FpI<(ops RFP64:$dst, f64mem:$src), ZeroArgFP,
469                    [(set RFP64:$dst, (loadf64 addr:$src))]>;
470 def FpILD16m32 : FpI<(ops RFP32:$dst, i16mem:$src), ZeroArgFP,
471                    [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
472 def FpILD32m32 : FpI<(ops RFP32:$dst, i32mem:$src), ZeroArgFP,
473                    [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
474 def FpILD64m32 : FpI<(ops RFP32:$dst, i64mem:$src), ZeroArgFP,
475                    [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
476 def FpILD16m64 : FpI<(ops RFP64:$dst, i16mem:$src), ZeroArgFP,
477                    [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
478 def FpILD32m64 : FpI<(ops RFP64:$dst, i32mem:$src), ZeroArgFP,
479                    [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
480 def FpILD64m64 : FpI<(ops RFP64:$dst, i64mem:$src), ZeroArgFP,
481                    [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
482
483 def FpST32m   : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP,  // Store pseudos (OneArgFP): the three FpST* records are selected from store/truncstoref32.
484                 [(store RFP32:$src, addr:$op)]>;
485 def FpST64m32 : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP,
486                 [(truncstoref32 RFP64:$src, addr:$op)]>;  // RFP64 source written to f32 memory via a truncating store.
487 def FpST64m   : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP,
488                 [(store RFP64:$src, addr:$op)]>;
489
490 def FpSTP32m    : FpI<(ops f32mem:$op, RFP32:$src), OneArgFP, []>;  // The FpSTP* and FpIST* records below carry no selection pattern.
491 def FpSTP64m32  : FpI<(ops f32mem:$op, RFP64:$src), OneArgFP, []>;
492 def FpSTP64m    : FpI<(ops f64mem:$op, RFP64:$src), OneArgFP, []>;
493 def FpIST16m32  : FpI<(ops i16mem:$op, RFP32:$src), OneArgFP, []>;
494 def FpIST32m32  : FpI<(ops i32mem:$op, RFP32:$src), OneArgFP, []>;
495 def FpIST64m32  : FpI<(ops i64mem:$op, RFP32:$src), OneArgFP, []>;
496 def FpIST16m64  : FpI<(ops i16mem:$op, RFP64:$src), OneArgFP, []>;
497 def FpIST32m64  : FpI<(ops i32mem:$op, RFP64:$src), OneArgFP, []>;
498 def FpIST64m64  : FpI<(ops i64mem:$op, RFP64:$src), OneArgFP, []>;
499
500 def FLD32m   : FPI<0xD9, MRM0m, (ops f32mem:$src), "fld{s} $src">;  // Real (FPI) load/store forms with a single memory operand.
501 def FLD64m   : FPI<0xDD, MRM0m, (ops f64mem:$src), "fld{l} $src">;
502 def FILD16m  : FPI<0xDF, MRM0m, (ops i16mem:$src), "fild{s} $src">;
503 def FILD32m  : FPI<0xDB, MRM0m, (ops i32mem:$src), "fild{l} $src">;
504 def FILD64m  : FPI<0xDF, MRM5m, (ops i64mem:$src), "fild{ll} $src">;
505 def FST32m   : FPI<0xD9, MRM2m, (ops f32mem:$dst), "fst{s} $dst">;
506 def FST64m   : FPI<0xDD, MRM2m, (ops f64mem:$dst), "fst{l} $dst">;
507 def FSTP32m  : FPI<0xD9, MRM3m, (ops f32mem:$dst), "fstp{s} $dst">;
508 def FSTP64m  : FPI<0xDD, MRM3m, (ops f64mem:$dst), "fstp{l} $dst">;
509 def FIST16m  : FPI<0xDF, MRM2m, (ops i16mem:$dst), "fist{s} $dst">;
510 def FIST32m  : FPI<0xDB, MRM2m, (ops i32mem:$dst), "fist{l} $dst">;  // There is no 64-bit FIST record here; only the popping FISTP64m takes i64mem.
511 def FISTP16m : FPI<0xDF, MRM3m, (ops i16mem:$dst), "fistp{s} $dst">;
512 def FISTP32m : FPI<0xDB, MRM3m, (ops i32mem:$dst), "fistp{l} $dst">;
513 def FISTP64m : FPI<0xDF, MRM7m, (ops i64mem:$dst), "fistp{ll} $dst">;
514
515 // FISTTP requires SSE3 even though it's a FPStack op.  Hence these records use the unpredicated FpI_ template with Requires<[HasSSE3]> instead of FpI.
516 def FpISTT16m32  : FpI_<(ops i16mem:$op, RFP32:$src), OneArgFP,
517                 [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
518                 Requires<[HasSSE3]>;
519 def FpISTT32m32 : FpI_<(ops i32mem:$op, RFP32:$src), OneArgFP,
520                 [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
521                 Requires<[HasSSE3]>;
522 def FpISTT64m32  : FpI_<(ops i64mem:$op, RFP32:$src), OneArgFP,
523                 [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
524                 Requires<[HasSSE3]>;
525 def FpISTT16m64  : FpI_<(ops i16mem:$op, RFP64:$src), OneArgFP,
526                 [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
527                 Requires<[HasSSE3]>;
528 def FpISTT32m64  : FpI_<(ops i32mem:$op, RFP64:$src), OneArgFP,
529                 [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
530                 Requires<[HasSSE3]>;
531 def FpISTT64m64  : FpI_<(ops i64mem:$op, RFP64:$src), OneArgFP,
532                 [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
533                 Requires<[HasSSE3]>;
534
535 def FISTTP16m : FPI<0xDF, MRM1m, (ops i16mem:$dst), "fisttp{s} $dst">;  // Real (FPI) FISTTP forms, all MRM1m; the opcode byte differs per integer width.
536 def FISTTP32m : FPI<0xDB, MRM1m, (ops i32mem:$dst), "fisttp{l} $dst">;
537 def FISTTP64m : FPI<0xDD, MRM1m, (ops i64mem:$dst), "fisttp{ll} $dst">;
538
539 // FP Stack manipulation instructions.  Real (FPI) forms taking a single RST operand in AddRegFrm.
540 def FLDrr   : FPI<0xC0, AddRegFrm, (ops RST:$op), "fld $op">, D9;
541 def FSTrr   : FPI<0xD0, AddRegFrm, (ops RST:$op), "fst $op">, DD;
542 def FSTPrr  : FPI<0xD8, AddRegFrm, (ops RST:$op), "fstp $op">, DD;
543 def FXCH    : FPI<0xC8, AddRegFrm, (ops RST:$op), "fxch $op">, D9;
544
545 // Floating point constant loads: pseudo defs that match fpimm0 / fpimm1 for the RFP32 and RFP64 classes.
546 let isReMaterializable = 1 in {  // Applies to all four defs inside the braces.
547 def FpLD032 : FpI<(ops RFP32:$dst), ZeroArgFP,   // fpimm0 into RFP32.
548                 [(set RFP32:$dst, fpimm0)]>;
549 def FpLD132 : FpI<(ops RFP32:$dst), ZeroArgFP,   // fpimm1 into RFP32.
550                 [(set RFP32:$dst, fpimm1)]>;
551 def FpLD064 : FpI<(ops RFP64:$dst), ZeroArgFP,   // fpimm0 into RFP64.
552                 [(set RFP64:$dst, fpimm0)]>;
553 def FpLD164 : FpI<(ops RFP64:$dst), ZeroArgFP,   // fpimm1 into RFP64.
554                 [(set RFP64:$dst, fpimm1)]>;
555 }  // end let isReMaterializable
556
557 def FLD0 : FPI<0xEE, RawFrm, (ops), "fldz">, D9;  // "fldz": no explicit operands. Presumably the concrete form behind the FpLD0* pseudos — TODO confirm against the FP stackifier tables.
558 def FLD1 : FPI<0xE8, RawFrm, (ops), "fld1">, D9;  // "fld1": no explicit operands. Presumably the concrete form behind the FpLD1* pseudos — TODO confirm.
559
560
561 // Floating point compares (pseudo forms on RFP32 / RFP64). The FpUCOMr* defs have an empty pattern list; only the FpUCOMIr* defs match X86cmp.
562 def FpUCOMr32   : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
563                     []>;  // FPSW = cmp ST(0) with ST(i)
564 def FpUCOMIr32  : FpI<(ops RFP32:$lhs, RFP32:$rhs), CompareFP,
565                     [(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = cmp ST(0) with ST(i)
566 def FpUCOMr64   : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
567                     []>;  // FPSW = cmp ST(0) with ST(i)
568 def FpUCOMIr64  : FpI<(ops RFP64:$lhs, RFP64:$rhs), CompareFP,
569                     [(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = cmp ST(0) with ST(i)
570
571 def FUCOMr    : FPI<0xE0, AddRegFrm,    // FPSW = cmp ST(0) with ST(i)
572                     (ops RST:$reg),
573                     "fucom $reg">, DD, Imp<[ST0],[]>;  // ST0 is in the first Imp list (presumably implicit uses — TODO confirm); the second list is empty.
574 def FUCOMPr   : FPI<0xE8, AddRegFrm,    // FPSW = cmp ST(0) with ST(i), pop
575                   (ops RST:$reg),
576                   "fucomp $reg">, DD, Imp<[ST0],[]>;  // Same 0xE8 value as FUCOMIr / FUCOMIPr; differs in the DD class.
577 def FUCOMPPr  : FPI<0xE9, RawFrm,       // cmp ST(0) with ST(1), pop, pop
578                   (ops),
579                   "fucompp">, DA, Imp<[ST0],[]>;  // No explicit operand.
580
581 def FUCOMIr  : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i)
582                    (ops RST:$reg),
583                    "fucomi {$reg, %st(0)|%ST(0), $reg}">, DB, Imp<[ST0],[]>;  // The {a|b} group gives two operand orderings (presumably the AT&T and Intel syntax variants — TODO confirm).
584 def FUCOMIPr : FPI<0xE8, AddRegFrm,     // CC = cmp ST(0) with ST(i), pop
585                  (ops RST:$reg),
586                  "fucomip {$reg, %st(0)|%ST(0), $reg}">, DF, Imp<[ST0],[]>;  // Same 0xE8 value as FUCOMIr; differs in the DF class.
587
588 // Floating point flag ops.
589 def FNSTSW8r  : I<0xE0, RawFrm,                  // AX = fp flags
590                   (ops), "fnstsw", []>, DF, Imp<[],[AX]>;  // No explicit operands and no selection pattern; AX appears in the second Imp list.
591
592 def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control word
593                   (ops i16mem:$dst), "fnstcw $dst", []>;  // No selection pattern.
594 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
595                   (ops i16mem:$dst), "fldcw $dst", []>;  // NOTE(review): the operand is named $dst although the memory is the source here.
596
597 //===----------------------------------------------------------------------===//
598 // Non-Instruction Patterns
599 //===----------------------------------------------------------------------===//
600
601 // Required for RET of f32 / f64 values: an X86fld of an address selects FpLD32m or FpLD64m according to the value type operand.
602 def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
603 def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
604
605 // Required for CALLs which return f32 / f64 values: X86fst selects an FpST* def by source register class and memory value type.
606 def : Pat<(X86fst RFP32:$src, addr:$op, f32), (FpST32m addr:$op, RFP32:$src)>;
607 def : Pat<(X86fst RFP64:$src, addr:$op, f32), (FpST64m32 addr:$op, RFP64:$src)>;  // RFP64 source stored with an f32 memory type.
608 def : Pat<(X86fst RFP64:$src, addr:$op, f64), (FpST64m addr:$op, RFP64:$src)>;
609
610 // Floating point constants -0.0 and -1.0: built by applying FpCHS32 / FpCHS64 to the FpLD0* / FpLD1* constant loads; FPStack only.
611 def : Pat<(f32 fpimmneg0), (FpCHS32 (FpLD032))>, Requires<[FPStack]>;
612 def : Pat<(f32 fpimmneg1), (FpCHS32 (FpLD132))>, Requires<[FPStack]>;
613 def : Pat<(f64 fpimmneg0), (FpCHS64 (FpLD064))>, Requires<[FPStack]>;
614 def : Pat<(f64 fpimmneg1), (FpCHS64 (FpLD164))>, Requires<[FPStack]>;
615
616 // Used to convert i64 to f64 since there isn't an SSE version.
617 def : Pat<(X86fildflag addr:$src, i64), (FpILD64m64 addr:$src)>;
618
619 def : Pat<(extloadf32 addr:$src), (FpMOV3264 (FpLD32m addr:$src))>, Requires<[FPStack]>;  // Extending f32 load: FpLD32m followed by FpMOV3264; FPStack only.
620 def : Pat<(fextend RFP32:$src), (FpMOV3264 RFP32:$src)>, Requires<[FPStack]>;  // fextend of an RFP32 value via FpMOV3264; FPStack only.