1 // Group template arguments that can be derived from the vector type (EltNum x
2 // EltVT). These are things like the register class for the writemask, etc.
3 // The idea is to pass one of these as the template argument rather than the
4 // individual arguments.
5 class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
9 // Corresponding mask register class.
10 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
12 // Corresponding write-mask register class.
13 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
15 // The GPR register class that can hold the write mask. Use GR8 for fewer
16 // than 8 elements. Use shift-right and equal to work around the lack of
19 !cast<RegisterClass>("GR" #
20 !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
22 // Suffix used in the instruction mnemonic.
23 string Suffix = suffix;
26 ValueType VT = !cast<ValueType>("v" # NumElts # EltVT);
28 string EltTypeName = !cast<string>(EltVT);
29 // Size of the element type in bits, e.g. 32 for v16i32.
30 string EltSize = !subst("i", "", !subst("f", "", EltTypeName));
32 // "i" for integer types and "f" for floating-point types
33 string TypeVariantName = !subst(EltSize, "", EltTypeName);
35 // Size of RC in bits, e.g. 512 for VR512.
38 // The corresponding memory operand, e.g. i512mem for VR512.
39 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
41 // The corresponding float type, e.g. v16f32 for v16i32
42 ValueType FloatVT = !if (!eq(TypeVariantName, "i"),
43 !cast<ValueType>("v" # NumElts # "f" # EltSize),
46 // The string to specify embedded broadcast in assembly.
47 string BroadcastStr = "{1to" # NumElts # "}";
// Instantiations of X86VectorVTInfo for the 512-bit element types used by
// the masking multiclasses below ("d" / "q" are the mnemonic suffixes).
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
53 // Common base class of AVX512_masking and AVX512_masking_3src.
54 multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
55 dag MaskingIns, dag ZeroMaskingIns,
57 string AttSrcAsm, string IntelSrcAsm,
58 dag RHS, dag MaskingRHS, ValueType OpVT,
59 RegisterClass RC, RegisterClass KRC,
60 string MaskingConstraint = ""> {
61 def NAME: AVX512<O, F, Outs, Ins,
62 OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
63 "$dst, "#IntelSrcAsm#"}",
64 [(set RC:$dst, RHS)]>;
66 // Prefer over VMOV*rrk Pat<>
67 let AddedComplexity = 20 in
68 def NAME#k: AVX512<O, F, Outs, MaskingIns,
69 OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
70 "$dst {${mask}}, "#IntelSrcAsm#"}",
71 [(set RC:$dst, MaskingRHS)]>,
73 // In case of the 3src subclass this is overridden with a let.
74 string Constraints = MaskingConstraint;
76 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
77 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
78 OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
79 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
81 (vselect KRC:$mask, RHS,
83 (v16i32 immAllZerosV)))))]>,
87 // This multiclass generates the unconditional/non-masking, the masking and
88 // the zero-masking variant of the instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
90 multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
92 string AttSrcAsm, string IntelSrcAsm,
93 dag RHS, ValueType OpVT, RegisterClass RC,
95 AVX512_masking_common<O, F, Outs,
97 !con((ins RC:$src0, KRC:$mask), Ins),
98 !con((ins KRC:$mask), Ins),
99 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
100 (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
103 // Similar to AVX512_masking but in this case one of the source operands
104 // ($src1) is already tied to $dst so we just use that for the preserved
105 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
107 multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
109 string AttSrcAsm, string IntelSrcAsm,
110 dag RHS, ValueType OpVT,
111 RegisterClass RC, RegisterClass KRC> :
112 AVX512_masking_common<O, F, Outs,
113 !con((ins RC:$src1), NonTiedIns),
114 !con((ins RC:$src1), !con((ins KRC:$mask),
116 !con((ins RC:$src1), !con((ins KRC:$mask),
118 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
119 (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
121 // Bitcasts between 512-bit vector types. Return the original type since
122 // no instruction is needed for the conversion
123 let Predicates = [HasAVX512] in {
124 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
125 def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
126 def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
127 def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
128 def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
129 def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
130 def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
131 def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
132 def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
133 def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
134 def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
135 def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
136 def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
137 def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
138 def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
139 def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
140 def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
141 def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
142 def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
143 def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
144 def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
145 def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
146 def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
147 def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
148 def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
// Deleted: this line was a byte-for-byte duplicate of the v32i16<-v16f32
// bitconvert pattern immediately above and emitted the same pattern twice.
150 def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
151 def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
152 def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
153 def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
154 def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
156 def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
157 def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
158 def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
159 def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
160 def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
161 def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
162 def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
163 def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
164 def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
165 def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
166 def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
167 def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
168 def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
169 def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
170 def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
171 def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
172 def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
173 def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
174 def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
175 def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
176 def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
177 def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
178 def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
179 def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
180 def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
181 def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
182 def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
183 def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
184 def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
185 def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
187 // Bitcasts between 256-bit vector types. Return the original type since
188 // no instruction is needed for the conversion
189 def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
190 def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
191 def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
192 def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
193 def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
194 def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
195 def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
196 def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
197 def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
198 def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
199 def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
200 def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
201 def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
202 def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
203 def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
204 def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
205 def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
206 def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
207 def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
208 def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
209 def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
210 def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
211 def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
212 def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
213 def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
214 def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
215 def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
216 def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
217 def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
218 def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
// AVX-512: The VPXOR instruction writes zero to its upper part, so it is safe to use it to build zeros.
225 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
226 isPseudo = 1, Predicates = [HasAVX512] in {
227 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
228 [(set VR512:$dst, (v16f32 immAllZerosV))]>;
231 let Predicates = [HasAVX512] in {
232 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
233 def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
234 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
237 //===----------------------------------------------------------------------===//
238 // AVX-512 - VECTOR INSERT
241 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
242 def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
243 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
244 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
245 []>, EVEX_4V, EVEX_V512;
247 def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
248 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
249 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
250 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
253 // -- 64x4 fp form --
254 let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
255 def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
256 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
257 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
258 []>, EVEX_4V, EVEX_V512, VEX_W;
260 def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
261 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
262 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
263 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
265 // -- 32x4 integer form --
266 let hasSideEffects = 0 in {
267 def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
268 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
269 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
270 []>, EVEX_4V, EVEX_V512;
272 def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
273 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
274 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
275 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
278 let hasSideEffects = 0 in {
280 def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
281 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
282 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
283 []>, EVEX_4V, EVEX_V512, VEX_W;
285 def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
286 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
287 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
288 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
291 def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
292 (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
293 (INSERT_get_vinsert128_imm VR512:$ins))>;
294 def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
295 (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
296 (INSERT_get_vinsert128_imm VR512:$ins))>;
297 def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
298 (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
299 (INSERT_get_vinsert128_imm VR512:$ins))>;
300 def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
301 (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
302 (INSERT_get_vinsert128_imm VR512:$ins))>;
304 def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
305 (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
306 (INSERT_get_vinsert128_imm VR512:$ins))>;
307 def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
308 (bc_v4i32 (loadv2i64 addr:$src2)),
309 (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
310 (INSERT_get_vinsert128_imm VR512:$ins))>;
311 def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
312 (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
313 (INSERT_get_vinsert128_imm VR512:$ins))>;
314 def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
315 (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
316 (INSERT_get_vinsert128_imm VR512:$ins))>;
// 256-bit FP register inserts into a 512-bit vector: match vinsert256_insert
// onto the 64x4 instruction, converting the element index via
// INSERT_get_vinsert256_imm.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// This is a 256-bit (64x4) insert, so it must match the vinsert256_insert
// PatFrag — consistent with INSERT_get_vinsert256_imm below and with the
// FP siblings above. vinsert128_insert was a copy-paste error from the
// 32x4 patterns.
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit (64x4) insert: use vinsert256_insert, matching the
// INSERT_get_vinsert256_imm index conversion (vinsert128_insert was a
// copy-paste error from the 32x4 patterns).
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
331 def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
332 (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
333 (INSERT_get_vinsert256_imm VR512:$ins))>;
334 def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
335 (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
336 (INSERT_get_vinsert256_imm VR512:$ins))>;
337 def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
338 (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
339 (INSERT_get_vinsert256_imm VR512:$ins))>;
340 def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
341 (bc_v8i32 (loadv4i64 addr:$src2)),
342 (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
343 (INSERT_get_vinsert256_imm VR512:$ins))>;
345 // vinsertps - insert f32 to XMM
346 def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
347 (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
348 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
349 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
351 def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
352 (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
353 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
354 [(set VR128X:$dst, (X86insertps VR128X:$src1,
355 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
356 imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
358 //===----------------------------------------------------------------------===//
359 // AVX-512 VECTOR EXTRACT
361 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
363 def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
364 (ins VR512:$src1, i8imm:$src2),
365 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
366 []>, EVEX, EVEX_V512;
367 def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
368 (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
369 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
370 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
373 def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
374 (ins VR512:$src1, i8imm:$src2),
375 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
376 []>, EVEX, EVEX_V512, VEX_W;
378 def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
379 (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
380 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
381 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
384 let hasSideEffects = 0 in {
386 def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
387 (ins VR512:$src1, i8imm:$src2),
388 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
389 []>, EVEX, EVEX_V512;
390 def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
391 (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
392 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
393 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
396 def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
397 (ins VR512:$src1, i8imm:$src2),
398 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
399 []>, EVEX, EVEX_V512, VEX_W;
401 def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
402 (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
403 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
404 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
407 def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
408 (v4f32 (VEXTRACTF32x4rr VR512:$src1,
409 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// Restore the explicit (v16i32 ...) source type: every sibling extract
// pattern annotates its VR512 source, and leaving it untyped makes
// TableGen's type inference for the pattern ambiguous.
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTF32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
415 def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
416 (v2f64 (VEXTRACTF32x4rr VR512:$src1,
417 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
419 def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
420 (v2i64 (VEXTRACTI32x4rr VR512:$src1,
421 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
424 def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
425 (v8f32 (VEXTRACTF64x4rr VR512:$src1,
426 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
428 def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
429 (v8i32 (VEXTRACTI64x4rr VR512:$src1,
430 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
432 def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
433 (v4f64 (VEXTRACTF64x4rr VR512:$src1,
434 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
436 def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
437 (v4i64 (VEXTRACTI64x4rr VR512:$src1,
438 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
// A 256-bit subvector extract from the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
// Likewise, a 128-bit subvector extract from position 0 is just a sub_xmm
// subregister copy.
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
462 // A 128-bit subvector insert to the first 512-bit vector position
463 // is a subregister copy that needs no instruction.
464 def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
465 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
466 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
468 def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
469 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
470 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
472 def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
473 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
474 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
476 def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
477 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
478 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// A 256-bit subvector insert into the low half of a 512-bit vector is a
// sub_ymm subregister insert into an IMPLICIT_DEF; no instruction needed.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
490 // vextractps - extract 32 bits from XMM
491 def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
492 (ins VR128X:$src1, u32u8imm:$src2),
493 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
494 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
497 def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
498 (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
499 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
500 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
501 addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
503 //===---------------------------------------------------------------------===//
506 multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
507 RegisterClass DestRC,
508 RegisterClass SrcRC, X86MemOperand x86memop> {
509 def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
510 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
512 def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
513 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
515 let ExeDomain = SSEPackedSingle in {
516 defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
518 EVEX_V512, EVEX_CD8<32, CD8VT1>;
521 let ExeDomain = SSEPackedDouble in {
522 defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
524 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Fold a broadcast of a scalar load directly into the memory form of the
// broadcast instruction.
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;
// Lower the explicit broadcast-from-memory intrinsics the same way.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
537 multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
538 RegisterClass SrcRC, RegisterClass KRC> {
539 def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
540 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
541 []>, EVEX, EVEX_V512;
542 def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
543 (ins KRC:$mask, SrcRC:$src),
544 !strconcat(OpcodeStr,
545 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
546 []>, EVEX, EVEX_V512, EVEX_KZ;
549 defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
550 defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
553 def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
554 (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
556 def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
557 (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
// Broadcast of a GPR value: the masked (X86VBroadcastm) variants map onto
// the zero-masking register forms.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
// Corresponding intrinsic forms.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
573 def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
574 (v16i32 immAllZerosV), (i16 GR16:$mask))),
575 (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
576 def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
577 (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
578 (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
580 multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
581 X86MemOperand x86memop, PatFrag ld_frag,
582 RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
584 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
585 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
587 (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
588 def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
590 !strconcat(OpcodeStr,
591 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
593 (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
596 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
597 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
599 (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
600 def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
602 !strconcat(OpcodeStr,
603 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
604 [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
605 (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
609 defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
610 loadi32, VR512, v16i32, v4i32, VK16WM>,
611 EVEX_V512, EVEX_CD8<32, CD8VT1>;
612 defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
613 loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
614 EVEX_CD8<64, CD8VT1>;
616 multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
617 X86MemOperand x86memop, PatFrag ld_frag,
620 def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
621 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
623 def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
625 !strconcat(OpcodeStr,
626 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
631 defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
632 i128mem, loadv2i64, VK16WM>,
633 EVEX_V512, EVEX_CD8<32, CD8VT4>;
634 defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
635 i256mem, loadv4i64, VK16WM>, VEX_W,
636 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Broadcast from the low element of a 128-bit register: intrinsic and
// X86VBroadcast forms select the register-source instructions.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// The scalar FP value is first copied into a VR128X register.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
661 let Predicates = [HasAVX512] in {
662 def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
664 (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
665 addr:$src)), sub_ymm)>;
667 //===----------------------------------------------------------------------===//
668 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
671 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
672 RegisterClass DstRC, RegisterClass KRC,
673 ValueType OpVT, ValueType SrcVT> {
674 def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
675 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
679 let Predicates = [HasCDI] in {
680 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
681 VK16, v16i32, v16i1>, EVEX_V512;
682 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
683 VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
686 //===----------------------------------------------------------------------===//
689 // -- immediate form --
// VPERM with an 8-bit immediate control: register (ri) and memory (mi)
// source forms. The [(set ...)] openers and some trailing modifiers are
// elided in this view.
690 multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
691 SDNode OpNode, PatFrag mem_frag,
692 X86MemOperand x86memop, ValueType OpVT> {
693 def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
694 (ins RC:$src1, i8imm:$src2),
695 !strconcat(OpcodeStr,
696 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
698 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
700 def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
701 (ins x86memop:$src1, i8imm:$src2),
702 !strconcat(OpcodeStr,
703 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
705 (OpVT (OpNode (mem_frag addr:$src1),
706 (i8 imm:$src2))))]>, EVEX;
// 512-bit immediate permutes; only 64-bit element variants exist (VEX_W).
709 defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
710 i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
711 let ExeDomain = SSEPackedDouble in
712 defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
713 f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
715 // -- VPERM - register form --
// Variable permute: the control vector comes from a register ($src2 for rr)
// or memory ($src2 for rm), selected via the X86VPermv node.
716 multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
717 PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
719 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
720 (ins RC:$src1, RC:$src2),
721 !strconcat(OpcodeStr,
722 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
724 (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
726 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
727 (ins RC:$src1, x86memop:$src2),
728 !strconcat(OpcodeStr,
729 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
731 (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
// 512-bit variable permutes. Integer d/q share opcode 0x36 and FP ps/pd
// share 0x16; the q/pd variants are distinguished by VEX_W.
735 defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
736 v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
737 defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
738 v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
739 let ExeDomain = SSEPackedSingle in
740 defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
741 v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
742 let ExeDomain = SSEPackedDouble in
743 defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
744 v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
746 // -- VPERM2I - 3 source operands form --
// Three-source permute (vpermi2*/vpermt2*): $src1 is tied to $dst, and for
// each register/memory form there are plain, merge-masked (k) and
// zero-masked (kz) variants. Several pattern-list openers and closing
// braces are elided in this view.
747 multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
748 PatFrag mem_frag, X86MemOperand x86memop,
749 SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
750 let Constraints = "$src1 = $dst" in {
751 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
752 (ins RC:$src1, RC:$src2, RC:$src3),
753 !strconcat(OpcodeStr,
754 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
756 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
// Merge-masking: elements where the mask bit is clear keep their old value,
// expressed via vselect on KRC:$mask.
759 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
760 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
761 !strconcat(OpcodeStr,
762 " \t{$src3, $src2, $dst {${mask}}|"
763 "$dst {${mask}}, $src2, $src3}"),
764 [(set RC:$dst, (OpVT (vselect KRC:$mask,
765 (OpNode RC:$src1, RC:$src2,
770 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
771 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
772 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
773 !strconcat(OpcodeStr,
774 " \t{$src3, $src2, $dst {${mask}} {z} |",
775 "$dst {${mask}} {z}, $src2, $src3}"),
776 [(set RC:$dst, (OpVT (vselect KRC:$mask,
777 (OpNode RC:$src1, RC:$src2,
// Zero-masking selects an all-zeros vector for masked-off elements.
780 (v16i32 immAllZerosV))))))]>,
783 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
784 (ins RC:$src1, RC:$src2, x86memop:$src3),
785 !strconcat(OpcodeStr,
786 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
788 (OpVT (OpNode RC:$src1, RC:$src2,
789 (mem_frag addr:$src3))))]>, EVEX_4V;
791 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
792 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
793 !strconcat(OpcodeStr,
794 " \t{$src3, $src2, $dst {${mask}}|"
795 "$dst {${mask}}, $src2, $src3}"),
797 (OpVT (vselect KRC:$mask,
798 (OpNode RC:$src1, RC:$src2,
799 (mem_frag addr:$src3)),
803 let AddedComplexity = 10 in // Prefer over the rrkz variant
804 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
805 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
806 !strconcat(OpcodeStr,
807 " \t{$src3, $src2, $dst {${mask}} {z}|"
808 "$dst {${mask}} {z}, $src2, $src3}"),
810 (OpVT (vselect KRC:$mask,
811 (OpNode RC:$src1, RC:$src2,
812 (mem_frag addr:$src3)),
814 (v16i32 immAllZerosV))))))]>,
// 512-bit vpermi2* instantiations (index operand is tied to $dst).
// Integer d/q share opcode 0x76, FP ps/pd share 0x77; q/pd use VEX_W.
// FIX: VPERMI2PS/VPERMI2PD previously used i512mem; they operate on FP
// vectors (memopv16f32/memopv8f64) and should use f512mem like the other
// FP 512-bit defs in this file (VPERMPSZ/VPERMPDZ, VCMPPSZ/VCMPPDZ).
818 defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
819 i512mem, X86VPermiv3, v16i32, VK16WM>,
820 EVEX_V512, EVEX_CD8<32, CD8VF>;
821 defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
822 i512mem, X86VPermiv3, v8i64, VK8WM>,
823 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
824 defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
825 f512mem, X86VPermiv3, v16f32, VK16WM>,
826 EVEX_V512, EVEX_CD8<32, CD8VF>;
827 defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
828 f512mem, X86VPermiv3, v8f64, VK8WM>,
829 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// vpermt2* wrapper: reuses avx512_perm_3src for the instruction defs and
// adds patterns mapping the mask_vpermt intrinsics onto them. Note the
// intrinsic operand order (idx, src1, src2) differs from the instruction
// order (src1, idx, src2). The inheritance argument list and some lines
// are elided in this view.
831 multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
832 PatFrag mem_frag, X86MemOperand x86memop,
833 SDNode OpNode, ValueType OpVT, RegisterClass KRC,
834 ValueType MaskVT, RegisterClass MRC> :
835 avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
// Mask == -1 means "unmasked": select the plain rr form.
837 def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
838 VR512:$idx, VR512:$src1, VR512:$src2, -1)),
839 (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
// GPR mask is copied into the writemask register class for the rrk form.
841 def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
842 VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
843 (!cast<Instruction>(NAME#rrk) VR512:$src1,
844 (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
847 defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
848 X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
849 EVEX_V512, EVEX_CD8<32, CD8VF>;
850 defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
851 X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
852 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
853 defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
854 X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
855 EVEX_V512, EVEX_CD8<32, CD8VF>;
856 defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
857 X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
858 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
860 //===----------------------------------------------------------------------===//
861 // AVX-512 - BLEND using mask
// Masked blend: dst gets $src2 where the mask bit is set, $src1 otherwise.
863 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
864 RegisterClass KRC, RegisterClass RC,
865 X86MemOperand x86memop, PatFrag mem_frag,
866 SDNode OpNode, ValueType vt> {
867 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
868 (ins KRC:$mask, RC:$src1, RC:$src2),
869 !strconcat(OpcodeStr,
870 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
// NOTE: $src2 and $src1 are deliberately passed to OpNode (vselect) in
// reverse order relative to the ins list; the mask selects $src2.
871 [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
872 (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
// Memory form carries no pattern; only the encoding is defined.
874 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
875 (ins KRC:$mask, RC:$src1, x86memop:$src2),
876 !strconcat(OpcodeStr,
877 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
878 []>, EVEX_4V, EVEX_K;
881 let ExeDomain = SSEPackedSingle in
882 defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
883 VK16WM, VR512, f512mem,
884 memopv16f32, vselect, v16f32>,
885 EVEX_CD8<32, CD8VF>, EVEX_V512;
886 let ExeDomain = SSEPackedDouble in
887 defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
888 VK8WM, VR512, f512mem,
889 memopv8f64, vselect, v8f64>,
890 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// FP blend intrinsics: the GPR mask is moved into the writemask class
// (VK16WM/VK8WM) expected by the instruction's $mask operand.
892 def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
893 (v16f32 VR512:$src2), (i16 GR16:$mask))),
894 (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
895 VR512:$src1, VR512:$src2)>;
897 def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
898 (v8f64 VR512:$src2), (i8 GR8:$mask))),
899 (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
900 VR512:$src1, VR512:$src2)>;
// Integer masked blends (vpblendmd/vpblendmq), 512-bit only.
// FIX: these previously passed f512mem although their load fragments are
// the integer memopv16i32/memopv8i64; use i512mem for consistency with the
// other 512-bit integer defs in this file (VPERMDZ, VPCMP*Z).
902 defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
903 VK16WM, VR512, i512mem,
904 memopv16i32, vselect, v16i32>,
905 EVEX_CD8<32, CD8VF>, EVEX_V512;
907 defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
908 VK8WM, VR512, i512mem,
909 memopv8i64, vselect, v8i64>,
910 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Integer blend intrinsics, mirroring the FP patterns above.
// FIX: copy the GPR mask into the writemask classes VK16WM/VK8WM — the
// classes VPBLENDMDZrr/VPBLENDMQZrr declare for their $mask operand —
// instead of the plain VK16/VK8, matching the vblendmps/vblendmpd patterns.
912 def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
913 (v16i32 VR512:$src2), (i16 GR16:$mask))),
914 (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
915 VR512:$src1, VR512:$src2)>;
917 def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
918 (v8i64 VR512:$src2), (i8 GR8:$mask))),
919 (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
920 VR512:$src1, VR512:$src2)>;
// 256-bit vselect is implemented by widening both operands to 512 bits
// (SUBREG_TO_REG into the low ymm), widening the v8i1 mask to VK16WM,
// running the 512-bit blend, and extracting the low ymm again. The
// EXTRACT_SUBREG wrapper lines and closing brace are elided in this view.
922 let Predicates = [HasAVX512] in {
923 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
924 (v8f32 VR256X:$src2))),
926 (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
927 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
928 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
930 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
931 (v8i32 VR256X:$src2))),
933 (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
934 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
935 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
937 //===----------------------------------------------------------------------===//
938 // Compare Instructions
939 //===----------------------------------------------------------------------===//
941 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar compare into a 1-bit mask (VK1). The *_alt forms accept an
// explicit immediate condition code for the assembler only.
942 multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
943 Operand CC, SDNode OpNode, ValueType VT,
944 PatFrag ld_frag, string asm, string asm_alt> {
945 def rr : AVX512Ii8<0xC2, MRMSrcReg,
946 (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
947 [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
948 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
949 def rm : AVX512Ii8<0xC2, MRMSrcMem,
950 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
951 [(set VK1:$dst, (OpNode (VT RC:$src1),
952 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
953 let isAsmParserOnly = 1, hasSideEffects = 0 in {
954 def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
955 (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
956 asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
957 def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
958 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
959 asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Both ss and sd select through X86cmpms (scalar mask compare). Trailing
// prefix modifiers (XS/XD etc.) are elided in this view.
963 let Predicates = [HasAVX512] in {
964 defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
965 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
966 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
968 defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
969 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
970 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
// Packed integer equality/greater-than compare producing a mask (KRC).
974 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
975 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
976 SDNode OpNode, ValueType vt> {
977 def rr : AVX512BI<opc, MRMSrcReg,
978 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
979 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
980 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
981 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
982 def rm : AVX512BI<opc, MRMSrcMem,
983 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
984 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
985 [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
986 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// 512-bit vpcmpeq/vpcmpgt instantiations (some trailing modifiers elided).
989 defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
990 memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512,
992 defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
993 memopv8i64, X86pcmpeqm, v8i64>, T8PD, EVEX_V512,
994 VEX_W, EVEX_CD8<64, CD8VF>;
996 defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
997 memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512,
999 defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
1000 memopv8i64, X86pcmpgtm, v8i64>, T8PD, EVEX_V512,
1001 VEX_W, EVEX_CD8<64, CD8VF>;
// 256-bit integer compares: widen both sources to zmm, compare, and take
// the low 8 mask bits via COPY_TO_REGCLASS to VK8.
1003 def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1004 (COPY_TO_REGCLASS (VPCMPGTDZrr
1005 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1006 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
1008 def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1009 (COPY_TO_REGCLASS (VPCMPEQDZrr
1010 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1011 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
// Packed integer compare with a condition code (vpcmp{cc}d/q and unsigned
// variants). The *_alt asm-parser-only forms take a raw i8 immediate, and
// the *k_alt forms additionally accept a writemask (WMRC).
1013 multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
1014 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
1015 SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
1016 def rri : AVX512AIi8<opc, MRMSrcReg,
1017 (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
1018 !strconcat("vpcmp${cc}", Suffix,
1019 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1020 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
1021 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1022 def rmi : AVX512AIi8<opc, MRMSrcMem,
1023 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
1024 !strconcat("vpcmp${cc}", Suffix,
1025 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1026 [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
1027 imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1028 // Accept explicit immediate argument form instead of comparison code.
1029 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1030 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
1031 (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1032 !strconcat("vpcmp", Suffix,
1033 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1034 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1035 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
1036 (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
1037 !strconcat("vpcmp", Suffix,
1038 "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
1039 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1040 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
1041 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1042 !strconcat("vpcmp", Suffix,
1043 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1044 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1045 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
1046 (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
1047 !strconcat("vpcmp", Suffix,
1048 "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
1049 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Signed (X86cmpm, 0x1F) and unsigned (X86cmpmu, 0x1E) 512-bit variants.
1053 defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32,
1054 X86cmpm, v16i32, AVXCC, "d">,
1055 EVEX_V512, EVEX_CD8<32, CD8VF>;
1056 defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32,
1057 X86cmpmu, v16i32, AVXCC, "ud">,
1058 EVEX_V512, EVEX_CD8<32, CD8VF>;
1060 defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64,
1061 X86cmpm, v8i64, AVXCC, "q">,
1062 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1063 defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64,
1064 X86cmpmu, v8i64, AVXCC, "uq">,
1065 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1067 // avx512_cmp_packed - compare packed instructions
// Packed FP compare with condition code; rrib is the {sae}
// (suppress-all-exceptions) broadcast/rounding form.
1068 multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
1069 X86MemOperand x86memop, ValueType vt,
1070 string suffix, Domain d> {
1071 def rri : AVX512PIi8<0xC2, MRMSrcReg,
1072 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1073 !strconcat("vcmp${cc}", suffix,
1074 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1075 [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
1076 def rrib: AVX512PIi8<0xC2, MRMSrcReg,
1077 (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1078 !strconcat("vcmp${cc}", suffix,
1079 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
1081 def rmi : AVX512PIi8<0xC2, MRMSrcMem,
1082 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
1083 !strconcat("vcmp${cc}", suffix,
1084 " \t{$src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
// NOTE(review): the rmi pattern uses the generic `memop` fragment rather
// than a mem_frag parameter like the other multiclasses here — looks
// intentional for FP memops but worth confirming against the full file.
1086 (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
1088 // Accept explicit immediate argument form instead of comparison code.
1089 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1090 def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
1091 (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1092 !strconcat("vcmp", suffix,
1093 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
1094 def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
1095 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1096 !strconcat("vcmp", suffix,
1097 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
1101 defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
1102 "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
1103 EVEX_CD8<32, CD8VF>;
1104 defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
1105 "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
1106 EVEX_CD8<64, CD8VF>;
// 256-bit compares with a condition code: widen to zmm, run the 512-bit
// compare, and narrow the mask to VK8. The trailing imm/COPY_TO_REGCLASS
// lines of each pattern are elided in this view.
1108 def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
1109 (COPY_TO_REGCLASS (VCMPPSZrri
1110 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1111 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1113 def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1114 (COPY_TO_REGCLASS (VPCMPDZrri
1115 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1116 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1118 def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1119 (COPY_TO_REGCLASS (VPCMPUDZrri
1120 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1121 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
// mask_cmp_ps/pd intrinsics with an all-ones mask: the rounding-mode
// discriminator lines are elided; the rrib form is presumably selected for
// the {sae} rounding argument and rri otherwise — confirm in the full file.
1124 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1125 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1127 (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
1128 (I8Imm imm:$cc)), GR16)>;
1130 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1131 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1133 (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
1134 (I8Imm imm:$cc)), GR8)>;
1136 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1137 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1139 (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
1140 (I8Imm imm:$cc)), GR16)>;
1142 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1143 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1145 (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
1146 (I8Imm imm:$cc)), GR8)>;
1148 // Mask register copy, including
1149 // - copy between mask registers
1150 // - load/store mask registers
1151 // - copy from GPR to mask register and vice versa
// kk = mask-to-mask move, km = load, mk = store. Only the load form
// carries a selection pattern; kk/mk are encoding-only.
1153 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
1154 string OpcodeStr, RegisterClass KRC,
1155 ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
1156 let hasSideEffects = 0 in {
1157 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1158 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1160 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
1161 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1162 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
1164 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
1165 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// kr = GPR-to-mask, rk = mask-to-GPR; both encoding-only.
1169 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
1171 RegisterClass KRC, RegisterClass GRC> {
1172 let hasSideEffects = 0 in {
1173 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
1174 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1175 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
1176 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOV width variants, gated by feature: b needs DQI, w is baseline
// AVX-512, d/q need BWI. Prefix modifier lines are partially elided.
1180 let Predicates = [HasDQI] in
1181 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
1183 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
1186 let Predicates = [HasAVX512] in
1187 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
1189 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
1192 let Predicates = [HasBWI] in {
1193 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
1194 i32mem>, VEX, PD, VEX_W;
1195 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
1199 let Predicates = [HasBWI] in {
1200 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
1201 i64mem>, VEX, PS, VEX_W;
1202 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
1206 // GR from/to mask register
// bitconvert between integers and mask vectors, per feature level.
1207 let Predicates = [HasDQI] in {
1208 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1209 (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
1210 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1211 (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
1213 let Predicates = [HasAVX512] in {
1214 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
1215 (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
1216 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
1217 (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
1219 let Predicates = [HasBWI] in {
1220 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
1221 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
1223 let Predicates = [HasBWI] in {
1224 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
1225 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
// Mask stores and loads through memory.
1229 let Predicates = [HasDQI] in {
1230 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1231 (KMOVBmk addr:$dst, VK8:$src)>;
1233 let Predicates = [HasAVX512] in {
1234 def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
1235 (KMOVWmk addr:$dst, VK16:$src)>;
// Without DQI, an 8-bit mask store goes through KMOVW after widening the
// mask to VK16.
1236 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1237 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
1238 def : Pat<(i1 (load addr:$src)),
1239 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
1240 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
1241 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
1243 let Predicates = [HasBWI] in {
1244 def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
1245 (KMOVDmk addr:$dst, VK32:$src)>;
1247 let Predicates = [HasBWI] in {
1248 def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
1249 (KMOVQmk addr:$dst, VK64:$src)>;
// i1 truncation masks the GPR with 1 before moving into a mask register;
// zext of VK1 goes through KMOVWrk and an AND with 1. Some intermediate
// lines (COPY_TO_REGCLASS/EXTRACT_SUBREG wrappers) are elided in this view.
1252 let Predicates = [HasAVX512] in {
1253 def : Pat<(i1 (trunc (i64 GR64:$src))),
1254 (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
1257 def : Pat<(i1 (trunc (i32 GR32:$src))),
1258 (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
1260 def : Pat<(i1 (trunc (i8 GR8:$src))),
1262 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
1264 def : Pat<(i1 (trunc (i16 GR16:$src))),
1266 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
1269 def : Pat<(i32 (zext VK1:$src)),
1270 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
1271 def : Pat<(i8 (zext VK1:$src)),
1274 (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
1275 def : Pat<(i64 (zext VK1:$src)),
1276 (AND64ri8 (SUBREG_TO_REG (i64 0),
1277 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
1278 def : Pat<(i16 (zext VK1:$src)),
1280 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
// scalar_to_vector of a 1-bit mask is just a register-class change.
1282 def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
1283 (COPY_TO_REGCLASS VK1:$src, VK16)>;
1284 def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
1285 (COPY_TO_REGCLASS VK1:$src, VK8)>;
1287 let Predicates = [HasBWI] in {
1288 def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
1289 (COPY_TO_REGCLASS VK1:$src, VK32)>;
1290 def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
1291 (COPY_TO_REGCLASS VK1:$src, VK64)>;
1295 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1296 let Predicates = [HasAVX512] in {
1297 // GR from/to 8-bit mask without native support
// No KMOVB without DQI: go through KMOVW with sub-register wrapping.
// The EXTRACT_SUBREG/COPY_TO_REGCLASS wrapper lines are elided here.
1298 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1300 (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
1302 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1304 (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
// Extracting element 0 of a mask vector is a register-class change to VK1.
1307 def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
1308 (COPY_TO_REGCLASS VK16:$src, VK1)>;
1309 def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
1310 (COPY_TO_REGCLASS VK8:$src, VK1)>;
1312 let Predicates = [HasBWI] in {
1313 def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
1314 (COPY_TO_REGCLASS VK32:$src, VK1)>;
1315 def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
1316 (COPY_TO_REGCLASS VK64:$src, VK1)>;
1319 // Mask unary operation
// Single-operand mask ops (e.g. knot); the trailing `Predicate prd>`
// parameter line is elided in this view.
1321 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
1322 RegisterClass KRC, SDPatternOperator OpNode,
1324 let Predicates = [prd] in
1325 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1326 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1327 [(set KRC:$dst, (OpNode KRC:$src))]>;
// Instantiates all four widths with the matching feature predicate.
1330 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
1331 SDPatternOperator OpNode> {
1332 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1334 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1335 HasAVX512>, VEX, PS;
1336 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1337 HasBWI>, VEX, PD, VEX_W;
1338 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1339 HasBWI>, VEX, PS, VEX_W;
1342 defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
// Maps the knot GPR intrinsic onto the 16-bit instruction.
1344 multiclass avx512_mask_unop_int<string IntName, string InstName> {
1345 let Predicates = [HasAVX512] in
1346 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1348 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1349 (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
1351 defm : avx512_mask_unop_int<"knot", "KNOT">;
// xor with all-ones is a mask NOT; each width gated on its feature.
1353 let Predicates = [HasDQI] in
1354 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
1355 let Predicates = [HasAVX512] in
1356 def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
1357 let Predicates = [HasBWI] in
1358 def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
1359 let Predicates = [HasBWI] in
1360 def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
1362 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
1363 let Predicates = [HasAVX512] in {
1364 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
1365 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
// (not VK8) takes the same KNOTW-through-VK16 route; the outer
// COPY_TO_REGCLASS line is elided in this view.
1367 def : Pat<(not VK8:$src),
1369 (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
1372 // Mask binary operation
1373 // - KAND, KANDN, KOR, KXNOR, KXOR
// Two-operand mask ops; the trailing `Predicate prd>` parameter line is
// elided in this view.
1374 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
1375 RegisterClass KRC, SDPatternOperator OpNode,
1377 let Predicates = [prd] in
1378 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1379 !strconcat(OpcodeStr,
1380 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1381 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
// All four widths; note these use VEX_4V + VEX_L encodings.
1384 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
1385 SDPatternOperator OpNode> {
1386 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1387 HasDQI>, VEX_4V, VEX_L, PD;
1388 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1389 HasAVX512>, VEX_4V, VEX_L, PS;
1390 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1391 HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
1392 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1393 HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
// Helper fragments for the kandn/kxnor selection patterns.
1396 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
1397 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
1399 let isCommutable = 1 in {
1400 defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
1401 defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
1402 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
1403 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
1405 let isCommutable = 0 in
1406 defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
// 1-bit mask logic has no dedicated instructions: promote both VK1
// operands to VK16, use the 16-bit op, and narrow the result back.
1408 def : Pat<(xor VK1:$src1, VK1:$src2),
1409 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1410 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1412 def : Pat<(or VK1:$src1, VK1:$src2),
1413 (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1414 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1416 def : Pat<(and VK1:$src1, VK1:$src2),
1417 (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1418 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Maps the GPR-based mask-binop intrinsics onto the W-width instructions.
1420 multiclass avx512_mask_binop_int<string IntName, string InstName> {
1421 let Predicates = [HasAVX512] in
1422 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1423 (i16 GR16:$src1), (i16 GR16:$src2)),
1424 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1425 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1426 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1429 defm : avx512_mask_binop_int<"kand", "KAND">;
1430 defm : avx512_mask_binop_int<"kandn", "KANDN">;
1431 defm : avx512_mask_binop_int<"kor", "KOR">;
1432 defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
1433 defm : avx512_mask_binop_int<"kxor", "KXOR">;
1435 // With AVX-512, 8-bit mask is promoted to 16-bit mask.
// Selects v8i1 binops via the W-width instruction by widening both
// operands to VK16; the outer COPY_TO_REGCLASS-back-to-VK8 line is elided.
1436 multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
1437 let Predicates = [HasAVX512] in
1438 def : Pat<(OpNode VK8:$src1, VK8:$src2),
1440 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
1441 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
1444 defm : avx512_binop_pat<and, KANDWrr>;
1445 defm : avx512_binop_pat<andn, KANDNWrr>;
1446 defm : avx512_binop_pat<or, KORWrr>;
1447 defm : avx512_binop_pat<xnor, KXNORWrr>;
1448 defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpack (kunpckbw): encoding-only def; selection is via the
// concat_vectors pattern below.
1451 multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
1452 RegisterClass KRC> {
1453 let Predicates = [HasAVX512] in
1454 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1455 !strconcat(OpcodeStr,
1456 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
1459 multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
1460 defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
1464 defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
// NOTE: operand order is swapped — concat(src1, src2) maps to
// KUNPCKBW(src2, src1), matching the instruction's high/low placement.
1465 def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
1466 (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
1467 (COPY_TO_REGCLASS VK8:$src1, VK16))>;
// GPR intrinsic form of kunpckbw.
1470 multiclass avx512_mask_unpck_int<string IntName, string InstName> {
1471 let Predicates = [HasAVX512] in
1472 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
1473 (i16 GR16:$src1), (i16 GR16:$src2)),
1474 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
1475 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1476 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1478 defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
// Mask test (kortest): no register result, only EFLAGS.
1481 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1483 let Predicates = [HasAVX512], Defs = [EFLAGS] in
1484 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
1485 !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
1486 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
1489 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1490 defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1494 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
// Compare-with-zero of a 1-bit mask: kortest the value against itself.
1496 def : Pat<(X86cmp VK1:$src1, (i1 0)),
1497 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1498 (COPY_TO_REGCLASS VK1:$src1, VK16))>;
// Mask shift by immediate (kshiftl/kshiftr); the second opcode parameter
// of the _w wrapper presumably selects a byte-width variant — the
// instantiating lines are partially elided here.
1501 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1503 let Predicates = [HasAVX512] in
1504 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
1505 !strconcat(OpcodeStr,
1506 " \t{$imm, $src, $dst|$dst, $src, $imm}"),
1507 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
1510 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
1512 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1516 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
1517 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
1519 // Mask setting all 0s or 1s
// KSET0/KSET1 pseudos: materialize an all-zeros or all-ones mask register.
// Marked rematerializable and as-cheap-as-a-move so the register allocator
// can recompute the value instead of spilling it; expanded later because
// isPseudo = 1 (opcode 0, empty asm string).
1520 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
1521 let Predicates = [HasAVX512] in
1522 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
1523 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
1524 [(set KRC:$dst, (VT Val))]>;
// Instantiate byte (VK8/v8i1) and word (VK16/v16i1) variants.
1527 multiclass avx512_mask_setop_w<PatFrag Val> {
1528 defm B : avx512_mask_setop<VK8, v8i1, Val>;
1529 defm W : avx512_mask_setop<VK16, v16i1, Val>;
1532 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
1533 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1535 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1536 let Predicates = [HasAVX512] in {
// All-zero / all-one v8i1 and i1 constants are built with the 16-bit KSET
// pseudos and then retyped with COPY_TO_REGCLASS.
1537 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
1538 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
1539 def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
1540 def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
1541 def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
// Extracting the low 8 bits of a v16i1, or inserting a v8i1 into the low
// half of a v16i1, is just a register-class change.
1543 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
1544 (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
1546 def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
1547 (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
// The upper 8 bits are extracted with a 16-bit mask shift right.
1549 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
1550 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
// v8i1 shifts are carried out at the 16-bit mask width, consistent with
// the promotion comment above.
1552 def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
1553 (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1555 def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
1556 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1557 //===----------------------------------------------------------------------===//
1558 // AVX-512 - Aligned and unaligned load and store
// Generic AVX-512 vector load multiclass. Defines unmasked register move
// (rr) and load (rm), merge-masked (rrk/rmk, $src0 tied to $dst) and
// zero-masked (rrkz/rmkz) forms. vt is the loaded vector type, zvt the
// type used to build the all-zeros vector for zero-masking, and d the SSE
// execution domain. NOTE(review): several interior lines of this
// multiclass are not visible in this excerpt -- confirm against the full
// file before relying on the exact pattern bodies.
1561 multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
1562 RegisterClass KRC, RegisterClass RC,
1563 ValueType vt, ValueType zvt, X86MemOperand memop,
1564 Domain d, bit IsReMaterializable = 1> {
1565 let hasSideEffects = 0 in {
1566 def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1567 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
1569 def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
1570 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1571 "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
// Plain loads may be folded/rematerialized; rematerializability is
// parameterized so unaligned variants can opt out.
1573 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
1574 SchedRW = [WriteLoad] in
1575 def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
1576 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1577 [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
1580 let AddedComplexity = 20 in {
1581 let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
1582 let hasSideEffects = 0 in
1583 def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
1584 (ins RC:$src0, KRC:$mask, RC:$src1),
1585 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1586 "${dst} {${mask}}, $src1}"),
1587 [(set RC:$dst, (vt (vselect KRC:$mask,
1591 let mayLoad = 1, SchedRW = [WriteLoad] in
1592 def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1593 (ins RC:$src0, KRC:$mask, memop:$src1),
1594 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1595 "${dst} {${mask}}, $src1}"),
1598 (vt (bitconvert (ld_frag addr:$src1))),
1602 let mayLoad = 1, SchedRW = [WriteLoad] in
1603 def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1604 (ins KRC:$mask, memop:$src),
1605 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1606 "${dst} {${mask}} {z}, $src}"),
1609 (vt (bitconvert (ld_frag addr:$src))),
1610 (vt (bitconvert (zvt immAllZerosV))))))],
// Instantiate avx512_load at all three vector lengths: Z (512-bit, VR512),
// Z256 and Z128 (which additionally require HasVLX). PatFrag, write-mask
// register class, value type and memory operand names are assembled by
// string concatenation from the element type ("f"/"i"), element size and
// per-width element counts.
1615 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
1616 string elty, string elsz, string vsz512,
1617 string vsz256, string vsz128, Domain d,
1618 Predicate prd, bit IsReMaterializable = 1> {
1619 let Predicates = [prd] in
1620 defm Z : avx512_load<opc, OpcodeStr,
1621 !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
1622 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1623 !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
1624 !cast<X86MemOperand>(elty##"512mem"), d,
1625 IsReMaterializable>, EVEX_V512;
1627 let Predicates = [prd, HasVLX] in {
// For integer element types the 256/128-bit load fragments are the
// v4i64/v2i64 ones; only FP types use the width-specific fragment name.
1628 defm Z256 : avx512_load<opc, OpcodeStr,
1629 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1630 "v"##vsz256##elty##elsz, "v4i64")),
1631 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1632 !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
1633 !cast<X86MemOperand>(elty##"256mem"), d,
1634 IsReMaterializable>, EVEX_V256;
1636 defm Z128 : avx512_load<opc, OpcodeStr,
1637 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1638 "v"##vsz128##elty##elsz, "v2i64")),
1639 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1640 !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
1641 !cast<X86MemOperand>(elty##"128mem"), d,
1642 IsReMaterializable>, EVEX_V128;
// Generic AVX-512 vector store multiclass. The *_alt register-to-register
// forms exist only for the assembler/disassembler (isAsmParserOnly = 1,
// MRMDestReg encoding); the real code-generation forms are the memory
// stores: mr (with a selection pattern) and mrk (merge-masked, no pattern).
1647 multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
1648 ValueType OpVT, RegisterClass KRC, RegisterClass RC,
1649 X86MemOperand memop, Domain d> {
1650 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1651 def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
1652 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
1654 let Constraints = "$src1 = $dst" in
1655 def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1656 (ins RC:$src1, KRC:$mask, RC:$src2),
1657 !strconcat(OpcodeStr,
1658 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
1660 def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1661 (ins KRC:$mask, RC:$src),
1662 !strconcat(OpcodeStr,
1663 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
1664 [], d>, EVEX, EVEX_KZ;
1666 let mayStore = 1 in {
1667 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
1668 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1669 [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
1670 def mrk : AVX512PI<opc, MRMDestMem, (outs),
1671 (ins memop:$dst, KRC:$mask, RC:$src),
1672 !strconcat(OpcodeStr,
1673 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
1674 [], d>, EVEX, EVEX_K;
// Instantiate avx512_store at all three vector lengths, mirroring
// avx512_load_vl: Z (512-bit) under prd, Z256/Z128 additionally under
// HasVLX. The store PatFrag name is st_pat followed by a per-width suffix
// supplied by the caller.
1679 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
1680 string st_suff_512, string st_suff_256,
1681 string st_suff_128, string elty, string elsz,
1682 string vsz512, string vsz256, string vsz128,
1683 Domain d, Predicate prd> {
1684 let Predicates = [prd] in
1685 defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
1686 !cast<ValueType>("v"##vsz512##elty##elsz),
1687 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1688 !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
1690 let Predicates = [prd, HasVLX] in {
1691 defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
1692 !cast<ValueType>("v"##vsz256##elty##elsz),
1693 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1694 !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
1696 defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
1697 !cast<ValueType>("v"##vsz128##elty##elsz),
1698 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1699 !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
// Aligned (VMOVAPS/VMOVAPD) and unaligned (VMOVUPS/VMOVUPD) FP moves,
// instantiating both the load and store multiclasses under one defm name.
1703 defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
1704 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1705 avx512_store_vl<0x29, "vmovaps", "alignedstore",
1706 "512", "256", "", "f", "32", "16", "8", "4",
1707 SSEPackedSingle, HasAVX512>,
1708 PS, EVEX_CD8<32, CD8VF>;
1710 defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
1711 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1712 avx512_store_vl<0x29, "vmovapd", "alignedstore",
1713 "512", "256", "", "f", "64", "8", "4", "2",
1714 SSEPackedDouble, HasAVX512>,
1715 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1717 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
1718 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1719 avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
1720 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1721 PS, EVEX_CD8<32, CD8VF>;
// The trailing 0 marks the VMOVUPD load as not rematerializable.
1723 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
1724 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
1725 avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
1726 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1727 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Map the masked loadu/storeu intrinsics (GPR mask, zero passthru) onto
// the zero-masked load and masked store forms, moving the GPR mask into
// the appropriate write-mask register class first.
1729 def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
1730 (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
1731 (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1733 def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
1734 (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
1735 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1737 def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
1739 (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
1741 def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
1743 (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Integer moves: aligned VMOVDQA32/64 and unaligned VMOVDQU8/16/32/64.
// The 8/16-bit element forms require HasBWI; the rest require HasAVX512.
1746 defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
1747 "16", "8", "4", SSEPackedInt, HasAVX512>,
1748 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
1749 "512", "256", "", "i", "32", "16", "8", "4",
1750 SSEPackedInt, HasAVX512>,
1751 PD, EVEX_CD8<32, CD8VF>;
1753 defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
1754 "8", "4", "2", SSEPackedInt, HasAVX512>,
1755 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
1756 "512", "256", "", "i", "64", "8", "4", "2",
1757 SSEPackedInt, HasAVX512>,
1758 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1760 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
1761 "64", "32", "16", SSEPackedInt, HasBWI>,
1762 avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
1763 "i", "8", "64", "32", "16", SSEPackedInt,
1764 HasBWI>, XD, EVEX_CD8<8, CD8VF>;
1766 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
1767 "32", "16", "8", SSEPackedInt, HasBWI>,
1768 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
1769 "i", "16", "32", "16", "8", SSEPackedInt,
1770 HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
1772 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
1773 "16", "8", "4", SSEPackedInt, HasAVX512>,
1774 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
1775 "i", "32", "16", "8", "4", SSEPackedInt,
1776 HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
1778 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
1779 "8", "4", "2", SSEPackedInt, HasAVX512>,
1780 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
1781 "i", "64", "8", "4", "2", SSEPackedInt,
1782 HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Same intrinsic-to-instruction mapping as the FP loadu/storeu patterns
// above, for the integer d/q variants.
1784 def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
1785 (v16i32 immAllZerosV), GR16:$mask)),
1786 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1788 def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
1789 (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
1790 (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1792 def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
1794 (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
1796 def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
1798 (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Select-with-zero patterns: a vselect against an all-zeros vector becomes
// a zero-masked register move. When the zero vector is on the true side,
// the mask is inverted first with KNOTW.
1801 let AddedComplexity = 20 in {
1802 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
1803 (bc_v8i64 (v16i32 immAllZerosV)))),
1804 (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
// The v8i1 mask must be widened to VK16 for KNOTW and narrowed back.
1806 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
1807 (v8i64 VR512:$src))),
1808 (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
1811 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
1812 (v16i32 immAllZerosV))),
1813 (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
1815 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
1816 (v16i32 VR512:$src))),
1817 (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
1820 // Move Int Doubleword to Packed Double Int
// EVEX-encoded scalar GPR <-> XMM moves (vmovd/vmovq), the AVX-512
// counterparts of the SSE2/AVX MOVD/MOVQ family.
1822 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
1823 "vmovd\t{$src, $dst|$dst, $src}",
1825 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
1827 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
1828 "vmovd\t{$src, $dst|$dst, $src}",
1830 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
1831 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1832 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
1833 "vmovq\t{$src, $dst|$dst, $src}",
1835 (v2i64 (scalar_to_vector GR64:$src)))],
1836 IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
// Bitcast-only GPR64 <-> FR64 moves; isCodeGenOnly because they share
// encodings with the vector forms above.
1837 let isCodeGenOnly = 1 in {
1838 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
1839 "vmovq\t{$src, $dst|$dst, $src}",
1840 [(set FR64:$dst, (bitconvert GR64:$src))],
1841 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
1842 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
1843 "vmovq\t{$src, $dst|$dst, $src}",
1844 [(set GR64:$dst, (bitconvert FR64:$src))],
1845 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
1847 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
1848 "vmovq\t{$src, $dst|$dst, $src}",
1849 [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
1850 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
1851 EVEX_CD8<64, CD8VT1>;
1853 // Move Int Doubleword to Single Scalar
1855 let isCodeGenOnly = 1 in {
1856 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
1857 "vmovd\t{$src, $dst|$dst, $src}",
1858 [(set FR32X:$dst, (bitconvert GR32:$src))],
1859 IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
1861 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
1862 "vmovd\t{$src, $dst|$dst, $src}",
1863 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
1864 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1867 // Move doubleword from xmm register to r/m32
1869 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
1870 "vmovd\t{$src, $dst|$dst, $src}",
1871 [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
1872 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
1874 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
1875 (ins i32mem:$dst, VR128X:$src),
1876 "vmovd\t{$src, $dst|$dst, $src}",
1877 [(store (i32 (vector_extract (v4i32 VR128X:$src),
1878 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
1879 EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1881 // Move quadword from xmm1 register to r/m64
// 64-bit extracts require 64-bit mode (REX.W-style VEX_W encoding).
1883 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
1884 "vmovq\t{$src, $dst|$dst, $src}",
1885 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
1887 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
1888 Requires<[HasAVX512, In64BitMode]>;
1890 def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
1891 (ins i64mem:$dst, VR128X:$src),
1892 "vmovq\t{$src, $dst|$dst, $src}",
1893 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
1894 addr:$dst)], IIC_SSE_MOVDQ>,
1895 EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
1896 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
1898 // Move Scalar Single to Double Int
1900 let isCodeGenOnly = 1 in {
1901 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
1903 "vmovd\t{$src, $dst|$dst, $src}",
1904 [(set GR32:$dst, (bitconvert FR32X:$src))],
1905 IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
1906 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
1907 (ins i32mem:$dst, FR32X:$src),
1908 "vmovd\t{$src, $dst|$dst, $src}",
1909 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
1910 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1913 // Move Quadword Int to Packed Quadword Int
1915 def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
1917 "vmovq\t{$src, $dst|$dst, $src}",
1919 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
1920 EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
1922 //===----------------------------------------------------------------------===//
1923 // AVX-512 MOVSS, MOVSD
1924 //===----------------------------------------------------------------------===//
// Scalar move (VMOVSS/VMOVSD) multiclass: register merge (rr), masked
// register merge (rrk, no pattern), scalar load (rm) and scalar store
// (mr). RC is the scalar FP register class (FR32X/FR64X) and vt the
// 128-bit vector type the merge operates on.
1926 multiclass avx512_move_scalar <string asm, RegisterClass RC,
1927 SDNode OpNode, ValueType vt,
1928 X86MemOperand x86memop, PatFrag mem_pat> {
1929 let hasSideEffects = 0 in {
1930 def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
1931 !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1932 [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
1933 (scalar_to_vector RC:$src2))))],
1934 IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
1935 let Constraints = "$src1 = $dst" in
1936 def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
1937 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
1939 " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
1940 [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
1941 def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
1942 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
1943 [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
1945 def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
1946 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
1947 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
1949 } //hasSideEffects = 0
// Instantiate the scalar moves for f32 (movss) and f64 (movsd).
1952 let ExeDomain = SSEPackedSingle in
1953 defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
1954 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
1956 let ExeDomain = SSEPackedDouble in
1957 defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
1958 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Implement scalar X86select via the masked move-scalar form, shuttling
// the FR32X/FR64X values through VR128X register classes.
1960 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
1961 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
1962 VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
1964 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
1965 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
1966 VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
1968 // For the disassembler
// Reversed-operand (store-direction, opcode 0x11) register forms, needed
// so the disassembler can print both encodings.
1969 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
1970 def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
1971 (ins VR128X:$src1, FR32X:$src2),
1972 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
1974 XS, EVEX_4V, VEX_LIG;
1975 def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
1976 (ins VR128X:$src1, FR64X:$src2),
1977 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
1979 XD, EVEX_4V, VEX_LIG, VEX_W;
// Pattern block mirroring the SSE/AVX MOVSS/MOVSD patterns for the
// EVEX-encoded VMOVSSZ/VMOVSDZ instructions: zero-extending scalar moves,
// load folding, 256-bit forms via EXTRACT_SUBREG/SUBREG_TO_REG, extract-
// and-store, and shuffle (Movss/Movsd/Movlps/Movlpd) lowering.
1982 let Predicates = [HasAVX512] in {
1983 let AddedComplexity = 15 in {
1984 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
1985 // MOVS{S,D} to the lower bits.
1986 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
1987 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
1988 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
1989 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
1990 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
1991 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
1992 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
1993 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
1995 // Move low f32 and clear high bits.
1996 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
1997 (SUBREG_TO_REG (i32 0),
1998 (VMOVSSZrr (v4f32 (V_SET0)),
1999 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2000 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2001 (SUBREG_TO_REG (i32 0),
2002 (VMOVSSZrr (v4i32 (V_SET0)),
2003 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2006 let AddedComplexity = 20 in {
2007 // MOVSSrm zeros the high parts of the register; represent this
2008 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2009 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2010 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2011 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2012 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2013 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2014 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2016 // MOVSDrm zeros the high parts of the register; represent this
2017 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2018 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2019 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2020 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2021 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2022 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2023 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2024 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2025 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2026 def : Pat<(v2f64 (X86vzload addr:$src)),
2027 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2029 // Represent the same patterns above but in the form they appear for
2031 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2032 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
2033 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
2034 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2035 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2036 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2037 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2038 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2039 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2041 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2042 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2043 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2044 FR32X:$src)), sub_xmm)>;
2045 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2046 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2047 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2048 FR64X:$src)), sub_xmm)>;
2049 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2050 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
2051 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
2053 // Move low f64 and clear high bits.
2054 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2055 (SUBREG_TO_REG (i32 0),
2056 (VMOVSDZrr (v2f64 (V_SET0)),
2057 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2059 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2060 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2061 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2063 // Extract and store.
2064 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2066 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2067 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2069 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2071 // Shuffle with VMOVSS
2072 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2073 (VMOVSSZrr (v4i32 VR128X:$src1),
2074 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2075 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2076 (VMOVSSZrr (v4f32 VR128X:$src1),
2077 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit Movss/Movsd: operate on the low xmm halves and reinsert.
2080 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2081 (SUBREG_TO_REG (i32 0),
2082 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2083 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2085 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2086 (SUBREG_TO_REG (i32 0),
2087 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2088 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2091 // Shuffle with VMOVSD
2092 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2093 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2094 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2095 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2096 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2097 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2098 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2099 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2102 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2103 (SUBREG_TO_REG (i32 0),
2104 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2105 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2107 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2108 (SUBREG_TO_REG (i32 0),
2109 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2110 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
// Movlpd/Movlps shuffles also reduce to a scalar double move.
2113 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2114 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2115 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2116 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2117 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2118 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2119 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2120 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// vmovq that moves the low 64 bits and zeroes the upper lane (X86vzmovl),
// in register and load forms.
2123 let AddedComplexity = 15 in
2124 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
2126 "vmovq\t{$src, $dst|$dst, $src}",
2127 [(set VR128X:$dst, (v2i64 (X86vzmovl
2128 (v2i64 VR128X:$src))))],
2129 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
2131 let AddedComplexity = 20 in
2132 def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
2134 "vmovq\t{$src, $dst|$dst, $src}",
2135 [(set VR128X:$dst, (v2i64 (X86vzmovl
2136 (loadv2i64 addr:$src))))],
2137 IIC_SSE_MOVDQ>, EVEX, VEX_W,
2138 EVEX_CD8<8, CD8VT8>;
// Zero-extending movd/movq patterns and X86Vinsert-into-zero lowering for
// the EVEX scalar move instructions defined above.
2140 let Predicates = [HasAVX512] in {
2141 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
2142 let AddedComplexity = 20 in {
2143 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
2144 (VMOVDI2PDIZrm addr:$src)>;
2145 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
2146 (VMOV64toPQIZrr GR64:$src)>;
2147 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
2148 (VMOVDI2PDIZrr GR32:$src)>;
2150 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
2151 (VMOVDI2PDIZrm addr:$src)>;
2152 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
2153 (VMOVDI2PDIZrm addr:$src)>;
2154 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
2155 (VMOVZPQILo2PQIZrm addr:$src)>;
2156 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
2157 (VMOVZPQILo2PQIZrr VR128X:$src)>;
2158 def : Pat<(v2i64 (X86vzload addr:$src)),
2159 (VMOVZPQILo2PQIZrm addr:$src)>;
2162 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
2163 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2164 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
2165 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
2166 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2167 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
2168 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
// Inserting a GPR into element 0 of a zero (or undef) 512-bit vector is a
// plain movd/movq; SUBREG_TO_REG widens the result to ZMM.
2171 def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
2172 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2174 def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
2175 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2177 def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
2178 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2180 def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
2181 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2183 //===----------------------------------------------------------------------===//
2184 // AVX-512 - Non-temporals
2185 //===----------------------------------------------------------------------===//
// Non-temporal loads (VMOVNTDQA). Only the 512-bit form has a selection
// pattern (via the intrinsic); the VLX 256/128-bit forms are
// assembler-visible only (empty pattern lists).
2186 let SchedRW = [WriteLoad] in {
2187 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
2188 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
2189 [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
2190 SSEPackedInt>, EVEX, T8PD, EVEX_V512,
2191 EVEX_CD8<64, CD8VF>;
2193 let Predicates = [HasAVX512, HasVLX] in {
2194 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
2196 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2197 SSEPackedInt>, EVEX, T8PD, EVEX_V256,
2198 EVEX_CD8<64, CD8VF>;
2200 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
2202 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2203 SSEPackedInt>, EVEX, T8PD, EVEX_V128,
2204 EVEX_CD8<64, CD8VF>;
// Non-temporal store: a single memory-destination form selected from
// st_frag (an aligned non-temporal store fragment). High AddedComplexity
// so it wins over the ordinary store patterns.
2208 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2209 ValueType OpVT, RegisterClass RC, X86MemOperand memop,
2210 Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
2211 let SchedRW = [WriteStore], mayStore = 1,
2212 AddedComplexity = 400 in
2213 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
2214 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2215 [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
// Instantiate avx512_movnt at 512/256/128 bits, with the 256/128-bit
// variants gated on HasVLX, using the same string-concatenated type and
// memory-operand naming scheme as the load/store _vl multiclasses.
2218 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2219 string elty, string elsz, string vsz512,
2220 string vsz256, string vsz128, Domain d,
2221 Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
2222 let Predicates = [prd] in
2223 defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
2224 !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
2225 !cast<X86MemOperand>(elty##"512mem"), d, itin>,
2228 let Predicates = [prd, HasVLX] in {
2229 defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
2230 !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
2231 !cast<X86MemOperand>(elty##"256mem"), d, itin>,
2234 defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
2235 !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
2236 !cast<X86MemOperand>(elty##"128mem"), d, itin>,
// Non-temporal store instantiations: integer (VMOVNTDQ), double
// (VMOVNTPD) and single (VMOVNTPS), all from alignednontemporalstore.
2241 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
2242 "i", "64", "8", "4", "2", SSEPackedInt,
2243 HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
2245 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
2246 "f", "64", "8", "4", "2", SSEPackedDouble,
2247 HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2249 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
2250 "f", "32", "16", "8", "4", SSEPackedSingle,
2251 HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
2253 //===----------------------------------------------------------------------===//
2254 // AVX-512 - Integer arithmetic
// Emits the full EVEX form matrix for one two-operand AVX-512 integer op:
//   rr / rm    - unmasked reg-reg / reg-mem forms with selection patterns
//   *k         - merge-masking (EVEX_K): $src0 is tied to $dst and supplies
//                the values of masked-off lanes (vselect with RC:$src0)
//   *kz        - zero-masking (EVEX_KZ): masked-off lanes become zero
//                (vselect with immAllZerosV)
//   rmb*       - embedded broadcast (EVEX_B): one scalar loaded via
//                scalar_mfrag and splat with X86VBroadcast; BrdcstStr is the
//                asm suffix, e.g. "{1to16}"
2256 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2257 ValueType OpVT, RegisterClass KRC,
2258 RegisterClass RC, PatFrag memop_frag,
2259 X86MemOperand x86memop, PatFrag scalar_mfrag,
2260 X86MemOperand x86scalar_mop, string BrdcstStr,
2261 OpndItins itins, bit IsCommutable = 0> {
2262 let isCommutable = IsCommutable in
2263 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2264 (ins RC:$src1, RC:$src2),
2265 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2266 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
// AddedComplexity = 30 makes the masked patterns win over the plain ones
// when a vselect-on-mask is present in the DAG.
2268 let AddedComplexity = 30 in {
2269 let Constraints = "$src0 = $dst" in
2270 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2271 (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
2272 !strconcat(OpcodeStr,
2273 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2274 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2275 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2277 itins.rr>, EVEX_4V, EVEX_K;
2278 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2279 (ins KRC:$mask, RC:$src1, RC:$src2),
2280 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2281 "|$dst {${mask}} {z}, $src1, $src2}"),
2282 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2283 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2284 (OpVT immAllZerosV))))],
2285 itins.rr>, EVEX_4V, EVEX_KZ;
// Memory-operand forms.
2288 let mayLoad = 1 in {
2289 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2290 (ins RC:$src1, x86memop:$src2),
2291 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2292 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
2294 let AddedComplexity = 30 in {
2295 let Constraints = "$src0 = $dst" in
2296 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2297 (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
2298 !strconcat(OpcodeStr,
2299 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2300 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2301 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2303 itins.rm>, EVEX_4V, EVEX_K;
2304 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2305 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2306 !strconcat(OpcodeStr,
2307 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2308 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2309 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2310 (OpVT immAllZerosV))))],
2311 itins.rm>, EVEX_4V, EVEX_KZ;
// Embedded-broadcast forms (EVEX_B): load one scalar and splat it.
2313 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2314 (ins RC:$src1, x86scalar_mop:$src2),
2315 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2316 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2317 [(set RC:$dst, (OpNode RC:$src1,
2318 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2319 itins.rm>, EVEX_4V, EVEX_B;
2320 let AddedComplexity = 30 in {
2321 let Constraints = "$src0 = $dst" in
2322 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2323 (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2324 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2325 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2327 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2328 (OpNode (OpVT RC:$src1),
2329 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2331 itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2332 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2333 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2334 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2335 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2337 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2338 (OpNode (OpVT RC:$src1),
2339 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2340 (OpVT immAllZerosV))))],
2341 itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// Like avx512_binop_rm, but for ops whose destination type differs from the
// source type (DstVT vs SrcVT), e.g. vpmuldq: v16i32 sources -> v8i64 result.
// All forms here carry empty ([]) selection patterns; matching is done via
// explicit Pat<> records elsewhere.  Same rr/rm/*k/*kz/rmb* naming scheme.
2346 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
2347 ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
2348 PatFrag memop_frag, X86MemOperand x86memop,
2349 PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
2350 string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
2351 let isCommutable = IsCommutable in
2353 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2354 (ins RC:$src1, RC:$src2),
2355 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// NOTE(review): unlike avx512_binop_rm, rrk here takes no tied $src0
// passthru operand -- confirm whether merge-masking semantics are intended.
2357 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2358 (ins KRC:$mask, RC:$src1, RC:$src2),
2359 !strconcat(OpcodeStr,
2360 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2361 [], itins.rr>, EVEX_4V, EVEX_K;
2362 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2363 (ins KRC:$mask, RC:$src1, RC:$src2),
2364 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2365 "|$dst {${mask}} {z}, $src1, $src2}"),
2366 [], itins.rr>, EVEX_4V, EVEX_KZ;
2368 let mayLoad = 1 in {
2369 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2370 (ins RC:$src1, x86memop:$src2),
2371 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2373 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2374 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2375 !strconcat(OpcodeStr,
2376 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2377 [], itins.rm>, EVEX_4V, EVEX_K;
2378 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2379 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2380 !strconcat(OpcodeStr,
2381 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2382 [], itins.rm>, EVEX_4V, EVEX_KZ;
// Embedded-broadcast forms.
2383 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2384 (ins RC:$src1, x86scalar_mop:$src2),
2385 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2386 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2387 [], itins.rm>, EVEX_4V, EVEX_B;
2388 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2389 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2390 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2391 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2393 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2394 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2395 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2396 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2397 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2399 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit integer add/sub/multiply instantiations.  Dword ops use VK16WM
// masks and "{1to16}" broadcast; qword ops use VK8WM and "{1to8}".
2403 defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
2404 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2405 SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2407 defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
2408 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2409 SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
// NOTE(review): the multiplies below use SSE_INTALU_ITINS_P except
// VPMULUDQZ which uses SSE_INTMUL_ITINS_P -- scheduling itineraries look
// inconsistent; confirm which itinerary multiplies should carry.
2411 defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
2412 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2413 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2415 defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
2416 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2417 SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
2419 defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
2420 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2421 SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Widening multiplies: v16i32 sources, v8i64 result, hence binop_rm2.
2423 defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
2424 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2425 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
2426 EVEX_CD8<64, CD8VF>, VEX_W;
2428 defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
2429 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2430 SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// binop_rm2 records carry no patterns, so select them explicitly: the DAG
// node, and the unmasked intrinsic form (all-ones mask, zero passthru).
2432 def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
2433 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2435 def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
2436 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2437 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2438 def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
2439 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2440 (VPMULDQZrr VR512:$src1, VR512:$src2)>;
// 512-bit packed signed/unsigned max and min for dword and qword elements.
2442 defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
2443 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2444 SSE_INTALU_ITINS_P, 1>,
2445 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2446 defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
2447 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2448 SSE_INTALU_ITINS_P, 0>,
2449 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2451 defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
2452 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2453 SSE_INTALU_ITINS_P, 1>,
2454 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2455 defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
2456 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2457 SSE_INTALU_ITINS_P, 0>,
2458 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2460 defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
2461 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2462 SSE_INTALU_ITINS_P, 1>,
2463 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2464 defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
2465 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2466 SSE_INTALU_ITINS_P, 0>,
2467 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2469 defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
2470 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2471 SSE_INTALU_ITINS_P, 1>,
2472 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2473 defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
2474 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2475 SSE_INTALU_ITINS_P, 0>,
2476 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Select the unmasked intrinsic form (all-ones mask, zero-vector passthru)
// of each max/min onto the plain rr instruction.
2478 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
2479 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2480 (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
2481 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
2482 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2483 (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
2484 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
2485 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2486 (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
2487 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
2488 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2489 (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
2490 def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
2491 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2492 (VPMINSDZrr VR512:$src1, VR512:$src2)>;
2493 def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
2494 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2495 (VPMINUDZrr VR512:$src1, VR512:$src2)>;
2496 def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
2497 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2498 (VPMINSQZrr VR512:$src1, VR512:$src2)>;
2499 def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
2500 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2501 (VPMINUQZrr VR512:$src1, VR512:$src2)>;
2502 //===----------------------------------------------------------------------===//
2503 // AVX-512 - Unpack Instructions
2504 //===----------------------------------------------------------------------===//
// FP unpack (vunpckh/lps/pd): reg-reg and reg-mem forms.  The memory form
// bitconverts the loaded value to vt, so mem_frag may be typed differently
// from vt (callers pass memopv8f64 even for single-precision variants).
2506 multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
2507 PatFrag mem_frag, RegisterClass RC,
2508 X86MemOperand x86memop, string asm,
2510 def rr : AVX512PI<opc, MRMSrcReg,
2511 (outs RC:$dst), (ins RC:$src1, RC:$src2),
2513 (vt (OpNode RC:$src1, RC:$src2)))],
2515 def rm : AVX512PI<opc, MRMSrcMem,
2516 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2518 (vt (OpNode RC:$src1,
2519 (bitconvert (mem_frag addr:$src2)))))],
// 512-bit FP unpack high/low.
// NOTE(review): the PS variants pass memopv8f64 as mem_frag although their
// vt is v16f32; the multiclass bitconverts the load so the width is right,
// but memopv16f32 would be the consistent choice -- confirm intent.
2523 defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
2524 VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2525 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2526 defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
2527 VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2528 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2529 defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
2530 VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2531 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2532 defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
2533 VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2534 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer unpack (vpunpck*): reg-reg and reg-mem forms with selection
// patterns; the memory operand is bitconverted to OpVT before OpNode.
2536 multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
2537 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2538 X86MemOperand x86memop> {
2539 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2540 (ins RC:$src1, RC:$src2),
2541 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2542 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2543 IIC_SSE_UNPCK>, EVEX_4V;
2544 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2545 (ins RC:$src1, x86memop:$src2),
2546 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2547 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
2548 (bitconvert (memop_frag addr:$src2)))))],
2549 IIC_SSE_UNPCK>, EVEX_4V;
// 512-bit integer unpack low/high for dword and qword elements.
2551 defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
2552 VR512, memopv16i32, i512mem>, EVEX_V512,
2553 EVEX_CD8<32, CD8VF>;
2554 defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
2555 VR512, memopv8i64, i512mem>, EVEX_V512,
2556 VEX_W, EVEX_CD8<64, CD8VF>;
2557 defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
2558 VR512, memopv16i32, i512mem>, EVEX_V512,
2559 EVEX_CD8<32, CD8VF>;
2560 defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
2561 VR512, memopv8i64, i512mem>, EVEX_V512,
2562 VEX_W, EVEX_CD8<64, CD8VF>;
2563 //===----------------------------------------------------------------------===//
// Shuffle-with-immediate (vpshufd / vpermilps / vpermilpd): ri form shuffles
// a register, mi form shuffles a value loaded from memory; $src2 is the
// 8-bit shuffle-control immediate in both.
2567 multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
2568 SDNode OpNode, PatFrag mem_frag,
2569 X86MemOperand x86memop, ValueType OpVT> {
2570 def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
2571 (ins RC:$src1, i8imm:$src2),
2572 !strconcat(OpcodeStr,
2573 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2575 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
2577 def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
2578 (ins x86memop:$src1, i8imm:$src2),
2579 !strconcat(OpcodeStr,
2580 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2582 (OpVT (OpNode (mem_frag addr:$src1),
2583 (i8 imm:$src2))))]>, EVEX;
// 512-bit shuffle-by-immediate instantiations.
2586 defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
2587 i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2589 let ExeDomain = SSEPackedSingle in
2590 defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
2591 memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
2592 EVEX_CD8<32, CD8VF>;
2593 let ExeDomain = SSEPackedDouble in
2594 defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
2595 memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
// vpermilpd operates on 64-bit elements (note VEX_W and v8f64), so the
// EVEX compressed-disp8 scaling must use a 64-bit full-vector tuple.
// Was EVEX_CD8<32, CD8VF>, which mis-scales disp8 for the memory form.
2596 VEX_W, EVEX_CD8<64, CD8VF>;
// Map the integer-typed VPERMILP DAG nodes onto the FP register forms.
2598 def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
2599 (VPERMILPSZri VR512:$src1, imm:$imm)>;
2600 def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
2601 (VPERMILPDZri VR512:$src1, imm:$imm)>;
2603 //===----------------------------------------------------------------------===//
2604 // AVX-512 Logical Instructions
2605 //===----------------------------------------------------------------------===//
// 512-bit bitwise logic.  Dword and qword variants of each op share an
// opcode and differ only in element size (mask class, broadcast granularity,
// VEX_W and CD8 scaling); and/or/xor are commutable, andn is not.
2607 defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
2608 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2609 EVEX_V512, EVEX_CD8<32, CD8VF>;
2610 defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
2611 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2612 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2613 defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
2614 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2615 EVEX_V512, EVEX_CD8<32, CD8VF>;
2616 defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
2617 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2618 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2619 defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
2620 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2621 EVEX_V512, EVEX_CD8<32, CD8VF>;
2622 defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
2623 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2624 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2625 defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
2626 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2627 SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2628 defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
2629 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2630 SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2632 //===----------------------------------------------------------------------===//
2633 // AVX-512 FP arithmetic
2634 //===----------------------------------------------------------------------===//
// Scalar FP binary ops: builds the "ss" (f32, XS prefix) and "sd" (f64,
// XD + VEX_W) variants on the SSE scalar-op multiclass, using the EVEX
// FR32X/FR64X register classes and scalar tuple (CD8VT1) disp8 scaling.
2636 multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2638 defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
2639 f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
2640 EVEX_CD8<32, CD8VT1>;
2641 defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
2642 f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
2643 EVEX_CD8<64, CD8VT1>;
// Scalar FP add/mul/min/max (commutable) and sub/div (not commutable).
2646 let isCommutable = 1 in {
2647 defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
2648 defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
2649 defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
2650 defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
2652 let isCommutable = 0 in {
2653 defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
2654 defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
// Packed FP binary ops.  Same form matrix as avx512_binop_rm (rr/rm,
// merge-mask *k, zero-mask *kz, broadcast rmb*), but only the unmasked
// forms carry selection patterns; the masked forms are assembler-only here.
2657 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2659 RegisterClass RC, ValueType vt,
2660 X86MemOperand x86memop, PatFrag mem_frag,
2661 X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2663 Domain d, OpndItins itins, bit commutable> {
2664 let isCommutable = commutable in {
2665 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2666 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2667 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
2670 def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2671 !strconcat(OpcodeStr,
2672 " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
2673 [], itins.rr, d>, EVEX_4V, EVEX_K;
2675 def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2676 !strconcat(OpcodeStr,
2677 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2678 [], itins.rr, d>, EVEX_4V, EVEX_KZ;
2681 let mayLoad = 1 in {
2682 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2683 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2684 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
2685 itins.rm, d>, EVEX_4V;
// Embedded broadcast: splat one scalar from memory across the vector.
2687 def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
2688 (ins RC:$src1, x86scalar_mop:$src2),
2689 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2690 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2691 [(set RC:$dst, (OpNode RC:$src1,
2692 (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2693 itins.rm, d>, EVEX_4V, EVEX_B;
2695 def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
2696 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2697 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2698 [], itins.rm, d>, EVEX_4V, EVEX_K;
2700 def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2701 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2702 "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2703 [], itins.rm, d>, EVEX_4V, EVEX_KZ;
2705 def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
2706 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2707 " \t{${src2}", BrdcstStr,
2708 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
2709 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
2711 def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2712 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2713 " \t{${src2}", BrdcstStr,
2714 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2716 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit packed FP add/mul/min/max (commutable) and sub/div (not).
// PS variants: v16f32, VK16WM, "{1to16}"; PD variants: v8f64, VK8WM,
// "{1to8}", VEX_W.
2720 defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
2721 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2722 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2724 defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
2725 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2726 SSE_ALU_ITINS_P.d, 1>,
2727 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2729 defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
2730 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2731 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2732 defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
2733 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2734 SSE_ALU_ITINS_P.d, 1>,
2735 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2737 defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
2738 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2739 SSE_ALU_ITINS_P.s, 1>,
2740 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2741 defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
2742 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2743 SSE_ALU_ITINS_P.s, 1>,
2744 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2746 defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
2747 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2748 SSE_ALU_ITINS_P.d, 1>,
2749 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2750 defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
2751 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2752 SSE_ALU_ITINS_P.d, 1>,
2753 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2755 defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
2756 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2757 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2758 defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
2759 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2760 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2762 defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
2763 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2764 SSE_ALU_ITINS_P.d, 0>,
2765 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2766 defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
2767 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2768 SSE_ALU_ITINS_P.d, 0>,
2769 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Select the unmasked max/min intrinsic forms (all-ones mask, zero
// passthru, current rounding mode) onto the plain rr instructions.
2771 def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
2772 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
2773 (i16 -1), FROUND_CURRENT)),
2774 (VMAXPSZrr VR512:$src1, VR512:$src2)>;
2776 def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
2777 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
2778 (i8 -1), FROUND_CURRENT)),
2779 (VMAXPDZrr VR512:$src1, VR512:$src2)>;
2781 def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
2782 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
2783 (i16 -1), FROUND_CURRENT)),
2784 (VMINPSZrr VR512:$src1, VR512:$src2)>;
2786 def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
2787 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
2788 (i8 -1), FROUND_CURRENT)),
2789 (VMINPDZrr VR512:$src1, VR512:$src2)>;
2790 //===----------------------------------------------------------------------===//
2791 // AVX-512 VPTESTM instructions
2792 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM: test vector elements and write a mask register (KRC)
// rather than a vector; rr and rm forms, memory operand bitconverted to vt.
2794 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2795 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
2796 SDNode OpNode, ValueType vt> {
2797 def rr : AVX512PI<opc, MRMSrcReg,
2798 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
2799 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2800 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
2801 SSEPackedInt>, EVEX_4V;
2802 def rm : AVX512PI<opc, MRMSrcMem,
2803 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
2804 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2805 [(set KRC:$dst, (OpNode (vt RC:$src1),
2806 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
// vptestm (AVX512F) and vptestnm (CDI) for dword/qword elements.
// NOTE(review): these integer test instructions pass f512mem as the memory
// operand; i512mem would be the consistent choice -- confirm intent.
2809 defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
2810 memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
2811 EVEX_CD8<32, CD8VF>;
2812 defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
2813 memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
2814 EVEX_CD8<64, CD8VF>;
2816 let Predicates = [HasCDI] in {
2817 defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
2818 memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
2819 EVEX_CD8<32, CD8VF>;
2820 defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
2821 memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
2822 EVEX_CD8<64, CD8VF>;
// Unmasked intrinsic forms: copy the resulting k-register to a GPR class.
2825 def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
2826 (v16i32 VR512:$src2), (i16 -1))),
2827 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
2829 def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
2830 (v8i64 VR512:$src2), (i8 -1))),
2831 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
2832 //===----------------------------------------------------------------------===//
2833 // AVX-512 Shift instructions
2834 //===----------------------------------------------------------------------===//
// Shift-by-immediate: ri (register source) and mi (memory source), each
// with a merge-masked variant (rik/mik, patterns omitted).  ImmFormR/M
// select the ModRM /r group encoding for the register and memory forms.
2835 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
2836 string OpcodeStr, SDNode OpNode, RegisterClass RC,
2837 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
2838 RegisterClass KRC> {
2839 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
2840 (ins RC:$src1, i8imm:$src2),
2841 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2842 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
2843 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
2844 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
2845 (ins KRC:$mask, RC:$src1, i8imm:$src2),
2846 !strconcat(OpcodeStr,
2847 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2848 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
2849 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
2850 (ins x86memop:$src1, i8imm:$src2),
2851 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2852 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
2853 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
2854 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
2855 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
2856 !strconcat(OpcodeStr,
2857 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2858 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift-by-vector-count: the count operand ($src2) is always the low
// element of a 128-bit XMM register or a 128-bit memory load, regardless
// of the vector width being shifted.  rrk/rmk are masked (patterns omitted).
2861 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2862 RegisterClass RC, ValueType vt, ValueType SrcVT,
2863 PatFrag bc_frag, RegisterClass KRC> {
2864 // src2 is always 128-bit
2865 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2866 (ins RC:$src1, VR128X:$src2),
2867 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2868 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
2869 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
2870 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2871 (ins KRC:$mask, RC:$src1, VR128X:$src2),
2872 !strconcat(OpcodeStr,
2873 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2874 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
2875 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2876 (ins RC:$src1, i128mem:$src2),
2877 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2878 [(set RC:$dst, (vt (OpNode RC:$src1,
2879 (bc_frag (memopv2i64 addr:$src2)))))],
2880 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
2881 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2882 (ins KRC:$mask, RC:$src1, i128mem:$src2),
2883 !strconcat(OpcodeStr,
2884 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2885 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift instantiations.  Each mnemonic instantiates BOTH multiclasses under
// the same defm prefix: shift_rmi yields ri/rik/mi/mik records and
// shift_rrm yields rr/rrk/rm/rmk, so the generated record names are
// disjoint and the repeated defm name is legal.  Note the rrm forms use a
// 128-bit count operand, hence EVEX_CD8<..., CD8VQ> (quarter tuple).
2888 defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
2889 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
2890 EVEX_V512, EVEX_CD8<32, CD8VF>;
2891 defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
2892 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2893 EVEX_CD8<32, CD8VQ>;
2895 defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
2896 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2897 EVEX_CD8<64, CD8VF>, VEX_W;
2898 defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
2899 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2900 EVEX_CD8<64, CD8VQ>, VEX_W;
2902 defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
2903 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
2904 EVEX_CD8<32, CD8VF>;
2905 defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
2906 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2907 EVEX_CD8<32, CD8VQ>;
2909 defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
2910 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2911 EVEX_CD8<64, CD8VF>, VEX_W;
2912 defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
2913 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2914 EVEX_CD8<64, CD8VQ>, VEX_W;
2916 defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
2917 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
2918 EVEX_V512, EVEX_CD8<32, CD8VF>;
2919 defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
2920 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
2921 EVEX_CD8<32, CD8VQ>;
2923 defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
2924 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
2925 EVEX_CD8<64, CD8VF>, VEX_W;
2926 defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
2927 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
2928 EVEX_CD8<64, CD8VQ>, VEX_W;
2930 //===-------------------------------------------------------------------===//
2931 // Variable Bit Shifts
2932 //===-------------------------------------------------------------------===//
// Variable (per-element) shifts, vpsllv/vpsrlv/vpsrav: each lane of $src1
// is shifted by the corresponding lane of $src2 (or of the memory operand).
2933 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
2934 RegisterClass RC, ValueType vt,
2935 X86MemOperand x86memop, PatFrag mem_frag> {
2936 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
2937 (ins RC:$src1, RC:$src2),
2938 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2940 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
2942 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
2943 (ins RC:$src1, x86memop:$src2),
2944 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2946 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
// 512-bit variable-shift instantiations for dword and qword elements,
// mapped directly from the generic shl/srl/sra DAG nodes.
2950 defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
2951 i512mem, memopv16i32>, EVEX_V512,
2952 EVEX_CD8<32, CD8VF>;
2953 defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
2954 i512mem, memopv8i64>, EVEX_V512, VEX_W,
2955 EVEX_CD8<64, CD8VF>;
2956 defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
2957 i512mem, memopv16i32>, EVEX_V512,
2958 EVEX_CD8<32, CD8VF>;
2959 defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
2960 i512mem, memopv8i64>, EVEX_V512, VEX_W,
2961 EVEX_CD8<64, CD8VF>;
2962 defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
2963 i512mem, memopv16i32>, EVEX_V512,
2964 EVEX_CD8<32, CD8VF>;
2965 defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
2966 i512mem, memopv8i64>, EVEX_V512, VEX_W,
2967 EVEX_CD8<64, CD8VF>;
2969 //===----------------------------------------------------------------------===//
2970 // AVX-512 - MOVDDUP
2971 //===----------------------------------------------------------------------===//
// MOVDDUP: duplicate even-indexed double elements; rr and rm forms matched
// through the X86Movddup node.
2973 multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
2974 X86MemOperand x86memop, PatFrag memop_frag> {
2975 def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
2976 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
2977 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
2978 def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2979 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
2981 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
2984 defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
2985 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
2986 def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
2987 (VMOVDDUPZrm addr:$src)>;
2989 //===---------------------------------------------------------------------===//
2990 // Replicate Single FP - MOVSHDUP and MOVSLDUP
2991 //===---------------------------------------------------------------------===//
// Single-precision replicate shuffles (MOVSHDUP/MOVSLDUP), parameterized
// over the shuffle SDNode (X86Movshdup / X86Movsldup).
2992 multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
2993 ValueType vt, RegisterClass RC, PatFrag mem_frag,
2994 X86MemOperand x86memop> {
2995 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
2996 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
2997 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
2999 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3000 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3001 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
// 512-bit v16f32 instantiations.
3004 defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3005 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3006 EVEX_CD8<32, CD8VF>;
3007 defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3008 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3009 EVEX_CD8<32, CD8VF>;
// Also select the FP instructions for the equivalent v16i32 shuffles.
3011 def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3012 def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3013 (VMOVSHDUPZrm addr:$src)>;
3014 def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3015 def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3016 (VMOVSLDUPZrm addr:$src)>;
3018 //===----------------------------------------------------------------------===//
3019 // Move Low to High and High to Low packed FP Instructions
3020 //===----------------------------------------------------------------------===//
// EVEX-encoded 128-bit MOVLHPS/MOVHLPS, selected from the X86Movlhps /
// X86Movhlps shuffle nodes.
3021 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3022 (ins VR128X:$src1, VR128X:$src2),
3023 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3024 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3025 IIC_SSE_MOV_LH>, EVEX_4V;
3026 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3027 (ins VR128X:$src1, VR128X:$src2),
3028 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3029 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3030 IIC_SSE_MOV_LH>, EVEX_4V;
// Integer-typed shuffle patterns mapped onto the FP instructions above.
3032 let Predicates = [HasAVX512] in {
3034 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3035 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3036 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3037 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
3040 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3041 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3044 //===----------------------------------------------------------------------===//
3045 // FMA - Fused Multiply Operations
// Packed FMA3, 213 form. $src1 is tied to $dst (FMA3 destructive encoding).
3047 let Constraints = "$src1 = $dst" in {
// Register, memory, and embedded-broadcast (EVEX_B) source forms; the
// register form goes through AVX512_masking_3src to add masked variants.
3048 multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3049 RegisterClass RC, X86MemOperand x86memop,
3050 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3051 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3052 RegisterClass KRC> {
3053 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3054 (ins RC:$src2, RC:$src3),
3055 OpcodeStr, "$src3, $src2", "$src2, $src3",
3056 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3060 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3061 (ins RC:$src1, RC:$src2, x86memop:$src3),
3062 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3063 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3064 (mem_frag addr:$src3))))]>;
// Broadcast form: a single scalar from memory replicated to all lanes,
// spelled with the "{1toN}" broadcast suffix in assembly.
3065 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3066 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
3067 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
3068 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3069 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3070 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3072 } // Constraints = "$src1 = $dst"
// 512-bit single-precision 213-form instantiations.
3074 let ExeDomain = SSEPackedSingle in {
3075 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3076 memopv16f32, f32mem, loadf32, "{1to16}",
3077 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
3078 EVEX_CD8<32, CD8VF>;
3079 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3080 memopv16f32, f32mem, loadf32, "{1to16}",
3081 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
3082 EVEX_CD8<32, CD8VF>;
3083 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3084 memopv16f32, f32mem, loadf32, "{1to16}",
3085 X86Fmaddsub, v16f32, VK16WM>,
3086 EVEX_V512, EVEX_CD8<32, CD8VF>;
3087 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3088 memopv16f32, f32mem, loadf32, "{1to16}",
3089 X86Fmsubadd, v16f32, VK16WM>,
3090 EVEX_V512, EVEX_CD8<32, CD8VF>;
3091 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3092 memopv16f32, f32mem, loadf32, "{1to16}",
3093 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
3094 EVEX_CD8<32, CD8VF>;
3095 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3096 memopv16f32, f32mem, loadf32, "{1to16}",
3097 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
3098 EVEX_CD8<32, CD8VF>;
// 512-bit double-precision 213-form instantiations (VEX_W selects 64-bit
// element size).
3100 let ExeDomain = SSEPackedDouble in {
3101 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3102 memopv8f64, f64mem, loadf64, "{1to8}",
3103 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
3104 VEX_W, EVEX_CD8<64, CD8VF>;
3105 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3106 memopv8f64, f64mem, loadf64, "{1to8}",
3107 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3108 EVEX_CD8<64, CD8VF>;
3109 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3110 memopv8f64, f64mem, loadf64, "{1to8}",
3111 X86Fmaddsub, v8f64, VK8WM>,
3112 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3113 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3114 memopv8f64, f64mem, loadf64, "{1to8}",
3115 X86Fmsubadd, v8f64, VK8WM>,
3116 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3117 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3118 memopv8f64, f64mem, loadf64, "{1to8}",
3119 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
3120 EVEX_CD8<64, CD8VF>;
3121 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3122 memopv8f64, f64mem, loadf64, "{1to8}",
3123 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3124 EVEX_CD8<64, CD8VF>;
// Packed FMA3, 132 form. Only memory/broadcast variants are defined here
// ($src2 comes from memory; the register form is covered by the 213 defs).
3127 let Constraints = "$src1 = $dst" in {
3128 multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3129 RegisterClass RC, X86MemOperand x86memop,
3130 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3131 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
3133 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3134 (ins RC:$src1, RC:$src3, x86memop:$src2),
3135 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
3136 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
// Embedded-broadcast form of the memory operand (EVEX_B).
3137 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3138 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
3139 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
3140 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3141 [(set RC:$dst, (OpNode RC:$src1,
3142 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3144 } // Constraints = "$src1 = $dst"
// 512-bit single-precision 132-form instantiations.
3147 let ExeDomain = SSEPackedSingle in {
3148 defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
3149 memopv16f32, f32mem, loadf32, "{1to16}",
3150 X86Fmadd, v16f32>, EVEX_V512,
3151 EVEX_CD8<32, CD8VF>;
3152 defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
3153 memopv16f32, f32mem, loadf32, "{1to16}",
3154 X86Fmsub, v16f32>, EVEX_V512,
3155 EVEX_CD8<32, CD8VF>;
3156 defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
3157 memopv16f32, f32mem, loadf32, "{1to16}",
3158 X86Fmaddsub, v16f32>,
3159 EVEX_V512, EVEX_CD8<32, CD8VF>;
3160 defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
3161 memopv16f32, f32mem, loadf32, "{1to16}",
3162 X86Fmsubadd, v16f32>,
3163 EVEX_V512, EVEX_CD8<32, CD8VF>;
3164 defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
3165 memopv16f32, f32mem, loadf32, "{1to16}",
3166 X86Fnmadd, v16f32>, EVEX_V512,
3167 EVEX_CD8<32, CD8VF>;
3168 defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
3169 memopv16f32, f32mem, loadf32, "{1to16}",
3170 X86Fnmsub, v16f32>, EVEX_V512,
3171 EVEX_CD8<32, CD8VF>;
// 512-bit double-precision 132-form instantiations.
3173 let ExeDomain = SSEPackedDouble in {
3174 defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
3175 memopv8f64, f64mem, loadf64, "{1to8}",
3176 X86Fmadd, v8f64>, EVEX_V512,
3177 VEX_W, EVEX_CD8<64, CD8VF>;
3178 defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
3179 memopv8f64, f64mem, loadf64, "{1to8}",
3180 X86Fmsub, v8f64>, EVEX_V512, VEX_W,
3181 EVEX_CD8<64, CD8VF>;
3182 defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
3183 memopv8f64, f64mem, loadf64, "{1to8}",
3184 X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
3185 EVEX_CD8<64, CD8VF>;
3186 defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
3187 memopv8f64, f64mem, loadf64, "{1to8}",
3188 X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
3189 EVEX_CD8<64, CD8VF>;
3190 defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
3191 memopv8f64, f64mem, loadf64, "{1to8}",
3192 X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
3193 EVEX_CD8<64, CD8VF>;
3194 defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
3195 memopv8f64, f64mem, loadf64, "{1to8}",
3196 X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
3197 EVEX_CD8<64, CD8VF>;
// Scalar FMA3, 213 form, on FR32X/FR64X. $src1 is tied to $dst. The
// register form is marked commutable in $src1/$src2 (isCommutable = 1).
3201 let Constraints = "$src1 = $dst" in {
3202 multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3203 RegisterClass RC, ValueType OpVT,
3204 X86MemOperand x86memop, Operand memop,
3206 let isCommutable = 1 in
3207 def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
3208 (ins RC:$src1, RC:$src2, RC:$src3),
3209 !strconcat(OpcodeStr,
3210 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3212 (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
3214 def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3215 (ins RC:$src1, RC:$src2, f128mem:$src3),
3216 !strconcat(OpcodeStr,
3217 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3219 (OpVT (OpNode RC:$src2, RC:$src1,
3220 (mem_frag addr:$src3))))]>;
3223 } // Constraints = "$src1 = $dst"
// Scalar single/double instantiations of the fma/fms/fnma/fnms variants.
3225 defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
3226 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3227 defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
3228 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3229 defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
3230 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3231 defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
3232 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3233 defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
3234 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3235 defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
3236 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3237 defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
3238 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3239 defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
3240 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3242 //===----------------------------------------------------------------------===//
3243 // AVX-512 Scalar convert from sign integer to float/double
3244 //===----------------------------------------------------------------------===//
// Scalar GPR -> float/double converts (CVTSI2SS/SD and unsigned variants).
// The instruction definitions carry no patterns (hasSideEffects = 0,
// empty pattern list); selection happens via the explicit Pats below.
3246 multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3247 X86MemOperand x86memop, string asm> {
3248 let hasSideEffects = 0 in {
3249 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
3250 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3253 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
3254 (ins DstRC:$src1, x86memop:$src),
3255 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3257 } // hasSideEffects = 0
3259 let Predicates = [HasAVX512] in {
// Signed 32/64-bit integer to single/double.
3260 defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
3261 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3262 defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
3263 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3264 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
3265 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3266 defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
3267 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// sint_to_fp selection; the tied first operand is left undefined.
3269 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
3270 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3271 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
3272 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3273 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
3274 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3275 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
3276 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3278 def : Pat<(f32 (sint_to_fp GR32:$src)),
3279 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3280 def : Pat<(f32 (sint_to_fp GR64:$src)),
3281 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3282 def : Pat<(f64 (sint_to_fp GR32:$src)),
3283 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3284 def : Pat<(f64 (sint_to_fp GR64:$src)),
3285 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned 32/64-bit integer to single/double (AVX-512-only encodings).
3287 defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
3288 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3289 defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
3290 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3291 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
3292 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3293 defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
3294 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// uint_to_fp selection, mirroring the signed patterns above.
3296 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
3297 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3298 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
3299 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3300 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
3301 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3302 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
3303 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3305 def : Pat<(f32 (uint_to_fp GR32:$src)),
3306 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3307 def : Pat<(f32 (uint_to_fp GR64:$src)),
3308 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3309 def : Pat<(f64 (uint_to_fp GR32:$src)),
3310 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3311 def : Pat<(f64 (uint_to_fp GR64:$src)),
3312 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3315 //===----------------------------------------------------------------------===//
3316 // AVX-512 Scalar convert from float/double to integer
3317 //===----------------------------------------------------------------------===//
// Scalar float/double -> integer converts selected through intrinsics.
// The register form carries the intrinsic pattern; the memory form has no
// pattern (empty list) and is assembler/disassembler only here.
3318 multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3319 Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
3321 let hasSideEffects = 0 in {
3322 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3323 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3324 [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
3325 Requires<[HasAVX512]>;
3327 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
3328 !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
3329 Requires<[HasAVX512]>;
3330 } // hasSideEffects = 0
3332 let Predicates = [HasAVX512] in {
3333 // Convert float/double to signed/unsigned int 32/64
3334 defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
3335 ssmem, sse_load_f32, "cvtss2si">,
3336 XS, EVEX_CD8<32, CD8VT1>;
3337 defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
3338 ssmem, sse_load_f32, "cvtss2si">,
3339 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
3340 defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
3341 ssmem, sse_load_f32, "cvtss2usi">,
3342 XS, EVEX_CD8<32, CD8VT1>;
3343 defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3344 int_x86_avx512_cvtss2usi64, ssmem,
3345 sse_load_f32, "cvtss2usi">, XS, VEX_W,
3346 EVEX_CD8<32, CD8VT1>;
3347 defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
3348 sdmem, sse_load_f64, "cvtsd2si">,
3349 XD, EVEX_CD8<64, CD8VT1>;
3350 defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
3351 sdmem, sse_load_f64, "cvtsd2si">,
3352 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3353 defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
3354 sdmem, sse_load_f64, "cvtsd2usi">,
3355 XD, EVEX_CD8<64, CD8VT1>;
3356 defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3357 int_x86_avx512_cvtsd2usi64, sdmem,
3358 sse_load_f64, "cvtsd2usi">, XD, VEX_W,
3359 EVEX_CD8<64, CD8VT1>;
// Intrinsic-only int -> float forms, reusing the SSE helper multiclass;
// kept out of the disassembler tables (isCodeGenOnly = 1).
3361 let isCodeGenOnly = 1 in {
3362 defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3363 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
3364 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3365 defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3366 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
3367 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3368 defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3369 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
3370 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3371 defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3372 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
3373 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3375 defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3376 int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
3377 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3378 defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3379 int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
3380 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3381 defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3382 int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
3383 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3384 defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3385 int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
3386 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3387 } // isCodeGenOnly = 1
3389 // Convert float/double to signed/unsigned int 32/64 with truncation
// Intrinsic forms of the truncating converts (isCodeGenOnly keeps these out
// of the assembler/disassembler tables; the assembly-visible forms are the
// avx512_cvt_s instantiations below).
3390 let isCodeGenOnly = 1 in {
3391 defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
3392 ssmem, sse_load_f32, "cvttss2si">,
3393 XS, EVEX_CD8<32, CD8VT1>;
3394 defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3395 int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
3396 "cvttss2si">, XS, VEX_W,
3397 EVEX_CD8<32, CD8VT1>;
3398 defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
3399 sdmem, sse_load_f64, "cvttsd2si">, XD,
3400 EVEX_CD8<64, CD8VT1>;
3401 defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3402 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
3403 "cvttsd2si">, XD, VEX_W,
3404 EVEX_CD8<64, CD8VT1>;
3405 defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3406 int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
3407 "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
3408 defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3409 int_x86_avx512_cvttss2usi64, ssmem,
3410 sse_load_f32, "cvttss2usi">, XS, VEX_W,
3411 EVEX_CD8<32, CD8VT1>;
3412 defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3413 int_x86_avx512_cvttsd2usi,
3414 sdmem, sse_load_f64, "cvttsd2usi">, XD,
3415 EVEX_CD8<64, CD8VT1>;
3416 defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3417 int_x86_avx512_cvttsd2usi64, sdmem,
3418 sse_load_f64, "cvttsd2usi">, XD, VEX_W,
3419 EVEX_CD8<64, CD8VT1>;
3420 } // isCodeGenOnly = 1
// Generic scalar convert selected from an ISD node (fp_to_sint/fp_to_uint),
// with register and memory source forms.
3422 multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3423 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
3425 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3426 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3427 [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
3428 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3429 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3430 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
// Truncating scalar fp -> signed/unsigned int 32/64 instantiations.
3433 defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
3434 loadf32, "cvttss2si">, XS,
3435 EVEX_CD8<32, CD8VT1>;
3436 defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
3437 loadf32, "cvttss2usi">, XS,
3438 EVEX_CD8<32, CD8VT1>;
3439 defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
3440 loadf32, "cvttss2si">, XS, VEX_W,
3441 EVEX_CD8<32, CD8VT1>;
3442 defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
3443 loadf32, "cvttss2usi">, XS, VEX_W,
3444 EVEX_CD8<32, CD8VT1>;
3445 defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
3446 loadf64, "cvttsd2si">, XD,
3447 EVEX_CD8<64, CD8VT1>;
3448 defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
3449 loadf64, "cvttsd2usi">, XD,
3450 EVEX_CD8<64, CD8VT1>;
3451 defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
3452 loadf64, "cvttsd2si">, XD, VEX_W,
3453 EVEX_CD8<64, CD8VT1>;
3454 defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
3455 loadf64, "cvttsd2usi">, XD, VEX_W,
3456 EVEX_CD8<64, CD8VT1>;
3458 //===----------------------------------------------------------------------===//
3459 // AVX-512 Convert from float to double and back
3460 //===----------------------------------------------------------------------===//
// Scalar single <-> double converts. Definitions carry no patterns
// (hasSideEffects = 0); fextend/fround selection is done via the Pats below.
3461 let hasSideEffects = 0 in {
3462 def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
3463 (ins FR32X:$src1, FR32X:$src2),
3464 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3465 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
3467 def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
3468 (ins FR32X:$src1, f32mem:$src2),
3469 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3470 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
3471 EVEX_CD8<32, CD8VT1>;
3473 // Convert scalar double to scalar single
3474 def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
3475 (ins FR64X:$src1, FR64X:$src2),
3476 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3477 []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
3479 def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
3480 (ins FR64X:$src1, f64mem:$src2),
3481 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3482 []>, EVEX_4V, VEX_LIG, VEX_W,
3483 Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
// fextend: register source duplicates the operand; extending loads either
// fold into the memory form (OptForSize) or load then convert (OptForSpeed).
3486 def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
3487 Requires<[HasAVX512]>;
3488 def : Pat<(fextend (loadf32 addr:$src)),
3489 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
3491 def : Pat<(extloadf32 addr:$src),
3492 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
3493 Requires<[HasAVX512, OptForSize]>;
3495 def : Pat<(extloadf32 addr:$src),
3496 (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
3497 Requires<[HasAVX512, OptForSpeed]>;
3499 def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
3500 Requires<[HasAVX512]>;
// Packed vector convert with an extra rounding-control form ("rrb",
// EVEX_B + EVEX_RC, pattern-less) alongside the normal rr/rm forms.
3502 multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
3503 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3504 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3506 let hasSideEffects = 0 in {
3507 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3508 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3510 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3511 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3512 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3513 [], d>, EVEX, EVEX_B, EVEX_RC;
3515 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3516 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3518 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3519 } // hasSideEffects = 0
// Same as above but without the rounding-control register form.
3522 multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
3523 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3524 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3526 let hasSideEffects = 0 in {
3527 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3528 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3530 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3532 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3533 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3535 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3536 } // hasSideEffects = 0
// v8f64 -> v8f32 (fround, with rounding control) and v8f32 -> v8f64 (fextend).
3539 defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
3540 memopv8f64, f512mem, v8f32, v8f64,
3541 SSEPackedSingle>, EVEX_V512, VEX_W, PD,
3542 EVEX_CD8<64, CD8VF>;
3544 defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
3545 memopv4f64, f256mem, v8f64, v8f32,
3546 SSEPackedDouble>, EVEX_V512, PS,
3547 EVEX_CD8<32, CD8VH>;
3548 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3549 (VCVTPS2PDZrm addr:$src)>;
// Select the unmasked instruction when the mask-intrinsic is called with an
// all-ones mask; the rounding-mode argument picks rr vs. rrb.
3551 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3552 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
3553 (VCVTPD2PSZrr VR512:$src)>;
3555 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3556 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
3557 (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
3559 //===----------------------------------------------------------------------===//
3560 // AVX-512 Vector convert from sign integer to float/double
3561 //===----------------------------------------------------------------------===//
// Packed signed/unsigned integer <-> float/double conversions for the
// 512-bit register classes, instantiating the avx512_vcvt_fp* multiclasses.
3563 defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
3564 memopv8i64, i512mem, v16f32, v16i32,
3565 SSEPackedSingle>, EVEX_V512, PS,
3566 EVEX_CD8<32, CD8VF>;
3568 defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
3569 memopv4i64, i256mem, v8f64, v8i32,
3570 SSEPackedDouble>, EVEX_V512, XS,
3571 EVEX_CD8<32, CD8VH>;
3573 defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
3574 memopv16f32, f512mem, v16i32, v16f32,
3575 SSEPackedSingle>, EVEX_V512, XS,
3576 EVEX_CD8<32, CD8VF>;
3578 defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
3579 memopv8f64, f512mem, v8i32, v8f64,
3580 SSEPackedDouble>, EVEX_V512, PD, VEX_W,
3581 EVEX_CD8<64, CD8VF>;
3583 defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
3584 memopv16f32, f512mem, v16i32, v16f32,
3585 SSEPackedSingle>, EVEX_V512, PS,
3586 EVEX_CD8<32, CD8VF>;
3588 // cvttps2udq (src, 0, mask-all-ones, sae-current)
3589 def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
3590 (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
3591 (VCVTTPS2UDQZrr VR512:$src)>;
3593 defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
3594 memopv8f64, f512mem, v8i32, v8f64,
3595 SSEPackedDouble>, EVEX_V512, PS, VEX_W,
3596 EVEX_CD8<64, CD8VF>;
3598 // cvttpd2udq (src, 0, mask-all-ones, sae-current)
3599 def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
3600 (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
3601 (VCVTTPD2UDQZrr VR512:$src)>;
3603 defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
3604 memopv4i64, f256mem, v8f64, v8i32,
3605 SSEPackedDouble>, EVEX_V512, XS,
3606 EVEX_CD8<32, CD8VH>;
3608 defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
3609 memopv16i32, f512mem, v16f32, v16i32,
3610 SSEPackedSingle>, EVEX_V512, XD,
3611 EVEX_CD8<32, CD8VF>;
// Narrower (128/256-bit) unsigned conversions: widen into a 512-bit
// register, run the Z instruction, then extract the low subregister.
3613 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
3614 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3615 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3617 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
3618 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3619 (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3621 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
3622 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3623 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3625 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
3626 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3627 (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3629 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
3630 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
3631 (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
// Map mask-intrinsics with an all-ones mask onto the unmasked instructions.
3633 def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
3634 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3635 (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
3636 def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
3637 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3638 (VCVTDQ2PDZrr VR256X:$src)>;
3639 def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
3640 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3641 (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
3642 def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
3643 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3644 (VCVTUDQ2PDZrr VR256X:$src)>;
3646 multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
3647 RegisterClass DstRC, PatFrag mem_frag,
3648 X86MemOperand x86memop, Domain d> {
3649 let hasSideEffects = 0 in {
3650 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3651 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3653 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3654 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3655 [], d>, EVEX, EVEX_B, EVEX_RC;
3657 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3658 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3660 } // hasSideEffects = 0
// 512-bit FP -> signed/unsigned doubleword conversions. vcvtpd2* narrow
// v8f64 to a 256-bit integer result (DstRC = VR256X). vcvtps2udq and
// vcvtpd2udq share opcode 0x79 and are disambiguated by VEX_W/prefix.
3663 defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
3664 memopv16f32, f512mem, SSEPackedSingle>, PD,
3665 EVEX_V512, EVEX_CD8<32, CD8VF>;
3666 defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
3667 memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
3668 EVEX_V512, EVEX_CD8<64, CD8VF>;
// Unmasked intrinsic calls (zero passthru, all-ones mask) with a rounding
// mode select the embedded-rounding (rrb) instruction variants.
3670 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
3671 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3672 (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
3674 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
3675 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3676 (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3678 defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
3679 memopv16f32, f512mem, SSEPackedSingle>,
3680 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
3681 defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
3682 memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
3683 PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
3685 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
3686 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3687 (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
3689 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
3690 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3691 (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
// Fold a load directly into the double<->single precision conversions:
// fround of a v8f64 load -> VCVTPD2PSZrm, extending load of v8f32 ->
// VCVTPS2PDZrm.
3693 let Predicates = [HasAVX512] in {
3694 def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
3695 (VCVTPD2PSZrm addr:$src)>;
3696 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3697 (VCVTPS2PDZrm addr:$src)>;
3700 //===----------------------------------------------------------------------===//
3701 // Half precision conversion instructions
3702 //===----------------------------------------------------------------------===//
// Half -> single precision conversion (vcvtph2ps): register and memory
// source forms, both pattern-less (the rm form is explicitly
// hasSideEffects = 0, mayLoad = 1); intrinsic selection is via Pat<> below.
3703 multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
3704 X86MemOperand x86memop> {
3705 def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
3706 "vcvtph2ps\t{$src, $dst|$dst, $src}",
3708 let hasSideEffects = 0, mayLoad = 1 in
3709 def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
3710 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
// Single -> half precision conversion (vcvtps2ph): takes a rounding-control
// immediate ($src2). rr writes a register; mr stores directly to memory
// (hasSideEffects = 0, mayStore = 1, pattern-less).
3713 multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
3714 X86MemOperand x86memop> {
3715 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
3716 (ins srcRC:$src1, i32i8imm:$src2),
3717 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
3719 let hasSideEffects = 0, mayStore = 1 in
3720 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
3721 (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
3722 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
// 512-bit instantiations: 16 x f16 packed in a 256-bit source/destination
// (CD8VH = half-vector memory tuple), plus patterns mapping the unmasked
// intrinsic calls (zero passthru, all-ones i16 mask) onto the rr forms.
3725 defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
3726 EVEX_CD8<32, CD8VH>;
3727 defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
3728 EVEX_CD8<32, CD8VH>;
3730 def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
3731 imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
3732 (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
3734 def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
3735 (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
3736 (VCVTPH2PSZrr VR256X:$src)>;
// EVEX-encoded scalar ordered/unordered FP compares, writing EFLAGS.
// Three tiers:
//   - VUCOMISS/SDZ: FR32X/FR64X forms with an X86cmp selection pattern.
//   - VCOMISS/SDZ:  VR128X forms with the pattern list suppressed
//                   (Pattern = []<dag>), assembler/disassembler only.
//   - Int_V*COMIS*Z: isCodeGenOnly VR128X variants selected from the
//                   X86ucomi/X86comi nodes (intrinsic lowering).
3738 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
3739 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
3740 "ucomiss">, PS, EVEX, VEX_LIG,
3741 EVEX_CD8<32, CD8VT1>;
3742 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
3743 "ucomisd">, PD, EVEX,
3744 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3745 let Pattern = []<dag> in {
3746 defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
3747 "comiss">, PS, EVEX, VEX_LIG,
3748 EVEX_CD8<32, CD8VT1>;
3749 defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
3750 "comisd">, PD, EVEX,
3751 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3753 let isCodeGenOnly = 1 in {
3754 defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
3755 load, "ucomiss">, PS, EVEX, VEX_LIG,
3756 EVEX_CD8<32, CD8VT1>;
3757 defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
3758 load, "ucomisd">, PD, EVEX,
3759 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3761 defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
3762 load, "comiss">, PS, EVEX, VEX_LIG,
3763 EVEX_CD8<32, CD8VT1>;
3764 defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
3765 load, "comisd">, PD, EVEX,
3766 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3770 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal/rsqrt skeleton: pattern-less
// (hasSideEffects = 0) rr and rm forms with the usual EVEX_4V two-source
// encoding; intrinsic patterns are provided separately below.
3771 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3772 X86MemOperand x86memop> {
3773 let hasSideEffects = 0 in {
3774 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3775 (ins RC:$src1, RC:$src2),
3776 !strconcat(OpcodeStr,
3777 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3778 let mayLoad = 1 in {
3779 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3780 (ins RC:$src1, x86memop:$src2),
3781 !strconcat(OpcodeStr,
3782 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// Scalar rcp14/rsqrt14 instantiations over FR32X/FR64X.
3787 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
3788 EVEX_CD8<32, CD8VT1>;
3789 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
3790 VEX_W, EVEX_CD8<64, CD8VT1>;
3791 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
3792 EVEX_CD8<32, CD8VT1>;
3793 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
3794 VEX_W, EVEX_CD8<64, CD8VT1>;
// Map the unmasked scalar intrinsics onto the scalar instructions: the
// v4f32/v2f64 operands are moved into the scalar register classes with
// COPY_TO_REGCLASS and the result copied back to VR128X.
3796 def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
3797 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
3798 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3799 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3801 def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
3802 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
3803 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3804 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3806 def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
3807 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
3808 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3809 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3811 def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
3812 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
3813 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3814 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3816 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision reciprocal/rsqrt skeleton. Unlike the scalar
// version this one carries selection patterns directly: r matches
// (OpNode reg), m matches OpNode applied to a mem_frag load.
3817 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
3818 RegisterClass RC, X86MemOperand x86memop,
3819 PatFrag mem_frag, ValueType OpVt> {
3820 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3821 !strconcat(OpcodeStr,
3822 " \t{$src, $dst|$dst, $src}"),
3823 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
3825 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3826 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3827 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
// 512-bit packed rcp14/rsqrt14, selected from the X86frsqrt/X86frcp nodes.
3830 defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
3831 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
3832 defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
3833 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3834 defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
3835 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
3836 defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
3837 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Unmasked intrinsic forms (zero passthru, all-ones mask) map onto the
// plain register instructions.
3839 def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
3840 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
3841 (VRSQRT14PSZr VR512:$src)>;
3842 def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
3843 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3844 (VRSQRT14PDZr VR512:$src)>;
3846 def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
3847 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
3848 (VRCP14PSZr VR512:$src)>;
3849 def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
3850 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3851 (VRCP14PDZr VR512:$src)>;
3853 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision reciprocal/rsqrt skeleton (AVX-512 ER extension,
// gated on HasERI). Pattern-less rr, rrb (suppress-all-exceptions {sae}
// form, EVEX_B) and rm variants; all two-source EVEX_4V encodings.
3854 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3855 X86MemOperand x86memop> {
3856 let hasSideEffects = 0, Predicates = [HasERI] in {
3857 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3858 (ins RC:$src1, RC:$src2),
3859 !strconcat(OpcodeStr,
3860 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3861 def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3862 (ins RC:$src1, RC:$src2),
3863 !strconcat(OpcodeStr,
3864 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
3865 []>, EVEX_4V, EVEX_B;
3866 let mayLoad = 1 in {
3867 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3868 (ins RC:$src1, x86memop:$src2),
3869 !strconcat(OpcodeStr,
3870 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// Scalar rcp28/rsqrt28 instantiations over FR32X/FR64X.
3875 defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
3876 EVEX_CD8<32, CD8VT1>;
3877 defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
3878 VEX_W, EVEX_CD8<64, CD8VT1>;
3879 defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
3880 EVEX_CD8<32, CD8VT1>;
3881 defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
3882 VEX_W, EVEX_CD8<64, CD8VT1>;
// Unmasked scalar intrinsics select the {sae} (rrb) instruction variants,
// shuffling operands through the scalar register classes as for rcp14.
// NOTE(review): the final intrinsic operand (rounding/sae immediate) lines
// appear elided in this extract (e.g. original line 3886).
3884 def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
3885 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
3887 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3888 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3890 def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
3891 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
3893 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3894 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3896 def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
3897 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
3899 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3900 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3902 def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
3903 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
3905 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3906 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3908 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision reciprocal/rsqrt skeleton (HasERI): plain r,
// {sae} rb (EVEX_B implied by the {sae} syntax; encoding bits follow the
// elided continuation lines), and memory m forms.
3909 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
3910 RegisterClass RC, X86MemOperand x86memop> {
3911 let hasSideEffects = 0, Predicates = [HasERI] in {
3912 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3913 !strconcat(OpcodeStr,
3914 " \t{$src, $dst|$dst, $src}"),
3916 def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3917 !strconcat(OpcodeStr,
3918 " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
3920 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3921 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// 512-bit packed rcp28/rsqrt28 instantiations.
3925 defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
3926 EVEX_V512, EVEX_CD8<32, CD8VF>;
3927 defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
3928 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3929 defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
3930 EVEX_V512, EVEX_CD8<32, CD8VF>;
3931 defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
3932 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Unmasked intrinsics with FROUND_NO_EXC select the {sae} (rb) variants.
3934 def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
3935 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
3936 (VRSQRT28PSZrb VR512:$src)>;
3937 def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
3938 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
3939 (VRSQRT28PDZrb VR512:$src)>;
3941 def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
3942 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
3943 (VRCP28PSZrb VR512:$src)>;
3944 def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
3945 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
3946 (VRCP28PDZrb VR512:$src)>;
// 512-bit packed sqrt skeleton: PS and PD, register and memory forms, each
// selected directly from OpNode (fsqrt at the instantiation site).
3948 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
3949 OpndItins itins_s, OpndItins itins_d> {
3950 def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
3951 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3952 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
3956 def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
3957 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3959 (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
3960 itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
3962 def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
3963 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3964 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
// NOTE(review): PDZrm loads via memopv16f32 and bitconverts to v8f64;
// memopv8f64 would express the same 512-bit load directly — verify intent.
3968 def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
3969 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3970 [(set VR512:$dst, (OpNode
3971 (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
3972 itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Scalar sqrt skeleton. For each of SS (f32, XS prefix) and SD (f64,
// XD + VEX_W) it emits:
//   *Zr      - pattern-less FR32X/FR64X two-source register form,
//   *Zr_Int  - isCodeGenOnly VR128X form selected from the F32Int/F64Int
//              intrinsic,
//   *Zm      - pattern-less memory form (mayLoad),
//   *Zm_Int  - isCodeGenOnly memory form using the sse_load_f32/f64
//              scalar-load fragments.
3976 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
3977 Intrinsic F32Int, Intrinsic F64Int,
3978 OpndItins itins_s, OpndItins itins_d> {
3979 def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
3980 (ins FR32X:$src1, FR32X:$src2),
3981 !strconcat(OpcodeStr,
3982 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3983 [], itins_s.rr>, XS, EVEX_4V;
3984 let isCodeGenOnly = 1 in
3985 def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
3986 (ins VR128X:$src1, VR128X:$src2),
3987 !strconcat(OpcodeStr,
3988 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3990 (F32Int VR128X:$src1, VR128X:$src2))],
3991 itins_s.rr>, XS, EVEX_4V;
3992 let mayLoad = 1 in {
3993 def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
3994 (ins FR32X:$src1, f32mem:$src2),
3995 !strconcat(OpcodeStr,
3996 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3997 [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
3998 let isCodeGenOnly = 1 in
3999 def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4000 (ins VR128X:$src1, ssmem:$src2),
4001 !strconcat(OpcodeStr,
4002 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4004 (F32Int VR128X:$src1, sse_load_f32:$src2))],
4005 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4007 def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
4008 (ins FR64X:$src1, FR64X:$src2),
4009 !strconcat(OpcodeStr,
4010 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4012 let isCodeGenOnly = 1 in
4013 def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4014 (ins VR128X:$src1, VR128X:$src2),
4015 !strconcat(OpcodeStr,
4016 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4018 (F64Int VR128X:$src1, VR128X:$src2))],
4019 itins_s.rr>, XD, EVEX_4V, VEX_W;
4020 let mayLoad = 1 in {
4021 def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
4022 (ins FR64X:$src1, f64mem:$src2),
4023 !strconcat(OpcodeStr,
4024 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4025 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
4026 let isCodeGenOnly = 1 in
// NOTE(review): SDZm_Int passes no itinerary argument (unlike SSZm_Int's
// itins_s.rm) — confirm this is intended.
4027 def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4028 (ins VR128X:$src1, sdmem:$src2),
4029 !strconcat(OpcodeStr,
4030 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4032 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
4033 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
// VSQRT: scalar (ss/sd intrinsics) and packed (fsqrt node) forms in one
// defm, so names like VSQRTSSZr and VSQRTPSZrr come from the same prefix.
4038 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4039 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4040 SSE_SQRTSS, SSE_SQRTSD>,
4041 avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
4042 SSE_SQRTPS, SSE_SQRTPD>;
4044 let Predicates = [HasAVX512] in {
// Unmasked packed sqrt intrinsics (zero passthru, all-ones mask,
// FROUND_CURRENT) select the plain register instructions.
4045 def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
4046 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
4047 (VSQRTPSZrr VR512:$src1)>;
4048 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
4049 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
4050 (VSQRTPDZrr VR512:$src1)>;
// Scalar fsqrt: the unused first source is IMPLICIT_DEF; memory forms are
// only used under OptForSize (folding the load lengthens the dependency).
4052 def : Pat<(f32 (fsqrt FR32X:$src)),
4053 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4054 def : Pat<(f32 (fsqrt (load addr:$src))),
4055 (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
4056 Requires<[OptForSize]>;
4057 def : Pat<(f64 (fsqrt FR64X:$src)),
4058 (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
4059 def : Pat<(f64 (fsqrt (load addr:$src))),
4060 (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
4061 Requires<[OptForSize]>;
4063 def : Pat<(f32 (X86frsqrt FR32X:$src)),
4064 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4065 def : Pat<(f32 (X86frsqrt (load addr:$src))),
4066 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4067 Requires<[OptForSize]>;
4069 def : Pat<(f32 (X86frcp FR32X:$src)),
4070 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4071 def : Pat<(f32 (X86frcp (load addr:$src))),
4072 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4073 Requires<[OptForSize]>;
// NOTE(review): these copy to FR32/FR64 rather than FR32X/FR64X — verify
// the intended (non-extended) register classes here.
4075 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
4076 (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
4077 (COPY_TO_REGCLASS VR128X:$src, FR32)),
4079 def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
4080 (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
4082 def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
4083 (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
4084 (COPY_TO_REGCLASS VR128X:$src, FR64)),
4086 def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
4087 (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
// Packed FP unary op with an immediate control byte (used by rndscale-style
// instructions): PS and PD variants, register and memory forms, selected
// from the V4F32Int/V2F64Int intrinsics with imm:$src2 forwarded.
4091 multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
4092 X86MemOperand x86memop, RegisterClass RC,
4093 PatFrag mem_frag32, PatFrag mem_frag64,
4094 Intrinsic V4F32Int, Intrinsic V2F64Int,
4096 let ExeDomain = SSEPackedSingle in {
4097 // Intrinsic operation, reg.
4098 // Vector intrinsic operation, reg
4099 def PSr : AVX512AIi8<opcps, MRMSrcReg,
4100 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4101 !strconcat(OpcodeStr,
4102 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4103 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
4105 // Vector intrinsic operation, mem
4106 def PSm : AVX512AIi8<opcps, MRMSrcMem,
4107 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4108 !strconcat(OpcodeStr,
4109 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4111 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
4112 EVEX_CD8<32, VForm>;
4113 } // ExeDomain = SSEPackedSingle
4115 let ExeDomain = SSEPackedDouble in {
4116 // Vector intrinsic operation, reg
4117 def PDr : AVX512AIi8<opcpd, MRMSrcReg,
4118 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4119 !strconcat(OpcodeStr,
4120 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4121 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
4123 // Vector intrinsic operation, mem
4124 def PDm : AVX512AIi8<opcpd, MRMSrcMem,
4125 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4126 !strconcat(OpcodeStr,
4127 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4129 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
4130 EVEX_CD8<64, VForm>;
4131 } // ExeDomain = SSEPackedDouble
// Scalar two-source FP op with an immediate control byte ($src3). For each
// of SS and SD it provides: a pattern-less hasSideEffects = 0 FR32X/FR64X
// form, an isCodeGenOnly VR128X intrinsic form, and a memory intrinsic form
// using the sse_load_f32/f64 fragments.
// NOTE(review): the remaining template parameters (OpcodeStr, F32Int,
// F64Int) are on lines elided from this extract.
4134 multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
4138 let ExeDomain = GenericDomain in {
4140 let hasSideEffects = 0 in
4141 def SSr : AVX512AIi8<opcss, MRMSrcReg,
4142 (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
4143 !strconcat(OpcodeStr,
4144 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4147 // Intrinsic operation, reg.
4148 let isCodeGenOnly = 1 in
4149 def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
4150 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4151 !strconcat(OpcodeStr,
4152 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4153 [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
4155 // Intrinsic operation, mem.
4156 def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
4157 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
4158 !strconcat(OpcodeStr,
4159 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4160 [(set VR128X:$dst, (F32Int VR128X:$src1,
4161 sse_load_f32:$src2, imm:$src3))]>,
4162 EVEX_CD8<32, CD8VT1>;
4165 let hasSideEffects = 0 in
4166 def SDr : AVX512AIi8<opcsd, MRMSrcReg,
4167 (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
4168 !strconcat(OpcodeStr,
4169 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4172 // Intrinsic operation, reg.
4173 let isCodeGenOnly = 1 in
4174 def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
4175 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4176 !strconcat(OpcodeStr,
4177 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4178 [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
4181 // Intrinsic operation, mem.
4182 def SDm : AVX512AIi8<opcsd, MRMSrcMem,
4183 (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
4184 !strconcat(OpcodeStr,
4185 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4187 (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
4188 VEX_W, EVEX_CD8<64, CD8VT1>;
4189 } // ExeDomain = GenericDomain
// Packed round-to-scale skeleton: register and memory forms, each taking a
// rounding-control immediate ($src2); execution domain supplied by the
// instantiation site.
4192 multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
4193 X86MemOperand x86memop, RegisterClass RC,
4194 PatFrag mem_frag, Domain d> {
4195 let ExeDomain = d in {
4196 // Intrinsic operation, reg.
4197 // Vector intrinsic operation, reg
4198 def r : AVX512AIi8<opc, MRMSrcReg,
4199 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4200 !strconcat(OpcodeStr,
4201 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4204 // Vector intrinsic operation, mem
4205 def m : AVX512AIi8<opc, MRMSrcMem,
4206 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4207 !strconcat(OpcodeStr,
4208 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// 512-bit packed rndscale instantiations, plus patterns for the masked
// intrinsics in their no-op-mask form: passthru equal to the source and an
// all-ones mask select the plain register instruction.
4214 defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
4215 memopv16f32, SSEPackedSingle>, EVEX_V512,
4216 EVEX_CD8<32, CD8VF>;
4218 def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
4219 imm:$src2, (v16f32 VR512:$src1), (i16 -1),
4221 (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
4224 defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
4225 memopv8f64, SSEPackedDouble>, EVEX_V512,
4226 VEX_W, EVEX_CD8<64, CD8VF>;
4228 def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
4229 imm:$src2, (v8f64 VR512:$src1), (i8 -1),
4231 (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
// Scalar round-to-scale skeleton: two-source forms plus an i32i8imm
// control byte ($src3).
// NOTE(review): $src3 is in the ins list but does not appear in the asm
// string, which prints only $src2, $src1 — confirm whether the immediate
// is meant to be printed here.
4233 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
4234 Operand x86memop, RegisterClass RC, Domain d> {
4235 let ExeDomain = d in {
4236 def r : AVX512AIi8<opc, MRMSrcReg,
4237 (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
4238 !strconcat(OpcodeStr,
4239 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4242 def m : AVX512AIi8<opc, MRMSrcMem,
4243 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
4244 !strconcat(OpcodeStr,
4245 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// Scalar instantiations over FR32X/FR64X.
4250 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
4251 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4253 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
4254 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
// Lower the generic rounding SDNodes to rndscale with a fixed control
// immediate: ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
// fnearbyint -> 0xC. Scalar patterns feed IMPLICIT_DEF as the unused
// first source; packed patterns use the 512-bit instructions directly.
4256 def : Pat<(ffloor FR32X:$src),
4257 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
4258 def : Pat<(f64 (ffloor FR64X:$src)),
4259 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
4260 def : Pat<(f32 (fnearbyint FR32X:$src)),
4261 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
4262 def : Pat<(f64 (fnearbyint FR64X:$src)),
4263 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4264 def : Pat<(f32 (fceil FR32X:$src)),
4265 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
4266 def : Pat<(f64 (fceil FR64X:$src)),
4267 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
4268 def : Pat<(f32 (frint FR32X:$src)),
4269 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
4270 def : Pat<(f64 (frint FR64X:$src)),
4271 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
4272 def : Pat<(f32 (ftrunc FR32X:$src)),
4273 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
4274 def : Pat<(f64 (ftrunc FR64X:$src)),
4275 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
4277 def : Pat<(v16f32 (ffloor VR512:$src)),
4278 (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
4279 def : Pat<(v16f32 (fnearbyint VR512:$src)),
4280 (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
4281 def : Pat<(v16f32 (fceil VR512:$src)),
4282 (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
4283 def : Pat<(v16f32 (frint VR512:$src)),
4284 (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
4285 def : Pat<(v16f32 (ftrunc VR512:$src)),
4286 (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
4288 def : Pat<(v8f64 (ffloor VR512:$src)),
4289 (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
4290 def : Pat<(v8f64 (fnearbyint VR512:$src)),
4291 (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
4292 def : Pat<(v8f64 (fceil VR512:$src)),
4293 (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
4294 def : Pat<(v8f64 (frint VR512:$src)),
4295 (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
4296 def : Pat<(v8f64 (ftrunc VR512:$src)),
4297 (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
4299 //-------------------------------------------------
4300 // Integer truncate and extend operations
4301 //-------------------------------------------------
// Integer down-convert (vpmov*) skeleton. Forms:
//   rr   - register truncate,
//   rrk  - merge-masked ({k}) register form,
//   rrkz - zero-masked ({k}{z}) register form,
//   mr   - store-to-memory form,
//   mrk  - masked store form.
// Note this is a destination-narrowing op, hence MRMDestReg/MRMDestMem.
4303 multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
4304 RegisterClass dstRC, RegisterClass srcRC,
4305 RegisterClass KRC, X86MemOperand x86memop> {
4306 def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4308 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4311 def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4312 (ins KRC:$mask, srcRC:$src),
4313 !strconcat(OpcodeStr,
4314 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
4317 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4318 (ins KRC:$mask, srcRC:$src),
4319 !strconcat(OpcodeStr,
4320 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4323 def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
4324 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4327 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
4328 (ins x86memop:$dst, KRC:$mask, srcRC:$src),
4329 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
// vpmov* down-converts from 512-bit sources. Per destination element width,
// three flavors share each shape: plain truncate, signed saturate (vpmovs*)
// and unsigned saturate (vpmovus*). The EVEX_CD8 tuple reflects the output
// fraction of the vector (CD8VO = 1/8, CD8VQ = 1/4, CD8VH = 1/2).
4333 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4334 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4335 defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
4336 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4337 defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
4338 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4339 defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
4340 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4341 defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
4342 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4343 defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
4344 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4345 defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
4346 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4347 defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
4348 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4349 defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
4350 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4351 defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
4352 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4353 defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
4354 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4355 defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
4356 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4357 defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
4358 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4359 defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
4360 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4361 defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
4362 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
// Generic vector-truncate nodes select the plain rr forms; the masked
// X86vtruncm node selects the zero-masked (rrkz) forms.
4364 def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
4365 def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
4366 def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
4367 def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
4368 def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
4370 def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4371 (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
4372 def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4373 (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
4374 def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4375 (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
4376 def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4377 (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
// Integer extend (vpmovzx*/vpmovsx*) skeleton: plain, merge-masked (rrk)
// and zero-masked (rrkz) register forms, plus the corresponding rm/rmk/rmkz
// memory forms (mayLoad) whose loads go through bitconvert of mem_frag.
// OpNode is X86vzext or X86vsext at the instantiation site.
4380 multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4381 RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
4382 PatFrag mem_frag, X86MemOperand x86memop,
4383 ValueType OpVT, ValueType InVT> {
4385 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4387 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4388 [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
4390 def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4391 (ins KRC:$mask, SrcRC:$src),
4392 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4395 def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4396 (ins KRC:$mask, SrcRC:$src),
4397 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4400 let mayLoad = 1 in {
4401 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4402 (ins x86memop:$src),
4403 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4405 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
4408 def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4409 (ins KRC:$mask, x86memop:$src),
4410 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4414 def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4415 (ins KRC:$mask, x86memop:$src),
4416 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// 512-bit zero- and sign-extend instantiations. Each pairs a 128/256-bit
// source with its widened 512-bit element type (e.g. v16i8 -> v16i32).
4422 defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
4423 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4425 defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
4426 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4428 defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
4429 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4430 EVEX_CD8<16, CD8VH>;
4431 defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
4432 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4433 EVEX_CD8<16, CD8VQ>;
4434 defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
4435 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4436 EVEX_CD8<32, CD8VH>;
4438 defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
4439 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4441 defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
4442 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4444 defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
4445 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4446 EVEX_CD8<16, CD8VH>;
4447 defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
4448 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4449 EVEX_CD8<16, CD8VQ>;
4450 defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
4451 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4452 EVEX_CD8<32, CD8VH>;
4454 //===----------------------------------------------------------------------===//
4455 // GATHER - SCATTER Operations
// Masked gather skeleton: single rm form whose destination is earlyclobber
// and tied to $src1, and whose mask is both consumed and written back
// ($mask = $mask_wb) — gathers clear mask bits as elements complete.
4457 multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4458 RegisterClass RC, X86MemOperand memop> {
4460 Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
4461 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
4462 (ins RC:$src1, KRC:$mask, memop:$src2),
4463 !strconcat(OpcodeStr,
4464 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// Gather instantiations. vgatherqps/vpgatherqd produce 256-bit results
// (8 x 32-bit from 8 x 64-bit indices), hence VR256X destinations.
4468 let ExeDomain = SSEPackedDouble in {
4469 defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
4470 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4471 defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
4472 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4475 let ExeDomain = SSEPackedSingle in {
4476 defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
4477 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4478 defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
4479 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4482 defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
4483 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4484 defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
4485 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4487 defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
4488 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4489 defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
4490 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Multiclass for AVX-512 masked scatter stores. Emits only the memory-
// destination form ("mr"): mayStore = 1, and the writemask input $mask is
// tied to the $mask_wb output.
// NOTE(review): the embedded numbering stops at 4498; the trailing pattern
// list and closing brace of this multiclass (orig. 4499-4501) are missing
// from this extract.
4492 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4493 RegisterClass RC, X86MemOperand memop> {
4494 let mayStore = 1, Constraints = "$mask = $mask_wb" in
4495 def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
4496 (ins memop:$dst, KRC:$mask, RC:$src2),
4497 !strconcat(OpcodeStr,
4498 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// 512-bit scatter instantiations, mirroring the gather definitions above
// (same index/element naming, CD8VT1 tuple type).
// NOTE(review): the embedded numbering skips 4507-4508, 4514-4515 and 4520,
// 4525-4526 -- most plausibly closing braces of the `let` blocks and blank
// separators; verify against upstream.
// Packed double-precision scatters.
4502 let ExeDomain = SSEPackedDouble in {
4503 defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
4504 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4505 defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
4506 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Packed single-precision scatters (qword-indexed form stores 8 elements
// from a VR256X source).
4509 let ExeDomain = SSEPackedSingle in {
4510 defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
4511 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4512 defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
4513 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Packed integer scatters.
4516 defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
4517 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4518 defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
4519 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4521 defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
4522 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4523 defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
4524 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Multiclass for the AVX-512 PF (prefetch) gather/scatter hints. These take
// only a writemask and a vector-index memory operand, produce no register
// results, and are modeled as hasSideEffects = 1 under the HasPFI predicate.
// The Format argument F carries the ModRM.reg opcode extension that selects
// the specific hint (see the MRM1m/MRM2m/MRM5m/MRM6m instantiations below).
// NOTE(review): the embedded numbering stops at 4531; the trailing pattern
// list and closing brace of this multiclass (orig. 4532-4534) are missing
// from this extract.
4527 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
4528 RegisterClass KRC, X86MemOperand memop> {
4529 let Predicates = [HasPFI], hasSideEffects = 1 in
4530 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
4531 !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
// Prefetch-hint instantiations. Opcode 0xC6 covers the dword-indexed forms
// and 0xC7 the qword-indexed forms; the MRM*m Format encodes the hint:
// MRM1m = gatherpf0, MRM2m = gatherpf1, MRM5m = scatterpf0,
// MRM6m = scatterpf1 (visible from the mnemonic/Format pairing below).
// Gather prefetch, hint T0.
4535 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
4536 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4538 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
4539 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4541 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
4542 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4544 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
4545 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Gather prefetch, hint T1.
4547 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
4548 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4550 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
4551 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4553 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
4554 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4556 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
4557 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scatter prefetch, hint T0.
4559 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
4560 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4562 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
4563 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4565 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
4566 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4568 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
4569 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scatter prefetch, hint T1.
4571 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
4572 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4574 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
4575 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4577 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
4578 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4580 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
4581 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4582 //===----------------------------------------------------------------------===//
4583 // VSHUFPS - VSHUFPD Operations
4584 //===----------------------------------------------------------------------===//
// Multiclass for the 512-bit VSHUFPS/VSHUFPD shuffles: two sources plus an
// 8-bit immediate control, selected via the X86Shufp node. Emits a reg-mem
// form (rmi) and a reg-reg form (rri); `d` is the execution-domain argument
// forwarded to AVX512PIi8.
// NOTE(review): the embedded numbering jumps 4586 -> 4588, so the tail of
// the template parameter list -- which must declare `d` (used below) and
// close with `{` -- is missing from this extract.
4585 multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
4586 ValueType vt, string OpcodeStr, PatFrag mem_frag,
// Register-memory form: second source loaded through mem_frag.
4588 def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
4589 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
4590 !strconcat(OpcodeStr,
4591 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4592 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
4593 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4594 EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
// Register-register form.
4595 def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
4596 (ins RC:$src1, RC:$src2, i8imm:$src3),
4597 !strconcat(OpcodeStr,
4598 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4599 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
4600 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4601 EVEX_4V, Sched<[WriteShuffle]>;
// 512-bit VSHUFPS/VSHUFPD instantiations.
4604 defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
4605 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
4606 defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
4607 SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Integer X86Shufp nodes have no instructions of their own; reuse the FP
// shuffles (v16i32 -> VSHUFPS, v8i64 -> VSHUFPD).
4609 def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4610 (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4611 def : Pat<(v16i32 (X86Shufp VR512:$src1,
4612 (memopv16i32 addr:$src2), (i8 imm:$imm))),
4613 (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
4615 def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4616 (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4617 def : Pat<(v8i64 (X86Shufp VR512:$src1,
4618 (memopv8i64 addr:$src2), (i8 imm:$imm))),
4619 (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// VALIGN{D,Q}: align two vectors by an immediate element count. Built over
// an X86VectorVTInfo record `_` (see the class in the file header), so the
// register class, writemask class, memory operand and VTs are all derived
// from one argument. The masked register form goes through AVX512_masking.
// Note the deliberate operand swap: both the rri pattern and the float Pat
// below feed ($src2, $src1) to X86VAlign / the instruction, i.e. the
// instruction's source order is reversed relative to the SDNode's.
// NOTE(review): the embedded numbering skips 4624, 4627, 4630, 4634-4635
// and 4641-4642 -- the asm-string argument of the masked form, part of the
// rri pattern, a probable `let mayLoad = 1 in`, and the rmi pattern list /
// closing brace are missing from this extract.
4621 multiclass avx512_valign<X86VectorVTInfo _> {
4622 defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
4623 (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
4625 "$src3, $src2, $src1", "$src1, $src2, $src3",
4626 (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
4628 _.VT, _.RC, _.KRCWM>,
4629 AVX512AIi8Base, EVEX_4V;
4631 // Also match valign of packed floats.
4632 def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
4633 (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
// Unmasked memory form (no pattern visible in this extract).
4636 def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
4637 (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
4638 !strconcat("valign"##_.Suffix,
4639 " \t{$src3, $src2, $src1, $dst|"
4640 "$dst, $src1, $src2, $src3}"),
// Instantiate VALIGND (dword elements) and VALIGNQ (qword elements) over
// the 512-bit VT-info records declared at the top of the file.
4643 defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4644 defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4646 // Helper fragments to match sext vXi1 to vXiY.
// An arithmetic shift right by (element width - 1) replicates each
// element's sign bit across the element, i.e. the sign-extension of the
// per-element i1 sign; used by the VPABS patterns below.
4647 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
4648 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
// Multiclass for VPABS (packed absolute value) with the full AVX-512
// masking matrix: rr/rrk/rrkz register forms, rm/rmk/rmkz memory forms, and
// rmb/rmbk/rmbkz embedded-broadcast forms. BrdcstStr is the "{1toN}"
// assembly suffix passed by the instantiations.
// NOTE(review): the register forms use an RC parameter for (outs), but the
// memory forms hard-code VR512 -- works for the current VR512
// instantiations, but worth confirming/unifying upstream.
// NOTE(review): the embedded numbering skips 4653, 4656, 4659, 4663, 4668,
// 4673, 4678, 4683, 4693 and 4695-4697 -- the final template-parameter line
// (which must declare BrdcstStr, used below), per-def pattern lists
// ("[]>, EVEX...;"), part of the rmbkz asm string, and the closing braces
// are missing from this extract.
4650 multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
4651 RegisterClass KRC, RegisterClass RC,
4652 X86MemOperand x86memop, X86MemOperand x86scalar_mop,
// Register forms: plain, merge-masked ({k}), zero-masked ({k}{z}).
4654 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4655 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4657 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4658 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4660 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4661 !strconcat(OpcodeStr,
4662 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// Memory and broadcast-memory forms, same masking matrix.
4664 let mayLoad = 1 in {
4665 def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4666 (ins x86memop:$src),
4667 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4669 def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4670 (ins KRC:$mask, x86memop:$src),
4671 !strconcat(OpcodeStr,
4672 " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4674 def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4675 (ins KRC:$mask, x86memop:$src),
4676 !strconcat(OpcodeStr,
4677 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4679 def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4680 (ins x86scalar_mop:$src),
4681 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4682 ", $dst|$dst, ${src}", BrdcstStr, "}"),
4684 def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4685 (ins KRC:$mask, x86scalar_mop:$src),
4686 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4687 ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
4688 []>, EVEX, EVEX_B, EVEX_K;
4689 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4690 (ins KRC:$mask, x86scalar_mop:$src),
4691 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4692 ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
4694 []>, EVEX, EVEX_B, EVEX_KZ;
// VPABSD/VPABSQ 512-bit instantiations.
4698 defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
4699 i512mem, i32mem, "{1to16}">, EVEX_V512,
4700 EVEX_CD8<32, CD8VF>;
4701 defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
4702 i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
4703 EVEX_CD8<64, CD8VF>;
// Match the sign-mask abs idiom -- combining x with its sign-extension (the
// v*i1sext* PatLeafs above) -- onto the plain VPABS register forms.
// NOTE(review): the embedded numbering skips 4704-4705 and 4709, so the
// `def : Pat<(...` header lines of these two patterns are missing from this
// extract; only their operand tails are visible below.
4706 (bc_v16i32 (v16i1sextv16i32)),
4707 (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
4708 (VPABSDZrr VR512:$src)>;
4710 (bc_v8i64 (v8i1sextv8i64)),
4711 (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
4712 (VPABSQZrr VR512:$src)>;
// Map the masked pabs intrinsics with a zero passthru and an all-ones mask
// (i.e. effectively unmasked) onto the plain register forms.
4714 def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
4715 (v16i32 immAllZerosV), (i16 -1))),
4716 (VPABSDZrr VR512:$src)>;
4717 def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
4718 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
4719 (VPABSQZrr VR512:$src)>;
// Multiclass shared by the CDI unary ops (VPCONFLICT / VPLZCNT, see the
// instantiations below): rr/rm/rmb unmasked forms, rrkz/rmkz/rmbkz
// zero-masked forms, and rrk/rmk/rmbk merge-masked forms whose passthru
// $src1 is tied to $dst. BrdcstStr is the "{1toN}" broadcast suffix.
// NOTE(review): the rr form's asm string has a stray space before '|'
// (" ${dst} |") that every other form lacks -- likely a typo; left
// unchanged here because it is a runtime string (assembly syntax).
// NOTE(review): the embedded numbering skips 4726, 4728, 4732, 4737, 4742,
// 4747, 4752, 4754, 4760, 4765 and 4771-4773 -- the rr (ins ...) line,
// per-def pattern lists, part of the rmbkz asm string, and the closing
// braces are missing from this extract.
4721 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
4722 RegisterClass RC, RegisterClass KRC,
4723 X86MemOperand x86memop,
4724 X86MemOperand x86scalar_mop, string BrdcstStr> {
// Unmasked forms.
4725 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4727 !strconcat(OpcodeStr, " \t{$src, ${dst} |${dst}, $src}"),
4729 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4730 (ins x86memop:$src),
4731 !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
4733 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4734 (ins x86scalar_mop:$src),
4735 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4736 ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
// Zero-masked forms ({k}{z}).
4738 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4739 (ins KRC:$mask, RC:$src),
4740 !strconcat(OpcodeStr,
4741 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4743 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4744 (ins KRC:$mask, x86memop:$src),
4745 !strconcat(OpcodeStr,
4746 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4748 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4749 (ins KRC:$mask, x86scalar_mop:$src),
4750 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4751 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
4753 []>, EVEX, EVEX_KZ, EVEX_B;
// Merge-masked forms: the passthru operand $src1 is tied to the result.
4755 let Constraints = "$src1 = $dst" in {
4756 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4757 (ins RC:$src1, KRC:$mask, RC:$src2),
4758 !strconcat(OpcodeStr,
4759 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
4761 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4762 (ins RC:$src1, KRC:$mask, x86memop:$src2),
4763 !strconcat(OpcodeStr,
4764 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
4766 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4767 (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
4768 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
4769 ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
4770 []>, EVEX, EVEX_K, EVEX_B;
// VPCONFLICT instantiations (CDI). The intrinsic patterns select the
// merge-masked rrk form, moving the GPR mask into the mask-register class
// with COPY_TO_REGCLASS; note the intrinsic lists (src, passthru) while the
// instruction takes (passthru-tied $src1, mask, $src2), hence the swap.
// NOTE(review): the embedded numbering skips 4778-4779, 4783-4785, 4787,
// 4790, 4792 and 4795 -- closing braces and, in both patterns, the line
// binding the mask operand (GR16:$mask / GR8:$mask is used in the output
// without a visible binding) are missing from this extract.
4774 let Predicates = [HasCDI] in {
4775 defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
4776 i512mem, i32mem, "{1to16}">,
4777 EVEX_V512, EVEX_CD8<32, CD8VF>;
4780 defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
4781 i512mem, i64mem, "{1to8}">,
4782 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
4786 def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
4788 (VPCONFLICTDrrk VR512:$src1,
4789 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
4791 def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
4793 (VPCONFLICTQrrk VR512:$src1,
4794 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// VPLZCNT instantiations (CDI), reusing the avx512_conflict multiclass
// since both are unary ops with the same masking/broadcast matrix. The
// intrinsic patterns mirror the VPCONFLICT ones; the plain ctlz patterns
// select the unmasked rr/rm forms.
// NOTE(review): the embedded numbering skips 4800-4801, 4805-4807, 4809,
// 4812, 4814 and 4817 -- closing braces and the mask-operand binding lines
// of the intrinsic patterns are missing from this extract (same shape as
// the VPCONFLICT patterns above).
4796 let Predicates = [HasCDI] in {
4797 defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
4798 i512mem, i32mem, "{1to16}">,
4799 EVEX_V512, EVEX_CD8<32, CD8VF>;
4802 defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
4803 i512mem, i64mem, "{1to8}">,
4804 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
4808 def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
4810 (VPLZCNTDrrk VR512:$src1,
4811 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
4813 def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
4815 (VPLZCNTQrrk VR512:$src1,
4816 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// Generic count-leading-zeros lowering.
4818 def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
4819 (VPLZCNTDrm addr:$src)>;
4820 def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
4821 (VPLZCNTDrr VR512:$src)>;
4822 def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
4823 (VPLZCNTQrm addr:$src)>;
4824 def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
4825 (VPLZCNTQrr VR512:$src)>;
// Stores of constant i1 values: both representations of true (-1 and 1)
// are stored as byte 1, false as byte 0.
4827 def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
4828 def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
4829 def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
// Storing a VK1 mask register: widen into the VK16 class and store via
// the 16-bit mask-store instruction.
// NOTE(review): KMOVWmk writes a full 16-bit value; if the destination is a
// genuine 1-byte/1-bit slot this overwrites the following byte -- verify
// this is intended.
4831 def : Pat<(store VK1:$src, addr:$dst),
4832 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
// Truncating-store fragment that matches only when the in-memory type is i1.
// NOTE(review): the embedded numbering jumps 4836 -> 4839, so the closing
// "}]>;" of this PatFrag is missing from this extract.
4834 def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
4835 (truncstore node:$val, node:$ptr), [{
4836 return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
// An i1 truncstore of a GR8 value is just a plain byte store.
4839 def : Pat<(truncstorei1 GR8:$src, addr:$dst),
4840 (MOV8mr addr:$dst, GR8:$src)>;