1 // Group template arguments that can be derived from the vector type (EltNum x
2 // EltVT). These are things like the register class for the writemask, etc.
3 // The idea is to pass one of these as the template argument rather than the
4 // individual arguments.
5 class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
9 // Corresponding mask register class.
10 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
12 // Corresponding write-mask register class.
13 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
15 // The GPR register class that can hold the write mask. Use GR8 for fewer
16 // than 8 elements. Use shift-right and equal to work around the lack of
19 !cast<RegisterClass>("GR" #
20 !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
22 // Suffix used in the instruction mnemonic.
23 string Suffix = suffix;
25 string VTName = "v" # NumElts # EltVT;
28 ValueType VT = !cast<ValueType>(VTName);
30 string EltTypeName = !cast<string>(EltVT);
31 // Size of the element type in bits, e.g. 32 for v16i32.
32 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
33 int EltSize = EltVT.Size;
35 // "i" for integer types and "f" for floating-point types
36 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
38 // Size of RC in bits, e.g. 512 for VR512.
41 // The corresponding memory operand, e.g. i512mem for VR512.
42 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
43 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
46 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
47 // due to load promotion during legalization
48 PatFrag LdFrag = !cast<PatFrag>("load" #
49 !if (!eq (TypeVariantName, "i"),
50 !if (!eq (Size, 128), "v2i64",
51 !if (!eq (Size, 256), "v4i64",
53 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
55 // The corresponding float type, e.g. v16f32 for v16i32
56 // Note: For EltSize < 32, FloatVT is illegal and TableGen
57 // fails to compile, so we choose FloatVT = VT
58 ValueType FloatVT = !cast<ValueType>(
59 !if (!eq (!srl(EltSize,5),0),
61 !if (!eq(TypeVariantName, "i"),
62 "v" # NumElts # "f" # EltSize,
65 // The string to specify embedded broadcast in assembly.
66 string BroadcastStr = "{1to" # NumElts # "}";
68 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
69 !if (!eq (Size, 256), sub_ymm, ?));
71 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
72 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// 512-bit vector type infos (RC = VR512).
76 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
77 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
78 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
79 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
81 // "x" in v32i8x_info means RC = VR256X
82 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
83 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
84 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
85 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
// 128-bit variants likewise use the EVEX-capable VR128X register class.
87 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
88 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
89 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
90 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
// Bundles the 512-, 256- and 128-bit X86VectorVTInfo instances for one
// element type, so a VL multiclass can take a single template argument
// and pick the width it needs via info512/info256/info128.
92 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
93 X86VectorVTInfo i128> {
94 X86VectorVTInfo info512 = i512;
95 X86VectorVTInfo info256 = i256;
96 X86VectorVTInfo info128 = i128;
99 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
101 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
103 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
105 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
109 // Common base class of AVX512_masking and AVX512_masking_3src.
110 multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
111 dag MaskingIns, dag ZeroMaskingIns,
113 string AttSrcAsm, string IntelSrcAsm,
114 dag RHS, dag MaskingRHS, ValueType OpVT,
115 RegisterClass RC, RegisterClass KRC,
116 string MaskingConstraint = ""> {
117 def NAME: AVX512<O, F, Outs, Ins,
118 OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
119 "$dst, "#IntelSrcAsm#"}",
120 [(set RC:$dst, RHS)]>;
122 // Prefer over VMOV*rrk Pat<>
123 let AddedComplexity = 20 in
124 def NAME#k: AVX512<O, F, Outs, MaskingIns,
125 OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
126 "$dst {${mask}}, "#IntelSrcAsm#"}",
127 [(set RC:$dst, MaskingRHS)]>,
129 // In case of the 3src subclass this is overridden with a let.
130 string Constraints = MaskingConstraint;
132 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
133 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
134 OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
135 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
137 (vselect KRC:$mask, RHS,
139 (v16i32 immAllZerosV)))))]>,
143 // This multiclass generates the unconditional/non-masking, the masking and
144 // the zero-masking variant of the instruction. In the masking case, the
145 // perserved vector elements come from a new dummy input operand tied to $dst.
146 multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
148 string AttSrcAsm, string IntelSrcAsm,
149 dag RHS, ValueType OpVT, RegisterClass RC,
151 AVX512_masking_common<O, F, Outs,
153 !con((ins RC:$src0, KRC:$mask), Ins),
154 !con((ins KRC:$mask), Ins),
155 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
156 (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
159 // Similar to AVX512_masking but in this case one of the source operands
160 // ($src1) is already tied to $dst so we just use that for the preserved
161 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
163 multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
165 string AttSrcAsm, string IntelSrcAsm,
166 dag RHS, ValueType OpVT,
167 RegisterClass RC, RegisterClass KRC> :
168 AVX512_masking_common<O, F, Outs,
169 !con((ins RC:$src1), NonTiedIns),
170 !con((ins RC:$src1, KRC:$mask), NonTiedIns),
171 !con((ins RC:$src1, KRC:$mask), NonTiedIns),
172 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
173 (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
175 // Bitcasts between 512-bit vector types. Return the original type since
176 // no instruction is needed for the conversion
177 let Predicates = [HasAVX512] in {
178 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
179 def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
180 def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
181 def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
182 def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
183 def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
184 def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
185 def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
186 def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
187 def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
188 def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
189 def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
190 def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
191 def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
192 def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
193 def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
194 def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
195 def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
196 def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
197 def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
// v32i16 <- {v8i64, v16i32, v64i8, v8f64, v16f32} bitcasts are no-ops.
// The v16f32 source pattern was duplicated verbatim in the original; the
// redundant copy is removed (a duplicate anonymous Pat adds no matching
// power and only clutters the generated tables).
198 def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
199 def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
200 def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
201 def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
202 def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
204 def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
205 def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
206 def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
207 def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
208 def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
210 def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
211 def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
212 def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
213 def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
214 def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
215 def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
216 def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
217 def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
218 def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
219 def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
220 def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
221 def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
222 def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
223 def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
224 def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
225 def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
226 def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
227 def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
228 def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
229 def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
230 def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
231 def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
232 def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
233 def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
234 def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
235 def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
236 def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
237 def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
238 def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
239 def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
241 // Bitcasts between 256-bit vector types. Return the original type since
242 // no instruction is needed for the conversion
243 def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
244 def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
245 def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
246 def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
247 def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
248 def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
249 def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
250 def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
251 def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
252 def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
253 def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
254 def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
255 def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
256 def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
257 def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
258 def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
259 def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
260 def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
261 def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
262 def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
263 def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
264 def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
265 def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
266 def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
267 def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
268 def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
269 def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
270 def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
271 def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
272 def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
276 // AVX-512: VPXOR instruction writes zero to its upper part, it's safe build zeros.
279 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
280 isPseudo = 1, Predicates = [HasAVX512] in {
281 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
282 [(set VR512:$dst, (v16f32 immAllZerosV))]>;
285 let Predicates = [HasAVX512] in {
286 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
287 def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
288 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
291 //===----------------------------------------------------------------------===//
292 // AVX-512 - VECTOR INSERT
295 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
296 def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
297 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
298 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
299 []>, EVEX_4V, EVEX_V512;
301 def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
302 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
303 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
304 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
307 // -- 64x4 fp form --
308 let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
309 def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
310 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
311 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
312 []>, EVEX_4V, EVEX_V512, VEX_W;
314 def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
315 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
316 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
317 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
319 // -- 32x4 integer form --
320 let hasSideEffects = 0 in {
321 def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
322 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
323 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
324 []>, EVEX_4V, EVEX_V512;
326 def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
327 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
328 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
329 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
332 let hasSideEffects = 0 in {
334 def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
335 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
336 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
337 []>, EVEX_4V, EVEX_V512, VEX_W;
339 def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
340 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
341 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
342 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
345 def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
346 (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
347 (INSERT_get_vinsert128_imm VR512:$ins))>;
348 def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
349 (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
350 (INSERT_get_vinsert128_imm VR512:$ins))>;
351 def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
352 (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
353 (INSERT_get_vinsert128_imm VR512:$ins))>;
354 def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
355 (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
356 (INSERT_get_vinsert128_imm VR512:$ins))>;
358 def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
359 (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
360 (INSERT_get_vinsert128_imm VR512:$ins))>;
361 def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
362 (bc_v4i32 (loadv2i64 addr:$src2)),
363 (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
364 (INSERT_get_vinsert128_imm VR512:$ins))>;
365 def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
366 (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
367 (INSERT_get_vinsert128_imm VR512:$ins))>;
368 def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
369 (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
370 (INSERT_get_vinsert128_imm VR512:$ins))>;
372 def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
373 (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
374 (INSERT_get_vinsert256_imm VR512:$ins))>;
375 def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
376 (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
377 (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit integer subvector inserts. These select VINSERTI64x4rr (a
// 256-bit subvector insert) and transform the index with
// INSERT_get_vinsert256_imm, so the matched PatFrag must be
// vinsert256_insert; the original used vinsert128_insert, which is
// inconsistent with the v4i64/v8i32 subvector operands and with the
// neighboring v16f32/v8f64 patterns above.
378 def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
379 (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
380 (INSERT_get_vinsert256_imm VR512:$ins))>;
381 def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
382 (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
383 (INSERT_get_vinsert256_imm VR512:$ins))>;
385 def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
386 (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
387 (INSERT_get_vinsert256_imm VR512:$ins))>;
388 def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
389 (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
390 (INSERT_get_vinsert256_imm VR512:$ins))>;
391 def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
392 (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
393 (INSERT_get_vinsert256_imm VR512:$ins))>;
394 def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
395 (bc_v8i32 (loadv4i64 addr:$src2)),
396 (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
397 (INSERT_get_vinsert256_imm VR512:$ins))>;
399 // vinsertps - insert f32 to XMM
400 def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
401 (ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
402 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
403 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
405 def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
406 (ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
407 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
408 [(set VR128X:$dst, (X86insertps VR128X:$src1,
409 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
410 imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
412 //===----------------------------------------------------------------------===//
413 // AVX-512 VECTOR EXTRACT
416 multiclass vextract_for_size<int Opcode,
417 X86VectorVTInfo From, X86VectorVTInfo To,
418 X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
419 PatFrag vextract_extract,
420 SDNodeXForm EXTRACT_get_vextract_imm> {
421 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
422 def rr : AVX512AIi8<Opcode, MRMDestReg, (outs To.RC:$dst),
423 (ins VR512:$src1, i8imm:$idx),
424 "vextract" # To.EltTypeName # "x4\t{$idx, $src1, $dst|"
425 "$dst, $src1, $idx}",
426 [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
430 def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
431 (ins To.MemOp:$dst, VR512:$src1, i8imm:$src2),
432 "vextract" # To.EltTypeName # "x4\t{$src2, $src1, $dst|"
433 "$dst, $src1, $src2}",
434 []>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
437 // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
439 def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
440 (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
442 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
444 // A 128/256-bit subvector extract from the first 512-bit vector position is
445 // a subregister copy that needs no instruction.
446 def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
448 (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;
450 // And for the alternative types.
451 def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
453 (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
// Instantiates the 32x4 and 64x4 extract variants for one scalar-type
// pair (f32/f64 or i32/i64). Each vextract_for_size gets the primary
// From/To type infos plus the "alternative" (bitcast-compatible) pair
// used for the codegen patterns with the other element width.
456 multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
457 ValueType EltVT64, int Opcode64> {
458 defm NAME # "32x4" : vextract_for_size<Opcode32,
459 X86VectorVTInfo<16, EltVT32, VR512>,
460 X86VectorVTInfo< 4, EltVT32, VR128X>,
461 X86VectorVTInfo< 8, EltVT64, VR512>,
462 X86VectorVTInfo< 2, EltVT64, VR128X>,
464 EXTRACT_get_vextract128_imm>;
465 defm NAME # "64x4" : vextract_for_size<Opcode64,
466 X86VectorVTInfo< 8, EltVT64, VR512>,
467 X86VectorVTInfo< 4, EltVT64, VR256X>,
468 X86VectorVTInfo<16, EltVT32, VR512>,
// NOTE(review): VR256 below looks like a typo — every other 256-bit
// register operand in this file uses the EVEX-capable VR256X class
// (see v32i8x_info etc. and the line just above). Confirm and switch
// to VR256X.
469 X86VectorVTInfo< 8, EltVT32, VR256>,
471 EXTRACT_get_vextract256_imm>, VEX_W;
474 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
475 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
477 // A 128-bit subvector insert to the first 512-bit vector position
478 // is a subregister copy that needs no instruction.
479 def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
480 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
481 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
483 def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
484 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
485 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
487 def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
488 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
489 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
491 def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
492 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
493 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
496 def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
497 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
498 def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
499 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
500 def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
501 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
502 def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
503 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
505 // vextractps - extract 32 bits from XMM
506 def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
507 (ins VR128X:$src1, i32i8imm:$src2),
508 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
509 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
512 def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
513 (ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
514 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
515 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
516 addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
518 //===---------------------------------------------------------------------===//
521 multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
522 RegisterClass DestRC,
523 RegisterClass SrcRC, X86MemOperand x86memop> {
524 def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
525 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
527 def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
528 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
530 let ExeDomain = SSEPackedSingle in {
531 defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
533 EVEX_V512, EVEX_CD8<32, CD8VT1>;
536 let ExeDomain = SSEPackedDouble in {
537 defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
539 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
542 def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
543 (VBROADCASTSSZrm addr:$src)>;
544 def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
545 (VBROADCASTSDZrm addr:$src)>;
547 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
548 (VBROADCASTSSZrm addr:$src)>;
549 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
550 (VBROADCASTSDZrm addr:$src)>;
552 multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
553 RegisterClass SrcRC, RegisterClass KRC> {
554 def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
555 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
556 []>, EVEX, EVEX_V512;
557 def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
558 (ins KRC:$mask, SrcRC:$src),
559 !strconcat(OpcodeStr,
560 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
561 []>, EVEX, EVEX_V512, EVEX_KZ;
564 defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
565 defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
568 def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
569 (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
571 def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
572 (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
574 def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
575 (VPBROADCASTDrZrr GR32:$src)>;
576 def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
577 (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
578 def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
579 (VPBROADCASTQrZrr GR64:$src)>;
580 def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
581 (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
583 def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
584 (VPBROADCASTDrZrr GR32:$src)>;
585 def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
586 (VPBROADCASTQrZrr GR64:$src)>;
588 def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
589 (v16i32 immAllZerosV), (i16 GR16:$mask))),
590 (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
591 def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
592 (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
593 (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
595 multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
596 X86MemOperand x86memop, PatFrag ld_frag,
597 RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
599 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
600 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
602 (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
603 def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
605 !strconcat(OpcodeStr,
606 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
608 (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
611 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
612 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
614 (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
615 def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
617 !strconcat(OpcodeStr,
618 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
619 [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
620 (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
624 defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
625 loadi32, VR512, v16i32, v4i32, VK16WM>,
626 EVEX_V512, EVEX_CD8<32, CD8VT1>;
627 defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
628 loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
629 EVEX_CD8<64, CD8VT1>;
631 multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
632 X86MemOperand x86memop, PatFrag ld_frag,
635 def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
636 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
638 def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
640 !strconcat(OpcodeStr,
641 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
646 defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
647 i128mem, loadv2i64, VK16WM>,
648 EVEX_V512, EVEX_CD8<32, CD8VT4>;
649 defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
650 i256mem, loadv4i64, VK16WM>, VEX_W,
651 EVEX_V512, EVEX_CD8<64, CD8VT4>;
653 def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
654 (VPBROADCASTDZrr VR128X:$src)>;
655 def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
656 (VPBROADCASTQZrr VR128X:$src)>;
658 def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
659 (VBROADCASTSSZrr VR128X:$src)>;
660 def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
661 (VBROADCASTSDZrr VR128X:$src)>;
663 def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
664 (VBROADCASTSSZrr VR128X:$src)>;
665 def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
666 (VBROADCASTSDZrr VR128X:$src)>;
668 // Provide fallback in case the load node that is used in the patterns above
669 // is used by additional users, which prevents the pattern selection.
670 def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
671 (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
672 def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
673 (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
676 let Predicates = [HasAVX512] in {
677 def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
679 (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
680 addr:$src)), sub_ymm)>;
682 //===----------------------------------------------------------------------===//
683 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
686 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
687 RegisterClass DstRC, RegisterClass KRC,
688 ValueType OpVT, ValueType SrcVT> {
689 def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
690 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
694 let Predicates = [HasCDI] in {
695 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
696 VK16, v16i32, v16i1>, EVEX_V512;
697 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
698 VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
701 //===----------------------------------------------------------------------===//
704 // -- immediate form --
// Permute controlled by an 8-bit immediate (vpermq / vpermpd).
// ri: register source; mi: memory source.
705 multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
706 SDNode OpNode, PatFrag mem_frag,
707 X86MemOperand x86memop, ValueType OpVT> {
708 def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
709 (ins RC:$src1, i8imm:$src2),
710 !strconcat(OpcodeStr,
711 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
713 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
715 def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
716 (ins x86memop:$src1, i8imm:$src2),
717 !strconcat(OpcodeStr,
718 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
720 (OpVT (OpNode (mem_frag addr:$src1),
721 (i8 imm:$src2))))]>, EVEX;
// 512-bit instantiations; the FP form is kept in the double domain.
724 defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
725 i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
726 let ExeDomain = SSEPackedDouble in
727 defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
728 f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
730 // -- VPERM - register form --
// Permute controlled by an index vector (X86VPermv).
// rr: index and data in registers; rm: data loaded from memory.
731 multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
732 PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
734 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
735 (ins RC:$src1, RC:$src2),
736 !strconcat(OpcodeStr,
737 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
739 (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
741 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
742 (ins RC:$src1, x86memop:$src2),
743 !strconcat(OpcodeStr,
744 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
746 (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
// 512-bit instantiations: integer forms use i512mem, FP forms f512mem and
// are pinned to their respective execution domains.
750 defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
751 v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
752 defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
753 v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
754 let ExeDomain = SSEPackedSingle in
755 defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
756 v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
757 let ExeDomain = SSEPackedDouble in
758 defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
759 v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
761 // -- VPERM2I - 3 source operands form --
// Three-source permute: $src1 is tied to $dst (read-modify-write).
// Variants: rr (plain), rrk (merge-masked), rrkz (zero-masked) and the
// corresponding memory forms rm / rmk / rmkz.
762 multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
763 PatFrag mem_frag, X86MemOperand x86memop,
764 SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
765 let Constraints = "$src1 = $dst" in {
766 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
767 (ins RC:$src1, RC:$src2, RC:$src3),
768 !strconcat(OpcodeStr,
769 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
771 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
// Merge-masked form: elements with a zero mask bit keep the old $dst value.
774 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
775 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
776 !strconcat(OpcodeStr,
777 " \t{$src3, $src2, $dst {${mask}}|"
778 "$dst {${mask}}, $src2, $src3}"),
779 [(set RC:$dst, (OpVT (vselect KRC:$mask,
780 (OpNode RC:$src1, RC:$src2,
785 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
786 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
787 (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
788 !strconcat(OpcodeStr,
789 " \t{$src3, $src2, $dst {${mask}} {z} |",
790 "$dst {${mask}} {z}, $src2, $src3}"),
791 [(set RC:$dst, (OpVT (vselect KRC:$mask,
792 (OpNode RC:$src1, RC:$src2,
795 (v16i32 immAllZerosV))))))]>,
// Memory forms: $src3 comes from memory via mem_frag.
798 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
799 (ins RC:$src1, RC:$src2, x86memop:$src3),
800 !strconcat(OpcodeStr,
801 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
803 (OpVT (OpNode RC:$src1, RC:$src2,
804 (mem_frag addr:$src3))))]>, EVEX_4V;
806 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
807 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
808 !strconcat(OpcodeStr,
809 " \t{$src3, $src2, $dst {${mask}}|"
810 "$dst {${mask}}, $src2, $src3}"),
812 (OpVT (vselect KRC:$mask,
813 (OpNode RC:$src1, RC:$src2,
814 (mem_frag addr:$src3)),
818 let AddedComplexity = 10 in // Prefer over the rrkz variant
819 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
820 (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
821 !strconcat(OpcodeStr,
822 " \t{$src3, $src2, $dst {${mask}} {z}|"
823 "$dst {${mask}} {z}, $src2, $src3}"),
825 (OpVT (vselect KRC:$mask,
826 (OpNode RC:$src1, RC:$src2,
827 (mem_frag addr:$src3)),
829 (v16i32 immAllZerosV))))))]>,
// VPERMI2* - index-in-destination 3-source permutes.
// The FP forms take f512mem so the memory operand type agrees with their
// memopv16f32/memopv8f64 load fragments and with the other FP permutes
// above (VPERMPSZ/VPERMPDZ); the integer forms keep i512mem.
defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, memopv16i32,
                  i512mem, X86VPermiv3, v16i32, VK16WM>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, memopv8i64,
                  i512mem, X86VPermiv3, v8i64, VK8WM>,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
                  f512mem, X86VPermiv3, v16f32, VK16WM>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
                  f512mem, X86VPermiv3, v8f64, VK8WM>,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Derives the vpermt2* (table-in-destination) instructions from
// avx512_perm_3src and adds patterns mapping the masked vpermt intrinsics
// onto them.  Note the operand reordering: the intrinsic takes the index
// first, while the instruction's tied operand is the table ($src1).
846 multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
847 PatFrag mem_frag, X86MemOperand x86memop,
848 SDNode OpNode, ValueType OpVT, RegisterClass KRC,
849 ValueType MaskVT, RegisterClass MRC> :
850 avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
// Unmasked form: an all-ones (-1) mask selects the plain rr instruction.
852 def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
853 VR512:$idx, VR512:$src1, VR512:$src2, -1)),
854 (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
// Masked form: the GPR mask is moved into the write-mask register class.
856 def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
857 VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
858 (!cast<Instruction>(NAME#rrk) VR512:$src1,
859 (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
// VPERMT2* - table-in-destination 3-source permutes with intrinsic patterns.
// As with VPERMI2*, the FP variants use f512mem so the printed/parsed memory
// operand matches their FP load fragments; integer variants keep i512mem.
defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d",  VR512, memopv16i32, i512mem,
                  X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q",  VR512, memopv8i64, i512mem,
                  X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, f512mem,
                  X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, f512mem,
                  X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
875 //===----------------------------------------------------------------------===//
876 // AVX-512 - BLEND using mask
// Blend two vectors under a mask register.  Note the pattern passes $src2
// before $src1 to OpNode (vselect), i.e. a set mask bit selects $src2.
878 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
879 RegisterClass KRC, RegisterClass RC,
880 X86MemOperand x86memop, PatFrag mem_frag,
881 SDNode OpNode, ValueType vt> {
882 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
883 (ins KRC:$mask, RC:$src1, RC:$src2),
884 !strconcat(OpcodeStr,
885 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
886 [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
887 (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
// Memory form carries no pattern; selection happens via the rr form.
889 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
890 (ins KRC:$mask, RC:$src1, x86memop:$src2),
891 !strconcat(OpcodeStr,
892 " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
893 []>, EVEX_4V, EVEX_K;
// FP blend-with-mask instantiations, pinned to their execution domains.
896 let ExeDomain = SSEPackedSingle in
897 defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
898 VK16WM, VR512, f512mem,
899 memopv16f32, vselect, v16f32>,
900 EVEX_CD8<32, CD8VF>, EVEX_V512;
901 let ExeDomain = SSEPackedDouble in
902 defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
903 VK8WM, VR512, f512mem,
904 memopv8f64, vselect, v8f64>,
905 VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Lower the FP blend intrinsics: the GPR mask is moved into the write-mask
// register class expected by the instruction's $mask operand.
907 def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
908 (v16f32 VR512:$src2), (i16 GR16:$mask))),
909 (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
910 VR512:$src1, VR512:$src2)>;
912 def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
913 (v8f64 VR512:$src2), (i8 GR8:$mask))),
914 (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
915 VR512:$src1, VR512:$src2)>;
// Integer blend-with-mask.  Use the integer 512-bit memory operand so the
// memory form agrees with the memopv16i32/memopv8i64 load fragments (the FP
// blends above use f512mem with FP fragments).
defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
                  VK16WM, VR512, i512mem,
                  memopv16i32, vselect, v16i32>,
                  EVEX_CD8<32, CD8VF>, EVEX_V512;
defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
                  VK8WM, VR512, i512mem,
                  memopv8i64, vselect, v8i64>,
                  VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// Lower the integer blend intrinsics.  The GPR mask is copied into the
// write-mask register classes (VK16WM/VK8WM) to match the instruction's
// $mask operand, exactly as the FP blend patterns above do; plain VK16/VK8
// would also admit K0, which cannot be encoded as a write mask.
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
                   (v16i32 VR512:$src2), (i16 GR16:$mask))),
          (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
           VR512:$src1, VR512:$src2)>;
def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
                  (v8i64 VR512:$src2), (i8 GR8:$mask))),
          (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
           VR512:$src1, VR512:$src2)>;
// 256-bit vselect without VLX: widen operands to 512 bits via
// SUBREG_TO_REG, blend there, then extract the ymm half again.
937 let Predicates = [HasAVX512] in {
938 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
939 (v8f32 VR256X:$src2))),
941 (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
942 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
943 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
945 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
946 (v8i32 VR256X:$src2))),
948 (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
949 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
950 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
952 //===----------------------------------------------------------------------===//
953 // Compare Instructions
954 //===----------------------------------------------------------------------===//
956 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compares producing a 1-bit mask (VK1).  The *_alt forms accept
// an explicit immediate condition code for the asm parser only.
957 multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
958 Operand CC, SDNode OpNode, ValueType VT,
959 PatFrag ld_frag, string asm, string asm_alt> {
960 def rr : AVX512Ii8<0xC2, MRMSrcReg,
961 (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
962 [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
963 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
964 def rm : AVX512Ii8<0xC2, MRMSrcMem,
965 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
966 [(set VK1:$dst, (OpNode (VT RC:$src1),
967 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
968 let isAsmParserOnly = 1, hasSideEffects = 0 in {
969 def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
970 (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
971 asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
972 def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
973 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
974 asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
978 let Predicates = [HasAVX512] in {
979 defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
980 "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
981 "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
983 defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
984 "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
985 "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
// Packed integer equality/greater-than compares producing a mask register.
// Parameterized over an X86VectorVTInfo "_" which supplies register classes,
// value type and load fragments.  rrk/rmk AND the result with an input mask.
989 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
991 def rr : AVX512BI<opc, MRMSrcReg,
992 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
993 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
994 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
995 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
997 def rm : AVX512BI<opc, MRMSrcMem,
998 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
999 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1000 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1001 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
1002 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1003 def rrk : AVX512BI<opc, MRMSrcReg,
1004 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1005 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1006 "$dst {${mask}}, $src1, $src2}"),
1007 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1008 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
1009 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1011 def rmk : AVX512BI<opc, MRMSrcMem,
1012 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1013 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1014 "$dst {${mask}}, $src1, $src2}"),
1015 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1016 (OpNode (_.VT _.RC:$src1),
1018 (_.LdFrag addr:$src2))))))],
1019 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Extends avx512_icmp_packed with broadcast-from-memory (EVEX.b) forms:
// rmb compares against a broadcast scalar, rmbk additionally applies a mask.
1022 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
1023 X86VectorVTInfo _> :
1024 avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
1025 let mayLoad = 1 in {
1026 def rmb : AVX512BI<opc, MRMSrcMem,
1027 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1028 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
1029 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1030 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1031 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
1032 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1033 def rmbk : AVX512BI<opc, MRMSrcMem,
1034 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1035 _.ScalarMemOp:$src2),
1036 !strconcat(OpcodeStr,
1037 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1038 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1039 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1040 (OpNode (_.VT _.RC:$src1),
1042 (_.ScalarLdFrag addr:$src2)))))],
1043 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Instantiate the packed compares for all vector lengths: 512-bit under
// "prd", 256/128-bit additionally gated on HasVLX.
1047 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
1048 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1049 let Predicates = [prd] in
1050 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
1053 let Predicates = [prd, HasVLX] in {
1054 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
1056 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
// Same multi-length expansion for the broadcast-capable variant.
1061 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
1062 SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
1064 let Predicates = [prd] in
1065 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
1068 let Predicates = [prd, HasVLX] in {
1069 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
1071 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
// VPCMPEQ*/VPCMPGT* instantiations.  Byte/word forms require BWI and have
// no broadcast variant; dword/qword forms support embedded broadcast.
1076 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
1077 avx512vl_i8_info, HasBWI>,
1080 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
1081 avx512vl_i16_info, HasBWI>,
1082 EVEX_CD8<16, CD8VF>;
1084 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1085 avx512vl_i32_info, HasAVX512>,
1086 EVEX_CD8<32, CD8VF>;
1088 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1089 avx512vl_i64_info, HasAVX512>,
1090 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1092 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
1093 avx512vl_i8_info, HasBWI>,
1096 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
1097 avx512vl_i16_info, HasBWI>,
1098 EVEX_CD8<16, CD8VF>;
1100 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1101 avx512vl_i32_info, HasAVX512>,
1102 EVEX_CD8<32, CD8VF>;
1104 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1105 avx512vl_i64_info, HasAVX512>,
1106 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// 256-bit compares without VLX: widen to 512 bits, compare, then narrow the
// resulting mask back to VK8.
1108 def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1109 (COPY_TO_REGCLASS (VPCMPGTDZrr
1110 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1111 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
1113 def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1114 (COPY_TO_REGCLASS (VPCMPEQDZrr
1115 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1116 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
// Integer compares with an explicit condition code (vpcmp{cc}b/w/d/q...).
// rri/rmi take register/memory sources; rrik/rmik AND with a mask; the
// *_alt forms are asm-parser-only spellings with a literal immediate.
1118 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
1119 X86VectorVTInfo _> {
1120 def rri : AVX512AIi8<opc, MRMSrcReg,
1121 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1122 !strconcat("vpcmp${cc}", Suffix,
1123 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1124 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1126 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1128 def rmi : AVX512AIi8<opc, MRMSrcMem,
1129 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
1130 !strconcat("vpcmp${cc}", Suffix,
1131 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1132 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1133 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1135 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1136 def rrik : AVX512AIi8<opc, MRMSrcReg,
1137 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1139 !strconcat("vpcmp${cc}", Suffix,
1140 "\t{$src2, $src1, $dst {${mask}}|",
1141 "$dst {${mask}}, $src1, $src2}"),
1142 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1143 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1145 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1147 def rmik : AVX512AIi8<opc, MRMSrcMem,
1148 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1150 !strconcat("vpcmp${cc}", Suffix,
1151 "\t{$src2, $src1, $dst {${mask}}|",
1152 "$dst {${mask}}, $src1, $src2}"),
1153 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1154 (OpNode (_.VT _.RC:$src1),
1155 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1157 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
1159 // Accept explicit immediate argument form instead of comparison code.
1160 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1161 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
1162 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$cc),
1163 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
1164 "$dst, $src1, $src2, $cc}"),
1165 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1166 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
1167 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i8imm:$cc),
1168 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
1169 "$dst, $src1, $src2, $cc}"),
1170 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1171 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
1172 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1174 !strconcat("vpcmp", Suffix,
1175 "\t{$cc, $src2, $src1, $dst {${mask}}|",
1176 "$dst {${mask}}, $src1, $src2, $cc}"),
1177 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1178 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
1179 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1181 !strconcat("vpcmp", Suffix,
1182 "\t{$cc, $src2, $src1, $dst {${mask}}|",
1183 "$dst {${mask}}, $src1, $src2, $cc}"),
1184 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Extends avx512_icmp_cc with broadcast-from-memory (EVEX.b) forms:
// rmib/rmibk compare against a broadcast scalar, plus *_alt asm spellings.
1188 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
1189 X86VectorVTInfo _> :
1190 avx512_icmp_cc<opc, Suffix, OpNode, _> {
1191 let mayLoad = 1 in {
1192 def rmib : AVX512AIi8<opc, MRMSrcMem,
1193 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
1195 !strconcat("vpcmp${cc}", Suffix,
1196 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1197 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1198 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1199 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
1201 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1202 def rmibk : AVX512AIi8<opc, MRMSrcMem,
1203 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1204 _.ScalarMemOp:$src2, AVXCC:$cc),
1205 !strconcat("vpcmp${cc}", Suffix,
1206 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1207 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1208 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1209 (OpNode (_.VT _.RC:$src1),
1210 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
1212 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
1215 // Accept explicit immediate argument form instead of comparison code.
1216 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1217 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
1218 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
1220 !strconcat("vpcmp", Suffix,
1221 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
1222 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
1223 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1224 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
1225 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1226 _.ScalarMemOp:$src2, i8imm:$cc),
1227 !strconcat("vpcmp", Suffix,
1228 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1229 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
1230 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Expand the cc-compares over all vector lengths (512 under "prd",
// 256/128 additionally gated on HasVLX).
1234 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
1235 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1236 let Predicates = [prd] in
1237 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
1239 let Predicates = [prd, HasVLX] in {
1240 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
1241 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
// Same multi-length expansion for the broadcast-capable variant.
1245 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
1246 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1247 let Predicates = [prd] in
1248 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
1251 let Predicates = [prd, HasVLX] in {
1252 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
1254 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
// VPCMP{B,W,D,Q} and unsigned VPCMPU* instantiations.  X86cmpm handles the
// signed comparisons, X86cmpmu the unsigned ones; byte/word need BWI.
1259 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
1260 HasBWI>, EVEX_CD8<8, CD8VF>;
1261 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
1262 HasBWI>, EVEX_CD8<8, CD8VF>;
1264 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
1265 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1266 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
1267 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
1269 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
1270 HasAVX512>, EVEX_CD8<32, CD8VF>;
1271 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
1272 HasAVX512>, EVEX_CD8<32, CD8VF>;
1274 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
1275 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
1276 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
1277 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_cmp_packed - packed FP compare producing a mask register.
// rri: reg/reg; rrib: reg/reg with {sae} (suppress-all-exceptions);
// rmi: reg/mem.  The *_alt forms are asm-parser-only spellings that take
// an explicit immediate condition code instead of the ${cc} suffix.
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                             X86MemOperand x86memop, ValueType vt,
                             string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  // No pattern: the {sae} form is reached via the intrinsic patterns below.
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
             [], d>, EVEX_B;
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        // No "$cc" in the operand list here: the condition
                        // code is already encoded in the "vcmp${cc}"
                        // mnemonic, exactly as in the rri form above.
                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst,
               (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               !strconcat("vcmp", suffix,
                          " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               !strconcat("vcmp", suffix,
                          " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}
// 512-bit packed FP compare instantiations.
1313 defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
1314 "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
1315 EVEX_CD8<32, CD8VF>;
1316 defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
1317 "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
1318 EVEX_CD8<64, CD8VF>;
// 256-bit compares without VLX: widen to 512 bits, compare, then narrow
// the resulting mask to VK8.
1320 def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
1321 (COPY_TO_REGCLASS (VCMPPSZrri
1322 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1323 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1325 def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1326 (COPY_TO_REGCLASS (VPCMPDZrri
1327 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1328 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1330 def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1331 (COPY_TO_REGCLASS (VPCMPUDZrri
1332 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1333 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
// Lower the mask_cmp_ps/pd intrinsics (all-ones mask case): the rrib forms
// handle the {sae} variants, the rri forms the default-rounding variants;
// the resulting mask register is copied back to a GPR.
1336 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1337 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1339 (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
1340 (I8Imm imm:$cc)), GR16)>;
1342 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1343 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1345 (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
1346 (I8Imm imm:$cc)), GR8)>;
1348 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1349 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1351 (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
1352 (I8Imm imm:$cc)), GR16)>;
1354 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1355 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1357 (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
1358 (I8Imm imm:$cc)), GR8)>;
1360 // Mask register copy, including
1361 // - copy between mask registers
1362 // - load/store mask registers
1363 // - copy from GPR to mask register and vice versa
// kk: mask-to-mask move; km: load; mk: store.  Only km carries a pattern.
1365 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
1366 string OpcodeStr, RegisterClass KRC,
1367 ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
1368 let hasSideEffects = 0 in {
1369 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1370 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1372 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
1373 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1374 [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
1376 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
1377 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// kr: GPR-to-mask move; rk: mask-to-GPR move.  Patterns are added
// separately below via bitconvert defs.
1381 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
1383 RegisterClass KRC, RegisterClass GRC> {
1384 let hasSideEffects = 0 in {
1385 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
1386 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1387 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
1388 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOV instantiations per mask width: KMOVB needs DQI, KMOVW is baseline
// AVX-512, KMOVD/KMOVQ need BWI.
1392 let Predicates = [HasDQI] in
1393 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
1395 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
1398 let Predicates = [HasAVX512] in
1399 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
1401 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
1404 let Predicates = [HasBWI] in {
1405 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
1406 i32mem>, VEX, PD, VEX_W;
1407 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
1411 let Predicates = [HasBWI] in {
1412 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
1413 i64mem>, VEX, PS, VEX_W;
1414 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
1418 // GR from/to mask register
// bitconvert between an integer GPR and the same-width mask register;
// sub-register inserts/extracts bridge the width mismatch with GR32.
1419 let Predicates = [HasDQI] in {
1420 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1421 (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
1422 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1423 (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
1425 let Predicates = [HasAVX512] in {
1426 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
1427 (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
1428 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
1429 (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
1431 let Predicates = [HasBWI] in {
1432 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
1433 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
1435 let Predicates = [HasBWI] in {
1436 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
1437 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
// Mask loads/stores through memory; without DQI an 8-bit mask goes through
// the 16-bit KMOVW after widening to VK16.
1441 let Predicates = [HasDQI] in {
1442 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1443 (KMOVBmk addr:$dst, VK8:$src)>;
1445 let Predicates = [HasAVX512] in {
1446 def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
1447 (KMOVWmk addr:$dst, VK16:$src)>;
1448 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1449 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
1450 def : Pat<(i1 (load addr:$src)),
1451 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
1452 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
1453 (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
1455 let Predicates = [HasBWI] in {
1456 def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
1457 (KMOVDmk addr:$dst, VK32:$src)>;
1459 let Predicates = [HasBWI] in {
1460 def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
1461 (KMOVQmk addr:$dst, VK64:$src)>;
// i1 <-> GPR conversions.  trunc-to-i1 masks the low bit with AND before
// moving into a mask register; zext reads the mask back and ANDs with 1.
1464 let Predicates = [HasAVX512] in {
1465 def : Pat<(i1 (trunc (i64 GR64:$src))),
1466 (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
1469 def : Pat<(i1 (trunc (i32 GR32:$src))),
1470 (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
1472 def : Pat<(i1 (trunc (i8 GR8:$src))),
1474 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
1476 def : Pat<(i1 (trunc (i16 GR16:$src))),
1478 (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
1481 def : Pat<(i32 (zext VK1:$src)),
1482 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
1483 def : Pat<(i8 (zext VK1:$src)),
1486 (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
1487 def : Pat<(i64 (zext VK1:$src)),
1488 (AND64ri8 (SUBREG_TO_REG (i64 0),
1489 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
1490 def : Pat<(i16 (zext VK1:$src)),
1492 (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
// scalar_to_vector of an i1 is just a mask register-class change.
1494 def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
1495 (COPY_TO_REGCLASS VK1:$src, VK16)>;
1496 def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
1497 (COPY_TO_REGCLASS VK1:$src, VK8)>;
1499 let Predicates = [HasBWI] in {
1500 def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
1501 (COPY_TO_REGCLASS VK1:$src, VK32)>;
1502 def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
1503 (COPY_TO_REGCLASS VK1:$src, VK64)>;
1507 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1508 let Predicates = [HasAVX512] in {
1509 // GR from/to 8-bit mask without native support
1510 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1512 (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
1514 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1516 (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
// Extracting bit 0 of a mask is a register-class change to VK1.
1519 def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
1520 (COPY_TO_REGCLASS VK16:$src, VK1)>;
1521 def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
1522 (COPY_TO_REGCLASS VK8:$src, VK1)>;
1524 let Predicates = [HasBWI] in {
1525 def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
1526 (COPY_TO_REGCLASS VK32:$src, VK1)>;
1527 def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
1528 (COPY_TO_REGCLASS VK64:$src, VK1)>;
1531 // Mask unary operation
// avx512_mask_unop: one reg-reg mask unary instruction (e.g. KNOT) for a
// given opcode, mnemonic, mask register class and ISD node, guarded by prd.
1533 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
1534 RegisterClass KRC, SDPatternOperator OpNode,
1536 let Predicates = [prd] in
1537 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1538 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1539 [(set KRC:$dst, (OpNode KRC:$src))]>;
// Instantiate the B/W/D/Q width variants. B/W need only AVX-512 (W) or DQI
// (B); the 32/64-bit mask forms require BWI and set VEX_W.
1542 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
1543 SDPatternOperator OpNode> {
1544 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1546 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1547 HasAVX512>, VEX, PS;
1548 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1549 HasBWI>, VEX, PD, VEX_W;
1550 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1551 HasBWI>, VEX, PS, VEX_W;
// KNOT* — bitwise NOT of a mask register (opcode 0x44).
1554 defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
// Map the GR16-based mask-unop intrinsic (int_x86_avx512_<IntName>_w) onto
// the "Wrr" instruction by copying GR16 <-> VK16 around the operation.
1556 multiclass avx512_mask_unop_int<string IntName, string InstName> {
1557 let Predicates = [HasAVX512] in
1558 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1560 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1561 (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
1563 defm : avx512_mask_unop_int<"knot", "KNOT">;
// xor-with-all-ones is a mask NOT; select the KNOT form matching the width
// and the available feature set.
1565 let Predicates = [HasDQI] in
1566 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
1567 let Predicates = [HasAVX512] in
1568 def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
1569 let Predicates = [HasBWI] in
1570 def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
1571 let Predicates = [HasBWI] in
1572 def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
1574 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
1575 let Predicates = [HasAVX512] in {
1576 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
1577 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
1579 def : Pat<(not VK8:$src),
1581 (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
1584 // Mask binary operation
1585 // - KAND, KANDN, KOR, KXNOR, KXOR
// avx512_mask_binop: one reg-reg-reg mask binary instruction for the given
// opcode/mnemonic/register class/ISD node, guarded by predicate prd.
1586 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
1587 RegisterClass KRC, SDPatternOperator OpNode,
1589 let Predicates = [prd] in
1590 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1591 !strconcat(OpcodeStr,
1592 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1593 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
// B/W/D/Q width instantiations; all use VEX.L=1 (VEX_L) encoding, with the
// byte form gated on DQI and the 32/64-bit forms on BWI.
1596 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
1597 SDPatternOperator OpNode> {
1598 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1599 HasDQI>, VEX_4V, VEX_L, PD;
1600 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1601 HasAVX512>, VEX_4V, VEX_L, PS;
1602 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1603 HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
1604 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1605 HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
// Pattern fragments for the KANDN/KXNOR selection patterns: and-not of the
// first operand with the second, and complemented xor. Operand names are
// positional and local to each fragment.
1608 def andn : PatFrag<(ops node:$lhs, node:$rhs), (and (not node:$lhs), node:$rhs)>;
1609 def xnor : PatFrag<(ops node:$lhs, node:$rhs), (not (xor node:$lhs, node:$rhs))>;
// KAND/KOR/KXNOR/KXOR commute; KANDN does not (its first operand is negated).
1611 let isCommutable = 1 in {
1612 defm KAND : avx512_mask_binop_all<0x41, "kand", and>;
1613 defm KOR : avx512_mask_binop_all<0x45, "kor", or>;
1614 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
1615 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor>;
1617 let isCommutable = 0 in
1618 defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
// Single-bit (VK1) logic ops: widen both operands to VK16, use the W-form
// mask instruction, then narrow the result back to VK1.
1620 def : Pat<(xor VK1:$src1, VK1:$src2),
1621 (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1622 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1624 def : Pat<(or VK1:$src1, VK1:$src2),
1625 (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1626 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1628 def : Pat<(and VK1:$src1, VK1:$src2),
1629 (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1630 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Map the GR16-based mask-binop intrinsics onto the corresponding "Wrr"
// instruction, copying each GR16 operand into VK16 and the result back out.
1632 multiclass avx512_mask_binop_int<string IntName, string InstName> {
1633 let Predicates = [HasAVX512] in
1634 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1635 (i16 GR16:$src1), (i16 GR16:$src2)),
1636 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1637 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1638 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1641 defm : avx512_mask_binop_int<"kand", "KAND">;
1642 defm : avx512_mask_binop_int<"kandn", "KANDN">;
1643 defm : avx512_mask_binop_int<"kor", "KOR">;
1644 defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
1645 defm : avx512_mask_binop_int<"kxor", "KXOR">;
1647 // With AVX-512, 8-bit mask is promoted to 16-bit mask.
// Select a VK8 binary op via the given 16-bit mask instruction by widening
// both operands to VK16 (base AVX-512 has no byte-sized mask ops).
1648 multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
1649 let Predicates = [HasAVX512] in
1650 def : Pat<(OpNode VK8:$src1, VK8:$src2),
1652 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
1653 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
1656 defm : avx512_binop_pat<and, KANDWrr>;
1657 defm : avx512_binop_pat<andn, KANDNWrr>;
1658 defm : avx512_binop_pat<or, KORWrr>;
1659 defm : avx512_binop_pat<xnor, KXNORWrr>;
1660 defm : avx512_binop_pat<xor, KXORWrr>;
// KUNPCK: interleave two mask registers. The rr form carries no selection
// pattern; concat_vectors is matched separately below.
1663 multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
1664 RegisterClass KRC> {
1665 let Predicates = [HasAVX512] in
1666 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1667 !strconcat(OpcodeStr,
1668 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
1671 multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
1672 defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
1676 defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
// Note the operand swap: KUNPCKBW places its second source in the low half,
// so $src2 comes first to put $src1 in the low 8 bits of the result.
1677 def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
1678 (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
1679 (COPY_TO_REGCLASS VK8:$src1, VK16))>;
// Intrinsic form of the unpack on GR16 operands, routed through VK16.
1682 multiclass avx512_mask_unpck_int<string IntName, string InstName> {
1683 let Predicates = [HasAVX512] in
1684 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
1685 (i16 GR16:$src1), (i16 GR16:$src2)),
1686 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
1687 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1688 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1690 defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
// KORTEST: OR two mask registers and set EFLAGS; no mask result is written.
1693 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1695 let Predicates = [HasAVX512], Defs = [EFLAGS] in
1696 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
1697 !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
1698 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
1701 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1702 defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1706 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
// Compare a single mask bit against 0 by OR-ing it with itself (KORTEST of
// the widened value against itself only updates EFLAGS).
1708 def : Pat<(X86cmp VK1:$src1, (i1 0)),
1709 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1710 (COPY_TO_REGCLASS VK1:$src1, VK16))>;
// KSHIFTL/KSHIFTR: shift a mask register by an 8-bit immediate.
1713 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1715 let Predicates = [HasAVX512] in
1716 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
1717 !strconcat(OpcodeStr,
1718 " \t{$imm, $src, $dst|$dst, $src, $imm}"),
1719 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
1722 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
1724 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1728 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
1729 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
1731 // Mask setting all 0s or 1s
// KSET0/KSET1 are pseudos: rematerializable, as cheap as a move, expanded
// later; the pattern materializes an all-zeros/all-ones mask constant.
1732 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
1733 let Predicates = [HasAVX512] in
1734 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
1735 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
1736 [(set KRC:$dst, (VT Val))]>;
1739 multiclass avx512_mask_setop_w<PatFrag Val> {
1740 defm B : avx512_mask_setop<VK8, v8i1, Val>;
1741 defm W : avx512_mask_setop<VK16, v16i1, Val>;
1744 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
1745 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1747 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1748 let Predicates = [HasAVX512] in {
// Mask constants for the narrower classes reuse the 16-bit KSET pseudos.
1749 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
1750 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
1751 def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
1752 def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
1753 def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
// Low half extract / insert between v8i1 and v16i1 is a register-class
// copy; the high half (index 8) needs a shift-right by 8 first.
1755 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
1756 (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
1758 def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
1759 (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
1761 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
1762 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
// v8i1 shifts go through the 16-bit KSHIFT forms (no byte-mask shift here).
1764 def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
1765 (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1767 def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
1768 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1769 //===----------------------------------------------------------------------===//
1770 // AVX-512 - Aligned and unaligned load and store
// avx512_load: one EVEX load family — plain (rr/rm), zero-masked (rrkz/rmkz)
// and merge-masked (rrk/rmk) forms. vt is the register value type, zvt the
// type used to express the all-zeros fallback, KRC the write-mask class.
1773 multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
1774 RegisterClass KRC, RegisterClass RC,
1775 ValueType vt, ValueType zvt, X86MemOperand memop,
1776 Domain d, bit IsReMaterializable = 1> {
1777 let hasSideEffects = 0 in {
1778 def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1779 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
1781 def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
1782 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1783 "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
// Unmasked memory form: foldable and (optionally) rematerializable.
1785 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
1786 SchedRW = [WriteLoad] in
1787 def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
1788 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1789 [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
// Merge-masking ties $src0 to $dst so unselected lanes keep old values.
1792 let AddedComplexity = 20 in {
1793 let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
1794 let hasSideEffects = 0 in
1795 def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
1796 (ins RC:$src0, KRC:$mask, RC:$src1),
1797 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1798 "${dst} {${mask}}, $src1}"),
1799 [(set RC:$dst, (vt (vselect KRC:$mask,
1803 let mayLoad = 1, SchedRW = [WriteLoad] in
1804 def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1805 (ins RC:$src0, KRC:$mask, memop:$src1),
1806 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1807 "${dst} {${mask}}, $src1}"),
1810 (vt (bitconvert (ld_frag addr:$src1))),
// Zero-masking load: unselected lanes are filled from an all-zeros vector.
1814 let mayLoad = 1, SchedRW = [WriteLoad] in
1815 def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1816 (ins KRC:$mask, memop:$src),
1817 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1818 "${dst} {${mask}} {z}, $src}"),
1821 (vt (bitconvert (ld_frag addr:$src))),
1822 (vt (bitconvert (zvt immAllZerosV))))))],
// avx512_load_vl: instantiate avx512_load at 512/256/128-bit vector lengths.
// Type, mask class and memory operand names are assembled from the string
// parameters (element type letter, element size, per-width element counts).
// The 128/256-bit variants additionally require VLX.
1827 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
1828 string elty, string elsz, string vsz512,
1829 string vsz256, string vsz128, Domain d,
1830 Predicate prd, bit IsReMaterializable = 1> {
1831 let Predicates = [prd] in
1832 defm Z : avx512_load<opc, OpcodeStr,
1833 !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
1834 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1835 !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
1836 !cast<X86MemOperand>(elty##"512mem"), d,
1837 IsReMaterializable>, EVEX_V512;
1839 let Predicates = [prd, HasVLX] in {
// Integer loads at 128/256 bits use v2i64/v4i64 fragments (load promotion
// during legalization); float loads use the natural typed fragment.
1840 defm Z256 : avx512_load<opc, OpcodeStr,
1841 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1842 "v"##vsz256##elty##elsz, "v4i64")),
1843 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1844 !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
1845 !cast<X86MemOperand>(elty##"256mem"), d,
1846 IsReMaterializable>, EVEX_V256;
1848 defm Z128 : avx512_load<opc, OpcodeStr,
1849 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1850 "v"##vsz128##elty##elsz, "v2i64")),
1851 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1852 !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
1853 !cast<X86MemOperand>(elty##"128mem"), d,
1854 IsReMaterializable>, EVEX_V128;
// avx512_store: EVEX store family. The *_alt register forms exist only for
// the assembler/disassembler (isAsmParserOnly); the mr form carries the
// real store pattern and mrk is the masked store (pattern added elsewhere).
1859 multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
1860 ValueType OpVT, RegisterClass KRC, RegisterClass RC,
1861 X86MemOperand memop, Domain d> {
1862 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1863 def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
1864 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
1866 let Constraints = "$src1 = $dst" in
1867 def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1868 (ins RC:$src1, KRC:$mask, RC:$src2),
1869 !strconcat(OpcodeStr,
1870 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
1872 def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1873 (ins KRC:$mask, RC:$src),
1874 !strconcat(OpcodeStr,
1875 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
1876 [], d>, EVEX, EVEX_KZ;
1878 let mayStore = 1 in {
1879 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
1880 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1881 [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
1882 def mrk : AVX512PI<opc, MRMDestMem, (outs),
1883 (ins memop:$dst, KRC:$mask, RC:$src),
1884 !strconcat(OpcodeStr,
1885 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
1886 [], d>, EVEX, EVEX_K;
// avx512_store_vl: instantiate avx512_store at 512/256/128-bit lengths,
// assembling the store fragment, value type, mask class and memory operand
// names from the string parameters. 128/256-bit variants require VLX.
1891 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
1892 string st_suff_512, string st_suff_256,
1893 string st_suff_128, string elty, string elsz,
1894 string vsz512, string vsz256, string vsz128,
1895 Domain d, Predicate prd> {
1896 let Predicates = [prd] in
1897 defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
1898 !cast<ValueType>("v"##vsz512##elty##elsz),
1899 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1900 !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
1902 let Predicates = [prd, HasVLX] in {
1903 defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
1904 !cast<ValueType>("v"##vsz256##elty##elsz),
1905 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1906 !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
1908 defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
1909 !cast<ValueType>("v"##vsz128##elty##elsz),
1910 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1911 !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
// Aligned (VMOVAPS/VMOVAPD) and unaligned (VMOVUPS/VMOVUPD) FP moves at all
// three vector lengths. VMOVUPD loads are marked non-rematerializable (final
// 0 argument) — see the trailing IsReMaterializable parameter of load_vl.
1915 defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
1916 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1917 avx512_store_vl<0x29, "vmovaps", "alignedstore",
1918 "512", "256", "", "f", "32", "16", "8", "4",
1919 SSEPackedSingle, HasAVX512>,
1920 PS, EVEX_CD8<32, CD8VF>;
1922 defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
1923 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1924 avx512_store_vl<0x29, "vmovapd", "alignedstore",
1925 "512", "256", "", "f", "64", "8", "4", "2",
1926 SSEPackedDouble, HasAVX512>,
1927 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1929 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
1930 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1931 avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
1932 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1933 PS, EVEX_CD8<32, CD8VF>;
1935 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
1936 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
1937 avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
1938 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1939 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked load/store intrinsics with a zero passthrough map directly onto the
// zero-masking (Zrmkz) and masked-store (Zmrk) instruction forms; the GPR
// mask is first copied into the corresponding write-mask class.
1941 def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
1942 (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
1943 (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1945 def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
1946 (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
1947 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1949 def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
1951 (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
1953 def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
1955 (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Integer moves: aligned VMOVDQA32/64 (AVX-512) and unaligned VMOVDQU8/16
// (BWI) / VMOVDQU32/64 (AVX-512), each at all three vector lengths.
1958 defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
1959 "16", "8", "4", SSEPackedInt, HasAVX512>,
1960 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
1961 "512", "256", "", "i", "32", "16", "8", "4",
1962 SSEPackedInt, HasAVX512>,
1963 PD, EVEX_CD8<32, CD8VF>;
1965 defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
1966 "8", "4", "2", SSEPackedInt, HasAVX512>,
1967 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
1968 "512", "256", "", "i", "64", "8", "4", "2",
1969 SSEPackedInt, HasAVX512>,
1970 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1972 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
1973 "64", "32", "16", SSEPackedInt, HasBWI>,
1974 avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
1975 "i", "8", "64", "32", "16", SSEPackedInt,
1976 HasBWI>, XD, EVEX_CD8<8, CD8VF>;
1978 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
1979 "32", "16", "8", SSEPackedInt, HasBWI>,
1980 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
1981 "i", "16", "32", "16", "8", SSEPackedInt,
1982 HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
1984 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
1985 "16", "8", "4", SSEPackedInt, HasAVX512>,
1986 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
1987 "i", "32", "16", "8", "4", SSEPackedInt,
1988 HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
1990 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
1991 "8", "4", "2", SSEPackedInt, HasAVX512>,
1992 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
1993 "i", "64", "8", "4", "2", SSEPackedInt,
1994 HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer masked load/store intrinsics with a zero passthrough, mapped to the
// zero-masking load and masked store forms as for the FP variants above.
1996 def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
1997 (v16i32 immAllZerosV), GR16:$mask)),
1998 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
2000 def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
2001 (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
2002 (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
2004 def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
2006 (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
2008 def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
2010 (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Select register-register zero-masking moves for vselect-with-zeros. When
// the zeros are in the true position, the mask is inverted with KNOT first.
2013 let AddedComplexity = 20 in {
2014 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
2015 (bc_v8i64 (v16i32 immAllZerosV)))),
2016 (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
2018 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
2019 (v8i64 VR512:$src))),
2020 (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
2023 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
2024 (v16i32 immAllZerosV))),
2025 (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
2027 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
2028 (v16i32 VR512:$src))),
2029 (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
2032 // Move Int Doubleword to Packed Double Int
// VMOVD/VMOVQ between GPRs and XMM registers (EVEX-encoded AVX-512 forms).
2034 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
2035 "vmovd\t{$src, $dst|$dst, $src}",
2037 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
2039 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
2040 "vmovd\t{$src, $dst|$dst, $src}",
2042 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
2043 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2044 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
2045 "vmovq\t{$src, $dst|$dst, $src}",
2047 (v2i64 (scalar_to_vector GR64:$src)))],
2048 IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
// Codegen-only bitconvert forms between GR64 and the FR64 scalar class.
2049 let isCodeGenOnly = 1 in {
2050 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
2051 "vmovq\t{$src, $dst|$dst, $src}",
2052 [(set FR64:$dst, (bitconvert GR64:$src))],
2053 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
2054 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
2055 "vmovq\t{$src, $dst|$dst, $src}",
2056 [(set GR64:$dst, (bitconvert FR64:$src)],
2057 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
2059 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
2060 "vmovq\t{$src, $dst|$dst, $src}",
2061 [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
2062 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
2063 EVEX_CD8<64, CD8VT1>;
2065 // Move Int Doubleword to Single Scalar
// Codegen-only bitconvert moves between GR32/i32 memory and the FR32X class.
2067 let isCodeGenOnly = 1 in {
2068 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
2069 "vmovd\t{$src, $dst|$dst, $src}",
2070 [(set FR32X:$dst, (bitconvert GR32:$src))],
2071 IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
2073 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
2074 "vmovd\t{$src, $dst|$dst, $src}",
2075 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
2076 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2079 // Move doubleword from xmm register to r/m32
// Extracts element 0 of a v4i32 into a GPR or directly to memory.
2081 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
2082 "vmovd\t{$src, $dst|$dst, $src}",
2083 [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
2084 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
2086 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2087 (ins i32mem:$dst, VR128X:$src),
2088 "vmovd\t{$src, $dst|$dst, $src}",
2089 [(store (i32 (vector_extract (v4i32 VR128X:$src),
2090 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
2091 EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2093 // Move quadword from xmm1 register to r/m64
// Extract element 0 of a v2i64 to a 64-bit GPR or to memory; both forms
// require 64-bit mode in addition to AVX-512.
2095 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
2096 "vmovq\t{$src, $dst|$dst, $src}",
2097 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
2099 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
2100 Requires<[HasAVX512, In64BitMode]>;
2102 def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
2103 (ins i64mem:$dst, VR128X:$src),
2104 "vmovq\t{$src, $dst|$dst, $src}",
2105 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
2106 addr:$dst)], IIC_SSE_MOVDQ>,
2107 EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
2108 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2110 // Move Scalar Single to Double Int
// Codegen-only bitconvert of FR32X to GR32 / i32 memory.
2112 let isCodeGenOnly = 1 in {
2113 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
2115 "vmovd\t{$src, $dst|$dst, $src}",
2116 [(set GR32:$dst, (bitconvert FR32X:$src))],
2117 IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
2118 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2119 (ins i32mem:$dst, FR32X:$src),
2120 "vmovd\t{$src, $dst|$dst, $src}",
2121 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
2122 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2125 // Move Quadword Int to Packed Quadword Int
// 64-bit load into the low element of an XMM register.
2127 def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
2129 "vmovq\t{$src, $dst|$dst, $src}",
2131 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
2132 EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2134 //===----------------------------------------------------------------------===//
2135 // AVX-512 MOVSS, MOVSD
2136 //===----------------------------------------------------------------------===//
// avx512_move_scalar: VMOVSS/VMOVSD family — register merge (rr), masked
// register merge (rrk), scalar load (rm), scalar store (mr) and masked
// scalar store (mrk). Masked forms carry no selection patterns here.
2138 multiclass avx512_move_scalar <string asm, RegisterClass RC,
2139 SDNode OpNode, ValueType vt,
2140 X86MemOperand x86memop, PatFrag mem_pat> {
2141 let hasSideEffects = 0 in {
2142 def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
2143 !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2144 [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
2145 (scalar_to_vector RC:$src2))))],
2146 IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
2147 let Constraints = "$src1 = $dst" in
2148 def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
2149 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
2151 " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
2152 [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
2153 def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2154 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2155 [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
2157 let mayStore = 1 in {
2158 def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
2159 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2160 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
2162 def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
2163 !strconcat(asm, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
2164 [], IIC_SSE_MOV_S_MR>,
2165 EVEX, VEX_LIG, EVEX_K;
2167 } //hasSideEffects = 0
// Instantiate the scalar-move family for f32 (VMOVSSZ) and f64 (VMOVSDZ).
2170 let ExeDomain = SSEPackedSingle in
2171 defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
2172 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
2174 let ExeDomain = SSEPackedDouble in
2175 defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
2176 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar select on a single mask bit via the masked register-merge form;
// the scalar operands are shuttled through VR128X register-class copies.
2178 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
2179 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
2180 VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
2182 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
2183 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
2184 VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
2186 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
2187 (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
2188 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2190 // For the disassembler
// Reversed-operand (store-form) encodings, disassembly only.
2191 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
2192 def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2193 (ins VR128X:$src1, FR32X:$src2),
2194 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2196 XS, EVEX_4V, VEX_LIG;
2197 def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2198 (ins VR128X:$src1, FR64X:$src2),
2199 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2201 XD, EVEX_4V, VEX_LIG, VEX_W;
2204 let Predicates = [HasAVX512] in {
2205 let AddedComplexity = 15 in {
2206 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2207 // MOVS{S,D} to the lower bits.
2208 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2209 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2210 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2211 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2212 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2213 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2214 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2215 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2217 // Move low f32 and clear high bits.
// 256-bit variants work on the low XMM subregister and rely on
// SUBREG_TO_REG implying the upper lanes are zero.
2218 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2219 (SUBREG_TO_REG (i32 0),
2220 (VMOVSSZrr (v4f32 (V_SET0)),
2221 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2222 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2223 (SUBREG_TO_REG (i32 0),
2224 (VMOVSSZrr (v4i32 (V_SET0)),
2225 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2228 let AddedComplexity = 20 in {
2229 // MOVSSrm zeros the high parts of the register; represent this
2230 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2231 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2232 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2233 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2234 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2235 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2236 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2238 // MOVSDrm zeros the high parts of the register; represent this
2239 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2240 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2241 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2242 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2243 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2244 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2245 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2246 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2247 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2248 def : Pat<(v2f64 (X86vzload addr:$src)),
2249 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2251 // Represent the same patterns above but in the form they appear for
// ...256-bit types: a scalar inserted into an undef wide vector with the
// rest zeroed; the low XMM instruction result is widened via SUBREG_TO_REG.
2253 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2254 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
2255 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
2256 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2257 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2258 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2259 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2260 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2261 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2263 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2264 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2265 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2266 FR32X:$src)), sub_xmm)>;
2267 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2268 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2269 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2270 FR64X:$src)), sub_xmm)>;
2271 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2272 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
2273 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
2275 // Move low f64 and clear high bits.
2276 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2277 (SUBREG_TO_REG (i32 0),
2278 (VMOVSDZrr (v2f64 (V_SET0)),
2279 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2281 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2282 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2283 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2285 // Extract and store.
// Storing element 0 of a vector is a plain scalar VMOVSS/VMOVSD store.
2286 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2288 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2289 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2291 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2293 // Shuffle with VMOVSS
// X86Movss/X86Movsd shuffles map to the scalar move instructions; 256-bit
// forms operate on the low XMM halves and rebuild the wide register with
// SUBREG_TO_REG.
2294 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2295 (VMOVSSZrr (v4i32 VR128X:$src1),
2296 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2297 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2298 (VMOVSSZrr (v4f32 VR128X:$src1),
2299 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
2302 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2303 (SUBREG_TO_REG (i32 0),
2304 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2305 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2307 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2308 (SUBREG_TO_REG (i32 0),
2309 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2310 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2313 // Shuffle with VMOVSD
2314 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2315 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2316 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2317 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2318 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2319 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2320 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2321 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2324 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2325 (SUBREG_TO_REG (i32 0),
2326 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2327 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2329 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2330 (SUBREG_TO_REG (i32 0),
2331 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2332 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
// Movlpd/Movlps shuffles also lower to VMOVSD register merges.
2335 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2336 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2337 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2338 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2339 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2340 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2341 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2342 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// vmovq that zero-extends the low 64 bits of the source into the
// destination (X86vzmovl), register and memory forms.
2345 let AddedComplexity = 15 in
2346 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
2348                                 "vmovq\t{$src, $dst|$dst, $src}",
2349                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
2350                                                    (v2i64 VR128X:$src))))],
2351                                 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
2353 let AddedComplexity = 20 in
2354 def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
2356                                 "vmovq\t{$src, $dst|$dst, $src}",
2357                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
2358                                                    (loadv2i64 addr:$src))))],
2359                                 IIC_SSE_MOVDQ>, EVEX, VEX_W,
// NOTE(review): EVEX_CD8<8, CD8VT8> on a 64-bit vmovq load looks
// inconsistent with sibling 64-bit loads in this file, which use
// EVEX_CD8<64, ...> (affects compressed disp8 scaling) — verify against
// the Intel SDM EVEX disp8*N rules before changing.
2360                                 EVEX_CD8<8, CD8VT8>;
2362 let Predicates = [HasAVX512] in {
2363   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
2364   let AddedComplexity = 20 in {
2365   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
2366             (VMOVDI2PDIZrm addr:$src)>;
2367   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
2368             (VMOV64toPQIZrr GR64:$src)>;
2369   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
2370            (VMOVDI2PDIZrr GR32:$src)>;
  // Bitcast loads still select the plain movd/movq memory forms.
2372   def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
2373             (VMOVDI2PDIZrm addr:$src)>;
2374   def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
2375             (VMOVDI2PDIZrm addr:$src)>;
2376   def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
2377             (VMOVZPQILo2PQIZrm addr:$src)>;
2378   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
2379             (VMOVZPQILo2PQIZrr VR128X:$src)>;
2380   def : Pat<(v2i64 (X86vzload addr:$src)),
2381             (VMOVZPQILo2PQIZrm addr:$src)>;
2384   // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
2385   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2386                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
2387             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
2388   def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2389                                (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
2390             (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
// X86Vinsert of a GPR into element 0 of a zero (or undef) 512-bit vector
// also lowers to the 128-bit movd/movq plus SUBREG_TO_REG.
2393 def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
2394           (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2396 def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
2397           (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2399 def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
2400           (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2402 def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
2403           (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2405 //===----------------------------------------------------------------------===//
2406 // AVX-512 - Non-temporals
2407 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads. Only the 512-bit form carries a pattern
// (via the movntdqa intrinsic); the VLX 256/128-bit forms are
// encoding-only here.
2408 let SchedRW = [WriteLoad] in {
2409   def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
2410                               (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
2411                               [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
2412                               SSEPackedInt>, EVEX, T8PD, EVEX_V512,
2413                               EVEX_CD8<64, CD8VF>;
2415   let Predicates = [HasAVX512, HasVLX] in {
2416     def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
2418                                    "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2419                                    SSEPackedInt>, EVEX, T8PD, EVEX_V256,
2420                                    EVEX_CD8<64, CD8VF>;
2422     def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
2424                                    "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2425                                    SSEPackedInt>, EVEX, T8PD, EVEX_V128,
2426                                    EVEX_CD8<64, CD8VF>;
// One non-temporal store instruction (mr form) for a given vector type /
// register class / memory operand, using the supplied store fragment.
2430 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2431                         ValueType OpVT, RegisterClass RC, X86MemOperand memop,
2432                         Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
  // High AddedComplexity so the non-temporal store wins over a plain store
  // pattern when the nontemporal fragment matches.
2433   let SchedRW = [WriteStore], mayStore = 1,
2434       AddedComplexity = 400 in
2435   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
2436                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2437                     [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
// Instantiate avx512_movnt at 512/256/128 bits. Vector type and memory
// operand names are assembled from the element-type letter (elty), element
// size (elsz) and per-width element counts (vsz512/vsz256/vsz128).
2440 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2441                            string elty, string elsz, string vsz512,
2442                            string vsz256, string vsz128, Domain d,
2443                            Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
2444   let Predicates = [prd] in
2445   defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
2446                         !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
2447                         !cast<X86MemOperand>(elty##"512mem"), d, itin>,
  // 256/128-bit forms additionally require VLX.
2450   let Predicates = [prd, HasVLX] in {
2451     defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
2452                              !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
2453                              !cast<X86MemOperand>(elty##"256mem"), d, itin>,
2456     defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
2457                              !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
2458                              !cast<X86MemOperand>(elty##"128mem"), d, itin>,
// Non-temporal aligned stores: integer (vmovntdq), double (vmovntpd) and
// single (vmovntps) flavors across the three vector widths.
2463 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
2464                                 "i", "64", "8", "4", "2", SSEPackedInt,
2465                                 HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
2467 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
2468                                 "f", "64", "8", "4", "2", SSEPackedDouble,
2469                                 HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2471 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
2472                                 "f", "32", "16", "8", "4", SSEPackedSingle,
2473                                 HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
2475 //===----------------------------------------------------------------------===//
2476 // AVX-512 - Integer arithmetic
// Full family of forms for a two-operand integer op: reg-reg (rr),
// reg-mem (rm), reg-broadcast (rmb), each in unmasked, merge-masked (k,
// reusing $src0 via the $src0 = $dst constraint) and zero-masked (kz)
// variants. Masking is modeled with vselect against the passthru /
// immAllZerosV.
2478 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2479                            ValueType OpVT, RegisterClass KRC,
2480                            RegisterClass RC, PatFrag memop_frag,
2481                            X86MemOperand x86memop, PatFrag scalar_mfrag,
2482                            X86MemOperand x86scalar_mop, string BrdcstStr,
2483                            OpndItins itins, bit IsCommutable = 0> {
2484   let isCommutable = IsCommutable in
2485     def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2486                       (ins RC:$src1, RC:$src2),
2487                       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2488                       [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
  // Masked forms get higher complexity so they are preferred when a mask
  // is present in the DAG.
2490   let AddedComplexity = 30 in {
2491     let Constraints = "$src0 = $dst" in
2492       def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2493                          (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
2494                          !strconcat(OpcodeStr,
2495                             " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2496                          [(set RC:$dst, (OpVT (vselect KRC:$mask,
2497                                                        (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2499                          itins.rr>, EVEX_4V, EVEX_K;
2500     def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2501                         (ins KRC:$mask, RC:$src1, RC:$src2),
2502                         !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2503                            "|$dst {${mask}} {z}, $src1, $src2}"),
2504                         [(set RC:$dst, (OpVT (vselect KRC:$mask,
2505                                                       (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2506                                                       (OpVT immAllZerosV))))],
2507                         itins.rr>, EVEX_4V, EVEX_KZ;
  // Memory-source forms: full-vector load ...
2510   let mayLoad = 1 in {
2511     def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2512                       (ins RC:$src1, x86memop:$src2),
2513                       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2514                       [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
2516     let AddedComplexity = 30 in {
2517     let Constraints = "$src0 = $dst" in
2518       def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2519                          (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
2520                          !strconcat(OpcodeStr,
2521                             " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2522                          [(set RC:$dst, (OpVT (vselect KRC:$mask,
2523                                                        (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2525                          itins.rm>, EVEX_4V, EVEX_K;
2526     def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2527                         (ins KRC:$mask, RC:$src1, x86memop:$src2),
2528                         !strconcat(OpcodeStr,
2529                            " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2530                         [(set RC:$dst, (OpVT (vselect KRC:$mask,
2531                                                       (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2532                                                       (OpVT immAllZerosV))))],
2533                         itins.rm>, EVEX_4V, EVEX_KZ;
  // ... and scalar broadcast (EVEX.b, "{1toN}" syntax), matched via
  // X86VBroadcast of a scalar load.
2535     def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2536                        (ins RC:$src1, x86scalar_mop:$src2),
2537                        !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2538                                   ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2539                        [(set RC:$dst, (OpNode RC:$src1,
2540                                       (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2541                        itins.rm>, EVEX_4V, EVEX_B;
2542     let AddedComplexity = 30 in {
2543     let Constraints = "$src0 = $dst" in
2544       def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2545                           (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2546                           !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2547                                      ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2549                           [(set RC:$dst, (OpVT (vselect KRC:$mask,
2550                                                         (OpNode (OpVT RC:$src1),
2551                                                          (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2553                           itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2554     def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2555                          (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2556                          !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2557                                     ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2559                          [(set RC:$dst, (OpVT (vselect KRC:$mask,
2560                                                        (OpNode (OpVT RC:$src1),
2561                                                         (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2562                                                        (OpVT immAllZerosV))))],
2563                          itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// Variant of avx512_binop_rm for ops whose source and destination element
// types differ (e.g. vpmuldq: v16i32 sources, v8i64 result). All forms are
// encoding-only (empty patterns); selection is done via explicit Pat<>s
// below.
2568 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
2569                             ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
2570                             PatFrag memop_frag, X86MemOperand x86memop,
2571                             PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
2572                             string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
2573   let isCommutable = IsCommutable in
2575     def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2576                       (ins RC:$src1, RC:$src2),
2577                       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2579   def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2580                      (ins KRC:$mask, RC:$src1, RC:$src2),
2581                      !strconcat(OpcodeStr,
2582                         " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2583                      [], itins.rr>, EVEX_4V, EVEX_K;
2584   def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2585                       (ins KRC:$mask, RC:$src1, RC:$src2),
2586                       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2587                          "|$dst {${mask}} {z}, $src1, $src2}"),
2588                       [], itins.rr>, EVEX_4V, EVEX_KZ;
2590   let mayLoad = 1 in {
2591     def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2592                       (ins RC:$src1, x86memop:$src2),
2593                       !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2595     def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2596                        (ins KRC:$mask, RC:$src1, x86memop:$src2),
2597                        !strconcat(OpcodeStr,
2598                           " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2599                        [], itins.rm>, EVEX_4V, EVEX_K;
2600     def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2601                         (ins KRC:$mask, RC:$src1, x86memop:$src2),
2602                         !strconcat(OpcodeStr,
2603                            " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2604                         [], itins.rm>, EVEX_4V, EVEX_KZ;
2605     def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2606                        (ins RC:$src1, x86scalar_mop:$src2),
2607                        !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2608                                   ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2609                        [], itins.rm>, EVEX_4V, EVEX_B;
2610     def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2611                         (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2612                         !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2613                                    ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2615                         [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2616     def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2617                          (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2618                          !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2619                                     ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2621                          [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit integer add/sub/mul instantiations of avx512_binop_rm.
2625 defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
2626                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2627                    SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2629 defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
2630                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2631                    SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2633 defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
2634                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2635                    SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2637 defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
2638                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2639                    SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
2641 defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
2642                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2643                    SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Widening multiplies (32x32 -> 64): use avx512_binop_rm2 because source
// (v16i32) and destination (v8i64) types differ; selected via Pat<>s below.
2645 defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
2646                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2647                    SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
2648                    EVEX_CD8<64, CD8VF>, VEX_W;
2650 defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
2651                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2652                    SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
2654 def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
2655           (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
// Unmasked intrinsic forms (all-ones mask, zero passthru) map to plain rr.
2657 def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
2658                   (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2659           (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2660 def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
2661                   (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2662           (VPMULDQZrr VR512:$src1, VR512:$src2)>;
// 512-bit signed/unsigned integer min/max (d = 32-bit, q = 64-bit
// elements).
2664 defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
2665                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2666                    SSE_INTALU_ITINS_P, 1>,
2667                    T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2668 defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
2669                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2670                    SSE_INTALU_ITINS_P, 0>,
2671                    T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2673 defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
2674                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2675                    SSE_INTALU_ITINS_P, 1>,
2676                    T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2677 defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
2678                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2679                    SSE_INTALU_ITINS_P, 0>,
2680                    T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2682 defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
2683                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2684                    SSE_INTALU_ITINS_P, 1>,
2685                    T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2686 defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
2687                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2688                    SSE_INTALU_ITINS_P, 0>,
2689                    T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2691 defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
2692                    memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2693                    SSE_INTALU_ITINS_P, 1>,
2694                    T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2695 defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
2696                    memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2697                    SSE_INTALU_ITINS_P, 0>,
2698                    T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Map the unmasked forms of the masked min/max intrinsics (mask == -1,
// zero passthru) onto the plain rr instructions.
2700 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
2701                     (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2702            (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
2703 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
2704                     (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2705            (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
2706 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
2707                    (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2708            (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
2709 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
2710                    (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2711            (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
2712 def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
2713                     (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2714            (VPMINSDZrr VR512:$src1, VR512:$src2)>;
2715 def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
2716                     (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2717            (VPMINUDZrr VR512:$src1, VR512:$src2)>;
2718 def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
2719                    (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2720            (VPMINSQZrr VR512:$src1, VR512:$src2)>;
2721 def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
2722                    (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2723            (VPMINUQZrr VR512:$src1, VR512:$src2)>;
2724 //===----------------------------------------------------------------------===//
2725 // AVX-512 - Unpack Instructions
2726 //===----------------------------------------------------------------------===//
// FP unpack high/low: rr and rm forms; the memory operand is loaded with
// mem_frag and bitconverted to the instruction's vector type.
2728 multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
2729                                    PatFrag mem_frag, RegisterClass RC,
2730                                    X86MemOperand x86memop, string asm,
2732   def rr : AVX512PI<opc, MRMSrcReg,
2733             (outs RC:$dst), (ins RC:$src1, RC:$src2),
2735                      (vt (OpNode RC:$src1, RC:$src2)))],
2737   def rm : AVX512PI<opc, MRMSrcMem,
2738             (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2740                        (vt (OpNode RC:$src1,
2741                             (bitconvert (mem_frag addr:$src2)))))],
// 512-bit single/double unpack instantiations. The single-precision forms
// deliberately use memopv8f64: the load is bitconverted to v16f32 by the
// rm pattern above.
2745 defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
2746       VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2747                      SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2748 defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
2749       VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2750                      SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2751 defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
2752       VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2753                      SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2754 defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
2755       VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2756                      SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer unpack high/low: rr and rm forms with a bitconverted memory
// source, selected directly from the X86Unpckl/X86Unpckh nodes.
2758 multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
2759                         ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2760                         X86MemOperand x86memop> {
2761   def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2762              (ins RC:$src1, RC:$src2),
2763              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2764              [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2765              IIC_SSE_UNPCK>, EVEX_4V;
2766   def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2767              (ins RC:$src1, x86memop:$src2),
2768              !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2769              [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
2770                                    (bitconvert (memop_frag addr:$src2)))))],
2771                                    IIC_SSE_UNPCK>, EVEX_4V;
// 512-bit dword/qword unpack instantiations.
2773 defm VPUNPCKLDQZ  : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
2774                                 VR512, memopv16i32, i512mem>, EVEX_V512,
2775                                 EVEX_CD8<32, CD8VF>;
2776 defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
2777                                 VR512, memopv8i64, i512mem>, EVEX_V512,
2778                                 VEX_W, EVEX_CD8<64, CD8VF>;
2779 defm VPUNPCKHDQZ  : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
2780                                 VR512, memopv16i32, i512mem>, EVEX_V512,
2781                                 EVEX_CD8<32, CD8VF>;
2782 defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
2783                                 VR512, memopv8i64, i512mem>, EVEX_V512,
2784                                 VEX_W, EVEX_CD8<64, CD8VF>;
2785 //===----------------------------------------------------------------------===//
// Immediate-controlled shuffles: register (ri) and memory (mi) forms
// taking an 8-bit immediate shuffle control.
2789 multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
2790                       SDNode OpNode, PatFrag mem_frag,
2791                       X86MemOperand x86memop, ValueType OpVT> {
2792   def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
2793                      (ins RC:$src1, i8imm:$src2),
2794                      !strconcat(OpcodeStr,
2795                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2797                       (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
2799   def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
2800                      (ins x86memop:$src1, i8imm:$src2),
2801                      !strconcat(OpcodeStr,
2802                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2804                       (OpVT (OpNode (mem_frag addr:$src1),
2805                              (i8 imm:$src2))))]>, EVEX;
2808 defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
2809                       i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2811 let ExeDomain = SSEPackedSingle in
2812 defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilpi,
2813                       memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
2814                       EVEX_CD8<32, CD8VF>;
2815 let ExeDomain = SSEPackedDouble in
2816 defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilpi,
2817                       memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
// NOTE(review): VPERMILPDZ operates on 64-bit elements (v8f64, VEX_W) but
// uses EVEX_CD8<32, CD8VF>; siblings with 64-bit elements in this file use
// EVEX_CD8<64, ...>. Verify disp8 scaling against the Intel SDM.
2818                       VEX_W, EVEX_CD8<32, CD8VF>;
// Integer-typed vpermilpi nodes reuse the FP immediate forms.
2820 def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
2821           (VPERMILPSZri VR512:$src1, imm:$imm)>;
2822 def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
2823           (VPERMILPDZri VR512:$src1, imm:$imm)>;
2825 //===----------------------------------------------------------------------===//
2826 // AVX-512 Logical Instructions
2827 //===----------------------------------------------------------------------===//
// 512-bit bitwise and/or/xor/andn in both dword (d) and qword (q)
// element-size variants — the element size only matters for masking and
// broadcast granularity.
2829 defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
2830                       i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2831                       EVEX_V512, EVEX_CD8<32, CD8VF>;
2832 defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
2833                       i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2834                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2835 defm VPORDZ  : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
2836                       i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2837                       EVEX_V512, EVEX_CD8<32, CD8VF>;
2838 defm VPORQZ  : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
2839                       i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2840                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2841 defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
2842                       i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2843                       EVEX_V512, EVEX_CD8<32, CD8VF>;
2844 defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
2845                       i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2846                       EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2847 defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
2848                       memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2849                       SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2850 defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
2851                       memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2852                       SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2854 //===----------------------------------------------------------------------===//
2855 // AVX-512 FP arithmetic
2856 //===----------------------------------------------------------------------===//
// Scalar FP binops: instantiate the shared SSE scalar multiclass for the
// EVEX register classes (FR32X/FR64X) with EVEX encodings.
2858 multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2860   defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
2861                            f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
2862                            EVEX_CD8<32, CD8VT1>;
2863   defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
2864                            f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
2865                            EVEX_CD8<64, CD8VT1>;
2868 let isCommutable = 1 in {
2869 defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
2870 defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
2871 defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
2872 defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
2874 let isCommutable = 0 in {
2875 defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
2876 defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
// Packed FP binop forms: rr/rm/rmb with patterns, plus masked and
// zero-masked variants (rrk/rrkz/rmk/rmkz/rmbk/rmbkz) that are
// encoding-only here.
2879 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2881                             RegisterClass RC, ValueType vt,
2882                             X86MemOperand x86memop, PatFrag mem_frag,
2883                             X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2885                             Domain d, OpndItins itins, bit commutable> {
2886   let isCommutable = commutable in {
2887     def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2888        !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2889        [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
2892     def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2893        !strconcat(OpcodeStr,
2894            " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
2895        [], itins.rr, d>, EVEX_4V, EVEX_K;
2897     def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2898        !strconcat(OpcodeStr,
2899            " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2900        [], itins.rr, d>, EVEX_4V, EVEX_KZ;
2903   let mayLoad = 1 in {
2904     def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2905        !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2906        [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
2907        itins.rm, d>, EVEX_4V;
    // Broadcast form: scalar load replicated via X86VBroadcast.
2909     def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
2910        (ins RC:$src1, x86scalar_mop:$src2),
2911        !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2912                   ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2913        [(set RC:$dst, (OpNode RC:$src1,
2914                       (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2915        itins.rm, d>, EVEX_4V, EVEX_B;
2917     def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
2918        (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2919        "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2920        [], itins.rm, d>, EVEX_4V, EVEX_K;
2922     def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2923        (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2924        "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2925        [], itins.rm, d>, EVEX_4V, EVEX_KZ;
2927     def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
2928        (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2929        " \t{${src2}", BrdcstStr,
2930        ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
2931        [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
2933     def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2934        (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2935        " \t{${src2}", BrdcstStr,
2936        ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2938        [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit packed FP add/mul/min/max/sub/div, single and double precision.
2942 defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
2943                    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2944                    SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2946 defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
2947                    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2948                    SSE_ALU_ITINS_P.d, 1>,
2949                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2951 defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
2952                    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2953                    SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2954 defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
2955                    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2956                    SSE_ALU_ITINS_P.d, 1>,
2957                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2959 defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
2960                    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2961                    SSE_ALU_ITINS_P.s, 1>,
2962                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2963 defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
2964                    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2965                    SSE_ALU_ITINS_P.s, 1>,
2966                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2968 defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
2969                    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2970                    SSE_ALU_ITINS_P.d, 1>,
2971                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2972 defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
2973                    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2974                    SSE_ALU_ITINS_P.d, 1>,
2975                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2977 defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
2978                    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2979                    SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2980 defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
2981                    memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2982                    SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2984 defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
2985                    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2986                    SSE_ALU_ITINS_P.d, 0>,
2987                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2988 defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
2989                    memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2990                    SSE_ALU_ITINS_P.d, 0>,
2991                    EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Map unmasked (mask == -1, zero passthru, current rounding) forms of the
// masked FP max/min intrinsics onto the plain rr instructions.
2993 def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
2994                    (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
2995                    (i16 -1), FROUND_CURRENT)),
2996           (VMAXPSZrr VR512:$src1, VR512:$src2)>;
2998 def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
2999                   (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3000                   (i8 -1), FROUND_CURRENT)),
3001           (VMAXPDZrr VR512:$src1, VR512:$src2)>;
3003 def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
3004                    (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
3005                    (i16 -1), FROUND_CURRENT)),
3006           (VMINPSZrr VR512:$src1, VR512:$src2)>;
3008 def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
3009                   (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
3010                   (i8 -1), FROUND_CURRENT)),
3011           (VMINPDZrr VR512:$src1, VR512:$src2)>;
3012 //===----------------------------------------------------------------------===//
3013 // AVX-512 VPTESTM instructions
3014 //===----------------------------------------------------------------------===//
3016 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3017 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
3018 SDNode OpNode, ValueType vt> {
3019 def rr : AVX512PI<opc, MRMSrcReg,
3020 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
3021 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3022 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
3023 SSEPackedInt>, EVEX_4V;
3024 def rm : AVX512PI<opc, MRMSrcMem,
3025 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
3026 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3027 [(set KRC:$dst, (OpNode (vt RC:$src1),
3028 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
// 512-bit VPTESTM (test) and, under HasCDI, VPTESTNM (test-not) for dword
// (VK16 mask) and qword (VK8 mask) elements.
// NOTE(review): the memory operand is f512mem although these are integer
// tests — i512mem looks intended; confirm before changing (asm-printing only).
3031 defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
3032 memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
3033 EVEX_CD8<32, CD8VF>;
3034 defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
3035 memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
3036 EVEX_CD8<64, CD8VF>;
3038 let Predicates = [HasCDI] in {
3039 defm VPTESTNMDZ : avx512_vptest<0x27, "vptestnmd", VK16, VR512, f512mem,
3040 memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
3041 EVEX_CD8<32, CD8VF>;
3042 defm VPTESTNMQZ : avx512_vptest<0x27, "vptestnmq", VK8, VR512, f512mem,
3043 memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
3044 EVEX_CD8<64, CD8VF>;
// Unmasked ptestm intrinsics: emit the mask-producing instruction and copy
// the k-register result into an integer register of matching width.
3047 def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
3048 (v16i32 VR512:$src2), (i16 -1))),
3049 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
3051 def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
3052 (v8i64 VR512:$src2), (i8 -1))),
3053 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
3054 //===----------------------------------------------------------------------===//
3055 // AVX-512 Shift instructions
3056 //===----------------------------------------------------------------------===//
// Shift-by-immediate forms. 'ri'/'mi' carry selection patterns; the masked
// 'rik'/'mik' variants (EVEX_K, extra KRC:$mask input) have empty pattern
// lists and exist for assembler/disassembler coverage only.
3057 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
3058 string OpcodeStr, SDNode OpNode, RegisterClass RC,
3059 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
3060 RegisterClass KRC> {
// reg, imm8
3061 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3062 (ins RC:$src1, i8imm:$src2),
3063 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3064 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
3065 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
// reg, imm8, masked — no pattern
3066 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3067 (ins KRC:$mask, RC:$src1, i8imm:$src2),
3068 !strconcat(OpcodeStr,
3069 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3070 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
// mem, imm8
3071 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3072 (ins x86memop:$src1, i8imm:$src2),
3073 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3074 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
3075 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
// mem, imm8, masked — no pattern
3076 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3077 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
3078 !strconcat(OpcodeStr,
3079 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3080 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift-by-count forms where the count comes from the low part of an XMM
// register (src2 is always VR128X) or a 128-bit memory operand. The masked
// 'rrk'/'rmk' variants (EVEX_K) have no selection patterns.
3083 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3084 RegisterClass RC, ValueType vt, ValueType SrcVT,
3085 PatFrag bc_frag, RegisterClass KRC> {
3086 // src2 is always 128-bit
3087 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3088 (ins RC:$src1, VR128X:$src2),
3089 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3090 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
3091 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
// masked reg-reg — no pattern
3092 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3093 (ins KRC:$mask, RC:$src1, VR128X:$src2),
3094 !strconcat(OpcodeStr,
3095 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3096 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
// count loaded as v2i64 and bitconverted to SrcVT via bc_frag
3097 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3098 (ins RC:$src1, i128mem:$src2),
3099 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3100 [(set RC:$dst, (vt (OpNode RC:$src1,
3101 (bc_frag (memopv2i64 addr:$src2)))))],
3102 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
// masked reg-mem — no pattern
3103 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3104 (ins KRC:$mask, RC:$src1, i128mem:$src2),
3105 !strconcat(OpcodeStr,
3106 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3107 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// 512-bit logical/arithmetic shifts. Each opcode gets two defms with the
// same prefix: the immediate forms (avx512_shift_rmi: ri/rik/mi/mik) and the
// XMM-count forms (avx512_shift_rrm: rr/rrk/rm/rmk); the differing inner def
// names keep the records distinct.
3110 defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
3111 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3112 EVEX_V512, EVEX_CD8<32, CD8VF>;
3113 defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
3114 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3115 EVEX_CD8<32, CD8VQ>;
3117 defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
3118 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3119 EVEX_CD8<64, CD8VF>, VEX_W;
3120 defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
3121 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3122 EVEX_CD8<64, CD8VQ>, VEX_W;
3124 defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
3125 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
3126 EVEX_CD8<32, CD8VF>;
3127 defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
3128 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3129 EVEX_CD8<32, CD8VQ>;
3131 defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
3132 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3133 EVEX_CD8<64, CD8VF>, VEX_W;
3134 defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
3135 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3136 EVEX_CD8<64, CD8VQ>, VEX_W;
3138 defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
3139 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3140 EVEX_V512, EVEX_CD8<32, CD8VF>;
3141 defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
3142 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3143 EVEX_CD8<32, CD8VQ>;
// vpsraq (qword arithmetic right shift) is new with AVX-512; VEX_W selects it.
3145 defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
3146 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3147 EVEX_CD8<64, CD8VF>, VEX_W;
3148 defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
3149 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3150 EVEX_CD8<64, CD8VQ>, VEX_W;
3152 //===-------------------------------------------------------------------===//
3153 // Variable Bit Shifts
3154 //===-------------------------------------------------------------------===//
// Per-element (variable) shifts: each destination element is shifted by the
// corresponding element of $src2 (register) or the memory operand.
// NOTE(review): some interior pattern lines are not visible in this excerpt
// (original line numbering jumps); code below is kept byte-identical.
3155 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
3156 RegisterClass RC, ValueType vt,
3157 X86MemOperand x86memop, PatFrag mem_frag> {
3158 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3159 (ins RC:$src1, RC:$src2),
3160 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3162 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
3164 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3165 (ins RC:$src1, x86memop:$src2),
3166 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3168 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
// 512-bit variable-shift instantiations, mapped to the generic shl/srl/sra
// nodes (which are per-element for vector types): dword forms use v16i32,
// qword forms use v8i64 with VEX_W.
3172 defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
3173 i512mem, memopv16i32>, EVEX_V512,
3174 EVEX_CD8<32, CD8VF>;
3175 defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
3176 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3177 EVEX_CD8<64, CD8VF>;
3178 defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
3179 i512mem, memopv16i32>, EVEX_V512,
3180 EVEX_CD8<32, CD8VF>;
3181 defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
3182 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3183 EVEX_CD8<64, CD8VF>;
3184 defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
3185 i512mem, memopv16i32>, EVEX_V512,
3186 EVEX_CD8<32, CD8VF>;
3187 defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
3188 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3189 EVEX_CD8<64, CD8VF>;
3191 //===----------------------------------------------------------------------===//
3192 // AVX-512 - MOVDDUP
3193 //===----------------------------------------------------------------------===//
// MOVDDUP: shuffle selected via the X86Movddup node, with register and
// memory source forms.
3195 multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
3196 X86MemOperand x86memop, PatFrag memop_frag> {
3197 def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3198 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3199 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
3200 def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3201 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3203 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
// 512-bit instantiation, plus a pattern folding a scalar f64 load that is
// immediately vectorized into the memory form.
3206 defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
3207 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3208 def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
3209 (VMOVDDUPZrm addr:$src)>;
3211 //===---------------------------------------------------------------------===//
3212 // Replicate Single FP - MOVSHDUP and MOVSLDUP
3213 //===---------------------------------------------------------------------===//
// MOVSHDUP / MOVSLDUP: single-FP replicate shuffles via the given OpNode
// (X86Movshdup / X86Movsldup), register and memory forms.
3214 multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
3215 ValueType vt, RegisterClass RC, PatFrag mem_frag,
3216 X86MemOperand x86memop> {
3217 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3218 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3219 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
3221 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3222 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3223 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
3226 defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3227 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3228 EVEX_CD8<32, CD8VF>;
3229 defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3230 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3231 EVEX_CD8<32, CD8VF>;
// Reuse the FP instructions for the equivalent v16i32 shuffles.
3233 def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3234 def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3235 (VMOVSHDUPZrm addr:$src)>;
3236 def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3237 def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3238 (VMOVSLDUPZrm addr:$src)>;
3240 //===----------------------------------------------------------------------===//
3241 // Move Low to High and High to Low packed FP Instructions
3242 //===----------------------------------------------------------------------===//
// VMOVLHPS / VMOVHLPS (128-bit, EVEX-encoded): selected via the X86Movlhps /
// X86Movhlps shuffle nodes on v4f32.
3243 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3244 (ins VR128X:$src1, VR128X:$src2),
3245 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3246 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3247 IIC_SSE_MOV_LH>, EVEX_4V;
3248 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3249 (ins VR128X:$src1, VR128X:$src2),
3250 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3251 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3252 IIC_SSE_MOV_LH>, EVEX_4V;
// Reuse the FP instructions for the integer-typed forms of the same shuffles.
3254 let Predicates = [HasAVX512] in {
3256 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3257 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3258 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3259 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
3262 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3263 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3266 //===----------------------------------------------------------------------===//
3267 // FMA - Fused Multiply Operations
// Packed FMA3 (213-form at the instantiation sites). $src1 is tied to $dst
// via the surrounding Constraints. Three forms: masked reg-reg (via
// AVX512_masking_3src), reg-mem, and broadcast-mem (EVEX_B) where the scalar
// memory operand is splat via X86VBroadcast.
3269 let Constraints = "$src1 = $dst" in {
3270 multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3271 RegisterClass RC, X86MemOperand x86memop,
3272 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3273 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3274 RegisterClass KRC> {
3275 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3276 (ins RC:$src2, RC:$src3),
3277 OpcodeStr, "$src3, $src2", "$src2, $src3",
3278 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3282 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3283 (ins RC:$src1, RC:$src2, x86memop:$src3),
3284 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3285 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3286 (mem_frag addr:$src3))))]>;
// BrdcstStr is the broadcast suffix printed in the asm string, e.g. "{1to16}".
3287 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3288 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
3289 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
3290 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3291 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3292 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3294 } // Constraints = "$src1 = $dst"
// 512-bit 213-form packed FMA instantiations: single-precision (v16f32,
// "{1to16}" broadcast, VK16WM mask) then double-precision (v8f64, "{1to8}",
// VK8WM, VEX_W).
3296 let ExeDomain = SSEPackedSingle in {
3297 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3298 memopv16f32, f32mem, loadf32, "{1to16}",
3299 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
3300 EVEX_CD8<32, CD8VF>;
3301 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3302 memopv16f32, f32mem, loadf32, "{1to16}",
3303 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
3304 EVEX_CD8<32, CD8VF>;
3305 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3306 memopv16f32, f32mem, loadf32, "{1to16}",
3307 X86Fmaddsub, v16f32, VK16WM>,
3308 EVEX_V512, EVEX_CD8<32, CD8VF>;
3309 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3310 memopv16f32, f32mem, loadf32, "{1to16}",
3311 X86Fmsubadd, v16f32, VK16WM>,
3312 EVEX_V512, EVEX_CD8<32, CD8VF>;
3313 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3314 memopv16f32, f32mem, loadf32, "{1to16}",
3315 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
3316 EVEX_CD8<32, CD8VF>;
3317 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3318 memopv16f32, f32mem, loadf32, "{1to16}",
3319 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
3320 EVEX_CD8<32, CD8VF>;
3322 let ExeDomain = SSEPackedDouble in {
3323 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3324 memopv8f64, f64mem, loadf64, "{1to8}",
3325 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
3326 VEX_W, EVEX_CD8<64, CD8VF>;
3327 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3328 memopv8f64, f64mem, loadf64, "{1to8}",
3329 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3330 EVEX_CD8<64, CD8VF>;
3331 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3332 memopv8f64, f64mem, loadf64, "{1to8}",
3333 X86Fmaddsub, v8f64, VK8WM>,
3334 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3335 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3336 memopv8f64, f64mem, loadf64, "{1to8}",
3337 X86Fmsubadd, v8f64, VK8WM>,
3338 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3339 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3340 memopv8f64, f64mem, loadf64, "{1to8}",
3341 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
3342 EVEX_CD8<64, CD8VF>;
3343 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3344 memopv8f64, f64mem, loadf64, "{1to8}",
3345 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3346 EVEX_CD8<64, CD8VF>;
// 132-form packed FMA: memory-only forms (the multiplicand comes from memory
// as $src2, with $src3 in a register). $src1 is tied to $dst. 'mb' is the
// broadcast-memory form (EVEX_B).
3349 let Constraints = "$src1 = $dst" in {
3350 multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3351 RegisterClass RC, X86MemOperand x86memop,
3352 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3353 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
3355 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3356 (ins RC:$src1, RC:$src3, x86memop:$src2),
3357 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
3358 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
3359 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3360 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
3361 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
3362 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3363 [(set RC:$dst, (OpNode RC:$src1,
3364 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3366 } // Constraints = "$src1 = $dst"
// 512-bit 132-form packed FMA instantiations (memory forms only):
// single-precision then double-precision, mirroring the 213-form set above.
3369 let ExeDomain = SSEPackedSingle in {
3370 defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
3371 memopv16f32, f32mem, loadf32, "{1to16}",
3372 X86Fmadd, v16f32>, EVEX_V512,
3373 EVEX_CD8<32, CD8VF>;
3374 defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
3375 memopv16f32, f32mem, loadf32, "{1to16}",
3376 X86Fmsub, v16f32>, EVEX_V512,
3377 EVEX_CD8<32, CD8VF>;
3378 defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
3379 memopv16f32, f32mem, loadf32, "{1to16}",
3380 X86Fmaddsub, v16f32>,
3381 EVEX_V512, EVEX_CD8<32, CD8VF>;
3382 defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
3383 memopv16f32, f32mem, loadf32, "{1to16}",
3384 X86Fmsubadd, v16f32>,
3385 EVEX_V512, EVEX_CD8<32, CD8VF>;
3386 defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
3387 memopv16f32, f32mem, loadf32, "{1to16}",
3388 X86Fnmadd, v16f32>, EVEX_V512,
3389 EVEX_CD8<32, CD8VF>;
3390 defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
3391 memopv16f32, f32mem, loadf32, "{1to16}",
3392 X86Fnmsub, v16f32>, EVEX_V512,
3393 EVEX_CD8<32, CD8VF>;
3395 let ExeDomain = SSEPackedDouble in {
3396 defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
3397 memopv8f64, f64mem, loadf64, "{1to8}",
3398 X86Fmadd, v8f64>, EVEX_V512,
3399 VEX_W, EVEX_CD8<64, CD8VF>;
3400 defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
3401 memopv8f64, f64mem, loadf64, "{1to8}",
3402 X86Fmsub, v8f64>, EVEX_V512, VEX_W,
3403 EVEX_CD8<64, CD8VF>;
3404 defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
3405 memopv8f64, f64mem, loadf64, "{1to8}",
3406 X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
3407 EVEX_CD8<64, CD8VF>;
3408 defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
3409 memopv8f64, f64mem, loadf64, "{1to8}",
3410 X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
3411 EVEX_CD8<64, CD8VF>;
3412 defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
3413 memopv8f64, f64mem, loadf64, "{1to8}",
3414 X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
3415 EVEX_CD8<64, CD8VF>;
3416 defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
3417 memopv8f64, f64mem, loadf64, "{1to8}",
3418 X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
3419 EVEX_CD8<64, CD8VF>;
// Scalar FMA3 (213-form at the instantiations). $src1 tied to $dst; the
// register form is marked commutable in $src1/$src2 selection.
// NOTE(review): the 'm' form hard-codes f128mem:$src3 instead of using the
// x86memop parameter, which appears unused in the visible lines — verify.
3423 let Constraints = "$src1 = $dst" in {
3424 multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3425 RegisterClass RC, ValueType OpVT,
3426 X86MemOperand x86memop, Operand memop,
3428 let isCommutable = 1 in
3429 def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
3430 (ins RC:$src1, RC:$src2, RC:$src3),
3431 !strconcat(OpcodeStr,
3432 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3434 (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
3436 def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3437 (ins RC:$src1, RC:$src2, f128mem:$src3),
3438 !strconcat(OpcodeStr,
3439 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3441 (OpVT (OpNode RC:$src2, RC:$src1,
3442 (mem_frag addr:$src3))))]>;
3445 } // Constraints = "$src1 = $dst"
// Scalar 213-form FMA instantiations: ss variants on FR32X/f32, sd variants
// on FR64X/f64 with VEX_W; all use a single-element CD8 tuple (CD8VT1).
3447 defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
3448 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3449 defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
3450 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3451 defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
3452 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3453 defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
3454 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3455 defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
3456 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3457 defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
3458 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3459 defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
3460 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3461 defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
3462 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3464 //===----------------------------------------------------------------------===//
3465 // AVX-512 Scalar convert from sign integer to float/double
3466 //===----------------------------------------------------------------------===//
// Scalar int -> float/double conversion skeleton. No patterns here
// (hasSideEffects = 0, empty lists); selection is done by the Pat defs
// below. $src1 carries the pass-through upper bits of the destination.
3468 multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3469 X86MemOperand x86memop, string asm> {
3470 let hasSideEffects = 0 in {
3471 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
3472 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3475 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
3476 (ins DstRC:$src1, x86memop:$src),
3477 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3479 } // hasSideEffects = 0
// Signed (cvtsi2ss/sd, opcode 0x2A) and unsigned (cvtusi2ss/sd, opcode 0x7B)
// scalar int->fp conversions, 32- and 64-bit sources, plus the sint_to_fp /
// uint_to_fp patterns that select them. The tied first operand is filled
// with IMPLICIT_DEF since only the low element matters.
3481 let Predicates = [HasAVX512] in {
3482 defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
3483 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3484 defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
3485 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3486 defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
3487 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3488 defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
3489 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// sint_to_fp from memory folds the load into the rm forms.
3491 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
3492 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3493 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
3494 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3495 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
3496 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3497 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
3498 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3500 def : Pat<(f32 (sint_to_fp GR32:$src)),
3501 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3502 def : Pat<(f32 (sint_to_fp GR64:$src)),
3503 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3504 def : Pat<(f64 (sint_to_fp GR32:$src)),
3505 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3506 def : Pat<(f64 (sint_to_fp GR64:$src)),
3507 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned variants — new with AVX-512.
3509 defm VCVTUSI2SSZ   : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
3510 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3511 defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
3512 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3513 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
3514 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3515 defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
3516 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3518 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
3519 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3520 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
3521 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3522 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
3523 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3524 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
3525 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3527 def : Pat<(f32 (uint_to_fp GR32:$src)),
3528 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3529 def : Pat<(f32 (uint_to_fp GR64:$src)),
3530 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3531 def : Pat<(f64 (uint_to_fp GR32:$src)),
3532 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3533 def : Pat<(f64 (uint_to_fp GR64:$src)),
3534 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3537 //===----------------------------------------------------------------------===//
3538 // AVX-512 Scalar convert from float/double to integer
3539 //===----------------------------------------------------------------------===//
// Scalar fp -> int conversion via an intrinsic. The rr form selects 'Int';
// the rm form has no pattern (the mem_cpat parameter is reserved for it).
3540 multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3541 Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
3543 let hasSideEffects = 0 in {
3544 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3545 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3546 [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
3547 Requires<[HasAVX512]>;
3549 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
3550 !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
3551 Requires<[HasAVX512]>;
3552 } // hasSideEffects = 0
// Scalar fp -> signed int (cvtss2si/cvtsd2si, 0x2D) and fp -> unsigned int
// (cvtss2usi/cvtsd2usi, 0x79) conversions, 32- and 64-bit destinations.
3554 let Predicates = [HasAVX512] in {
3555 // Convert float/double to signed/unsigned int 32/64
3556 defm VCVTSS2SIZ:    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
3557 ssmem, sse_load_f32, "cvtss2si">,
3558 XS, EVEX_CD8<32, CD8VT1>;
3559 defm VCVTSS2SI64Z:  avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
3560 ssmem, sse_load_f32, "cvtss2si">,
3561 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
3562 defm VCVTSS2USIZ:   avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
3563 ssmem, sse_load_f32, "cvtss2usi">,
3564 XS, EVEX_CD8<32, CD8VT1>;
3565 defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3566 int_x86_avx512_cvtss2usi64, ssmem,
3567 sse_load_f32, "cvtss2usi">, XS, VEX_W,
3568 EVEX_CD8<32, CD8VT1>;
3569 defm VCVTSD2SIZ:    avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
3570 sdmem, sse_load_f64, "cvtsd2si">,
3571 XD, EVEX_CD8<64, CD8VT1>;
3572 defm VCVTSD2SI64Z:  avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
3573 sdmem, sse_load_f64, "cvtsd2si">,
3574 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3575 defm VCVTSD2USIZ:   avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
3576 sdmem, sse_load_f64, "cvtsd2usi">,
3577 XD, EVEX_CD8<64, CD8VT1>;
3578 defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3579 int_x86_avx512_cvtsd2usi64, sdmem,
3580 sse_load_f64, "cvtsd2usi">, XD, VEX_W,
3581 EVEX_CD8<64, CD8VT1>;
// Intrinsic (three-address) int->fp forms; isCodeGenOnly keeps them out of
// the assembler tables since the defms above already cover the mnemonics.
3583 let isCodeGenOnly = 1 in {
3584 defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3585 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
3586 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3587 defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3588 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
3589 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3590 defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3591 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
3592 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3593 defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3594 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
3595 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3597 defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3598 int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
3599 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3600 defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3601 int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
3602 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3603 defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3604 int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
3605 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3606 defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3607 int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
3608 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3609 } // isCodeGenOnly = 1
// Truncating fp -> int intrinsic forms (cvtt*, opcodes 0x2C signed / 0x78
// unsigned). isCodeGenOnly: the pattern-less non-intrinsic defms below own
// the mnemonics for the assembler.
3611 // Convert float/double to signed/unsigned int 32/64 with truncation
3612 let isCodeGenOnly = 1 in {
3613 defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
3614 ssmem, sse_load_f32, "cvttss2si">,
3615 XS, EVEX_CD8<32, CD8VT1>;
3616 defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3617 int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
3618 "cvttss2si">, XS, VEX_W,
3619 EVEX_CD8<32, CD8VT1>;
3620 defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
3621 sdmem, sse_load_f64, "cvttsd2si">, XD,
3622 EVEX_CD8<64, CD8VT1>;
3623 defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3624 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
3625 "cvttsd2si">, XD, VEX_W,
3626 EVEX_CD8<64, CD8VT1>;
3627 defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3628 int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
3629 "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
3630 defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3631 int_x86_avx512_cvttss2usi64, ssmem,
3632 sse_load_f32, "cvttss2usi">, XS, VEX_W,
3633 EVEX_CD8<32, CD8VT1>;
3634 defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3635 int_x86_avx512_cvttsd2usi,
3636 sdmem, sse_load_f64, "cvttsd2usi">, XD,
3637 EVEX_CD8<64, CD8VT1>;
3638 defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3639 int_x86_avx512_cvttsd2usi64, sdmem,
3640 sse_load_f64, "cvttsd2usi">, XD, VEX_W,
3641 EVEX_CD8<64, CD8VT1>;
3642 } // isCodeGenOnly = 1
// Scalar fp -> int conversion selected via a generic SDNode (fp_to_sint /
// fp_to_uint at the instantiations), register and load-folding forms.
3644 multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3645 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
3647 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3648 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3649 [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
3650 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3651 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3652 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
// Truncating conversions on plain FP registers: fp_to_sint -> cvtt*2si
// (0x2C), fp_to_uint -> cvtt*2usi (0x78); 32/64-bit sources and destinations.
3655 defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
3656 loadf32, "cvttss2si">, XS,
3657 EVEX_CD8<32, CD8VT1>;
3658 defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
3659 loadf32, "cvttss2usi">, XS,
3660 EVEX_CD8<32, CD8VT1>;
3661 defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
3662 loadf32, "cvttss2si">, XS, VEX_W,
3663 EVEX_CD8<32, CD8VT1>;
3664 defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
3665 loadf32, "cvttss2usi">, XS, VEX_W,
3666 EVEX_CD8<32, CD8VT1>;
3667 defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
3668 loadf64, "cvttsd2si">, XD,
3669 EVEX_CD8<64, CD8VT1>;
3670 defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
3671 loadf64, "cvttsd2usi">, XD,
3672 EVEX_CD8<64, CD8VT1>;
3673 defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
3674 loadf64, "cvttsd2si">, XD, VEX_W,
3675 EVEX_CD8<64, CD8VT1>;
3676 defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
3677 loadf64, "cvttsd2usi">, XD, VEX_W,
3678 EVEX_CD8<64, CD8VT1>;
3680 //===----------------------------------------------------------------------===//
3681 // AVX-512 Convert form float to double and back
3682 //===----------------------------------------------------------------------===//
// Scalar float<->double conversions. The instruction defs carry no patterns
// (hasSideEffects = 0); selection happens through the Pat defs that follow.
3683 let hasSideEffects = 0 in {
3684 def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
3685 (ins FR32X:$src1, FR32X:$src2),
3686 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3687 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
3689 def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
3690 (ins FR32X:$src1, f32mem:$src2),
3691 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3692 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
3693 EVEX_CD8<32, CD8VT1>;
3695 // Convert scalar double to scalar single
3696 def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
3697 (ins FR64X:$src1, FR64X:$src2),
3698 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3699 []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
3701 def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
3702 (ins FR64X:$src1, f64mem:$src2),
3703 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3704 []>, EVEX_4V, VEX_LIG, VEX_W,
3705 Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
// fextend of a register passes the source as both operands.
3708 def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
3709 Requires<[HasAVX512]>;
3710 def : Pat<(fextend (loadf32 addr:$src)),
3711 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
// extloadf32: fold into the memory form when optimizing for size, otherwise
// load with VMOVSSZrm first and convert in registers.
3713 def : Pat<(extloadf32 addr:$src),
3714 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
3715 Requires<[HasAVX512, OptForSize]>;
3717 def : Pat<(extloadf32 addr:$src),
3718 (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
3719 Requires<[HasAVX512, OptForSpeed]>;
3721 def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
3722 Requires<[HasAVX512]>;
// Packed fp conversion with an extra 'rrb' form taking an explicit static
// rounding-control operand (AVX512RC:$rc, EVEX_B + EVEX_RC). Only the rr
// and rm forms carry selection patterns.
3724 multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
3725 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3726 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3728 let hasSideEffects = 0 in {
3729 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3730 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3732 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3733 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3734 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3735 [], d>, EVEX, EVEX_B, EVEX_RC;
3737 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3738 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3740 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3741 } // hasSideEffects = 0
// Packed FP conversion without a rounding-control variant; same rr/rm shape
// as avx512_vcvt_fp_with_rc minus the rrb def.
3744 multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
3745 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3746 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3748 let hasSideEffects = 0 in {
3749 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3750 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3752 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3754 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3755 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3757 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3758 } // hasSideEffects = 0
// 512-bit packed conversions instantiated from the multiclasses above.
// Conversions that halve/double element width use the narrower VR256X class
// on the 256-bit side and a CD8VH (half-vector) disp8 scale.
3761 defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
3762 memopv8f64, f512mem, v8f32, v8f64,
3763 SSEPackedSingle>, EVEX_V512, VEX_W, PD,
3764 EVEX_CD8<64, CD8VF>;
3766 defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
3767 memopv4f64, f256mem, v8f64, v8f32,
3768 SSEPackedDouble>, EVEX_V512, PS,
3769 EVEX_CD8<32, CD8VH>;
3770 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3771 (VCVTPS2PDZrm addr:$src)>;
// Map the unmasked-intrinsic forms (all-ones mask, zero passthru) onto the
// plain instructions; the rounding-mode intrinsic selects the rrb variant.
3773 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3774 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
3775 (VCVTPD2PSZrr VR512:$src)>;
3777 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3778 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
3779 (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
3781 //===----------------------------------------------------------------------===//
3782 // AVX-512 Vector convert from sign integer to float/double
3783 //===----------------------------------------------------------------------===//
3785 defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
3786 memopv8i64, i512mem, v16f32, v16i32,
3787 SSEPackedSingle>, EVEX_V512, PS,
3788 EVEX_CD8<32, CD8VF>;
3790 defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
3791 memopv4i64, i256mem, v8f64, v8i32,
3792 SSEPackedDouble>, EVEX_V512, XS,
3793 EVEX_CD8<32, CD8VH>;
3795 defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
3796 memopv16f32, f512mem, v16i32, v16f32,
3797 SSEPackedSingle>, EVEX_V512, XS,
3798 EVEX_CD8<32, CD8VF>;
3800 defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
3801 memopv8f64, f512mem, v8i32, v8f64,
3802 SSEPackedDouble>, EVEX_V512, PD, VEX_W,
3803 EVEX_CD8<64, CD8VF>;
3805 defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
3806 memopv16f32, f512mem, v16i32, v16f32,
3807 SSEPackedSingle>, EVEX_V512, PS,
3808 EVEX_CD8<32, CD8VF>;
3810 // cvttps2udq (src, 0, mask-all-ones, sae-current)
3811 def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
3812 (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
3813 (VCVTTPS2UDQZrr VR512:$src)>;
3815 defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
3816 memopv8f64, f512mem, v8i32, v8f64,
3817 SSEPackedDouble>, EVEX_V512, PS, VEX_W,
3818 EVEX_CD8<64, CD8VF>;
3820 // cvttpd2udq (src, 0, mask-all-ones, sae-current)
3821 def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
3822 (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
3823 (VCVTTPD2UDQZrr VR512:$src)>;
3825 defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
3826 memopv4i64, f256mem, v8f64, v8i32,
3827 SSEPackedDouble>, EVEX_V512, XS,
3828 EVEX_CD8<32, CD8VH>;
3830 defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
3831 memopv16i32, f512mem, v16f32, v16i32,
3832 SSEPackedSingle>, EVEX_V512, XD,
3833 EVEX_CD8<32, CD8VF>;
// 128/256-bit unsigned conversions have no dedicated AVX-512 narrow forms
// here: widen into a zmm via SUBREG_TO_REG, convert, then extract the
// original-width subregister.
3835 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
3836 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3837 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3839 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
3840 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3841 (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3843 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
3844 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3845 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3847 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
3848 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3849 (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3851 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
3852 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
3853 (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
// Unmasked dq2ps/dq2pd intrinsic forms (all-ones mask, zero passthru).
3855 def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
3856 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3857 (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
3858 def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
3859 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3860 (VCVTDQ2PDZrr VR256X:$src)>;
3861 def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
3862 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3863 (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
3864 def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
3865 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3866 (VCVTUDQ2PDZrr VR256X:$src)>;
// Packed FP->int conversion skeleton (no SDNode patterns here; selection is
// done via the intrinsic Pat<>s that follow the instantiations):
//   rr/rm - plain register/memory forms, rrb - static rounding (EVEX.b).
3868 multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
3869 RegisterClass DstRC, PatFrag mem_frag,
3870 X86MemOperand x86memop, Domain d> {
3871 let hasSideEffects = 0 in {
3872 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3873 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3875 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3876 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3877 [], d>, EVEX, EVEX_B, EVEX_RC;
3879 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3880 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3882 } // hasSideEffects = 0
// Rounding FP->int conversions (cvtps2dq/cvtpd2dq and unsigned variants),
// plus patterns mapping the masked intrinsics' unmasked form (all-ones mask,
// zero passthru) onto the rrb (static-rounding) instructions.
3885 defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
3886 memopv16f32, f512mem, SSEPackedSingle>, PD,
3887 EVEX_V512, EVEX_CD8<32, CD8VF>;
3888 defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
3889 memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
3890 EVEX_V512, EVEX_CD8<64, CD8VF>;
3892 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
3893 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3894 (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
3896 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
3897 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3898 (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3900 defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
3901 memopv16f32, f512mem, SSEPackedSingle>,
3902 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
3903 defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
3904 memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
3905 PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
3907 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
3908 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3909 (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
3911 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
3912 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3913 (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
// Fold loads into the packed double<->single conversions.
3915 let Predicates = [HasAVX512] in {
3916 def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
3917 (VCVTPD2PSZrm addr:$src)>;
3918 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3919 (VCVTPS2PDZrm addr:$src)>;
3922 //===----------------------------------------------------------------------===//
3923 // Half precision conversion instructions
3924 //===----------------------------------------------------------------------===//
// Half->single conversion (vcvtph2ps): register form plus a pattern-less
// load form (mayLoad = 1, hasSideEffects = 0).
3925 multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
3926 X86MemOperand x86memop> {
3927 def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
3928 "vcvtph2ps\t{$src, $dst|$dst, $src}",
3930 let hasSideEffects = 0, mayLoad = 1 in
3931 def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
3932 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
// Single->half conversion (vcvtps2ph): $src2 is the imm8 rounding control.
// The memory form stores the result (mayStore = 1), hence MRMDestMem with
// the memory operand listed first.
3935 multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
3936 X86MemOperand x86memop> {
3937 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
3938 (ins srcRC:$src1, i32i8imm:$src2),
3939 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
3941 let hasSideEffects = 0, mayStore = 1 in
3942 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
3943 (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
3944 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
// 512-bit instantiations: 16 halves live in a ymm, so the "other" side is
// VR256X and disp8 scaling is half-vector (CD8VH).
3947 defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
3948 EVEX_CD8<32, CD8VH>;
3949 defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
3950 EVEX_CD8<32, CD8VH>;
// Unmasked intrinsic forms (all-ones mask, zero passthru).
3952 def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
3953 imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
3954 (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
3956 def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
3957 (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
3958 (VCVTPH2PSZrr VR256X:$src)>;
// EVEX-encoded scalar compares that set EFLAGS (ucomiss/ucomisd quiet,
// comiss/comisd signaling). The vector-register variants carry no patterns
// (Pattern = []<dag>); the Int_* defs are isCodeGenOnly intrinsic lowerings.
3960 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
3961 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
3962 "ucomiss">, PS, EVEX, VEX_LIG,
3963 EVEX_CD8<32, CD8VT1>;
3964 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
3965 "ucomisd">, PD, EVEX,
3966 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3967 let Pattern = []<dag> in {
3968 defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
3969 "comiss">, PS, EVEX, VEX_LIG,
3970 EVEX_CD8<32, CD8VT1>;
3971 defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
3972 "comisd">, PD, EVEX,
3973 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3975 let isCodeGenOnly = 1 in {
3976 defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
3977 load, "ucomiss">, PS, EVEX, VEX_LIG,
3978 EVEX_CD8<32, CD8VT1>;
3979 defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
3980 load, "ucomisd">, PD, EVEX,
3981 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3983 defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
3984 load, "comiss">, PS, EVEX, VEX_LIG,
3985 EVEX_CD8<32, CD8VT1>;
3986 defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
3987 load, "comisd">, PD, EVEX,
3988 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3992 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal/rsqrt approximations. Pattern-less
// (selection happens through the intrinsic Pat<>s below the instantiations).
3993 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3994 X86MemOperand x86memop> {
3995 let hasSideEffects = 0 in {
3996 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3997 (ins RC:$src1, RC:$src2),
3998 !strconcat(OpcodeStr,
3999 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4000 let mayLoad = 1 in {
4001 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4002 (ins RC:$src1, x86memop:$src2),
4003 !strconcat(OpcodeStr,
4004 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// rcp14/rsqrt14 scalar instantiations plus intrinsic selection: the
// intrinsics operate on v4f32/v2f64 vectors, so operands are moved to the
// scalar register classes with COPY_TO_REGCLASS around the FR32X/FR64X defs.
4009 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
4010 EVEX_CD8<32, CD8VT1>;
4011 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
4012 VEX_W, EVEX_CD8<64, CD8VT1>;
4013 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
4014 EVEX_CD8<32, CD8VT1>;
4015 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
4016 VEX_W, EVEX_CD8<64, CD8VT1>;
4018 def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
4019 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4020 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4021 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4023 def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
4024 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4025 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4026 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4028 def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
4029 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
4030 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4031 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4033 def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
4034 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
4035 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4036 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4038 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations, selected directly from the
// X86frcp/X86frsqrt SDNodes (register and load-folded forms).
4039 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
4040 RegisterClass RC, X86MemOperand x86memop,
4041 PatFrag mem_frag, ValueType OpVt> {
4042 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4043 !strconcat(OpcodeStr,
4044 " \t{$src, $dst|$dst, $src}"),
4045 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
4047 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4048 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4049 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
// 512-bit packed rcp14/rsqrt14 plus unmasked-intrinsic selection patterns
// (all-ones mask, zero passthru -> plain r form).
4052 defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
4053 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4054 defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
4055 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4056 defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
4057 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4058 defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
4059 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4061 def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
4062 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4063 (VRSQRT14PSZr VR512:$src)>;
4064 def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
4065 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4066 (VRSQRT14PDZr VR512:$src)>;
4068 def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
4069 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4070 (VRCP14PSZr VR512:$src)>;
4071 def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
4072 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4073 (VRCP14PDZr VR512:$src)>;
4075 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision approximations (AVX-512 ER extension, hence the
// HasERI predicate). rrb is the {sae} (suppress-all-exceptions) form.
4076 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4077 X86MemOperand x86memop> {
4078 let hasSideEffects = 0, Predicates = [HasERI] in {
4079 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4080 (ins RC:$src1, RC:$src2),
4081 !strconcat(OpcodeStr,
4082 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4083 def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4084 (ins RC:$src1, RC:$src2),
4085 !strconcat(OpcodeStr,
4086 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
4087 []>, EVEX_4V, EVEX_B;
4088 let mayLoad = 1 in {
4089 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4090 (ins RC:$src1, x86memop:$src2),
4091 !strconcat(OpcodeStr,
4092 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
// rcp28/rsqrt28 scalar instantiations. The intrinsic patterns select the
// rrb ({sae}) forms and bridge vector operands to the scalar register
// classes via COPY_TO_REGCLASS.
4097 defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
4098 EVEX_CD8<32, CD8VT1>;
4099 defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
4100 VEX_W, EVEX_CD8<64, CD8VT1>;
4101 defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
4102 EVEX_CD8<32, CD8VT1>;
4103 defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
4104 VEX_W, EVEX_CD8<64, CD8VT1>;
4106 def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
4107 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4109 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4110 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4112 def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
4113 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4115 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4116 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4118 def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
4119 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4121 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4122 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4124 def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
4125 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4127 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4128 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4130 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximations (ER extension): plain r, {sae} rb,
// and memory m forms, all pattern-less here.
4131 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
4132 RegisterClass RC, X86MemOperand x86memop> {
4133 let hasSideEffects = 0, Predicates = [HasERI] in {
4134 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4135 !strconcat(OpcodeStr,
4136 " \t{$src, $dst|$dst, $src}"),
4138 def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4139 !strconcat(OpcodeStr,
4140 " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
4142 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4143 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// 512-bit packed rcp28/rsqrt28 plus unmasked-intrinsic patterns; the
// FROUND_NO_EXC intrinsic argument maps onto the rb ({sae}) forms.
4147 defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
4148 EVEX_V512, EVEX_CD8<32, CD8VF>;
4149 defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
4150 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4151 defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
4152 EVEX_V512, EVEX_CD8<32, CD8VF>;
4153 defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
4154 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4156 def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
4157 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4158 (VRSQRT28PSZrb VR512:$src)>;
4159 def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
4160 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4161 (VRSQRT28PDZrb VR512:$src)>;
4163 def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
4164 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4165 (VRCP28PSZrb VR512:$src)>;
4166 def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
4167 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4168 (VRCP28PDZrb VR512:$src)>;
// 512-bit packed sqrt: PS (v16f32) and PD (v8f64) register and load-folded
// forms, selected from the generic OpNode (fsqrt at the instantiation).
4170 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
4171 OpndItins itins_s, OpndItins itins_d> {
4172 def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4173 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4174 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
4178 def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4179 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4181 (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
4182 itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
4184 def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4185 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4186 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
4190 def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4191 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4192 [(set VR512:$dst, (OpNode
// NOTE(review): the f64 memory pattern loads through the f32 fragment
// (memopv16f32) and bitconverts to v8f64 — presumably intentional (both are
// 64-byte aligned loads), but memopv8f64 would match the PS form; confirm.
4193 (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
4194 itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Scalar sqrt: SS (f32) and SD (f64) forms, each with a pattern-less
// two-register variant plus an isCodeGenOnly *_Int variant that implements
// the F32Int/F64Int intrinsic on full VR128X operands, and load-folding
// variants of both (mayLoad = 1).
4198 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
4199 Intrinsic F32Int, Intrinsic F64Int,
4200 OpndItins itins_s, OpndItins itins_d> {
4201 def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
4202 (ins FR32X:$src1, FR32X:$src2),
4203 !strconcat(OpcodeStr,
4204 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4205 [], itins_s.rr>, XS, EVEX_4V;
4206 let isCodeGenOnly = 1 in
4207 def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4208 (ins VR128X:$src1, VR128X:$src2),
4209 !strconcat(OpcodeStr,
4210 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4212 (F32Int VR128X:$src1, VR128X:$src2))],
4213 itins_s.rr>, XS, EVEX_4V;
4214 let mayLoad = 1 in {
4215 def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
4216 (ins FR32X:$src1, f32mem:$src2),
4217 !strconcat(OpcodeStr,
4218 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4219 [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4220 let isCodeGenOnly = 1 in
4221 def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4222 (ins VR128X:$src1, ssmem:$src2),
4223 !strconcat(OpcodeStr,
4224 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4226 (F32Int VR128X:$src1, sse_load_f32:$src2))],
4227 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4229 def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
4230 (ins FR64X:$src1, FR64X:$src2),
4231 !strconcat(OpcodeStr,
4232 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4234 let isCodeGenOnly = 1 in
4235 def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4236 (ins VR128X:$src1, VR128X:$src2),
4237 !strconcat(OpcodeStr,
4238 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4240 (F64Int VR128X:$src1, VR128X:$src2))],
4241 itins_s.rr>, XD, EVEX_4V, VEX_W;
4242 let mayLoad = 1 in {
4243 def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
4244 (ins FR64X:$src1, f64mem:$src2),
4245 !strconcat(OpcodeStr,
4246 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4247 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
4248 let isCodeGenOnly = 1 in
4249 def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4250 (ins VR128X:$src1, sdmem:$src2),
4251 !strconcat(OpcodeStr,
4252 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4254 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
4255 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
// Instantiate scalar + packed sqrt under the single VSQRT prefix
// (produces VSQRTSSZ*/VSQRTSDZ* and VSQRTPSZ*/VSQRTPDZ*).
4260 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4261 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4262 SSE_SQRTSS, SSE_SQRTSD>,
4263 avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
4264 SSE_SQRTPS, SSE_SQRTPD>;
// Selection patterns for sqrt/rsqrt/rcp under HasAVX512: map the packed
// intrinsics' unmasked forms and the scalar fsqrt/X86frsqrt/X86frcp nodes
// onto the Z-encoded instructions. Load-folding forms are gated on
// OptForSize to preserve the partial-register-merge behavior otherwise.
4266 let Predicates = [HasAVX512] in {
4267 def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
4268 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
4269 (VSQRTPSZrr VR512:$src1)>;
4270 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
4271 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
4272 (VSQRTPDZrr VR512:$src1)>;
4274 def : Pat<(f32 (fsqrt FR32X:$src)),
4275 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4276 def : Pat<(f32 (fsqrt (load addr:$src))),
4277 (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
4278 Requires<[OptForSize]>;
4279 def : Pat<(f64 (fsqrt FR64X:$src)),
4280 (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
4281 def : Pat<(f64 (fsqrt (load addr:$src))),
4282 (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
4283 Requires<[OptForSize]>;
4285 def : Pat<(f32 (X86frsqrt FR32X:$src)),
4286 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4287 def : Pat<(f32 (X86frsqrt (load addr:$src))),
4288 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4289 Requires<[OptForSize]>;
4291 def : Pat<(f32 (X86frcp FR32X:$src)),
4292 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4293 def : Pat<(f32 (X86frcp (load addr:$src))),
4294 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4295 Requires<[OptForSize]>;
4297 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
4298 (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
4299 (COPY_TO_REGCLASS VR128X:$src, FR32)),
4301 def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
4302 (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
4304 def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
4305 (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
4306 (COPY_TO_REGCLASS VR128X:$src, FR64)),
4308 def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
4309 (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
// Packed rounding-style unary op with imm8 control: PS (single) and PD
// (double) register/memory forms implementing the V4F32Int/V2F64Int
// intrinsics, split by ExeDomain for domain-crossing avoidance.
4313 multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
4314 X86MemOperand x86memop, RegisterClass RC,
4315 PatFrag mem_frag32, PatFrag mem_frag64,
4316 Intrinsic V4F32Int, Intrinsic V2F64Int,
4318 let ExeDomain = SSEPackedSingle in {
4319 // Intrinsic operation, reg.
4320 // Vector intrinsic operation, reg
4321 def PSr : AVX512AIi8<opcps, MRMSrcReg,
4322 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4323 !strconcat(OpcodeStr,
4324 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4325 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
4327 // Vector intrinsic operation, mem
4328 def PSm : AVX512AIi8<opcps, MRMSrcMem,
4329 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4330 !strconcat(OpcodeStr,
4331 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4333 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
4334 EVEX_CD8<32, VForm>;
4335 } // ExeDomain = SSEPackedSingle
4337 let ExeDomain = SSEPackedDouble in {
4338 // Vector intrinsic operation, reg
4339 def PDr : AVX512AIi8<opcpd, MRMSrcReg,
4340 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4341 !strconcat(OpcodeStr,
4342 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4343 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
4345 // Vector intrinsic operation, mem
4346 def PDm : AVX512AIi8<opcpd, MRMSrcMem,
4347 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4348 !strconcat(OpcodeStr,
4349 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4351 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
4352 EVEX_CD8<64, VForm>;
4353 } // ExeDomain = SSEPackedDouble
// Scalar binary op with imm8 control: SS (f32) and SD (f64) forms, each as
// a pattern-less FR32X/FR64X def, an isCodeGenOnly *_Int register def, and
// a memory def implementing the F32Int/F64Int intrinsics.
// (Remaining template parameters are declared on elided continuation lines.)
4356 multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
4360 let ExeDomain = GenericDomain in {
4362 let hasSideEffects = 0 in
4363 def SSr : AVX512AIi8<opcss, MRMSrcReg,
4364 (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
4365 !strconcat(OpcodeStr,
4366 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4369 // Intrinsic operation, reg.
4370 let isCodeGenOnly = 1 in
4371 def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
4372 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4373 !strconcat(OpcodeStr,
4374 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4375 [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
4377 // Intrinsic operation, mem.
4378 def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
4379 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
4380 !strconcat(OpcodeStr,
4381 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4382 [(set VR128X:$dst, (F32Int VR128X:$src1,
4383 sse_load_f32:$src2, imm:$src3))]>,
4384 EVEX_CD8<32, CD8VT1>;
4387 let hasSideEffects = 0 in
4388 def SDr : AVX512AIi8<opcsd, MRMSrcReg,
4389 (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
4390 !strconcat(OpcodeStr,
4391 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4394 // Intrinsic operation, reg.
4395 let isCodeGenOnly = 1 in
4396 def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
4397 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4398 !strconcat(OpcodeStr,
4399 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4400 [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
4403 // Intrinsic operation, mem.
4404 def SDm : AVX512AIi8<opcsd, MRMSrcMem,
4405 (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
4406 !strconcat(OpcodeStr,
4407 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4409 (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
4410 VEX_W, EVEX_CD8<64, CD8VT1>;
4411 } // ExeDomain = GenericDomain
// Packed round-to-integral with imm8 rounding control ($src2): register and
// memory forms under the given execution domain.
4414 multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
4415 X86MemOperand x86memop, RegisterClass RC,
4416 PatFrag mem_frag, Domain d> {
4417 let ExeDomain = d in {
4418 // Intrinsic operation, reg.
4419 // Vector intrinsic operation, reg
4420 def r : AVX512AIi8<opc, MRMSrcReg,
4421 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4422 !strconcat(OpcodeStr,
4423 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4426 // Vector intrinsic operation, mem
4427 def m : AVX512AIi8<opc, MRMSrcMem,
4428 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4429 !strconcat(OpcodeStr,
4430 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// 512-bit vrndscaleps/pd plus patterns for the merge-into-self unmasked
// intrinsic form (passthru == $src1, all-ones mask).
4436 defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
4437 memopv16f32, SSEPackedSingle>, EVEX_V512,
4438 EVEX_CD8<32, CD8VF>;
4440 def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
4441 imm:$src2, (v16f32 VR512:$src1), (i16 -1),
4443 (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
4446 defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
4447 memopv8f64, SSEPackedDouble>, EVEX_V512,
4448 VEX_W, EVEX_CD8<64, CD8VF>;
4450 def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
4451 imm:$src2, (v8f64 VR512:$src1), (i8 -1),
4453 (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
// Scalar round-to-integral (vrndscaless/sd): rounds $src2 with the imm8
// control $src3, merging the upper elements from $src1.
4455 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
4456 Operand x86memop, RegisterClass RC, Domain d> {
4457 let ExeDomain = d in {
4458 def r : AVX512AIi8<opc, MRMSrcReg,
4459 (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
4460 !strconcat(OpcodeStr,
// Fix: $src3 is in the operand list but was missing from the assembly
// string, so the immediate could never be printed/parsed. Include it in
// both AT&T and Intel syntaxes, matching the other AVX512AIi8 scalar ops.
4461 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4464 def m : AVX512AIi8<opc, MRMSrcMem,
4465 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
4466 !strconcat(OpcodeStr,
4467 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4472 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
4473 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4475 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
4476 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
// Lower scalar rounding libm-style nodes to vrndscaless/sd with the
// matching imm8: 0x1 = floor, 0x2 = ceil, 0x3 = trunc, 0x4 = rint (current
// mode), 0xC = nearbyint (current mode, exceptions suppressed).
// Fix: give the first pattern the explicit (f32 ...) result type for
// consistency with every sibling pattern below.
4478 def : Pat<(f32 (ffloor FR32X:$src)),
4479 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
4480 def : Pat<(f64 (ffloor FR64X:$src)),
4481 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
4482 def : Pat<(f32 (fnearbyint FR32X:$src)),
4483 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
4484 def : Pat<(f64 (fnearbyint FR64X:$src)),
4485 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4486 def : Pat<(f32 (fceil FR32X:$src)),
4487 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
4488 def : Pat<(f64 (fceil FR64X:$src)),
4489 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
4490 def : Pat<(f32 (frint FR32X:$src)),
4491 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
4492 def : Pat<(f64 (frint FR64X:$src)),
4493 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
4494 def : Pat<(f32 (ftrunc FR32X:$src)),
4495 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
4496 def : Pat<(f64 (ftrunc FR64X:$src)),
4497 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
// Packed equivalents of the scalar patterns above: same imm8 encodings
// (0x1 floor, 0x2 ceil, 0x3 trunc, 0x4 rint, 0xC nearbyint) on zmm.
4499 def : Pat<(v16f32 (ffloor VR512:$src)),
4500 (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
4501 def : Pat<(v16f32 (fnearbyint VR512:$src)),
4502 (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
4503 def : Pat<(v16f32 (fceil VR512:$src)),
4504 (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
4505 def : Pat<(v16f32 (frint VR512:$src)),
4506 (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
4507 def : Pat<(v16f32 (ftrunc VR512:$src)),
4508 (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
4510 def : Pat<(v8f64 (ffloor VR512:$src)),
4511 (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
4512 def : Pat<(v8f64 (fnearbyint VR512:$src)),
4513 (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
4514 def : Pat<(v8f64 (fceil VR512:$src)),
4515 (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
4516 def : Pat<(v8f64 (frint VR512:$src)),
4517 (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
4518 def : Pat<(v8f64 (ftrunc VR512:$src)),
4519 (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
4521 //-------------------------------------------------
4522 // Integer truncate and extend operations
4523 //-------------------------------------------------
// Down-converting integer moves (vpmov*): destination-register forms with
// merge-masking (rrk) and zero-masking (rrkz) variants, plus memory store
// forms (mr) with a masked store variant (mrk).
4525 multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
4526 RegisterClass dstRC, RegisterClass srcRC,
4527 RegisterClass KRC, X86MemOperand x86memop> {
4528 def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4530 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4533 def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4534 (ins KRC:$mask, srcRC:$src),
4535 !strconcat(OpcodeStr,
4536 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
4539 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4540 (ins KRC:$mask, srcRC:$src),
4541 !strconcat(OpcodeStr,
4542 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4545 def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
4546 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4549 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
4550 (ins x86memop:$dst, KRC:$mask, srcRC:$src),
4551 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
// VPMOV{,S,US}{QB,QW,QD,DW,DB}: truncate (plain), signed-saturate (S) and
// unsigned-saturate (US) down-conversions from 512-bit sources.  The EVEX_CD8
// tuple encodes the element size / fraction of the vector actually touched
// in memory (CD8VO = 1/8, CD8VQ = 1/4, CD8VH = 1/2) for disp8 scaling.
4555 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4556 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4557 defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
4558 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4559 defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
4560 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4561 defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
4562 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4563 defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
4564 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4565 defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
4566 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4567 defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
4568 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4569 defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
4570 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4571 defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
4572 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4573 defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
4574 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4575 defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
4576 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4577 defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
4578 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4579 defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
4580 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4581 defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
4582 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4583 defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
4584 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
// Select plain vector truncation (X86vtrunc) to the unmasked VPMOV rr forms.
4586 def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
4587 def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
4588 def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
4589 def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
4590 def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
// Masked truncation (X86vtruncm) selects the zero-masking rrkz variants.
4592 def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4593 (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
4594 def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4595 (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
4596 def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4597 (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
4598 def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4599 (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
// Multiclass for VPMOVSX/VPMOVZX widening moves.  OpNode is X86vsext or
// X86vzext; OpVT/InVT are the destination and source vector types, and
// mem_frag loads the source from memory (bitconverted to InVT below).
4602 multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4603 RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
4604 PatFrag mem_frag, X86MemOperand x86memop,
4605 ValueType OpVT, ValueType InVT> {
// Unmasked register form with the extension pattern.
4607 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4609 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4610 [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
// Merge-masking register form.
4612 def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4613 (ins KRC:$mask, SrcRC:$src),
4614 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
// Zero-masking register form.
4617 def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4618 (ins KRC:$mask, SrcRC:$src),
4619 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// Memory forms.
4622 let mayLoad = 1 in {
4623 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4624 (ins x86memop:$src),
4625 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4627 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
4630 def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4631 (ins KRC:$mask, x86memop:$src),
4632 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4636 def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4637 (ins KRC:$mask, x86memop:$src),
4638 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// 512-bit zero-extensions (X86vzext).  Note v16i8 sources are passed as a
// full VR128X register; only the low elements are consumed.
4644 defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
4645 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4647 defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
4648 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4650 defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
4651 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4652 EVEX_CD8<16, CD8VH>;
4653 defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
4654 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4655 EVEX_CD8<16, CD8VQ>;
4656 defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
4657 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4658 EVEX_CD8<32, CD8VH>;
// 512-bit sign-extensions (X86vsext); opcodes mirror the zext ones minus 0x10.
4660 defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
4661 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4663 defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
4664 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4666 defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
4667 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4668 EVEX_CD8<16, CD8VH>;
4669 defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
4670 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4671 EVEX_CD8<16, CD8VQ>;
4672 defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
4673 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4674 EVEX_CD8<32, CD8VH>;
4676 //===----------------------------------------------------------------------===//
4677 // GATHER - SCATTER Operations
// Gather: destination is tied to $src1 (merge semantics) and earlyclobber,
// and the mask register is both read and written ($mask_wb) because the
// hardware clears mask bits as elements complete.
4679 multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4680 RegisterClass RC, X86MemOperand memop> {
4682 Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
4683 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
4684 (ins RC:$src1, KRC:$mask, memop:$src2),
4685 !strconcat(OpcodeStr,
4686 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// FP gathers.  The memory operand class encodes index width/register:
// vy* = YMM index, vz* = ZMM index; 32/64 is the element size gathered.
4690 let ExeDomain = SSEPackedDouble in {
4691 defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
4692 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4693 defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
4694 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4697 let ExeDomain = SSEPackedSingle in {
4698 defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
4699 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// qps: 8 x f32 result fits in a YMM register (VR256X).
4700 defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
4701 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Integer gathers.
4704 defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
4705 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4706 defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
4707 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4709 defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
4710 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4711 defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
4712 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Scatter: stores RC:$src2 elements to memory under KRC:$mask; like gather,
// the mask is also an output ($mask_wb) since completed lanes are cleared.
4714 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4715 RegisterClass RC, X86MemOperand memop> {
4716 let mayStore = 1, Constraints = "$mask = $mask_wb" in
4717 def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
4718 (ins memop:$dst, KRC:$mask, RC:$src2),
4719 !strconcat(OpcodeStr,
4720 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// FP scatters; operand naming mirrors the gathers above
// (vy*/vz* = YMM/ZMM index register, 32/64 = stored element size).
4724 let ExeDomain = SSEPackedDouble in {
4725 defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
4726 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4727 defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
4728 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4731 let ExeDomain = SSEPackedSingle in {
4732 defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
4733 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// qps: 8 x f32 source fits in a YMM register (VR256X).
4734 defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
4735 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Integer scatters.
4738 defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
4739 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4740 defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
4741 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4743 defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
4744 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4745 defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
4746 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Sparse prefetch (AVX-512 PF extension).  No outputs; hasSideEffects = 1
// keeps the instruction from being dead-code eliminated.  F selects the
// ModRM /r extension (MRM1m..MRM6m distinguishes hint level and gather vs
// scatter intent).
4749 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
4750 RegisterClass KRC, X86MemOperand memop> {
4751 let Predicates = [HasPFI], hasSideEffects = 1 in
4752 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
4753 !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
// VGATHERPF0* / VSCATTERPF0*: prefetch into L1 (hint 0, MRM1m/MRM5m);
// VGATHERPF1* / VSCATTERPF1*: prefetch into L2 (hint 1, MRM2m/MRM6m).
4757 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
4758 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4760 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
4761 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4763 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
4764 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4766 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
4767 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4769 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
4770 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4772 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
4773 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4775 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
4776 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4778 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
4779 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4781 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
4782 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4784 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
4785 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4787 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
4788 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4790 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
4791 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4793 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
4794 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4796 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
4797 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4799 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
4800 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4802 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
4803 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4804 //===----------------------------------------------------------------------===//
4805 // VSHUFPS - VSHUFPD Operations
// VSHUFPS/VSHUFPD: two-source shuffle selected by an 8-bit immediate.
// d is the SSE execution domain (SSEPackedSingle/SSEPackedDouble).
4807 multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
4808 ValueType vt, string OpcodeStr, PatFrag mem_frag,
// Register/memory form; second source comes from memory.
4810 def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
4811 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
4812 !strconcat(OpcodeStr,
4813 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4814 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
4815 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4816 EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
// Register/register form.
4817 def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
4818 (ins RC:$src1, RC:$src2, i8imm:$src3),
4819 !strconcat(OpcodeStr,
4820 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4821 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
4822 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4823 EVEX_4V, Sched<[WriteShuffle]>;
// 512-bit SHUFPS/SHUFPD instantiations.
4826 defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
4827 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
4828 defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
4829 SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Also select same-width integer shuffles onto the FP shuffle instructions.
4831 def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4832 (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4833 def : Pat<(v16i32 (X86Shufp VR512:$src1,
4834 (memopv16i32 addr:$src2), (i8 imm:$imm))),
4835 (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
4837 def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4838 (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4839 def : Pat<(v8i64 (X86Shufp VR512:$src1,
4840 (memopv8i64 addr:$src2), (i8 imm:$imm))),
4841 (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// VALIGND/VALIGNQ via the X86VectorVTInfo helper "_": the VT, register
// class, writemask class, suffix and memory operand are all derived from
// the vector type (see class definition at the top of the file).
4843 multiclass avx512_valign<X86VectorVTInfo _> {
// Register form with full AVX512 masking support.  Note the DAG operands
// are swapped ($src2, $src1) relative to the ins list: X86VAlign's operand
// order is opposite to the instruction encoding order.
4844 defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
4845 (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
4847 "$src3, $src2, $src1", "$src1, $src2, $src3",
4848 (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
4850 _.VT, _.RC, _.KRCWM>,
4851 AVX512AIi8Base, EVEX_4V;
4853 // Also match valign of packed floats.
4854 def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
4855 (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
// Memory form (no masking variants here).
4858 def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
4859 (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
4860 !strconcat("valign"##_.Suffix,
4861 " \t{$src3, $src2, $src1, $dst|"
4862 "$dst, $src1, $src2, $src3}"),
// VALIGND (dword) and VALIGNQ (qword) instantiations.
4865 defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4866 defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4868 // Helper fragments to match sext vXi1 to vXiY.
// An arithmetic shift right by (element width - 1) broadcasts each element's
// sign bit, i.e. the sign-extension of a vXi1 mask.
4869 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
4870 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
// Multiclass for VPABS (packed absolute value) with register, masked,
// memory and broadcast-from-scalar variants.  KRC is the writemask class,
// RC the vector register class, x86scalar_mop the broadcast element memory
// operand, and BrdcstStr the "{1toN}" asm broadcast suffix.
// Fix: the memory-form defs previously hard-coded (outs VR512:$dst) even
// though the multiclass is parameterized over RC; they now use RC like the
// register forms (behavior-identical, as all instantiations pass RC=VR512).
4872 multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
4873 RegisterClass KRC, RegisterClass RC,
4874 X86MemOperand x86memop, X86MemOperand x86scalar_mop,
// Unmasked register form.
4876 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4877 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// Merge-masking register form.
4879 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4880 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
// Zero-masking register form.
4882 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4883 !strconcat(OpcodeStr,
4884 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4886 let mayLoad = 1 in {
// Full-vector load form.
4887 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4888 (ins x86memop:$src),
4889 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4891 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4892 (ins KRC:$mask, x86memop:$src),
4893 !strconcat(OpcodeStr,
4894 " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4896 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4897 (ins KRC:$mask, x86memop:$src),
4898 !strconcat(OpcodeStr,
4899 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// Broadcast-from-scalar forms (EVEX.b set).
4901 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4902 (ins x86scalar_mop:$src),
4903 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4904 ", $dst|$dst, ${src}", BrdcstStr, "}"),
4906 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4907 (ins KRC:$mask, x86scalar_mop:$src),
4908 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4909 ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
4910 []>, EVEX, EVEX_B, EVEX_K;
4911 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4912 (ins KRC:$mask, x86scalar_mop:$src),
4913 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4914 ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
4916 []>, EVEX, EVEX_B, EVEX_KZ;
// VPABSD/VPABSQ instantiations over 512-bit vectors.
4920 defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
4921 i512mem, i32mem, "{1to16}">, EVEX_V512,
4922 EVEX_CD8<32, CD8VF>;
4923 defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
4924 i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
4925 EVEX_CD8<64, CD8VF>;
// Match the xor/add-with-sign-mask idiom for abs: x ^ s + (x + s) where s is
// the broadcast sign bit (see v16i1sextv16i32 / v8i1sextv8i64 above).
4928 (bc_v16i32 (v16i1sextv16i32)),
4929 (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
4930 (VPABSDZrr VR512:$src)>;
4932 (bc_v8i64 (v8i1sextv8i64)),
4933 (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
4934 (VPABSQZrr VR512:$src)>;
// Select the unmasked intrinsic forms (all-zero passthru, all-ones mask)
// directly to the plain rr instructions.
4936 def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
4937 (v16i32 immAllZerosV), (i16 -1))),
4938 (VPABSDZrr VR512:$src)>;
4939 def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
4940 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
4941 (VPABSQZrr VR512:$src)>;
// Multiclass shared by VPCONFLICT{D,Q} and VPLZCNT{D,Q} (AVX-512 CDI):
// unary ops with register, memory, broadcast and masked variants.
// Fix: the rr asm string had a stray space before '|'
// (" \t{$src, ${dst} |...") which printed trailing whitespace after the
// destination in AT&T syntax, inconsistent with the rm form below.
4943 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
4944 RegisterClass RC, RegisterClass KRC,
4945 X86MemOperand x86memop,
4946 X86MemOperand x86scalar_mop, string BrdcstStr> {
// Unmasked register form.
4947 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4949 !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
// Full-vector load form.
4951 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4952 (ins x86memop:$src),
4953 !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
// Broadcast-from-scalar form (EVEX.b).
4955 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4956 (ins x86scalar_mop:$src),
4957 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4958 ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
// Zero-masking forms.
4960 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4961 (ins KRC:$mask, RC:$src),
4962 !strconcat(OpcodeStr,
4963 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4965 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4966 (ins KRC:$mask, x86memop:$src),
4967 !strconcat(OpcodeStr,
4968 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4970 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4971 (ins KRC:$mask, x86scalar_mop:$src),
4972 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4973 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
4975 []>, EVEX, EVEX_KZ, EVEX_B;
// Merge-masking forms: destination doubles as the passthru ($src1 tied).
4977 let Constraints = "$src1 = $dst" in {
4978 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4979 (ins RC:$src1, KRC:$mask, RC:$src2),
4980 !strconcat(OpcodeStr,
4981 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
4983 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4984 (ins RC:$src1, KRC:$mask, x86memop:$src2),
4985 !strconcat(OpcodeStr,
4986 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
4988 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4989 (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
4990 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
4991 ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
4992 []>, EVEX, EVEX_K, EVEX_B;
// VPCONFLICT: detect duplicate elements (AVX-512 CDI).
4996 let Predicates = [HasCDI] in {
4997 defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
4998 i512mem, i32mem, "{1to16}">,
4999 EVEX_V512, EVEX_CD8<32, CD8VF>;
5002 defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
5003 i512mem, i64mem, "{1to8}">,
5004 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked intrinsic selection: the GR mask is moved into a mask register
// via COPY_TO_REGCLASS; $src1 is the merge passthru.
5008 def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
5010 (VPCONFLICTDrrk VR512:$src1,
5011 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
5013 def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
5015 (VPCONFLICTQrrk VR512:$src1,
5016 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// VPLZCNT: per-element count-leading-zeros (AVX-512 CDI); reuses the
// avx512_conflict multiclass since the operand shapes are identical.
5018 let Predicates = [HasCDI] in {
5019 defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
5020 i512mem, i32mem, "{1to16}">,
5021 EVEX_V512, EVEX_CD8<32, CD8VF>;
5024 defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
5025 i512mem, i64mem, "{1to8}">,
5026 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked intrinsic selection, same shape as the conflict intrinsics above.
5030 def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
5032 (VPLZCNTDrrk VR512:$src1,
5033 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
5035 def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
5037 (VPLZCNTQrrk VR512:$src1,
5038 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// Generic ctlz lowering for 512-bit integer vectors.
5040 def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
5041 (VPLZCNTDrm addr:$src)>;
5042 def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
5043 (VPLZCNTDrr VR512:$src)>;
5044 def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
5045 (VPLZCNTQrm addr:$src)>;
5046 def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
5047 (VPLZCNTQrr VR512:$src)>;
// Stores of i1 constants: both true encodings (-1 and 1) are normalized to
// the byte value 1 in memory; false stores byte 0.
5049 def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
5050 def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
5051 def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
// Store of an i1 held in a mask register: copy VK1 up to VK16 and use KMOVW.
// NOTE(review): KMOVWmk writes a full 16-bit word for a logically 1-bit
// value — confirm callers reserve 2 bytes at addr:$dst.
5053 def : Pat<(store VK1:$src, addr:$dst),
5054 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
// Truncating store whose memory type is exactly i1.
5056 def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
5057 (truncstore node:$val, node:$ptr), [{
5058 return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
// An i1 truncating store from a GPR is just a plain byte store.
5061 def : Pat<(truncstorei1 GR8:$src, addr:$dst),
5062 (MOV8mr addr:$dst, GR8:$src)>;