1 // Group template arguments that can be derived from the vector type (EltNum x
2 // EltVT). These are things like the register class for the writemask, etc.
3 // The idea is to pass one of these as the template argument rather than the
4 // individual arguments.
5 class X86VectorVTInfo<int NumElts, ValueType EltVT, RegisterClass rc,
9 // Corresponding mask register class.
10 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
12 // Corresponding write-mask register class.
13 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
15 // The GPR register class that can hold the write mask. Use GR8 for fewer
16 // than 8 elements. Use shift-right and equal to work around the lack of
19 !cast<RegisterClass>("GR" #
20 !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
22 // Suffix used in the instruction mnemonic.
23 string Suffix = suffix;
25 string VTName = "v" # NumElts # EltVT;
28 ValueType VT = !cast<ValueType>(VTName);
30 string EltTypeName = !cast<string>(EltVT);
31 // Size of the element type in bits, e.g. 32 for v16i32.
32 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
33 int EltSize = EltVT.Size;
35 // "i" for integer types and "f" for floating-point types
36 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
38 // Size of RC in bits, e.g. 512 for VR512.
41 // The corresponding memory operand, e.g. i512mem for VR512.
42 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
43 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
46 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
47 // due to load promotion during legalization
48 PatFrag LdFrag = !cast<PatFrag>("load" #
49 !if (!eq (TypeVariantName, "i"),
50 !if (!eq (Size, 128), "v2i64",
51 !if (!eq (Size, 256), "v4i64",
53 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
55 // The corresponding float type, e.g. v16f32 for v16i32
56 // Note: For EltSize < 32, FloatVT is illegal and TableGen
57 // fails to compile, so we choose FloatVT = VT
58 ValueType FloatVT = !cast<ValueType>(
59 !if (!eq (!srl(EltSize,5),0),
61 !if (!eq(TypeVariantName, "i"),
62 "v" # NumElts # "f" # EltSize,
65 // The string to specify embedded broadcast in assembly.
66 string BroadcastStr = "{1to" # NumElts # "}";
// 512-bit vector-type info records (RC = VR512), one per element type.
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;

// 128-bit variants; "x" again marks the AVX-512 extended register class.
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
85 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
86 X86VectorVTInfo i128> {
87 X86VectorVTInfo info512 = i512;
88 X86VectorVTInfo info256 = i256;
89 X86VectorVTInfo info128 = i128;
92 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
94 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
96 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
98 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
102 // Common base class of AVX512_masking and AVX512_masking_3src.
103 multiclass AVX512_masking_common<bits<8> O, Format F, dag Outs, dag Ins,
104 dag MaskingIns, dag ZeroMaskingIns,
106 string AttSrcAsm, string IntelSrcAsm,
107 dag RHS, dag MaskingRHS, ValueType OpVT,
108 RegisterClass RC, RegisterClass KRC,
109 string MaskingConstraint = ""> {
110 def NAME: AVX512<O, F, Outs, Ins,
111 OpcodeStr#" \t{"#AttSrcAsm#", $dst|"#
112 "$dst, "#IntelSrcAsm#"}",
113 [(set RC:$dst, RHS)]>;
115 // Prefer over VMOV*rrk Pat<>
116 let AddedComplexity = 20 in
117 def NAME#k: AVX512<O, F, Outs, MaskingIns,
118 OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}}|"#
119 "$dst {${mask}}, "#IntelSrcAsm#"}",
120 [(set RC:$dst, MaskingRHS)]>,
122 // In case of the 3src subclass this is overridden with a let.
123 string Constraints = MaskingConstraint;
125 let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
126 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
127 OpcodeStr#" \t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
128 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
130 (vselect KRC:$mask, RHS,
132 (v16i32 immAllZerosV)))))]>,
136 // This multiclass generates the unconditional/non-masking, the masking and
137 // the zero-masking variant of the instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
139 multiclass AVX512_masking<bits<8> O, Format F, dag Outs, dag Ins,
141 string AttSrcAsm, string IntelSrcAsm,
142 dag RHS, ValueType OpVT, RegisterClass RC,
144 AVX512_masking_common<O, F, Outs,
146 !con((ins RC:$src0, KRC:$mask), Ins),
147 !con((ins KRC:$mask), Ins),
148 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
149 (vselect KRC:$mask, RHS, RC:$src0), OpVT, RC, KRC,
152 // Similar to AVX512_masking but in this case one of the source operands
153 // ($src1) is already tied to $dst so we just use that for the preserved
154 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
156 multiclass AVX512_masking_3src<bits<8> O, Format F, dag Outs, dag NonTiedIns,
158 string AttSrcAsm, string IntelSrcAsm,
159 dag RHS, ValueType OpVT,
160 RegisterClass RC, RegisterClass KRC> :
161 AVX512_masking_common<O, F, Outs,
162 !con((ins RC:$src1), NonTiedIns),
163 !con((ins RC:$src1), !con((ins KRC:$mask),
165 !con((ins RC:$src1), !con((ins KRC:$mask),
167 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
168 (vselect KRC:$mask, RHS, RC:$src1), OpVT, RC, KRC>;
170 // Bitcasts between 512-bit vector types. Return the original type since
171 // no instruction is needed for the conversion
// Bitcasts between 512-bit vector types are free: each pattern simply
// reuses the source register under the destination type, so no
// instruction is emitted.
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
// NOTE(review): this v32i16 <- v16f32 bitcast pattern was defined twice in a
// row; the redundant duplicate record has been removed.
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
// v64i8 <- every other 512-bit type.
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
// Bitcasts between 128-bit vector types (VR128X registers) are free as well.
def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
// AVX-512: the VPXOR instruction writes zero to its upper part, so it is safe to use it to build zeros.
274 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
275 isPseudo = 1, Predicates = [HasAVX512] in {
276 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
277 [(set VR512:$dst, (v16f32 immAllZerosV))]>;
280 let Predicates = [HasAVX512] in {
281 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
282 def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
283 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
286 //===----------------------------------------------------------------------===//
287 // AVX-512 - VECTOR INSERT
290 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
291 def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
292 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
293 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
294 []>, EVEX_4V, EVEX_V512;
296 def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
297 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
298 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
299 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
302 // -- 64x4 fp form --
303 let hasSideEffects = 0, ExeDomain = SSEPackedDouble in {
304 def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
305 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
306 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
307 []>, EVEX_4V, EVEX_V512, VEX_W;
309 def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
310 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
311 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
312 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
314 // -- 32x4 integer form --
315 let hasSideEffects = 0 in {
316 def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
317 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
318 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
319 []>, EVEX_4V, EVEX_V512;
321 def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
322 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
323 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
324 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
327 let hasSideEffects = 0 in {
329 def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
330 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
331 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
332 []>, EVEX_4V, EVEX_V512, VEX_W;
334 def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
335 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
336 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
337 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// 128-bit subvector insert into a 512-bit vector: register forms.
// Note that FP-domain 32x4 insert is also used for the f64/i-less types
// that have no dedicated 64x2 instruction here.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
          (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
          (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;

// 128-bit subvector insert: memory forms.
def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
          (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
          (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
          (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
          (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert128_imm VR512:$ins))>;

// 256-bit subvector insert: register forms (FP types).
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
          (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit subvector insert: register forms (integer types).
// NOTE(review): these two patterns matched vinsert128_insert while using the
// 256-bit insert instruction and INSERT_get_vinsert256_imm — a copy-paste
// bug from the 128-bit group above. Fixed to vinsert256_insert.
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
          (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit subvector insert: memory forms.
def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
          (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
          (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
          (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
                                  (bc_v8i32 (loadv4i64 addr:$src2)),
          (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
                        (INSERT_get_vinsert256_imm VR512:$ins))>;
394 // vinsertps - insert f32 to XMM
395 def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
396 (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
397 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
398 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
400 def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
401 (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
402 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
403 [(set VR128X:$dst, (X86insertps VR128X:$src1,
404 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
405 imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
407 //===----------------------------------------------------------------------===//
408 // AVX-512 VECTOR EXTRACT
410 let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
412 def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
413 (ins VR512:$src1, i8imm:$src2),
414 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
415 []>, EVEX, EVEX_V512;
416 def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
417 (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
418 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
419 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
422 def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
423 (ins VR512:$src1, i8imm:$src2),
424 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
425 []>, EVEX, EVEX_V512, VEX_W;
427 def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
428 (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
429 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
430 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
433 let hasSideEffects = 0 in {
435 def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
436 (ins VR512:$src1, i8imm:$src2),
437 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
438 []>, EVEX, EVEX_V512;
439 def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
440 (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
441 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
442 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
445 def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
446 (ins VR512:$src1, i8imm:$src2),
447 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
448 []>, EVEX, EVEX_V512, VEX_W;
450 def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
451 (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
452 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
453 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// 128-bit subvector extract from a 512-bit vector: register forms.
def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v4f32 (VEXTRACTF32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// NOTE(review): the source vector had no type annotation (leaving the
// pattern's source type ambiguous) and the FP-domain VEXTRACTF32x4rr was
// used to produce a v4i32 result. Typed the source as v16i32 and switched
// to the integer-domain VEXTRACTI32x4rr, matching the v8i64 pattern below.
def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v4i32 (VEXTRACTI32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// 128-bit extracts for 64-bit element types; the FP 32x4 form is reused for
// v8f64 since no dedicated 64x2 extract is defined here.
def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v2f64 (VEXTRACTF32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v2i64 (VEXTRACTI32x4rr VR512:$src1,
                    (EXTRACT_get_vextract128_imm VR128X:$ext)))>;

// 256-bit subvector extract from a 512-bit vector: register forms.
def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
          (v8f32 (VEXTRACTF64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
          (v8i32 (VEXTRACTI64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
          (v4f64 (VEXTRACTF64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
          (v4i64 (VEXTRACTI64x4rr VR512:$src1,
                    (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
// A 256-bit subvector extract from the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;

// Likewise for a 128-bit subvector extract from position zero (sub_xmm).
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
511 // A 128-bit subvector insert to the first 512-bit vector position
512 // is a subregister copy that needs no instruction.
513 def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
514 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
515 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
517 def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
518 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
519 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
521 def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
522 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
523 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
525 def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
526 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
527 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// A 256-bit subvector insert into an undef 512-bit vector at position zero
// is a plain subregister copy.
def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
539 // vextractps - extract 32 bits from XMM
540 def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
541 (ins VR128X:$src1, u32u8imm:$src2),
542 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
543 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
546 def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
547 (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
548 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
549 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
550 addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
552 //===---------------------------------------------------------------------===//
555 multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
556 RegisterClass DestRC,
557 RegisterClass SrcRC, X86MemOperand x86memop> {
558 def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
559 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
561 def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
562 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
564 let ExeDomain = SSEPackedSingle in {
565 defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
567 EVEX_V512, EVEX_CD8<32, CD8VT1>;
570 let ExeDomain = SSEPackedDouble in {
571 defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
573 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scalar-load broadcast patterns for VBROADCASTSS/SD (memory forms).
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDZrm addr:$src)>;

// Map the broadcast intrinsics onto the same memory forms.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZrm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZrm addr:$src)>;
586 multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
587 RegisterClass SrcRC, RegisterClass KRC> {
588 def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
589 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
590 []>, EVEX, EVEX_V512;
591 def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
592 (ins KRC:$mask, SrcRC:$src),
593 !strconcat(OpcodeStr,
594 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
595 []>, EVEX, EVEX_V512, EVEX_KZ;
598 defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
599 defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
// Zero-extension of a mask register: broadcast the constant 1 under a
// zeroing write-mask so unset lanes become zero.
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
           (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
           (VPBROADCASTQrZkrr VK8WM:$mask, (i64 (MOV64ri 0x1)))>;

// GPR-source broadcasts: plain and masked forms.
def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))),
          (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
          (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;

// Intrinsic forms map to the same register broadcasts.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
          (VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
          (VPBROADCASTQrZrr GR64:$src)>;

// Masked intrinsic forms with an all-zeros passthru; the GPR-held mask is
// first copied into the matching mask register class.
def : Pat<(v16i32 (int_x86_avx512_mask_pbroadcast_d_gpr_512 (i32 GR32:$src),
                   (v16i32 immAllZerosV), (i16 GR16:$mask))),
          (VPBROADCASTDrZkrr (COPY_TO_REGCLASS GR16:$mask, VK16WM), GR32:$src)>;
def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
                   (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
          (VPBROADCASTQrZkrr (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
629 multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
630 X86MemOperand x86memop, PatFrag ld_frag,
631 RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
633 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
634 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
636 (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
637 def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
639 !strconcat(OpcodeStr,
640 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
642 (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
645 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
646 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
648 (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
649 def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
651 !strconcat(OpcodeStr,
652 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
653 [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
654 (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
// 512-bit integer element broadcasts from an XMM register or memory.
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
                      loadi32, VR512, v16i32, v4i32, VK16WM>,
                      EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
                      loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
                      EVEX_CD8<64, CD8VT1>;
665 multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
666 X86MemOperand x86memop, PatFrag ld_frag,
669 def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
670 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
672 def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
674 !strconcat(OpcodeStr,
675 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
// Broadcast a 128-bit (4 x i32) subvector from memory; 16 dword elements,
// so the write-mask class is VK16WM.
defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       i128mem, loadv2i64, VK16WM>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
// Broadcast a 256-bit (4 x i64) subvector from memory.
// NOTE(review): the write-mask class was VK16WM — a copy-paste from the
// 32x4 variant above. A qword-granularity broadcast produces 8 elements,
// so the correct write-mask class is VK8WM.
defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       i256mem, loadv4i64, VK8WM>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
// XMM-source broadcast intrinsics map onto the register forms.
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
          (VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
          (VPBROADCASTQZrr VR128X:$src)>;

// FP broadcasts from the low element of an XMM register.
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
          (VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZrr VR128X:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
          (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
          (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// Masked broadcast at 256 bits: widen the v8i1 mask to VK16WM, perform the
// zero-masked 512-bit broadcast, then extract the low YMM half.
710 let Predicates = [HasAVX512] in {
711 def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
713           (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
714                             addr:$src)), sub_ymm)>;
716 //===----------------------------------------------------------------------===//
717 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Broadcast a k-register mask into every element of a vector register
// (vpbroadcastmw2d / vpbroadcastmb2q). Register-to-register form only.
720 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
721                               RegisterClass DstRC, RegisterClass KRC,
722                               ValueType OpVT, ValueType SrcVT> {
723 def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
724                   !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
// These instructions require the Conflict Detection extension (CDI).
728 let Predicates = [HasCDI] in {
729 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
730                                              VK16, v16i32, v16i1>, EVEX_V512;
731 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
732                                              VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
735 //===----------------------------------------------------------------------===//
738 // -- immediate form --
// VPERM with an 8-bit immediate selector (vpermq/vpermpd imm forms).
739 multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
740                          SDNode OpNode, PatFrag mem_frag,
741                          X86MemOperand x86memop, ValueType OpVT> {
// Register source + immediate.
742   def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
743                      (ins RC:$src1, i8imm:$src2),
744                      !strconcat(OpcodeStr,
745                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
747                      (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
// Memory source + immediate.
749   def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
750                      (ins x86memop:$src1, i8imm:$src2),
751                      !strconcat(OpcodeStr,
752                          " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
754                      (OpVT (OpNode (mem_frag addr:$src1),
755                             (i8 imm:$src2))))]>, EVEX;
// Immediate-form 64-bit element permutes; 0x00 = vpermq, 0x01 = vpermpd.
758 defm VPERMQZ  : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
759                     i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
760 let ExeDomain = SSEPackedDouble in
761 defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
762                     f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
764 // -- VPERM - register form --
// Variable permute: the shuffle indices come from a vector register ($src1)
// rather than an immediate.
765 multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
766                      PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
768   def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
769                    (ins RC:$src1, RC:$src2),
770                    !strconcat(OpcodeStr,
771                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
773                        (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
// Same, with the data operand loaded from memory.
775   def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
776                    (ins RC:$src1, x86memop:$src2),
777                    !strconcat(OpcodeStr,
778                        " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
780                        (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
// Variable permutes. vpermd/vpermq share opcode 0x36 and vpermps/vpermpd
// share 0x16; the 64-bit variants are distinguished by VEX_W.
784 defm VPERMDZ   : avx512_perm<0x36, "vpermd",  VR512,  memopv16i32, i512mem,
785                            v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
786 defm VPERMQZ   : avx512_perm<0x36, "vpermq",  VR512,  memopv8i64,  i512mem,
787                            v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
788 let ExeDomain = SSEPackedSingle in
789 defm VPERMPSZ  : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
790                            v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
791 let ExeDomain = SSEPackedDouble in
792 defm VPERMPDZ  : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
793                            v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
795 // -- VPERM2I - 3 source operands form --
// Three-source permute (vpermi2*/vpermt2*): $src1 is tied to $dst and is
// overwritten. Provides plain, merge-masked (k), and zero-masked (kz)
// variants for both register and memory data operands.
796 multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
797                           PatFrag mem_frag, X86MemOperand x86memop,
798                           SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
// The destination doubles as the first source (index/table operand).
799 let Constraints = "$src1 = $dst" in {
800   def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
801                    (ins RC:$src1, RC:$src2, RC:$src3),
802                    !strconcat(OpcodeStr,
803                        " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
805                        (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
// Merge-masked register form: inactive lanes keep the old $dst value.
808   def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
809                    (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
810                    !strconcat(OpcodeStr,
811                        " \t{$src3, $src2, $dst {${mask}}|"
812                        "$dst {${mask}}, $src2, $src3}"),
813                    [(set RC:$dst, (OpVT (vselect KRC:$mask,
814                                          (OpNode RC:$src1, RC:$src2,
// Zero-masked register form: inactive lanes are zeroed.
819   let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
820   def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
821                    (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
822                    !strconcat(OpcodeStr,
823                        " \t{$src3, $src2, $dst {${mask}} {z} |",
824                        "$dst {${mask}} {z}, $src2, $src3}"),
825                    [(set RC:$dst, (OpVT (vselect KRC:$mask,
826                                          (OpNode RC:$src1, RC:$src2,
829                                           (v16i32 immAllZerosV))))))]>,
// Memory-sourced forms mirror the three register variants above.
832   def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
833                    (ins RC:$src1, RC:$src2, x86memop:$src3),
834                    !strconcat(OpcodeStr,
835                        " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
837                        (OpVT (OpNode RC:$src1, RC:$src2,
838                               (mem_frag addr:$src3))))]>, EVEX_4V;
840   def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
841                    (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
842                    !strconcat(OpcodeStr,
843                        " \t{$src3, $src2, $dst {${mask}}|"
844                        "$dst {${mask}}, $src2, $src3}"),
846                        (OpVT (vselect KRC:$mask,
847                                       (OpNode RC:$src1, RC:$src2,
848                                               (mem_frag addr:$src3)),
852   let AddedComplexity = 10 in // Prefer over the rrkz variant
853   def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
854                    (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
855                    !strconcat(OpcodeStr,
856                        " \t{$src3, $src2, $dst {${mask}} {z}|"
857                        "$dst {${mask}} {z}, $src2, $src3}"),
859                        (OpVT (vselect KRC:$mask,
860                                       (OpNode RC:$src1, RC:$src2,
861                                               (mem_frag addr:$src3)),
863                                       (v16i32 immAllZerosV))))))]>,
// vpermi2* instantiations (the index operand is the tied source).
// NOTE(review): the PS/PD variants pass i512mem rather than f512mem as the
// memory operand class — verify this is intentional.
867 defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, memopv16i32,
868                                i512mem, X86VPermiv3, v16i32, VK16WM>,
869                  EVEX_V512, EVEX_CD8<32, CD8VF>;
870 defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, memopv8i64,
871                                i512mem, X86VPermiv3, v8i64, VK8WM>,
872                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
873 defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps",  VR512, memopv16f32,
874                                i512mem, X86VPermiv3, v16f32, VK16WM>,
875                  EVEX_V512, EVEX_CD8<32, CD8VF>;
876 defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd",  VR512, memopv8f64,
877                                i512mem, X86VPermiv3, v8f64, VK8WM>,
878                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// vpermt2* wrapper: reuses avx512_perm_3src and adds patterns that map the
// masked vpermt intrinsics (note the idx/src1 operand swap between the
// intrinsic and the instruction).
880 multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
881                           PatFrag mem_frag, X86MemOperand x86memop,
882                           SDNode OpNode, ValueType OpVT, RegisterClass KRC,
883                           ValueType MaskVT, RegisterClass MRC> :
884         avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
// Unmasked intrinsic (-1 mask = all lanes active) -> rr form.
886   def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
887                    VR512:$idx, VR512:$src1, VR512:$src2, -1)),
888             (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
// Masked intrinsic -> rrk form; the GPR mask is copied into the k-class.
890   def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
891                    VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
892             (!cast<Instruction>(NAME#rrk) VR512:$src1,
893                (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
// vpermt2* instantiations (the table operand is the tied source).
896 defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d",  VR512, memopv16i32, i512mem,
897                                X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
898                  EVEX_V512, EVEX_CD8<32, CD8VF>;
899 defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q",  VR512, memopv8i64, i512mem,
900                                X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
901                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
902 defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps",  VR512, memopv16f32, i512mem,
903                                X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
904                  EVEX_V512, EVEX_CD8<32, CD8VF>;
905 defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd",  VR512, memopv8f64, i512mem,
906                                X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
907                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
909 //===----------------------------------------------------------------------===//
910 // AVX-512 - BLEND using mask
// Masked blend: result lanes are taken from $src2 where the mask bit is set,
// from $src1 otherwise — hence the deliberate src2/src1 order in the pattern.
912 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr,
913                           RegisterClass KRC, RegisterClass RC,
914                           X86MemOperand x86memop, PatFrag mem_frag,
915                           SDNode OpNode, ValueType vt> {
916   def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
917              (ins KRC:$mask, RC:$src1, RC:$src2),
918              !strconcat(OpcodeStr,
919              " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
920              [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
921                  (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
// Memory form; no ISel pattern is attached here.
923   def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
924              (ins KRC:$mask, RC:$src1, x86memop:$src2),
925              !strconcat(OpcodeStr,
926              " \t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
927              []>, EVEX_4V, EVEX_K;
// 512-bit FP masked blends plus the intrinsic-to-instruction patterns.
930 let ExeDomain = SSEPackedSingle in
931 defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
932                               VK16WM, VR512, f512mem,
933                               memopv16f32, vselect, v16f32>,
934                               EVEX_CD8<32, CD8VF>, EVEX_V512;
935 let ExeDomain = SSEPackedDouble in
936 defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
937                               VK8WM, VR512, f512mem,
938                               memopv8f64, vselect, v8f64>,
939                               VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// The GPR mask from the intrinsic is moved into a writemask register class.
941 def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
942                  (v16f32 VR512:$src2), (i16 GR16:$mask))),
943         (VBLENDMPSZrr (COPY_TO_REGCLASS GR16:$mask, VK16WM),
944                  VR512:$src1, VR512:$src2)>;
946 def : Pat<(v8f64 (int_x86_avx512_mask_blend_pd_512 (v8f64 VR512:$src1),
947                  (v8f64 VR512:$src2), (i8 GR8:$mask))),
948         (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
949                  VR512:$src1, VR512:$src2)>;
// 512-bit integer masked blends.
// NOTE(review): these pass f512mem while the mem_frag is integer
// (memopv16i32/memopv8i64); i512mem would be the consistent choice — verify.
951 defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
952                               VK16WM, VR512, f512mem,
953                               memopv16i32, vselect, v16i32>,
954                               EVEX_CD8<32, CD8VF>, EVEX_V512;
956 defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
957                               VK8WM, VR512, f512mem,
958                               memopv8i64, vselect, v8i64>,
959                               VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
// NOTE(review): the FP intrinsic patterns above copy to VK16WM/VK8WM, but
// these copy to plain VK16/VK8 — confirm the inconsistency is harmless.
961 def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
962                  (v16i32 VR512:$src2), (i16 GR16:$mask))),
963         (VPBLENDMDZrr (COPY_TO_REGCLASS GR16:$mask, VK16),
964                  VR512:$src1, VR512:$src2)>;
966 def : Pat<(v8i64 (int_x86_avx512_mask_blend_q_512 (v8i64 VR512:$src1),
967                  (v8i64 VR512:$src2), (i8 GR8:$mask))),
968         (VPBLENDMQZrr (COPY_TO_REGCLASS GR8:$mask, VK8),
969                  VR512:$src1, VR512:$src2)>;
// 256-bit vselect fallback: widen operands to ZMM (note src2 goes into the
// blend's first source slot per the blend semantics), run the 512-bit masked
// blend, then extract the low YMM half.
971 let Predicates = [HasAVX512] in {
972 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
973                           (v8f32 VR256X:$src2))),
975  (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
977             (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
978             (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
979 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
980                           (v8i32 VR256X:$src2))),
982  (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
983             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
984             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
986 //===----------------------------------------------------------------------===//
987 // Compare Instructions
988 //===----------------------------------------------------------------------===//
990 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare producing a single mask bit (VK1). The condition code is
// folded into the mnemonic (vcmp${cc}ss); the _alt forms accept an explicit
// immediate for the assembler only.
991 multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
992                Operand CC, SDNode OpNode, ValueType VT,
993                PatFrag ld_frag, string asm, string asm_alt> {
994   def rr : AVX512Ii8<0xC2, MRMSrcReg,
995                 (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
996                 [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
997                 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
998   def rm : AVX512Ii8<0xC2, MRMSrcMem,
999                 (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
1000                 [(set VK1:$dst, (OpNode (VT RC:$src1),
1001                 (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Assembler-only aliases taking the condition as an 8-bit immediate.
1002   let isAsmParserOnly = 1, hasSideEffects = 0 in {
1003     def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
1004                (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1005                asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
1006     def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
1007                (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1008                asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// vcmpss/vcmpsd instantiations.
1012 let Predicates = [HasAVX512] in {
1013 defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, AVXCC, X86cmpms, f32, loadf32,
1014                  "vcmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1015                  "vcmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
1017 defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, AVXCC, X86cmpms, f64, loadf64,
1018                  "vcmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1019                  "vcmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
// Packed integer equality/greater-than compares producing a k-register.
// Uses the X86VectorVTInfo bundle (_) from the file header for the register,
// mask, and memory operand classes. Masked (k) forms AND the incoming
// writemask with the compare result.
1023 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
1024                               X86VectorVTInfo _> {
1025   def rr : AVX512BI<opc, MRMSrcReg,
1026              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
1027              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1028              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
1029              IIC_SSE_ALU_F32P_RR>, EVEX_4V;
// Memory form; the load is bitconverted to the compare's vector type.
1031   def rm : AVX512BI<opc, MRMSrcMem,
1032              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
1033              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1034              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1035                                        (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
1036              IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1037   def rrk : AVX512BI<opc, MRMSrcReg,
1038               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1039               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1040                           "$dst {${mask}}, $src1, $src2}"),
1041               [(set _.KRC:$dst, (and _.KRCWM:$mask,
1042                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
1043               IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1045   def rmk : AVX512BI<opc, MRMSrcMem,
1046               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1047               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1048                           "$dst {${mask}}, $src1, $src2}"),
1049               [(set _.KRC:$dst, (and _.KRCWM:$mask,
1050                                    (OpNode (_.VT _.RC:$src1),
1052                                                (_.LdFrag addr:$src2))))))],
1053               IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// Broadcast-from-memory (rmb) compare forms: a single scalar element is
// loaded and splat (EVEX.b) before the compare.
1056 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
1057                                   X86VectorVTInfo _> {
1058   let mayLoad = 1 in {
1059   def rmb : AVX512BI<opc, MRMSrcMem,
1060               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1061               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
1062                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1063               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1064                                         (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
1065               IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
// Masked broadcast form: writemask ANDed with the compare result.
1066   def rmbk : AVX512BI<opc, MRMSrcMem,
1067                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1068                                        _.ScalarMemOp:$src2),
1069                !strconcat(OpcodeStr,
1070                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1071                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1072                [(set _.KRC:$dst, (and _.KRCWM:$mask,
1073                                       (OpNode (_.VT _.RC:$src1),
1075                                                 (_.ScalarLdFrag addr:$src2)))))],
1076                IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// Multi-width (VL) wrappers: always instantiate the 512-bit form under the
// base predicate; add 256/128-bit forms when AVX512VL is also available.
1080 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
1081                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
1082   let Predicates = [prd] in
1083   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
1086   let Predicates = [prd, HasVLX] in {
1087     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
1089     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
// Same multi-width wrapper for the broadcast (rmb) compare forms.
1094 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
1095                                     SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
1097   let Predicates = [prd] in
1098   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
1101   let Predicates = [prd, HasVLX] in {
1102     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
1104     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
// vpcmpeq*/vpcmpgt* instantiations. Byte/word need BWI; dword/qword only
// need AVX512F and additionally get the broadcast (rmb) forms — byte/word
// elements cannot be embedded-broadcast.
1109 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
1110                       avx512vl_i8_info, HasBWI>,
1113 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
1114                       avx512vl_i16_info, HasBWI>,
1115                 EVEX_CD8<16, CD8VF>;
1117 defm VPCMPEQD : avx512_icmp_packed_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1118                       avx512vl_i32_info, HasAVX512>,
1119                 avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1120                       avx512vl_i32_info, HasAVX512>,
1121                 EVEX_CD8<32, CD8VF>;
1123 defm VPCMPEQQ : avx512_icmp_packed_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1124                       avx512vl_i64_info, HasAVX512>,
1125                 avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1126                       avx512vl_i64_info, HasAVX512>,
1127                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1129 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
1130                       avx512vl_i8_info, HasBWI>,
1133 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
1134                       avx512vl_i16_info, HasBWI>,
1135                 EVEX_CD8<16, CD8VF>;
1137 defm VPCMPGTD : avx512_icmp_packed_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1138                       avx512vl_i32_info, HasAVX512>,
1139                 avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1140                       avx512vl_i32_info, HasAVX512>,
1141                 EVEX_CD8<32, CD8VF>;
1143 defm VPCMPGTQ : avx512_icmp_packed_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1144                       avx512vl_i64_info, HasAVX512>,
1145                 avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1146                       avx512vl_i64_info, HasAVX512>,
1147                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// 256-bit integer compare fallback: widen to ZMM, run the 512-bit compare,
// then narrow the resulting mask to VK8.
1149 def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1150             (COPY_TO_REGCLASS (VPCMPGTDZrr
1151             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1152             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
1154 def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
1155             (COPY_TO_REGCLASS (VPCMPEQDZrr
1156             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1157             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
// Integer compare with explicit condition code (vpcmp[u]{d,q}). The cc is
// folded into the mnemonic for the pattern forms; _alt forms take it as an
// immediate for the assembler, including masked variants.
1159 multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
1160               RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
1161               SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
1162   def rri : AVX512AIi8<opc, MRMSrcReg,
1163              (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
1164              !strconcat("vpcmp${cc}", Suffix,
1165                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1166              [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
1167              IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1168   def rmi : AVX512AIi8<opc, MRMSrcMem,
1169              (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
1170              !strconcat("vpcmp${cc}", Suffix,
1171                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1172              [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
1173                               imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1174   // Accept explicit immediate argument form instead of comparison code.
1175   let isAsmParserOnly = 1, hasSideEffects = 0 in {
1176     def rri_alt : AVX512AIi8<opc, MRMSrcReg,
1177                (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1178                !strconcat("vpcmp", Suffix,
1179                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1180                [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1181     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
1182                (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
1183                !strconcat("vpcmp", Suffix,
1184                   "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
1185                [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1186     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
1187                (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1188                !strconcat("vpcmp", Suffix,
1189                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
1190                [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1191     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
1192                (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
1193                !strconcat("vpcmp", Suffix,
1194                   "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
1195                [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// vpcmp[u]d / vpcmp[u]q instantiations (signed uses 0x1F, unsigned 0x1E).
1199 defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32,
1200                               X86cmpm, v16i32, AVXCC, "d">,
1201                EVEX_V512, EVEX_CD8<32, CD8VF>;
1202 defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32,
1203                                X86cmpmu, v16i32, AVXCC, "ud">,
1204                 EVEX_V512, EVEX_CD8<32, CD8VF>;
1206 defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64,
1207                               X86cmpm, v8i64, AVXCC, "q">,
1208                VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1209 defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64,
1210                                X86cmpmu, v8i64, AVXCC, "uq">,
1211                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
1213 // avx512_cmp_packed - compare packed instructions
// Packed FP compare (vcmpps/vcmppd) producing a k-register. The condition
// code is folded into the mnemonic (vcmp${cc}); rrib is the {sae}
// suppress-all-exceptions register form. _alt forms are assembler-only and
// take the cc as an explicit immediate.
1214 multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
1215                            X86MemOperand x86memop, ValueType vt,
1216                            string suffix, Domain d> {
1217   def rri : AVX512PIi8<0xC2, MRMSrcReg,
1218              (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1219              !strconcat("vcmp${cc}", suffix,
1220                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1221              [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
1222   def rrib: AVX512PIi8<0xC2, MRMSrcReg,
1223              (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
1224              !strconcat("vcmp${cc}", suffix,
1225                         " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
1227   def rmi  : AVX512PIi8<0xC2, MRMSrcMem,
1228              (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
1229              !strconcat("vcmp${cc}", suffix,
// Fixed: the Intel-syntax side listed a stray trailing ", $cc" even though
// the condition is already printed inside the mnemonic via ${cc}; this was
// inconsistent with the rri form above and would print the condition twice.
1230                         " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1232                         (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
1234   // Accept explicit immediate argument form instead of comparison code.
1235   let isAsmParserOnly = 1, hasSideEffects = 0 in {
1236     def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
1237                (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
1238                !strconcat("vcmp", suffix,
1239                 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
1240     def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
1241                (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
1242                !strconcat("vcmp", suffix,
1243                 " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
// vcmpps/vcmppd instantiations.
1247 defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
1248                "ps", SSEPackedSingle>, PS, EVEX_4V, EVEX_V512,
1249                EVEX_CD8<32, CD8VF>;
1250 defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
1251                "pd", SSEPackedDouble>, PD, EVEX_4V, VEX_W, EVEX_V512,
1252                EVEX_CD8<64, CD8VF>;
// 256-bit FP/int compare fallbacks: widen to ZMM, compare, narrow the mask.
1254 def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
1255           (COPY_TO_REGCLASS (VCMPPSZrri
1256             (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1257             (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1259 def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1260           (COPY_TO_REGCLASS (VPCMPDZrri
1261             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1262             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
1264 def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
1265           (COPY_TO_REGCLASS (VPCMPUDZrri
1266             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
1267             (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
// Full-mask (-1) compare intrinsics: {sae} forms map to rrib, default
// rounding maps to rri; the k-result is copied out to a GPR.
1270 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1271                 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1273       (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
1274                          (I8Imm imm:$cc)), GR16)>;
1276 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1277                 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1279       (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
1280                          (I8Imm imm:$cc)), GR8)>;
1282 def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
1283                 (v16f32 VR512:$src2), imm:$cc, (i16 -1),
1285       (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
1286                          (I8Imm imm:$cc)), GR16)>;
1288 def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
1289                 (v8f64 VR512:$src2), imm:$cc, (i8 -1),
1291       (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
1292                          (I8Imm imm:$cc)), GR8)>;
1294 // Mask register copy, including
1295 // - copy between mask registers
1296 // - load/store mask registers
1297 // - copy from GPR to mask register and vice versa
// KMOV k<->k, k<-mem, mem<-k. Only the load form carries an ISel pattern.
1299 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
1300                          string OpcodeStr, RegisterClass KRC,
1301                          ValueType vvt, ValueType ivt, X86MemOperand x86memop> {
1302   let hasSideEffects = 0 in {
1303     def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1304                !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1306     def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
1307                !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1308                [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>;
1310     def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
1311                !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOV k<->GPR forms; no patterns, selected explicitly by the Pats below.
1315 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
1317                              RegisterClass KRC, RegisterClass GRC> {
1318   let hasSideEffects = 0 in {
1319     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
1320                !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
1321     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
1322                !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), []>;
// KMOVB needs DQI; KMOVW is base AVX-512; KMOVD/KMOVQ need BWI.
1326 let Predicates = [HasDQI] in
1327 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8,
1329              avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
1332 let Predicates = [HasAVX512] in
1333 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16,
1335              avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
1338 let Predicates = [HasBWI] in {
1339 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32,
1340                i32mem>, VEX, PD, VEX_W;
1341 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
1345 let Predicates = [HasBWI] in {
1346 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64,
1347                i64mem>, VEX, PS, VEX_W;
1348 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
1352 // GR from/to mask register
// i8/i16 GPRs are widened to 32 bits (SUBREG_TO_REG) before the kr move,
// since the k<->GPR moves operate on GR32.
1353 let Predicates = [HasDQI] in {
1354   def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1355             (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
1356   def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1357             (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
1359 let Predicates = [HasAVX512] in {
1360   def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
1361             (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
1362   def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
1363             (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
1365 let Predicates = [HasBWI] in {
1366   def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
1367   def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
1369 let Predicates = [HasBWI] in {
1370   def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
1371   def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
// Mask load/store patterns. Without DQI, 8-bit masks go through KMOVW with a
// VK8<->VK16 register-class copy.
1375 let Predicates = [HasDQI] in {
1376   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1377             (KMOVBmk addr:$dst, VK8:$src)>;
1379 let Predicates = [HasAVX512] in {
1380   def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
1381             (KMOVWmk addr:$dst, VK16:$src)>;
1382   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
1383             (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
1384   def : Pat<(i1 (load addr:$src)),
1385             (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
1386   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
1387             (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
1389 let Predicates = [HasBWI] in {
1390   def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
1391             (KMOVDmk addr:$dst, VK32:$src)>;
1393 let Predicates = [HasBWI] in {
1394   def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
1395             (KMOVQmk addr:$dst, VK64:$src)>;
// i1 <-> integer conversions. trunc masks the GPR with AND ...ri 1 and moves
// the result into VK1; zext goes through KMOVWrk and masks the low bit.
1398 let Predicates = [HasAVX512] in {
1399   def : Pat<(i1 (trunc (i64 GR64:$src))),
1400             (COPY_TO_REGCLASS (KMOVWkr (AND32ri (EXTRACT_SUBREG $src, sub_32bit),
1403   def : Pat<(i1 (trunc (i32 GR32:$src))),
1404             (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>;
1406   def : Pat<(i1 (trunc (i8 GR8:$src))),
1408               (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
1410   def : Pat<(i1 (trunc (i16 GR16:$src))),
1412               (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
1415   def : Pat<(i32 (zext VK1:$src)),
1416             (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
1417   def : Pat<(i8 (zext VK1:$src)),
1420              (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
1421   def : Pat<(i64 (zext VK1:$src)),
1422             (AND64ri8 (SUBREG_TO_REG (i64 0),
1423              (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
1424   def : Pat<(i16 (zext VK1:$src)),
1426             (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
// A VK1 bit placed in a wider mask vector is just a register-class copy.
1428   def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
1429             (COPY_TO_REGCLASS VK1:$src, VK16)>;
1430   def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
1431             (COPY_TO_REGCLASS VK1:$src, VK8)>;
1433 let Predicates = [HasBWI] in {
1434   def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
1435             (COPY_TO_REGCLASS VK1:$src, VK32)>;
1436   def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
1437             (COPY_TO_REGCLASS VK1:$src, VK64)>;
1441 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
1442 let Predicates = [HasAVX512] in {
1443   // GR from/to 8-bit mask without native support
1444   def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
1446              (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
1448   def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
1450              (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
// Extracting bit 0 of a mask register is a plain copy into VK1.
1453   def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
1454             (COPY_TO_REGCLASS VK16:$src, VK1)>;
1455   def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
1456             (COPY_TO_REGCLASS VK8:$src, VK1)>;
1458 let Predicates = [HasBWI] in {
1459   def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
1460             (COPY_TO_REGCLASS VK32:$src, VK1)>;
1461   def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
1462             (COPY_TO_REGCLASS VK64:$src, VK1)>;
1465 // Mask unary operation
// One register-to-register mask op, gated on a feature predicate.
1467 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
1468                             RegisterClass KRC, SDPatternOperator OpNode,
1470   let Predicates = [prd]
1471     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
1472                !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
1473                [(set KRC:$dst, (OpNode KRC:$src))]>;
// Instantiate all four mask widths (B/W need DQI/AVX512, D/Q need BWI).
1476 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
1477                                 SDPatternOperator OpNode> {
1478   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1480   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1481                             HasAVX512>, VEX, PS;
1482   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1483                             HasBWI>, VEX, PD, VEX_W;
1484   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1485                             HasBWI>, VEX, PS, VEX_W;
// knot{b,w,d,q}: bitwise NOT of a mask register.
1488 defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
// Map the GPR-based knot intrinsic through a VK16 round-trip.
1490 multiclass avx512_mask_unop_int<string IntName, string InstName> {
1491   let Predicates = [HasAVX512] in
1492     def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1494               (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1495                 (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
1497 defm : avx512_mask_unop_int<"knot", "KNOT">;
// xor with all-ones is NOT; select the width-matched knot per predicate.
1499 let Predicates = [HasDQI] in
1500 def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
1501 let Predicates = [HasAVX512] in
1502 def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
1503 let Predicates = [HasBWI] in
1504 def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
1505 let Predicates = [HasBWI] in
1506 def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
1508 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
1509 let Predicates = [HasAVX512] in {
1510 def : Pat<(xor VK8:$src1,  (v8i1 immAllOnesV)),
1511           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
1513 def : Pat<(not VK8:$src),
1515            (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
1518 // Mask binary operation
1519 // - KAND, KANDN, KOR, KXNOR, KXOR
// One two-operand mask op, gated on a feature predicate.
1520 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
1521                              RegisterClass KRC, SDPatternOperator OpNode,
1523   let Predicates = [prd] in
1524     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1525                !strconcat(OpcodeStr,
1526                " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1527                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
// Instantiate all four mask widths; mask binops use the VEX.L encoding.
1530 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
1531                                  SDPatternOperator OpNode> {
1532   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
1533                              HasDQI>, VEX_4V, VEX_L, PD;
1534   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
1535                              HasAVX512>, VEX_4V, VEX_L, PS;
1536   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
1537                              HasBWI>, VEX_4V, VEX_L, VEX_W, PD;
1538   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
1539                              HasBWI>, VEX_4V, VEX_L, VEX_W, PS;
// DAG fragments for the kandn/kxnor node shapes.
1542 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
1543 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
1545 let isCommutable = 1 in {
1546   defm KAND  : avx512_mask_binop_all<0x41, "kand",  and>;
1547   defm KOR   : avx512_mask_binop_all<0x45, "kor",   or>;
1548   defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor>;
1549   defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor>;
1551 let isCommutable = 0 in
1552   defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn>;
// Single-bit (VK1) logic is done in VK16 and copied back.
1554 def : Pat<(xor VK1:$src1, VK1:$src2),
1555      (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1556                                 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1558 def : Pat<(or VK1:$src1, VK1:$src2),
1559      (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1560                                (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
1562 def : Pat<(and VK1:$src1, VK1:$src2),
1563      (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1564                                 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Lower the *_w two-operand mask intrinsics (i16, i16) through GR16<->VK16
// copies to the corresponding "Wrr" mask instruction.
1566 multiclass avx512_mask_binop_int<string IntName, string InstName> {
1567 let Predicates = [HasAVX512] in
1568 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
1569 (i16 GR16:$src1), (i16 GR16:$src2)),
1570 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
1571 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1572 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1575 defm : avx512_mask_binop_int<"kand", "KAND">;
1576 defm : avx512_mask_binop_int<"kandn", "KANDN">;
1577 defm : avx512_mask_binop_int<"kor", "KOR">;
1578 defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
1579 defm : avx512_mask_binop_int<"kxor", "KXOR">;
1581 // With AVX-512, 8-bit mask is promoted to 16-bit mask.
// Match a binop on VK8 operands using the 16-bit instruction: widen both
// sources to VK16, run Inst, and (per the missing line 1585, presumably a
// COPY_TO_REGCLASS back to VK8 -- not visible in this listing) narrow.
1582 multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
1583 let Predicates = [HasAVX512] in
1584 def : Pat<(OpNode VK8:$src1, VK8:$src2),
1586 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
1587 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
1590 defm : avx512_binop_pat<and, KANDWrr>;
1591 defm : avx512_binop_pat<andn, KANDNWrr>;
1592 defm : avx512_binop_pat<or, KORWrr>;
1593 defm : avx512_binop_pat<xnor, KXNORWrr>;
1594 defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpack (KUNPCKBW): register-register form with no ISel pattern;
// selection is done via the explicit Pat below.
1597 multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
1598 RegisterClass KRC> {
1599 let Predicates = [HasAVX512] in
1600 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
1601 !strconcat(OpcodeStr,
1602 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
1605 multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
1606 defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
1610 defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
// concat_vectors(lo=$src1, hi=$src2) -> KUNPCKBW with the operands
// swapped; presumably KUNPCKBW places its first source in the upper half
// of the result -- confirm against the Intel SDM.
1611 def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
1612 (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
1613 (COPY_TO_REGCLASS VK8:$src1, VK16))>;
// Lower the *_bw unpack intrinsic through GR16<->VK16 copies.
1616 multiclass avx512_mask_unpck_int<string IntName, string InstName> {
1617 let Predicates = [HasAVX512] in
1618 def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
1619 (i16 GR16:$src1), (i16 GR16:$src2)),
1620 (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
1621 (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
1622 (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
1624 defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
// Mask test op (KORTEST): ORs the two mask operands and sets EFLAGS; it
// produces no register result (outs is empty, Defs = [EFLAGS]).
1627 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1629 let Predicates = [HasAVX512], Defs = [EFLAGS] in
1630 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
1631 !strconcat(OpcodeStr, " \t{$src2, $src1|$src1, $src2}"),
1632 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
1635 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
1636 defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1640 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
// Compare a 1-bit mask against 0 by OR-ing the (widened) mask with itself:
// KORTESTW sets ZF iff the result is zero.
1642 def : Pat<(X86cmp VK1:$src1, (i1 0)),
1643 (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
1644 (COPY_TO_REGCLASS VK1:$src1, VK16))>;
// Mask shift by immediate (KSHIFTL/KSHIFTR).  opc2 of the _w wrapper is
// not used in the lines visible here (continuation lines are missing from
// this listing).
1647 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
1649 let Predicates = [HasAVX512] in
1650 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, i8imm:$imm),
1651 !strconcat(OpcodeStr,
1652 " \t{$imm, $src, $dst|$dst, $src, $imm}"),
1653 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
1656 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
1658 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
1662 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
1663 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
1665 // Mask setting all 0s or 1s
// Pseudo that materializes an all-zeros/all-ones mask; rematerializable
// and as cheap as a move so the register allocator can re-create it.
1666 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
1667 let Predicates = [HasAVX512] in
1668 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
1669 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
1670 [(set KRC:$dst, (VT Val))]>;
1673 multiclass avx512_mask_setop_w<PatFrag Val> {
1674 defm B : avx512_mask_setop<VK8, v8i1, Val>;
1675 defm W : avx512_mask_setop<VK16, v16i1, Val>;
1678 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
1679 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
1681 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// v8i1/i1 constants are produced by the 16-bit pseudos plus a regclass
// copy; i1 1 and i1 -1 are the same all-ones bit.
1682 let Predicates = [HasAVX512] in {
1683 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
1684 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
1685 def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
1686 def : Pat<(i1 1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
1687 def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSET1W), VK1)>;
// Sub-mask extraction/insertion: low 8 bits are a plain regclass copy;
// the high 8 bits require a KSHIFTRW by 8 first.
1689 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 0))),
1690 (v8i1 (COPY_TO_REGCLASS VK16:$src, VK8))>;
1692 def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
1693 (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16))>;
1695 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
1696 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
// v8i1 shifts are performed in a widened VK16 register.
1698 def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
1699 (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1701 def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
1702 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
1703 //===----------------------------------------------------------------------===//
1704 // AVX-512 - Aligned and unaligned load and store
// EVEX vector load: plain (rr/rm), zero-masked (rrkz/rmkz) and
// merge-masked (rrk/rmk) forms.  Masked forms select via vselect on the
// KRC write-mask; zvt is the VT used for the immAllZerosV zero vector.
// NOTE(review): several continuation lines are missing from this listing
// (embedded numbering skips); code kept byte-identical.
1707 multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
1708 RegisterClass KRC, RegisterClass RC,
1709 ValueType vt, ValueType zvt, X86MemOperand memop,
1710 Domain d, bit IsReMaterializable = 1> {
1711 let hasSideEffects = 0 in {
1712 def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
1713 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
1715 def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
1716 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1717 "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
// The unmasked memory form may be folded/rematerialized as a load.
1719 let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
1720 SchedRW = [WriteLoad] in
1721 def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
1722 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1723 [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
// Merge-masked forms tie $src0 to $dst (the pass-through value).
// Note the hasSideEffects = 0 on the inner let is redundant with the
// outer one; kept byte-identical.
1726 let AddedComplexity = 20 in {
1727 let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
1728 let hasSideEffects = 0 in
1729 def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
1730 (ins RC:$src0, KRC:$mask, RC:$src1),
1731 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1732 "${dst} {${mask}}, $src1}"),
1733 [(set RC:$dst, (vt (vselect KRC:$mask,
1737 let mayLoad = 1, SchedRW = [WriteLoad] in
1738 def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1739 (ins RC:$src0, KRC:$mask, memop:$src1),
1740 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
1741 "${dst} {${mask}}, $src1}"),
1744 (vt (bitconvert (ld_frag addr:$src1))),
// Zero-masked load: elements where the mask bit is clear become zero.
1748 let mayLoad = 1, SchedRW = [WriteLoad] in
1749 def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
1750 (ins KRC:$mask, memop:$src),
1751 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1752 "${dst} {${mask}} {z}, $src}"),
1755 (vt (bitconvert (ld_frag addr:$src))),
1756 (vt (bitconvert (zvt immAllZerosV))))))],
// Instantiate avx512_load at 512/256/128-bit vector lengths.  Record names
// (PatFrag, write-mask class, VT, memory operand) are assembled from the
// string parameters via !cast.  128/256-bit forms require VLX.
1761 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
1762 string elty, string elsz, string vsz512,
1763 string vsz256, string vsz128, Domain d,
1764 Predicate prd, bit IsReMaterializable = 1> {
1765 let Predicates = [prd] in
1766 defm Z : avx512_load<opc, OpcodeStr,
1767 !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
1768 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1769 !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
1770 !cast<X86MemOperand>(elty##"512mem"), d,
1771 IsReMaterializable>, EVEX_V512;
1773 let Predicates = [prd, HasVLX] in {
// For integer element types the 256/128-bit load fragments are v4i64/v2i64
// regardless of elsz, because integer loads are promoted to 64-bit element
// types during legalization (see the note in X86VectorVTInfo).
1774 defm Z256 : avx512_load<opc, OpcodeStr,
1775 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1776 "v"##vsz256##elty##elsz, "v4i64")),
1777 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1778 !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
1779 !cast<X86MemOperand>(elty##"256mem"), d,
1780 IsReMaterializable>, EVEX_V256;
1782 defm Z128 : avx512_load<opc, OpcodeStr,
1783 !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
1784 "v"##vsz128##elty##elsz, "v2i64")),
1785 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1786 !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
1787 !cast<X86MemOperand>(elty##"128mem"), d,
1788 IsReMaterializable>, EVEX_V128;
// EVEX vector store.  The register-register "_alt" forms exist only for
// the assembler/disassembler (isAsmParserOnly) -- they are the
// MRMDestReg encodings of the move and have no ISel patterns.
1793 multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
1794 ValueType OpVT, RegisterClass KRC, RegisterClass RC,
1795 X86MemOperand memop, Domain d> {
1796 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1797 def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
1798 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
1800 let Constraints = "$src1 = $dst" in
1801 def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1802 (ins RC:$src1, KRC:$mask, RC:$src2),
1803 !strconcat(OpcodeStr,
1804 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
1806 def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
1807 (ins KRC:$mask, RC:$src),
1808 !strconcat(OpcodeStr,
1809 "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
1810 [], d>, EVEX, EVEX_KZ;
// Real stores: unmasked (mr, selected via st_frag) and merge-masked (mrk,
// selected only through explicit patterns elsewhere).
1812 let mayStore = 1 in {
1813 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
1814 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1815 [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
1816 def mrk : AVX512PI<opc, MRMDestMem, (outs),
1817 (ins memop:$dst, KRC:$mask, RC:$src),
1818 !strconcat(OpcodeStr,
1819 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
1820 [], d>, EVEX, EVEX_K;
// Instantiate avx512_store at 512/256/128-bit lengths; the store PatFrag
// name is st_pat concatenated with a per-size suffix (st_suff_*), and the
// VT/mask/memop names are assembled via !cast as in avx512_load_vl.
// 128/256-bit forms require VLX.
1825 multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
1826 string st_suff_512, string st_suff_256,
1827 string st_suff_128, string elty, string elsz,
1828 string vsz512, string vsz256, string vsz128,
1829 Domain d, Predicate prd> {
1830 let Predicates = [prd] in
1831 defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
1832 !cast<ValueType>("v"##vsz512##elty##elsz),
1833 !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
1834 !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
1836 let Predicates = [prd, HasVLX] in {
1837 defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
1838 !cast<ValueType>("v"##vsz256##elty##elsz),
1839 !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
1840 !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
1842 defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
1843 !cast<ValueType>("v"##vsz128##elty##elsz),
1844 !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
1845 !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
// FP full-vector moves: aligned (VMOVAPS/VMOVAPD) and unaligned
// (VMOVUPS/VMOVUPD), each instantiated for load and store at all three
// vector lengths.  VMOVUPD's load is marked not-rematerializable (the
// trailing 0 argument).
1849 defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
1850 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1851 avx512_store_vl<0x29, "vmovaps", "alignedstore",
1852 "512", "256", "", "f", "32", "16", "8", "4",
1853 SSEPackedSingle, HasAVX512>,
1854 PS, EVEX_CD8<32, CD8VF>;
1856 defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
1857 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1858 avx512_store_vl<0x29, "vmovapd", "alignedstore",
1859 "512", "256", "", "f", "64", "8", "4", "2",
1860 SSEPackedDouble, HasAVX512>,
1861 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1863 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
1864 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1865 avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
1866 "16", "8", "4", SSEPackedSingle, HasAVX512>,
1867 PS, EVEX_CD8<32, CD8VF>;
1869 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
1870 "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
1871 avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
1872 "8", "4", "2", SSEPackedDouble, HasAVX512>,
1873 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Lower the masked-load/store FP intrinsics: the GPR mask is copied into
// the appropriate write-mask class and the zero-masked load / masked
// store instruction is used directly.
1875 def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
1876 (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
1877 (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1879 def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
1880 (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
1881 (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1883 def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
1885 (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
1887 def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
1889 (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Integer full-vector moves: aligned VMOVDQA32/64 (AVX512) and unaligned
// VMOVDQU8/16/32/64 (8/16-bit element forms require BWI).
1892 defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
1893 "16", "8", "4", SSEPackedInt, HasAVX512>,
1894 avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
1895 "512", "256", "", "i", "32", "16", "8", "4",
1896 SSEPackedInt, HasAVX512>,
1897 PD, EVEX_CD8<32, CD8VF>;
1899 defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
1900 "8", "4", "2", SSEPackedInt, HasAVX512>,
1901 avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
1902 "512", "256", "", "i", "64", "8", "4", "2",
1903 SSEPackedInt, HasAVX512>,
1904 PD, VEX_W, EVEX_CD8<64, CD8VF>;
1906 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
1907 "64", "32", "16", SSEPackedInt, HasBWI>,
1908 avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
1909 "i", "8", "64", "32", "16", SSEPackedInt,
1910 HasBWI>, XD, EVEX_CD8<8, CD8VF>;
1912 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
1913 "32", "16", "8", SSEPackedInt, HasBWI>,
1914 avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
1915 "i", "16", "32", "16", "8", SSEPackedInt,
1916 HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
1918 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
1919 "16", "8", "4", SSEPackedInt, HasAVX512>,
1920 avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
1921 "i", "32", "16", "8", "4", SSEPackedInt,
1922 HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
1924 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
1925 "8", "4", "2", SSEPackedInt, HasAVX512>,
1926 avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
1927 "i", "64", "8", "4", "2", SSEPackedInt,
1928 HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
// Masked integer load/store intrinsics, lowered the same way as the FP
// ones above (GPR mask -> write-mask class, then zero-masked load or
// masked store).
1930 def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
1931 (v16i32 immAllZerosV), GR16:$mask)),
1932 (VMOVDQU32Zrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
1934 def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
1935 (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
1936 (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
1938 def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
1940 (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
1942 def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
1944 (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
// Select vselect(mask, src, 0) to a zero-masked register move; when the
// operands are reversed (vselect(mask, 0, src)) the mask is inverted with
// KNOTW first.
1947 let AddedComplexity = 20 in {
1948 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
1949 (bc_v8i64 (v16i32 immAllZerosV)))),
1950 (VMOVDQU64Zrrkz VK8WM:$mask, VR512:$src)>;
1952 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
1953 (v8i64 VR512:$src))),
// NOTE(review): the source operand here is written VK8:$mask while the
// pattern binds VK8WM:$mask; kept byte-identical (a continuation line is
// also missing from this listing).
1954 (VMOVDQU64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
1957 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src),
1958 (v16i32 immAllZerosV))),
1959 (VMOVDQU32Zrrkz VK16WM:$mask, VR512:$src)>;
1961 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
1962 (v16i32 VR512:$src))),
1963 (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
1966 // Move Int Doubleword to Packed Double Int
// GR32/GR64 -> XMM moves (vmovd/vmovq), register and memory forms.
1968 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
1969 "vmovd\t{$src, $dst|$dst, $src}",
1971 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
1973 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
1974 "vmovd\t{$src, $dst|$dst, $src}",
1976 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
1977 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1978 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
1979 "vmovq\t{$src, $dst|$dst, $src}",
1981 (v2i64 (scalar_to_vector GR64:$src)))],
1982 IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
// Bitcast moves between GR64 and FR64 (codegen-only: same encodings as
// the vector forms, used for scalar bitconvert selection).
1983 let isCodeGenOnly = 1 in {
1984 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
1985 "vmovq\t{$src, $dst|$dst, $src}",
1986 [(set FR64:$dst, (bitconvert GR64:$src))],
1987 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
1988 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
1989 "vmovq\t{$src, $dst|$dst, $src}",
1990 [(set GR64:$dst, (bitconvert FR64:$src))],
1991 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
1993 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
1994 "vmovq\t{$src, $dst|$dst, $src}",
1995 [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
1996 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
1997 EVEX_CD8<64, CD8VT1>;
1999 // Move Int Doubleword to Single Scalar
// GR32 <-> FR32X bitcast moves (codegen-only scalar aliases of vmovd).
2001 let isCodeGenOnly = 1 in {
2002 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
2003 "vmovd\t{$src, $dst|$dst, $src}",
2004 [(set FR32X:$dst, (bitconvert GR32:$src))],
2005 IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
2007 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
2008 "vmovd\t{$src, $dst|$dst, $src}",
2009 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
2010 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2013 // Move doubleword from xmm register to r/m32
// Extract element 0 of a v4i32 into GR32 or store it to memory.
2015 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
2016 "vmovd\t{$src, $dst|$dst, $src}",
2017 [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
2018 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
2020 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2021 (ins i32mem:$dst, VR128X:$src),
2022 "vmovd\t{$src, $dst|$dst, $src}",
2023 [(store (i32 (vector_extract (v4i32 VR128X:$src),
2024 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
2025 EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2027 // Move quadword from xmm1 register to r/m64
// 64-bit extracts require 64-bit mode (REX.W-style VEX_W encoding).
2029 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
2030 "vmovq\t{$src, $dst|$dst, $src}",
2031 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
2033 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
2034 Requires<[HasAVX512, In64BitMode]>;
2036 def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
2037 (ins i64mem:$dst, VR128X:$src),
2038 "vmovq\t{$src, $dst|$dst, $src}",
2039 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
2040 addr:$dst)], IIC_SSE_MOVDQ>,
2041 EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
2042 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
2044 // Move Scalar Single to Double Int
// FR32X -> GR32/memory bitcast moves (codegen-only).
2046 let isCodeGenOnly = 1 in {
2047 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
2049 "vmovd\t{$src, $dst|$dst, $src}",
2050 [(set GR32:$dst, (bitconvert FR32X:$src))],
2051 IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
2052 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
2053 (ins i32mem:$dst, FR32X:$src),
2054 "vmovd\t{$src, $dst|$dst, $src}",
2055 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
2056 IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
2059 // Move Quadword Int to Packed Quadword Int
// 64-bit load into the low qword of an XMM register.
2061 def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
2063 "vmovq\t{$src, $dst|$dst, $src}",
2065 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
2066 EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
2068 //===----------------------------------------------------------------------===//
2069 // AVX-512 MOVSS, MOVSD
2070 //===----------------------------------------------------------------------===//
// Scalar move (MOVSS/MOVSD) template: register merge form (rr), masked
// merge form (rrk, VK1WM write-mask), scalar load (rm) and scalar store
// (mr) plus a masked store (mrk).  rrk/mrk have no ISel patterns; they
// are reached through explicit Pats below.
2072 multiclass avx512_move_scalar <string asm, RegisterClass RC,
2073 SDNode OpNode, ValueType vt,
2074 X86MemOperand x86memop, PatFrag mem_pat> {
2075 let hasSideEffects = 0 in {
2076 def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
2077 !strconcat(asm, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2078 [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
2079 (scalar_to_vector RC:$src2))))],
2080 IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
2081 let Constraints = "$src1 = $dst" in
2082 def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
2083 (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
2085 " \t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
2086 [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
2087 def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
2088 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2089 [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
2091 let mayStore = 1 in {
2092 def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
2093 !strconcat(asm, " \t{$src, $dst|$dst, $src}"),
2094 [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
2096 def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
2097 !strconcat(asm, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
2098 [], IIC_SSE_MOV_S_MR>,
2099 EVEX, VEX_LIG, EVEX_K;
2101 } //hasSideEffects = 0
// Instantiate the scalar-move template for SS (f32) and SD (f64).
2104 let ExeDomain = SSEPackedSingle in
2105 defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
2106 loadf32>, XS, EVEX_CD8<32, CD8VT1>;
2108 let ExeDomain = SSEPackedDouble in
2109 defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
2110 loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Select scalar X86select on a VK1 write-mask to the masked move form:
// src2 is copied into a VR128X, merged under the mask, and the scalar
// result extracted back to the FP register class.
2112 def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
2113 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
2114 VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
2116 def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
2117 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
2118 VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
// Masked scalar store intrinsic: GR8 mask -> VK1WM, then the mrk form.
2120 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
2121 (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
2122 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2124 // For the disassembler
// Reverse-operand (MRMDestReg) encodings; pattern-less, decode-only.
2125 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
2126 def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2127 (ins VR128X:$src1, FR32X:$src2),
2128 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2130 XS, EVEX_4V, VEX_LIG;
2131 def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
2132 (ins VR128X:$src1, FR64X:$src2),
2133 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2135 XD, EVEX_4V, VEX_LIG, VEX_W;
// AVX-512 selection patterns for scalar moves: zero-extending moves
// (X86vzmovl), zero-extending loads, extract-and-store, and MOVSS/MOVSD
// shuffles, mirroring the SSE/AVX patterns for the EVEX-encoded forms.
2138 let Predicates = [HasAVX512] in {
2139 let AddedComplexity = 15 in {
2140 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
2141 // MOVS{S,D} to the lower bits.
2142 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
2143 (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
2144 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
2145 (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2146 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
2147 (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
2148 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
2149 (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
2151 // Move low f32 and clear high bits.
2152 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
2153 (SUBREG_TO_REG (i32 0),
2154 (VMOVSSZrr (v4f32 (V_SET0)),
2155 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
2156 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
2157 (SUBREG_TO_REG (i32 0),
2158 (VMOVSSZrr (v4i32 (V_SET0)),
2159 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
2162 let AddedComplexity = 20 in {
2163 // MOVSSrm zeros the high parts of the register; represent this
2164 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2165 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
2166 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2167 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
2168 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2169 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
2170 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
2172 // MOVSDrm zeros the high parts of the register; represent this
2173 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
2174 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
2175 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2176 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
2177 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2178 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
2179 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2180 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
2181 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2182 def : Pat<(v2f64 (X86vzload addr:$src)),
2183 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
2185 // Represent the same patterns above but in the form they appear for
// 256-bit types (scalar inserted into an undef wide vector, then
// zero-extended); SUBREG_TO_REG asserts the upper bits are zero.
2187 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2188 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
2189 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
2190 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2191 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
2192 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
2193 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2194 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
2195 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
2197 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
2198 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
2199 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
2200 FR32X:$src)), sub_xmm)>;
2201 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
2202 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
2203 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
2204 FR64X:$src)), sub_xmm)>;
2205 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2206 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
2207 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
2209 // Move low f64 and clear high bits.
2210 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
2211 (SUBREG_TO_REG (i32 0),
2212 (VMOVSDZrr (v2f64 (V_SET0)),
2213 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
2215 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
2216 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
2217 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
2219 // Extract and store.
2220 def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
2222 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
2223 def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
2225 (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
2227 // Shuffle with VMOVSS
2228 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
2229 (VMOVSSZrr (v4i32 VR128X:$src1),
2230 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
2231 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
2232 (VMOVSSZrr (v4f32 VR128X:$src1),
2233 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit MOVSS shuffles operate on the low xmm halves, then reassemble.
2236 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
2237 (SUBREG_TO_REG (i32 0),
2238 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
2239 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
2241 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
2242 (SUBREG_TO_REG (i32 0),
2243 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
2244 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
2247 // Shuffle with VMOVSD
2248 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2249 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2250 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
2251 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2252 def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2253 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2254 def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
2255 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2258 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2259 (SUBREG_TO_REG (i32 0),
2260 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
2261 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
2263 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
2264 (SUBREG_TO_REG (i32 0),
2265 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
2266 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
// Movlpd/Movlps shuffles also select to VMOVSD (low-element replace).
2269 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2270 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2271 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
2272 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2273 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2274 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
2275 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
2276 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// vmovq: move the low qword and zero-extend (register and load forms).
// NOTE(review): the load form carries EVEX_CD8<8, CD8VT8>, unlike the
// other 64-bit moves here which use EVEX_CD8<64, CD8VT1> -- looks
// inconsistent; confirm the intended disp8 tuple before changing.
2279 let AddedComplexity = 15 in
2280 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
2282 "vmovq\t{$src, $dst|$dst, $src}",
2283 [(set VR128X:$dst, (v2i64 (X86vzmovl
2284 (v2i64 VR128X:$src))))],
2285 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
2287 let AddedComplexity = 20 in
2288 def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
2290 "vmovq\t{$src, $dst|$dst, $src}",
2291 [(set VR128X:$dst, (v2i64 (X86vzmovl
2292 (loadv2i64 addr:$src))))],
2293 IIC_SSE_MOVDQ>, EVEX, VEX_W,
2294 EVEX_CD8<8, CD8VT8>;
2296 let Predicates = [HasAVX512] in {
2297 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
2298 let AddedComplexity = 20 in {
2299 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
2300 (VMOVDI2PDIZrm addr:$src)>;
2301 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
2302 (VMOV64toPQIZrr GR64:$src)>;
2303 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
2304 (VMOVDI2PDIZrr GR32:$src)>;
2306 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
2307 (VMOVDI2PDIZrm addr:$src)>;
2308 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
2309 (VMOVDI2PDIZrm addr:$src)>;
2310 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
2311 (VMOVZPQILo2PQIZrm addr:$src)>;
2312 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
2313 (VMOVZPQILo2PQIZrr VR128X:$src)>;
2314 def : Pat<(v2i64 (X86vzload addr:$src)),
2315 (VMOVZPQILo2PQIZrm addr:$src)>;
2318 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
2319 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
2320 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
2321 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
2322 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
2323 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
2324 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
// Insert a GPR scalar into element 0 of a 512-bit zero or undef vector
// using the 128-bit movd/movq plus SUBREG_TO_REG.
2327 def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
2328 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2330 def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
2331 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2333 def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
2334 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
2336 def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
2337 (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
2339 //===----------------------------------------------------------------------===//
2340 // AVX-512 - Non-temporals
2341 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads. Only the 512-bit form has a selection
// pattern (the movntdqa intrinsic); the VLX 256/128-bit forms are
// encoding-only here (empty pattern lists).
2342 let SchedRW = [WriteLoad] in {
2343 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
2344 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
2345 [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
2346 SSEPackedInt>, EVEX, T8PD, EVEX_V512,
2347 EVEX_CD8<64, CD8VF>;
2349 let Predicates = [HasAVX512, HasVLX] in {
// NOTE(review): the "(ins i256mem:$src)" / "(ins i128mem:$src)" operand
// lines appear to be missing from this excerpt — verify the full file.
2350 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
2352 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2353 SSEPackedInt>, EVEX, T8PD, EVEX_V256,
2354 EVEX_CD8<64, CD8VF>;
2356 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
2358 "vmovntdqa\t{$src, $dst|$dst, $src}", [],
2359 SSEPackedInt>, EVEX, T8PD, EVEX_V128,
2360 EVEX_CD8<64, CD8VF>;
// One non-temporal store instruction (register -> memory) for a single
// vector width. st_frag is the store fragment to match (e.g.
// alignednontemporalstore), OpVT/RC/memop fix the width, d the exec domain.
2364 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2365 ValueType OpVT, RegisterClass RC, X86MemOperand memop,
2366 Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
// High AddedComplexity so the NT store wins over a plain store pattern.
2367 let SchedRW = [WriteStore], mayStore = 1,
2368 AddedComplexity = 400 in
2369 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
2370 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2371 [(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
// Instantiate avx512_movnt at 512/256/128 bits. The value type is built
// textually as "v" # vszNNN # elty # elsz (e.g. "v8" "f" "64" -> v8f64) and
// the memory operand as elty # "NNNmem". 256/128-bit forms need VLX.
2374 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
2375 string elty, string elsz, string vsz512,
2376 string vsz256, string vsz128, Domain d,
2377 Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
2378 let Predicates = [prd] in
2379 defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
2380 !cast<ValueType>("v"##vsz512##elty##elsz), VR512,
2381 !cast<X86MemOperand>(elty##"512mem"), d, itin>,
// NOTE(review): the EVEX_V512 / EVEX_V256 / EVEX_V128 suffix lines appear
// to be missing from this excerpt — verify the full file.
2384 let Predicates = [prd, HasVLX] in {
2385 defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
2386 !cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
2387 !cast<X86MemOperand>(elty##"256mem"), d, itin>,
2390 defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
2391 !cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
2392 !cast<X86MemOperand>(elty##"128mem"), d, itin>,
// Non-temporal stores: integer (vmovntdq), double (vmovntpd) and single
// (vmovntps). Element counts per width follow from the elty/elsz strings,
// e.g. "i"/"64" with 8/4/2 elements -> v8i64/v4i64/v2i64.
2397 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
2398 "i", "64", "8", "4", "2", SSEPackedInt,
2399 HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
2401 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
2402 "f", "64", "8", "4", "2", SSEPackedDouble,
2403 HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2405 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
2406 "f", "32", "16", "8", "4", SSEPackedSingle,
2407 HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
2409 //===----------------------------------------------------------------------===//
2410 // AVX-512 - Integer arithmetic
// Integer binary op with the full AVX-512 form matrix:
//   rr/rrk/rrkz    - register-register, plus merge- and zero-masked forms
//   rm/rmk/rmkz    - register-memory
//   rmb/rmbk/rmbkz - broadcast from a scalar memory operand ({1toN})
// Masked forms select via (vselect KRC:$mask, op, src0-or-zeros).
// NOTE(review): several closing lines (e.g. "(OpVT RC:$src0))))]," and
// block-closing braces) appear to be missing from this excerpt — verify
// the full file before editing.
2412 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2413 ValueType OpVT, RegisterClass KRC,
2414 RegisterClass RC, PatFrag memop_frag,
2415 X86MemOperand x86memop, PatFrag scalar_mfrag,
2416 X86MemOperand x86scalar_mop, string BrdcstStr,
2417 OpndItins itins, bit IsCommutable = 0> {
2418 let isCommutable = IsCommutable in
2419 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2420 (ins RC:$src1, RC:$src2),
2421 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2422 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
// Masked forms get extra complexity so they out-rank the unmasked ones.
2424 let AddedComplexity = 30 in {
// Merge-masking ties $src0 (the pass-through value) to $dst.
2425 let Constraints = "$src0 = $dst" in
2426 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2427 (ins RC:$src0, KRC:$mask, RC:$src1, RC:$src2),
2428 !strconcat(OpcodeStr,
2429 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2430 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2431 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2433 itins.rr>, EVEX_4V, EVEX_K;
2434 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2435 (ins KRC:$mask, RC:$src1, RC:$src2),
2436 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2437 "|$dst {${mask}} {z}, $src1, $src2}"),
2438 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2439 (OpNode (OpVT RC:$src1), (OpVT RC:$src2)),
2440 (OpVT immAllZerosV))))],
2441 itins.rr>, EVEX_4V, EVEX_KZ;
2444 let mayLoad = 1 in {
2445 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2446 (ins RC:$src1, x86memop:$src2),
2447 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2448 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
2450 let AddedComplexity = 30 in {
2451 let Constraints = "$src0 = $dst" in
2452 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2453 (ins RC:$src0, KRC:$mask, RC:$src1, x86memop:$src2),
2454 !strconcat(OpcodeStr,
2455 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2456 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2457 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2459 itins.rm>, EVEX_4V, EVEX_K;
2460 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2461 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2462 !strconcat(OpcodeStr,
2463 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2464 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2465 (OpNode (OpVT RC:$src1), (memop_frag addr:$src2)),
2466 (OpVT immAllZerosV))))],
2467 itins.rm>, EVEX_4V, EVEX_KZ;
// Broadcast forms: one scalar element loaded and splatted (X86VBroadcast);
// BrdcstStr is the "{1toN}" asm decoration.
2469 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2470 (ins RC:$src1, x86scalar_mop:$src2),
2471 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2472 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2473 [(set RC:$dst, (OpNode RC:$src1,
2474 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2475 itins.rm>, EVEX_4V, EVEX_B;
2476 let AddedComplexity = 30 in {
2477 let Constraints = "$src0 = $dst" in
2478 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2479 (ins RC:$src0, KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2480 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2481 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2483 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2484 (OpNode (OpVT RC:$src1),
2485 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2487 itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2488 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2489 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2490 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2491 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2493 [(set RC:$dst, (OpVT (vselect KRC:$mask,
2494 (OpNode (OpVT RC:$src1),
2495 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))),
2496 (OpVT immAllZerosV))))],
2497 itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// Binary op whose result type differs from its source type (DstVT vs
// SrcVT) — used for vpmuldq/vpmuludq (v16i32 inputs, v8i64 result).
// Same rr/rm/rmb + k/kz form matrix as avx512_binop_rm, but all masked and
// memory forms here carry empty pattern lists (intrinsic-selected instead).
2502 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
2503 ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
2504 PatFrag memop_frag, X86MemOperand x86memop,
2505 PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
2506 string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
2507 let isCommutable = IsCommutable in
2509 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2510 (ins RC:$src1, RC:$src2),
2511 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
// NOTE(review): the rr pattern/itinerary line appears to be missing from
// this excerpt — verify the full file.
2513 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2514 (ins KRC:$mask, RC:$src1, RC:$src2),
2515 !strconcat(OpcodeStr,
2516 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2517 [], itins.rr>, EVEX_4V, EVEX_K;
2518 def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2519 (ins KRC:$mask, RC:$src1, RC:$src2),
2520 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst {${mask}} {z}" ,
2521 "|$dst {${mask}} {z}, $src1, $src2}"),
2522 [], itins.rr>, EVEX_4V, EVEX_KZ;
2524 let mayLoad = 1 in {
2525 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2526 (ins RC:$src1, x86memop:$src2),
2527 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2529 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2530 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2531 !strconcat(OpcodeStr,
2532 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2533 [], itins.rm>, EVEX_4V, EVEX_K;
2534 def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2535 (ins KRC:$mask, RC:$src1, x86memop:$src2),
2536 !strconcat(OpcodeStr,
2537 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2538 [], itins.rm>, EVEX_4V, EVEX_KZ;
2539 def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2540 (ins RC:$src1, x86scalar_mop:$src2),
2541 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2542 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2543 [], itins.rm>, EVEX_4V, EVEX_B;
2544 def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2545 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2546 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2547 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
2549 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
2550 def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2551 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
2552 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2553 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2555 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit integer add/sub/mul. Dword ops use VK16WM masks and {1to16}
// broadcast; qword ops use VK8WM and {1to8}.
2559 defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VK16WM, VR512,
2560 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2561 SSE_INTALU_ITINS_P, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2563 defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VK16WM, VR512,
2564 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2565 SSE_INTALU_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2567 defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VK16WM, VR512,
2568 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2569 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2571 defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VK8WM, VR512,
2572 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2573 SSE_INTALU_ITINS_P, 1>, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
2575 defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VK8WM, VR512,
2576 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2577 SSE_INTALU_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Widening multiplies: v16i32 sources, v8i64 result (rm2 variant).
2579 defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
2580 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2581 SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
2582 EVEX_CD8<64, CD8VF>, VEX_W;
2584 defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
2585 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2586 SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
// Select the DAG node and the unmasked (all-ones mask) intrinsic calls
// directly onto the rr instructions defined above.
2588 def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
2589 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2591 def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
2592 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2593 (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
2594 def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
2595 (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2596 (VPMULDQZrr VR512:$src1, VR512:$src2)>;
// 512-bit signed/unsigned min/max. All are opcode-shared between the d and
// q variants (distinguished by VEX_W / element size).
2598 defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VK16WM, VR512,
2599 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2600 SSE_INTALU_ITINS_P, 1>,
2601 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2602 defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VK8WM, VR512,
2603 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2604 SSE_INTALU_ITINS_P, 0>,
2605 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2607 defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VK16WM, VR512,
2608 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2609 SSE_INTALU_ITINS_P, 1>,
2610 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2611 defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VK8WM, VR512,
2612 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2613 SSE_INTALU_ITINS_P, 0>,
2614 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2616 defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VK16WM, VR512,
2617 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2618 SSE_INTALU_ITINS_P, 1>,
2619 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2620 defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VK8WM, VR512,
2621 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2622 SSE_INTALU_ITINS_P, 0>,
2623 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2625 defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VK16WM, VR512,
2626 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2627 SSE_INTALU_ITINS_P, 1>,
2628 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
2629 defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VK8WM, VR512,
2630 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2631 SSE_INTALU_ITINS_P, 0>,
2632 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Unmasked (zero pass-through, all-ones mask) min/max intrinsic calls map
// to the plain rr instructions.
2634 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxs_d_512 (v16i32 VR512:$src1),
2635 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2636 (VPMAXSDZrr VR512:$src1, VR512:$src2)>;
2637 def : Pat <(v16i32 (int_x86_avx512_mask_pmaxu_d_512 (v16i32 VR512:$src1),
2638 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2639 (VPMAXUDZrr VR512:$src1, VR512:$src2)>;
2640 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxs_q_512 (v8i64 VR512:$src1),
2641 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2642 (VPMAXSQZrr VR512:$src1, VR512:$src2)>;
2643 def : Pat <(v8i64 (int_x86_avx512_mask_pmaxu_q_512 (v8i64 VR512:$src1),
2644 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2645 (VPMAXUQZrr VR512:$src1, VR512:$src2)>;
2646 def : Pat <(v16i32 (int_x86_avx512_mask_pmins_d_512 (v16i32 VR512:$src1),
2647 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2648 (VPMINSDZrr VR512:$src1, VR512:$src2)>;
2649 def : Pat <(v16i32 (int_x86_avx512_mask_pminu_d_512 (v16i32 VR512:$src1),
2650 (v16i32 VR512:$src2), (v16i32 immAllZerosV), (i16 -1))),
2651 (VPMINUDZrr VR512:$src1, VR512:$src2)>;
2652 def : Pat <(v8i64 (int_x86_avx512_mask_pmins_q_512 (v8i64 VR512:$src1),
2653 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2654 (VPMINSQZrr VR512:$src1, VR512:$src2)>;
2655 def : Pat <(v8i64 (int_x86_avx512_mask_pminu_q_512 (v8i64 VR512:$src1),
2656 (v8i64 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
2657 (VPMINUQZrr VR512:$src1, VR512:$src2)>;
2658 //===----------------------------------------------------------------------===//
2659 // AVX-512 - Unpack Instructions
2660 //===----------------------------------------------------------------------===//
// FP unpack (vunpckh/lps/pd): rr and rm forms. The memory operand is
// bitconverted to the instruction's vector type before OpNode.
// NOTE(review): the asm-string and result/itinerary lines of both defs
// appear to be missing from this excerpt — verify the full file.
2662 multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
2663 PatFrag mem_frag, RegisterClass RC,
2664 X86MemOperand x86memop, string asm,
2666 def rr : AVX512PI<opc, MRMSrcReg,
2667 (outs RC:$dst), (ins RC:$src1, RC:$src2),
2669 (vt (OpNode RC:$src1, RC:$src2)))],
2671 def rm : AVX512PI<opc, MRMSrcMem,
2672 (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2674 (vt (OpNode RC:$src1,
2675 (bitconvert (mem_frag addr:$src2)))))],
// 512-bit FP unpack high/low.
// NOTE(review): the PS (v16f32) variants pass memopv8f64 as the memory
// fragment; the multiclass bitconverts it, so this is functionally
// workable, but memopv16f32 would be the natural choice — confirm intent.
2679 defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
2680 VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2681 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2682 defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
2683 VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2684 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2685 defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
2686 VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2687 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
2688 defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
2689 VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2690 SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// Integer unpack (vpunpck*): rr and rm forms; the memory operand is
// bitconverted to OpVT before OpNode.
2692 multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
2693 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2694 X86MemOperand x86memop> {
2695 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
2696 (ins RC:$src1, RC:$src2),
2697 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2698 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
2699 IIC_SSE_UNPCK>, EVEX_4V;
2700 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
2701 (ins RC:$src1, x86memop:$src2),
2702 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2703 [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
2704 (bitconvert (memop_frag addr:$src2)))))],
2705 IIC_SSE_UNPCK>, EVEX_4V;
// 512-bit integer unpack low/high for dwords and qwords.
2707 defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
2708 VR512, memopv16i32, i512mem>, EVEX_V512,
2709 EVEX_CD8<32, CD8VF>;
2710 defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
2711 VR512, memopv8i64, i512mem>, EVEX_V512,
2712 VEX_W, EVEX_CD8<64, CD8VF>;
2713 defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
2714 VR512, memopv16i32, i512mem>, EVEX_V512,
2715 EVEX_CD8<32, CD8VF>;
2716 defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
2717 VR512, memopv8i64, i512mem>, EVEX_V512,
2718 VEX_W, EVEX_CD8<64, CD8VF>;
2719 //===----------------------------------------------------------------------===//
// Shuffle with an 8-bit immediate control (vpshufd/vpermilps/pd): ri form
// (register source) and mi form (memory source).
// NOTE(review): the "[(set RC:$dst," result-prefix lines of both defs
// appear to be missing from this excerpt — verify the full file.
2723 multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
2724 SDNode OpNode, PatFrag mem_frag,
2725 X86MemOperand x86memop, ValueType OpVT> {
2726 def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
2727 (ins RC:$src1, i8imm:$src2),
2728 !strconcat(OpcodeStr,
2729 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2731 (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
2733 def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
2734 (ins x86memop:$src1, i8imm:$src2),
2735 !strconcat(OpcodeStr,
2736 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2738 (OpVT (OpNode (mem_frag addr:$src1),
2739 (i8 imm:$src2))))]>, EVEX;
// 512-bit immediate shuffles.
2742 defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
2743 i512mem, v16i32>, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
// NOTE(review): the VPERMILP*Z defms use i512mem despite being FP
// permutes (f512mem would match their memopv16f32/memopv8f64 fragments),
// and VPERMILPDZ combines VEX_W (64-bit elements) with EVEX_CD8<32, CD8VF>
// — EVEX_CD8<64, CD8VF> would be expected. Confirm against the encoding.
2745 let ExeDomain = SSEPackedSingle in
2746 defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
2747 memopv16f32, i512mem, v16f32>, TAPD, EVEX_V512,
2748 EVEX_CD8<32, CD8VF>;
2749 let ExeDomain = SSEPackedDouble in
2750 defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
2751 memopv8f64, i512mem, v8f64>, TAPD, EVEX_V512,
2752 VEX_W, EVEX_CD8<32, CD8VF>;
// Integer-typed VPermilp nodes reuse the FP instructions.
2754 def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
2755 (VPERMILPSZri VR512:$src1, imm:$imm)>;
2756 def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
2757 (VPERMILPDZri VR512:$src1, imm:$imm)>;
2759 //===----------------------------------------------------------------------===//
2760 // AVX-512 Logical Instructions
2761 //===----------------------------------------------------------------------===//
// 512-bit bitwise and/or/xor/andn in both dword and qword flavors. The d/q
// split only matters for masking granularity and broadcast element size.
2763 defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VK16WM, VR512, memopv16i32,
2764 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2765 EVEX_V512, EVEX_CD8<32, CD8VF>;
2766 defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VK8WM, VR512, memopv8i64,
2767 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2768 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2769 defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VK16WM, VR512, memopv16i32,
2770 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2771 EVEX_V512, EVEX_CD8<32, CD8VF>;
2772 defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VK8WM, VR512, memopv8i64,
2773 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2774 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2775 defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VK16WM, VR512, memopv16i32,
2776 i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
2777 EVEX_V512, EVEX_CD8<32, CD8VF>;
2778 defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VK8WM, VR512, memopv8i64,
2779 i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
2780 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// andn is not commutative, hence IsCommutable = 0.
2781 defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VK16WM, VR512,
2782 memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
2783 SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
2784 defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VK8WM, VR512,
2785 memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
2786 SSE_BIT_ITINS_P, 0>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
2788 //===----------------------------------------------------------------------===//
2789 // AVX-512 FP arithmetic
2790 //===----------------------------------------------------------------------===//
// Scalar FP binary op: instantiates the shared sse12_fp_scalar multiclass
// for ss (f32, XS prefix) and sd (f64, XD + VEX_W) with EVEX encoding.
2792 multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2794 defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
2795 f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
2796 EVEX_CD8<32, CD8VT1>;
2797 defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
2798 f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
2799 EVEX_CD8<64, CD8VT1>;
// Scalar FP arithmetic: add/mul/min/max commute, sub/div do not.
2802 let isCommutable = 1 in {
2803 defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
2804 defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
2805 defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
2806 defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
2808 let isCommutable = 0 in {
2809 defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
2810 defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
// Packed FP binary op with the full form matrix: rr (+rrk/rrkz masked),
// rm (memory), rmb (broadcast), and masked memory/broadcast variants.
// Masked forms here are assembly-only (empty pattern lists).
2813 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2815 RegisterClass RC, ValueType vt,
2816 X86MemOperand x86memop, PatFrag mem_frag,
2817 X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
2819 Domain d, OpndItins itins, bit commutable> {
2820 let isCommutable = commutable in {
2821 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
2822 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2823 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
2826 def rrk: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2827 !strconcat(OpcodeStr,
2828 " \t{$src2, $src1, $dst {${mask}} |$dst {${mask}}, $src1, $src2}"),
2829 [], itins.rr, d>, EVEX_4V, EVEX_K;
2831 def rrkz: PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src1, RC:$src2),
2832 !strconcat(OpcodeStr,
2833 " \t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2834 [], itins.rr, d>, EVEX_4V, EVEX_KZ;
2837 let mayLoad = 1 in {
2838 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2839 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2840 [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
2841 itins.rm, d>, EVEX_4V;
// Broadcast form: splat one scalar element ({1toN} decoration).
2843 def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
2844 (ins RC:$src1, x86scalar_mop:$src2),
2845 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
2846 ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
2847 [(set RC:$dst, (OpNode RC:$src1,
2848 (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
2849 itins.rm, d>, EVEX_4V, EVEX_B;
2851 def rmk : PI<opc, MRMSrcMem, (outs RC:$dst),
2852 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2853 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
2854 [], itins.rm, d>, EVEX_4V, EVEX_K;
2856 def rmkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2857 (ins KRC:$mask, RC:$src1, x86memop:$src2), !strconcat(OpcodeStr,
2858 "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
2859 [], itins.rm, d>, EVEX_4V, EVEX_KZ;
2861 def rmbk : PI<opc, MRMSrcMem, (outs RC:$dst),
2862 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2863 " \t{${src2}", BrdcstStr,
2864 ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", BrdcstStr, "}"),
2865 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_K;
2867 def rmbkz : PI<opc, MRMSrcMem, (outs RC:$dst),
2868 (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), !strconcat(OpcodeStr,
2869 " \t{${src2}", BrdcstStr,
2870 ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
2872 [], itins.rm, d>, EVEX_4V, EVEX_B, EVEX_KZ;
// 512-bit packed FP add/mul/min/max/sub/div. PS variants: v16f32, {1to16}
// broadcast, VK16WM masks; PD variants: v8f64, {1to8}, VK8WM, VEX_W.
2876 defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VK16WM, VR512, v16f32, f512mem,
2877 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2878 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2880 defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VK8WM, VR512, v8f64, f512mem,
2881 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2882 SSE_ALU_ITINS_P.d, 1>,
2883 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2885 defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VK16WM, VR512, v16f32, f512mem,
2886 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2887 SSE_ALU_ITINS_P.s, 1>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2888 defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VK8WM, VR512, v8f64, f512mem,
2889 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2890 SSE_ALU_ITINS_P.d, 1>,
2891 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2893 defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VK16WM, VR512, v16f32, f512mem,
2894 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2895 SSE_ALU_ITINS_P.s, 1>,
2896 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2897 defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VK16WM, VR512, v16f32, f512mem,
2898 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2899 SSE_ALU_ITINS_P.s, 1>,
2900 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2902 defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VK8WM, VR512, v8f64, f512mem,
2903 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2904 SSE_ALU_ITINS_P.d, 1>,
2905 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2906 defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VK8WM, VR512, v8f64, f512mem,
2907 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2908 SSE_ALU_ITINS_P.d, 1>,
2909 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// sub/div are non-commutative (final template arg 0).
2911 defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VK16WM, VR512, v16f32, f512mem,
2912 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2913 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2914 defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VK16WM, VR512, v16f32, f512mem,
2915 memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
2916 SSE_ALU_ITINS_P.s, 0>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
2918 defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VK8WM, VR512, v8f64, f512mem,
2919 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2920 SSE_ALU_ITINS_P.d, 0>,
2921 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
2922 defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VK8WM, VR512, v8f64, f512mem,
2923 memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
2924 SSE_ALU_ITINS_P.d, 0>,
2925 EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Unmasked max/min intrinsic calls (zero pass-through, all-ones mask,
// current rounding mode) select the plain rr instructions.
2927 def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1),
2928 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
2929 (i16 -1), FROUND_CURRENT)),
2930 (VMAXPSZrr VR512:$src1, VR512:$src2)>;
2932 def : Pat<(v8f64 (int_x86_avx512_mask_max_pd_512 (v8f64 VR512:$src1),
2933 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
2934 (i8 -1), FROUND_CURRENT)),
2935 (VMAXPDZrr VR512:$src1, VR512:$src2)>;
2937 def : Pat<(v16f32 (int_x86_avx512_mask_min_ps_512 (v16f32 VR512:$src1),
2938 (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)),
2939 (i16 -1), FROUND_CURRENT)),
2940 (VMINPSZrr VR512:$src1, VR512:$src2)>;
2942 def : Pat<(v8f64 (int_x86_avx512_mask_min_pd_512 (v8f64 VR512:$src1),
2943 (v8f64 VR512:$src2), (bc_v8f64 (v16i32 immAllZerosV)),
2944 (i8 -1), FROUND_CURRENT)),
2945 (VMINPDZrr VR512:$src1, VR512:$src2)>;
2946 //===----------------------------------------------------------------------===//
2947 // AVX-512 VPTESTM instructions
2948 //===----------------------------------------------------------------------===//
// vptestm-style test: result is a mask register (KRC), computed by OpNode
// (X86testm/X86testnm) over two vectors; rm form bitconverts the load.
2950 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2951 RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
2952 SDNode OpNode, ValueType vt> {
2953 def rr : AVX512PI<opc, MRMSrcReg,
2954 (outs KRC:$dst), (ins RC:$src1, RC:$src2),
2955 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2956 [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
2957 SSEPackedInt>, EVEX_4V;
2958 def rm : AVX512PI<opc, MRMSrcMem,
2959 (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
2960 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2961 [(set KRC:$dst, (OpNode (vt RC:$src1),
2962 (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
// vptestmd/q (T8PD) and, with CDI, vptestnmd/q (T8XS). Unmasked intrinsic
// calls are lowered to the rr forms with the k-register copied out to GPR.
2965 defm VPTESTMDZ  : avx512_vptest<0x27, "vptestmd", VK16, VR512,  f512mem,
2966 memopv16i32, X86testm, v16i32>, T8PD, EVEX_V512,
2967 EVEX_CD8<32, CD8VF>;
2968 defm VPTESTMQZ  : avx512_vptest<0x27, "vptestmq", VK8, VR512,  f512mem,
2969 memopv8i64, X86testm, v8i64>, T8PD, EVEX_V512, VEX_W,
2970 EVEX_CD8<64, CD8VF>;
2972 let Predicates = [HasCDI] in {
2973 defm VPTESTNMDZ  : avx512_vptest<0x27, "vptestnmd", VK16, VR512,  f512mem,
2974 memopv16i32, X86testnm, v16i32>, T8XS, EVEX_V512,
2975 EVEX_CD8<32, CD8VF>;
2976 defm VPTESTNMQZ  : avx512_vptest<0x27, "vptestnmq", VK8, VR512,  f512mem,
2977 memopv8i64, X86testnm, v8i64>, T8XS, EVEX_V512, VEX_W,
2978 EVEX_CD8<64, CD8VF>;
2981 def : Pat <(i16 (int_x86_avx512_mask_ptestm_d_512 (v16i32 VR512:$src1),
2982 (v16i32 VR512:$src2), (i16 -1))),
2983 (COPY_TO_REGCLASS (VPTESTMDZrr VR512:$src1, VR512:$src2), GR16)>;
2985 def : Pat <(i8 (int_x86_avx512_mask_ptestm_q_512 (v8i64 VR512:$src1),
2986 (v8i64 VR512:$src2), (i8 -1))),
2987 (COPY_TO_REGCLASS (VPTESTMQZrr VR512:$src1, VR512:$src2), GR8)>;
2988 //===----------------------------------------------------------------------===//
2989 // AVX-512 Shift instructions
2990 //===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate: ri (register) and mi (memory) forms, each
// with a merge-masked k variant (assembly-only, empty pattern list).
2991 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
2992 string OpcodeStr, SDNode OpNode, RegisterClass RC,
2993 ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
2994 RegisterClass KRC> {
2995 def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
2996 (ins RC:$src1, i8imm:$src2),
2997 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2998 [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
2999 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3000 def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
3001 (ins KRC:$mask, RC:$src1, i8imm:$src2),
3002 !strconcat(OpcodeStr,
3003 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3004 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
3005 def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3006 (ins x86memop:$src1, i8imm:$src2),
3007 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3008 [(set RC:$dst, (OpNode (mem_frag addr:$src1),
3009 (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3010 def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
3011 (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
3012 !strconcat(OpcodeStr,
3013 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3014 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// Shift-by-vector-count forms: the count comes from the low element of a
// 128-bit XMM register (SrcVT, e.g. v4i32/v2i64) regardless of the vector
// width being shifted. Generates rr/rrk/rm/rmk; as with the immediate forms,
// the EVEX_K write-masked variants carry no selection pattern.
3017 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3018 RegisterClass RC, ValueType vt, ValueType SrcVT,
3019 PatFrag bc_frag, RegisterClass KRC> {
3020 // src2 is always 128-bit
3021 def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3022 (ins RC:$src1, VR128X:$src2),
3023 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3024 [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
3025 SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
3026 def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
3027 (ins KRC:$mask, RC:$src1, VR128X:$src2),
3028 !strconcat(OpcodeStr,
3029 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3030 [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
// Memory count: load 128 bits as v2i64 and bitcast to SrcVT via bc_frag.
3031 def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3032 (ins RC:$src1, i128mem:$src2),
3033 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3034 [(set RC:$dst, (vt (OpNode RC:$src1,
3035 (bc_frag (memopv2i64 addr:$src2)))))],
3036 SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
3037 def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
3038 (ins KRC:$mask, RC:$src1, i128mem:$src2),
3039 !strconcat(OpcodeStr,
3040 " \t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
3041 [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
// 512-bit logical/arithmetic shift instantiations. Each mnemonic is
// instantiated twice under the same defm name — once with the immediate
// multiclass (rmi: suffixes ri/rik/mi/mik) and once with the XMM-count
// multiclass (rrm: suffixes rr/rrk/rm/rmk); the generated def names do not
// collide because the suffix sets are disjoint. The immediate forms use
// full-vector memory (CD8VF); the XMM-count forms use quarter tuple (CD8VQ).
3044 defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
3045 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3046 EVEX_V512, EVEX_CD8<32, CD8VF>;
3047 defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
3048 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3049 EVEX_CD8<32, CD8VQ>;
3051 defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
3052 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3053 EVEX_CD8<64, CD8VF>, VEX_W;
3054 defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
3055 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3056 EVEX_CD8<64, CD8VQ>, VEX_W;
3058 defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
3059 VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
3060 EVEX_CD8<32, CD8VF>;
3061 defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
3062 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3063 EVEX_CD8<32, CD8VQ>;
3065 defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
3066 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3067 EVEX_CD8<64, CD8VF>, VEX_W;
3068 defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
3069 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3070 EVEX_CD8<64, CD8VQ>, VEX_W;
3072 defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
3073 VR512, v16i32, i512mem, memopv16i32, VK16WM>,
3074 EVEX_V512, EVEX_CD8<32, CD8VF>;
3075 defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
3076 VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
3077 EVEX_CD8<32, CD8VQ>;
// vpsraq (64-bit arithmetic right shift) is new in AVX-512; both forms
// require VEX_W for the 64-bit element size.
3079 defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
3080 VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
3081 EVEX_CD8<64, CD8VF>, VEX_W;
3082 defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
3083 VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
3084 EVEX_CD8<64, CD8VQ>, VEX_W;
3086 //===-------------------------------------------------------------------===//
3087 // Variable Bit Shifts
3088 //===-------------------------------------------------------------------===//
// Per-element variable shifts (vpsllv/vpsrlv/vpsrav): each destination element
// is shifted by the count in the corresponding element of $src2. Selected
// directly from the generic shl/srl/sra SDNodes on 512-bit vector types.
3089 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
3090 RegisterClass RC, ValueType vt,
3091 X86MemOperand x86memop, PatFrag mem_frag> {
3092 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3093 (ins RC:$src1, RC:$src2),
3094 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3096 (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
3098 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3099 (ins RC:$src1, x86memop:$src2),
3100 !strconcat(OpcodeStr, " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3102 (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
// Instantiations: dword forms (v16i32) and VEX_W qword forms (v8i64), all
// full-vector memory (CD8VF).
3106 defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
3107 i512mem, memopv16i32>, EVEX_V512,
3108 EVEX_CD8<32, CD8VF>;
3109 defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
3110 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3111 EVEX_CD8<64, CD8VF>;
3112 defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
3113 i512mem, memopv16i32>, EVEX_V512,
3114 EVEX_CD8<32, CD8VF>;
3115 defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
3116 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3117 EVEX_CD8<64, CD8VF>;
3118 defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
3119 i512mem, memopv16i32>, EVEX_V512,
3120 EVEX_CD8<32, CD8VF>;
3121 defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
3122 i512mem, memopv8i64>, EVEX_V512, VEX_W,
3123 EVEX_CD8<64, CD8VF>;
3125 //===----------------------------------------------------------------------===//
3126 // AVX-512 - MOVDDUP
3127 //===----------------------------------------------------------------------===//
// Duplicate double-precision elements (X86Movddup shuffle node); register and
// full-width memory forms only, no masked variants here.
3129 multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
3130 X86MemOperand x86memop, PatFrag memop_frag> {
3131 def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3132 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3133 [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
3134 def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3135 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3137 (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
3140 defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
3141 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Fold a scalar f64 load that is broadcast via scalar_to_vector + movddup
// into the memory form.
3142 def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
3143 (VMOVDDUPZrm addr:$src)>;
3145 //===---------------------------------------------------------------------===//
3146 // Replicate Single FP - MOVSHDUP and MOVSLDUP
3147 //===---------------------------------------------------------------------===//
// Replicate odd (movshdup) or even (movsldup) single-precision elements.
3148 multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
3149 ValueType vt, RegisterClass RC, PatFrag mem_frag,
3150 X86MemOperand x86memop> {
3151 def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3152 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3153 [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
3155 def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3156 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3157 [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
3160 defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
3161 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3162 EVEX_CD8<32, CD8VF>;
3163 defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
3164 v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
3165 EVEX_CD8<32, CD8VF>;
// The instantiations above are typed v16f32; these patterns reuse the same
// instructions for the v16i32-typed shuffle nodes.
3167 def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
3168 def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
3169 (VMOVSHDUPZrm addr:$src)>;
3170 def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
3171 def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
3172 (VMOVSLDUPZrm addr:$src)>;
3174 //===----------------------------------------------------------------------===//
3175 // Move Low to High and High to Low packed FP Instructions
3176 //===----------------------------------------------------------------------===//
// EVEX-encoded 128-bit vmovlhps/vmovhlps; selected from the X86Movlhps /
// X86Movhlps shuffle nodes on v4f32.
3177 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
3178 (ins VR128X:$src1, VR128X:$src2),
3179 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3180 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
3181 IIC_SSE_MOV_LH>, EVEX_4V;
3182 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
3183 (ins VR128X:$src1, VR128X:$src2),
3184 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3185 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
3186 IIC_SSE_MOV_LH>, EVEX_4V;
// Reuse the FP instructions for the integer-typed forms of the same shuffles.
3188 let Predicates = [HasAVX512] in {
3190 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3191 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
3192 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
3193 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
3196 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
3197 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
3200 //===----------------------------------------------------------------------===//
3201 // FMA - Fused Multiply Operations
// Packed 213-form FMA. $src1 is tied to $dst (FMA3 reads and overwrites one
// source). Three addressing forms:
//   r  - reg/reg/reg, emitted through AVX512_masking_3src so the masked
//        variants are generated alongside,
//   m  - full-vector memory for $src3,
//   mb - broadcast memory (EVEX_B): a scalar element splat via X86VBroadcast.
3203 let Constraints = "$src1 = $dst" in {
3204 multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
3205 RegisterClass RC, X86MemOperand x86memop,
3206 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3207 string BrdcstStr, SDNode OpNode, ValueType OpVT,
3208 RegisterClass KRC> {
3209 defm r: AVX512_masking_3src<opc, MRMSrcReg, (outs RC:$dst),
3210 (ins RC:$src2, RC:$src3),
3211 OpcodeStr, "$src3, $src2", "$src2, $src3",
3212 (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)), OpVT, RC, KRC>,
3216 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3217 (ins RC:$src1, RC:$src2, x86memop:$src3),
3218 !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3219 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
3220 (mem_frag addr:$src3))))]>;
3221 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3222 (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
3223 !strconcat(OpcodeStr, " \t{${src3}", BrdcstStr,
3224 ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
3225 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
3226 (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
3228 } // Constraints = "$src1 = $dst"
// 512-bit 213-form packed FMA instantiations. Single-precision variants use
// "{1to16}" broadcast of an f32 scalar; double-precision use "{1to8}" of an
// f64 scalar plus VEX_W.
3230 let ExeDomain = SSEPackedSingle in {
3231 defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
3232 memopv16f32, f32mem, loadf32, "{1to16}",
3233 X86Fmadd, v16f32, VK16WM>, EVEX_V512,
3234 EVEX_CD8<32, CD8VF>;
3235 defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
3236 memopv16f32, f32mem, loadf32, "{1to16}",
3237 X86Fmsub, v16f32, VK16WM>, EVEX_V512,
3238 EVEX_CD8<32, CD8VF>;
3239 defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
3240 memopv16f32, f32mem, loadf32, "{1to16}",
3241 X86Fmaddsub, v16f32, VK16WM>,
3242 EVEX_V512, EVEX_CD8<32, CD8VF>;
3243 defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
3244 memopv16f32, f32mem, loadf32, "{1to16}",
3245 X86Fmsubadd, v16f32, VK16WM>,
3246 EVEX_V512, EVEX_CD8<32, CD8VF>;
3247 defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
3248 memopv16f32, f32mem, loadf32, "{1to16}",
3249 X86Fnmadd, v16f32, VK16WM>, EVEX_V512,
3250 EVEX_CD8<32, CD8VF>;
3251 defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
3252 memopv16f32, f32mem, loadf32, "{1to16}",
3253 X86Fnmsub, v16f32, VK16WM>, EVEX_V512,
3254 EVEX_CD8<32, CD8VF>;
3256 let ExeDomain = SSEPackedDouble in {
3257 defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
3258 memopv8f64, f64mem, loadf64, "{1to8}",
3259 X86Fmadd, v8f64, VK8WM>, EVEX_V512,
3260 VEX_W, EVEX_CD8<64, CD8VF>;
3261 defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
3262 memopv8f64, f64mem, loadf64, "{1to8}",
3263 X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3264 EVEX_CD8<64, CD8VF>;
3265 defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
3266 memopv8f64, f64mem, loadf64, "{1to8}",
3267 X86Fmaddsub, v8f64, VK8WM>,
3268 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3269 defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
3270 memopv8f64, f64mem, loadf64, "{1to8}",
3271 X86Fmsubadd, v8f64, VK8WM>,
3272 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
3273 defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
3274 memopv8f64, f64mem, loadf64, "{1to8}",
3275 X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W,
3276 EVEX_CD8<64, CD8VF>;
3277 defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
3278 memopv8f64, f64mem, loadf64, "{1to8}",
3279 X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W,
3280 EVEX_CD8<64, CD8VF>;
// Packed 132-form FMA: only memory forms are defined here (m: full-vector
// load for $src2; mb: EVEX_B broadcast of a scalar). The operand order in the
// patterns — src1 * mem, + src3 — is what distinguishes 132 from 213.
// $src1 remains tied to $dst.
3283 let Constraints = "$src1 = $dst" in {
3284 multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
3285 RegisterClass RC, X86MemOperand x86memop,
3286 PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
3287 string BrdcstStr, SDNode OpNode, ValueType OpVT> {
3289 def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3290 (ins RC:$src1, RC:$src3, x86memop:$src2),
3291 !strconcat(OpcodeStr, " \t{$src2, $src3, $dst|$dst, $src3, $src2}"),
3292 [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
3293 def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3294 (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
3295 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
3296 ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
3297 [(set RC:$dst, (OpNode RC:$src1,
3298 (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
3300 } // Constraints = "$src1 = $dst"
// 512-bit 132-form packed FMA instantiations (memory/broadcast forms only);
// layout mirrors the 213-form group above.
3303 let ExeDomain = SSEPackedSingle in {
3304 defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
3305 memopv16f32, f32mem, loadf32, "{1to16}",
3306 X86Fmadd, v16f32>, EVEX_V512,
3307 EVEX_CD8<32, CD8VF>;
3308 defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
3309 memopv16f32, f32mem, loadf32, "{1to16}",
3310 X86Fmsub, v16f32>, EVEX_V512,
3311 EVEX_CD8<32, CD8VF>;
3312 defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
3313 memopv16f32, f32mem, loadf32, "{1to16}",
3314 X86Fmaddsub, v16f32>,
3315 EVEX_V512, EVEX_CD8<32, CD8VF>;
3316 defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
3317 memopv16f32, f32mem, loadf32, "{1to16}",
3318 X86Fmsubadd, v16f32>,
3319 EVEX_V512, EVEX_CD8<32, CD8VF>;
3320 defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
3321 memopv16f32, f32mem, loadf32, "{1to16}",
3322 X86Fnmadd, v16f32>, EVEX_V512,
3323 EVEX_CD8<32, CD8VF>;
3324 defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
3325 memopv16f32, f32mem, loadf32, "{1to16}",
3326 X86Fnmsub, v16f32>, EVEX_V512,
3327 EVEX_CD8<32, CD8VF>;
3329 let ExeDomain = SSEPackedDouble in {
3330 defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
3331 memopv8f64, f64mem, loadf64, "{1to8}",
3332 X86Fmadd, v8f64>, EVEX_V512,
3333 VEX_W, EVEX_CD8<64, CD8VF>;
3334 defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
3335 memopv8f64, f64mem, loadf64, "{1to8}",
3336 X86Fmsub, v8f64>, EVEX_V512, VEX_W,
3337 EVEX_CD8<64, CD8VF>;
3338 defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
3339 memopv8f64, f64mem, loadf64, "{1to8}",
3340 X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
3341 EVEX_CD8<64, CD8VF>;
3342 defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
3343 memopv8f64, f64mem, loadf64, "{1to8}",
3344 X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
3345 EVEX_CD8<64, CD8VF>;
3346 defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
3347 memopv8f64, f64mem, loadf64, "{1to8}",
3348 X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
3349 EVEX_CD8<64, CD8VF>;
3350 defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
3351 memopv8f64, f64mem, loadf64, "{1to8}",
3352 X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
3353 EVEX_CD8<64, CD8VF>;
// Scalar FMA (213 form) on FR32X/FR64X. The register form is marked
// isCommutable (the two multiplicands can swap); note the patterns multiply
// $src2 * $src1 and add $src3. The memory form hard-codes f128mem as the
// operand; the multiclass's x86memop/memop parameters are used by the
// instantiations below for operand-size tagging.
3357 let Constraints = "$src1 = $dst" in {
3358 multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
3359 RegisterClass RC, ValueType OpVT,
3360 X86MemOperand x86memop, Operand memop,
3362 let isCommutable = 1 in
3363 def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
3364 (ins RC:$src1, RC:$src2, RC:$src3),
3365 !strconcat(OpcodeStr,
3366 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3368 (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
3370 def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
3371 (ins RC:$src1, RC:$src2, f128mem:$src3),
3372 !strconcat(OpcodeStr,
3373 " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
3375 (OpVT (OpNode RC:$src2, RC:$src1,
3376 (mem_frag addr:$src3))))]>;
3379 } // Constraints = "$src1 = $dst"
// Scalar instantiations: ss forms (f32, CD8VT1 over 32-bit element) and sd
// forms (f64, VEX_W, CD8VT1 over 64-bit element).
3381 defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
3382 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3383 defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X,
3384 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3385 defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X,
3386 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3387 defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X,
3388 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3389 defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X,
3390 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3391 defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X,
3392 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3393 defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X,
3394 f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
3395 defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X,
3396 f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
3398 //===----------------------------------------------------------------------===//
3399 // AVX-512 Scalar convert from sign integer to float/double
3400 //===----------------------------------------------------------------------===//
// GPR -> scalar FP conversion skeleton. Both forms are pattern-less
// (hasSideEffects = 0, empty pattern lists); selection is done by the
// standalone Pat<> definitions below, which pass IMPLICIT_DEF for $src1.
3402 multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3403 X86MemOperand x86memop, string asm> {
3404 let hasSideEffects = 0 in {
3405 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
3406 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3409 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
3410 (ins DstRC:$src1, x86memop:$src),
3411 !strconcat(asm," \t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
3413 } // hasSideEffects = 0
// Instantiations of the signed (cvtsi2*) and unsigned (cvtusi2*, opcode 0x7B,
// AVX-512 only) GPR -> scalar FP conversions, plus the sint_to_fp/uint_to_fp
// selection patterns. $src1 is irrelevant to the result, so patterns feed it
// an IMPLICIT_DEF of the destination FP type.
3415 let Predicates = [HasAVX512] in {
3416 defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
3417 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3418 defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
3419 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3420 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
3421 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3422 defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
3423 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// sint_to_fp from memory folds the integer load into the rm form.
3425 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
3426 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3427 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
3428 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3429 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
3430 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3431 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
3432 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3434 def : Pat<(f32 (sint_to_fp GR32:$src)),
3435 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3436 def : Pat<(f32 (sint_to_fp GR64:$src)),
3437 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3438 def : Pat<(f64 (sint_to_fp GR32:$src)),
3439 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3440 def : Pat<(f64 (sint_to_fp GR64:$src)),
3441 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3443 defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
3444 XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3445 defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
3446 XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3447 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
3448 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
3449 defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
3450 XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
3452 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
3453 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3454 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
3455 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
3456 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
3457 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3458 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
3459 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
3461 def : Pat<(f32 (uint_to_fp GR32:$src)),
3462 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
3463 def : Pat<(f32 (uint_to_fp GR64:$src)),
3464 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
3465 def : Pat<(f64 (uint_to_fp GR32:$src)),
3466 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
3467 def : Pat<(f64 (uint_to_fp GR64:$src)),
3468 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
3471 //===----------------------------------------------------------------------===//
3472 // AVX-512 Scalar convert from float/double to integer
3473 //===----------------------------------------------------------------------===//
// Intrinsic-based scalar FP -> GPR conversion: the rr form selects the given
// intrinsic directly; the rm form is pattern-less (encoding only).
3474 multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3475 Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
3477 let hasSideEffects = 0 in {
3478 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3479 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3480 [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
3481 Requires<[HasAVX512]>;
3483 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
3484 !strconcat(asm," \t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
3485 Requires<[HasAVX512]>;
3486 } // hasSideEffects = 0
// Rounding (non-truncating) scalar conversions: cvtss2si/cvtsd2si plus the
// AVX-512-only unsigned variants (opcode 0x79). The 64-bit destination forms
// add VEX_W; CD8 scaling always follows the source element size.
3488 let Predicates = [HasAVX512] in {
3489 // Convert float/double to signed/unsigned int 32/64
3490 defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
3491 ssmem, sse_load_f32, "cvtss2si">,
3492 XS, EVEX_CD8<32, CD8VT1>;
3493 defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
3494 ssmem, sse_load_f32, "cvtss2si">,
3495 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
3496 defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
3497 ssmem, sse_load_f32, "cvtss2usi">,
3498 XS, EVEX_CD8<32, CD8VT1>;
3499 defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3500 int_x86_avx512_cvtss2usi64, ssmem,
3501 sse_load_f32, "cvtss2usi">, XS, VEX_W,
3502 EVEX_CD8<32, CD8VT1>;
3503 defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
3504 sdmem, sse_load_f64, "cvtsd2si">,
3505 XD, EVEX_CD8<64, CD8VT1>;
3506 defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
3507 sdmem, sse_load_f64, "cvtsd2si">,
3508 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3509 defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
3510 sdmem, sse_load_f64, "cvtsd2usi">,
3511 XD, EVEX_CD8<64, CD8VT1>;
3512 defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
3513 int_x86_avx512_cvtsd2usi64, sdmem,
3514 sse_load_f64, "cvtsd2usi">, XD, VEX_W,
3515 EVEX_CD8<64, CD8VT1>;
// isCodeGenOnly: intrinsic-selected variants of the conversions. The int->fp
// group reuses the shared SSE sse12_cvt_sint_3addr multiclass; the fp->int
// truncating group (opcodes 0x2C signed / 0x78 unsigned) reuses
// avx512_cvt_s_int above. These duplicate encodings of the asm-visible defs
// and exist only so the intrinsics have something to select.
3517 let isCodeGenOnly = 1 in {
3518 defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3519 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
3520 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3521 defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3522 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
3523 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3524 defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3525 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
3526 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3527 defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3528 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
3529 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3531 defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3532 int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}",
3533 SSE_CVT_Scalar, 0>, XS, EVEX_4V;
3534 defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3535 int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}",
3536 SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
3537 defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
3538 int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
3539 SSE_CVT_Scalar, 0>, XD, EVEX_4V;
3540 defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
3541 int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}",
3542 SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
3543 } // isCodeGenOnly = 1
3545 // Convert float/double to signed/unsigned int 32/64 with truncation
3546 let isCodeGenOnly = 1 in {
3547 defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
3548 ssmem, sse_load_f32, "cvttss2si">,
3549 XS, EVEX_CD8<32, CD8VT1>;
3550 defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3551 int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
3552 "cvttss2si">, XS, VEX_W,
3553 EVEX_CD8<32, CD8VT1>;
3554 defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
3555 sdmem, sse_load_f64, "cvttsd2si">, XD,
3556 EVEX_CD8<64, CD8VT1>;
3557 defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
3558 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
3559 "cvttsd2si">, XD, VEX_W,
3560 EVEX_CD8<64, CD8VT1>;
3561 defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3562 int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
3563 "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
3564 defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3565 int_x86_avx512_cvttss2usi64, ssmem,
3566 sse_load_f32, "cvttss2usi">, XS, VEX_W,
3567 EVEX_CD8<32, CD8VT1>;
3568 defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
3569 int_x86_avx512_cvttsd2usi,
3570 sdmem, sse_load_f64, "cvttsd2usi">, XD,
3571 EVEX_CD8<64, CD8VT1>;
3572 defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
3573 int_x86_avx512_cvttsd2usi64, sdmem,
3574 sse_load_f64, "cvttsd2usi">, XD, VEX_W,
3575 EVEX_CD8<64, CD8VT1>;
3576 } // isCodeGenOnly = 1
// Node-based scalar truncating conversions: selected from the generic
// fp_to_sint / fp_to_uint nodes on FR32X/FR64X sources (as opposed to the
// intrinsic-based avx512_cvt_s_int forms above). The rm form folds the FP
// load.
3578 multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
3579 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
3581 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3582 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3583 [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
3584 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3585 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3586 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
// All eight {ss,sd} x {si,usi} x {32,64} truncating combinations.
3589 defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
3590 loadf32, "cvttss2si">, XS,
3591 EVEX_CD8<32, CD8VT1>;
3592 defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
3593 loadf32, "cvttss2usi">, XS,
3594 EVEX_CD8<32, CD8VT1>;
3595 defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
3596 loadf32, "cvttss2si">, XS, VEX_W,
3597 EVEX_CD8<32, CD8VT1>;
3598 defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
3599 loadf32, "cvttss2usi">, XS, VEX_W,
3600 EVEX_CD8<32, CD8VT1>;
3601 defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
3602 loadf64, "cvttsd2si">, XD,
3603 EVEX_CD8<64, CD8VT1>;
3604 defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
3605 loadf64, "cvttsd2usi">, XD,
3606 EVEX_CD8<64, CD8VT1>;
3607 defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
3608 loadf64, "cvttsd2si">, XD, VEX_W,
3609 EVEX_CD8<64, CD8VT1>;
3610 defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
3611 loadf64, "cvttsd2usi">, XD, VEX_W,
3612 EVEX_CD8<64, CD8VT1>;
3614 //===----------------------------------------------------------------------===//
3615 // AVX-512 Convert form float to double and back
3616 //===----------------------------------------------------------------------===//
// Pattern-less scalar f32<->f64 conversion instructions; selection happens
// via the Pat<> definitions below (fextend / fround / extloadf32).
3617 let hasSideEffects = 0 in {
3618 def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
3619 (ins FR32X:$src1, FR32X:$src2),
3620 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3621 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
3623 def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
3624 (ins FR32X:$src1, f32mem:$src2),
3625 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3626 []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
3627 EVEX_CD8<32, CD8VT1>;
3629 // Convert scalar double to scalar single
3630 def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
3631 (ins FR64X:$src1, FR64X:$src2),
3632 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3633 []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
3635 def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
3636 (ins FR64X:$src1, f64mem:$src2),
3637 "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3638 []>, EVEX_4V, VEX_LIG, VEX_W,
3639 Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
// fextend of a register passes the source as both operands.
3642 def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
3643 Requires<[HasAVX512]>;
3644 def : Pat<(fextend (loadf32 addr:$src)),
3645 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
// extloadf32: folded load when optimizing for size, explicit vmovss load +
// register convert when optimizing for speed.
3647 def : Pat<(extloadf32 addr:$src),
3648 (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
3649 Requires<[HasAVX512, OptForSize]>;
3651 def : Pat<(extloadf32 addr:$src),
3652 (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
3653 Requires<[HasAVX512, OptForSpeed]>;
3655 def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
3656 Requires<[HasAVX512]>;
// Packed conversion skeletons. The _with_rc variant additionally emits an
// rrb form taking an AVX512RC static-rounding operand (EVEX_B + EVEX_RC,
// pattern-less). In both, the rm form bitconverts the loaded value to InVT,
// which lets instantiations pass an integer load fragment of a different
// element width (e.g. memopv8i64 for vcvtdq2ps).
3658 multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
3659 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3660 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3662 let hasSideEffects = 0 in {
3663 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3664 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3666 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3667 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3668 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3669 [], d>, EVEX, EVEX_B, EVEX_RC;
3671 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3672 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3674 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3675 } // hasSideEffects = 0
// Same as above minus the rounding-control (rrb) form.
3678 multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
3679 RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
3680 X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
3682 let hasSideEffects = 0 in {
3683 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3684 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3686 (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
3688 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3689 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3691 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
3692 } // hasSideEffects = 0
// Packed f64 <-> f32 conversions. pd2ps narrows (fround, gets the
// rounding-control rrb form); ps2pd widens (fextend, half-width 256-bit
// source, CD8VH tuple).
3695 defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
3696 memopv8f64, f512mem, v8f32, v8f64,
3697 SSEPackedSingle>, EVEX_V512, VEX_W, PD,
3698 EVEX_CD8<64, CD8VF>;
3700 defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
3701 memopv4f64, f256mem, v8f64, v8f32,
3702 SSEPackedDouble>, EVEX_V512, PS,
3703 EVEX_CD8<32, CD8VH>;
3704 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3705 (VCVTPS2PDZrm addr:$src)>;
// Lower the unmasked (zero passthru, all-ones mask) cvtpd2ps intrinsic:
// current rounding mode -> rr form, explicit rounding imm -> rrb form.
3707 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3708 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
3709 (VCVTPD2PSZrr VR512:$src)>;
3711 def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
3712 (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
3713 (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
3715 //===----------------------------------------------------------------------===//
3716 // AVX-512 Vector convert from sign integer to float/double
3717 //===----------------------------------------------------------------------===//
// 512-bit integer<->FP conversions (signed and unsigned dword forms).
// CD8VH variants read a 256-bit (half-width) source; the *_with_rc users
// additionally get the static-rounding register form.
3719 defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
3720 memopv8i64, i512mem, v16f32, v16i32,
3721 SSEPackedSingle>, EVEX_V512, PS,
3722 EVEX_CD8<32, CD8VF>;
3724 defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
3725 memopv4i64, i256mem, v8f64, v8i32,
3726 SSEPackedDouble>, EVEX_V512, XS,
3727 EVEX_CD8<32, CD8VH>;
3729 defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
3730 memopv16f32, f512mem, v16i32, v16f32,
3731 SSEPackedSingle>, EVEX_V512, XS,
3732 EVEX_CD8<32, CD8VF>;
3734 defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
3735 memopv8f64, f512mem, v8i32, v8f64,
3736 SSEPackedDouble>, EVEX_V512, PD, VEX_W,
3737 EVEX_CD8<64, CD8VF>;
3739 defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
3740 memopv16f32, f512mem, v16i32, v16f32,
3741 SSEPackedSingle>, EVEX_V512, PS,
3742 EVEX_CD8<32, CD8VF>;
3744 // cvttps2udq (src, 0, mask-all-ones, sae-current)
3745 def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
3746 (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
3747 (VCVTTPS2UDQZrr VR512:$src)>;
3749 defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
3750 memopv8f64, f512mem, v8i32, v8f64,
3751 SSEPackedDouble>, EVEX_V512, PS, VEX_W,
3752 EVEX_CD8<64, CD8VF>;
3754 // cvttpd2udq (src, 0, mask-all-ones, sae-current)
3755 def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
3756 (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
3757 (VCVTTPD2UDQZrr VR512:$src)>;
3759 defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
3760 memopv4i64, f256mem, v8f64, v8i32,
3761 SSEPackedDouble>, EVEX_V512, XS,
3762 EVEX_CD8<32, CD8VH>;
// NOTE(review): this defm uses memopv16i32 where the sibling dword cvt defms
// use 64-bit-element memops (e.g. memopv8i64) — confirm intentional.
3764 defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
3765 memopv16i32, f512mem, v16f32, v16i32,
3766 SSEPackedSingle>, EVEX_V512, XD,
3767 EVEX_CD8<32, CD8VF>;
// No 128/256-bit forms of these unsigned conversions exist at this feature
// level, so widen the operand to 512 bits with SUBREG_TO_REG, run the Z
// instruction, and extract the low subregister of the result.
3769 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
3770 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3771 (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3773 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
3774 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
3775 (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3777 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
3778 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3779 (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
3781 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
3782 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
3783 (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
3785 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
3786 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
3787 (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
// Mask intrinsics with all-ones mask / zero pass-through map to the plain
// instruction; forms taking imm:$rc select the rrb (static rounding) variant.
3789 def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
3790 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3791 (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
3792 def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
3793 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3794 (VCVTDQ2PDZrr VR256X:$src)>;
3795 def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
3796 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
3797 (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
3798 def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
3799 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
3800 (VCVTUDQ2PDZrr VR256X:$src)>;
// FP -> int conversions honoring the current rounding mode (non-truncating).
// No ISel patterns here; instances are selected through the explicit mask
// intrinsic patterns below. rrb carries a static rounding-control operand.
3802 multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
3803 RegisterClass DstRC, PatFrag mem_frag,
3804 X86MemOperand x86memop, Domain d> {
3805 let hasSideEffects = 0 in {
3806 def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
3807 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3809 def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
3810 !strconcat(asm," \t{$rc, $src, $dst|$dst, $src, $rc}"),
3811 [], d>, EVEX, EVEX_B, EVEX_RC;
3813 def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
3814 !strconcat(asm," \t{$src, $dst|$dst, $src}"),
3816 } // hasSideEffects = 0
// Rounding-mode-respecting FP->int instances (signed and unsigned), plus
// patterns mapping the *_512 mask intrinsics (all-ones mask, zero
// pass-through, explicit $rc) onto the rrb forms.
3819 defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
3820 memopv16f32, f512mem, SSEPackedSingle>, PD,
3821 EVEX_V512, EVEX_CD8<32, CD8VF>;
3822 defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
3823 memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
3824 EVEX_V512, EVEX_CD8<64, CD8VF>;
3826 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
3827 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3828 (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
3830 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
3831 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3832 (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
3834 defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
3835 memopv16f32, f512mem, SSEPackedSingle>,
3836 PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
3837 defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
3838 memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
3839 PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
3841 def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
3842 (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
3843 (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
3845 def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
3846 (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
3847 (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
// Fold loads directly into the narrowing/widening conversions.
// NOTE(review): the closing brace of this `let Predicates` scope is not
// visible in this chunk.
3849 let Predicates = [HasAVX512] in {
3850 def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
3851 (VCVTPD2PSZrm addr:$src)>;
3852 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
3853 (VCVTPS2PDZrm addr:$src)>;
3856 //===----------------------------------------------------------------------===//
3857 // Half precision conversion instructions
3858 //===----------------------------------------------------------------------===//
// fp16 <-> fp32 conversions. vcvtph2ps has reg and (pattern-less) load forms;
// vcvtps2ph takes an i8 rounding immediate and has reg and store forms.
3859 multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
3860 X86MemOperand x86memop> {
3861 def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
3862 "vcvtph2ps\t{$src, $dst|$dst, $src}",
3864 let hasSideEffects = 0, mayLoad = 1 in
3865 def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
3866 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
3869 multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
3870 X86MemOperand x86memop> {
3871 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
3872 (ins srcRC:$src1, i32i8imm:$src2),
3873 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}",
3875 let hasSideEffects = 0, mayStore = 1 in
3876 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
3877 (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
3878 "vcvtps2ph \t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
// 512-bit instances: the fp16 side lives in a 256-bit register.
3881 defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
3882 EVEX_CD8<32, CD8VH>;
3883 defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
3884 EVEX_CD8<32, CD8VH>;
// Unmasked intrinsic forms (all-ones mask, zero pass-through).
3886 def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
3887 imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
3888 (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
3890 def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
3891 (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
3892 (VCVTPH2PSZrr VR256X:$src)>;
// Scalar ordered/unordered compares writing EFLAGS. The VCOMIS* pair gets
// empty patterns (assembly-only here); the Int_* variants are isCodeGenOnly
// wrappers over the X86(u)comi nodes operating on the vector register class.
3894 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
3895 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
3896 "ucomiss">, PS, EVEX, VEX_LIG,
3897 EVEX_CD8<32, CD8VT1>;
3898 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
3899 "ucomisd">, PD, EVEX,
3900 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3901 let Pattern = []<dag> in {
3902 defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
3903 "comiss">, PS, EVEX, VEX_LIG,
3904 EVEX_CD8<32, CD8VT1>;
3905 defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
3906 "comisd">, PD, EVEX,
3907 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3909 let isCodeGenOnly = 1 in {
3910 defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
3911 load, "ucomiss">, PS, EVEX, VEX_LIG,
3912 EVEX_CD8<32, CD8VT1>;
3913 defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
3914 load, "ucomisd">, PD, EVEX,
3915 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3917 defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
3918 load, "comiss">, PS, EVEX, VEX_LIG,
3919 EVEX_CD8<32, CD8VT1>;
3920 defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
3921 load, "comisd">, PD, EVEX,
3922 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
3926 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 2^-14-precision reciprocal / rsqrt approximations. Definitions have
// no patterns; the intrinsic pats below bounce through FR32X/FR64X copies.
3927 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3928 X86MemOperand x86memop> {
3929 let hasSideEffects = 0 in {
3930 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
3931 (ins RC:$src1, RC:$src2),
3932 !strconcat(OpcodeStr,
3933 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3934 let mayLoad = 1 in {
3935 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
3936 (ins RC:$src1, x86memop:$src2),
3937 !strconcat(OpcodeStr,
3938 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
3943 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
3944 EVEX_CD8<32, CD8VT1>;
3945 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
3946 VEX_W, EVEX_CD8<64, CD8VT1>;
3947 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
3948 EVEX_CD8<32, CD8VT1>;
3949 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
3950 VEX_W, EVEX_CD8<64, CD8VT1>;
// Intrinsic selection: move the v4f32/v2f64 operands into the scalar class,
// run the FR-class instruction, and copy the result back to VR128X.
3952 def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
3953 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
3954 (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3955 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3957 def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
3958 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
3959 (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3960 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3962 def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
3963 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
3964 (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
3965 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
3967 def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
3968 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
3969 (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
3970 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
3972 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 2^-14-precision approximations, selected via the X86frsqrt/X86frcp
// nodes; register and memory-source forms.
3973 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
3974 RegisterClass RC, X86MemOperand x86memop,
3975 PatFrag mem_frag, ValueType OpVt> {
3976 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
3977 !strconcat(OpcodeStr,
3978 " \t{$src, $dst|$dst, $src}"),
3979 [(set RC:$dst, (OpVt (OpNode RC:$src)))]>,
3981 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
3982 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
3983 [(set RC:$dst, (OpVt (OpNode (mem_frag addr:$src))))]>,
3986 defm VRSQRT14PSZ : avx512_fp14_p<0x4E, "vrsqrt14ps", X86frsqrt, VR512, f512mem,
3987 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
3988 defm VRSQRT14PDZ : avx512_fp14_p<0x4E, "vrsqrt14pd", X86frsqrt, VR512, f512mem,
3989 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
3990 defm VRCP14PSZ : avx512_fp14_p<0x4C, "vrcp14ps", X86frcp, VR512, f512mem,
3991 memopv16f32, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
3992 defm VRCP14PDZ : avx512_fp14_p<0x4C, "vrcp14pd", X86frcp, VR512, f512mem,
3993 memopv8f64, v8f64>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Unmasked intrinsic forms (all-ones mask, zero pass-through).
3995 def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
3996 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
3997 (VRSQRT14PSZr VR512:$src)>;
3998 def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
3999 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4000 (VRSQRT14PDZr VR512:$src)>;
4002 def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
4003 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
4004 (VRCP14PSZr VR512:$src)>;
4005 def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
4006 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
4007 (VRCP14PDZr VR512:$src)>;
4009 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 2^-28-precision approximations (AVX-512 ERI). rrb is the
// suppress-all-exceptions ({sae}, EVEX.B) form; intrinsic pats below select
// it for FROUND_NO_EXC calls.
4010 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
4011 X86MemOperand x86memop> {
4012 let hasSideEffects = 0, Predicates = [HasERI] in {
4013 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4014 (ins RC:$src1, RC:$src2),
4015 !strconcat(OpcodeStr,
4016 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4017 def rrb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4018 (ins RC:$src1, RC:$src2),
4019 !strconcat(OpcodeStr,
4020 " \t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
4021 []>, EVEX_4V, EVEX_B;
4022 let mayLoad = 1 in {
4023 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4024 (ins RC:$src1, x86memop:$src2),
4025 !strconcat(OpcodeStr,
4026 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
4031 defm VRCP28SS : avx512_fp28_s<0xCB, "vrcp28ss", FR32X, f32mem>,
4032 EVEX_CD8<32, CD8VT1>;
4033 defm VRCP28SD : avx512_fp28_s<0xCB, "vrcp28sd", FR64X, f64mem>,
4034 VEX_W, EVEX_CD8<64, CD8VT1>;
4035 defm VRSQRT28SS : avx512_fp28_s<0xCD, "vrsqrt28ss", FR32X, f32mem>,
4036 EVEX_CD8<32, CD8VT1>;
4037 defm VRSQRT28SD : avx512_fp28_s<0xCD, "vrsqrt28sd", FR64X, f64mem>,
4038 VEX_W, EVEX_CD8<64, CD8VT1>;
// Intrinsic selection via FR-class copies, same scheme as the fp14 scalars.
4040 def : Pat <(v4f32 (int_x86_avx512_rcp28_ss (v4f32 VR128X:$src1),
4041 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4043 (COPY_TO_REGCLASS (VRCP28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4044 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4046 def : Pat <(v2f64 (int_x86_avx512_rcp28_sd (v2f64 VR128X:$src1),
4047 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4049 (COPY_TO_REGCLASS (VRCP28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4050 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4052 def : Pat <(v4f32 (int_x86_avx512_rsqrt28_ss (v4f32 VR128X:$src1),
4053 (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1),
4055 (COPY_TO_REGCLASS (VRSQRT28SSrrb (COPY_TO_REGCLASS VR128X:$src1, FR32X),
4056 (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
4058 def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1),
4059 (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1),
4061 (COPY_TO_REGCLASS (VRSQRT28SDrrb (COPY_TO_REGCLASS VR128X:$src1, FR64X),
4062 (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
4064 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 2^-28-precision approximations (ERI); rb is the {sae} form selected
// by the FROUND_NO_EXC intrinsic patterns below.
4065 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr,
4066 RegisterClass RC, X86MemOperand x86memop> {
4067 let hasSideEffects = 0, Predicates = [HasERI] in {
4068 def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4069 !strconcat(OpcodeStr,
4070 " \t{$src, $dst|$dst, $src}"),
4072 def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4073 !strconcat(OpcodeStr,
4074 " \t{{sae}, $src, $dst|$dst, $src, {sae}}"),
4076 def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4077 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4081 defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>,
4082 EVEX_V512, EVEX_CD8<32, CD8VF>;
4083 defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>,
4084 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4085 defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>,
4086 EVEX_V512, EVEX_CD8<32, CD8VF>;
4087 defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>,
4088 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4090 def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src),
4091 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4092 (VRSQRT28PSZrb VR512:$src)>;
4093 def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src),
4094 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4095 (VRSQRT28PDZrb VR512:$src)>;
4097 def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src),
4098 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)),
4099 (VRCP28PSZrb VR512:$src)>;
4100 def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
4101 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)),
4102 (VRCP28PDZrb VR512:$src)>;
// Packed square root over 512-bit single/double vectors, selected from the
// OpNode (fsqrt at the instantiation site) with register and memory forms.
4104 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
4105 OpndItins itins_s, OpndItins itins_d> {
4106 def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4107 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4108 [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
4112 def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4113 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
4115 (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
4116 itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
4118 def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
4119 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4120 [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
// NOTE(review): the PD memory form bitconverts through memopv16f32 rather
// than a v8f64 memop — confirm this is intentional and not a pasto.
4124 def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
4125 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
4126 [(set VR512:$dst, (OpNode
4127 (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
4128 itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
// Scalar square root: FR-class pattern-less forms (SSZr/SDZr and their
// memory variants) plus isCodeGenOnly *_Int forms that implement the
// F32Int/F64Int intrinsics on VR128X.
4132 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
4133 Intrinsic F32Int, Intrinsic F64Int,
4134 OpndItins itins_s, OpndItins itins_d> {
4135 def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
4136 (ins FR32X:$src1, FR32X:$src2),
4137 !strconcat(OpcodeStr,
4138 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4139 [], itins_s.rr>, XS, EVEX_4V;
4140 let isCodeGenOnly = 1 in
4141 def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4142 (ins VR128X:$src1, VR128X:$src2),
4143 !strconcat(OpcodeStr,
4144 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4146 (F32Int VR128X:$src1, VR128X:$src2))],
4147 itins_s.rr>, XS, EVEX_4V;
4148 let mayLoad = 1 in {
4149 def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
4150 (ins FR32X:$src1, f32mem:$src2),
4151 !strconcat(OpcodeStr,
4152 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4153 [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
4154 let isCodeGenOnly = 1 in
4155 def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4156 (ins VR128X:$src1, ssmem:$src2),
4157 !strconcat(OpcodeStr,
4158 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4160 (F32Int VR128X:$src1, sse_load_f32:$src2))],
4161 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Double-precision mirror of the SS forms (XD prefix, VEX_W).
4163 def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
4164 (ins FR64X:$src1, FR64X:$src2),
4165 !strconcat(OpcodeStr,
4166 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4168 let isCodeGenOnly = 1 in
4169 def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
4170 (ins VR128X:$src1, VR128X:$src2),
4171 !strconcat(OpcodeStr,
4172 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4174 (F64Int VR128X:$src1, VR128X:$src2))],
4175 itins_s.rr>, XD, EVEX_4V, VEX_W;
4176 let mayLoad = 1 in {
4177 def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
4178 (ins FR64X:$src1, f64mem:$src2),
4179 !strconcat(OpcodeStr,
4180 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
4181 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
4182 let isCodeGenOnly = 1 in
4183 def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
4184 (ins VR128X:$src1, sdmem:$src2),
4185 !strconcat(OpcodeStr,
4186 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4188 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
4189 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
// Instantiate scalar + packed sqrt under one VSQRT prefix, then pattern-match
// fsqrt / sqrt intrinsics onto them.
4194 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4195 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4196 SSE_SQRTSS, SSE_SQRTSD>,
4197 avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
4198 SSE_SQRTPS, SSE_SQRTPD>;
4200 let Predicates = [HasAVX512] in {
// Unmasked packed-sqrt intrinsic -> plain instruction.
4201 def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
4202 (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
4203 (VSQRTPSZrr VR512:$src1)>;
4204 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
4205 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
4206 (VSQRTPDZrr VR512:$src1)>;
// Scalar fsqrt: pass IMPLICIT_DEF as the tied first source. Memory-operand
// folds are gated on OptForSize.
4208 def : Pat<(f32 (fsqrt FR32X:$src)),
4209 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4210 def : Pat<(f32 (fsqrt (load addr:$src))),
4211 (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
4212 Requires<[OptForSize]>;
4213 def : Pat<(f64 (fsqrt FR64X:$src)),
4214 (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
4215 def : Pat<(f64 (fsqrt (load addr:$src))),
4216 (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
4217 Requires<[OptForSize]>;
4219 def : Pat<(f32 (X86frsqrt FR32X:$src)),
4220 (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4221 def : Pat<(f32 (X86frsqrt (load addr:$src))),
4222 (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4223 Requires<[OptForSize]>;
4225 def : Pat<(f32 (X86frcp FR32X:$src)),
4226 (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
4227 def : Pat<(f32 (X86frcp (load addr:$src))),
4228 (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
4229 Requires<[OptForSize]>;
// SSE sqrt intrinsics on VR128X route through the FR-class scalar forms.
4231 def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
4232 (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
4233 (COPY_TO_REGCLASS VR128X:$src, FR32)),
4235 def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
4236 (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
4238 def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
4239 (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
4240 (COPY_TO_REGCLASS VR128X:$src, FR64)),
4242 def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
4243 (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
// Packed FP unary op with an i8 immediate (rounding/scale control),
// implementing the V4F32Int/V2F64Int intrinsics for register and memory
// sources in both single and double domains.
4247 multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
4248 X86MemOperand x86memop, RegisterClass RC,
4249 PatFrag mem_frag32, PatFrag mem_frag64,
4250 Intrinsic V4F32Int, Intrinsic V2F64Int,
4252 let ExeDomain = SSEPackedSingle in {
4253 // Intrinsic operation, reg.
4254 // Vector intrinsic operation, reg
4255 def PSr : AVX512AIi8<opcps, MRMSrcReg,
4256 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4257 !strconcat(OpcodeStr,
4258 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4259 [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
4261 // Vector intrinsic operation, mem
4262 def PSm : AVX512AIi8<opcps, MRMSrcMem,
4263 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4264 !strconcat(OpcodeStr,
4265 "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4267 (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
4268 EVEX_CD8<32, VForm>;
4269 } // ExeDomain = SSEPackedSingle
4271 let ExeDomain = SSEPackedDouble in {
4272 // Vector intrinsic operation, reg
4273 def PDr : AVX512AIi8<opcpd, MRMSrcReg,
4274 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4275 !strconcat(OpcodeStr,
4276 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4277 [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
4279 // Vector intrinsic operation, mem
4280 def PDm : AVX512AIi8<opcpd, MRMSrcMem,
4281 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4282 !strconcat(OpcodeStr,
4283 "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4285 (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
4286 EVEX_CD8<64, VForm>;
4287 } // ExeDomain = SSEPackedDouble
// Scalar FP binary op with an i8 immediate: pattern-less FR-class forms
// (SSr/SDr) plus intrinsic forms on VR128X with register and memory sources.
4290 multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
4294 let ExeDomain = GenericDomain in {
4296 let hasSideEffects = 0 in
4297 def SSr : AVX512AIi8<opcss, MRMSrcReg,
4298 (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
4299 !strconcat(OpcodeStr,
4300 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4303 // Intrinsic operation, reg.
4304 let isCodeGenOnly = 1 in
4305 def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
4306 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4307 !strconcat(OpcodeStr,
4308 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4309 [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
4311 // Intrinsic operation, mem.
4312 def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
4313 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
4314 !strconcat(OpcodeStr,
4315 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4316 [(set VR128X:$dst, (F32Int VR128X:$src1,
4317 sse_load_f32:$src2, imm:$src3))]>,
4318 EVEX_CD8<32, CD8VT1>;
// Double-precision mirror of the SS forms.
4321 let hasSideEffects = 0 in
4322 def SDr : AVX512AIi8<opcsd, MRMSrcReg,
4323 (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
4324 !strconcat(OpcodeStr,
4325 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4328 // Intrinsic operation, reg.
4329 let isCodeGenOnly = 1 in
4330 def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
4331 (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
4332 !strconcat(OpcodeStr,
4333 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4334 [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
4337 // Intrinsic operation, mem.
4338 def SDm : AVX512AIi8<opcsd, MRMSrcMem,
4339 (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
4340 !strconcat(OpcodeStr,
4341 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4343 (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
4344 VEX_W, EVEX_CD8<64, CD8VT1>;
4345 } // ExeDomain = GenericDomain
// Packed round-to-integral with an i8 control immediate, register and memory
// source forms; instantiated for 512-bit PS and PD below.
4348 multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
4349 X86MemOperand x86memop, RegisterClass RC,
4350 PatFrag mem_frag, Domain d> {
4351 let ExeDomain = d in {
4352 // Intrinsic operation, reg.
4353 // Vector intrinsic operation, reg
4354 def r : AVX512AIi8<opc, MRMSrcReg,
4355 (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
4356 !strconcat(OpcodeStr,
4357 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4360 // Vector intrinsic operation, mem
4361 def m : AVX512AIi8<opc, MRMSrcMem,
4362 (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
4363 !strconcat(OpcodeStr,
4364 " \t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4370 defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
4371 memopv16f32, SSEPackedSingle>, EVEX_V512,
4372 EVEX_CD8<32, CD8VF>;
// Mask intrinsic with pass-through == source and all-ones mask -> plain op.
4374 def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
4375 imm:$src2, (v16f32 VR512:$src1), (i16 -1),
4377 (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
4380 defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
4381 memopv8f64, SSEPackedDouble>, EVEX_V512,
4382 VEX_W, EVEX_CD8<64, CD8VF>;
4384 def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
4385 imm:$src2, (v8f64 VR512:$src1), (i8 -1),
4387 (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
4389 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
4390 Operand x86memop, RegisterClass RC, Domain d> {
// Scalar round-to-integral (vrndscaless/vrndscalesd): $src1 is the merge
// source, $src2 the scalar input, $src3 the i8 rounding-control immediate
// (callers below pass IMPLICIT_DEF as $src1 — presumably upper elements pass
// through; confirm against the enclosing patterns).
4391 let ExeDomain = d in {
4392 def r : AVX512AIi8<opc, MRMSrcReg,
4393 (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
4394 !strconcat(OpcodeStr,
// FIX: the asm string must include the $src3 immediate; it was previously
// omitted, so the printed assembly dropped the rounding-control operand
// even though it is in the ins list (cf. avx512_fp_binop_rm's SSr string).
4395 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4398 def m : AVX512AIi8<opc, MRMSrcMem,
4399 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
4400 !strconcat(OpcodeStr,
4401 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4406 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
4407 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4409 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
4410 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
// Map the rounding ISD nodes onto vrndscale immediates:
// 0x1 = floor, 0x2 = ceil, 0x3 = trunc, 0x4 = current mode (rint),
// 0xC = current mode without precision exception (nearbyint).
4412 def : Pat<(ffloor FR32X:$src),
4413 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
4414 def : Pat<(f64 (ffloor FR64X:$src)),
4415 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
4416 def : Pat<(f32 (fnearbyint FR32X:$src)),
4417 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
4418 def : Pat<(f64 (fnearbyint FR64X:$src)),
4419 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
4420 def : Pat<(f32 (fceil FR32X:$src)),
4421 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
4422 def : Pat<(f64 (fceil FR64X:$src)),
4423 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
4424 def : Pat<(f32 (frint FR32X:$src)),
4425 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
4426 def : Pat<(f64 (frint FR64X:$src)),
4427 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
4428 def : Pat<(f32 (ftrunc FR32X:$src)),
4429 (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
4430 def : Pat<(f64 (ftrunc FR64X:$src)),
4431 (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
// Packed equivalents, same immediate encodings.
4433 def : Pat<(v16f32 (ffloor VR512:$src)),
4434 (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
4435 def : Pat<(v16f32 (fnearbyint VR512:$src)),
4436 (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
4437 def : Pat<(v16f32 (fceil VR512:$src)),
4438 (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
4439 def : Pat<(v16f32 (frint VR512:$src)),
4440 (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
4441 def : Pat<(v16f32 (ftrunc VR512:$src)),
4442 (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
4444 def : Pat<(v8f64 (ffloor VR512:$src)),
4445 (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
4446 def : Pat<(v8f64 (fnearbyint VR512:$src)),
4447 (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
4448 def : Pat<(v8f64 (fceil VR512:$src)),
4449 (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
4450 def : Pat<(v8f64 (frint VR512:$src)),
4451 (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
4452 def : Pat<(v8f64 (ftrunc VR512:$src)),
4453 (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
4455 //-------------------------------------------------
4456 // Integer truncate and extend operations
4457 //-------------------------------------------------
4459 multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
4460 RegisterClass dstRC, RegisterClass srcRC,
4461 RegisterClass KRC, X86MemOperand x86memop> {
4462 def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4464 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4467 def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4468 (ins KRC:$mask, srcRC:$src),
4469 !strconcat(OpcodeStr,
4470 " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
4473 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
4474 (ins KRC:$mask, srcRC:$src),
4475 !strconcat(OpcodeStr,
4476 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4479 def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
4480 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4483 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
4484 (ins x86memop:$dst, KRC:$mask, srcRC:$src),
4485 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
// Instantiate the truncate family for each narrowing: plain (VPMOVxy),
// signed-saturating (VPMOVSxy), and unsigned-saturating (VPMOVUSxy).
// Q->B/W use VK8WM (8 source elements); D->W/B use VK16WM (16 elements).
// EVEX_CD8 encodes the destination element size and the compressed
// displacement tuple (CD8VO/CD8VQ/CD8VH).
4489 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4490 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4491 defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
4492 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4493 defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
4494 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
4495 defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
4496 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4497 defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
4498 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4499 defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
4500 i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
4501 defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
4502 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4503 defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
4504 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4505 defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
4506 i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
4507 defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
4508 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4509 defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
4510 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4511 defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
4512 i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
4513 defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
4514 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4515 defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
4516 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
4517 defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
4518 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
// Select unmasked vector-truncate DAG nodes to the rr forms, and masked
// truncates (X86vtruncm) to the zero-masked rrkz forms defined above.
4520 def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
4521 def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
4522 def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
4523 def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
4524 def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
4526 def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4527 (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
4528 def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
4529 (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
4530 def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4531 (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
4532 def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
4533 (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
// Multiclass for VPMOVZX/VPMOVSX extensions: reg (rr/rrk/rrkz) and load
// (rm/rmk/rmkz) forms. OpNode is X86vzext or X86vsext; InVT/OpVT are the
// narrow source and wide destination vector types.
// NOTE(review): several ins/pattern lines and the closing braces fall in the
// gaps of this listing. Also, the rrk/rmk asm strings contain
// "$dst {${mask}} |" with a space before '|' (absent in the rr form) —
// looks like a stray space in the printed mnemonic; verify upstream.
4536 multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4537 RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
4538 PatFrag mem_frag, X86MemOperand x86memop,
4539 ValueType OpVT, ValueType InVT> {
4541 def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4543 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4544 [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
4546 def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4547 (ins KRC:$mask, SrcRC:$src),
4548 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4551 def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
4552 (ins KRC:$mask, SrcRC:$src),
4553 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4556 let mayLoad = 1 in {
4557 def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4558 (ins x86memop:$src),
4559 !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
4561 (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
4564 def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4565 (ins KRC:$mask, x86memop:$src),
4566 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
4570 def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
4571 (ins KRC:$mask, x86memop:$src),
4572 !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
// 512-bit zero-extend (X86vzext) and sign-extend (X86vsext) instantiations.
// NOTE(review): the EVEX_CD8 continuation lines for the BD/BQ variants fall
// in listing gaps (e.g. line 4580); the defms are incomplete as shown.
4578 defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
4579 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4581 defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
4582 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4584 defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
4585 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4586 EVEX_CD8<16, CD8VH>;
4587 defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
4588 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4589 EVEX_CD8<16, CD8VQ>;
4590 defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
4591 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4592 EVEX_CD8<32, CD8VH>;
4594 defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
4595 memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
4597 defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
4598 memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
4600 defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
4601 memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
4602 EVEX_CD8<16, CD8VH>;
4603 defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
4604 memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
4605 EVEX_CD8<16, CD8VQ>;
4606 defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
4607 memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
4608 EVEX_CD8<32, CD8VH>;
4610 //===----------------------------------------------------------------------===//
4611 // GATHER - SCATTER Operations
// Gather multiclass: destination is both read and written ($src1 = $dst,
// early-clobber), and the mask is written back ($mask = $mask_wb) because
// hardware clears mask bits as elements complete.
// NOTE(review): the 'let' line opening this constraint list (likely
// "let mayLoad = 1, ...") and the pattern list/close are not visible here.
4613 multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4614 RegisterClass RC, X86MemOperand memop> {
4616 Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
4617 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
4618 (ins RC:$src1, KRC:$mask, memop:$src2),
4619 !strconcat(OpcodeStr,
4620 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// 512-bit gather instantiations. FP gathers are split by ExeDomain;
// qps/qd forms write only a 256-bit result (VR256X) since 8 x 32-bit
// elements are gathered via 64-bit indices.
4624 let ExeDomain = SSEPackedDouble in {
4625 defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
4626 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4627 defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
4628 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4631 let ExeDomain = SSEPackedSingle in {
4632 defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
4633 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4634 defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
4635 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4638 defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
4639 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4640 defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
4641 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4643 defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
4644 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4645 defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
4646 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Scatter multiclass: store form only; the mask register is written back
// ($mask = $mask_wb) as elements complete, mirroring the gather contract.
// NOTE(review): the pattern list and closing braces are outside this view.
4648 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
4649 RegisterClass RC, X86MemOperand memop> {
4650 let mayStore = 1, Constraints = "$mask = $mask_wb" in
4651 def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
4652 (ins memop:$dst, KRC:$mask, RC:$src2),
4653 !strconcat(OpcodeStr,
4654 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
// 512-bit scatter instantiations; structure parallels the gather defms
// above (same register classes, memory operands, and CD8 tuples).
4658 let ExeDomain = SSEPackedDouble in {
4659 defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
4660 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4661 defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
4662 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4665 let ExeDomain = SSEPackedSingle in {
4666 defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
4667 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4668 defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
4669 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4672 defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
4673 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4674 defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
4675 EVEX_V512, EVEX_CD8<32, CD8VT1>;
4677 defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
4678 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4679 defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
4680 EVEX_V512, EVEX_CD8<32, CD8VT1>;
// Prefetch multiclass (PFI feature): single memory form selected by the
// Format F (MRM1m/MRM2m/MRM5m/MRM6m encode the hint and direction below).
// hasSideEffects = 1 keeps the prefetch from being dead-code eliminated.
// NOTE(review): the def's pattern list and closing brace are not visible.
4683 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
4684 RegisterClass KRC, X86MemOperand memop> {
4685 let Predicates = [HasPFI], hasSideEffects = 1 in
4686 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
4687 !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
// Gather/scatter prefetch instantiations. Opcode 0xC6 = dword-indexed,
// 0xC7 = qword-indexed; MRM1m/MRM2m select gather hint T0/T1 and
// MRM5m/MRM6m select scatter hint T0/T1.
4691 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
4692 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4694 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
4695 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4697 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
4698 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4700 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
4701 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4703 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
4704 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4706 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
4707 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4709 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
4710 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4712 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
4713 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4715 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
4716 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4718 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
4719 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4721 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
4722 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4724 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
4725 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4727 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
4728 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
4730 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
4731 VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
4733 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
4734 VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
4736 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
4737 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4738 //===----------------------------------------------------------------------===//
4739 // VSHUFPS - VSHUFPD Operations
// VSHUFPS/VSHUFPD multiclass: register-memory (rmi) and register-register
// (rri) immediate-shuffle forms selecting X86Shufp with an i8 control.
// NOTE(review): the parameter line carrying the domain 'd' (used below) and
// the closing brace fall in listing gaps here.
4741 multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
4742 ValueType vt, string OpcodeStr, PatFrag mem_frag,
4744 def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
4745 (ins RC:$src1, x86memop:$src2, i8imm:$src3),
4746 !strconcat(OpcodeStr,
4747 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4748 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
4749 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4750 EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
4751 def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
4752 (ins RC:$src1, RC:$src2, i8imm:$src3),
4753 !strconcat(OpcodeStr,
4754 " \t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4755 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
4756 (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
4757 EVEX_4V, Sched<[WriteShuffle]>;
// 512-bit VSHUFPS/VSHUFPD instantiations, plus patterns reusing the FP
// shuffles for the same-width integer vector types (v16i32/v8i64).
4760 defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
4761 SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
4762 defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
4763 SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4765 def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4766 (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4767 def : Pat<(v16i32 (X86Shufp VR512:$src1,
4768 (memopv16i32 addr:$src2), (i8 imm:$imm))),
4769 (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
4771 def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
4772 (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
4773 def : Pat<(v8i64 (X86Shufp VR512:$src1,
4774 (memopv8i64 addr:$src2), (i8 imm:$imm))),
4775 (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// VALIGN multiclass driven by X86VectorVTInfo: masked rri form via
// AVX512_masking, a float-typed pattern reusing the integer instruction,
// and an (apparently pattern-less) memory form rmi.
// Note the deliberate operand swap: X86VAlign takes ($src2, $src1) while
// the instruction takes ($src1, $src2) — preserved in the FloatVT Pat too.
// NOTE(review): lines 4780, 4783, 4786, and the multiclass close are not
// visible in this listing.
4777 multiclass avx512_valign<X86VectorVTInfo _> {
4778 defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst),
4779 (ins _.RC:$src1, _.RC:$src2, i8imm:$src3),
4781 "$src3, $src2, $src1", "$src1, $src2, $src3",
4782 (_.VT (X86VAlign _.RC:$src2, _.RC:$src1,
4784 _.VT, _.RC, _.KRCWM>,
4785 AVX512AIi8Base, EVEX_4V;
4787 // Also match valign of packed floats.
4788 def : Pat<(_.FloatVT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
4789 (!cast<Instruction>(NAME##rri) _.RC:$src2, _.RC:$src1, imm:$imm)>;
4792 def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs _.RC:$dst),
4793 (ins _.RC:$src1, _.MemOp:$src2, i8imm:$src3),
4794 !strconcat("valign"##_.Suffix,
4795 " \t{$src3, $src2, $src1, $dst|"
4796 "$dst, $src1, $src2, $src3}"),
// VALIGND/VALIGNQ instantiations, and PatLeaf helpers matching an
// arithmetic shift-right by (eltsize-1) — i.e. a sign-mask produced from
// sext of a vXi1 condition.
4799 defm VALIGND : avx512_valign<v16i32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
4800 defm VALIGNQ : avx512_valign<v8i64_info>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
4802 // Helper fragments to match sext vXi1 to vXiY.
4803 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
4804 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
// VPABS multiclass: register forms (rr/rrk/rrkz), memory forms
// (rm/rmk/rmkz), and broadcast-from-scalar forms (rmb/rmbk/rmbkz) whose
// mnemonics append BrdcstStr (e.g. "{1to16}").
// NOTE(review): memory/broadcast defs hard-code VR512 as the destination
// instead of using RC — consistent for the V512-only instantiations below,
// but worth confirming before reusing this multiclass at other widths.
// Several pattern lines and closing braces fall in listing gaps.
4806 multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, ValueType OpVT,
4807 RegisterClass KRC, RegisterClass RC,
4808 X86MemOperand x86memop, X86MemOperand x86scalar_mop,
4810 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4811 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4813 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4814 !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4816 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
4817 !strconcat(OpcodeStr,
4818 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4820 let mayLoad = 1 in {
4821 def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4822 (ins x86memop:$src),
4823 !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
4825 def rmk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4826 (ins KRC:$mask, x86memop:$src),
4827 !strconcat(OpcodeStr,
4828 " \t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4830 def rmkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4831 (ins KRC:$mask, x86memop:$src),
4832 !strconcat(OpcodeStr,
4833 " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
4835 def rmb : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4836 (ins x86scalar_mop:$src),
4837 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4838 ", $dst|$dst, ${src}", BrdcstStr, "}"),
4840 def rmbk : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4841 (ins KRC:$mask, x86scalar_mop:$src),
4842 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4843 ", $dst {${mask}}|$dst {${mask}}, ${src}", BrdcstStr, "}"),
4844 []>, EVEX, EVEX_B, EVEX_K;
4845 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst),
4846 (ins KRC:$mask, x86scalar_mop:$src),
4847 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4848 ", $dst {${mask}} {z}|$dst {${mask}} {z}, ${src}",
4850 []>, EVEX, EVEX_B, EVEX_KZ;
// VPABSD/VPABSQ instantiations plus selection patterns: the xor/add
// sign-mask idiom for integer abs (opening def lines 4861/4865 fall in
// listing gaps — the fragments below are their operand/result halves),
// and direct lowering of the all-ones-mask mask.pabs intrinsics.
4854 defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512,
4855 i512mem, i32mem, "{1to16}">, EVEX_V512,
4856 EVEX_CD8<32, CD8VF>;
4857 defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512,
4858 i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W,
4859 EVEX_CD8<64, CD8VF>;
4862 (bc_v16i32 (v16i1sextv16i32)),
4863 (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
4864 (VPABSDZrr VR512:$src)>;
4866 (bc_v8i64 (v8i1sextv8i64)),
4867 (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
4868 (VPABSQZrr VR512:$src)>;
4870 def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src),
4871 (v16i32 immAllZerosV), (i16 -1))),
4872 (VPABSDZrr VR512:$src)>;
4873 def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src),
4874 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
4875 (VPABSQZrr VR512:$src)>;
// Shared multiclass for CDI unary ops (VPCONFLICT, VPLZCNT): plain,
// zero-masked, and broadcast forms, plus merge-masked forms that tie
// $src1 = $dst so the masked-off destination lanes are preserved.
// NOTE(review): the asm string of rr has a stray space in "${dst} |" not
// present in rm; pattern lists and closing braces are in listing gaps.
4877 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
4878 RegisterClass RC, RegisterClass KRC,
4879 X86MemOperand x86memop,
4880 X86MemOperand x86scalar_mop, string BrdcstStr> {
4881 def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4883 !strconcat(OpcodeStr, " \t{$src, ${dst} |${dst}, $src}"),
4885 def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4886 (ins x86memop:$src),
4887 !strconcat(OpcodeStr, " \t{$src, ${dst}|${dst}, $src}"),
4889 def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4890 (ins x86scalar_mop:$src),
4891 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4892 ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
4894 def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4895 (ins KRC:$mask, RC:$src),
4896 !strconcat(OpcodeStr,
4897 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4899 def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4900 (ins KRC:$mask, x86memop:$src),
4901 !strconcat(OpcodeStr,
4902 " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
4904 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4905 (ins KRC:$mask, x86scalar_mop:$src),
4906 !strconcat(OpcodeStr, " \t{${src}", BrdcstStr,
4907 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
4909 []>, EVEX, EVEX_KZ, EVEX_B;
4911 let Constraints = "$src1 = $dst" in {
4912 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
4913 (ins RC:$src1, KRC:$mask, RC:$src2),
4914 !strconcat(OpcodeStr,
4915 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
4917 def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4918 (ins RC:$src1, KRC:$mask, x86memop:$src2),
4919 !strconcat(OpcodeStr,
4920 " \t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
4922 def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
4923 (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
4924 !strconcat(OpcodeStr, " \t{${src2}", BrdcstStr,
4925 ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
4926 []>, EVEX, EVEX_K, EVEX_B;
// CDI instantiations (VPCONFLICT, VPLZCNT) and lowering of the masked
// intrinsics onto the merge-masked rrk forms. Note the intrinsic operand
// order ($src2 first, $src1 = passthru) vs. the instruction's
// (passthru, mask, source); the GR mask is moved into a k-register via
// COPY_TO_REGCLASS.
// NOTE(review): the intrinsic Pat lines carrying the mask operand
// (e.g. 4943, 4948, 4965, 4970) fall in listing gaps.
4930 let Predicates = [HasCDI] in {
4931 defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
4932 i512mem, i32mem, "{1to16}">,
4933 EVEX_V512, EVEX_CD8<32, CD8VF>;
4936 defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
4937 i512mem, i64mem, "{1to8}">,
4938 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
4942 def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
4944 (VPCONFLICTDrrk VR512:$src1,
4945 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
4947 def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
4949 (VPCONFLICTQrrk VR512:$src1,
4950 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
4952 let Predicates = [HasCDI] in {
4953 defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
4954 i512mem, i32mem, "{1to16}">,
4955 EVEX_V512, EVEX_CD8<32, CD8VF>;
4958 defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
4959 i512mem, i64mem, "{1to8}">,
4960 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
4964 def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
4966 (VPLZCNTDrrk VR512:$src1,
4967 (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
4969 def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
4971 (VPLZCNTQrrk VR512:$src1,
4972 (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
// Lower the generic ctlz node (register and memory operands) to
// VPLZCNTD/VPLZCNTQ.
4974 def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
4975 (VPLZCNTDrm addr:$src)>;
4976 def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
4977 (VPLZCNTDrr VR512:$src)>;
4978 def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
4979 (VPLZCNTQrm addr:$src)>;
4980 def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
4981 (VPLZCNTQrr VR512:$src)>;
// i1 stores: constant true (spelled either -1 or 1) is stored as byte 1,
// false as byte 0. A VK1 value is spilled via KMOVW after widening the
// register class to VK16. truncstorei1 narrows a GR8 truncstore; its
// PatFrag predicate's closing "}]>;" falls in a listing gap here.
4983 def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
4984 def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
4985 def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
4987 def : Pat<(store VK1:$src, addr:$dst),
4988 (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
4990 def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
4991 (truncstore node:$val, node:$ptr), [{
4992 return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
4995 def : Pat<(truncstorei1 GR8:$src, addr:$dst),
4996 (MOV8mr addr:$dst, GR8:$src)>;