1 // Bitcasts between 512-bit vector types. Return the original type since
2 // no instruction is needed for the conversion
// Each pattern rewrites a bitconvert to the same VR512 register viewed at the
// destination type. The four types v8f64, v16f32, v8i64 and v16i32 give
// 4 x 3 = 12 ordered pairs; each pair is listed exactly once (a second copy
// of the v8i64 -> v8f64 pattern would be redundant).
// All patterns in this scope are guarded by the HasAVX512 predicate.
3 let Predicates = [HasAVX512] in {
4 def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
5 def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
6 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
7 def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
8 def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
9 def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
10 def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
11 def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
12 def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
13 def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
14 def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
15 def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
// Bitcasts between 128-bit vector types held in VR128X. Each pattern rewrites
// a bitconvert to the same register viewed at the destination type, so no
// instruction is emitted. All 30 ordered pairs of
// {v2i64, v4i32, v8i16, v16i8, v4f32, v2f64} are listed, grouped by
// destination type.
18 def : Pat<(v2i64 (bitconvert (v4i32 VR128X:$src))), (v2i64 VR128X:$src)>;
19 def : Pat<(v2i64 (bitconvert (v8i16 VR128X:$src))), (v2i64 VR128X:$src)>;
20 def : Pat<(v2i64 (bitconvert (v16i8 VR128X:$src))), (v2i64 VR128X:$src)>;
21 def : Pat<(v2i64 (bitconvert (v2f64 VR128X:$src))), (v2i64 VR128X:$src)>;
22 def : Pat<(v2i64 (bitconvert (v4f32 VR128X:$src))), (v2i64 VR128X:$src)>;
23 def : Pat<(v4i32 (bitconvert (v2i64 VR128X:$src))), (v4i32 VR128X:$src)>;
24 def : Pat<(v4i32 (bitconvert (v8i16 VR128X:$src))), (v4i32 VR128X:$src)>;
25 def : Pat<(v4i32 (bitconvert (v16i8 VR128X:$src))), (v4i32 VR128X:$src)>;
26 def : Pat<(v4i32 (bitconvert (v2f64 VR128X:$src))), (v4i32 VR128X:$src)>;
27 def : Pat<(v4i32 (bitconvert (v4f32 VR128X:$src))), (v4i32 VR128X:$src)>;
28 def : Pat<(v8i16 (bitconvert (v2i64 VR128X:$src))), (v8i16 VR128X:$src)>;
29 def : Pat<(v8i16 (bitconvert (v4i32 VR128X:$src))), (v8i16 VR128X:$src)>;
30 def : Pat<(v8i16 (bitconvert (v16i8 VR128X:$src))), (v8i16 VR128X:$src)>;
31 def : Pat<(v8i16 (bitconvert (v2f64 VR128X:$src))), (v8i16 VR128X:$src)>;
32 def : Pat<(v8i16 (bitconvert (v4f32 VR128X:$src))), (v8i16 VR128X:$src)>;
33 def : Pat<(v16i8 (bitconvert (v2i64 VR128X:$src))), (v16i8 VR128X:$src)>;
34 def : Pat<(v16i8 (bitconvert (v4i32 VR128X:$src))), (v16i8 VR128X:$src)>;
35 def : Pat<(v16i8 (bitconvert (v8i16 VR128X:$src))), (v16i8 VR128X:$src)>;
36 def : Pat<(v16i8 (bitconvert (v2f64 VR128X:$src))), (v16i8 VR128X:$src)>;
37 def : Pat<(v16i8 (bitconvert (v4f32 VR128X:$src))), (v16i8 VR128X:$src)>;
38 def : Pat<(v4f32 (bitconvert (v2i64 VR128X:$src))), (v4f32 VR128X:$src)>;
39 def : Pat<(v4f32 (bitconvert (v4i32 VR128X:$src))), (v4f32 VR128X:$src)>;
40 def : Pat<(v4f32 (bitconvert (v8i16 VR128X:$src))), (v4f32 VR128X:$src)>;
41 def : Pat<(v4f32 (bitconvert (v16i8 VR128X:$src))), (v4f32 VR128X:$src)>;
42 def : Pat<(v4f32 (bitconvert (v2f64 VR128X:$src))), (v4f32 VR128X:$src)>;
43 def : Pat<(v2f64 (bitconvert (v2i64 VR128X:$src))), (v2f64 VR128X:$src)>;
44 def : Pat<(v2f64 (bitconvert (v4i32 VR128X:$src))), (v2f64 VR128X:$src)>;
45 def : Pat<(v2f64 (bitconvert (v8i16 VR128X:$src))), (v2f64 VR128X:$src)>;
46 def : Pat<(v2f64 (bitconvert (v16i8 VR128X:$src))), (v2f64 VR128X:$src)>;
47 def : Pat<(v2f64 (bitconvert (v4f32 VR128X:$src))), (v2f64 VR128X:$src)>;
49 // Bitcasts between 256-bit vector types. Return the original type since
50 // no instruction is needed for the conversion
// The operand is always a VR256X register. All 30 ordered pairs of
// {v4f64, v8f32, v4i64, v32i8, v8i32, v16i16} are listed, grouped by
// destination type.
51 def : Pat<(v4f64 (bitconvert (v8f32 VR256X:$src))), (v4f64 VR256X:$src)>;
52 def : Pat<(v4f64 (bitconvert (v8i32 VR256X:$src))), (v4f64 VR256X:$src)>;
53 def : Pat<(v4f64 (bitconvert (v4i64 VR256X:$src))), (v4f64 VR256X:$src)>;
54 def : Pat<(v4f64 (bitconvert (v16i16 VR256X:$src))), (v4f64 VR256X:$src)>;
55 def : Pat<(v4f64 (bitconvert (v32i8 VR256X:$src))), (v4f64 VR256X:$src)>;
56 def : Pat<(v8f32 (bitconvert (v8i32 VR256X:$src))), (v8f32 VR256X:$src)>;
57 def : Pat<(v8f32 (bitconvert (v4i64 VR256X:$src))), (v8f32 VR256X:$src)>;
58 def : Pat<(v8f32 (bitconvert (v4f64 VR256X:$src))), (v8f32 VR256X:$src)>;
59 def : Pat<(v8f32 (bitconvert (v32i8 VR256X:$src))), (v8f32 VR256X:$src)>;
60 def : Pat<(v8f32 (bitconvert (v16i16 VR256X:$src))), (v8f32 VR256X:$src)>;
61 def : Pat<(v4i64 (bitconvert (v8f32 VR256X:$src))), (v4i64 VR256X:$src)>;
62 def : Pat<(v4i64 (bitconvert (v8i32 VR256X:$src))), (v4i64 VR256X:$src)>;
63 def : Pat<(v4i64 (bitconvert (v4f64 VR256X:$src))), (v4i64 VR256X:$src)>;
64 def : Pat<(v4i64 (bitconvert (v32i8 VR256X:$src))), (v4i64 VR256X:$src)>;
65 def : Pat<(v4i64 (bitconvert (v16i16 VR256X:$src))), (v4i64 VR256X:$src)>;
66 def : Pat<(v32i8 (bitconvert (v4f64 VR256X:$src))), (v32i8 VR256X:$src)>;
67 def : Pat<(v32i8 (bitconvert (v4i64 VR256X:$src))), (v32i8 VR256X:$src)>;
68 def : Pat<(v32i8 (bitconvert (v8f32 VR256X:$src))), (v32i8 VR256X:$src)>;
69 def : Pat<(v32i8 (bitconvert (v8i32 VR256X:$src))), (v32i8 VR256X:$src)>;
70 def : Pat<(v32i8 (bitconvert (v16i16 VR256X:$src))), (v32i8 VR256X:$src)>;
71 def : Pat<(v8i32 (bitconvert (v32i8 VR256X:$src))), (v8i32 VR256X:$src)>;
72 def : Pat<(v8i32 (bitconvert (v16i16 VR256X:$src))), (v8i32 VR256X:$src)>;
73 def : Pat<(v8i32 (bitconvert (v8f32 VR256X:$src))), (v8i32 VR256X:$src)>;
74 def : Pat<(v8i32 (bitconvert (v4i64 VR256X:$src))), (v8i32 VR256X:$src)>;
75 def : Pat<(v8i32 (bitconvert (v4f64 VR256X:$src))), (v8i32 VR256X:$src)>;
76 def : Pat<(v16i16 (bitconvert (v8f32 VR256X:$src))), (v16i16 VR256X:$src)>;
77 def : Pat<(v16i16 (bitconvert (v8i32 VR256X:$src))), (v16i16 VR256X:$src)>;
78 def : Pat<(v16i16 (bitconvert (v4i64 VR256X:$src))), (v16i16 VR256X:$src)>;
79 def : Pat<(v16i16 (bitconvert (v4f64 VR256X:$src))), (v16i16 VR256X:$src)>;
80 def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
83 //===----------------------------------------------------------------------===//
84 // AVX-512 - VECTOR INSERT
// vinsertf32x4, register (rr) and memory (rm) forms, both opcode 0x18.
// Operands: VR512 $src1, a 128-bit $src2 (VR128X for rr, f128mem for rm) and
// an i8imm $src3; the result is written to VR512 $dst. The selection pattern
// lists are empty; the vinsert128_insert Pat<> definitions later in this
// file select these instructions. The rm form additionally carries
// EVEX_CD8<32, CD8VT4>.
// NOTE(review): the `{` opened by this `let` has no matching `}` in the
// visible text, and the rm form has no explicit mayLoad although
// neverHasSideEffects = 1 is set and its pattern list is empty -- confirm
// against the full file that no lines were lost here.
87 let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
88 def VINSERTF32x4rr : AVX512AIi8<0x18, MRMSrcReg, (outs VR512:$dst),
89 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
90 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
91 []>, EVEX_4V, EVEX_V512;
93 def VINSERTF32x4rm : AVX512AIi8<0x18, MRMSrcMem, (outs VR512:$dst),
94 (ins VR512:$src1, f128mem:$src2, i8imm:$src3),
95 "vinsertf32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
96 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// vinsertf64x4, register (rr) and memory (rm) forms, both opcode 0x1a with
// VEX_W. Operands: VR512 $src1, a 256-bit $src2 (VR256X for rr, f256mem for
// rm) and an i8imm $src3; the result is written to VR512 $dst. The selection
// pattern lists are empty; the vinsert256_insert Pat<> definitions later in
// this file select these instructions. The rm form additionally carries
// EVEX_CD8<64, CD8VT4>.
// The rm form uses the floating-point memory operand class (f256mem) so it
// agrees with the other FP definitions in this file (f128mem on
// VINSERTF32x4rm, f256mem on VEXTRACTF64x4mr).
// NOTE(review): the `{` opened by this `let` has no matching `}` in the
// visible text, and the rm form has no explicit mayLoad -- confirm against
// the full file that no lines were lost here.
100 let neverHasSideEffects = 1, ExeDomain = SSEPackedDouble in {
101 def VINSERTF64x4rr : AVX512AIi8<0x1a, MRMSrcReg, (outs VR512:$dst),
102 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
103 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
104 []>, EVEX_4V, EVEX_V512, VEX_W;
106 def VINSERTF64x4rm : AVX512AIi8<0x1a, MRMSrcMem, (outs VR512:$dst),
107 (ins VR512:$src1, f256mem:$src2, i8imm:$src3),
108 "vinsertf64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
109 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
111 // -- 32x4 integer form --
// vinserti32x4, register (rr) and memory (rm) forms, both opcode 0x38.
// Operands: VR512 $src1, a 128-bit $src2 (VR128X for rr, i128mem for rm) and
// an i8imm $src3; the result is written to VR512 $dst. No ExeDomain is set
// in this `let`, unlike the FP insert forms. The pattern lists are empty;
// the integer vinsert128_insert Pat<> definitions later in this file select
// these instructions.
// NOTE(review): no closing `}` for this `let` and no explicit mayLoad on the
// rm form are visible -- confirm against the full file.
112 let neverHasSideEffects = 1 in {
113 def VINSERTI32x4rr : AVX512AIi8<0x38, MRMSrcReg, (outs VR512:$dst),
114 (ins VR512:$src1, VR128X:$src2, i8imm:$src3),
115 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
116 []>, EVEX_4V, EVEX_V512;
118 def VINSERTI32x4rm : AVX512AIi8<0x38, MRMSrcMem, (outs VR512:$dst),
119 (ins VR512:$src1, i128mem:$src2, i8imm:$src3),
120 "vinserti32x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
121 []>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// vinserti64x4, register (rr) and memory (rm) forms, both opcode 0x3a with
// VEX_W. Operands: VR512 $src1, a 256-bit $src2 (VR256X for rr, i256mem for
// rm) and an i8imm $src3; the result is written to VR512 $dst. The pattern
// lists are empty; the integer 256-bit insert Pat<> definitions later in
// this file select these instructions.
// NOTE(review): no closing `}` for this `let` and no explicit mayLoad on the
// rm form are visible -- confirm against the full file.
125 let neverHasSideEffects = 1 in {
127 def VINSERTI64x4rr : AVX512AIi8<0x3a, MRMSrcReg, (outs VR512:$dst),
128 (ins VR512:$src1, VR256X:$src2, i8imm:$src3),
129 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
130 []>, EVEX_4V, EVEX_V512, VEX_W;
132 def VINSERTI64x4rm : AVX512AIi8<0x3a, MRMSrcMem, (outs VR512:$dst),
133 (ins VR512:$src1, i256mem:$src2, i8imm:$src3),
134 "vinserti64x4\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
135 []>, EVEX_4V, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// 128-bit subvector insert from a register into a 512-bit vector.
// vinsert128_insert is bound to $ins, and the instruction immediate is
// derived from that node through INSERT_get_vinsert128_imm.
// FP element types (v16f32, v8f64) select VINSERTF32x4rr; integer element
// types (v8i64, v16i32) select VINSERTI32x4rr. The 64-bit element types
// reuse the 32x4 instructions.
138 def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (v4f32 VR128X:$src2),
139 (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
140 (INSERT_get_vinsert128_imm VR512:$ins))>;
141 def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (v2f64 VR128X:$src2),
142 (iPTR imm)), (VINSERTF32x4rr VR512:$src1, VR128X:$src2,
143 (INSERT_get_vinsert128_imm VR512:$ins))>;
144 def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (v2i64 VR128X:$src2),
145 (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
146 (INSERT_get_vinsert128_imm VR512:$ins))>;
147 def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1), (v4i32 VR128X:$src2),
148 (iPTR imm)), (VINSERTI32x4rr VR512:$src1, VR128X:$src2,
149 (INSERT_get_vinsert128_imm VR512:$ins))>;
// 128-bit subvector insert where the inserted value is a load; these select
// the memory (rm) instruction forms and pass addr:$src2 through.
// FP types (v16f32, v8f64) select VINSERTF32x4rm; integer types (v16i32,
// v8i64) select VINSERTI32x4rm. The v16i32 pattern matches its operand as
// bc_v4i32 of a loadv2i64, while the other three match a load of the
// inserted type directly.
151 def : Pat<(vinsert128_insert:$ins (v16f32 VR512:$src1), (loadv4f32 addr:$src2),
152 (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
153 (INSERT_get_vinsert128_imm VR512:$ins))>;
154 def : Pat<(vinsert128_insert:$ins (v16i32 VR512:$src1),
155 (bc_v4i32 (loadv2i64 addr:$src2)),
156 (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
157 (INSERT_get_vinsert128_imm VR512:$ins))>;
158 def : Pat<(vinsert128_insert:$ins (v8f64 VR512:$src1), (loadv2f64 addr:$src2),
159 (iPTR imm)), (VINSERTF32x4rm VR512:$src1, addr:$src2,
160 (INSERT_get_vinsert128_imm VR512:$ins))>;
161 def : Pat<(vinsert128_insert:$ins (v8i64 VR512:$src1), (loadv2i64 addr:$src2),
162 (iPTR imm)), (VINSERTI32x4rm VR512:$src1, addr:$src2,
163 (INSERT_get_vinsert128_imm VR512:$ins))>;
// 256-bit subvector insert from a register into a 512-bit vector.
// All four element types are matched through vinsert256_insert (bound to
// $ins), and the instruction immediate is derived from that node through
// INSERT_get_vinsert256_imm. FP types (v16f32, v8f64) select VINSERTF64x4rr;
// integer types (v8i64, v16i32) select VINSERTI64x4rr.
// The integer patterns must use the 256-bit fragment, not
// vinsert128_insert: the inserted operand is a VR256X value and the
// immediate transform is the 256-bit one, exactly as in the FP patterns here
// and in the load patterns for the same instructions.
165 def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (v8f32 VR256X:$src2),
166 (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
167 (INSERT_get_vinsert256_imm VR512:$ins))>;
168 def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (v4f64 VR256X:$src2),
169 (iPTR imm)), (VINSERTF64x4rr VR512:$src1, VR256X:$src2,
170 (INSERT_get_vinsert256_imm VR512:$ins))>;
171 def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (v4i64 VR256X:$src2),
172 (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
173 (INSERT_get_vinsert256_imm VR512:$ins))>;
174 def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), (v8i32 VR256X:$src2),
175 (iPTR imm)), (VINSERTI64x4rr VR512:$src1, VR256X:$src2,
176 (INSERT_get_vinsert256_imm VR512:$ins))>;
// 256-bit subvector insert where the inserted value is a load; these select
// the memory (rm) instruction forms and pass addr:$src2 through.
// FP types (v16f32, v8f64) select VINSERTF64x4rm; integer types (v8i64,
// v16i32) select VINSERTI64x4rm. The v16i32 pattern matches its operand as
// bc_v8i32 of a loadv4i64, while the other three match a load of the
// inserted type directly.
178 def : Pat<(vinsert256_insert:$ins (v16f32 VR512:$src1), (loadv8f32 addr:$src2),
179 (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
180 (INSERT_get_vinsert256_imm VR512:$ins))>;
181 def : Pat<(vinsert256_insert:$ins (v8f64 VR512:$src1), (loadv4f64 addr:$src2),
182 (iPTR imm)), (VINSERTF64x4rm VR512:$src1, addr:$src2,
183 (INSERT_get_vinsert256_imm VR512:$ins))>;
184 def : Pat<(vinsert256_insert:$ins (v8i64 VR512:$src1), (loadv4i64 addr:$src2),
185 (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
186 (INSERT_get_vinsert256_imm VR512:$ins))>;
187 def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
188 (bc_v8i32 (loadv4i64 addr:$src2)),
189 (iPTR imm)), (VINSERTI64x4rm VR512:$src1, addr:$src2,
190 (INSERT_get_vinsert256_imm VR512:$ins))>;
192 // vinsertps - insert f32 to XMM
// Register (zrr) and memory (zrm) forms, both opcode 0x21, operating on
// VR128X with a u32u8imm $src3. Unlike the 512-bit insert definitions, these
// carry inline selection patterns on X86insrtps. The zrm form matches its
// second operand as a v4f32 scalar_to_vector of a loadf32 from addr:$src2
// and carries EVEX_CD8<32, CD8VT1>.
// NOTE(review): the zrr definition ends on a trailing comma with no encoding
// suffix before the next `def` (the zrm form ends with EVEX_4V, ...) -- a
// line appears to be missing here; confirm against the full file.
193 def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
194 (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
195 "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
196 [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
198 def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
199 (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
200 "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
201 [(set VR128X:$dst, (X86insrtps VR128X:$src1,
202 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
203 imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
205 //===----------------------------------------------------------------------===//
206 // AVX-512 VECTOR EXTRACT
// Floating-point subvector extracts from a VR512 source with an i8imm
// selector. Each instruction has a register-destination form (rr,
// MRMDestReg) and a memory-destination form (mr, MRMDestMem); the mr forms
// have no register outputs and take the destination as a memory operand in
// the `ins` list. All pattern lists are empty; the vextract*_extract Pat<>
// definitions later in this file select the rr forms.
// NOTE(review): the `{` opened by this `let` has no matching `}` in the
// visible text, and the mr forms have no explicit mayStore -- confirm
// against the full file that no lines were lost here.
208 let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
// vextractf32x4 (opcode 0x19): 128-bit result, VR128X or f128mem.
210 def VEXTRACTF32x4rr : AVX512AIi8<0x19, MRMDestReg, (outs VR128X:$dst),
211 (ins VR512:$src1, i8imm:$src2),
212 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
213 []>, EVEX, EVEX_V512;
214 def VEXTRACTF32x4mr : AVX512AIi8<0x19, MRMDestMem, (outs),
215 (ins f128mem:$dst, VR512:$src1, i8imm:$src2),
216 "vextractf32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
217 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// vextractf64x4 (opcode 0x1b, VEX_W): 256-bit result, VR256X or f256mem.
220 def VEXTRACTF64x4rr : AVX512AIi8<0x1b, MRMDestReg, (outs VR256X:$dst),
221 (ins VR512:$src1, i8imm:$src2),
222 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
223 []>, EVEX, EVEX_V512, VEX_W;
225 def VEXTRACTF64x4mr : AVX512AIi8<0x1b, MRMDestMem, (outs),
226 (ins f256mem:$dst, VR512:$src1, i8imm:$src2),
227 "vextractf64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
228 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// Integer subvector extracts from a VR512 source with an i8imm selector.
// Same shape as the FP extract definitions: an rr form writing a register
// and an mr form whose destination is a memory operand in the `ins` list.
// No ExeDomain is set in this `let`. All pattern lists are empty; the
// vextract*_extract Pat<> definitions later in this file select the rr
// forms.
// NOTE(review): the `{` opened by this `let` has no matching `}` in the
// visible text, and the mr forms have no explicit mayStore -- confirm
// against the full file that no lines were lost here.
231 let neverHasSideEffects = 1 in {
// vextracti32x4 (opcode 0x39): 128-bit result, VR128X or i128mem.
233 def VEXTRACTI32x4rr : AVX512AIi8<0x39, MRMDestReg, (outs VR128X:$dst),
234 (ins VR512:$src1, i8imm:$src2),
235 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
236 []>, EVEX, EVEX_V512;
237 def VEXTRACTI32x4mr : AVX512AIi8<0x39, MRMDestMem, (outs),
238 (ins i128mem:$dst, VR512:$src1, i8imm:$src2),
239 "vextracti32x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
240 []>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VT4>;
// vextracti64x4 (opcode 0x3b, VEX_W): 256-bit result, VR256X or i256mem.
243 def VEXTRACTI64x4rr : AVX512AIi8<0x3b, MRMDestReg, (outs VR256X:$dst),
244 (ins VR512:$src1, i8imm:$src2),
245 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
246 []>, EVEX, EVEX_V512, VEX_W;
248 def VEXTRACTI64x4mr : AVX512AIi8<0x3b, MRMDestMem, (outs),
249 (ins i256mem:$dst, VR512:$src1, i8imm:$src2),
250 "vextracti64x4\t{$src2, $src1, $dst|$dst, $src1, $src2}",
251 []>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT4>;
// 128-bit subvector extract from a 512-bit register.
// vextract128_extract is bound to $ext, and the instruction immediate is
// derived from that node through EXTRACT_get_vextract128_imm.
// FP source types (v16f32, v8f64) select VEXTRACTF32x4rr; integer source
// types (v16i32, v8i64) select VEXTRACTI32x4rr. Every pattern spells out its
// source vector type, and the v4i32 result uses the integer instruction so
// it agrees with the v2i64 pattern instead of selecting the FP form.
254 def : Pat<(vextract128_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
255 (v4f32 (VEXTRACTF32x4rr VR512:$src1,
256 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
258 def : Pat<(vextract128_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
259 (v4i32 (VEXTRACTI32x4rr VR512:$src1,
260 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
262 def : Pat<(vextract128_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
263 (v2f64 (VEXTRACTF32x4rr VR512:$src1,
264 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
266 def : Pat<(vextract128_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
267 (v2i64 (VEXTRACTI32x4rr VR512:$src1,
268 (EXTRACT_get_vextract128_imm VR128X:$ext)))>;
// 256-bit subvector extract from a 512-bit register.
// vextract256_extract is bound to $ext, and the instruction immediate is
// derived from that node through EXTRACT_get_vextract256_imm.
// FP source types (v16f32, v8f64) select VEXTRACTF64x4rr; integer source
// types (v16i32, v8i64) select VEXTRACTI64x4rr.
271 def : Pat<(vextract256_extract:$ext (v16f32 VR512:$src1), (iPTR imm)),
272 (v8f32 (VEXTRACTF64x4rr VR512:$src1,
273 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
275 def : Pat<(vextract256_extract:$ext (v16i32 VR512:$src1), (iPTR imm)),
276 (v8i32 (VEXTRACTI64x4rr VR512:$src1,
277 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
279 def : Pat<(vextract256_extract:$ext (v8f64 VR512:$src1), (iPTR imm)),
280 (v4f64 (VEXTRACTF64x4rr VR512:$src1,
281 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
283 def : Pat<(vextract256_extract:$ext (v8i64 VR512:$src1), (iPTR imm)),
284 (v4i64 (VEXTRACTI64x4rr VR512:$src1,
285 (EXTRACT_get_vextract256_imm VR256X:$ext)))>;
287 // A 256-bit subvector extract from the first 512-bit vector position
288 // is a subregister copy that needs no instruction.
// Each pattern matches extract_subvector at index 0 and rewrites it to
// EXTRACT_SUBREG with the sub_ymm index, one pattern per 512-bit source type
// (v16i32, v16f32, v8i64, v8f64).
289 def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
290 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
291 def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
292 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
293 def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
294 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
295 def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
296 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
// 128-bit counterpart of the index-0 extract: extract_subvector at index 0
// of a 512-bit vector is rewritten to EXTRACT_SUBREG with the sub_xmm index,
// one pattern per 512-bit source type (v16i32, v8i64, v8f64, v16f32).
299 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
300 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
301 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
302 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
303 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
304 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
305 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
306 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
309 // A 128-bit subvector insert to the first 512-bit vector position
310 // is a subregister copy that needs no instruction.
// Each pattern matches insert_subvector of a VR128X value into undef at
// index 0 and builds the result from two nested INSERT_SUBREGs: the inner
// one places $src into an IMPLICIT_DEF 256-bit value at sub_xmm, and the
// outer one places that into an IMPLICIT_DEF 512-bit value.
// NOTE(review): in the visible text every outer INSERT_SUBREG stops after
// the inner one's trailing comma -- the final sub-register index and the
// closing `)>;` are not present. A line appears to be missing from each
// pattern; confirm against the full file.
311 def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
312 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
313 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
315 def : Pat<(insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0)),
316 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
317 (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
319 def : Pat<(insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0)),
320 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
321 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
323 def : Pat<(insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0)),
324 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
325 (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// 256-bit counterpart of the index-0 insert: insert_subvector of a VR256X
// value into undef at index 0 is rewritten to a single INSERT_SUBREG of $src
// into an IMPLICIT_DEF 512-bit value at sub_ymm, one pattern per inserted
// type (v4i64, v4f64, v8i32, v8f32).
328 def : Pat<(insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0)),
329 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
330 def : Pat<(insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0)),
331 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
332 def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
333 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
334 def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
335 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
337 // vextractps - extract 32 bits from XMM
// Register-destination form (opcode 0x17, MRMDestReg): writes GR32 $dst from
// VR128X $src1 with a u32u8imm element selector $src2. The inline pattern
// views the v4f32 source as v4i32 (bc_v4i32) and takes element $src2 with
// extractelt.
// NOTE(review): the definition ends on a trailing comma with no encoding
// suffix before the next `def` -- a line appears to be missing; confirm
// against the full file.
338 def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
339 (ins VR128X:$src1, u32u8imm:$src2),
340 "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
341 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
344 def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
345 (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
346 "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
347 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),