//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ExperimentalVectorWideningLegalization(
    "x86-experimental-vector-widening-legalization", cl::init(false),
    cl::desc("Enable an experimental vector type legalization through widening "
             "rather than promotion."),
    cl::Hidden);
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                     const X86Subtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());

  // Set up the TargetLowering object.

  // X86 is weird. It always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // For 64-bit, since we have so many registers, use the ILP scheduler.
  // For 32-bit, use the register pressure specific scheduling.
  // For Atom, always use ILP scheduling.
  if (Subtarget->isAtom())
    setSchedulingPreference(Sched::ILP);
  else if (Subtarget->is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Bypass expensive divides on Atom when compiling with O2.
  if (TM.getOptLevel() >= CodeGenOpt::Default) {
    if (Subtarget->hasSlowDivide32())
      addBypassSlowDiv(32, 8);
    if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
      addBypassSlowDiv(64, 16);
  }

  if (Subtarget->isTargetKnownWindowsMSVC()) {
    // Setup Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::SREM_I64, "_allrem");
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
    setLibcallName(RTLIB::MUL_I64, "_allmul");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
  }

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, &X86::GR64RegClass);

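  // Sign-extending loads from i1 have no native support; promote the i1
  // memory type so these loads are done as wider integer loads.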
  for (MVT VT : MVT::integer_valuetypes())
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8,  Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8,  Expand);
  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
      // f32/f64 are legal, f80 is custom.
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    else
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  } else if (!Subtarget->useSoftFloat()) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD or VCVTUSI2SS/SD for other targets.
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);

  if (!Subtarget->useSoftFloat()) {
    // SSE has no i16 to fp conversion, only i32.
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
      // f32 and f64 cases are Legal, f80 case is not.
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Promote);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (!Subtarget->useSoftFloat()) {
    // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
    // are Legal, f80 is custom lowered.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    if (X86ScalarSSEf32) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
      // f32 and f64 cases are Legal, f80 case is not.
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
      // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
    } else {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    }
  } else if (!Subtarget->useSoftFloat()) {
    // Since AVX is a superset of SSE3, only check for SSE here.
    if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::BITCAST, MVT::f64, Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    }
  } else if (!Subtarget->is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }

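  // Branches and selects on scalar conditions: X86 has no BR_CC or SELECT_CC,
  // so expand them into separate compares plus branches/selects, and custom
  // lower BRCOND/SELECT/SETCC to take advantage of EFLAGS-based patterns.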
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BRCOND,    MVT::Other, Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Expand);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Expand);
  setOperationAction(ISD::BR_CC,     MVT::f80,   Expand);
  setOperationAction(ISD::BR_CC,     MVT::f128,  Expand);
  setOperationAction(ISD::BR_CC,     MVT::i8,    Expand);
  setOperationAction(ISD::BR_CC,     MVT::i16,   Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Expand);
  setOperationAction(ISD::BR_CC,     MVT::i64,   Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f32,   Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64,   Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f80,   Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f128,  Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1,  Expand);
  setOperationAction(ISD::FP_ROUND_INREG,    MVT::f32, Expand);

  if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
    // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
    // is. We should promote the value to 64-bits to solve this.
    // This is what the CRT headers do - `fmodf` is an inline header
    // function casting to f64 and calling `fmod`.
    setOperationAction(ISD::FREM, MVT::f32, Promote);
  } else {
    setOperationAction(ISD::FREM, MVT::f32, Expand);
  }

  setOperationAction(ISD::FREM,        MVT::f64, Expand);
  setOperationAction(ISD::FREM,        MVT::f80, Expand);
  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Promote the i8 variants and force them on up to i32 which has a shorter
  // encoding.
  setOperationAction(ISD::CTTZ,            MVT::i8, Promote);
  AddPromotedToType (ISD::CTTZ,            MVT::i8, MVT::i32);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
  AddPromotedToType (ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32);
  if (Subtarget->hasBMI()) {
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  }

  if (Subtarget->hasLZCNT()) {
    // When promoting the i8 variants, force them to i32 for a shorter
    // encoding.
    setOperationAction(ISD::CTLZ,            MVT::i8, Promote);
    AddPromotedToType (ISD::CTLZ,            MVT::i8, MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
    AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTLZ,            MVT::i8,  Custom);
    setOperationAction(ISD::CTLZ,            MVT::i16, Custom);
    setOperationAction(ISD::CTLZ,            MVT::i32, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8,  Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::CTLZ,            MVT::i64, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    }
  }

  // Special handling for half-precision floating point conversions.
  // If we don't have F16C support, then lower half float conversions
  // into library calls.
  if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  }

  // There's never any support for operations beyond MVT::f32.
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);

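  // Use the POPCNT instruction when it is available; otherwise CTPOP is
  // expanded into a bit-manipulation sequence.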
  if (Subtarget->hasPOPCNT()) {
    setOperationAction(ISD::CTPOP, MVT::i8, Promote);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i8,  Expand);
    setOperationAction(ISD::CTPOP, MVT::i16, Expand);
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  if (!Subtarget->hasMOVBE())
    setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i8,   Custom);
  setOperationAction(ISD::SELECT, MVT::i16,  Custom);
  setOperationAction(ISD::SELECT, MVT::i32,  Custom);
  setOperationAction(ISD::SELECT, MVT::f32,  Custom);
  setOperationAction(ISD::SELECT, MVT::f64,  Custom);
  setOperationAction(ISD::SELECT, MVT::f80,  Custom);
  setOperationAction(ISD::SELECT, MVT::f128, Custom);
  setOperationAction(ISD::SETCC,  MVT::i8,   Custom);
  setOperationAction(ISD::SETCC,  MVT::i16,  Custom);
  setOperationAction(ISD::SETCC,  MVT::i32,  Custom);
  setOperationAction(ISD::SETCC,  MVT::f32,  Custom);
  setOperationAction(ISD::SETCC,  MVT::f64,  Custom);
  setOperationAction(ISD::SETCC,  MVT::f80,  Custom);
  setOperationAction(ISD::SETCC,  MVT::f128, Custom);
  setOperationAction(ISD::SETCCE, MVT::i8,   Custom);
  setOperationAction(ISD::SETCCE, MVT::i16,  Custom);
  setOperationAction(ISD::SETCCE, MVT::i32,  Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC,  MVT::i64, Custom);
    setOperationAction(ISD::SETCCE, MVT::i64, Custom);
  }
  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented and please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP,  MVT::i32,   Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool,     MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,        MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress,    MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol,   MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,     MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool,   MVT::i64, Custom);
    setOperationAction(ISD::JumpTable,      MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress,  MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
    setOperationAction(ISD::BlockAddress,   MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

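  // Atomic fences are custom lowered; a sequentially consistent fence is
  // typically emitted as MFENCE (or an equivalent locked operation).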
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Expand certain atomics
  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  if (Subtarget->hasCmpxchg16b()) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::INIT_TRAMPOLINE,   MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP,      MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::VAARG,  MVT::Other, Custom);
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  } else {
    // TargetInfo::CharPtrBuiltinVaList
    setOperationAction(ISD::VAARG,  MVT::Other, Expand);
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
  setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
  setOperationAction(ISD::GC_TRANSITION_END,   MVT::Other, Custom);

  if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::FR64RegClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // Lower this to FGETSIGNx86 plus an AND.
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN,    MVT::f64, Expand);
    setOperationAction(ISD::FCOS,    MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSIN,    MVT::f32, Expand);
    setOperationAction(ISD::FCOS,    MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN,    MVT::f32, Expand);
    setOperationAction(ISD::FCOS,    MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN,    MVT::f64, Expand);
      setOperationAction(ISD::FCOS,    MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    }
  } else if (!Subtarget->useSoftFloat()) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN,    MVT::f64, Expand);
      setOperationAction(ISD::FSIN,    MVT::f32, Expand);
      setOperationAction(ISD::FCOS,    MVT::f64, Expand);
      setOperationAction(ISD::FCOS,    MVT::f32, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // We don't support FMA.
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Long double always uses X87, except f128 in MMX.
  if (!Subtarget->useSoftFloat()) {
    if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
      addRegisterClass(MVT::f128, &X86::FR128RegClass);
      ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
      setOperationAction(ISD::FABS,      MVT::f128, Custom);
      setOperationAction(ISD::FNEG,      MVT::f128, Custom);
      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
    }

    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
      addLegalFPImmediate(TmpFlt);  // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt);  // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2);  // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
    }

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN,    MVT::f80, Expand);
      setOperationAction(ISD::FCOS,    MVT::f80, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
    }

    setOperationAction(ISD::FFLOOR,     MVT::f80, Expand);
    setOperationAction(ISD::FCEIL,      MVT::f80, Expand);
    setOperationAction(ISD::FTRUNC,     MVT::f80, Expand);
    setOperationAction(ISD::FRINT,      MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA,        MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);

  setOperationAction(ISD::FLOG,    MVT::f80, Expand);
  setOperationAction(ISD::FLOG2,   MVT::f80, Expand);
  setOperationAction(ISD::FLOG10,  MVT::f80, Expand);
  setOperationAction(ISD::FEXP,    MVT::f80, Expand);
  setOperationAction(ISD::FEXP2,   MVT::f80, Expand);
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (MVT VT : MVT::vector_valuetypes()) {
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::LOAD, VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOWI, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    setOperationAction(ISD::TRUNCATE, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(InnerVT, VT, Expand);

      setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);

      // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
      // types, we have to deal with them whether we ask for Expansion or not.
      // Setting Expand causes its own optimisation problems though, so leave
      // them legal.
      if (VT.getVectorElementType() == MVT::i1)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

      // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
      // split/scalarized right now.
      if (VT.getVectorElementType() == MVT::f16)
        setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    }
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
    // No operations on x86mmx supported, everything uses intrinsics.
  }

  // MMX-sized vectors (other than x86mmx) are expected to be expanded
  // into smaller operations.
  for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
    setOperationAction(ISD::MULHS, MMXTy, Expand);
    setOperationAction(ISD::AND, MMXTy, Expand);
    setOperationAction(ISD::OR, MMXTy, Expand);
    setOperationAction(ISD::XOR, MMXTy, Expand);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
    setOperationAction(ISD::SELECT, MMXTy, Expand);
    setOperationAction(ISD::BITCAST, MMXTy, Expand);
  }
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);

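  // With SSE1, v4f32 becomes a legal type: basic FP arithmetic maps directly
  // to SSE instructions, while shuffles, inserts/extracts and selects are
  // custom lowered.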
  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, &X86::VR128RegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::FABS, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
  }

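  // SSE2 adds the 128-bit integer vector types (v16i8, v8i16, v4i32, v2i64)
  // and v2f64 as legal types; operations without a direct instruction are
  // custom lowered or expanded below.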
  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, &X86::VR128RegClass);

    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
    addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
    addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
    addRegisterClass(MVT::v2i64, &X86::VR128RegClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
    setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
    setOperationAction(ISD::FABS, MVT::v2f64, Custom);

    setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
    setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
    setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
    setOperationAction(ISD::UMIN, MVT::v16i8, Legal);

    setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);

    setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
    setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
    // ISD::CTTZ v2i64 - scalarization is faster.
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
    // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::VSELECT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // We support custom legalizing of sext and anyext loads for specific
    // memory vector types which we can load as a scalar (or sequence of
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
    // loads these must work with a single scalar load.
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
    }

    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType (ISD::AND, VT, MVT::v2i64);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType (ISD::OR, VT, MVT::v2i64);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);

    setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);

    setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    // As there is no 64-bit GPR available, we need to build a special custom
    // sequence to convert from v2i32 to v2f32.
    if (!Subtarget->is64Bit())
      setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);

    setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);

    setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
  }

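  // SSE4.1 provides packed rounding (ROUND[PS][SD]), pmin/pmax for more types,
  // pmulld, byte blends, and the PMOVSX/PMOVZX extending loads configured
  // below.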
  if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
    for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
      setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
      setOperationAction(ISD::FCEIL, RoundedTy, Legal);
      setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
      setOperationAction(ISD::FRINT, RoundedTy, Legal);
      setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
    }

    setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
    setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
    setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
    setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
    setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
    setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
    setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
    setOperationAction(ISD::UMIN, MVT::v4i32, Legal);

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    // We directly match byte blends in the backend as they match the VSELECT
    // condition form.
    setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
    for (MVT VT : MVT::integer_vector_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
    }

    // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);

    // i8 and i16 vectors are custom because the source register and source
    // memory operand types are not the same width. f32 vectors are
    // custom since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    // FIXME: these should be Legal, but that's only for the case where
    // the index is constant. For now custom expand to deal with that.
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }
  }

  if (Subtarget->hasSSE2()) {
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);

    setOperationAction(ISD::SRL, MVT::v8i16, Custom);
    setOperationAction(ISD::SRL, MVT::v16i8, Custom);

    setOperationAction(ISD::SHL, MVT::v8i16, Custom);
    setOperationAction(ISD::SHL, MVT::v16i8, Custom);

    setOperationAction(ISD::SRA, MVT::v8i16, Custom);
    setOperationAction(ISD::SRA, MVT::v16i8, Custom);

    // In the customized shift lowering, the legal cases in AVX2 will be
    // recognized.
    setOperationAction(ISD::SRL, MVT::v2i64, Custom);
    setOperationAction(ISD::SRL, MVT::v4i32, Custom);

    setOperationAction(ISD::SHL, MVT::v2i64, Custom);
    setOperationAction(ISD::SHL, MVT::v4i32, Custom);

    setOperationAction(ISD::SRA, MVT::v2i64, Custom);
    setOperationAction(ISD::SRA, MVT::v4i32, Custom);
  }

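  // XOP provides variable per-element rotate instructions (VPROT*), so ROTL
  // can be custom lowered for the 128-bit and 256-bit integer vector types.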
  if (Subtarget->hasXOP()) {
    setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
    setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
    setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
    setOperationAction(ISD::ROTL, MVT::v2i64, Custom);
    setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
    setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
    setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
    setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
  }

  if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
    addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
    addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
    addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
    addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
    addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
    addRegisterClass(MVT::v4f64, &X86::VR256RegClass);

    setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i64, Legal);

    setOperationAction(ISD::FADD, MVT::v8f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
    setOperationAction(ISD::FABS, MVT::v8f32, Custom);

    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
    setOperationAction(ISD::FABS, MVT::v4f64, Custom);

    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
    // even though v8i16 is a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);

    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);

    for (MVT VT : MVT::fp_vector_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);

    setOperationAction(ISD::SRL, MVT::v16i16, Custom);
    setOperationAction(ISD::SRL, MVT::v32i8, Custom);

    setOperationAction(ISD::SHL, MVT::v16i16, Custom);
    setOperationAction(ISD::SHL, MVT::v32i8, Custom);

    setOperationAction(ISD::SRA, MVT::v16i16, Custom);
    setOperationAction(ISD::SRA, MVT::v32i8, Custom);

    setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i64, Custom);

    setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
    setOperationAction(ISD::SELECT, MVT::v8f32, Custom);

    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);

    setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
    setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);

    setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
    setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);

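    // Targets with FMA3/FMA4 get fused multiply-add as a legal operation on
    // both scalar and 128/256-bit vector FP types.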
1165 if (Subtarget->hasAnyFMA()) {
1166 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1167 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1168 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1169 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1170 setOperationAction(ISD::FMA, MVT::f32, Legal);
1171 setOperationAction(ISD::FMA, MVT::f64, Legal);
1174 if (Subtarget->hasInt256()) {
1175 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1176 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1177 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1178 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1180 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1181 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1182 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1183 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1185 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1186 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1187 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1188 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1190 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1191 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1192 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1193 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1195 setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
1196 setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
1197 setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
1198 setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
1199 setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
1200 setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
1201 setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
1202 setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
1203 setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
1204 setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
1205 setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
1206 setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
1208 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1209 // when we have a 256bit-wide blend with immediate.
1210 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1212 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1213 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1214 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1215 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1216 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1217 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1218 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1220 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1221 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1222 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1223 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1224 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1225 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1227 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1228 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1229 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1230 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1232 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1233 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1234 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1235 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1237 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1238 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1239 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1240 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1242 setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
1243 setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
1244 setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
1245 setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
1246 setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
1247 setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
1248 setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
1249 setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
1250 setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
1251 setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
1252 setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
1253 setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
1256 // In the customized shift lowering, the legal cases in AVX2 will be
1258 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1259 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1261 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1262 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1264 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1265 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1267 // Custom lower several nodes for 256-bit types.
1268 for (MVT VT : MVT::vector_valuetypes()) {
1269 if (VT.getScalarSizeInBits() >= 32) {
1270 setOperationAction(ISD::MLOAD, VT, Legal);
1271 setOperationAction(ISD::MSTORE, VT, Legal);
1273 // Extract subvector is special because the value type
1274 // (result) is 128-bit but the source is 256-bit wide.
1275 if (VT.is128BitVector()) {
1276 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1278 // Do not attempt to custom lower other non-256-bit vectors
1279 if (!VT.is256BitVector())
1280 continue;
1282 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1283 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1284 setOperationAction(ISD::VSELECT, VT, Custom);
1285 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1286 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1287 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1288 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1289 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1292 if (Subtarget->hasInt256())
1293 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1295 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1296 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1297 setOperationAction(ISD::AND, VT, Promote);
1298 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1299 setOperationAction(ISD::OR, VT, Promote);
1300 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1301 setOperationAction(ISD::XOR, VT, Promote);
1302 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1303 setOperationAction(ISD::LOAD, VT, Promote);
1304 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1305 setOperationAction(ISD::SELECT, VT, Promote);
1306 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
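// Promotion here only re-types the operation: a bitwise AND of two v32i8
// values, for instance, is carried out as a v4i64 AND, which maps to a single
// 256-bit VPAND and leaves the bit pattern unchanged.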
1310 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
1311 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1312 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1313 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1314 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1316 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1317 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1318 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1320 for (MVT VT : MVT::fp_vector_valuetypes())
1321 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1323 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1324 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1325 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1326 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1327 setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1328 setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1329 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1330 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1331 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1332 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1333 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1334 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1336 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1337 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1338 setOperationAction(ISD::SETCCE, MVT::i1, Custom);
1339 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
1340 setOperationAction(ISD::XOR, MVT::i1, Legal);
1341 setOperationAction(ISD::OR, MVT::i1, Legal);
1342 setOperationAction(ISD::AND, MVT::i1, Legal);
1343 setOperationAction(ISD::SUB, MVT::i1, Custom);
1344 setOperationAction(ISD::ADD, MVT::i1, Custom);
1345 setOperationAction(ISD::MUL, MVT::i1, Custom);
1346 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1347 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1348 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1349 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1350 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1352 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1353 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1354 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1355 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1356 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1357 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1358 setOperationAction(ISD::FABS, MVT::v16f32, Custom);
1360 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1361 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1362 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1363 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1364 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1365 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1366 setOperationAction(ISD::FABS, MVT::v8f64, Custom);
1367 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1368 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1370 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1371 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1372 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1373 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1374 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1375 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1376 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1377 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1378 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1379 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1380 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1381 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1382 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1383 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1384 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1385 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1387 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1388 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1389 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1390 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1391 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
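// These truncating stores correspond to the AVX-512 down-converting store
// forms, e.g. VPMOVQD keeps the low 32 bits of each i64 element and VPMOVDB
// the low 8 bits of each i32 element.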
1392 if (Subtarget->hasVLX()){
1393 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1394 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1395 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1396 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1397 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1399 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1400 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1401 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1402 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1403 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1404 } else {
1405 setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
1406 setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
1407 setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
1408 setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
1409 }
1410 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1411 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1412 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1413 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
1414 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
1415 if (Subtarget->hasDQI()) {
1416 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
1417 setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
1419 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1420 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1421 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1422 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
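// AVX-512DQ adds direct conversions between 64-bit integers and floating
// point (e.g. VCVTQQ2PD, VCVTUQQ2PD, VCVTTPD2QQ), so these cases no longer
// need scalarization or a multi-step lowering.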
1423 if (Subtarget->hasVLX()) {
1424 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
1425 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1426 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
1427 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1428 setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
1429 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1430 setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
1431 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1434 if (Subtarget->hasVLX()) {
1435 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1436 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1437 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1438 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1439 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1440 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1441 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1442 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1444 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1445 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1446 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1447 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1448 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1449 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1450 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1451 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1452 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1453 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1454 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1455 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1456 if (Subtarget->hasDQI()) {
1457 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1458 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1460 setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
1461 setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
1462 setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
1463 setOperationAction(ISD::FCEIL, MVT::v8f64, Legal);
1464 setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal);
1465 setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal);
1466 setOperationAction(ISD::FRINT, MVT::v16f32, Legal);
1467 setOperationAction(ISD::FRINT, MVT::v8f64, Legal);
1468 setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal);
1469 setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal);
1471 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1472 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1473 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1474 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1475 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1477 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1478 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
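// With AVX-512, vector compares produce a mask in a k-register rather than a
// vector of 0/-1 lanes; a SETCC on two v16i32 operands, for example, becomes
// a VPCMPD that writes a v16i1 mask.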
1480 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1482 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1483 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1484 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1485 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1486 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1487 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1488 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1489 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1490 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1491 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1492 setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
1493 setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
1495 setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
1496 setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
1497 setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
1498 setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
1499 setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
1500 setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
1501 setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
1502 setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
1504 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1505 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1507 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1508 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1510 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1512 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1513 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1515 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1516 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1518 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1519 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1521 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1522 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1523 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1524 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1525 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1526 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1528 if (Subtarget->hasCDI()) {
1529 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1530 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1531 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand);
1532 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand);
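// VPLZCNTD/VPLZCNTQ return the element width for a zero input, so plain CTLZ
// is legal here and the _ZERO_UNDEF variant can simply be expanded to it.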
1534 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1535 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1536 setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
1537 setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1538 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand);
1539 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand);
1540 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand);
1541 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand);
1543 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
1544 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
1546 if (Subtarget->hasVLX()) {
1547 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
1548 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
1549 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
1550 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
1551 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
1552 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
1553 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
1554 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
1556 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1557 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1558 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
1559 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
1561 setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
1562 setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
1563 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1564 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1565 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
1566 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
1567 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
1568 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
1570 } // Subtarget->hasCDI()
1572 if (Subtarget->hasDQI()) {
1573 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1574 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1575 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
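// AVX-512DQ provides VPMULLQ, so 64-bit element multiplies no longer need
// the usual PMULUDQ/shift/add sequence.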
1577 // Custom lower several nodes.
1578 for (MVT VT : MVT::vector_valuetypes()) {
1579 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1581 setOperationAction(ISD::AND, VT, Legal);
1582 setOperationAction(ISD::OR, VT, Legal);
1583 setOperationAction(ISD::XOR, VT, Legal);
1585 if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
1586 setOperationAction(ISD::MGATHER, VT, Custom);
1587 setOperationAction(ISD::MSCATTER, VT, Custom);
1589 // Extract subvector is special because the value type
1590 // (result) is 256/128-bit but the source is 512-bit wide.
1591 if (VT.is128BitVector() || VT.is256BitVector()) {
1592 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1594 if (VT.getVectorElementType() == MVT::i1)
1595 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1597 // Do not attempt to custom lower other non-512-bit vectors
1598 if (!VT.is512BitVector())
1599 continue;
1601 if (EltSize >= 32) {
1602 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1603 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1604 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1605 setOperationAction(ISD::VSELECT, VT, Legal);
1606 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1607 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1608 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1609 setOperationAction(ISD::MLOAD, VT, Legal);
1610 setOperationAction(ISD::MSTORE, VT, Legal);
1611 setOperationAction(ISD::MGATHER, VT, Legal);
1612 setOperationAction(ISD::MSCATTER, VT, Custom);
1615 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1616 setOperationAction(ISD::SELECT, VT, Promote);
1617 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1621 if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
1622 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1623 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1625 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1626 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1628 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1629 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1630 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1631 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1632 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1633 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1634 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1635 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1636 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1637 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1638 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1639 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1640 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1641 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1642 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1643 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1644 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1645 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
1646 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
1647 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1648 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1649 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1650 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1651 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1652 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1653 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1654 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1655 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1656 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1657 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1658 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1659 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1660 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1661 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1662 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1663 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
1664 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
1665 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1666 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1667 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1668 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1669 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1671 setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
1672 setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
1673 setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
1674 setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
1675 setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
1676 setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
1677 setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
1678 setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
1680 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1681 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1682 if (Subtarget->hasVLX())
1683 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1685 if (Subtarget->hasCDI()) {
1686 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1687 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1688 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
1689 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand);
1692 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1693 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1694 setOperationAction(ISD::VSELECT, VT, Legal);
1695 setOperationAction(ISD::SRL, VT, Custom);
1696 setOperationAction(ISD::SHL, VT, Custom);
1697 setOperationAction(ISD::SRA, VT, Custom);
1699 setOperationAction(ISD::AND, VT, Promote);
1700 AddPromotedToType (ISD::AND, VT, MVT::v8i64);
1701 setOperationAction(ISD::OR, VT, Promote);
1702 AddPromotedToType (ISD::OR, VT, MVT::v8i64);
1703 setOperationAction(ISD::XOR, VT, Promote);
1704 AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
1708 if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
1709 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1710 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1712 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1713 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1714 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1715 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1716 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1717 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1718 setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
1719 setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
1720 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1721 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
1722 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
1723 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
1725 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1726 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1727 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1728 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1729 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1730 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1731 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1732 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1734 setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
1735 setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
1736 setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
1737 setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
1738 setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
1739 setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
1740 setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
1741 setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
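// With VLX the 64-bit element min/max operations are also available at 128
// and 256 bits (VPMAXSQ/VPMINSQ/VPMAXUQ/VPMINUQ on XMM/YMM operands).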
1744 // We want to custom lower some of our intrinsics.
1745 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1746 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1747 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1748 if (!Subtarget->is64Bit()) {
1749 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1750 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1753 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1754 // handle type legalization for these operations here.
1756 // FIXME: We really should do custom legalization for addition and
1757 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1758 // than generic legalization for 64-bit multiplication-with-overflow, though.
1759 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1760 if (VT == MVT::i64 && !Subtarget->is64Bit())
1761 continue;
1762 // Add/Sub/Mul with overflow operations are custom lowered.
1763 setOperationAction(ISD::SADDO, VT, Custom);
1764 setOperationAction(ISD::UADDO, VT, Custom);
1765 setOperationAction(ISD::SSUBO, VT, Custom);
1766 setOperationAction(ISD::USUBO, VT, Custom);
1767 setOperationAction(ISD::SMULO, VT, Custom);
1768 setOperationAction(ISD::UMULO, VT, Custom);
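// For example, a call to @llvm.sadd.with.overflow.i32 is lowered to an ADD
// that also defines EFLAGS, with the overflow bit read back via SETO or
// branched on directly with JO.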
1771 if (!Subtarget->is64Bit()) {
1772 // These libcalls are not available in 32-bit.
1773 setLibcallName(RTLIB::SHL_I128, nullptr);
1774 setLibcallName(RTLIB::SRL_I128, nullptr);
1775 setLibcallName(RTLIB::SRA_I128, nullptr);
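// 128-bit shifts on 32-bit targets are therefore expanded inline by the
// legalizer instead of being turned into runtime library calls.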
1778 // Combine sin / cos into one node or libcall if possible.
1779 if (Subtarget->hasSinCos()) {
1780 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1781 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1782 if (Subtarget->isTargetDarwin()) {
1783 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1784 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1785 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1786 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
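// For example, a function that computes both sinf(x) and cosf(x) of the same
// argument can then be emitted as a single __sincos_stret call returning the
// pair, rather than two separate libcalls.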
1790 if (Subtarget->isTargetWin64()) {
1791 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1792 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1793 setOperationAction(ISD::SREM, MVT::i128, Custom);
1794 setOperationAction(ISD::UREM, MVT::i128, Custom);
1795 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1796 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
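// There is no native 128-bit divide, so these are lowered to calls to the
// usual __divti3/__udivti3-style runtime helpers, presumably with the i128
// operands passed by reference to satisfy the Win64 calling convention.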
1799 // We have target-specific dag combine patterns for the following nodes:
1800 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1801 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1802 setTargetDAGCombine(ISD::BITCAST);
1803 setTargetDAGCombine(ISD::VSELECT);
1804 setTargetDAGCombine(ISD::SELECT);
1805 setTargetDAGCombine(ISD::SHL);
1806 setTargetDAGCombine(ISD::SRA);
1807 setTargetDAGCombine(ISD::SRL);
1808 setTargetDAGCombine(ISD::OR);
1809 setTargetDAGCombine(ISD::AND);
1810 setTargetDAGCombine(ISD::ADD);
1811 setTargetDAGCombine(ISD::FADD);
1812 setTargetDAGCombine(ISD::FSUB);
1813 setTargetDAGCombine(ISD::FNEG);
1814 setTargetDAGCombine(ISD::FMA);
1815 setTargetDAGCombine(ISD::FMINNUM);
1816 setTargetDAGCombine(ISD::FMAXNUM);
1817 setTargetDAGCombine(ISD::SUB);
1818 setTargetDAGCombine(ISD::LOAD);
1819 setTargetDAGCombine(ISD::MLOAD);
1820 setTargetDAGCombine(ISD::STORE);
1821 setTargetDAGCombine(ISD::MSTORE);
1822 setTargetDAGCombine(ISD::TRUNCATE);
1823 setTargetDAGCombine(ISD::ZERO_EXTEND);
1824 setTargetDAGCombine(ISD::ANY_EXTEND);
1825 setTargetDAGCombine(ISD::SIGN_EXTEND);
1826 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1827 setTargetDAGCombine(ISD::SINT_TO_FP);
1828 setTargetDAGCombine(ISD::UINT_TO_FP);
1829 setTargetDAGCombine(ISD::SETCC);
1830 setTargetDAGCombine(ISD::BUILD_VECTOR);
1831 setTargetDAGCombine(ISD::MUL);
1832 setTargetDAGCombine(ISD::XOR);
1833 setTargetDAGCombine(ISD::MSCATTER);
1834 setTargetDAGCombine(ISD::MGATHER);
1836 computeRegisterProperties(Subtarget->getRegisterInfo());
1838 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1839 MaxStoresPerMemsetOptSize = 8;
1840 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1841 MaxStoresPerMemcpyOptSize = 4;
1842 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1843 MaxStoresPerMemmoveOptSize = 4;
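// With these limits a small fixed-size memcpy or memset (say, zeroing 64
// bytes) is expanded inline into a handful of wide stores instead of a call
// into the C library.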
1844 setPrefLoopAlignment(4); // 2^4 bytes.
1846 // A predictable cmov does not hurt on an in-order CPU.
1847 // FIXME: Use a CPU attribute to trigger this, not a CPU model.
1848 PredictableSelectIsExpensive = !Subtarget->isAtom();
1849 EnableExtLdPromotion = true;
1850 setPrefFunctionAlignment(4); // 2^4 bytes.
1852 verifyIntrinsicTables();
1855 // This has so far only been implemented for 64-bit MachO.
1856 bool X86TargetLowering::useLoadStackGuardNode() const {
1857 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
1860 TargetLoweringBase::LegalizeTypeAction
1861 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1862 if (ExperimentalVectorWideningLegalization &&
1863 VT.getVectorNumElements() != 1 &&
1864 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1865 return TypeWidenVector;
1867 return TargetLoweringBase::getPreferredVectorAction(VT);
1870 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1871 EVT VT) const {
1872 if (!VT.isVector())
1873 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
1875 if (VT.isSimple()) {
1876 MVT VVT = VT.getSimpleVT();
1877 const unsigned NumElts = VVT.getVectorNumElements();
1878 const MVT EltVT = VVT.getVectorElementType();
1879 if (VVT.is512BitVector()) {
1880 if (Subtarget->hasAVX512())
1881 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1882 EltVT == MVT::f32 || EltVT == MVT::f64)
1883 switch (NumElts) {
1884 case 8: return MVT::v8i1;
1885 case 16: return MVT::v16i1;
1886 }
1887 if (Subtarget->hasBWI())
1888 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1889 switch (NumElts) {
1890 case 32: return MVT::v32i1;
1891 case 64: return MVT::v64i1;
1892 }
1893 }
1895 if (VVT.is256BitVector() || VVT.is128BitVector()) {
1896 if (Subtarget->hasVLX())
1897 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1898 EltVT == MVT::f32 || EltVT == MVT::f64)
1899 switch (NumElts) {
1900 case 2: return MVT::v2i1;
1901 case 4: return MVT::v4i1;
1902 case 8: return MVT::v8i1;
1903 }
1904 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1905 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1906 switch (NumElts) {
1907 case 8: return MVT::v8i1;
1908 case 16: return MVT::v16i1;
1909 case 32: return MVT::v32i1;
1910 }
1911 }
1912 }
1914 return VT.changeVectorElementTypeToInteger();
1917 /// Helper for getByValTypeAlignment to determine
1918 /// the desired ByVal argument alignment.
1919 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1920 if (MaxAlign == 16)
1921 return;
1922 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1923 if (VTy->getBitWidth() == 128)
1924 MaxAlign = 16;
1925 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1926 unsigned EltAlign = 0;
1927 getMaxByValAlign(ATy->getElementType(), EltAlign);
1928 if (EltAlign > MaxAlign)
1929 MaxAlign = EltAlign;
1930 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1931 for (auto *EltTy : STy->elements()) {
1932 unsigned EltAlign = 0;
1933 getMaxByValAlign(EltTy, EltAlign);
1934 if (EltAlign > MaxAlign)
1935 MaxAlign = EltAlign;
1942 /// Return the desired alignment for ByVal aggregate
1943 /// function arguments in the caller parameter area. For X86, aggregates
1944 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1945 /// are at 4-byte boundaries.
1946 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1947 const DataLayout &DL) const {
1948 if (Subtarget->is64Bit()) {
1949 // Max of 8 and alignment of type.
1950 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1951 if (TyAlign > 8)
1952 return TyAlign;
1953 return 8;
1954 }
1956 unsigned Align = 4;
1957 if (Subtarget->hasSSE1())
1958 getMaxByValAlign(Ty, Align);
1959 return Align;
1960 }
1962 /// Returns the target specific optimal type for load
1963 /// and store operations as a result of memset, memcpy, and memmove
1964 /// lowering. If DstAlign is zero that means it's safe to destination
1965 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1966 /// means there isn't a need to check it against alignment requirement,
1967 /// probably because the source does not need to be loaded. If 'IsMemset' is
1968 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1969 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1970 /// source is constant so it does not need to be loaded.
1971 /// It returns EVT::Other if the type should be determined using generic
1972 /// target-independent logic.
1973 EVT
1974 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1975 unsigned DstAlign, unsigned SrcAlign,
1976 bool IsMemset, bool ZeroMemset,
1977 bool MemcpyStrSrc,
1978 MachineFunction &MF) const {
1979 const Function *F = MF.getFunction();
1980 if ((!IsMemset || ZeroMemset) &&
1981 !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1982 if (Size >= 16 &&
1983 (!Subtarget->isUnalignedMem16Slow() ||
1984 ((DstAlign == 0 || DstAlign >= 16) &&
1985 (SrcAlign == 0 || SrcAlign >= 16)))) {
1986 if (Size >= 32) {
1987 // FIXME: Check if unaligned 32-byte accesses are slow.
1988 if (Subtarget->hasInt256())
1989 return MVT::v8i32;
1990 if (Subtarget->hasFp256())
1991 return MVT::v8f32;
1992 }
1993 if (Subtarget->hasSSE2())
1994 return MVT::v4i32;
1995 if (Subtarget->hasSSE1())
1996 return MVT::v4f32;
1997 } else if (!MemcpyStrSrc && Size >= 8 &&
1998 !Subtarget->is64Bit() &&
1999 Subtarget->hasSSE2()) {
2000 // Do not use f64 to lower memcpy if source is string constant. It's
2001 // better to use i32 to avoid the loads.
2002 return MVT::f64;
2003 }
2004 }
2005 // This is a compromise. If we reach here, unaligned accesses may be slow on
2006 // this target. However, creating smaller, aligned accesses could be even
2007 // slower and would certainly be a lot more code.
2008 if (Subtarget->is64Bit() && Size >= 8)
2009 return MVT::i64;
2010 return MVT::i32;
2011 }
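// As an example of the selection above: a 64-byte memcpy with 16-byte aligned
// operands on an AVX2 target would get MVT::v8i32 here, so the copy is
// emitted as a pair of 32-byte vector load/store operations.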
2013 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2014 if (VT == MVT::f32)
2015 return X86ScalarSSEf32;
2016 else if (VT == MVT::f64)
2017 return X86ScalarSSEf64;
2018 return true;
2019 }
2021 bool
2022 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2023 unsigned, unsigned, bool *Fast) const {
2026 if (Fast) {
2027 switch (VT.getSizeInBits()) {
2028 default:
2029 // 8-byte and under are always assumed to be fast.
2030 *Fast = true;
2031 break;
2032 case 128:
2033 *Fast = !Subtarget->isUnalignedMem16Slow();
2034 break;
2035 case 256:
2036 *Fast = !Subtarget->isUnalignedMem32Slow();
2037 break;
2038 // TODO: What about AVX-512 (512-bit) accesses?
2039 }
2040 }
2041 // Misaligned accesses of any size are always allowed.
2042 return true;
2043 }
2045 /// Return the entry encoding for a jump table in the
2046 /// current function. The returned value is a member of the
2047 /// MachineJumpTableInfo::JTEntryKind enum.
2048 unsigned X86TargetLowering::getJumpTableEncoding() const {
2049 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2050 // symbol.
2051 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2052 Subtarget->isPICStyleGOT())
2053 return MachineJumpTableInfo::EK_Custom32;
2055 // Otherwise, use the normal jump table encoding heuristics.
2056 return TargetLowering::getJumpTableEncoding();
2059 bool X86TargetLowering::useSoftFloat() const {
2060 return Subtarget->useSoftFloat();
2064 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2065 const MachineBasicBlock *MBB,
2066 unsigned uid,MCContext &Ctx) const{
2067 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
2068 Subtarget->isPICStyleGOT());
2069 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2070 // entries.
2071 return MCSymbolRefExpr::create(MBB->getSymbol(),
2072 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2075 /// Returns relocation base for the given PIC jumptable.
2076 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2077 SelectionDAG &DAG) const {
2078 if (!Subtarget->is64Bit())
2079 // This doesn't have SDLoc associated with it, but is not really the
2080 // same as a Register.
2081 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2082 getPointerTy(DAG.getDataLayout()));
2083 return Table;
2084 }
2086 /// This returns the relocation base for the given PIC jumptable,
2087 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
2088 const MCExpr *X86TargetLowering::
2089 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2090 MCContext &Ctx) const {
2091 // X86-64 uses RIP relative addressing based on the jump table label.
2092 if (Subtarget->isPICStyleRIPRel())
2093 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2095 // Otherwise, the reference is relative to the PIC base.
2096 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2099 std::pair<const TargetRegisterClass *, uint8_t>
2100 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2101 MVT VT) const {
2102 const TargetRegisterClass *RRC = nullptr;
2103 uint8_t Cost = 1;
2104 switch (VT.SimpleTy) {
2105 default:
2106 return TargetLowering::findRepresentativeClass(TRI, VT);
2107 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2108 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2109 break;
2110 case MVT::x86mmx:
2111 RRC = &X86::VR64RegClass;
2112 break;
2113 case MVT::f32: case MVT::f64:
2114 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2115 case MVT::v4f32: case MVT::v2f64:
2116 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
2118 RRC = &X86::VR128RegClass;
2119 break;
2120 }
2121 return std::make_pair(RRC, Cost);
2122 }
2124 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
2125 unsigned &Offset) const {
2126 if (!Subtarget->isTargetLinux())
2129 if (Subtarget->is64Bit()) {
2130 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
2132 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
2144 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2145 if (!Subtarget->isTargetAndroid())
2146 return TargetLowering::getSafeStackPointerLocation(IRB);
2148 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2149 // definition of TLS_SLOT_SAFESTACK in
2150 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2151 unsigned AddressSpace, Offset;
2152 if (Subtarget->is64Bit()) {
2153 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2155 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
2165 return ConstantExpr::getIntToPtr(
2166 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2167 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2170 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2171 unsigned DestAS) const {
2172 assert(SrcAS != DestAS && "Expected different address spaces!");
2174 return SrcAS < 256 && DestAS < 256;
2177 //===----------------------------------------------------------------------===//
2178 // Return Value Calling Convention Implementation
2179 //===----------------------------------------------------------------------===//
2181 #include "X86GenCallingConv.inc"
2183 bool X86TargetLowering::CanLowerReturn(
2184 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2185 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2186 SmallVector<CCValAssign, 16> RVLocs;
2187 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2188 return CCInfo.CheckReturn(Outs, RetCC_X86);
2191 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2192 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2193 return ScratchRegs;
2194 }
2196 SDValue
2197 X86TargetLowering::LowerReturn(SDValue Chain,
2198 CallingConv::ID CallConv, bool isVarArg,
2199 const SmallVectorImpl<ISD::OutputArg> &Outs,
2200 const SmallVectorImpl<SDValue> &OutVals,
2201 SDLoc dl, SelectionDAG &DAG) const {
2202 MachineFunction &MF = DAG.getMachineFunction();
2203 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2205 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2206 report_fatal_error("X86 interrupts may not return any value");
2208 SmallVector<CCValAssign, 16> RVLocs;
2209 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2210 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2212 SDValue Flag;
2213 SmallVector<SDValue, 6> RetOps;
2214 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2215 // Operand #1 = Bytes To Pop
2216 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2219 // Copy the result values into the output registers.
2220 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2221 CCValAssign &VA = RVLocs[i];
2222 assert(VA.isRegLoc() && "Can only return in registers!");
2223 SDValue ValToCopy = OutVals[i];
2224 EVT ValVT = ValToCopy.getValueType();
2226 // Promote values to the appropriate types.
2227 if (VA.getLocInfo() == CCValAssign::SExt)
2228 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2229 else if (VA.getLocInfo() == CCValAssign::ZExt)
2230 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2231 else if (VA.getLocInfo() == CCValAssign::AExt) {
2232 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2233 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2234 else
2235 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2236 }
2237 else if (VA.getLocInfo() == CCValAssign::BCvt)
2238 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2240 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2241 "Unexpected FP-extend for return value.");
2243 // If this is x86-64, and we disabled SSE, we can't return FP values,
2244 // or SSE or MMX vectors.
2245 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2246 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2247 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2248 report_fatal_error("SSE register return with SSE disabled");
2250 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2251 // llvm-gcc has never done it right and no one has noticed, so this
2252 // should be OK for now.
2253 if (ValVT == MVT::f64 &&
2254 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2255 report_fatal_error("SSE2 register return with SSE2 disabled");
2257 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2258 // the RET instruction and handled by the FP Stackifier.
2259 if (VA.getLocReg() == X86::FP0 ||
2260 VA.getLocReg() == X86::FP1) {
2261 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2262 // change the value to the FP stack register class.
2263 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2264 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2265 RetOps.push_back(ValToCopy);
2266 // Don't emit a copytoreg.
2267 continue;
2268 }
2270 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2271 // which is returned in RAX / RDX.
2272 if (Subtarget->is64Bit()) {
2273 if (ValVT == MVT::x86mmx) {
2274 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2275 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2276 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2277 ValToCopy);
2278 // If we don't have SSE2 available, convert to v4f32 so the generated
2279 // register is legal.
2280 if (!Subtarget->hasSSE2())
2281 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2286 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2287 Flag = Chain.getValue(1);
2288 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2291 // All x86 ABIs require that for returning structs by value we copy
2292 // the sret argument into %rax/%eax (depending on ABI) for the return.
2293 // We saved the argument into a virtual register in the entry block,
2294 // so now we copy the value out and into %rax/%eax.
2296 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2297 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2298 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2299 // either case FuncInfo->setSRetReturnReg() will have been called.
2300 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2301 SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
2302 getPointerTy(MF.getDataLayout()));
2304 unsigned RetValReg
2305 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2306 X86::RAX : X86::EAX;
2307 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2308 Flag = Chain.getValue(1);
2310 // RAX/EAX now acts like a return value.
2311 RetOps.push_back(
2312 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2313 }
2315 const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
2316 const MCPhysReg *I =
2317 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2318 if (I) {
2319 for (; *I; ++I) {
2320 if (X86::GR64RegClass.contains(*I))
2321 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2322 else
2323 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2324 }
2325 }
2327 RetOps[0] = Chain; // Update chain.
2329 // Add the flag if we have it.
2330 if (Flag.getNode())
2331 RetOps.push_back(Flag);
2333 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2334 if (CallConv == CallingConv::X86_INTR)
2335 opcode = X86ISD::IRET;
2336 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2339 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2340 if (N->getNumValues() != 1)
2341 return false;
2342 if (!N->hasNUsesOfValue(1, 0))
2343 return false;
2345 SDValue TCChain = Chain;
2346 SDNode *Copy = *N->use_begin();
2347 if (Copy->getOpcode() == ISD::CopyToReg) {
2348 // If the copy has a glue operand, we conservatively assume it isn't safe to
2349 // perform a tail call.
2350 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2351 return false;
2352 TCChain = Copy->getOperand(0);
2353 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2354 return false;
2356 bool HasRet = false;
2357 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2358 UI != UE; ++UI) {
2359 if (UI->getOpcode() != X86ISD::RET_FLAG)
2360 return false;
2361 // If we are returning more than one value, we can definitely
2362 // not make a tail call see PR19530
2363 if (UI->getNumOperands() > 4)
2364 return false;
2365 if (UI->getNumOperands() == 4 &&
2366 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2367 return false;
2368 HasRet = true;
2369 }
2371 if (!HasRet)
2372 return false;
2374 Chain = TCChain;
2375 return true;
2376 }
2378 EVT
2379 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2380 ISD::NodeType ExtendKind) const {
2381 MVT ReturnMVT;
2382 // TODO: Is this also valid on 32-bit?
2383 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2384 ReturnMVT = MVT::i8;
2385 else
2386 ReturnMVT = MVT::i32;
2388 EVT MinVT = getRegisterType(Context, ReturnMVT);
2389 return VT.bitsLT(MinVT) ? MinVT : VT;
2392 /// Lower the result values of a call into the
2393 /// appropriate copies out of appropriate physical registers.
2396 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2397 CallingConv::ID CallConv, bool isVarArg,
2398 const SmallVectorImpl<ISD::InputArg> &Ins,
2399 SDLoc dl, SelectionDAG &DAG,
2400 SmallVectorImpl<SDValue> &InVals) const {
2402 // Assign locations to each value returned by this call.
2403 SmallVector<CCValAssign, 16> RVLocs;
2404 bool Is64Bit = Subtarget->is64Bit();
2405 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2406 *DAG.getContext());
2407 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2409 // Copy all of the result registers out of their specified physreg.
2410 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2411 CCValAssign &VA = RVLocs[i];
2412 EVT CopyVT = VA.getLocVT();
2414 // If this is x86-64, and we disabled SSE, we can't return FP values
2415 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2416 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2417 report_fatal_error("SSE register return with SSE disabled");
2420 // If we prefer to use the value in xmm registers, copy it out as f80 and
2421 // use a truncate to move it from fp stack reg to xmm reg.
2422 bool RoundAfterCopy = false;
2423 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2424 isScalarFPTypeInSSEReg(VA.getValVT())) {
2425 CopyVT = MVT::f80;
2426 RoundAfterCopy = (CopyVT != VA.getLocVT());
2427 }
2429 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2430 CopyVT, InFlag).getValue(1);
2431 SDValue Val = Chain.getValue(0);
2433 if (RoundAfterCopy)
2434 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2435 // This truncation won't change the value.
2436 DAG.getIntPtrConstant(1, dl));
2438 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
2439 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2441 InFlag = Chain.getValue(2);
2442 InVals.push_back(Val);
2443 }
2445 return Chain;
2446 }
2448 //===----------------------------------------------------------------------===//
2449 // C & StdCall & Fast Calling Convention implementation
2450 //===----------------------------------------------------------------------===//
2451 // StdCall calling convention seems to be standard for many Windows' API
2452 // routines and around. It differs from C calling convention just a little:
2453 // callee should clean up the stack, not caller. Symbols should be also
2454 // decorated in some fancy way :) It doesn't support any vector arguments.
2455 // For info on fast calling convention see Fast Calling Convention (tail call)
2456 // implementation LowerX86_32FastCCCallTo.
2458 /// CallIsStructReturn - Determines whether a call uses struct return
2459 /// semantics.
2460 enum StructReturnType {
2461 NotStructReturn,
2462 RegStructReturn,
2463 StackStructReturn
2464 };
2465 static StructReturnType
2466 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2467 if (Outs.empty())
2468 return NotStructReturn;
2470 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2471 if (!Flags.isSRet())
2472 return NotStructReturn;
2473 if (Flags.isInReg() || IsMCU)
2474 return RegStructReturn;
2475 return StackStructReturn;
2478 /// Determines whether a function uses struct return semantics.
2479 static StructReturnType
2480 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2481 if (Ins.empty())
2482 return NotStructReturn;
2484 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2485 if (!Flags.isSRet())
2486 return NotStructReturn;
2487 if (Flags.isInReg() || IsMCU)
2488 return RegStructReturn;
2489 return StackStructReturn;
2492 /// Make a copy of an aggregate at address specified by "Src" to address
2493 /// "Dst" with size and alignment information specified by the specific
2494 /// parameter attribute. The copy will be passed as a byval function parameter.
2495 static SDValue
2496 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2497 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2498 SDLoc dl) {
2499 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2501 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2502 /*isVolatile*/false, /*AlwaysInline=*/true,
2503 /*isTailCall*/false,
2504 MachinePointerInfo(), MachinePointerInfo());
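// The byval copy becomes a memcpy node sized and aligned from the argument's
// attributes; AlwaysInline keeps a small aggregate (e.g. a 24-byte struct)
// from turning into an actual call to memcpy.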
2507 /// Return true if the calling convention is one that we can guarantee TCO for.
2508 static bool canGuaranteeTCO(CallingConv::ID CC) {
2509 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2510 CC == CallingConv::HiPE || CC == CallingConv::HHVM);
2513 /// Return true if we might ever do TCO for calls with this calling convention.
2514 static bool mayTailCallThisCC(CallingConv::ID CC) {
2515 switch (CC) {
2516 // C calling conventions:
2517 case CallingConv::C:
2518 case CallingConv::X86_64_Win64:
2519 case CallingConv::X86_64_SysV:
2520 // Callee pop conventions:
2521 case CallingConv::X86_ThisCall:
2522 case CallingConv::X86_StdCall:
2523 case CallingConv::X86_VectorCall:
2524 case CallingConv::X86_FastCall:
2525 return true;
2526 default:
2527 return canGuaranteeTCO(CC);
2528 }
2529 }
2531 /// Return true if the function is being made into a tailcall target by
2532 /// changing its ABI.
2533 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2534 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2537 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2538 auto Attr =
2539 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2540 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2541 return false;
2543 CallSite CS(CI);
2544 CallingConv::ID CalleeCC = CS.getCallingConv();
2545 if (!mayTailCallThisCC(CalleeCC))
2546 return false;
2548 return true;
2549 }
2552 X86TargetLowering::LowerMemArgument(SDValue Chain,
2553 CallingConv::ID CallConv,
2554 const SmallVectorImpl<ISD::InputArg> &Ins,
2555 SDLoc dl, SelectionDAG &DAG,
2556 const CCValAssign &VA,
2557 MachineFrameInfo *MFI,
2558 unsigned i) const {
2559 // Create the nodes corresponding to a load from this parameter slot.
2560 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2561 bool AlwaysUseMutable = shouldGuaranteeTCO(
2562 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2563 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2566 // If value is passed by pointer we have address passed instead of the value
2567 // itself.
2568 bool ExtendedInMem = VA.isExtInLoc() &&
2569 VA.getValVT().getScalarType() == MVT::i1;
2570 EVT ValVT;
2571 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2572 ValVT = VA.getLocVT();
2573 else
2574 ValVT = VA.getValVT();
2576 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2577 // taken by a return address.
2578 int Offset = 0;
2579 if (CallConv == CallingConv::X86_INTR) {
2580 const X86Subtarget& Subtarget =
2581 static_cast<const X86Subtarget&>(DAG.getSubtarget());
2582 // X86 interrupts may take one or two arguments.
2583 // On the stack there will be no return address as in regular call.
2584 // The offset of the last argument needs to be set to -4/-8 bytes,
2585 // while the offset of the first argument (out of two) should be set to 0 bytes.
2586 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2589 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2590 // changed with more analysis.
2591 // In case of tail call optimization mark all arguments mutable. Since they
2592 // could be overwritten by lowering of arguments in case of a tail call.
2593 if (Flags.isByVal()) {
2594 unsigned Bytes = Flags.getByValSize();
2595 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2596 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2597 // Adjust SP offset of interrupt parameter.
2598 if (CallConv == CallingConv::X86_INTR) {
2599 MFI->setObjectOffset(FI, Offset);
2601 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2603 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2604 VA.getLocMemOffset(), isImmutable);
2605 // Adjust SP offset of interrupt parameter.
2606 if (CallConv == CallingConv::X86_INTR) {
2607 MFI->setObjectOffset(FI, Offset);
2610 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2611 SDValue Val = DAG.getLoad(
2612 ValVT, dl, Chain, FIN,
2613 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
2614 false, false, 0);
2615 return ExtendedInMem ?
2616 DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
2620 // FIXME: Get this from tablegen.
2621 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2622 const X86Subtarget *Subtarget) {
2623 assert(Subtarget->is64Bit());
2625 if (Subtarget->isCallingConvWin64(CallConv)) {
2626 static const MCPhysReg GPR64ArgRegsWin64[] = {
2627 X86::RCX, X86::RDX, X86::R8, X86::R9
2629 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2632 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2633 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2635 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2638 // FIXME: Get this from tablegen.
2639 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2640 CallingConv::ID CallConv,
2641 const X86Subtarget *Subtarget) {
2642 assert(Subtarget->is64Bit());
2643 if (Subtarget->isCallingConvWin64(CallConv)) {
2644 // The XMM registers which might contain var arg parameters are shadowed
2645 // in their paired GPR. So we only need to save the GPR to their home
2646 // slots.
2647 // TODO: __vectorcall will change this.
2648 return None;
2649 }
2651 const Function *Fn = MF.getFunction();
2652 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2653 bool isSoftFloat = Subtarget->useSoftFloat();
2654 assert(!(isSoftFloat && NoImplicitFloatOps) &&
2655 "SSE register cannot be used when SSE is disabled!");
2656 if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
2657 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2658 // registers.
2659 return None;
2661 static const MCPhysReg XMMArgRegs64Bit[] = {
2662 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2663 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2665 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2668 SDValue X86TargetLowering::LowerFormalArguments(
2669 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2670 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2671 SmallVectorImpl<SDValue> &InVals) const {
2672 MachineFunction &MF = DAG.getMachineFunction();
2673 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2674 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
2676 const Function* Fn = MF.getFunction();
2677 if (Fn->hasExternalLinkage() &&
2678 Subtarget->isTargetCygMing() &&
2679 Fn->getName() == "main")
2680 FuncInfo->setForceFramePointer(true);
2682 MachineFrameInfo *MFI = MF.getFrameInfo();
2683 bool Is64Bit = Subtarget->is64Bit();
2684 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2686 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2687 "Var args not supported with calling convention fastcc, ghc or hipe");
2689 if (CallConv == CallingConv::X86_INTR) {
2690 bool isLegal = Ins.size() == 1 ||
2691 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2692 (!Is64Bit && Ins[1].VT == MVT::i32)));
2693 if (!isLegal)
2694 report_fatal_error("X86 interrupts may take one or two arguments");
2695 }
2697 // Assign locations to all of the incoming arguments.
2698 SmallVector<CCValAssign, 16> ArgLocs;
2699 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2701 // Allocate shadow area for Win64
2702 if (IsWin64)
2703 CCInfo.AllocateStack(32, 8);
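// The Win64 ABI reserves a 32-byte home area for the four register parameters
// ahead of any stack-passed arguments; accounting for it here keeps the
// CCState offsets of real stack arguments correct.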
2705 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2707 unsigned LastVal = ~0U;
2708 SDValue ArgValue;
2709 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2710 CCValAssign &VA = ArgLocs[i];
2711 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2712 // places.
2713 assert(VA.getValNo() != LastVal &&
2714 "Don't support value assigned to multiple locs yet");
2716 LastVal = VA.getValNo();
2718 if (VA.isRegLoc()) {
2719 EVT RegVT = VA.getLocVT();
2720 const TargetRegisterClass *RC;
2721 if (RegVT == MVT::i32)
2722 RC = &X86::GR32RegClass;
2723 else if (Is64Bit && RegVT == MVT::i64)
2724 RC = &X86::GR64RegClass;
2725 else if (RegVT == MVT::f32)
2726 RC = &X86::FR32RegClass;
2727 else if (RegVT == MVT::f64)
2728 RC = &X86::FR64RegClass;
2729 else if (RegVT == MVT::f128)
2730 RC = &X86::FR128RegClass;
2731 else if (RegVT.is512BitVector())
2732 RC = &X86::VR512RegClass;
2733 else if (RegVT.is256BitVector())
2734 RC = &X86::VR256RegClass;
2735 else if (RegVT.is128BitVector())
2736 RC = &X86::VR128RegClass;
2737 else if (RegVT == MVT::x86mmx)
2738 RC = &X86::VR64RegClass;
2739 else if (RegVT == MVT::i1)
2740 RC = &X86::VK1RegClass;
2741 else if (RegVT == MVT::v8i1)
2742 RC = &X86::VK8RegClass;
2743 else if (RegVT == MVT::v16i1)
2744 RC = &X86::VK16RegClass;
2745 else if (RegVT == MVT::v32i1)
2746 RC = &X86::VK32RegClass;
2747 else if (RegVT == MVT::v64i1)
2748 RC = &X86::VK64RegClass;
2749 else
2750 llvm_unreachable("Unknown argument type!");
2752 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2753 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2755 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2756 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2757 // right size.
2758 if (VA.getLocInfo() == CCValAssign::SExt)
2759 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2760 DAG.getValueType(VA.getValVT()));
2761 else if (VA.getLocInfo() == CCValAssign::ZExt)
2762 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2763 DAG.getValueType(VA.getValVT()));
2764 else if (VA.getLocInfo() == CCValAssign::BCvt)
2765 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
2767 if (VA.isExtInLoc()) {
2768 // Handle MMX values passed in XMM regs.
2769 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
2770 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2771 else
2772 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2773 }
2774 } else {
2775 assert(VA.isMemLoc());
2776 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2777 }
2779 // If value is passed via pointer - do a load.
2780 if (VA.getLocInfo() == CCValAssign::Indirect)
2781 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2782 MachinePointerInfo(), false, false, false, 0);
2784 InVals.push_back(ArgValue);
2785 }
2787 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2788 // All x86 ABIs require that for returning structs by value we copy the
2789 // sret argument into %rax/%eax (depending on ABI) for the return. Save
2790 // the argument into a virtual register so that we can access it from the
2791 // return points.
2792 if (Ins[i].Flags.isSRet()) {
2793 unsigned Reg = FuncInfo->getSRetReturnReg();
2794 if (Reg == 0) {
2795 MVT PtrTy = getPointerTy(DAG.getDataLayout());
2796 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2797 FuncInfo->setSRetReturnReg(Reg);
2798 }
2799 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2800 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2801 break;
2802 }
2803 }
2805 unsigned StackSize = CCInfo.getNextStackOffset();
2806 // Align stack specially for tail calls.
2807 if (shouldGuaranteeTCO(CallConv,
2808 MF.getTarget().Options.GuaranteedTailCallOpt))
2809 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2811 // If the function takes variable number of arguments, make a frame index for
2812 // the start of the first vararg value... for expansion of llvm.va_start. We
2813 // can skip this if there are no va_start calls.
2814 if (MFI->hasVAStart() &&
2815 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2816 CallConv != CallingConv::X86_ThisCall))) {
2817 FuncInfo->setVarArgsFrameIndex(
2818 MFI->CreateFixedObject(1, StackSize, true));
2819 }
2821 // Figure out if XMM registers are in use.
2822 assert(!(Subtarget->useSoftFloat() &&
2823 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
2824 "SSE register cannot be used when SSE is disabled!");
2826 // 64-bit calling conventions support varargs and register parameters, so we
2827 // have to do extra work to spill them in the prologue.
2828 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2829 // Find the first unallocated argument registers.
2830 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2831 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2832 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
2833 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
2834 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2835 "SSE register cannot be used when SSE is disabled!");
2837 // Gather all the live in physical registers.
2838 SmallVector<SDValue, 6> LiveGPRs;
2839 SmallVector<SDValue, 8> LiveXMMRegs;
2840 SDValue ALVal;
2841 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2842 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2843 LiveGPRs.push_back(
2844 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
2845 }
2846 if (!ArgXMMs.empty()) {
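// Under the System V AMD64 ABI the caller passes in AL an upper bound on the
// number of vector registers used by a varargs call; it is used below to
// guard the spills of the XMM argument registers.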
2847 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2848 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2849 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2850 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2851 LiveXMMRegs.push_back(
2852 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2853 }
2854 }
2856 if (IsWin64) {
2857 // Get to the caller-allocated home save location. Add 8 to account
2858 // for the return address.
2859 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2860 FuncInfo->setRegSaveFrameIndex(
2861 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2862 // Fixup to set vararg frame on shadow area (4 x i64).
2863 if (NumIntRegs < 4)
2864 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2865 } else {
2866 // For X86-64, if there are vararg parameters that are passed via
2867 // registers, then we must store them to their spots on the stack so
2868 // they may be loaded by dereferencing the result of va_next.
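// The register save area is laid out as six 8-byte GPR slots followed by
// eight 16-byte XMM slots; the va_list's gp_offset and fp_offset fields
// index into this block.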
2869 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2870 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2871 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2872 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2873 }
2875 // Store the integer parameter registers.
2876 SmallVector<SDValue, 8> MemOps;
2877 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2878 getPointerTy(DAG.getDataLayout()));
2879 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2880 for (SDValue Val : LiveGPRs) {
2881 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2882 RSFIN, DAG.getIntPtrConstant(Offset, dl));
2883 SDValue Store =
2884 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2885 MachinePointerInfo::getFixedStack(
2886 DAG.getMachineFunction(),
2887 FuncInfo->getRegSaveFrameIndex(), Offset),
2888 false, false, 0);
2889 MemOps.push_back(Store);
2890 Offset += 8;
2891 }
2893 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2894 // Now store the XMM (fp + vector) parameter registers.
2895 SmallVector<SDValue, 12> SaveXMMOps;
2896 SaveXMMOps.push_back(Chain);
2897 SaveXMMOps.push_back(ALVal);
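// ALVal was copied from AL above; the custom inserter for
// VASTART_SAVE_XMM_REGS branches on it, so the XMM stores are skipped when
// the caller passed no vector arguments.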
2898 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2899 FuncInfo->getRegSaveFrameIndex(), dl));
2900 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2901 FuncInfo->getVarArgsFPOffset(), dl));
2902 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2903 LiveXMMRegs.end());
2904 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2905 MVT::Other, SaveXMMOps));
2906 }
2908 if (!MemOps.empty())
2909 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2910 }
2912 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2913 // Find the largest legal vector type.
2914 MVT VecVT = MVT::Other;
2915 // FIXME: Only some x86_32 calling conventions support AVX512.
2916 if (Subtarget->hasAVX512() &&
2917 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2918 CallConv == CallingConv::Intel_OCL_BI)))
2919 VecVT = MVT::v16f32;
2920 else if (Subtarget->hasAVX())
2921 VecVT = MVT::v8f32;
2922 else if (Subtarget->hasSSE2())
2923 VecVT = MVT::v4f32;
2925 // We forward some GPRs and some vector types.
2926 SmallVector<MVT, 2> RegParmTypes;
2927 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2928 RegParmTypes.push_back(IntVT);
2929 if (VecVT != MVT::Other)
2930 RegParmTypes.push_back(VecVT);
2932 // Compute the set of forwarded registers. The rest are scratch.
2933 SmallVectorImpl<ForwardedRegister> &Forwards =
2934 FuncInfo->getForwardedMustTailRegParms();
2935 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
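// The forwarded registers are spilled into virtual registers below and are
// copied back into the same physical registers when the musttail call itself
// is lowered.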
2937 // Conservatively forward AL on x86_64, since it might be used for varargs.
2938 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2939 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2940 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2941 }
2943 // Copy all forwards from physical to virtual registers.
2944 for (ForwardedRegister &F : Forwards) {
2945 // FIXME: Can we use a less constrained schedule?
2946 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2947 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2948 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2949 }
2950 }
2952 // Some CCs need callee pop.
2953 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2954 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2955 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2956 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
2957 // X86 interrupts must pop the error code if present
2958 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
2959 } else {
2960 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2961 // If this is an sret function, the return should pop the hidden pointer.
2962 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
2963 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2964 argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
2965 FuncInfo->setBytesToPopOnReturn(4);
2966 }
2968 if (!Is64Bit) {
2969 // RegSaveFrameIndex is X86-64 only.
2970 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
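// (0xAAAAAAA is just a recognizable placeholder; these frame indices are not
// expected to be referenced on 32-bit targets.)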
2971 if (CallConv == CallingConv::X86_FastCall ||
2972 CallConv == CallingConv::X86_ThisCall)
2973 // fastcc functions can't have varargs.
2974 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2975 }
2977 FuncInfo->setArgumentStackSize(StackSize);
2979 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
2980 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
2981 if (Personality == EHPersonality::CoreCLR) {
2983 // TODO: Add a mechanism to frame lowering that will allow us to indicate
2984 // that we'd prefer this slot be allocated towards the bottom of the frame
2985 // (i.e. near the stack pointer after allocating the frame). Every
2986 // funclet needs a copy of this slot in its (mostly empty) frame, and the
2987 // offset from the bottom of this and each funclet's frame must be the
2988 // same, so the size of funclets' (mostly empty) frames is dictated by
2989 // how far this slot is from the bottom (since they allocate just enough
2990 // space to accommodate holding this slot at the correct offset).
2991 int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
2992 EHInfo->PSPSymFrameIdx = PSPSymFI;
2993 }
2994 }
2996 return Chain;
2997 }
2999 SDValue
3000 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
3001 SDValue StackPtr, SDValue Arg,
3002 SDLoc dl, SelectionDAG &DAG,
3003 const CCValAssign &VA,
3004 ISD::ArgFlagsTy Flags) const {
3005 unsigned LocMemOffset = VA.getLocMemOffset();
3006 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3007 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3008 StackPtr, PtrOff);
3009 if (Flags.isByVal())
3010 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3012 return DAG.getStore(
3013 Chain, dl, Arg, PtrOff,
3014 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
3015 false, false, 0);
3016 }
3018 /// Emit a load of the return address if tail call
3019 /// optimization is performed and it is required.
3020 SDValue
3021 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
3022 SDValue &OutRetAddr, SDValue Chain,
3023 bool IsTailCall, bool Is64Bit,
3024 int FPDiff, SDLoc dl) const {
3025 // Adjust the Return address stack slot.
3026 EVT VT = getPointerTy(DAG.getDataLayout());
3027 OutRetAddr = getReturnAddressFrameIndex(DAG);
3029 // Load the "old" Return address.
3030 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
3031 false, false, false, 0);
3032 return SDValue(OutRetAddr.getNode(), 1);
3033 }
3035 /// Emit a store of the return address if tail call
3036 /// optimization is performed and it is required (FPDiff!=0).
3037 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3038 SDValue Chain, SDValue RetAddrFrIdx,
3039 EVT PtrVT, unsigned SlotSize,
3040 int FPDiff, SDLoc dl) {
3041 // Store the return address to the appropriate stack slot.
3042 if (!FPDiff) return Chain;
3043 // Calculate the new stack slot for the return address.
3044 int NewReturnAddrFI =
3045 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3046 false);
3047 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3048 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3049 MachinePointerInfo::getFixedStack(
3050 DAG.getMachineFunction(), NewReturnAddrFI),
3051 false, false, 0);
3052 return Chain;
3053 }
3055 /// Returns a vector_shuffle mask for a movs{s|d}, movd
3056 /// operation of specified width.
3057 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
3058 SDValue V2) {
3059 unsigned NumElems = VT.getVectorNumElements();
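// Build the mask { NumElems, 1, 2, ..., NumElems-1 }: element 0 is taken from
// V2 and the remaining elements from V1, matching MOVSS/MOVSD semantics.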
3060 SmallVector<int, 8> Mask;
3061 Mask.push_back(NumElems);
3062 for (unsigned i = 1; i != NumElems; ++i)
3063 Mask.push_back(i);
3064 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
3065 }
3067 SDValue
3068 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3069 SmallVectorImpl<SDValue> &InVals) const {
3070 SelectionDAG &DAG = CLI.DAG;
3071 SDLoc &dl = CLI.DL;
3072 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3073 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3074 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3075 SDValue Chain = CLI.Chain;
3076 SDValue Callee = CLI.Callee;
3077 CallingConv::ID CallConv = CLI.CallConv;
3078 bool &isTailCall = CLI.IsTailCall;
3079 bool isVarArg = CLI.IsVarArg;
3081 MachineFunction &MF = DAG.getMachineFunction();
3082 bool Is64Bit = Subtarget->is64Bit();
3083 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
3084 StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
3085 bool IsSibcall = false;
3086 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3087 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3089 if (CallConv == CallingConv::X86_INTR)
3090 report_fatal_error("X86 interrupts may not be called directly");
3092 if (Attr.getValueAsString() == "true")
3093 isTailCall = false;
3095 if (Subtarget->isPICStyleGOT() &&
3096 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3097 // If we are using a GOT, disable tail calls to external symbols with
3098 // default visibility. Tail calling such a symbol requires using a GOT
3099 // relocation, which forces early binding of the symbol. This breaks code
3100 // that requires lazy function symbol resolution. Using musttail or
3101 // GuaranteedTailCallOpt will override this.
3102 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3103 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3104 G->getGlobal()->hasDefaultVisibility()))
3105 isTailCall = false;
3106 }
3108 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
3109 if (IsMustTail) {
3110 // Force this to be a tail call. The verifier rules are enough to ensure
3111 // that we can lower this successfully without moving the return address
3112 // around.
3113 isTailCall = true;
3114 } else if (isTailCall) {
3115 // Check if it's really possible to do a tail call.
3116 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3117 isVarArg, SR != NotStructReturn,
3118 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3119 Outs, OutVals, Ins, DAG);
3121 // Sibcalls are automatically detected tailcalls which do not require
3122 // ABI changes.
3123 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3124 IsSibcall = true;
3126 if (isTailCall)
3127 ++NumTailCalls;
3128 }
3130 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3131 "Var args not supported with calling convention fastcc, ghc or hipe");
3133 // Analyze operands of the call, assigning locations to each operand.
3134 SmallVector<CCValAssign, 16> ArgLocs;
3135 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3137 // Allocate shadow area for Win64
3138 if (IsWin64)
3139 CCInfo.AllocateStack(32, 8);
3141 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3143 // Get a count of how many bytes are to be pushed on the stack.
3144 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3145 if (IsSibcall)
3146 // This is a sibcall. The memory operands are available in the caller's
3147 // own caller's stack.
3148 NumBytes = 0;
3149 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3150 canGuaranteeTCO(CallConv))
3151 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3153 int FPDiff = 0;
3154 if (isTailCall && !IsSibcall && !IsMustTail) {
3155 // Lower arguments at fp - stackoffset + fpdiff.
3156 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3158 FPDiff = NumBytesCallerPushed - NumBytes;
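// FPDiff is the difference between the caller's incoming argument area and
// this call's outgoing argument area, i.e. how far the return address slot
// has to move (see EmitTailCallStoreRetAddr).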
3160 // Set the delta of movement of the returnaddr stackslot.
3161 // But only set if delta is greater than previous delta.
3162 if (FPDiff < X86Info->getTCReturnAddrDelta())
3163 X86Info->setTCReturnAddrDelta(FPDiff);
3164 }
3166 unsigned NumBytesToPush = NumBytes;
3167 unsigned NumBytesToPop = NumBytes;
3169 // If we have an inalloca argument, all stack space has already been allocated
3170 // for us and will be right at the top of the stack. We don't support multiple
3171 // arguments passed in memory when using inalloca.
3172 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3173 NumBytesToPush = 0;
3174 if (!ArgLocs.back().isMemLoc())
3175 report_fatal_error("cannot use inalloca attribute on a register "
3176 "parameter");
3177 if (ArgLocs.back().getLocMemOffset() != 0)
3178 report_fatal_error("any parameter with the inalloca attribute must be "
3179 "the only memory argument");
3180 }
3182 if (!IsSibcall)
3183 Chain = DAG.getCALLSEQ_START(
3184 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3186 SDValue RetAddrFrIdx;
3187 // Load return address for tail calls.
3188 if (isTailCall && FPDiff)
3189 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3190 Is64Bit, FPDiff, dl);
3192 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3193 SmallVector<SDValue, 8> MemOpChains;
3194 SDValue StackPtr;
3196 // Walk the register/memloc assignments, inserting copies/loads. In the case
3197 // of tail call optimization, arguments are handled later.
3198 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3199 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3200 // Skip inalloca arguments, they have already been written.
3201 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3202 if (Flags.isInAlloca())
3203 continue;
3205 CCValAssign &VA = ArgLocs[i];
3206 EVT RegVT = VA.getLocVT();
3207 SDValue Arg = OutVals[i];
3208 bool isByVal = Flags.isByVal();
3210 // Promote the value if needed.
3211 switch (VA.getLocInfo()) {
3212 default: llvm_unreachable("Unknown loc info!");
3213 case CCValAssign::Full: break;
3214 case CCValAssign::SExt:
3215 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3216 break;
3217 case CCValAssign::ZExt:
3218 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3219 break;
3220 case CCValAssign::AExt:
3221 if (Arg.getValueType().isVector() &&
3222 Arg.getValueType().getVectorElementType() == MVT::i1)
3223 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3224 else if (RegVT.is128BitVector()) {
3225 // Special case: passing MMX values in XMM registers.
3226 Arg = DAG.getBitcast(MVT::i64, Arg);
3227 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3228 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3229 } else
3230 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3231 break;
3232 case CCValAssign::BCvt:
3233 Arg = DAG.getBitcast(RegVT, Arg);
3234 break;
3235 case CCValAssign::Indirect: {
3236 // Store the argument.
3237 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3238 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3239 Chain = DAG.getStore(
3240 Chain, dl, Arg, SpillSlot,
3241 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3242 false, false, 0);
3243 Arg = SpillSlot;
3244 break;
3245 }
3246 }
3248 if (VA.isRegLoc()) {
3249 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3250 if (isVarArg && IsWin64) {
3251 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3252 // shadow reg if callee is a varargs function.
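// Varargs callees on Win64 may spill the four integer home slots and walk
// them as a va_list, so a vector argument must also be available in its
// paired GPR.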
3253 unsigned ShadowReg = 0;
3254 switch (VA.getLocReg()) {
3255 case X86::XMM0: ShadowReg = X86::RCX; break;
3256 case X86::XMM1: ShadowReg = X86::RDX; break;
3257 case X86::XMM2: ShadowReg = X86::R8; break;
3258 case X86::XMM3: ShadowReg = X86::R9; break;
3259 }
3260 if (ShadowReg)
3261 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3262 }
3263 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3264 assert(VA.isMemLoc());
3265 if (!StackPtr.getNode())
3266 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3267 getPointerTy(DAG.getDataLayout()));
3268 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3269 dl, DAG, VA, Flags));
3270 }
3271 }
3273 if (!MemOpChains.empty())
3274 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);