1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
13 //===----------------------------------------------------------------------===//
15 #include "X86ISelLowering.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "X86CallingConv.h"
18 #include "X86FrameLowering.h"
19 #include "X86InstrBuilder.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86TargetMachine.h"
23 #include "X86TargetObjectFile.h"
24 #include "llvm/ADT/SmallBitVector.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/ADT/StringSwitch.h"
29 #include "llvm/Analysis/EHPersonalities.h"
30 #include "llvm/CodeGen/IntrinsicLowering.h"
31 #include "llvm/CodeGen/MachineFrameInfo.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineJumpTableInfo.h"
35 #include "llvm/CodeGen/MachineModuleInfo.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/WinEHFuncInfo.h"
38 #include "llvm/IR/CallSite.h"
39 #include "llvm/IR/CallingConv.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalAlias.h"
44 #include "llvm/IR/GlobalVariable.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/MC/MCAsmInfo.h"
48 #include "llvm/MC/MCContext.h"
49 #include "llvm/MC/MCExpr.h"
50 #include "llvm/MC/MCSymbol.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Target/TargetOptions.h"
56 #include "X86IntrinsicsInfo.h"
62 #define DEBUG_TYPE "x86-isel"
64 STATISTIC(NumTailCalls, "Number of tail calls");
66 static cl::opt<bool> ExperimentalVectorWideningLegalization(
67 "x86-experimental-vector-widening-legalization", cl::init(false),
68 cl::desc("Enable an experimental vector type legalization through widening "
69 "rather than promotion."),
72 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
73 const X86Subtarget &STI)
74 : TargetLowering(TM), Subtarget(&STI) {
75 X86ScalarSSEf64 = Subtarget->hasSSE2();
76 X86ScalarSSEf32 = Subtarget->hasSSE1();
77 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
79 // Set up the TargetLowering object.
81 // X86 is weird. It always uses i8 for shift amounts and setcc results.
82 setBooleanContents(ZeroOrOneBooleanContent);
83 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
84 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
86 // For 64-bit, since we have so many registers, use the ILP scheduler.
87 // For 32-bit, use the register pressure specific scheduling.
88 // For Atom, always use ILP scheduling.
89 if (Subtarget->isAtom())
90 setSchedulingPreference(Sched::ILP);
91 else if (Subtarget->is64Bit())
92 setSchedulingPreference(Sched::ILP);
94 setSchedulingPreference(Sched::RegPressure);
95 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
96 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
98 // Bypass expensive divides on Atom when compiling with O2.
99 if (TM.getOptLevel() >= CodeGenOpt::Default) {
100 if (Subtarget->hasSlowDivide32())
101 addBypassSlowDiv(32, 8);
102 if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
103 addBypassSlowDiv(64, 16);
106 if (Subtarget->isTargetKnownWindowsMSVC()) {
107 // Setup Windows compiler runtime calls.
108 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
109 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
110 setLibcallName(RTLIB::SREM_I64, "_allrem");
111 setLibcallName(RTLIB::UREM_I64, "_aullrem");
112 setLibcallName(RTLIB::MUL_I64, "_allmul");
113 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
114 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
115 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
116 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
117 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
120 if (Subtarget->isTargetDarwin()) {
121 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
122 setUseUnderscoreSetJmp(false);
123 setUseUnderscoreLongJmp(false);
124 } else if (Subtarget->isTargetWindowsGNU()) {
125 // MS runtime is weird: it exports _setjmp, but longjmp!
126 setUseUnderscoreSetJmp(true);
127 setUseUnderscoreLongJmp(false);
129 setUseUnderscoreSetJmp(true);
130 setUseUnderscoreLongJmp(true);
133 // Set up the register classes.
134 addRegisterClass(MVT::i8, &X86::GR8RegClass);
135 addRegisterClass(MVT::i16, &X86::GR16RegClass);
136 addRegisterClass(MVT::i32, &X86::GR32RegClass);
137 if (Subtarget->is64Bit())
138 addRegisterClass(MVT::i64, &X86::GR64RegClass);
140 for (MVT VT : MVT::integer_valuetypes())
141 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
143 // We don't accept any truncstore of integer registers.
144 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
145 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
146 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
147 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
148 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
149 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
151 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
153 // SETOEQ and SETUNE require checking two conditions.
154 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
155 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
156 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
157 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
158 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
159 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
161 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
163 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
164 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
165 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
167 if (Subtarget->is64Bit()) {
168 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
169 // f32/f64 are legal, f80 is custom.
170 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
172 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
173 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
174 } else if (!Subtarget->useSoftFloat()) {
175 // We have an algorithm for SSE2->double, and we turn this into a
176 // 64-bit FILD followed by conditional FADD for other targets.
177 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
178 // We have an algorithm for SSE2, and we turn this into a 64-bit
179 // FILD or VCVTUSI2SS/SD for other targets.
180 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
183 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
185 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
186 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
188 if (!Subtarget->useSoftFloat()) {
189 // SSE has no i16 to fp conversion, only i32
190 if (X86ScalarSSEf32) {
191 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
192 // f32 and f64 cases are Legal, f80 case is not
193 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
195 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
196 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
199 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
200 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
203 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
205 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
206 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
208 if (!Subtarget->useSoftFloat()) {
209 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
210 // are Legal, f80 is custom lowered.
211 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
212 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
214 if (X86ScalarSSEf32) {
215 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
216 // f32 and f64 cases are Legal, f80 case is not
217 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
219 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
220 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
223 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
224 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
225 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
228 // Handle FP_TO_UINT by promoting the destination to a larger signed
230 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
231 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
232 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
234 if (Subtarget->is64Bit()) {
235 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
236 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
237 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
238 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
240 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
241 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
243 } else if (!Subtarget->useSoftFloat()) {
244 // Since AVX is a superset of SSE3, only check for SSE here.
245 if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
246 // Expand FP_TO_UINT into a select.
247 // FIXME: We would like to use a Custom expander here eventually to do
248 // the optimal thing for SSE vs. the default expansion in the legalizer.
249 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
251 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
252 // With SSE3 we can use fisttpll to convert to a signed i64; without
253 // SSE, we're stuck with a fistpll.
254 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
256 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
259 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
260 if (!X86ScalarSSEf64) {
261 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
262 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
263 if (Subtarget->is64Bit()) {
264 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
265 // Without SSE, i64->f64 goes through memory.
266 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
270 // Scalar integer divide and remainder are lowered to use operations that
271 // produce two results, to match the available instructions. This exposes
272 // the two-result form to trivial CSE, which is able to combine x/y and x%y
273 // into a single instruction.
275 // Scalar integer multiply-high is also lowered to use two-result
276 // operations, to match the available instructions. However, plain multiply
277 // (low) operations are left as Legal, as there are single-result
278 // instructions for this in x86. Using the two-result multiply instructions
279 // when both high and low results are needed must be arranged by dagcombine.
280 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
281 setOperationAction(ISD::MULHS, VT, Expand);
282 setOperationAction(ISD::MULHU, VT, Expand);
283 setOperationAction(ISD::SDIV, VT, Expand);
284 setOperationAction(ISD::UDIV, VT, Expand);
285 setOperationAction(ISD::SREM, VT, Expand);
286 setOperationAction(ISD::UREM, VT, Expand);
288 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
289 setOperationAction(ISD::ADDC, VT, Custom);
290 setOperationAction(ISD::ADDE, VT, Custom);
291 setOperationAction(ISD::SUBC, VT, Custom);
292 setOperationAction(ISD::SUBE, VT, Custom);
295 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
296 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
297 setOperationAction(ISD::BR_CC , MVT::f32, Expand);
298 setOperationAction(ISD::BR_CC , MVT::f64, Expand);
299 setOperationAction(ISD::BR_CC , MVT::f80, Expand);
300 setOperationAction(ISD::BR_CC , MVT::f128, Expand);
301 setOperationAction(ISD::BR_CC , MVT::i8, Expand);
302 setOperationAction(ISD::BR_CC , MVT::i16, Expand);
303 setOperationAction(ISD::BR_CC , MVT::i32, Expand);
304 setOperationAction(ISD::BR_CC , MVT::i64, Expand);
305 setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
306 setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
307 setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
308 setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
309 setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
310 setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
311 setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
312 setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
313 if (Subtarget->is64Bit())
314 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
315 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
316 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
317 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
318 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
320 if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
321 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
322 // is. We should promote the value to 64-bits to solve this.
323 // This is what the CRT headers do - `fmodf` is an inline header
324 // function casting to f64 and calling `fmod`.
325 setOperationAction(ISD::FREM , MVT::f32 , Promote);
327 setOperationAction(ISD::FREM , MVT::f32 , Expand);
330 setOperationAction(ISD::FREM , MVT::f64 , Expand);
331 setOperationAction(ISD::FREM , MVT::f80 , Expand);
332 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
334 // Promote the i8 variants and force them on up to i32 which has a shorter
336 setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
337 AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
338 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
339 AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
340 if (Subtarget->hasBMI()) {
341 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
342 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
343 if (Subtarget->is64Bit())
344 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
346 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
347 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
348 if (Subtarget->is64Bit())
349 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
352 if (Subtarget->hasLZCNT()) {
353 // When promoting the i8 variants, force them to i32 for a shorter
355 setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
356 AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
357 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
358 AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
359 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
360 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
361 if (Subtarget->is64Bit())
362 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
364 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
365 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
366 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
367 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
368 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
369 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
370 if (Subtarget->is64Bit()) {
371 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
376 // Special handling for half-precision floating point conversions.
377 // If we don't have F16C support, then lower half float conversions
378 // into library calls.
379 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
380 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
381 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
384 // There's never any support for operations beyond MVT::f32.
385 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
386 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
387 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
388 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
390 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
391 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
392 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
393 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
394 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
395 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
397 if (Subtarget->hasPOPCNT()) {
398 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
400 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
401 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
402 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
403 if (Subtarget->is64Bit())
404 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
407 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
409 if (!Subtarget->hasMOVBE())
410 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
412 // These should be promoted to a larger select which is supported.
413 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
414 // X86 wants to expand cmov itself.
415 setOperationAction(ISD::SELECT , MVT::i8 , Custom);
416 setOperationAction(ISD::SELECT , MVT::i16 , Custom);
417 setOperationAction(ISD::SELECT , MVT::i32 , Custom);
418 setOperationAction(ISD::SELECT , MVT::f32 , Custom);
419 setOperationAction(ISD::SELECT , MVT::f64 , Custom);
420 setOperationAction(ISD::SELECT , MVT::f80 , Custom);
421 setOperationAction(ISD::SELECT , MVT::f128 , Custom);
422 setOperationAction(ISD::SETCC , MVT::i8 , Custom);
423 setOperationAction(ISD::SETCC , MVT::i16 , Custom);
424 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
425 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
426 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
427 setOperationAction(ISD::SETCC , MVT::f80 , Custom);
428 setOperationAction(ISD::SETCC , MVT::f128 , Custom);
429 setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
430 setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
431 setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
432 if (Subtarget->is64Bit()) {
433 setOperationAction(ISD::SELECT , MVT::i64 , Custom);
434 setOperationAction(ISD::SETCC , MVT::i64 , Custom);
435 setOperationAction(ISD::SETCCE , MVT::i64 , Custom);
437 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
438 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
439 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
440 // support continuation, user-level threading, etc. As a result, no
441 // other SjLj exception interfaces are implemented and please don't build
442 // your own exception handling based on them.
443 // LLVM/Clang supports zero-cost DWARF exception handling.
444 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
445 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
448 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
449 setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
450 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
451 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
452 if (Subtarget->is64Bit())
453 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
454 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
455 setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
456 if (Subtarget->is64Bit()) {
457 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
458 setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
459 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
460 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
461 setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
463 // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
464 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
465 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
466 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
467 if (Subtarget->is64Bit()) {
468 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
469 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
470 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
473 if (Subtarget->hasSSE1())
474 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
476 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
478 // Expand certain atomics
479 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
480 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
481 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
482 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
485 if (Subtarget->hasCmpxchg16b()) {
486 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
489 // FIXME - use subtarget debug flags
490 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
491 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
492 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
495 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
496 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
498 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
499 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
501 setOperationAction(ISD::TRAP, MVT::Other, Legal);
502 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
504 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
505 setOperationAction(ISD::VASTART , MVT::Other, Custom);
506 setOperationAction(ISD::VAEND , MVT::Other, Expand);
507 if (Subtarget->is64Bit()) {
508 setOperationAction(ISD::VAARG , MVT::Other, Custom);
509 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
511 // TargetInfo::CharPtrBuiltinVaList
512 setOperationAction(ISD::VAARG , MVT::Other, Expand);
513 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
516 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
517 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
519 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
521 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
522 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
523 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
525 if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
526 // f32 and f64 use SSE.
527 // Set up the FP register classes.
528 addRegisterClass(MVT::f32, &X86::FR32RegClass);
529 addRegisterClass(MVT::f64, &X86::FR64RegClass);
531 // Use ANDPD to simulate FABS.
532 setOperationAction(ISD::FABS , MVT::f64, Custom);
533 setOperationAction(ISD::FABS , MVT::f32, Custom);
535 // Use XORP to simulate FNEG.
536 setOperationAction(ISD::FNEG , MVT::f64, Custom);
537 setOperationAction(ISD::FNEG , MVT::f32, Custom);
539 // Use ANDPD and ORPD to simulate FCOPYSIGN.
540 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
541 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
543 // Lower this to FGETSIGNx86 plus an AND.
544 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
545 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
547 // We don't support sin/cos/fmod
548 setOperationAction(ISD::FSIN , MVT::f64, Expand);
549 setOperationAction(ISD::FCOS , MVT::f64, Expand);
550 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
551 setOperationAction(ISD::FSIN , MVT::f32, Expand);
552 setOperationAction(ISD::FCOS , MVT::f32, Expand);
553 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
555 // Expand FP immediates into loads from the stack, except for the special
557 addLegalFPImmediate(APFloat(+0.0)); // xorpd
558 addLegalFPImmediate(APFloat(+0.0f)); // xorps
559 } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
560 // Use SSE for f32, x87 for f64.
561 // Set up the FP register classes.
562 addRegisterClass(MVT::f32, &X86::FR32RegClass);
563 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
565 // Use ANDPS to simulate FABS.
566 setOperationAction(ISD::FABS , MVT::f32, Custom);
568 // Use XORP to simulate FNEG.
569 setOperationAction(ISD::FNEG , MVT::f32, Custom);
571 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
573 // Use ANDPS and ORPS to simulate FCOPYSIGN.
574 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
575 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
577 // We don't support sin/cos/fmod
578 setOperationAction(ISD::FSIN , MVT::f32, Expand);
579 setOperationAction(ISD::FCOS , MVT::f32, Expand);
580 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
582 // Special cases we handle for FP constants.
583 addLegalFPImmediate(APFloat(+0.0f)); // xorps
584 addLegalFPImmediate(APFloat(+0.0)); // FLD0
585 addLegalFPImmediate(APFloat(+1.0)); // FLD1
586 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
587 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
589 if (!TM.Options.UnsafeFPMath) {
590 setOperationAction(ISD::FSIN , MVT::f64, Expand);
591 setOperationAction(ISD::FCOS , MVT::f64, Expand);
592 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
594 } else if (!Subtarget->useSoftFloat()) {
595 // f32 and f64 in x87.
596 // Set up the FP register classes.
597 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
598 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
600 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
601 setOperationAction(ISD::UNDEF, MVT::f32, Expand);
602 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
603 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
605 if (!TM.Options.UnsafeFPMath) {
606 setOperationAction(ISD::FSIN , MVT::f64, Expand);
607 setOperationAction(ISD::FSIN , MVT::f32, Expand);
608 setOperationAction(ISD::FCOS , MVT::f64, Expand);
609 setOperationAction(ISD::FCOS , MVT::f32, Expand);
610 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
611 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
613 addLegalFPImmediate(APFloat(+0.0)); // FLD0
614 addLegalFPImmediate(APFloat(+1.0)); // FLD1
615 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
616 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
617 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
618 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
619 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
620 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
623 // We don't support FMA.
624 setOperationAction(ISD::FMA, MVT::f64, Expand);
625 setOperationAction(ISD::FMA, MVT::f32, Expand);
627 // Long double always uses X87, except f128 in MMX.
628 if (!Subtarget->useSoftFloat()) {
629 if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
630 addRegisterClass(MVT::f128, &X86::FR128RegClass);
631 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
632 setOperationAction(ISD::FABS , MVT::f128, Custom);
633 setOperationAction(ISD::FNEG , MVT::f128, Custom);
634 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
637 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
638 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
639 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
641 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
642 addLegalFPImmediate(TmpFlt); // FLD0
644 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
647 APFloat TmpFlt2(+1.0);
648 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
650 addLegalFPImmediate(TmpFlt2); // FLD1
651 TmpFlt2.changeSign();
652 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
655 if (!TM.Options.UnsafeFPMath) {
656 setOperationAction(ISD::FSIN , MVT::f80, Expand);
657 setOperationAction(ISD::FCOS , MVT::f80, Expand);
658 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
661 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
662 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
663 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
664 setOperationAction(ISD::FRINT, MVT::f80, Expand);
665 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
666 setOperationAction(ISD::FMA, MVT::f80, Expand);
669 // Always use a library call for pow.
670 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
671 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
672 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
674 setOperationAction(ISD::FLOG, MVT::f80, Expand);
675 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
676 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
677 setOperationAction(ISD::FEXP, MVT::f80, Expand);
678 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
679 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
680 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
682 // First set operation action for all vector types to either promote
683 // (for widening) or expand (for scalarization). Then we will selectively
684 // turn on ones that can be effectively codegen'd.
685 for (MVT VT : MVT::vector_valuetypes()) {
686 setOperationAction(ISD::ADD , VT, Expand);
687 setOperationAction(ISD::SUB , VT, Expand);
688 setOperationAction(ISD::FADD, VT, Expand);
689 setOperationAction(ISD::FNEG, VT, Expand);
690 setOperationAction(ISD::FSUB, VT, Expand);
691 setOperationAction(ISD::MUL , VT, Expand);
692 setOperationAction(ISD::FMUL, VT, Expand);
693 setOperationAction(ISD::SDIV, VT, Expand);
694 setOperationAction(ISD::UDIV, VT, Expand);
695 setOperationAction(ISD::FDIV, VT, Expand);
696 setOperationAction(ISD::SREM, VT, Expand);
697 setOperationAction(ISD::UREM, VT, Expand);
698 setOperationAction(ISD::LOAD, VT, Expand);
699 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
700 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
701 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
702 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
703 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
704 setOperationAction(ISD::FABS, VT, Expand);
705 setOperationAction(ISD::FSIN, VT, Expand);
706 setOperationAction(ISD::FSINCOS, VT, Expand);
707 setOperationAction(ISD::FCOS, VT, Expand);
708 setOperationAction(ISD::FSINCOS, VT, Expand);
709 setOperationAction(ISD::FREM, VT, Expand);
710 setOperationAction(ISD::FMA, VT, Expand);
711 setOperationAction(ISD::FPOWI, VT, Expand);
712 setOperationAction(ISD::FSQRT, VT, Expand);
713 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
714 setOperationAction(ISD::FFLOOR, VT, Expand);
715 setOperationAction(ISD::FCEIL, VT, Expand);
716 setOperationAction(ISD::FTRUNC, VT, Expand);
717 setOperationAction(ISD::FRINT, VT, Expand);
718 setOperationAction(ISD::FNEARBYINT, VT, Expand);
719 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
720 setOperationAction(ISD::MULHS, VT, Expand);
721 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
722 setOperationAction(ISD::MULHU, VT, Expand);
723 setOperationAction(ISD::SDIVREM, VT, Expand);
724 setOperationAction(ISD::UDIVREM, VT, Expand);
725 setOperationAction(ISD::FPOW, VT, Expand);
726 setOperationAction(ISD::CTPOP, VT, Expand);
727 setOperationAction(ISD::CTTZ, VT, Expand);
728 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
729 setOperationAction(ISD::CTLZ, VT, Expand);
730 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
731 setOperationAction(ISD::SHL, VT, Expand);
732 setOperationAction(ISD::SRA, VT, Expand);
733 setOperationAction(ISD::SRL, VT, Expand);
734 setOperationAction(ISD::ROTL, VT, Expand);
735 setOperationAction(ISD::ROTR, VT, Expand);
736 setOperationAction(ISD::BSWAP, VT, Expand);
737 setOperationAction(ISD::SETCC, VT, Expand);
738 setOperationAction(ISD::FLOG, VT, Expand);
739 setOperationAction(ISD::FLOG2, VT, Expand);
740 setOperationAction(ISD::FLOG10, VT, Expand);
741 setOperationAction(ISD::FEXP, VT, Expand);
742 setOperationAction(ISD::FEXP2, VT, Expand);
743 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
744 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
745 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
746 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
747 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
748 setOperationAction(ISD::TRUNCATE, VT, Expand);
749 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
750 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
751 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
752 setOperationAction(ISD::VSELECT, VT, Expand);
753 setOperationAction(ISD::SELECT_CC, VT, Expand);
754 for (MVT InnerVT : MVT::vector_valuetypes()) {
755 setTruncStoreAction(InnerVT, VT, Expand);
757 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
758 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
760 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
761 // types, we have to deal with them whether we ask for Expansion or not.
762 // Setting Expand causes its own optimisation problems though, so leave
764 if (VT.getVectorElementType() == MVT::i1)
765 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
767 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
768 // split/scalarized right now.
769 if (VT.getVectorElementType() == MVT::f16)
770 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
774 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
775 // with -msoft-float, disable use of MMX as well.
776 if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
777 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
778 // No operations on x86mmx supported, everything uses intrinsics.
781 // MMX-sized vectors (other than x86mmx) are expected to be expanded
782 // into smaller operations.
783 for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
784 setOperationAction(ISD::MULHS, MMXTy, Expand);
785 setOperationAction(ISD::AND, MMXTy, Expand);
786 setOperationAction(ISD::OR, MMXTy, Expand);
787 setOperationAction(ISD::XOR, MMXTy, Expand);
788 setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
789 setOperationAction(ISD::SELECT, MMXTy, Expand);
790 setOperationAction(ISD::BITCAST, MMXTy, Expand);
792 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
794 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
795 addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
797 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
798 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
799 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
800 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
801 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
802 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
803 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
804 setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
805 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
806 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
807 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
808 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
809 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
810 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
813 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
814 addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
816 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
817 // registers cannot be used even for integer operations.
818 addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
819 addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
820 addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
821 addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
823 setOperationAction(ISD::ADD, MVT::v16i8, Legal);
824 setOperationAction(ISD::ADD, MVT::v8i16, Legal);
825 setOperationAction(ISD::ADD, MVT::v4i32, Legal);
826 setOperationAction(ISD::ADD, MVT::v2i64, Legal);
827 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
828 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
829 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
830 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
831 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
832 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
833 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
834 setOperationAction(ISD::SUB, MVT::v16i8, Legal);
835 setOperationAction(ISD::SUB, MVT::v8i16, Legal);
836 setOperationAction(ISD::SUB, MVT::v4i32, Legal);
837 setOperationAction(ISD::SUB, MVT::v2i64, Legal);
838 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
839 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
840 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
841 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
842 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
843 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
844 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
845 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
847 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
848 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
849 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
850 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
852 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
853 setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
854 setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
855 setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
857 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
858 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
859 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
860 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
861 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
863 setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
864 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
865 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
866 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
868 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
869 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
870 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
871 // ISD::CTTZ v2i64 - scalarization is faster.
872 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
873 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
874 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
875 // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
877 // Custom lower build_vector, vector_shuffle, vselect, and extract_vector_elt.
878 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
879 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
880 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
881 setOperationAction(ISD::VSELECT, VT, Custom);
882 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
885 // We support custom legalizing of sext and anyext loads for specific
886 // memory vector types which we can load as a scalar (or sequence of
887 // scalars) and extend in-register to a legal 128-bit vector type. For sext
888 // loads these must work with a single scalar load.
889 for (MVT VT : MVT::integer_vector_valuetypes()) {
890 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
891 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
892 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
893 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
894 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
895 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
896 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
897 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
898 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
901 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
902 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
903 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
904 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
905 setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
906 setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
907 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
908 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
910 if (Subtarget->is64Bit()) {
911 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
912 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
915 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
916 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
917 setOperationAction(ISD::AND, VT, Promote);
918 AddPromotedToType (ISD::AND, VT, MVT::v2i64);
919 setOperationAction(ISD::OR, VT, Promote);
920 AddPromotedToType (ISD::OR, VT, MVT::v2i64);
921 setOperationAction(ISD::XOR, VT, Promote);
922 AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
923 setOperationAction(ISD::LOAD, VT, Promote);
924 AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
925 setOperationAction(ISD::SELECT, VT, Promote);
926 AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
929 // Custom lower v2i64 and v2f64 selects.
930 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
931 setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
932 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
933 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
935 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
936 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
938 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
940 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
941 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
942 // As there is no 64-bit GPR available, we need to build a special custom
943 // sequence to convert from v2i32 to v2f32.
944 if (!Subtarget->is64Bit())
945 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
947 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
948 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
950 for (MVT VT : MVT::fp_vector_valuetypes())
951 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
953 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
954 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
955 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
958 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
959 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
960 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
961 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
962 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
963 setOperationAction(ISD::FRINT, RoundedTy, Legal);
964 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
967 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
968 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
969 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
970 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
971 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
972 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
973 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
974 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
976 // FIXME: Do we need to handle scalar-to-vector here?
977 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
979 // We directly match byte blends in the backend as they match the VSELECT
981 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
983 // SSE41 brings specific instructions for doing vector sign extend even in
984 // cases where we don't have SRA.
985 for (MVT VT : MVT::integer_vector_valuetypes()) {
986 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
987 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
988 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
991 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
992 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
993 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
994 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
995 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
996 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
997 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
999 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
1000 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1001 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1002 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1003 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1004 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1006 // i8 and i16 vectors are custom because the source register and source
1007 // memory operand types are not the same width. f32 vectors are
1008 // custom since the immediate controlling the insert encodes additional
1010 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1011 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1012 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1013 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1015 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
1016 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
1017 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
1018 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1020 // FIXME: these should be Legal, but that's only for the case where
1021 // the index is constant. For now custom expand to deal with that.
1022 if (Subtarget->is64Bit()) {
1023 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1024 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1028 if (Subtarget->hasSSE2()) {
1029 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1030 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1031 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1033 setOperationAction(ISD::SRL, MVT::v8i16, Custom);
1034 setOperationAction(ISD::SRL, MVT::v16i8, Custom);
1036 setOperationAction(ISD::SHL, MVT::v8i16, Custom);
1037 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
1039 setOperationAction(ISD::SRA, MVT::v8i16, Custom);
1040 setOperationAction(ISD::SRA, MVT::v16i8, Custom);
1042 // In the customized shift lowering, the legal cases in AVX2 will be
1044 setOperationAction(ISD::SRL, MVT::v2i64, Custom);
1045 setOperationAction(ISD::SRL, MVT::v4i32, Custom);
1047 setOperationAction(ISD::SHL, MVT::v2i64, Custom);
1048 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
1050 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1051 setOperationAction(ISD::SRA, MVT::v4i32, Custom);
1054 if (Subtarget->hasXOP()) {
1055 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1056 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1057 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1058 setOperationAction(ISD::ROTL, MVT::v2i64, Custom);
1059 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1060 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1061 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1062 setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
1065 if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
1066 addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
1067 addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
1068 addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
1069 addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
1070 addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
1071 addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
1073 setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
1074 setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
1075 setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
1077 setOperationAction(ISD::FADD, MVT::v8f32, Legal);
1078 setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
1079 setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
1080 setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
1081 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
1082 setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
1083 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
1084 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
1085 setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
1086 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
1087 setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
1088 setOperationAction(ISD::FABS, MVT::v8f32, Custom);
1090 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1091 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1092 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1093 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1094 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1095 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1096 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1097 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1098 setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
1099 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
1100 setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
1101 setOperationAction(ISD::FABS, MVT::v4f64, Custom);
1103 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1104 // even though v8i16 is a legal type.
1105 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1106 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1107 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1109 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1110 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1111 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1113 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1114 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1116 for (MVT VT : MVT::fp_vector_valuetypes())
1117 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1119 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
1120 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
1122 setOperationAction(ISD::SHL, MVT::v16i16, Custom);
1123 setOperationAction(ISD::SHL, MVT::v32i8, Custom);
1125 setOperationAction(ISD::SRA, MVT::v16i16, Custom);
1126 setOperationAction(ISD::SRA, MVT::v32i8, Custom);
1128 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1129 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1130 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1131 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1133 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1134 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1135 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1137 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1138 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1139 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1140 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1141 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1142 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1143 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1144 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1145 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1146 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1147 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1148 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1150 setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
1151 setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
1152 setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
1153 setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
1155 setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
1156 setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
1157 setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
1158 setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
1159 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
1160 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
1161 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1162 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1164 if (Subtarget->hasAnyFMA()) {
1165 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1166 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1167 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1168 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1169 setOperationAction(ISD::FMA, MVT::f32, Legal);
1170 setOperationAction(ISD::FMA, MVT::f64, Legal);
1173 if (Subtarget->hasInt256()) {
1174 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1175 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1176 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1177 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1179 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1180 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1181 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1182 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1184 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1185 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1186 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1187 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1189 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1190 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1191 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1192 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1194 setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
1195 setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
1196 setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
1197 setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
1198 setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
1199 setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
1200 setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
1201 setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
1202 setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
1203 setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
1204 setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
1205 setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
1207 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1208 // when we have a 256bit-wide blend with immediate.
1209 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1211 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1212 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1213 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1214 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1215 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1216 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1217 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1219 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1220 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1221 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1222 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1223 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1224 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1226 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1227 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1228 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1229 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1231 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1232 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1233 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1234 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1236 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1237 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1238 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1239 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1241 setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
1242 setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
1243 setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
1244 setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
1245 setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
1246 setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
1247 setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
1248 setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
1249 setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
1250 setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
1251 setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
1252 setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
1255 // In the customized shift lowering, the legal cases in AVX2 will be
1257 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1258 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1260 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1261 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1263 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1264 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1266 // Custom lower several nodes for 256-bit types.
1267 for (MVT VT : MVT::vector_valuetypes()) {
1268 if (VT.getScalarSizeInBits() >= 32) {
1269 setOperationAction(ISD::MLOAD, VT, Legal);
1270 setOperationAction(ISD::MSTORE, VT, Legal);
1272 // Extract subvector is special because the value type
1273 // (result) is 128-bit but the source is 256-bit wide.
1274 if (VT.is128BitVector()) {
1275 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1277 // Do not attempt to custom lower other non-256-bit vectors
1278 if (!VT.is256BitVector())
1281 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1282 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1283 setOperationAction(ISD::VSELECT, VT, Custom);
1284 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1285 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1286 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1287 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1288 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1291 if (Subtarget->hasInt256())
1292 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1294 // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
1295 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1296 setOperationAction(ISD::AND, VT, Promote);
1297 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1298 setOperationAction(ISD::OR, VT, Promote);
1299 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1300 setOperationAction(ISD::XOR, VT, Promote);
1301 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1302 setOperationAction(ISD::LOAD, VT, Promote);
1303 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1304 setOperationAction(ISD::SELECT, VT, Promote);
1305 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1309 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
1310 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1311 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1312 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1313 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1315 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1316 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1317 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1319 for (MVT VT : MVT::fp_vector_valuetypes())
1320 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1322 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1323 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1324 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1325 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1326 setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1327 setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1328 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1329 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1330 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1331 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1332 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1333 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1335 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1336 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1337 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
1338 setOperationAction(ISD::XOR, MVT::i1, Legal);
1339 setOperationAction(ISD::OR, MVT::i1, Legal);
1340 setOperationAction(ISD::AND, MVT::i1, Legal);
1341 setOperationAction(ISD::SUB, MVT::i1, Custom);
1342 setOperationAction(ISD::ADD, MVT::i1, Custom);
1343 setOperationAction(ISD::MUL, MVT::i1, Custom);
1344 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1345 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1346 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1347 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1348 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1350 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1351 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1352 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1353 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1354 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1355 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1356 setOperationAction(ISD::FABS, MVT::v16f32, Custom);
1358 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1359 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1360 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1361 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1362 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1363 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1364 setOperationAction(ISD::FABS, MVT::v8f64, Custom);
1365 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1366 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1368 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1369 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1370 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1371 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1372 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1373 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1374 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1375 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1376 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1377 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1378 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1379 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1380 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1381 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1382 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1383 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1385 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1386 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1387 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1388 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1389 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1390 if (Subtarget->hasVLX()){
1391 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1392 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1393 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1394 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1395 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1397 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1398 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1399 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1400 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1401 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1403 setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
1404 setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
1405 setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
1406 setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
1408 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1409 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1410 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1411 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
1412 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
1413 if (Subtarget->hasDQI()) {
1414 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
1415 setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
1417 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1418 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1419 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1420 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1421 if (Subtarget->hasVLX()) {
1422 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
1423 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1424 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
1425 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1426 setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
1427 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1428 setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
1429 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1432 if (Subtarget->hasVLX()) {
1433 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1434 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1435 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1436 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1437 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1438 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1439 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1440 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1442 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1443 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1444 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1445 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1446 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1447 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1448 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1449 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1450 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1451 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1452 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1453 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1454 if (Subtarget->hasDQI()) {
1455 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1456 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1458 setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
1459 setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
1460 setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
1461 setOperationAction(ISD::FCEIL, MVT::v8f64, Legal);
1462 setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal);
1463 setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal);
1464 setOperationAction(ISD::FRINT, MVT::v16f32, Legal);
1465 setOperationAction(ISD::FRINT, MVT::v8f64, Legal);
1466 setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal);
1467 setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal);
1469 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1470 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1471 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1472 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1473 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1475 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1476 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1478 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1480 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1481 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1482 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1483 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1484 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1485 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1486 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1487 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1488 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1489 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1490 setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
1491 setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
1493 setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
1494 setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
1495 setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
1496 setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
1497 setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
1498 setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
1499 setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
1500 setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
1502 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1503 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1505 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1506 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1508 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1510 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1511 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1513 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1514 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1516 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1517 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1519 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1520 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1521 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1522 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1523 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1524 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1526 if (Subtarget->hasCDI()) {
1527 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1528 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1529 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand);
1530 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand);
1532 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1533 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1534 setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
1535 setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1536 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand);
1537 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand);
1538 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand);
1539 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand);
1541 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
1542 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
1544 if (Subtarget->hasVLX()) {
1545 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
1546 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
1547 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
1548 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
1549 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
1550 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
1551 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
1552 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
1554 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1555 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1556 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
1557 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
1559 setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
1560 setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
1561 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1562 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1563 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
1564 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
1565 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
1566 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
1568 } // Subtarget->hasCDI()
1570 if (Subtarget->hasDQI()) {
1571 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1572 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1573 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1575 // Custom lower several nodes.
1576 for (MVT VT : MVT::vector_valuetypes()) {
1577 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1579 setOperationAction(ISD::AND, VT, Legal);
1580 setOperationAction(ISD::OR, VT, Legal);
1581 setOperationAction(ISD::XOR, VT, Legal);
1583 if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
1584 setOperationAction(ISD::MGATHER, VT, Custom);
1585 setOperationAction(ISD::MSCATTER, VT, Custom);
1587 // Extract subvector is special because the value type
1588 // (result) is 256/128-bit but the source is 512-bit wide.
1589 if (VT.is128BitVector() || VT.is256BitVector()) {
1590 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1592 if (VT.getVectorElementType() == MVT::i1)
1593 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1595 // Do not attempt to custom lower other non-512-bit vectors
1596 if (!VT.is512BitVector())
1599 if (EltSize >= 32) {
1600 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1601 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1602 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1603 setOperationAction(ISD::VSELECT, VT, Legal);
1604 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1605 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1606 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1607 setOperationAction(ISD::MLOAD, VT, Legal);
1608 setOperationAction(ISD::MSTORE, VT, Legal);
1609 setOperationAction(ISD::MGATHER, VT, Legal);
1610 setOperationAction(ISD::MSCATTER, VT, Custom);
1613 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1614 setOperationAction(ISD::SELECT, VT, Promote);
1615 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1619 if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
1620 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1621 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1623 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1624 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1626 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1627 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1628 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1629 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1630 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1631 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1632 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1633 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1634 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1635 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1636 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1637 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1638 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1639 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1640 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1641 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1642 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1643 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
1644 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
1645 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1646 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1647 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1648 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1649 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1650 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1651 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1652 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1653 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1654 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1655 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1656 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1657 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1658 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1659 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1660 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1661 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
1662 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
1663 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1664 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1665 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1666 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1667 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1669 setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
1670 setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
1671 setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
1672 setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
1673 setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
1674 setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
1675 setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
1676 setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
1678 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1679 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1680 if (Subtarget->hasVLX())
1681 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1683 if (Subtarget->hasCDI()) {
1684 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1685 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1686 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
1687 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand);
1690 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1691 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1692 setOperationAction(ISD::VSELECT, VT, Legal);
1693 setOperationAction(ISD::SRL, VT, Custom);
1694 setOperationAction(ISD::SHL, VT, Custom);
1695 setOperationAction(ISD::SRA, VT, Custom);
1697 setOperationAction(ISD::AND, VT, Promote);
1698 AddPromotedToType (ISD::AND, VT, MVT::v8i64);
1699 setOperationAction(ISD::OR, VT, Promote);
1700 AddPromotedToType (ISD::OR, VT, MVT::v8i64);
1701 setOperationAction(ISD::XOR, VT, Promote);
1702 AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
1706 if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
1707 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1708 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1710 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1711 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1712 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1713 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1714 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1715 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1716 setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
1717 setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
1718 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1719 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
1720 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
1721 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
1723 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1724 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1725 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1726 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1727 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1728 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1729 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1730 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1732 setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
1733 setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
1734 setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
1735 setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
1736 setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
1737 setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
1738 setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
1739 setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
1742 // We want to custom lower some of our intrinsics.
1743 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1744 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1745 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1746 if (!Subtarget->is64Bit()) {
1747 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1748 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1751 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1752 // handle type legalization for these operations here.
1754 // FIXME: We really should do custom legalization for addition and
1755 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1756 // than generic legalization for 64-bit multiplication-with-overflow, though.
1757 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1758 if (VT == MVT::i64 && !Subtarget->is64Bit())
1760 // Add/Sub/Mul with overflow operations are custom lowered.
1761 setOperationAction(ISD::SADDO, VT, Custom);
1762 setOperationAction(ISD::UADDO, VT, Custom);
1763 setOperationAction(ISD::SSUBO, VT, Custom);
1764 setOperationAction(ISD::USUBO, VT, Custom);
1765 setOperationAction(ISD::SMULO, VT, Custom);
1766 setOperationAction(ISD::UMULO, VT, Custom);
1769 if (!Subtarget->is64Bit()) {
1770 // These libcalls are not available in 32-bit.
1771 setLibcallName(RTLIB::SHL_I128, nullptr);
1772 setLibcallName(RTLIB::SRL_I128, nullptr);
1773 setLibcallName(RTLIB::SRA_I128, nullptr);
1776 // Combine sin / cos into one node or libcall if possible.
1777 if (Subtarget->hasSinCos()) {
1778 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1779 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1780 if (Subtarget->isTargetDarwin()) {
1781 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1782 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1783 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1784 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1788 if (Subtarget->isTargetWin64()) {
1789 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1790 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1791 setOperationAction(ISD::SREM, MVT::i128, Custom);
1792 setOperationAction(ISD::UREM, MVT::i128, Custom);
1793 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1794 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1797 // We have target-specific dag combine patterns for the following nodes:
1798 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1799 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1800 setTargetDAGCombine(ISD::BITCAST);
1801 setTargetDAGCombine(ISD::VSELECT);
1802 setTargetDAGCombine(ISD::SELECT);
1803 setTargetDAGCombine(ISD::SHL);
1804 setTargetDAGCombine(ISD::SRA);
1805 setTargetDAGCombine(ISD::SRL);
1806 setTargetDAGCombine(ISD::OR);
1807 setTargetDAGCombine(ISD::AND);
1808 setTargetDAGCombine(ISD::ADD);
1809 setTargetDAGCombine(ISD::FADD);
1810 setTargetDAGCombine(ISD::FSUB);
1811 setTargetDAGCombine(ISD::FNEG);
1812 setTargetDAGCombine(ISD::FMA);
1813 setTargetDAGCombine(ISD::FMINNUM);
1814 setTargetDAGCombine(ISD::FMAXNUM);
1815 setTargetDAGCombine(ISD::SUB);
1816 setTargetDAGCombine(ISD::LOAD);
1817 setTargetDAGCombine(ISD::MLOAD);
1818 setTargetDAGCombine(ISD::STORE);
1819 setTargetDAGCombine(ISD::MSTORE);
1820 setTargetDAGCombine(ISD::TRUNCATE);
1821 setTargetDAGCombine(ISD::ZERO_EXTEND);
1822 setTargetDAGCombine(ISD::ANY_EXTEND);
1823 setTargetDAGCombine(ISD::SIGN_EXTEND);
1824 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1825 setTargetDAGCombine(ISD::SINT_TO_FP);
1826 setTargetDAGCombine(ISD::UINT_TO_FP);
1827 setTargetDAGCombine(ISD::SETCC);
1828 setTargetDAGCombine(ISD::BUILD_VECTOR);
1829 setTargetDAGCombine(ISD::MUL);
1830 setTargetDAGCombine(ISD::XOR);
1831 setTargetDAGCombine(ISD::MSCATTER);
1832 setTargetDAGCombine(ISD::MGATHER);
1834 computeRegisterProperties(Subtarget->getRegisterInfo());
1836 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1837 MaxStoresPerMemsetOptSize = 8;
1838 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1839 MaxStoresPerMemcpyOptSize = 4;
1840 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1841 MaxStoresPerMemmoveOptSize = 4;
1842 setPrefLoopAlignment(4); // 2^4 bytes.
1844 // A predictable cmov does not hurt on an in-order CPU.
1845 // FIXME: Use a CPU attribute to trigger this, not a CPU model.
1846 PredictableSelectIsExpensive = !Subtarget->isAtom();
1847 EnableExtLdPromotion = true;
1848 setPrefFunctionAlignment(4); // 2^4 bytes.
1850 verifyIntrinsicTables();
/// Returns true only when the subtarget is both a MachO target and 64-bit;
/// every other configuration yields false.
1853 // This has so far only been implemented for 64-bit MachO.
1854 bool X86TargetLowering::useLoadStackGuardNode() const {
1855 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
/// Selects the type-legalization action for the vector type \p VT.
///
/// Returns TypeWidenVector when ExperimentalVectorWideningLegalization is set,
/// \p VT does not have exactly one element, and its element type is not i1.
/// In every other case the decision is delegated to
/// TargetLoweringBase::getPreferredVectorAction.
1858 TargetLoweringBase::LegalizeTypeAction
1859 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
// Single-element vectors and i1 vectors are excluded from widening.
1860 if (ExperimentalVectorWideningLegalization &&
1861 VT.getVectorNumElements() != 1 &&
1862 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1863 return TypeWidenVector;
1865 return TargetLoweringBase::getPreferredVectorAction(VT);
/// Computes the SETCC result type for VT.
///
/// What the visible lines establish:
///  - one early path yields i1 when the subtarget has AVX-512 and i8 otherwise;
///  - for simple 512-bit vector types, 32/64-bit integer or FP elements give
///    v8i1/v16i1 under AVX-512, and 8/16-bit integer elements give
///    v32i1/v64i1 under BWI;
///  - for simple 128/256-bit vector types, the same element classes give
///    v2i1/v4i1/v8i1 under VLX and v8i1/v16i1/v32i1 under BWI together with
///    VLX;
///  - the last statement returns VT with its element type changed to integer.
/// NOTE(review): this listing omits the end of the signature, the condition
/// guarding the first return, and the switch headers above each group of case
/// labels (presumably switching on NumElts) -- confirm against the full file.
1868 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1871 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
// Mask-vector results are only considered for simple (MVT) types.
1873 if (VT.isSimple()) {
1874 MVT VVT = VT.getSimpleVT();
1875 const unsigned NumElts = VVT.getVectorNumElements();
1876 const MVT EltVT = VVT.getVectorElementType();
// 512-bit vectors.
1877 if (VVT.is512BitVector()) {
1878 if (Subtarget->hasAVX512())
1879 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1880 EltVT == MVT::f32 || EltVT == MVT::f64)
1882 case 8: return MVT::v8i1;
1883 case 16: return MVT::v16i1;
// Byte and word elements need BWI.
1885 if (Subtarget->hasBWI())
1886 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1888 case 32: return MVT::v32i1;
1889 case 64: return MVT::v64i1;
// 128-bit and 256-bit vectors.
1893 if (VVT.is256BitVector() || VVT.is128BitVector()) {
1894 if (Subtarget->hasVLX())
1895 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1896 EltVT == MVT::f32 || EltVT == MVT::f64)
1898 case 2: return MVT::v2i1;
1899 case 4: return MVT::v4i1;
1900 case 8: return MVT::v8i1;
// Byte and word elements need both BWI and VLX at these widths.
1902 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1903 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1905 case 8: return MVT::v8i1;
1906 case 16: return MVT::v16i1;
1907 case 32: return MVT::v32i1;
// Final return: keep the shape of VT and switch to an integer element type.
1912 return VT.changeVectorElementTypeToInteger();
1915 /// Helper for getByValTypeAlignment to determine
1916 /// the desired ByVal argument alignment.
///
/// Recurses through \p Ty: an array's element type and each member type of a
/// struct are evaluated with a fresh local alignment of 0, and \p MaxAlign is
/// raised whenever that nested result is larger. \p MaxAlign is an in/out
/// parameter and is never lowered by the visible code.
/// NOTE(review): this listing omits the statements at the top of the body and
/// the statement controlled by the 128-bit vector check (presumably the
/// assignment that sets \p MaxAlign for SSE-sized vectors) -- confirm against
/// the full file.
1917 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
// Vector types: only a total width of exactly 128 bits is singled out.
1920 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1921 if (VTy->getBitWidth() == 128)
// Array types: the element type alone decides the alignment.
1923 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1924 unsigned EltAlign = 0;
1925 getMaxByValAlign(ATy->getElementType(), EltAlign);
1926 if (EltAlign > MaxAlign)
1927 MaxAlign = EltAlign;
// Struct types: take the largest alignment found among the members.
1928 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1929 for (auto *EltTy : STy->elements()) {
1930 unsigned EltAlign = 0;
1931 getMaxByValAlign(EltTy, EltAlign);
1932 if (EltAlign > MaxAlign)
1933 MaxAlign = EltAlign;
1940 /// Return the desired alignment for ByVal aggregate
1941 /// function arguments in the caller parameter area. For X86, aggregates
1942 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1943 /// are at 4-byte boundaries.
///
/// NOTE(review): this listing omits the return statements and the declaration
/// of Align, so the exact values returned on each path cannot be read off
/// here -- confirm against the full file.
1944 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1945 const DataLayout &DL) const {
// 64-bit subtargets start from the ABI alignment that the data layout
// reports for Ty.
1946 if (Subtarget->is64Bit()) {
1947 // Max of 8 and alignment of type.
1948 unsigned TyAlign = DL.getABITypeAlignment(Ty);
// With SSE1 available, getMaxByValAlign may raise Align based on the types
// nested inside Ty (Align is passed by reference).
1955 if (Subtarget->hasSSE1())
1956 getMaxByValAlign(Ty, Align);
1960 /// Returns the target specific optimal type for load
1961 /// and store operations as a result of memset, memcpy, and memmove
1962 /// lowering. If DstAlign is zero, the destination
1963 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1964 /// means there isn't a need to check it against alignment requirement,
1965 /// probably because the source does not need to be loaded. If 'IsMemset' is
1966 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1967 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1968 /// source is constant so it does not need to be loaded.
1969 /// It returns EVT::Other if the type should be determined using generic
1970 /// target-independent logic.
///
/// NOTE(review): this listing omits the return type, one parameter line, the
/// size check that opens the inner condition, and every return statement, so
/// the concrete type chosen on each path is not visible here -- confirm
/// against the full file.
1972 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1973 unsigned DstAlign, unsigned SrcAlign,
1974 bool IsMemset, bool ZeroMemset,
1976 MachineFunction &MF) const {
1977 const Function *F = MF.getFunction();
// The subtarget-feature-based choices below apply only when this is not a
// memset or is a memset of zero, and only in functions that are not marked
// NoImplicitFloat.
1978 if ((!IsMemset || ZeroMemset) &&
1979 !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
// Accept this path if unaligned 16-byte accesses are not slow, or if both
// alignments are either unconstrained (0) or at least 16 bytes.
1981 (!Subtarget->isUnalignedMem16Slow() ||
1982 ((DstAlign == 0 || DstAlign >= 16) &&
1983 (SrcAlign == 0 || SrcAlign >= 16)))) {
1985 // FIXME: Check if unaligned 32-byte accesses are slow.
// Features are tested from widest to narrowest: Int256, Fp256, SSE2, SSE1.
1986 if (Subtarget->hasInt256())
1988 if (Subtarget->hasFp256())
1991 if (Subtarget->hasSSE2())
1993 if (Subtarget->hasSSE1())
// Alternative path: at least 8 bytes, 32-bit subtarget with SSE2, and the
// memcpy source is not a string constant.
1995 } else if (!MemcpyStrSrc && Size >= 8 &&
1996 !Subtarget->is64Bit() &&
1997 Subtarget->hasSSE2()) {
1998 // Do not use f64 to lower memcpy if source is string constant. It's
1999 // better to use i32 to avoid the loads.
2003 // This is a compromise. If we reach here, unaligned accesses may be slow on
2004 // this target. However, creating smaller, aligned accesses could be even
2005 // slower and would certainly be a lot more code.
2006 if (Subtarget->is64Bit() && Size >= 8)
/// Reports whether \p VT may be used as a memory-operation type.
///
/// For f64 the answer is X86ScalarSSEf64. Another path answers with
/// X86ScalarSSEf32.
/// NOTE(review): this listing omits the condition guarding the
/// X86ScalarSSEf32 return (presumably a test for f32) and whatever is
/// returned for all remaining types -- confirm against the full file.
2011 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2013 return X86ScalarSSEf32;
2014 else if (VT == MVT::f64)
2015 return X86ScalarSSEf64;
/// Answers whether misaligned accesses of type \p VT are permitted and, via
/// the Fast out-pointer, whether they are fast.
///
/// The visible body switches on the bit size of \p VT and stores into *Fast
/// the negation of the subtarget's isUnalignedMem16Slow() or
/// isUnalignedMem32Slow() flag; the closing comment states that misaligned
/// accesses of any size are always allowed.
/// NOTE(review): this listing omits the return type, the remaining
/// parameters, the case labels that tie each flag to a size, and the return
/// statement -- confirm against the full file.
2020 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2025 switch (VT.getSizeInBits()) {
2027 // 8-byte and under are always assumed to be fast.
2031 *Fast = !Subtarget->isUnalignedMem16Slow();
2034 *Fast = !Subtarget->isUnalignedMem32Slow();
2036 // TODO: What about AVX-512 (512-bit) accesses?
2039 // Misaligned accesses of any size are always allowed.
2043 /// Return the entry encoding for a jump table in the
2044 /// current function. The returned value is a member of the
2045 /// MachineJumpTableInfo::JTEntryKind enum.
///
/// Yields EK_Custom32 when the relocation model is PIC and the subtarget uses
/// GOT-style PIC; every other configuration defers to
/// TargetLowering::getJumpTableEncoding().
2046 unsigned X86TargetLowering::getJumpTableEncoding() const {
2047 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2049 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2050 Subtarget->isPICStyleGOT())
2051 return MachineJumpTableInfo::EK_Custom32;
2053 // Otherwise, use the normal jump table encoding heuristics.
2054 return TargetLowering::getJumpTableEncoding();
/// Forwards the subtarget's soft-float setting unchanged.
2057 bool X86TargetLowering::useSoftFloat() const {
2058 return Subtarget->useSoftFloat();
/// Builds the expression for one custom jump-table entry: a VK_GOTOFF symbol
/// reference to the symbol of \p MBB, created in \p Ctx.
///
/// Asserts that the relocation model is PIC and that the subtarget uses
/// GOT-style PIC. \p MJTI and \p uid are not used by the visible body.
/// NOTE(review): the return-type line is not part of this listing.
2062 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2063 const MachineBasicBlock *MBB,
2064 unsigned uid,MCContext &Ctx) const{
// The condition asserted here is the same one under which
// getJumpTableEncoding() returns EK_Custom32.
2065 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
2066 Subtarget->isPICStyleGOT());
2067 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2069 return MCSymbolRefExpr::create(MBB->getSymbol(),
2070 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2073 /// Returns relocation base for the given PIC jumptable.
///
/// On subtargets that are not 64-bit this is an X86ISD::GlobalBaseReg node of
/// pointer type, built with a default-constructed SDLoc.
/// NOTE(review): the statement executed on 64-bit subtargets is not part of
/// this listing (presumably it returns \p Table unchanged) -- confirm against
/// the full file.
2074 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2075 SelectionDAG &DAG) const {
2076 if (!Subtarget->is64Bit())
2077 // This doesn't have SDLoc associated with it, but is not really the
2078 // same as a Register.
2079 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2080 getPointerTy(DAG.getDataLayout()));
2084 /// This returns the relocation base for the given PIC jumptable,
2085 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
///
/// With RIP-relative PIC the base-class implementation is used with the same
/// arguments; otherwise the result is a symbol reference to the PIC base
/// symbol of \p MF, created in \p Ctx.
2086 const MCExpr *X86TargetLowering::
2087 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2088 MCContext &Ctx) const {
2089 // X86-64 uses RIP relative addressing based on the jump table label.
2090 if (Subtarget->isPICStyleRIPRel())
2091 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2093 // Otherwise, the reference is relative to the PIC base.
2094 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
/// Maps \p VT to a pair of (representative register class, cost).
///
/// Visible mappings: i8/i16/i32/i64 use GR64 on 64-bit subtargets and GR32
/// otherwise; f32, f64 and the listed 128-bit and 256-bit vector types use
/// VR128; one further case uses VR64; another path defers to
/// TargetLowering::findRepresentativeClass.
/// NOTE(review): this listing omits the VT parameter line, the definition of
/// Cost, the labels for the deferring case and the VR64 case, and possibly
/// further case labels before the VR128 assignment -- confirm against the
/// full file.
2097 std::pair<const TargetRegisterClass *, uint8_t>
2098 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2100 const TargetRegisterClass *RRC = nullptr;
2102 switch (VT.SimpleTy) {
2104 return TargetLowering::findRepresentativeClass(TRI, VT);
// Scalar integers are represented by the widest GPR class of the subtarget.
2105 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2106 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2109 RRC = &X86::VR64RegClass;
// Scalar FP and the vector types listed here share VR128.
2111 case MVT::f32: case MVT::f64:
2112 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2113 case MVT::v4f32: case MVT::v2f64:
2114 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
2116 RRC = &X86::VR128RegClass;
2119 return std::make_pair(RRC, Cost);
/// Reports, through \p AddressSpace and \p Offset, a fixed location for the
/// stack cookie (per the function name).
///
/// The visible checks distinguish non-Linux targets, 64-bit from other
/// subtargets, and the Kernel code model; the in-body comment gives %fs:0x28
/// for 64-bit, with %gs: under the Kernel code model.
/// NOTE(review): this listing omits every assignment to the out-parameters
/// and every return statement -- confirm against the full file.
2122 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
2123 unsigned &Offset) const {
2124 if (!Subtarget->isTargetLinux())
2127 if (Subtarget->is64Bit()) {
2128 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
2130 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
/// Returns the location of the SafeStack pointer.
///
/// Targets other than Android defer to
/// TargetLowering::getSafeStackPointerLocation. On Android the result is a
/// constant expression: the 32-bit integer Offset converted to a pointer to
/// i8* in address space AddressSpace.
/// NOTE(review): the assignments that set AddressSpace and Offset are not
/// part of this listing; the in-body comment gives %fs:0x48 for 64-bit, with
/// %gs: under the Kernel code model -- confirm the values against the full
/// file.
2142 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2143 if (!Subtarget->isTargetAndroid())
2144 return TargetLowering::getSafeStackPointerLocation(IRB);
2146 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2147 // definition of TLS_SLOT_SAFESTACK in
2148 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2149 unsigned AddressSpace, Offset;
2150 if (Subtarget->is64Bit()) {
2151 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2153 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
// Materialize the slot address as an inttoptr constant in AddressSpace.
2163 return ConstantExpr::getIntToPtr(
2164 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2165 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
/// Reports whether a cast from address space \p SrcAS to \p DestAS is a
/// no-op: true exactly when both address-space numbers are below 256.
///
/// Asserts that the two address spaces differ.
2168 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2169 unsigned DestAS) const {
2170 assert(SrcAS != DestAS && "Expected different address spaces!");
2172 return SrcAS < 256 && DestAS < 256;
2175 //===----------------------------------------------------------------------===//
2176 // Return Value Calling Convention Implementation
2177 //===----------------------------------------------------------------------===//
2179 #include "X86GenCallingConv.inc"
/// Checks whether the return values described by \p Outs can be lowered under
/// the RetCC_X86 return convention.
///
/// Builds a CCState over a local, initially empty location list and returns
/// the result of CCState::CheckReturn.
2181 bool X86TargetLowering::CanLowerReturn(
2182 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2183 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
// RVLocs only serves as storage for CCState; it is not read afterwards.
2184 SmallVector<CCValAssign, 16> RVLocs;
2185 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2186 return CCInfo.CheckReturn(Outs, RetCC_X86);
/// Provides the scratch-register list. The calling-convention argument is
/// unnamed and therefore unused.
///
/// The visible body defines a static list holding X86::R11 followed by a 0
/// entry.
/// NOTE(review): the return statement is not part of this listing
/// (presumably it returns ScratchRegs) -- confirm against the full file.
2189 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2190 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
/// Lower a function return into an X86ISD::RET_FLAG node (X86ISD::IRET when
/// the calling convention is X86_INTR).
///
/// Return locations are assigned with RetCC_X86. Each value is extended or
/// bitcast to its location type and copied into its assigned register; values
/// assigned to FP0/FP1 are instead appended directly as operands of the return
/// node. If an sret return register was recorded for the function, its value
/// is copied into RAX/EAX as an additional return value.
///
/// Reports a fatal error when an X86_INTR function returns a value, and when
/// an FP or XMM return is requested on x86-64 without the required SSE level.
2195 X86TargetLowering::LowerReturn(SDValue Chain,
2196 CallingConv::ID CallConv, bool isVarArg,
2197 const SmallVectorImpl<ISD::OutputArg> &Outs,
2198 const SmallVectorImpl<SDValue> &OutVals,
2199 SDLoc dl, SelectionDAG &DAG) const {
2200 MachineFunction &MF = DAG.getMachineFunction();
2201 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2203 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2204 report_fatal_error("X86 interrupts may not return any value");
// Assign a location to every returned value.
2206 SmallVector<CCValAssign, 16> RVLocs;
2207 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2208 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2211 SmallVector<SDValue, 6> RetOps;
2212 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2213 // Operand #1 = Bytes To Pop
2214 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2217 // Copy the result values into the output registers.
2218 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2219 CCValAssign &VA = RVLocs[i];
2220 assert(VA.isRegLoc() && "Can only return in registers!");
2221 SDValue ValToCopy = OutVals[i];
2222 EVT ValVT = ValToCopy.getValueType();
2224 // Promote values to the appropriate types.
2225 if (VA.getLocInfo() == CCValAssign::SExt)
2226 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2227 else if (VA.getLocInfo() == CCValAssign::ZExt)
2228 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2229 else if (VA.getLocInfo() == CCValAssign::AExt) {
// Vectors of i1 are sign-extended; the ANY_EXTEND below is the other AExt
// case.
2230 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2231 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2233 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2235 else if (VA.getLocInfo() == CCValAssign::BCvt)
2236 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2238 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2239 "Unexpected FP-extend for return value.");
2241 // If this is x86-64, and we disabled SSE, we can't return FP values,
2242 // or SSE or MMX vectors.
2243 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2244 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2245 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2246 report_fatal_error("SSE register return with SSE disabled");
2248 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2249 // llvm-gcc has never done it right and no one has noticed, so this
2250 // should be OK for now.
2251 if (ValVT == MVT::f64 &&
2252 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2253 report_fatal_error("SSE2 register return with SSE2 disabled");
2255 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2256 // the RET instruction and handled by the FP Stackifier.
2257 if (VA.getLocReg() == X86::FP0 ||
2258 VA.getLocReg() == X86::FP1) {
2259 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2260 // change the value to the FP stack register class.
2261 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2262 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2263 RetOps.push_back(ValToCopy);
2264 // Don't emit a copytoreg.
2268 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2269 // which is returned in RAX / RDX.
2270 if (Subtarget->is64Bit()) {
2271 if (ValVT == MVT::x86mmx) {
2272 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2273 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2274 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2276 // If we don't have SSE2 available, convert to v4f32 so the generated
2277 // register is legal.
2278 if (!Subtarget->hasSSE2())
2279 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
// Copy the value into its return register, threading the glue through Flag,
// and list the register as an operand of the return node.
2284 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2285 Flag = Chain.getValue(1);
2286 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2289 // All x86 ABIs require that for returning structs by value we copy
2290 // the sret argument into %rax/%eax (depending on ABI) for the return.
2291 // We saved the argument into a virtual register in the entry block,
2292 // so now we copy the value out and into %rax/%eax.
2294 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2295 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2296 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2297 // either case FuncInfo->setSRetReturnReg() will have been called.
2298 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2299 SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
2300 getPointerTy(MF.getDataLayout()));
// RAX is used for 64-bit targets that are not ILP32; EAX otherwise.
2303 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2304 X86::RAX : X86::EAX;
2305 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2306 Flag = Chain.getValue(1);
2308 // RAX/EAX now acts like a return value.
2310 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2313 RetOps[0] = Chain; // Update chain.
2315 // Add the flag if we have it.
2317 RetOps.push_back(Flag);
// Interrupt handlers return with IRET instead of the regular return node.
2319 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2320 if (CallConv == CallingConv::X86_INTR)
2321 opcode = X86ISD::IRET;
2322 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
/// Check whether the value produced by N is only consumed by a return.
///
/// Visible checks: N must produce exactly one value with exactly one use; that
/// use must be either a CopyToReg whose last operand is not glue, or an
/// FP_EXTEND; every user of that node is tested for being X86ISD::RET_FLAG
/// and for its operand count.
///
/// NOTE(review): the statements executed when a check fails, and the final
/// update of Chain, are not visible in this excerpt. Presumably failures
/// return false and success stores TCChain into Chain - confirm against the
/// full file.
2325 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2326 if (N->getNumValues() != 1)
2328 if (!N->hasNUsesOfValue(1, 0))
2331 SDValue TCChain = Chain;
2332 SDNode *Copy = *N->use_begin();
2333 if (Copy->getOpcode() == ISD::CopyToReg) {
2334 // If the copy has a glue operand, we conservatively assume it isn't safe to
2335 // perform a tail call.
2336 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
// Without glue, the candidate chain is the chain feeding the copy.
2338 TCChain = Copy->getOperand(0);
2339 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2342 bool HasRet = false;
2343 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2345 if (UI->getOpcode() != X86ISD::RET_FLAG)
2347 // If we are returning more than one value, we can definitely
2348 // not make a tail call see PR19530
2349 if (UI->getNumOperands() > 4)
// Exactly four operands is also tested, when the last one is not glue.
2351 if (UI->getNumOperands() == 4 &&
2352 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
/// Return the type to use for an extended argument or return value of type
/// VT: VT itself, unless it is narrower than the register type of the chosen
/// minimum type, in which case that register type is returned.
///
/// The minimum is i8 for a zero-extended i1 on x86-64; the other visible
/// assignment selects i32 (presumably the else branch - that line is not
/// visible in this excerpt).
2365 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2366 ISD::NodeType ExtendKind) const {
2368 // TODO: Is this also valid on 32-bit?
2369 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2370 ReturnMVT = MVT::i8;
2372 ReturnMVT = MVT::i32;
2374 EVT MinVT = getRegisterType(Context, ReturnMVT);
2375 return VT.bitsLT(MinVT) ? MinVT : VT;
2378 /// Lower the result values of a call into the
2379 /// appropriate copies out of appropriate physical registers.
///
/// Result locations are computed with RetCC_X86 from Ins. Each result is
/// copied out of its assigned register, with Chain and InFlag threaded through
/// the copies, and appended to InVals. Reports a fatal error when an
/// f32/f64/f128 result is requested without SSE1 on x86-64 or for an inreg
/// result.
2382 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2383 CallingConv::ID CallConv, bool isVarArg,
2384 const SmallVectorImpl<ISD::InputArg> &Ins,
2385 SDLoc dl, SelectionDAG &DAG,
2386 SmallVectorImpl<SDValue> &InVals) const {
2388 // Assign locations to each value returned by this call.
2389 SmallVector<CCValAssign, 16> RVLocs;
2390 bool Is64Bit = Subtarget->is64Bit();
2391 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2393 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2395 // Copy all of the result registers out of their specified physreg.
2396 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2397 CCValAssign &VA = RVLocs[i];
2398 EVT CopyVT = VA.getLocVT();
2400 // If this is x86-64, and we disabled SSE, we can't return FP values
2401 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2402 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2403 report_fatal_error("SSE register return with SSE disabled");
2406 // If we prefer to use the value in xmm registers, copy it out as f80 and
2407 // use a truncate to move it from fp stack reg to xmm reg.
2408 bool RoundAfterCopy = false;
2409 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2410 isScalarFPTypeInSSEReg(VA.getValVT())) {
// NOTE(review): the statement that changes CopyVT (to f80, per the comment
// above) is not visible in this excerpt; rounding is needed only when the
// copy type differs from the location type.
2412 RoundAfterCopy = (CopyVT != VA.getLocVT());
// Result #1 of the copy node becomes the new Chain; result #0 is the copied
// value.
2415 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2416 CopyVT, InFlag).getValue(1);
2417 SDValue Val = Chain.getValue(0);
2420 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2421 // This truncation won't change the value.
2422 DAG.getIntPtrConstant(1, dl));
// i1 values that were extended in their location are truncated back.
2424 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
2425 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
// Result #2 of the copy node is the glue passed to the next copy.
2427 InFlag = Chain.getValue(2);
2428 InVals.push_back(Val);
2434 //===----------------------------------------------------------------------===//
2435 // C & StdCall & Fast Calling Convention implementation
2436 //===----------------------------------------------------------------------===//
2437 // The StdCall calling convention seems to be the standard for many Windows
2438 // API routines. It differs from the C calling convention only slightly: the
2439 // callee, not the caller, cleans up the stack. Symbols are also decorated
2440 // in a convention-specific way. It doesn't support any vector arguments.
2441 // For info on the fast calling convention, see the Fast Calling Convention
2442 // (tail call) implementation, LowerX86_32FastCCCallTo.
2444 /// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
///
/// Classification produced by callIsStructReturn and argsAreStructReturn.
/// The values used in this file are NotStructReturn, RegStructReturn and
/// StackStructReturn.
2446 enum StructReturnType {
/// Classify a call by the flags of its first outgoing argument:
/// NotStructReturn when that argument is not sret, RegStructReturn when it is
/// sret and either inreg or IsMCU is set, StackStructReturn otherwise.
///
/// NOTE(review): the guard for the first `return NotStructReturn` is not
/// visible in this excerpt (presumably an empty Outs).
2451 static StructReturnType
2452 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2454 return NotStructReturn;
2456 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2457 if (!Flags.isSRet())
2458 return NotStructReturn;
2459 if (Flags.isInReg() || IsMCU)
2460 return RegStructReturn;
2461 return StackStructReturn;
2464 /// Determines whether a function uses struct return semantics.
///
/// The decision is based on the flags of the first incoming argument:
/// NotStructReturn when it is not sret, RegStructReturn when it is sret and
/// either inreg or IsMCU is set, StackStructReturn otherwise.
///
/// NOTE(review): the guard for the first `return NotStructReturn` is not
/// visible in this excerpt (presumably an empty Ins).
2465 static StructReturnType
2466 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2468 return NotStructReturn;
2470 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2471 if (!Flags.isSRet())
2472 return NotStructReturn;
2473 if (Flags.isInReg() || IsMCU)
2474 return RegStructReturn;
2475 return StackStructReturn;
2478 /// Make a copy of an aggregate at address specified by "Src" to address
2479 /// "Dst" with size and alignment information specified by the specific
2480 /// parameter attribute. The copy will be passed as a byval function parameter.
///
/// The copy is emitted as a DAG memcpy that is non-volatile, always inlined
/// and not a tail call. Its size comes from Flags.getByValSize() (as an i32
/// constant) and its alignment from Flags.getByValAlign(). Returns the node
/// produced by DAG.getMemcpy.
2482 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2483 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2485 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2487 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2488 /*isVolatile*/false, /*AlwaysInline=*/true,
2489 /*isTailCall*/false,
2490 MachinePointerInfo(), MachinePointerInfo());
2493 /// Return true if the calling convention is one that we can guarantee TCO for.
/// These are the Fast, GHC, HiPE and HHVM calling conventions.
2494 static bool canGuaranteeTCO(CallingConv::ID CC) {
2495 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2496 CC == CallingConv::HiPE || CC == CallingConv::HHVM);
2499 /// Return true if we might ever do TCO for calls with this calling convention.
///
/// The C-style and callee-pop conventions are listed explicitly below; the
/// remaining visible path defers to canGuaranteeTCO.
/// NOTE(review): the switch header, the result for the listed cases
/// (presumably true) and the default label are not visible in this excerpt.
2500 static bool mayTailCallThisCC(CallingConv::ID CC) {
2502 // C calling conventions:
2503 case CallingConv::C:
2504 case CallingConv::X86_64_Win64:
2505 case CallingConv::X86_64_SysV:
2506 // Callee pop conventions:
2507 case CallingConv::X86_ThisCall:
2508 case CallingConv::X86_StdCall:
2509 case CallingConv::X86_VectorCall:
2510 case CallingConv::X86_FastCall:
2513 return canGuaranteeTCO(CC);
2517 /// Return true if the function is being made into a tailcall target by
2518 /// changing its ABI.
/// This holds only when GuaranteedTailCallOpt is set and CC satisfies
/// canGuaranteeTCO.
2519 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2520 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
/// Decide whether the call instruction CI may be emitted as a tail call.
///
/// Visible checks: CI must be marked as a tail call, the enclosing function's
/// "disable-tail-calls" attribute must not be "true", and the callee's calling
/// convention must pass mayTailCallThisCC.
/// NOTE(review): the return statements and the declarations of Attr and CS
/// are not visible in this excerpt.
2523 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2525 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2526 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2530 CallingConv::ID CalleeCC = CS.getCallingConv();
2531 if (!mayTailCallThisCC(CalleeCC))
/// Lower one incoming argument that was assigned a stack location (VA).
///
/// A byval argument yields the frame index of a fixed object of the byval
/// size (at least one byte). Any other argument is loaded from a fixed object
/// created at VA.getLocMemOffset(); an i1 value that was extended in memory is
/// truncated back to its value type after the load. For the X86_INTR calling
/// convention the fixed object's offset is overridden with a computed Offset.
///
/// The fixed object is immutable unless the argument is byval or guaranteed
/// TCO applies to CallConv. `i` is used as the index of the argument in Ins;
/// its declaration is on a line not visible in this excerpt.
2538 X86TargetLowering::LowerMemArgument(SDValue Chain,
2539 CallingConv::ID CallConv,
2540 const SmallVectorImpl<ISD::InputArg> &Ins,
2541 SDLoc dl, SelectionDAG &DAG,
2542 const CCValAssign &VA,
2543 MachineFrameInfo *MFI,
2545 // Create the nodes corresponding to a load from this parameter slot.
2546 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2547 bool AlwaysUseMutable = shouldGuaranteeTCO(
2548 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2549 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2552 // If value is passed by pointer we have address passed instead of the value
2554 bool ExtendedInMem = VA.isExtInLoc() &&
2555 VA.getValVT().getScalarType() == MVT::i1;
// Indirect arguments and extended i1 values are loaded using the location
// type; the other assignment uses the value type.
2557 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2558 ValVT = VA.getLocVT();
2560 ValVT = VA.getValVT();
2562 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2563 // taken by a return address.
2565 if (CallConv == CallingConv::X86_INTR) {
2566 const X86Subtarget& Subtarget =
2567 static_cast<const X86Subtarget&>(DAG.getSubtarget());
2568 // X86 interrupts may take one or two arguments.
2569 // On the stack there will be no return address as in regular call.
2570 // Offset of last argument need to be set to -4/-8 bytes.
2571 // Where offset of the first argument out of two, should be set to 0 bytes.
2572 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2575 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2576 // changed with more analysis.
2577 // In case of tail call optimization mark all arguments mutable. Since they
2578 // could be overwritten by lowering of arguments in case of a tail call.
2579 if (Flags.isByVal()) {
2580 unsigned Bytes = Flags.getByValSize();
2581 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2582 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2583 // Adjust SP offset of interrupt parameter.
2584 if (CallConv == CallingConv::X86_INTR) {
2585 MFI->setObjectOffset(FI, Offset);
2587 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// Non-byval path: create a fixed object sized for ValVT and load from it.
2589 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2590 VA.getLocMemOffset(), isImmutable);
2591 // Adjust SP offset of interrupt parameter.
2592 if (CallConv == CallingConv::X86_INTR) {
2593 MFI->setObjectOffset(FI, Offset);
2596 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2597 SDValue Val = DAG.getLoad(
2598 ValVT, dl, Chain, FIN,
2599 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
2601 return ExtendedInMem ?
2602 DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
/// Return the general-purpose registers used for passing arguments in 64-bit
/// mode: RCX, RDX, R8, R9 when CallConv is a Win64 calling convention,
/// otherwise RDI, RSI, RDX, RCX, R8, R9. Asserts that the subtarget is 64-bit.
2606 // FIXME: Get this from tablegen.
2607 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2608 const X86Subtarget *Subtarget) {
2609 assert(Subtarget->is64Bit());
2611 if (Subtarget->isCallingConvWin64(CallConv)) {
2612 static const MCPhysReg GPR64ArgRegsWin64[] = {
2613 X86::RCX, X86::RDX, X86::R8, X86::R9
2615 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2618 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2619 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2621 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
/// Return the XMM registers used for passing arguments in 64-bit mode. The
/// list returned on the final path is XMM0 through XMM7.
///
/// NOTE(review): the return statements for the Win64 case and for the
/// soft-float / NoImplicitFloat / no-SSE1 case are not visible in this
/// excerpt; the surrounding comments suggest that no XMM registers are
/// reported there - confirm against the full file.
2624 // FIXME: Get this from tablegen.
2625 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2626 CallingConv::ID CallConv,
2627 const X86Subtarget *Subtarget) {
2628 assert(Subtarget->is64Bit());
2629 if (Subtarget->isCallingConvWin64(CallConv)) {
2630 // The XMM registers which might contain var arg parameters are shadowed
2631 // in their paired GPR. So we only need to save the GPR to their home
2633 // TODO: __vectorcall will change this.
2637 const Function *Fn = MF.getFunction();
2638 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2639 bool isSoftFloat = Subtarget->useSoftFloat();
2640 assert(!(isSoftFloat && NoImplicitFloatOps) &&
2641 "SSE register cannot be used when SSE is disabled!");
2642 if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
2643 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2647 static const MCPhysReg XMMArgRegs64Bit[] = {
2648 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2649 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2651 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
/// Lower the incoming (formal) arguments of the current function and append
/// one SDValue per argument to InVals.
///
/// Steps visible in this function:
///  - force a frame pointer for an externally visible "main" on Cygwin/MinGW;
///  - check that an X86_INTR function takes one or two arguments (fatal error
///    otherwise);
///  - assign argument locations with CC_X86 (32 bytes of shadow space are
///    allocated for Win64 first);
///  - materialize each argument: register arguments become live-in copies,
///    adjusted with AssertSext/AssertZext/bitcast/truncate as the location
///    info requires; memory arguments go through LowerMemArgument; Indirect
///    arguments are then loaded through the pointer;
///  - save an sret argument into a virtual register recorded in FuncInfo;
///  - set up the vararg frame index and, for 64-bit vararg functions that use
///    va_start, spill the unallocated argument registers;
///  - record the registers to forward for musttail calls in vararg functions;
///  - record the bytes to pop on return and the argument stack size;
///  - create the PSPSym stack slot for the CoreCLR EH personality.
2654 SDValue X86TargetLowering::LowerFormalArguments(
2655 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2656 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2657 SmallVectorImpl<SDValue> &InVals) const {
2658 MachineFunction &MF = DAG.getMachineFunction();
2659 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2660 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
2662 const Function* Fn = MF.getFunction();
2663 if (Fn->hasExternalLinkage() &&
2664 Subtarget->isTargetCygMing() &&
2665 Fn->getName() == "main")
2666 FuncInfo->setForceFramePointer(true);
2668 MachineFrameInfo *MFI = MF.getFrameInfo();
2669 bool Is64Bit = Subtarget->is64Bit();
2670 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2672 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2673 "Var args not supported with calling convention fastcc, ghc or hipe");
// An interrupt handler takes one argument, or two when the second has the
// pointer-sized integer type (i64 on 64-bit, i32 otherwise).
2675 if (CallConv == CallingConv::X86_INTR) {
2676 bool isLegal = Ins.size() == 1 ||
2677 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2678 (!Is64Bit && Ins[1].VT == MVT::i32)));
2680 report_fatal_error("X86 interrupts may take one or two arguments");
2683 // Assign locations to all of the incoming arguments.
2684 SmallVector<CCValAssign, 16> ArgLocs;
2685 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2687 // Allocate shadow area for Win64
2689 CCInfo.AllocateStack(32, 8);
2691 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2693 unsigned LastVal = ~0U;
2695 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2696 CCValAssign &VA = ArgLocs[i];
2697 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2699 assert(VA.getValNo() != LastVal &&
2700 "Don't support value assigned to multiple locs yet");
2702 LastVal = VA.getValNo();
2704 if (VA.isRegLoc()) {
// Pick the register class that matches the location type.
2705 EVT RegVT = VA.getLocVT();
2706 const TargetRegisterClass *RC;
2707 if (RegVT == MVT::i32)
2708 RC = &X86::GR32RegClass;
2709 else if (Is64Bit && RegVT == MVT::i64)
2710 RC = &X86::GR64RegClass;
2711 else if (RegVT == MVT::f32)
2712 RC = &X86::FR32RegClass;
2713 else if (RegVT == MVT::f64)
2714 RC = &X86::FR64RegClass;
2715 else if (RegVT == MVT::f128)
2716 RC = &X86::FR128RegClass;
2717 else if (RegVT.is512BitVector())
2718 RC = &X86::VR512RegClass;
2719 else if (RegVT.is256BitVector())
2720 RC = &X86::VR256RegClass;
2721 else if (RegVT.is128BitVector())
2722 RC = &X86::VR128RegClass;
2723 else if (RegVT == MVT::x86mmx)
2724 RC = &X86::VR64RegClass;
2725 else if (RegVT == MVT::i1)
2726 RC = &X86::VK1RegClass;
2727 else if (RegVT == MVT::v8i1)
2728 RC = &X86::VK8RegClass;
2729 else if (RegVT == MVT::v16i1)
2730 RC = &X86::VK16RegClass;
2731 else if (RegVT == MVT::v32i1)
2732 RC = &X86::VK32RegClass;
2733 else if (RegVT == MVT::v64i1)
2734 RC = &X86::VK64RegClass;
2736 llvm_unreachable("Unknown argument type!");
// Mark the physical register live-in and read the argument from the
// register returned by addLiveIn.
2738 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2739 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2741 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2742 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2744 if (VA.getLocInfo() == CCValAssign::SExt)
2745 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2746 DAG.getValueType(VA.getValVT()));
2747 else if (VA.getLocInfo() == CCValAssign::ZExt)
2748 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2749 DAG.getValueType(VA.getValVT()));
2750 else if (VA.getLocInfo() == CCValAssign::BCvt)
2751 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
2753 if (VA.isExtInLoc()) {
2754 // Handle MMX values passed in XMM regs.
2755 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
2756 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2758 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2761 assert(VA.isMemLoc());
2762 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2765 // If value is passed via pointer - do a load.
2766 if (VA.getLocInfo() == CCValAssign::Indirect)
2767 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2768 MachinePointerInfo(), false, false, false, 0);
2770 InVals.push_back(ArgValue);
2773 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2774 // All x86 ABIs require that for returning structs by value we copy the
2775 // sret argument into %rax/%eax (depending on ABI) for the return. Save
2776 // the argument into a virtual register so that we can access it from the
2778 if (Ins[i].Flags.isSRet()) {
2779 unsigned Reg = FuncInfo->getSRetReturnReg();
2781 MVT PtrTy = getPointerTy(DAG.getDataLayout());
2782 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2783 FuncInfo->setSRetReturnReg(Reg);
2785 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2786 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2791 unsigned StackSize = CCInfo.getNextStackOffset();
2792 // Align stack specially for tail calls.
2793 if (shouldGuaranteeTCO(CallConv,
2794 MF.getTarget().Options.GuaranteedTailCallOpt))
2795 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2797 // If the function takes variable number of arguments, make a frame index for
2798 // the start of the first vararg value... for expansion of llvm.va_start. We
2799 // can skip this if there are no va_start calls.
2800 if (MFI->hasVAStart() &&
2801 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2802 CallConv != CallingConv::X86_ThisCall))) {
2803 FuncInfo->setVarArgsFrameIndex(
2804 MFI->CreateFixedObject(1, StackSize, true));
2807 // Figure out if XMM registers are in use.
2808 assert(!(Subtarget->useSoftFloat() &&
2809 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
2810 "SSE register cannot be used when SSE is disabled!");
2812 // 64-bit calling conventions support varargs and register parameters, so we
2813 // have to do extra work to spill them in the prologue.
2814 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2815 // Find the first unallocated argument registers.
2816 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2817 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2818 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
2819 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
2820 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2821 "SSE register cannot be used when SSE is disabled!");
2823 // Gather all the live in physical registers.
2824 SmallVector<SDValue, 6> LiveGPRs;
2825 SmallVector<SDValue, 8> LiveXMMRegs;
2827 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2828 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2830 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
// AL is read alongside the unallocated XMM argument registers; it is later
// passed to the VASTART_SAVE_XMM_REGS node.
2832 if (!ArgXMMs.empty()) {
2833 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2834 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2835 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2836 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2837 LiveXMMRegs.push_back(
2838 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2843 // Get to the caller-allocated home save location. Add 8 to account
2844 // for the return address.
2845 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2846 FuncInfo->setRegSaveFrameIndex(
2847 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2848 // Fixup to set vararg frame on shadow area (4 x i64).
2850 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2852 // For X86-64, if there are vararg parameters that are passed via
2853 // registers, then we must store them to their spots on the stack so
2854 // they may be loaded by dereferencing the result of va_next.
2855 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2856 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2857 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2858 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2861 // Store the integer parameter registers.
2862 SmallVector<SDValue, 8> MemOps;
2863 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2864 getPointerTy(DAG.getDataLayout()));
2865 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2866 for (SDValue Val : LiveGPRs) {
2867 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2868 RSFIN, DAG.getIntPtrConstant(Offset, dl));
2870 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2871 MachinePointerInfo::getFixedStack(
2872 DAG.getMachineFunction(),
2873 FuncInfo->getRegSaveFrameIndex(), Offset),
2875 MemOps.push_back(Store);
2879 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2880 // Now store the XMM (fp + vector) parameter registers.
2881 SmallVector<SDValue, 12> SaveXMMOps;
2882 SaveXMMOps.push_back(Chain);
2883 SaveXMMOps.push_back(ALVal);
2884 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2885 FuncInfo->getRegSaveFrameIndex(), dl));
2886 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2887 FuncInfo->getVarArgsFPOffset(), dl));
2888 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2890 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2891 MVT::Other, SaveXMMOps));
// Join all register-save stores into a single chain.
2894 if (!MemOps.empty())
2895 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2898 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2899 // Find the largest legal vector type.
2900 MVT VecVT = MVT::Other;
2901 // FIXME: Only some x86_32 calling conventions support AVX512.
2902 if (Subtarget->hasAVX512() &&
2903 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2904 CallConv == CallingConv::Intel_OCL_BI)))
2905 VecVT = MVT::v16f32;
2906 else if (Subtarget->hasAVX())
2908 else if (Subtarget->hasSSE2())
2911 // We forward some GPRs and some vector types.
2912 SmallVector<MVT, 2> RegParmTypes;
2913 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2914 RegParmTypes.push_back(IntVT);
2915 if (VecVT != MVT::Other)
2916 RegParmTypes.push_back(VecVT);
2918 // Compute the set of forwarded registers. The rest are scratch.
2919 SmallVectorImpl<ForwardedRegister> &Forwards =
2920 FuncInfo->getForwardedMustTailRegParms();
2921 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2923 // Conservatively forward AL on x86_64, since it might be used for varargs.
2924 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2925 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2926 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2929 // Copy all forwards from physical to virtual registers.
2930 for (ForwardedRegister &F : Forwards) {
2931 // FIXME: Can we use a less constrained schedule?
2932 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2933 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2934 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2938 // Some CCs need callee pop.
2939 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2940 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2941 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2942 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
2943 // X86 interrupts must pop the error code if present
2944 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
2946 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2947 // If this is an sret function, the return should pop the hidden pointer.
2948 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
2949 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2950 argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
2951 FuncInfo->setBytesToPopOnReturn(4);
2955 // RegSaveFrameIndex is X86-64 only.
2956 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2957 if (CallConv == CallingConv::X86_FastCall ||
2958 CallConv == CallingConv::X86_ThisCall)
2959 // fastcall and thiscall functions can't have varargs.
2960 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2963 FuncInfo->setArgumentStackSize(StackSize);
2965 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
2966 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
2967 if (Personality == EHPersonality::CoreCLR) {
2969 // TODO: Add a mechanism to frame lowering that will allow us to indicate
2970 // that we'd prefer this slot be allocated towards the bottom of the frame
2971 // (i.e. near the stack pointer after allocating the frame). Every
2972 // funclet needs a copy of this slot in its (mostly empty) frame, and the
2973 // offset from the bottom of this and each funclet's frame must be the
2974 // same, so the size of funclets' (mostly empty) frames is dictated by
2975 // how far this slot is from the bottom (since they allocate just enough
2976 // space to accommodate holding this slot at the correct offset).
2977 int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
2978 EHInfo->PSPSymFrameIdx = PSPSymFI;
/// Lower an outgoing call argument that was assigned a stack location.
///
/// The destination address is computed from the location's memory offset. A
/// byval argument is copied there with CreateCopyOfByValArgument; any other
/// argument is stored there. Returns the node produced by that copy or store.
2986 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
2987 SDValue StackPtr, SDValue Arg,
2988 SDLoc dl, SelectionDAG &DAG,
2989 const CCValAssign &VA,
2990 ISD::ArgFlagsTy Flags) const {
2991 unsigned LocMemOffset = VA.getLocMemOffset();
2992 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
// NOTE(review): the operands of this ADD are on a line not visible in this
// excerpt (presumably StackPtr and PtrOff).
2993 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2995 if (Flags.isByVal())
2996 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
2998 return DAG.getStore(
2999 Chain, dl, Arg, PtrOff,
3000 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
3004 /// Emit a load of return address if tail call
3005 /// optimization is performed and it is required.
///
/// On return, OutRetAddr holds the pointer-typed load from the return-address
/// frame index. The returned SDValue is result #1 of that load node
/// (presumably its output chain). IsTailCall, Is64Bit and FPDiff are not
/// referenced in the visible body.
3007 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
3008 SDValue &OutRetAddr, SDValue Chain,
3009 bool IsTailCall, bool Is64Bit,
3010 int FPDiff, SDLoc dl) const {
3011 // Adjust the Return address stack slot.
3012 EVT VT = getPointerTy(DAG.getDataLayout());
3013 OutRetAddr = getReturnAddressFrameIndex(DAG);
3015 // Load the "old" Return address.
3016 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
3017 false, false, false, 0);
3018 return SDValue(OutRetAddr.getNode(), 1);
3021 /// Emit a store of the return address if tail call
3022 /// optimization is performed and it is required (FPDiff!=0).
///
/// When FPDiff is zero, Chain is returned unchanged. Otherwise a fixed stack
/// object of SlotSize bytes is created at offset FPDiff - SlotSize and
/// RetAddrFrIdx is stored into it.
3023 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3024 SDValue Chain, SDValue RetAddrFrIdx,
3025 EVT PtrVT, unsigned SlotSize,
3026 int FPDiff, SDLoc dl) {
3027 // Store the return address to the appropriate stack slot.
3028 if (!FPDiff) return Chain;
3029 // Calculate the new stack slot for the return address.
3030 int NewReturnAddrFI =
3031 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3033 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3034 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3035 MachinePointerInfo::getFixedStack(
3036 DAG.getMachineFunction(), NewReturnAddrFI),
3041 /// Returns a vector_shuffle mask for an movs{s|d}, movd
3042 /// operation of specified width.
///
/// The value returned is the vector shuffle of V1 and V2 built with that
/// mask. The mask's first entry is NumElems (presumably selecting element 0
/// of V2 - confirm against the shuffle-mask numbering); the entries for
/// positions 1..NumElems-1 are appended by the loop, whose body is not
/// visible in this excerpt.
3043 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
3045 unsigned NumElems = VT.getVectorNumElements();
3046 SmallVector<int, 8> Mask;
3047 Mask.push_back(NumElems);
3048 for (unsigned i = 1; i != NumElems; ++i)
3050 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
3054 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3055 SmallVectorImpl<SDValue> &InVals) const {
3056 SelectionDAG &DAG = CLI.DAG;
3058 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3059 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3060 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3061 SDValue Chain = CLI.Chain;
3062 SDValue Callee = CLI.Callee;
3063 CallingConv::ID CallConv = CLI.CallConv;
3064 bool &isTailCall = CLI.IsTailCall;
3065 bool isVarArg = CLI.IsVarArg;
3067 MachineFunction &MF = DAG.getMachineFunction();
3068 bool Is64Bit = Subtarget->is64Bit();
3069 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
3070 StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
3071 bool IsSibcall = false;
3072 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3073 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3075 if (CallConv == CallingConv::X86_INTR)
3076 report_fatal_error("X86 interrupts may not be called directly");
3078 if (Attr.getValueAsString() == "true")
3081 if (Subtarget->isPICStyleGOT() &&
3082 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3083 // If we are using a GOT, disable tail calls to external symbols with
3084 // default visibility. Tail calling such a symbol requires using a GOT
3085 // relocation, which forces early binding of the symbol. This breaks code
3086 // that require lazy function symbol resolution. Using musttail or
3087 // GuaranteedTailCallOpt will override this.
3088 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3089 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3090 G->getGlobal()->hasDefaultVisibility()))
3094 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
3096 // Force this to be a tail call. The verifier rules are enough to ensure
3097 // that we can lower this successfully without moving the return address
3100 } else if (isTailCall) {
3101 // Check if it's really possible to do a tail call.
3102 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3103 isVarArg, SR != NotStructReturn,
3104 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3105 Outs, OutVals, Ins, DAG);
3107 // Sibcalls are automatically detected tailcalls which do not require
3109 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3116 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3117 "Var args not supported with calling convention fastcc, ghc or hipe");
3119 // Analyze operands of the call, assigning locations to each operand.
3120 SmallVector<CCValAssign, 16> ArgLocs;
3121 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3123 // Allocate shadow area for Win64
3125 CCInfo.AllocateStack(32, 8);
3127 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3129 // Get a count of how many bytes are to be pushed on the stack.
3130 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3132 // This is a sibcall. The memory operands are available in caller's
3133 // own caller's stack.
3135 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3136 canGuaranteeTCO(CallConv))
3137 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3140 if (isTailCall && !IsSibcall && !IsMustTail) {
3141 // Lower arguments at fp - stackoffset + fpdiff.
3142 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3144 FPDiff = NumBytesCallerPushed - NumBytes;
3146 // Set the delta of movement of the returnaddr stackslot.
3147 // But only set if delta is greater than previous delta.
3148 if (FPDiff < X86Info->getTCReturnAddrDelta())
3149 X86Info->setTCReturnAddrDelta(FPDiff);
3152 unsigned NumBytesToPush = NumBytes;
3153 unsigned NumBytesToPop = NumBytes;
3155 // If we have an inalloca argument, all stack space has already been allocated
3156 // for us and is right at the top of the stack. We don't support multiple
3157 // arguments passed in memory when using inalloca.
3158 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3160 if (!ArgLocs.back().isMemLoc())
3161 report_fatal_error("cannot use inalloca attribute on a register "
3163 if (ArgLocs.back().getLocMemOffset() != 0)
3164 report_fatal_error("any parameter with the inalloca attribute must be "
3165 "the only memory argument");
3169 Chain = DAG.getCALLSEQ_START(
3170 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3172 SDValue RetAddrFrIdx;
3173 // Load return address for tail calls.
3174 if (isTailCall && FPDiff)
3175 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3176 Is64Bit, FPDiff, dl);
3178 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3179 SmallVector<SDValue, 8> MemOpChains;
3182 // Walk the register/memloc assignments, inserting copies/loads. In the case
3183 // of tail call optimization arguments are handled later.
3184 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3185 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3186 // Skip inalloca arguments, they have already been written.
3187 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3188 if (Flags.isInAlloca())
3191 CCValAssign &VA = ArgLocs[i];
3192 EVT RegVT = VA.getLocVT();
3193 SDValue Arg = OutVals[i];
3194 bool isByVal = Flags.isByVal();
3196 // Promote the value if needed.
3197 switch (VA.getLocInfo()) {
3198 default: llvm_unreachable("Unknown loc info!");
3199 case CCValAssign::Full: break;
3200 case CCValAssign::SExt:
3201 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3203 case CCValAssign::ZExt:
3204 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3206 case CCValAssign::AExt:
3207 if (Arg.getValueType().isVector() &&
3208 Arg.getValueType().getVectorElementType() == MVT::i1)
3209 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3210 else if (RegVT.is128BitVector()) {
3211 // Special case: passing MMX values in XMM registers.
3212 Arg = DAG.getBitcast(MVT::i64, Arg);
3213 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3214 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3216 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3218 case CCValAssign::BCvt:
3219 Arg = DAG.getBitcast(RegVT, Arg);
3221 case CCValAssign::Indirect: {
3222 // Store the argument.
3223 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3224 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3225 Chain = DAG.getStore(
3226 Chain, dl, Arg, SpillSlot,
3227 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3234 if (VA.isRegLoc()) {
3235 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3236 if (isVarArg && IsWin64) {
3237 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3238 // shadow reg if callee is a varargs function.
3239 unsigned ShadowReg = 0;
3240 switch (VA.getLocReg()) {
3241 case X86::XMM0: ShadowReg = X86::RCX; break;
3242 case X86::XMM1: ShadowReg = X86::RDX; break;
3243 case X86::XMM2: ShadowReg = X86::R8; break;
3244 case X86::XMM3: ShadowReg = X86::R9; break;
3247 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3249 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3250 assert(VA.isMemLoc());
3251 if (!StackPtr.getNode())
3252 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3253 getPointerTy(DAG.getDataLayout()));
3254 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3255 dl, DAG, VA, Flags));
3259 if (!MemOpChains.empty())
3260 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3262 if (Subtarget->isPICStyleGOT()) {
3263 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3266 RegsToPass.push_back(std::make_pair(
3267 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3268 getPointerTy(DAG.getDataLayout()))));
3270 // If we are tail calling and generating PIC/GOT style code load the
3271 // address of the callee into ECX. The value in ecx is used as target of
3272 // the tail jump. This is done to circumvent the ebx/callee-saved problem