1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
13 //===----------------------------------------------------------------------===//
15 #include "X86ISelLowering.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "X86CallingConv.h"
18 #include "X86FrameLowering.h"
19 #include "X86InstrBuilder.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86TargetMachine.h"
23 #include "X86TargetObjectFile.h"
24 #include "llvm/ADT/SmallBitVector.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/ADT/StringSwitch.h"
29 #include "llvm/Analysis/EHPersonalities.h"
30 #include "llvm/CodeGen/IntrinsicLowering.h"
31 #include "llvm/CodeGen/MachineFrameInfo.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineJumpTableInfo.h"
35 #include "llvm/CodeGen/MachineModuleInfo.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/WinEHFuncInfo.h"
38 #include "llvm/IR/CallSite.h"
39 #include "llvm/IR/CallingConv.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalAlias.h"
44 #include "llvm/IR/GlobalVariable.h"
45 #include "llvm/IR/Instructions.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/MC/MCAsmInfo.h"
48 #include "llvm/MC/MCContext.h"
49 #include "llvm/MC/MCExpr.h"
50 #include "llvm/MC/MCSymbol.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Target/TargetOptions.h"
56 #include "X86IntrinsicsInfo.h"
62 #define DEBUG_TYPE "x86-isel"
64 STATISTIC(NumTailCalls, "Number of tail calls");
66 static cl::opt<bool> ExperimentalVectorWideningLegalization(
67 "x86-experimental-vector-widening-legalization", cl::init(false),
68 cl::desc("Enable an experimental vector type legalization through widening "
69 "rather than promotion."),
72 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
73 const X86Subtarget &STI)
74 : TargetLowering(TM), Subtarget(&STI) {
75 X86ScalarSSEf64 = Subtarget->hasSSE2();
76 X86ScalarSSEf32 = Subtarget->hasSSE1();
77 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
79 // Set up the TargetLowering object.
81 // X86 is weird. It always uses i8 for shift amounts and setcc results.
82 setBooleanContents(ZeroOrOneBooleanContent);
83 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
84 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
86 // For 64-bit, since we have so many registers, use the ILP scheduler.
87 // For 32-bit, use the register pressure specific scheduling.
88 // For Atom, always use ILP scheduling.
89 if (Subtarget->isAtom())
90 setSchedulingPreference(Sched::ILP);
91 else if (Subtarget->is64Bit())
92 setSchedulingPreference(Sched::ILP);
94 setSchedulingPreference(Sched::RegPressure);
95 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
96 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
98 // Bypass expensive divides on Atom when compiling with O2.
99 if (TM.getOptLevel() >= CodeGenOpt::Default) {
100 if (Subtarget->hasSlowDivide32())
101 addBypassSlowDiv(32, 8);
102 if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
103 addBypassSlowDiv(64, 16);
106 if (Subtarget->isTargetKnownWindowsMSVC()) {
107 // Setup Windows compiler runtime calls.
108 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
109 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
110 setLibcallName(RTLIB::SREM_I64, "_allrem");
111 setLibcallName(RTLIB::UREM_I64, "_aullrem");
112 setLibcallName(RTLIB::MUL_I64, "_allmul");
113 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
114 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
115 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
116 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
117 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
120 if (Subtarget->isTargetDarwin()) {
121 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
122 setUseUnderscoreSetJmp(false);
123 setUseUnderscoreLongJmp(false);
124 } else if (Subtarget->isTargetWindowsGNU()) {
125 // MS runtime is weird: it exports _setjmp, but longjmp!
126 setUseUnderscoreSetJmp(true);
127 setUseUnderscoreLongJmp(false);
129 setUseUnderscoreSetJmp(true);
130 setUseUnderscoreLongJmp(true);
133 // Set up the register classes.
134 addRegisterClass(MVT::i8, &X86::GR8RegClass);
135 addRegisterClass(MVT::i16, &X86::GR16RegClass);
136 addRegisterClass(MVT::i32, &X86::GR32RegClass);
137 if (Subtarget->is64Bit())
138 addRegisterClass(MVT::i64, &X86::GR64RegClass);
140 for (MVT VT : MVT::integer_valuetypes())
141 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
143 // We don't accept any truncstore of integer registers.
144 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
145 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
146 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
147 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
148 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
149 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
151 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
153 // SETOEQ and SETUNE require checking two conditions.
154 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
155 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
156 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
157 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
158 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
159 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
161 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
163 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
164 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
165 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
167 if (Subtarget->is64Bit()) {
168 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
169 // f32/f64 are legal, f80 is custom.
170 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
172 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
173 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
174 } else if (!Subtarget->useSoftFloat()) {
175 // We have an algorithm for SSE2->double, and we turn this into a
176 // 64-bit FILD followed by conditional FADD for other targets.
177 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
178 // We have an algorithm for SSE2, and we turn this into a 64-bit
179 // FILD or VCVTUSI2SS/SD for other targets.
180 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
183 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
185 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
186 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
188 if (!Subtarget->useSoftFloat()) {
189 // SSE has no i16 to fp conversion, only i32
190 if (X86ScalarSSEf32) {
191 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
192 // f32 and f64 cases are Legal, f80 case is not
193 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
195 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
196 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
199 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
200 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
203 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
205 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
206 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
208 if (!Subtarget->useSoftFloat()) {
209 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
210 // are Legal, f80 is custom lowered.
211 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
212 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
214 if (X86ScalarSSEf32) {
215 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
216 // f32 and f64 cases are Legal, f80 case is not
217 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
219 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
220 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
223 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
224 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
225 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
228 // Handle FP_TO_UINT by promoting the destination to a larger signed
230 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
231 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
232 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
234 if (Subtarget->is64Bit()) {
235 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
236 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
237 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
238 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
240 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
241 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
243 } else if (!Subtarget->useSoftFloat()) {
244 // Since AVX is a superset of SSE3, only check for SSE here.
245 if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
246 // Expand FP_TO_UINT into a select.
247 // FIXME: We would like to use a Custom expander here eventually to do
248 // the optimal thing for SSE vs. the default expansion in the legalizer.
249 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
251 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
252 // With SSE3 we can use fisttpll to convert to a signed i64; without
253 // SSE, we're stuck with a fistpll.
254 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
256 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
259 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
260 if (!X86ScalarSSEf64) {
261 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
262 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
263 if (Subtarget->is64Bit()) {
264 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
265 // Without SSE, i64->f64 goes through memory.
266 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
268 } else if (!Subtarget->is64Bit())
269 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
271 // Scalar integer divide and remainder are lowered to use operations that
272 // produce two results, to match the available instructions. This exposes
273 // the two-result form to trivial CSE, which is able to combine x/y and x%y
274 // into a single instruction.
276 // Scalar integer multiply-high is also lowered to use two-result
277 // operations, to match the available instructions. However, plain multiply
278 // (low) operations are left as Legal, as there are single-result
279 // instructions for this in x86. Using the two-result multiply instructions
280 // when both high and low results are needed must be arranged by dagcombine.
281 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
282 setOperationAction(ISD::MULHS, VT, Expand);
283 setOperationAction(ISD::MULHU, VT, Expand);
284 setOperationAction(ISD::SDIV, VT, Expand);
285 setOperationAction(ISD::UDIV, VT, Expand);
286 setOperationAction(ISD::SREM, VT, Expand);
287 setOperationAction(ISD::UREM, VT, Expand);
289 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
290 setOperationAction(ISD::ADDC, VT, Custom);
291 setOperationAction(ISD::ADDE, VT, Custom);
292 setOperationAction(ISD::SUBC, VT, Custom);
293 setOperationAction(ISD::SUBE, VT, Custom);
296 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
297 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
298 setOperationAction(ISD::BR_CC , MVT::f32, Expand);
299 setOperationAction(ISD::BR_CC , MVT::f64, Expand);
300 setOperationAction(ISD::BR_CC , MVT::f80, Expand);
301 setOperationAction(ISD::BR_CC , MVT::f128, Expand);
302 setOperationAction(ISD::BR_CC , MVT::i8, Expand);
303 setOperationAction(ISD::BR_CC , MVT::i16, Expand);
304 setOperationAction(ISD::BR_CC , MVT::i32, Expand);
305 setOperationAction(ISD::BR_CC , MVT::i64, Expand);
306 setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
307 setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
308 setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
309 setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
310 setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
311 setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
312 setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
313 setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
314 if (Subtarget->is64Bit())
315 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
316 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
317 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
318 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
319 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
321 if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
322 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
323 // is. We should promote the value to 64-bits to solve this.
324 // This is what the CRT headers do - `fmodf` is an inline header
325 // function casting to f64 and calling `fmod`.
326 setOperationAction(ISD::FREM , MVT::f32 , Promote);
328 setOperationAction(ISD::FREM , MVT::f32 , Expand);
331 setOperationAction(ISD::FREM , MVT::f64 , Expand);
332 setOperationAction(ISD::FREM , MVT::f80 , Expand);
333 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
335 // Promote the i8 variants and force them on up to i32 which has a shorter
337 setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
338 AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
339 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
340 AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
341 if (Subtarget->hasBMI()) {
342 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
343 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
344 if (Subtarget->is64Bit())
345 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
347 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
348 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
349 if (Subtarget->is64Bit())
350 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
353 if (Subtarget->hasLZCNT()) {
354 // When promoting the i8 variants, force them to i32 for a shorter
356 setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
357 AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
358 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
359 AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
360 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
361 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
362 if (Subtarget->is64Bit())
363 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
365 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
366 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
367 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
368 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
369 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
370 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
371 if (Subtarget->is64Bit()) {
372 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
377 // Special handling for half-precision floating point conversions.
378 // If we don't have F16C support, then lower half float conversions
379 // into library calls.
380 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
381 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
382 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
385 // There's never any support for operations beyond MVT::f32.
386 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
387 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
388 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
389 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
391 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
392 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
393 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
394 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
395 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
396 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
398 if (Subtarget->hasPOPCNT()) {
399 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
401 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
402 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
403 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
404 if (Subtarget->is64Bit())
405 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
408 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
410 if (!Subtarget->hasMOVBE())
411 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
413 // These should be promoted to a larger select which is supported.
414 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
415 // X86 wants to expand cmov itself.
416 setOperationAction(ISD::SELECT , MVT::i8 , Custom);
417 setOperationAction(ISD::SELECT , MVT::i16 , Custom);
418 setOperationAction(ISD::SELECT , MVT::i32 , Custom);
419 setOperationAction(ISD::SELECT , MVT::f32 , Custom);
420 setOperationAction(ISD::SELECT , MVT::f64 , Custom);
421 setOperationAction(ISD::SELECT , MVT::f80 , Custom);
422 setOperationAction(ISD::SELECT , MVT::f128 , Custom);
423 setOperationAction(ISD::SETCC , MVT::i8 , Custom);
424 setOperationAction(ISD::SETCC , MVT::i16 , Custom);
425 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
426 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
427 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
428 setOperationAction(ISD::SETCC , MVT::f80 , Custom);
429 setOperationAction(ISD::SETCC , MVT::f128 , Custom);
430 setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
431 setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
432 setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
433 if (Subtarget->is64Bit()) {
434 setOperationAction(ISD::SELECT , MVT::i64 , Custom);
435 setOperationAction(ISD::SETCC , MVT::i64 , Custom);
436 setOperationAction(ISD::SETCCE , MVT::i64 , Custom);
438 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
439 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
440 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
441 // support continuation, user-level threading, etc. As a result, no
442 // other SjLj exception interfaces are implemented and please don't build
443 // your own exception handling based on them.
444 // LLVM/Clang supports zero-cost DWARF exception handling.
445 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
446 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
449 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
450 setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
451 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
452 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
453 if (Subtarget->is64Bit())
454 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
455 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
456 setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
457 if (Subtarget->is64Bit()) {
458 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
459 setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
460 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
461 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
462 setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
464 // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
465 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
466 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
467 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
468 if (Subtarget->is64Bit()) {
469 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
470 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
471 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
474 if (Subtarget->hasSSE1())
475 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
477 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
479 // Expand certain atomics
480 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
482 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
483 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
486 if (Subtarget->hasCmpxchg16b()) {
487 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
490 // FIXME - use subtarget debug flags
491 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
492 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
493 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
496 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
497 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
499 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
500 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
502 setOperationAction(ISD::TRAP, MVT::Other, Legal);
503 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
505 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
506 setOperationAction(ISD::VASTART , MVT::Other, Custom);
507 setOperationAction(ISD::VAEND , MVT::Other, Expand);
508 if (Subtarget->is64Bit()) {
509 setOperationAction(ISD::VAARG , MVT::Other, Custom);
510 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
512 // TargetInfo::CharPtrBuiltinVaList
513 setOperationAction(ISD::VAARG , MVT::Other, Expand);
514 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
517 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
518 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
520 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
522 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
523 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
524 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
526 if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
527 // f32 and f64 use SSE.
528 // Set up the FP register classes.
529 addRegisterClass(MVT::f32, &X86::FR32RegClass);
530 addRegisterClass(MVT::f64, &X86::FR64RegClass);
532 // Use ANDPD to simulate FABS.
533 setOperationAction(ISD::FABS , MVT::f64, Custom);
534 setOperationAction(ISD::FABS , MVT::f32, Custom);
536 // Use XORP to simulate FNEG.
537 setOperationAction(ISD::FNEG , MVT::f64, Custom);
538 setOperationAction(ISD::FNEG , MVT::f32, Custom);
540 // Use ANDPD and ORPD to simulate FCOPYSIGN.
541 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
542 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
544 // Lower this to FGETSIGNx86 plus an AND.
545 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
546 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
548 // We don't support sin/cos/fmod
549 setOperationAction(ISD::FSIN , MVT::f64, Expand);
550 setOperationAction(ISD::FCOS , MVT::f64, Expand);
551 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
552 setOperationAction(ISD::FSIN , MVT::f32, Expand);
553 setOperationAction(ISD::FCOS , MVT::f32, Expand);
554 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
556 // Expand FP immediates into loads from the stack, except for the special
558 addLegalFPImmediate(APFloat(+0.0)); // xorpd
559 addLegalFPImmediate(APFloat(+0.0f)); // xorps
560 } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
561 // Use SSE for f32, x87 for f64.
562 // Set up the FP register classes.
563 addRegisterClass(MVT::f32, &X86::FR32RegClass);
564 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
566 // Use ANDPS to simulate FABS.
567 setOperationAction(ISD::FABS , MVT::f32, Custom);
569 // Use XORP to simulate FNEG.
570 setOperationAction(ISD::FNEG , MVT::f32, Custom);
572 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
574 // Use ANDPS and ORPS to simulate FCOPYSIGN.
575 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
576 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
578 // We don't support sin/cos/fmod
579 setOperationAction(ISD::FSIN , MVT::f32, Expand);
580 setOperationAction(ISD::FCOS , MVT::f32, Expand);
581 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
583 // Special cases we handle for FP constants.
584 addLegalFPImmediate(APFloat(+0.0f)); // xorps
585 addLegalFPImmediate(APFloat(+0.0)); // FLD0
586 addLegalFPImmediate(APFloat(+1.0)); // FLD1
587 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
588 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
590 if (!TM.Options.UnsafeFPMath) {
591 setOperationAction(ISD::FSIN , MVT::f64, Expand);
592 setOperationAction(ISD::FCOS , MVT::f64, Expand);
593 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
595 } else if (!Subtarget->useSoftFloat()) {
596 // f32 and f64 in x87.
597 // Set up the FP register classes.
598 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
599 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
601 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
602 setOperationAction(ISD::UNDEF, MVT::f32, Expand);
603 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
604 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
606 if (!TM.Options.UnsafeFPMath) {
607 setOperationAction(ISD::FSIN , MVT::f64, Expand);
608 setOperationAction(ISD::FSIN , MVT::f32, Expand);
609 setOperationAction(ISD::FCOS , MVT::f64, Expand);
610 setOperationAction(ISD::FCOS , MVT::f32, Expand);
611 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
612 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
614 addLegalFPImmediate(APFloat(+0.0)); // FLD0
615 addLegalFPImmediate(APFloat(+1.0)); // FLD1
616 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
617 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
618 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
619 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
620 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
621 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
624 // We don't support FMA.
625 setOperationAction(ISD::FMA, MVT::f64, Expand);
626 setOperationAction(ISD::FMA, MVT::f32, Expand);
628 // Long double always uses X87, except f128 in MMX.
629 if (!Subtarget->useSoftFloat()) {
630 if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
631 addRegisterClass(MVT::f128, &X86::FR128RegClass);
632 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
633 setOperationAction(ISD::FABS , MVT::f128, Custom);
634 setOperationAction(ISD::FNEG , MVT::f128, Custom);
635 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
638 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
639 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
640 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
642 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
643 addLegalFPImmediate(TmpFlt); // FLD0
645 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
648 APFloat TmpFlt2(+1.0);
649 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
651 addLegalFPImmediate(TmpFlt2); // FLD1
652 TmpFlt2.changeSign();
653 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
656 if (!TM.Options.UnsafeFPMath) {
657 setOperationAction(ISD::FSIN , MVT::f80, Expand);
658 setOperationAction(ISD::FCOS , MVT::f80, Expand);
659 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
662 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
663 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
664 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
665 setOperationAction(ISD::FRINT, MVT::f80, Expand);
666 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
667 setOperationAction(ISD::FMA, MVT::f80, Expand);
670 // Always use a library call for pow.
671 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
672 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
673 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
675 setOperationAction(ISD::FLOG, MVT::f80, Expand);
676 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
677 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
678 setOperationAction(ISD::FEXP, MVT::f80, Expand);
679 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
680 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
681 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
683 // First set operation action for all vector types to either promote
684 // (for widening) or expand (for scalarization). Then we will selectively
685 // turn on ones that can be effectively codegen'd.
686 for (MVT VT : MVT::vector_valuetypes()) {
687 setOperationAction(ISD::ADD , VT, Expand);
688 setOperationAction(ISD::SUB , VT, Expand);
689 setOperationAction(ISD::FADD, VT, Expand);
690 setOperationAction(ISD::FNEG, VT, Expand);
691 setOperationAction(ISD::FSUB, VT, Expand);
692 setOperationAction(ISD::MUL , VT, Expand);
693 setOperationAction(ISD::FMUL, VT, Expand);
694 setOperationAction(ISD::SDIV, VT, Expand);
695 setOperationAction(ISD::UDIV, VT, Expand);
696 setOperationAction(ISD::FDIV, VT, Expand);
697 setOperationAction(ISD::SREM, VT, Expand);
698 setOperationAction(ISD::UREM, VT, Expand);
699 setOperationAction(ISD::LOAD, VT, Expand);
700 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
701 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
702 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
703 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
704 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
705 setOperationAction(ISD::FABS, VT, Expand);
706 setOperationAction(ISD::FSIN, VT, Expand);
707 setOperationAction(ISD::FSINCOS, VT, Expand);
708 setOperationAction(ISD::FCOS, VT, Expand);
709 setOperationAction(ISD::FSINCOS, VT, Expand);
710 setOperationAction(ISD::FREM, VT, Expand);
711 setOperationAction(ISD::FMA, VT, Expand);
712 setOperationAction(ISD::FPOWI, VT, Expand);
713 setOperationAction(ISD::FSQRT, VT, Expand);
714 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
715 setOperationAction(ISD::FFLOOR, VT, Expand);
716 setOperationAction(ISD::FCEIL, VT, Expand);
717 setOperationAction(ISD::FTRUNC, VT, Expand);
718 setOperationAction(ISD::FRINT, VT, Expand);
719 setOperationAction(ISD::FNEARBYINT, VT, Expand);
720 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
721 setOperationAction(ISD::MULHS, VT, Expand);
722 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
723 setOperationAction(ISD::MULHU, VT, Expand);
724 setOperationAction(ISD::SDIVREM, VT, Expand);
725 setOperationAction(ISD::UDIVREM, VT, Expand);
726 setOperationAction(ISD::FPOW, VT, Expand);
727 setOperationAction(ISD::CTPOP, VT, Expand);
728 setOperationAction(ISD::CTTZ, VT, Expand);
729 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
730 setOperationAction(ISD::CTLZ, VT, Expand);
731 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
732 setOperationAction(ISD::SHL, VT, Expand);
733 setOperationAction(ISD::SRA, VT, Expand);
734 setOperationAction(ISD::SRL, VT, Expand);
735 setOperationAction(ISD::ROTL, VT, Expand);
736 setOperationAction(ISD::ROTR, VT, Expand);
737 setOperationAction(ISD::BSWAP, VT, Expand);
738 setOperationAction(ISD::SETCC, VT, Expand);
739 setOperationAction(ISD::FLOG, VT, Expand);
740 setOperationAction(ISD::FLOG2, VT, Expand);
741 setOperationAction(ISD::FLOG10, VT, Expand);
742 setOperationAction(ISD::FEXP, VT, Expand);
743 setOperationAction(ISD::FEXP2, VT, Expand);
744 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
745 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
746 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
747 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
748 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
749 setOperationAction(ISD::TRUNCATE, VT, Expand);
750 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
751 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
752 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
753 setOperationAction(ISD::VSELECT, VT, Expand);
754 setOperationAction(ISD::SELECT_CC, VT, Expand);
755 for (MVT InnerVT : MVT::vector_valuetypes()) {
756 setTruncStoreAction(InnerVT, VT, Expand);
758 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
759 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
761 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
762 // types, we have to deal with them whether we ask for Expansion or not.
763 // Setting Expand causes its own optimisation problems though, so leave
765 if (VT.getVectorElementType() == MVT::i1)
766 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
768 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
769 // split/scalarized right now.
770 if (VT.getVectorElementType() == MVT::f16)
771 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
775 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
776 // with -msoft-float, disable use of MMX as well.
777 if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
778 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
779 // No operations on x86mmx supported, everything uses intrinsics.
782 // MMX-sized vectors (other than x86mmx) are expected to be expanded
783 // into smaller operations.
784 for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
785 setOperationAction(ISD::MULHS, MMXTy, Expand);
786 setOperationAction(ISD::AND, MMXTy, Expand);
787 setOperationAction(ISD::OR, MMXTy, Expand);
788 setOperationAction(ISD::XOR, MMXTy, Expand);
789 setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
790 setOperationAction(ISD::SELECT, MMXTy, Expand);
791 setOperationAction(ISD::BITCAST, MMXTy, Expand);
793 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
795 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
796 addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
798 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
799 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
800 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
801 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
802 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
803 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
804 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
805 setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
806 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
807 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
808 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
809 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
810 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
811 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
814 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
815 addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
817 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
818 // registers cannot be used even for integer operations.
819 addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
820 addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
821 addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
822 addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
824 setOperationAction(ISD::ADD, MVT::v16i8, Legal);
825 setOperationAction(ISD::ADD, MVT::v8i16, Legal);
826 setOperationAction(ISD::ADD, MVT::v4i32, Legal);
827 setOperationAction(ISD::ADD, MVT::v2i64, Legal);
828 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
829 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
830 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
831 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
832 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
833 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
834 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
835 setOperationAction(ISD::SUB, MVT::v16i8, Legal);
836 setOperationAction(ISD::SUB, MVT::v8i16, Legal);
837 setOperationAction(ISD::SUB, MVT::v4i32, Legal);
838 setOperationAction(ISD::SUB, MVT::v2i64, Legal);
839 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
840 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
841 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
842 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
843 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
844 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
845 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
846 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
848 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
849 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
850 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
851 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
853 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
854 setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
855 setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
856 setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
858 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
859 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
860 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
861 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
862 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
864 setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
865 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
866 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
867 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
869 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
870 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
871 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
872 // ISD::CTTZ v2i64 - scalarization is faster.
873 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
874 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
875 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
876 // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
878 // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
879 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
880 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
881 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
882 setOperationAction(ISD::VSELECT, VT, Custom);
883 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
886 // We support custom legalizing of sext and anyext loads for specific
887 // memory vector types which we can load as a scalar (or sequence of
888 // scalars) and extend in-register to a legal 128-bit vector type. For sext
889 // loads these must work with a single scalar load.
890 for (MVT VT : MVT::integer_vector_valuetypes()) {
891 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
892 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
893 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
894 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
895 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
896 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
897 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
898 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
899 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
902 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
903 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
904 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
905 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
906 setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
907 setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
908 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
909 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
911 if (Subtarget->is64Bit()) {
912 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
913 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
916 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
917 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
918 setOperationAction(ISD::AND, VT, Promote);
919 AddPromotedToType (ISD::AND, VT, MVT::v2i64);
920 setOperationAction(ISD::OR, VT, Promote);
921 AddPromotedToType (ISD::OR, VT, MVT::v2i64);
922 setOperationAction(ISD::XOR, VT, Promote);
923 AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
924 setOperationAction(ISD::LOAD, VT, Promote);
925 AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
926 setOperationAction(ISD::SELECT, VT, Promote);
927 AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
930 // Custom lower v2i64 and v2f64 selects.
931 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
932 setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
933 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
934 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
936 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
937 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
939 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
941 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
942 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
943 // As there is no 64-bit GPR available, we need to build a special custom
944 // sequence to convert from v2i32 to v2f32.
945 if (!Subtarget->is64Bit())
946 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
948 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
949 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
951 for (MVT VT : MVT::fp_vector_valuetypes())
952 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
954 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
955 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
956 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
959 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
960 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
961 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
962 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
963 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
964 setOperationAction(ISD::FRINT, RoundedTy, Legal);
965 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
968 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
969 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
970 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
971 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
972 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
973 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
974 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
975 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
977 // FIXME: Do we need to handle scalar-to-vector here?
978 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
980 // We directly match byte blends in the backend as they match the VSELECT
982 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
984 // SSE41 brings specific instructions for doing vector sign extend even in
985 // cases where we don't have SRA.
986 for (MVT VT : MVT::integer_vector_valuetypes()) {
987 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
988 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
989 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
992 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
993 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
994 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
995 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
996 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
997 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
998 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1000 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
1001 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1002 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1003 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1004 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1005 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1007 // i8 and i16 vectors are custom because the source register and source
1008 // memory operand types are not the same width. f32 vectors are
1009 // custom since the immediate controlling the insert encodes additional
1011 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1012 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1013 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1014 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1016 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
1017 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
1018 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
1019 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1021 // FIXME: these should be Legal, but that's only for the case where
1022 // the index is constant. For now custom expand to deal with that.
1023 if (Subtarget->is64Bit()) {
1024 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1025 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1029 if (Subtarget->hasSSE2()) {
1030 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1031 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1032 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1034 setOperationAction(ISD::SRL, MVT::v8i16, Custom);
1035 setOperationAction(ISD::SRL, MVT::v16i8, Custom);
1037 setOperationAction(ISD::SHL, MVT::v8i16, Custom);
1038 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
1040 setOperationAction(ISD::SRA, MVT::v8i16, Custom);
1041 setOperationAction(ISD::SRA, MVT::v16i8, Custom);
1043 // In the customized shift lowering, the legal cases in AVX2 will be
1045 setOperationAction(ISD::SRL, MVT::v2i64, Custom);
1046 setOperationAction(ISD::SRL, MVT::v4i32, Custom);
1048 setOperationAction(ISD::SHL, MVT::v2i64, Custom);
1049 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
1051 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1052 setOperationAction(ISD::SRA, MVT::v4i32, Custom);
1055 if (Subtarget->hasXOP()) {
1056 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1057 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1058 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1059 setOperationAction(ISD::ROTL, MVT::v2i64, Custom);
1060 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1061 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1062 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1063 setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
1066 if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
1067 addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
1068 addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
1069 addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
1070 addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
1071 addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
1072 addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
1074 setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
1075 setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
1076 setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
1078 setOperationAction(ISD::FADD, MVT::v8f32, Legal);
1079 setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
1080 setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
1081 setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
1082 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
1083 setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
1084 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
1085 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
1086 setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
1087 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
1088 setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
1089 setOperationAction(ISD::FABS, MVT::v8f32, Custom);
1091 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1092 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1093 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1094 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1095 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1096 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1097 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1098 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1099 setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
1100 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
1101 setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
1102 setOperationAction(ISD::FABS, MVT::v4f64, Custom);
1104 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1105 // even though v8i16 is a legal type.
1106 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1107 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1108 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1110 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1111 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1112 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1114 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1115 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1117 for (MVT VT : MVT::fp_vector_valuetypes())
1118 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1120 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
1121 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
1123 setOperationAction(ISD::SHL, MVT::v16i16, Custom);
1124 setOperationAction(ISD::SHL, MVT::v32i8, Custom);
1126 setOperationAction(ISD::SRA, MVT::v16i16, Custom);
1127 setOperationAction(ISD::SRA, MVT::v32i8, Custom);
1129 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1130 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1131 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1132 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1134 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1135 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1136 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1138 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1139 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1140 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1141 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1142 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1143 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1144 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1145 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1146 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1147 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1148 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1149 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1151 setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
1152 setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
1153 setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
1154 setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
1156 setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
1157 setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
1158 setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
1159 setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
1160 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
1161 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
1162 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1163 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1165 if (Subtarget->hasAnyFMA()) {
1166 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1167 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1168 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1169 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1170 setOperationAction(ISD::FMA, MVT::f32, Legal);
1171 setOperationAction(ISD::FMA, MVT::f64, Legal);
1174 if (Subtarget->hasInt256()) {
1175 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1176 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1177 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1178 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1180 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1181 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1182 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1183 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1185 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1186 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1187 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1188 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1190 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1191 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1192 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1193 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1195 setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
1196 setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
1197 setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
1198 setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
1199 setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
1200 setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
1201 setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
1202 setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
1203 setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
1204 setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
1205 setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
1206 setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
1208 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1209 // when we have a 256bit-wide blend with immediate.
1210 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1212 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1213 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1214 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1215 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1216 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1217 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1218 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1220 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1221 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1222 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1223 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1224 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1225 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1227 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1228 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1229 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1230 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1232 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1233 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1234 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1235 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1237 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1238 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1239 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1240 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1242 setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
1243 setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
1244 setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
1245 setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
1246 setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
1247 setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
1248 setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
1249 setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
1250 setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
1251 setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
1252 setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
1253 setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
1256 // In the customized shift lowering, the legal cases in AVX2 will be
1258 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1259 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1261 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1262 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1264 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1265 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1267 // Custom lower several nodes for 256-bit types.
1268 for (MVT VT : MVT::vector_valuetypes()) {
1269 if (VT.getScalarSizeInBits() >= 32) {
1270 setOperationAction(ISD::MLOAD, VT, Legal);
1271 setOperationAction(ISD::MSTORE, VT, Legal);
1273 // Extract subvector is special because the value type
1274 // (result) is 128-bit but the source is 256-bit wide.
1275 if (VT.is128BitVector()) {
1276 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1278 // Do not attempt to custom lower other non-256-bit vectors
1279 if (!VT.is256BitVector())
1282 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1283 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1284 setOperationAction(ISD::VSELECT, VT, Custom);
1285 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1286 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1287 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1288 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1289 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1292 if (Subtarget->hasInt256())
1293 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1295 // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
1296 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1297 setOperationAction(ISD::AND, VT, Promote);
1298 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1299 setOperationAction(ISD::OR, VT, Promote);
1300 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1301 setOperationAction(ISD::XOR, VT, Promote);
1302 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1303 setOperationAction(ISD::LOAD, VT, Promote);
1304 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1305 setOperationAction(ISD::SELECT, VT, Promote);
1306 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1310 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
1311 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1312 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1313 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1314 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1316 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1317 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1318 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1320 for (MVT VT : MVT::fp_vector_valuetypes())
1321 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1323 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1324 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1325 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1326 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1327 setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1328 setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1329 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1330 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1331 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1332 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1333 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1334 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1336 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1337 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1338 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
1339 setOperationAction(ISD::XOR, MVT::i1, Legal);
1340 setOperationAction(ISD::OR, MVT::i1, Legal);
1341 setOperationAction(ISD::AND, MVT::i1, Legal);
1342 setOperationAction(ISD::SUB, MVT::i1, Custom);
1343 setOperationAction(ISD::ADD, MVT::i1, Custom);
1344 setOperationAction(ISD::MUL, MVT::i1, Custom);
1345 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1346 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1347 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1348 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1349 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1351 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1352 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1353 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1354 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1355 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1356 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1357 setOperationAction(ISD::FABS, MVT::v16f32, Custom);
1359 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1360 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1361 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1362 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1363 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1364 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1365 setOperationAction(ISD::FABS, MVT::v8f64, Custom);
1366 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1367 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1369 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1370 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1371 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1372 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1373 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1374 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1375 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1376 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1377 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1378 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1379 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1380 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1381 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1382 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1383 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1384 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1386 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1387 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1388 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1389 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1390 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1391 if (Subtarget->hasVLX()){
1392 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1393 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1394 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1395 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1396 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1398 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1399 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1400 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1401 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1402 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1404 setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
1405 setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
1406 setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
1407 setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
1409 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1410 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1411 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1412 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
1413 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
1414 if (Subtarget->hasDQI()) {
1415 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
1416 setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
1418 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1419 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1420 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1421 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1422 if (Subtarget->hasVLX()) {
1423 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
1424 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1425 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
1426 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1427 setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
1428 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1429 setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
1430 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1433 if (Subtarget->hasVLX()) {
1434 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1435 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1436 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1437 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1438 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1439 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1440 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1441 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1443 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1444 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1445 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1446 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1447 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1448 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1449 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1450 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1451 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1452 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1453 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1454 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1455 if (Subtarget->hasDQI()) {
1456 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1457 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1459 setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
1460 setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
1461 setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
1462 setOperationAction(ISD::FCEIL, MVT::v8f64, Legal);
1463 setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal);
1464 setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal);
1465 setOperationAction(ISD::FRINT, MVT::v16f32, Legal);
1466 setOperationAction(ISD::FRINT, MVT::v8f64, Legal);
1467 setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal);
1468 setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal);
1470 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1471 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1472 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1473 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1474 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1476 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1477 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1479 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1481 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1482 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1483 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1484 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1485 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1486 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1487 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1488 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1489 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1490 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1491 setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
1492 setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
1494 setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
1495 setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
1496 setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
1497 setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
1498 setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
1499 setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
1500 setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
1501 setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
1503 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1504 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1506 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1507 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1509 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1511 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1512 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1514 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1515 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1517 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1518 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1520 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1521 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1522 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1523 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1524 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1525 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1527 if (Subtarget->hasCDI()) {
1528 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1529 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1530 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand);
1531 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand);
1533 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1534 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1535 setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
1536 setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1537 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand);
1538 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand);
1539 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand);
1540 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand);
1542 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
1543 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
1545 if (Subtarget->hasVLX()) {
1546 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
1547 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
1548 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
1549 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
1550 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
1551 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
1552 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
1553 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
1555 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1556 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1557 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
1558 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
1560 setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
1561 setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
1562 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1563 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1564 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
1565 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
1566 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
1567 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
1569 } // Subtarget->hasCDI()
1571 if (Subtarget->hasDQI()) {
1572 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1573 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1574 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1576 // Custom lower several nodes.
1577 for (MVT VT : MVT::vector_valuetypes()) {
1578 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1580 setOperationAction(ISD::AND, VT, Legal);
1581 setOperationAction(ISD::OR, VT, Legal);
1582 setOperationAction(ISD::XOR, VT, Legal);
1584 if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
1585 setOperationAction(ISD::MGATHER, VT, Custom);
1586 setOperationAction(ISD::MSCATTER, VT, Custom);
1588 // Extract subvector is special because the value type
1589 // (result) is 256/128-bit but the source is 512-bit wide.
1590 if (VT.is128BitVector() || VT.is256BitVector()) {
1591 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1593 if (VT.getVectorElementType() == MVT::i1)
1594 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1596 // Do not attempt to custom lower other non-512-bit vectors
1597 if (!VT.is512BitVector())
1600 if (EltSize >= 32) {
1601 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1602 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1603 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1604 setOperationAction(ISD::VSELECT, VT, Legal);
1605 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1606 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1607 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1608 setOperationAction(ISD::MLOAD, VT, Legal);
1609 setOperationAction(ISD::MSTORE, VT, Legal);
1610 setOperationAction(ISD::MGATHER, VT, Legal);
1611 setOperationAction(ISD::MSCATTER, VT, Custom);
1614 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1615 setOperationAction(ISD::SELECT, VT, Promote);
1616 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1620 if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
1621 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1622 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1624 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1625 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1627 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1628 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1629 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1630 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1631 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1632 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1633 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1634 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1635 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1636 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1637 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1638 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1639 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1640 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1641 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1642 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1643 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1644 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
1645 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
1646 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1647 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1648 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1649 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1650 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1651 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1652 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1653 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1654 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1655 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1656 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1657 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1658 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1659 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1660 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1661 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1662 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
1663 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
1664 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1665 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1666 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1667 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1668 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1670 setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
1671 setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
1672 setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
1673 setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
1674 setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
1675 setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
1676 setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
1677 setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
1679 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1680 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1681 if (Subtarget->hasVLX())
1682 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1684 if (Subtarget->hasCDI()) {
1685 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1686 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1687 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
1688 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand);
1691 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1692 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1693 setOperationAction(ISD::VSELECT, VT, Legal);
1694 setOperationAction(ISD::SRL, VT, Custom);
1695 setOperationAction(ISD::SHL, VT, Custom);
1696 setOperationAction(ISD::SRA, VT, Custom);
1698 setOperationAction(ISD::AND, VT, Promote);
1699 AddPromotedToType (ISD::AND, VT, MVT::v8i64);
1700 setOperationAction(ISD::OR, VT, Promote);
1701 AddPromotedToType (ISD::OR, VT, MVT::v8i64);
1702 setOperationAction(ISD::XOR, VT, Promote);
1703 AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
1707 if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
1708 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1709 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1711 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1712 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1713 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1714 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1715 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1716 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1717 setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
1718 setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
1719 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1720 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
1721 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
1722 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
1724 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1725 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1726 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1727 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1728 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1729 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1730 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1731 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1733 setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
1734 setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
1735 setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
1736 setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
1737 setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
1738 setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
1739 setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
1740 setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
1743 // We want to custom lower some of our intrinsics.
1744 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1745 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1746 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1747 if (!Subtarget->is64Bit()) {
1748 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1749 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1752 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1753 // handle type legalization for these operations here.
1755 // FIXME: We really should do custom legalization for addition and
1756 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1757 // than generic legalization for 64-bit multiplication-with-overflow, though.
1758 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1759 if (VT == MVT::i64 && !Subtarget->is64Bit())
1761 // Add/Sub/Mul with overflow operations are custom lowered.
1762 setOperationAction(ISD::SADDO, VT, Custom);
1763 setOperationAction(ISD::UADDO, VT, Custom);
1764 setOperationAction(ISD::SSUBO, VT, Custom);
1765 setOperationAction(ISD::USUBO, VT, Custom);
1766 setOperationAction(ISD::SMULO, VT, Custom);
1767 setOperationAction(ISD::UMULO, VT, Custom);
1770 if (!Subtarget->is64Bit()) {
1771 // These libcalls are not available in 32-bit.
1772 setLibcallName(RTLIB::SHL_I128, nullptr);
1773 setLibcallName(RTLIB::SRL_I128, nullptr);
1774 setLibcallName(RTLIB::SRA_I128, nullptr);
1777 // Combine sin / cos into one node or libcall if possible.
1778 if (Subtarget->hasSinCos()) {
1779 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1780 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1781 if (Subtarget->isTargetDarwin()) {
1782 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1783 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1784 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1785 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1789 if (Subtarget->isTargetWin64()) {
1790 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1791 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1792 setOperationAction(ISD::SREM, MVT::i128, Custom);
1793 setOperationAction(ISD::UREM, MVT::i128, Custom);
1794 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1795 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1798 // We have target-specific dag combine patterns for the following nodes:
1799 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1800 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1801 setTargetDAGCombine(ISD::BITCAST);
1802 setTargetDAGCombine(ISD::VSELECT);
1803 setTargetDAGCombine(ISD::SELECT);
1804 setTargetDAGCombine(ISD::SHL);
1805 setTargetDAGCombine(ISD::SRA);
1806 setTargetDAGCombine(ISD::SRL);
1807 setTargetDAGCombine(ISD::OR);
1808 setTargetDAGCombine(ISD::AND);
1809 setTargetDAGCombine(ISD::ADD);
1810 setTargetDAGCombine(ISD::FADD);
1811 setTargetDAGCombine(ISD::FSUB);
1812 setTargetDAGCombine(ISD::FNEG);
1813 setTargetDAGCombine(ISD::FMA);
1814 setTargetDAGCombine(ISD::FMINNUM);
1815 setTargetDAGCombine(ISD::FMAXNUM);
1816 setTargetDAGCombine(ISD::SUB);
1817 setTargetDAGCombine(ISD::LOAD);
1818 setTargetDAGCombine(ISD::MLOAD);
1819 setTargetDAGCombine(ISD::STORE);
1820 setTargetDAGCombine(ISD::MSTORE);
1821 setTargetDAGCombine(ISD::TRUNCATE);
1822 setTargetDAGCombine(ISD::ZERO_EXTEND);
1823 setTargetDAGCombine(ISD::ANY_EXTEND);
1824 setTargetDAGCombine(ISD::SIGN_EXTEND);
1825 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1826 setTargetDAGCombine(ISD::SINT_TO_FP);
1827 setTargetDAGCombine(ISD::UINT_TO_FP);
1828 setTargetDAGCombine(ISD::SETCC);
1829 setTargetDAGCombine(ISD::BUILD_VECTOR);
1830 setTargetDAGCombine(ISD::MUL);
1831 setTargetDAGCombine(ISD::XOR);
1832 setTargetDAGCombine(ISD::MSCATTER);
1833 setTargetDAGCombine(ISD::MGATHER);
1835 computeRegisterProperties(Subtarget->getRegisterInfo());
1837 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1838 MaxStoresPerMemsetOptSize = 8;
1839 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1840 MaxStoresPerMemcpyOptSize = 4;
1841 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1842 MaxStoresPerMemmoveOptSize = 4;
1843 setPrefLoopAlignment(4); // 2^4 bytes.
1845 // A predictable cmov does not hurt on an in-order CPU.
1846 // FIXME: Use a CPU attribute to trigger this, not a CPU model.
1847 PredictableSelectIsExpensive = !Subtarget->isAtom();
1848 EnableExtLdPromotion = true;
1849 setPrefFunctionAlignment(4); // 2^4 bytes.
1851 verifyIntrinsicTables();
1854 // This has so far only been implemented for 64-bit MachO.
/// Returns true only when the subtarget is both a MachO target and 64-bit;
/// every other configuration yields false.
1855 bool X86TargetLowering::useLoadStackGuardNode() const {
1856 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
/// Selects the type-legalization action for the vector type \p VT.
///
/// Returns TypeWidenVector when all of the following hold:
///   - ExperimentalVectorWideningLegalization is set,
///   - \p VT does not have exactly one element, and
///   - the element type of \p VT is not i1.
/// In every other case the decision is delegated to
/// TargetLoweringBase::getPreferredVectorAction.
1859 TargetLoweringBase::LegalizeTypeAction
1860 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1861 if (ExperimentalVectorWideningLegalization &&
1862 VT.getVectorNumElements() != 1 &&
1863 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1864 return TypeWidenVector;
// Fall back to the target-independent default.
1866 return TargetLoweringBase::getPreferredVectorAction(VT);
/// Picks the result type of a SETCC for the operand type \p VT.
///
/// NOTE(review): several lines of this function are not shown here (the end of
/// the parameter list, the condition guarding the first return, and the
/// `switch` headers around the `case` labels). The comments below describe
/// only what is visible; confirm against the full source.
1869 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
// i1 when AVX-512 is available, i8 otherwise. The guard for this return is not
// shown -- presumably the non-vector case; TODO confirm.
1872 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
// Only simple (MVT-representable) types are mapped to mask vectors.
1874 if (VT.isSimple()) {
1875 MVT VVT = VT.getSimpleVT();
1876 const unsigned NumElts = VVT.getVectorNumElements();
1877 const MVT EltVT = VVT.getVectorElementType();
// 512-bit vectors: 32/64-bit elements need AVX-512, 8/16-bit elements need
// BWI. The case labels presumably switch on NumElts -- TODO confirm.
1878 if (VVT.is512BitVector()) {
1879 if (Subtarget->hasAVX512())
1880 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1881 EltVT == MVT::f32 || EltVT == MVT::f64)
1883 case 8: return MVT::v8i1;
1884 case 16: return MVT::v16i1;
1886 if (Subtarget->hasBWI())
1887 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1889 case 32: return MVT::v32i1;
1890 case 64: return MVT::v64i1;
// 128/256-bit vectors: 32/64-bit elements need VLX, 8/16-bit elements need
// both BWI and VLX.
1894 if (VVT.is256BitVector() || VVT.is128BitVector()) {
1895 if (Subtarget->hasVLX())
1896 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1897 EltVT == MVT::f32 || EltVT == MVT::f64)
1899 case 2: return MVT::v2i1;
1900 case 4: return MVT::v4i1;
1901 case 8: return MVT::v8i1;
1903 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1904 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1906 case 8: return MVT::v8i1;
1907 case 16: return MVT::v16i1;
1908 case 32: return MVT::v32i1;
// No mask type was chosen above: use the same vector shape with integer
// elements.
1913 return VT.changeVectorElementTypeToInteger();
1916 /// Helper for getByValTypeAlignment to determine
1917 /// the desired ByVal argument alignment.
///
/// Walks \p Ty recursively and raises \p MaxAlign (an in/out parameter) to the
/// largest alignment found among array element types and struct members.
/// NOTE(review): the early-exit checks and the statement executed for 128-bit
/// vectors are not shown here -- presumably they set MaxAlign to 16; confirm.
1918 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
// Vector types: only 128-bit wide vectors are treated specially.
1921 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1922 if (VTy->getBitWidth() == 128)
// Arrays: the alignment is determined by the element type.
1924 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1925 unsigned EltAlign = 0;
1926 getMaxByValAlign(ATy->getElementType(), EltAlign);
1927 if (EltAlign > MaxAlign)
1928 MaxAlign = EltAlign;
// Structs: take the maximum over all members.
1929 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1930 for (auto *EltTy : STy->elements()) {
1931 unsigned EltAlign = 0;
1932 getMaxByValAlign(EltTy, EltAlign);
1933 if (EltAlign > MaxAlign)
1934 MaxAlign = EltAlign;
1941 /// Return the desired alignment for ByVal aggregate
1942 /// function arguments in the caller parameter area. For X86, aggregates
1943 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1944 /// are at 4-byte boundaries.
///
/// \param Ty the type of the ByVal argument.
/// \param DL data layout used to query the ABI alignment of \p Ty.
/// NOTE(review): the return statements and the declaration of `Align` are not
/// shown here; confirm the exact values against the full source.
1945 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1946 const DataLayout &DL) const {
1947 if (Subtarget->is64Bit()) {
1948 // Max of 8 and alignment of type.
1949 unsigned TyAlign = DL.getABITypeAlignment(Ty);
// Without SSE1 the type is not scanned for vector members at all.
1956 if (Subtarget->hasSSE1())
1957 getMaxByValAlign(Ty, Align);
1961 /// Returns the target specific optimal type for load
1962 /// and store operations as a result of memset, memcpy, and memmove
1963 /// lowering. If DstAlign is zero, the destination alignment can be assumed
1964 /// to satisfy any constraint. Similarly if SrcAlign is zero it
1965 /// means there isn't a need to check it against alignment requirement,
1966 /// probably because the source does not need to be loaded. If 'IsMemset' is
1967 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1968 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1969 /// source is constant so it does not need to be loaded.
1970 /// It returns EVT::Other if the type should be determined using generic
1971 /// target-independent logic.
///
/// NOTE(review): the `return` statements and a few condition lines of this
/// function are not shown here; the inline notes cover only the visible
/// checks.
1973 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1974 unsigned DstAlign, unsigned SrcAlign,
1975 bool IsMemset, bool ZeroMemset,
1977 MachineFunction &MF) const {
1978 const Function *F = MF.getFunction();
// The FP/vector choices below are considered only for memcpy/memmove or a
// memset of zero, and only if the function lacks the NoImplicitFloat
// attribute.
1979 if ((!IsMemset || ZeroMemset) &&
1980 !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
// Accept either fast unaligned 16-byte accesses, or both alignments being
// unconstrained (0) or at least 16 bytes.
1982 (!Subtarget->isUnalignedMem16Slow() ||
1983 ((DstAlign == 0 || DstAlign >= 16) &&
1984 (SrcAlign == 0 || SrcAlign >= 16)))) {
1986 // FIXME: Check if unaligned 32-byte accesses are slow.
// Feature checks run from the widest capability (Int256) down to SSE1.
1987 if (Subtarget->hasInt256())
1989 if (Subtarget->hasFp256())
1992 if (Subtarget->hasSSE2())
1994 if (Subtarget->hasSSE1())
// 32-bit targets with SSE2, at least 8 bytes to move, and a source that is not
// a string constant.
1996 } else if (!MemcpyStrSrc && Size >= 8 &&
1997 !Subtarget->is64Bit() &&
1998 Subtarget->hasSSE2()) {
1999 // Do not use f64 to lower memcpy if source is string constant. It's
2000 // better to use i32 to avoid the loads.
2004 // This is a compromise. If we reach here, unaligned accesses may be slow on
2005 // this target. However, creating smaller, aligned accesses could be even
2006 // slower and would certainly be a lot more code.
2007 if (Subtarget->is64Bit() && Size >= 8)
/// Reports whether \p VT is safe to use for memory-operation lowering.
///
/// For f64 the answer is X86ScalarSSEf64. The first visible return yields
/// X86ScalarSSEf32; its guard is not shown here -- presumably the f32 case;
/// TODO confirm. The result for all other types is also not shown.
2012 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2014 return X86ScalarSSEf32;
2015 else if (VT == MVT::f64)
2016 return X86ScalarSSEf64;
/// Reports whether misaligned accesses of type \p VT are allowed and, through
/// the `Fast` out-parameter, whether they are fast.
///
/// NOTE(review): the remaining parameters, the `case` labels and the return
/// statement are not shown here; which size selects which assignment to
/// `*Fast` must be confirmed against the full source.
2021 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// Speed is decided by the bit width of the access.
2026 switch (VT.getSizeInBits()) {
2028 // 8-byte and under are always assumed to be fast.
// Fast unless the subtarget flags unaligned 16-byte accesses as slow.
2032 *Fast = !Subtarget->isUnalignedMem16Slow();
// Fast unless the subtarget flags unaligned 32-byte accesses as slow.
2035 *Fast = !Subtarget->isUnalignedMem32Slow();
2037 // TODO: What about AVX-512 (512-bit) accesses?
2040 // Misaligned accesses of any size are always allowed.
2044 /// Return the entry encoding for a jump table in the
2045 /// current function. The returned value is a member of the
2046 /// MachineJumpTableInfo::JTEntryKind enum.
///
/// Yields EK_Custom32 when the relocation model is PIC and the subtarget uses
/// the GOT PIC style; otherwise defers to
/// TargetLowering::getJumpTableEncoding.
2047 unsigned X86TargetLowering::getJumpTableEncoding() const {
2048 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// reference, which is requested here through the custom 32-bit entry kind.
2050 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2051 Subtarget->isPICStyleGOT())
2052 return MachineJumpTableInfo::EK_Custom32;
2054 // Otherwise, use the normal jump table encoding heuristics.
2055 return TargetLowering::getJumpTableEncoding();
/// Forwards the subtarget's soft-float setting: returns whatever
/// Subtarget->useSoftFloat() reports.
2058 bool X86TargetLowering::useSoftFloat() const {
2059 return Subtarget->useSoftFloat();
/// Builds the expression emitted for one custom-encoded jump table entry.
///
/// \param MJTI jump table info (not referenced by the visible code).
/// \param MBB  the basic block the entry refers to.
/// \param uid  entry identifier (not referenced by the visible code).
/// \param Ctx  MC context in which the expression is created.
/// \returns a symbol reference to \p MBB's symbol with the VK_GOTOFF variant.
///
/// Asserts that the relocation model is PIC and the subtarget uses the GOT PIC
/// style.
2063 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2064 const MachineBasicBlock *MBB,
2065 unsigned uid,MCContext &Ctx) const{
2066 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
2067 Subtarget->isPICStyleGOT());
2068 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// references to the target block's symbol.
2070 return MCSymbolRefExpr::create(MBB->getSymbol(),
2071 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2074 /// Returns relocation base for the given PIC jumptable.
///
/// On non-64-bit subtargets this is an X86ISD::GlobalBaseReg node of pointer
/// type. NOTE(review): the value returned for 64-bit subtargets is not shown
/// here -- presumably \p Table itself; confirm.
2075 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2076 SelectionDAG &DAG) const {
2077 if (!Subtarget->is64Bit())
2078 // This doesn't have SDLoc associated with it, but is not really the
2079 // same as a Register.
2080 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2081 getPointerTy(DAG.getDataLayout()));
2085 /// This returns the relocation base for the given PIC jumptable,
2086 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
///
/// \param MF  function owning the jump table; supplies the PIC base symbol.
/// \param JTI jump table index, forwarded to the generic implementation.
/// \param Ctx MC context in which the expression is created.
2087 const MCExpr *X86TargetLowering::
2088 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2089 MCContext &Ctx) const {
2090 // X86-64 uses RIP relative addressing based on the jump table label.
2091 if (Subtarget->isPICStyleRIPRel())
2092 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2094 // Otherwise, the reference is relative to the PIC base.
2095 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
/// Maps a value type to a representative register class and a cost, returned
/// together as a pair.
///
/// NOTE(review): the declaration of `Cost`, the `default:`/`break` lines and
/// some `case` labels are not shown here; in particular the label selecting
/// VR64RegClass is not visible. Confirm against the full source.
2098 std::pair<const TargetRegisterClass *, uint8_t>
2099 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2101 const TargetRegisterClass *RRC = nullptr;
2103 switch (VT.SimpleTy) {
// Types not listed explicitly are handled by the generic implementation.
2105 return TargetLowering::findRepresentativeClass(TRI, VT);
// Scalar integers: GR64 on 64-bit subtargets, GR32 otherwise.
2106 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2107 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2110 RRC = &X86::VR64RegClass;
// Scalar FP and the listed 128/256-bit vector types all use VR128.
2112 case MVT::f32: case MVT::f64:
2113 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2114 case MVT::v4f32: case MVT::v2f64:
2115 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
2117 RRC = &X86::VR128RegClass;
2120 return std::make_pair(RRC, Cost);
/// Describes a stack cookie location through the \p AddressSpace and
/// \p Offset out-parameters.
///
/// NOTE(review): the assignments to the out-parameters and every return
/// statement are not shown here. Only the decision structure is visible:
/// non-Linux targets take an early path, and on 64-bit the kernel code model
/// is distinguished from the others. Confirm the values against the full
/// source.
2123 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
2124 unsigned &Offset) const {
2125 if (!Subtarget->isTargetLinux())
2128 if (Subtarget->is64Bit()) {
2129 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
2131 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
/// Returns the location of the SafeStack pointer.
///
/// Non-Android targets defer to TargetLowering::getSafeStackPointerLocation.
/// On Android the result is a constant: the integer `Offset` converted to a
/// pointer-to-i8* in address space `AddressSpace`.
/// NOTE(review): the statements assigning AddressSpace and Offset are not
/// shown here; confirm the values against the full source.
2143 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2144 if (!Subtarget->isTargetAndroid())
2145 return TargetLowering::getSafeStackPointerLocation(IRB);
2147 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2148 // definition of TLS_SLOT_SAFESTACK in
2149 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2150 unsigned AddressSpace, Offset;
2151 if (Subtarget->is64Bit()) {
2152 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2154 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
// Materialize the slot address as an inttoptr constant built from a 32-bit
// integer.
2164 return ConstantExpr::getIntToPtr(
2165 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2166 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
/// Reports whether a cast from address space \p SrcAS to \p DestAS is a no-op.
///
/// The two address spaces must differ (asserted). The cast is a no-op exactly
/// when both address space numbers are below 256.
2169 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2170 unsigned DestAS) const {
2171 assert(SrcAS != DestAS && "Expected different address spaces!");
2173 return SrcAS < 256 && DestAS < 256;
2176 //===----------------------------------------------------------------------===//
2177 // Return Value Calling Convention Implementation
2178 //===----------------------------------------------------------------------===//
2180 #include "X86GenCallingConv.inc"
/// Checks whether the return values described by \p Outs can be lowered under
/// calling convention \p CallConv.
///
/// Builds a temporary CCState and returns the result of CheckReturn against
/// the RetCC_X86 assignment function.
2182 bool X86TargetLowering::CanLowerReturn(
2183 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2184 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
// Scratch storage for the assignments produced during the check.
2185 SmallVector<CCValAssign, 16> RVLocs;
2186 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2187 return CCInfo.CheckReturn(Outs, RetCC_X86);
/// Provides the scratch registers; the calling-convention argument is unnamed
/// and unused.
///
/// The static table holds X86::R11 followed by 0 (presumably a terminator).
/// NOTE(review): the return statement is not shown here -- presumably it
/// returns ScratchRegs; confirm.
2190 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2191 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
// Lowers a function return. Return values are assigned locations with
// RetCC_X86, copied into their registers (or pushed directly as operands for
// FP0/FP1), and collected in RetOps together with the chain and the
// bytes-to-pop constant. The resulting node is X86ISD::RET_FLAG, or
// X86ISD::IRET for the X86_INTR calling convention.
2196 X86TargetLowering::LowerReturn(SDValue Chain,
2197 CallingConv::ID CallConv, bool isVarArg,
2198 const SmallVectorImpl<ISD::OutputArg> &Outs,
2199 const SmallVectorImpl<SDValue> &OutVals,
2200 SDLoc dl, SelectionDAG &DAG) const {
2201 MachineFunction &MF = DAG.getMachineFunction();
2202 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
// Interrupt handlers must not produce a return value; this is a hard error.
2204 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2205 report_fatal_error("X86 interrupts may not return any value");
// Assign a location to every returned value.
2207 SmallVector<CCValAssign, 16> RVLocs;
2208 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2209 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2212 SmallVector<SDValue, 6> RetOps;
2213 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2214 // Operand #1 = Bytes To Pop
2215 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2218 // Copy the result values into the output registers.
2219 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2220 CCValAssign &VA = RVLocs[i];
2221 assert(VA.isRegLoc() && "Can only return in registers!");
2222 SDValue ValToCopy = OutVals[i];
2223 EVT ValVT = ValToCopy.getValueType();
2225 // Promote values to the appropriate types.
2226 if (VA.getLocInfo() == CCValAssign::SExt)
2227 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2228 else if (VA.getLocInfo() == CCValAssign::ZExt)
2229 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2230 else if (VA.getLocInfo() == CCValAssign::AExt) {
// Vectors of i1 are sign-extended; the ANY_EXTEND below is presumably the
// alternative for every other type (the `else` is not visible here).
2231 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2232 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2234 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2236 else if (VA.getLocInfo() == CCValAssign::BCvt)
2237 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2239 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2240 "Unexpected FP-extend for return value.");
2242 // If this is x86-64, and we disabled SSE, we can't return FP values,
2243 // or SSE or MMX vectors.
2244 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2245 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2246 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2247 report_fatal_error("SSE register return with SSE disabled");
2249 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2250 // llvm-gcc has never done it right and no one has noticed, so this
2251 // should be OK for now.
2252 if (ValVT == MVT::f64 &&
2253 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2254 report_fatal_error("SSE2 register return with SSE2 disabled");
2256 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2257 // the RET instruction and handled by the FP Stackifier.
2258 if (VA.getLocReg() == X86::FP0 ||
2259 VA.getLocReg() == X86::FP1) {
2260 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2261 // change the value to the FP stack register class.
2262 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2263 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2264 RetOps.push_back(ValToCopy);
2265 // Don't emit a copytoreg.
2269 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2270 // which is returned in RAX / RDX.
2271 if (Subtarget->is64Bit()) {
2272 if (ValVT == MVT::x86mmx) {
2273 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2274 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2275 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2277 // If we don't have SSE2 available, convert to v4f32 so the generated
2278 // register is legal.
2279 if (!Subtarget->hasSSE2())
2280 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
// Copy the value into its return register, carrying Flag from one copy to the
// next, and record the register as an operand of the return node.
2285 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2286 Flag = Chain.getValue(1);
2287 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2290 // All x86 ABIs require that for returning structs by value we copy
2291 // the sret argument into %rax/%eax (depending on ABI) for the return.
2292 // We saved the argument into a virtual register in the entry block,
2293 // so now we copy the value out and into %rax/%eax.
2295 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2296 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2297 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2298 // either case FuncInfo->setSRetReturnReg() will have been called.
2299 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2300 SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
2301 getPointerTy(MF.getDataLayout()));
// RAX is chosen only for 64-bit targets that are not ILP32; EAX otherwise.
2304 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2305 X86::RAX : X86::EAX;
2306 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2307 Flag = Chain.getValue(1);
2309 // RAX/EAX now acts like a return value.
2311 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
// Registers reported by getCalleeSavedRegsViaCopy are appended as return
// operands; only GR64 registers are expected, anything else is unreachable.
// NOTE(review): the loop/guard around this section is not visible here.
2314 const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
2315 const MCPhysReg *I =
2316 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2319 if (X86::GR64RegClass.contains(*I))
2320 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2322 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2326 RetOps[0] = Chain; // Update chain.
2328 // Add the flag if we have it.
2330 RetOps.push_back(Flag);
// Interrupt handlers return with IRET; every other convention uses RET_FLAG.
2332 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2333 if (CallConv == CallingConv::X86_INTR)
2334 opcode = X86ISD::IRET;
2335 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
// Tail-call helper. Inspects the single use of N's only result: it must be a
// CopyToReg (whose last operand is not glue) or an FP_EXTEND, and the users of
// that node are then compared against X86ISD::RET_FLAG.
// NOTE(review): the early returns and the final result/Chain update are not
// visible in this listing; the outcome of each check is inferred from the
// neighbouring comments only.
2338 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
// N must produce exactly one value, and that value must have exactly one use.
2339 if (N->getNumValues() != 1)
2341 if (!N->hasNUsesOfValue(1, 0))
2344 SDValue TCChain = Chain;
2345 SDNode *Copy = *N->use_begin();
2346 if (Copy->getOpcode() == ISD::CopyToReg) {
2347 // If the copy has a glue operand, we conservatively assume it isn't safe to
2348 // perform a tail call.
2349 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
// Otherwise the candidate tail-call chain is the copy's incoming chain.
2351 TCChain = Copy->getOperand(0);
2352 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
// Walk every user of the copy/extend node.
2355 bool HasRet = false;
2356 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2358 if (UI->getOpcode() != X86ISD::RET_FLAG)
2360 // If we are returning more than one value, we can definitely
2361 // not make a tail call see PR19530
2362 if (UI->getNumOperands() > 4)
// A return with exactly four operands is tolerated only when the last operand
// is glue (presumably rejected otherwise; that statement is not visible here).
2364 if (UI->getNumOperands() == 4 &&
2365 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
// Chooses the type an extended argument or return value is widened to.
// The minimum type is i8 when an i1 is zero-extended on a 64-bit subtarget;
// i32 appears to be used in every other case (the `else` is not visible here).
// VT is returned unchanged unless it is narrower than the register type for
// that minimum, in which case the register type is returned.
2378 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2379 ISD::NodeType ExtendKind) const {
2381 // TODO: Is this also valid on 32-bit?
2382 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2383 ReturnMVT = MVT::i8;
2385 ReturnMVT = MVT::i32;
2387 EVT MinVT = getRegisterType(Context, ReturnMVT);
2388 return VT.bitsLT(MinVT) ? MinVT : VT;
2391 /// Lower the result values of a call into the
2392 /// appropriate copies out of appropriate physical registers.
// Locations come from RetCC_X86. Each result is copied out of its physical
// register and appended to InVals, with the chain and InFlag carried from one
// copy to the next.
2395 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2396 CallingConv::ID CallConv, bool isVarArg,
2397 const SmallVectorImpl<ISD::InputArg> &Ins,
2398 SDLoc dl, SelectionDAG &DAG,
2399 SmallVectorImpl<SDValue> &InVals) const {
2401 // Assign locations to each value returned by this call.
2402 SmallVector<CCValAssign, 16> RVLocs;
2403 bool Is64Bit = Subtarget->is64Bit();
2404 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2406 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2408 // Copy all of the result registers out of their specified physreg.
2409 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2410 CCValAssign &VA = RVLocs[i];
2411 EVT CopyVT = VA.getLocVT();
2413 // If this is x86-64, and we disabled SSE, we can't return FP values
// The check also applies to 32-bit results flagged inreg.
2414 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2415 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2416 report_fatal_error("SSE register return with SSE disabled");
2419 // If we prefer to use the value in xmm registers, copy it out as f80 and
2420 // use a truncate to move it from fp stack reg to xmm reg.
2421 bool RoundAfterCopy = false;
2422 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2423 isScalarFPTypeInSSEReg(VA.getValVT())) {
// Rounding is needed only if the copy type now differs from the location type
// (the statement that changes CopyVT is not visible here).
2425 RoundAfterCopy = (CopyVT != VA.getLocVT());
// Chain is result #1 of the CopyFromReg node; the copied value is result #0.
2428 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2429 CopyVT, InFlag).getValue(1);
2430 SDValue Val = Chain.getValue(0);
2433 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2434 // This truncation won't change the value.
2435 DAG.getIntPtrConstant(1, dl));
// Values extended in their location whose scalar type is i1 are truncated
// back to the value type.
2437 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
2438 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
// Result #2 of the copy node becomes the incoming flag for the next copy.
2440 InFlag = Chain.getValue(2);
2441 InVals.push_back(Val);
2447 //===----------------------------------------------------------------------===//
2448 // C & StdCall & Fast Calling Convention implementation
2449 //===----------------------------------------------------------------------===//
2450 // StdCall calling convention seems to be standard for many Windows' API
2451 // routines and around. It differs from C calling convention just a little:
2452 // callee should clean up the stack, not caller. Symbols should be also
2453 // decorated in some fancy way :) It doesn't support any vector arguments.
2454 // For info on fast calling convention see Fast Calling Convention (tail call)
2455 // implementation LowerX86_32FastCCCallTo.
2457 /// CallIsStructReturn - Determines whether a call uses struct return
// NOTE(review): the enumerators are not visible in this listing. The helpers
// callIsStructReturn and argsAreStructReturn return NotStructReturn,
// RegStructReturn and StackStructReturn, which are presumably declared here.
2459 enum StructReturnType {
// Classifies a call's struct-return style from the flags of the first
// outgoing argument:
//   - not sret                          -> NotStructReturn
//   - sret and (inreg or an MCU target) -> RegStructReturn
//   - any other sret                    -> StackStructReturn
// NOTE(review): the first `return NotStructReturn` is presumably guarded by an
// empty-Outs check that is not visible here.
2464 static StructReturnType
2465 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2467 return NotStructReturn;
2469 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2470 if (!Flags.isSRet())
2471 return NotStructReturn;
2472 if (Flags.isInReg() || IsMCU)
2473 return RegStructReturn;
2474 return StackStructReturn;
2477 /// Determines whether a function uses struct return semantics.
// Classification is based on the flags of the first incoming argument:
//   - not sret                          -> NotStructReturn
//   - sret and (inreg or an MCU target) -> RegStructReturn
//   - any other sret                    -> StackStructReturn
// NOTE(review): the first `return NotStructReturn` is presumably guarded by an
// empty-Ins check that is not visible here.
2478 static StructReturnType
2479 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2481 return NotStructReturn;
2483 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2484 if (!Flags.isSRet())
2485 return NotStructReturn;
2486 if (Flags.isInReg() || IsMCU)
2487 return RegStructReturn;
2488 return StackStructReturn;
2491 /// Make a copy of an aggregate at address specified by "Src" to address
2492 /// "Dst" with size and alignment information specified by the specific
2493 /// parameter attribute. The copy will be passed as a byval function parameter.
// The byval size is materialised as an i32 constant, and the copy is emitted
// through DAG.getMemcpy as a non-volatile, always-inline, non-tail-call memcpy
// using the byval alignment and default MachinePointerInfo for both sides.
2495 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2496 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2498 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2500 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2501 /*isVolatile*/false, /*AlwaysInline=*/true,
2502 /*isTailCall*/false,
2503 MachinePointerInfo(), MachinePointerInfo());
2506 /// Return true if the calling convention is one that we can guarantee TCO for.
// The conventions accepted here are Fast, GHC, HiPE and HHVM.
2507 static bool canGuaranteeTCO(CallingConv::ID CC) {
2508 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2509 CC == CallingConv::HiPE || CC == CallingConv::HHVM);
2512 /// Return true if we might ever do TCO for calls with this calling convention.
// The C and callee-pop conventions below are listed explicitly (their shared
// result statement is not visible here); any other convention defers to
// canGuaranteeTCO.
2513 static bool mayTailCallThisCC(CallingConv::ID CC) {
2515 // C calling conventions:
2516 case CallingConv::C:
2517 case CallingConv::X86_64_Win64:
2518 case CallingConv::X86_64_SysV:
2519 // Callee pop conventions:
2520 case CallingConv::X86_ThisCall:
2521 case CallingConv::X86_StdCall:
2522 case CallingConv::X86_VectorCall:
2523 case CallingConv::X86_FastCall:
2526 return canGuaranteeTCO(CC);
2530 /// Return true if the function is being made into a tailcall target by
2531 /// changing its ABI.
// True only when GuaranteedTailCallOpt is set and canGuaranteeTCO accepts CC.
2532 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2533 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
// Checks whether a call instruction is a tail-call candidate. The visible
// tests are: the call is marked as a tail call, the enclosing function's
// "disable-tail-calls" attribute is not "true", and mayTailCallThisCC accepts
// the callee's calling convention.
// NOTE(review): the return statements and the declarations of Attr and CS are
// not visible in this listing.
2536 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2538 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2539 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2543 CallingConv::ID CalleeCC = CS.getCallingConv();
2544 if (!mayTailCallThisCC(CalleeCC))
// Lowers one incoming argument that lives in memory. A fixed stack object is
// created at VA.getLocMemOffset(). For byval arguments the frame index itself
// is returned; otherwise the value is loaded from the slot and, if it was
// stored extended with an i1 scalar type, truncated back to the value type.
// For the X86_INTR convention the object's offset is overridden with Offset.
2551 X86TargetLowering::LowerMemArgument(SDValue Chain,
2552 CallingConv::ID CallConv,
2553 const SmallVectorImpl<ISD::InputArg> &Ins,
2554 SDLoc dl, SelectionDAG &DAG,
2555 const CCValAssign &VA,
2556 MachineFrameInfo *MFI,
2558 // Create the nodes corresponding to a load from this parameter slot.
2559 ISD::ArgFlagsTy Flags = Ins[i].Flags;
// The slot is immutable only if it is not byval and guaranteed TCO is not in
// effect for this convention.
2560 bool AlwaysUseMutable = shouldGuaranteeTCO(
2561 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2562 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2565 // If value is passed by pointer we have address passed instead of the value
2567 bool ExtendedInMem = VA.isExtInLoc() &&
2568 VA.getValVT().getScalarType() == MVT::i1;
// Indirect or extended-in-memory values use the location type; the value type
// is presumably used otherwise (the `else` is not visible here).
2570 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2571 ValVT = VA.getLocVT();
2573 ValVT = VA.getValVT();
2575 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2576 // taken by a return address.
2578 if (CallConv == CallingConv::X86_INTR) {
2579 const X86Subtarget& Subtarget =
2580 static_cast<const X86Subtarget&>(DAG.getSubtarget());
2581 // X86 interrupts may take one or two arguments.
2582 // On the stack there will be no return address as in regular call.
2583 // Offset of last argument need to be set to -4/-8 bytes.
2584 // Where offset of the first argument out of two, should be set to 0 bytes.
// (i + 1) % Ins.size() - 1 is -1 for the last argument and 0 for the first of
// two, which is then scaled by the slot size (8 on 64-bit, 4 otherwise).
2585 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2588 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2589 // changed with more analysis.
2590 // In case of tail call optimization mark all arguments mutable. Since they
2591 // could be overwritten by lowering of arguments in case of a tail call.
2592 if (Flags.isByVal()) {
2593 unsigned Bytes = Flags.getByValSize();
2594 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2595 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2596 // Adjust SP offset of interrupt parameter.
2597 if (CallConv == CallingConv::X86_INTR) {
2598 MFI->setObjectOffset(FI, Offset);
2600 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// Non-byval path: the slot is sized from ValVT, in bytes.
2602 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2603 VA.getLocMemOffset(), isImmutable);
2604 // Adjust SP offset of interrupt parameter.
2605 if (CallConv == CallingConv::X86_INTR) {
2606 MFI->setObjectOffset(FI, Offset);
2609 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2610 SDValue Val = DAG.getLoad(
2611 ValVT, dl, Chain, FIN,
2612 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
2614 return ExtendedInMem ?
2615 DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
2619 // FIXME: Get this from tablegen.
// Returns the 64-bit integer argument registers for the calling convention:
// RCX, RDX, R8, R9 when the convention is Win64; RDI, RSI, RDX, RCX, R8, R9
// otherwise. Asserts that the subtarget is 64-bit.
2620 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2621 const X86Subtarget *Subtarget) {
2622 assert(Subtarget->is64Bit());
2624 if (Subtarget->isCallingConvWin64(CallConv)) {
2625 static const MCPhysReg GPR64ArgRegsWin64[] = {
2626 X86::RCX, X86::RDX, X86::R8, X86::R9
2628 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2631 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2632 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2634 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2637 // FIXME: Get this from tablegen.
// Returns the XMM argument registers for a 64-bit function. XMM0-XMM7 are
// returned when the convention is not Win64 and SSE may be used.
// NOTE(review): the values returned for Win64 and for the soft-float /
// noimplicitfloat / no-SSE1 case are not visible here (presumably an empty
// list; confirm against the full file).
2638 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2639 CallingConv::ID CallConv,
2640 const X86Subtarget *Subtarget) {
2641 assert(Subtarget->is64Bit());
2642 if (Subtarget->isCallingConvWin64(CallConv)) {
2643 // The XMM registers which might contain var arg parameters are shadowed
2644 // in their paired GPR. So we only need to save the GPR to their home
2646 // TODO: __vectorcall will change this.
2650 const Function *Fn = MF.getFunction();
2651 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2652 bool isSoftFloat = Subtarget->useSoftFloat();
// Soft-float combined with the noimplicitfloat attribute is asserted against.
2653 assert(!(isSoftFloat && NoImplicitFloatOps) &&
2654 "SSE register cannot be used when SSE is disabled!");
2655 if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
2656 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2660 static const MCPhysReg XMMArgRegs64Bit[] = {
2661 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2662 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2664 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
// Lowers the incoming formal arguments of the current function. Visible steps:
//   1. Assign locations with CC_X86 (after reserving the Win64 shadow area).
//   2. Materialise each argument from a live-in register or a stack slot and
//      append it to InVals.
//   3. Save the sret pointer in a virtual register for use at the return.
//   4. Set up the varargs frame index and, on 64-bit, spill the unallocated
//      argument registers to the register save area.
//   5. Record registers forwarded to musttail calls in vararg functions.
//   6. Record bytes-to-pop, the argument stack size and, for the CoreCLR
//      personality, a PSPSym stack slot.
2667 SDValue X86TargetLowering::LowerFormalArguments(
2668 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2669 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2670 SmallVectorImpl<SDValue> &InVals) const {
2671 MachineFunction &MF = DAG.getMachineFunction();
2672 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2673 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
// An externally visible function named "main" on a Cygwin/MinGW target gets a
// forced frame pointer.
2675 const Function* Fn = MF.getFunction();
2676 if (Fn->hasExternalLinkage() &&
2677 Subtarget->isTargetCygMing() &&
2678 Fn->getName() == "main")
2679 FuncInfo->setForceFramePointer(true);
2681 MachineFrameInfo *MFI = MF.getFrameInfo();
2682 bool Is64Bit = Subtarget->is64Bit();
2683 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2685 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2686 "Var args not supported with calling convention fastcc, ghc or hipe");
// Interrupt handlers take one argument, or two where the second is a
// pointer-width integer (i64 on 64-bit, i32 otherwise).
2688 if (CallConv == CallingConv::X86_INTR) {
2689 bool isLegal = Ins.size() == 1 ||
2690 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2691 (!Is64Bit && Ins[1].VT == MVT::i32)));
2693 report_fatal_error("X86 interrupts may take one or two arguments");
2696 // Assign locations to all of the incoming arguments.
2697 SmallVector<CCValAssign, 16> ArgLocs;
2698 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2700 // Allocate shadow area for Win64
2702 CCInfo.AllocateStack(32, 8);
2704 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2706 unsigned LastVal = ~0U;
2708 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2709 CCValAssign &VA = ArgLocs[i];
2710 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2712 assert(VA.getValNo() != LastVal &&
2713 "Don't support value assigned to multiple locs yet");
2715 LastVal = VA.getValNo();
2717 if (VA.isRegLoc()) {
// Pick the register class that matches the location type.
2718 EVT RegVT = VA.getLocVT();
2719 const TargetRegisterClass *RC;
2720 if (RegVT == MVT::i32)
2721 RC = &X86::GR32RegClass;
2722 else if (Is64Bit && RegVT == MVT::i64)
2723 RC = &X86::GR64RegClass;
2724 else if (RegVT == MVT::f32)
2725 RC = &X86::FR32RegClass;
2726 else if (RegVT == MVT::f64)
2727 RC = &X86::FR64RegClass;
2728 else if (RegVT == MVT::f128)
2729 RC = &X86::FR128RegClass;
2730 else if (RegVT.is512BitVector())
2731 RC = &X86::VR512RegClass;
2732 else if (RegVT.is256BitVector())
2733 RC = &X86::VR256RegClass;
2734 else if (RegVT.is128BitVector())
2735 RC = &X86::VR128RegClass;
2736 else if (RegVT == MVT::x86mmx)
2737 RC = &X86::VR64RegClass;
2738 else if (RegVT == MVT::i1)
2739 RC = &X86::VK1RegClass;
2740 else if (RegVT == MVT::v8i1)
2741 RC = &X86::VK8RegClass;
2742 else if (RegVT == MVT::v16i1)
2743 RC = &X86::VK16RegClass;
2744 else if (RegVT == MVT::v32i1)
2745 RC = &X86::VK32RegClass;
2746 else if (RegVT == MVT::v64i1)
2747 RC = &X86::VK64RegClass;
2749 llvm_unreachable("Unknown argument type!");
// Mark the physical register live-in and read the argument from the
// register returned by addLiveIn.
2751 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2752 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2754 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2755 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2757 if (VA.getLocInfo() == CCValAssign::SExt)
2758 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2759 DAG.getValueType(VA.getValVT()));
2760 else if (VA.getLocInfo() == CCValAssign::ZExt)
2761 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2762 DAG.getValueType(VA.getValVT()));
2763 else if (VA.getLocInfo() == CCValAssign::BCvt)
2764 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
2766 if (VA.isExtInLoc()) {
2767 // Handle MMX values passed in XMM regs.
2768 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
2769 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2771 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
// Arguments that are not in registers are lowered from their stack slot.
2774 assert(VA.isMemLoc());
2775 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2778 // If value is passed via pointer - do a load.
2779 if (VA.getLocInfo() == CCValAssign::Indirect)
2780 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2781 MachinePointerInfo(), false, false, false, 0);
2783 InVals.push_back(ArgValue);
2786 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2787 // All x86 ABIs require that for returning structs by value we copy the
2788 // sret argument into %rax/%eax (depending on ABI) for the return. Save
2789 // the argument into a virtual register so that we can access it from the
2791 if (Ins[i].Flags.isSRet()) {
2792 unsigned Reg = FuncInfo->getSRetReturnReg();
// A pointer-typed virtual register is created and recorded as the sret
// return register (the guard around this is not visible here).
2794 MVT PtrTy = getPointerTy(DAG.getDataLayout());
2795 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2796 FuncInfo->setSRetReturnReg(Reg);
2798 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2799 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2804 unsigned StackSize = CCInfo.getNextStackOffset();
2805 // Align stack specially for tail calls.
2806 if (shouldGuaranteeTCO(CallConv,
2807 MF.getTarget().Options.GuaranteedTailCallOpt))
2808 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2810 // If the function takes variable number of arguments, make a frame index for
2811 // the start of the first vararg value... for expansion of llvm.va_start. We
2812 // can skip this if there are no va_start calls.
2813 if (MFI->hasVAStart() &&
2814 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2815 CallConv != CallingConv::X86_ThisCall))) {
2816 FuncInfo->setVarArgsFrameIndex(
2817 MFI->CreateFixedObject(1, StackSize, true));
2820 // Figure out if XMM registers are in use.
2821 assert(!(Subtarget->useSoftFloat() &&
2822 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
2823 "SSE register cannot be used when SSE is disabled!");
2825 // 64-bit calling conventions support varargs and register parameters, so we
2826 // have to do extra work to spill them in the prologue.
2827 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2828 // Find the first unallocated argument registers.
2829 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2830 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2831 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
2832 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
2833 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2834 "SSE register cannot be used when SSE is disabled!");
2836 // Gather all the live in physical registers.
2837 SmallVector<SDValue, 6> LiveGPRs;
2838 SmallVector<SDValue, 8> LiveXMMRegs;
// Only registers from the first unallocated one onwards are gathered.
2840 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2841 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2843 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
// When XMM argument registers exist, AL is also read as a live-in and later
// passed to the XMM save node.
2845 if (!ArgXMMs.empty()) {
2846 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2847 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2848 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2849 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2850 LiveXMMRegs.push_back(
2851 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2856 // Get to the caller-allocated home save location. Add 8 to account
2857 // for the return address.
2858 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2859 FuncInfo->setRegSaveFrameIndex(
2860 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2861 // Fixup to set vararg frame on shadow area (4 x i64).
2863 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2865 // For X86-64, if there are vararg parameters that are passed via
2866 // registers, then we must store them to their spots on the stack so
2867 // they may be loaded by dereferencing the result of va_next.
// Save-area layout: all GPR slots (8 bytes each) come first, followed by the
// XMM slots (16 bytes each); the area is created with 16-byte alignment.
2868 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2869 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2870 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2871 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2874 // Store the integer parameter registers.
2875 SmallVector<SDValue, 8> MemOps;
2876 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2877 getPointerTy(DAG.getDataLayout()));
2878 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2879 for (SDValue Val : LiveGPRs) {
2880 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2881 RSFIN, DAG.getIntPtrConstant(Offset, dl));
2883 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2884 MachinePointerInfo::getFixedStack(
2885 DAG.getMachineFunction(),
2886 FuncInfo->getRegSaveFrameIndex(), Offset),
2888 MemOps.push_back(Store);
2892 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2893 // Now store the XMM (fp + vector) parameter registers.
// Operands: chain, AL value, save-area frame index, FP offset, then the live
// XMM register values.
2894 SmallVector<SDValue, 12> SaveXMMOps;
2895 SaveXMMOps.push_back(Chain);
2896 SaveXMMOps.push_back(ALVal);
2897 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2898 FuncInfo->getRegSaveFrameIndex(), dl));
2899 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2900 FuncInfo->getVarArgsFPOffset(), dl));
2901 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2903 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2904 MVT::Other, SaveXMMOps));
// All register-save stores are joined into a single chain.
2907 if (!MemOps.empty())
2908 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2911 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2912 // Find the largest legal vector type.
2913 MVT VecVT = MVT::Other;
2914 // FIXME: Only some x86_32 calling conventions support AVX512.
2915 if (Subtarget->hasAVX512() &&
2916 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2917 CallConv == CallingConv::Intel_OCL_BI)))
2918 VecVT = MVT::v16f32;
2919 else if (Subtarget->hasAVX())
2921 else if (Subtarget->hasSSE2())
2924 // We forward some GPRs and some vector types.
2925 SmallVector<MVT, 2> RegParmTypes;
2926 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2927 RegParmTypes.push_back(IntVT);
2928 if (VecVT != MVT::Other)
2929 RegParmTypes.push_back(VecVT);
2931 // Compute the set of forwarded registers. The rest are scratch.
2932 SmallVectorImpl<ForwardedRegister> &Forwards =
2933 FuncInfo->getForwardedMustTailRegParms();
2934 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2936 // Conservatively forward AL on x86_64, since it might be used for varargs.
2937 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2938 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2939 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2942 // Copy all forwards from physical to virtual registers.
2943 for (ForwardedRegister &F : Forwards) {
2944 // FIXME: Can we use a less constrained schedule?
// The value is read from the current F.VReg, then F.VReg is replaced by a
// freshly created virtual register that receives the copy.
2945 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2946 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2947 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2951 // Some CCs need callee pop.
2952 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2953 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2954 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2955 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
2956 // X86 interrupts must pop the error code if present
2957 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
2959 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2960 // If this is an sret function, the return should pop the hidden pointer.
// Applies only on 32-bit, non-MSVCRT targets, for conventions without
// guaranteed TCO, when the sret pointer is passed on the stack.
2961 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
2962 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2963 argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
2964 FuncInfo->setBytesToPopOnReturn(4);
2968 // RegSaveFrameIndex is X86-64 only.
// NOTE(review): 0xAAAAAAA looks like a placeholder for an unused frame index;
// the condition guarding these assignments is not visible here.
2969 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2970 if (CallConv == CallingConv::X86_FastCall ||
2971 CallConv == CallingConv::X86_ThisCall)
2972 // fastcc functions can't have varargs.
2973 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2976 FuncInfo->setArgumentStackSize(StackSize);
// For the CoreCLR EH personality, an 8-byte, 8-aligned stack slot is created
// and recorded as the PSPSym frame index.
2978 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
2979 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
2980 if (Personality == EHPersonality::CoreCLR) {
2982 // TODO: Add a mechanism to frame lowering that will allow us to indicate
2983 // that we'd prefer this slot be allocated towards the bottom of the frame
2984 // (i.e. near the stack pointer after allocating the frame). Every
2985 // funclet needs a copy of this slot in its (mostly empty) frame, and the
2986 // offset from the bottom of this and each funclet's frame must be the
2987 // same, so the size of funclets' (mostly empty) frames is dictated by
2988 // how far this slot is from the bottom (since they allocate just enough
2989 // space to accommodate holding this slot at the correct offset).
2990 int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
2991 EHInfo->PSPSymFrameIdx = PSPSymFI;
// Emits the store of one outgoing call argument to its stack location. The
// destination address is built by an ADD involving the constant
// VA.getLocMemOffset() (the remaining ADD operands are not visible here,
// presumably StackPtr). Byval arguments are copied with
// CreateCopyOfByValArgument; everything else is a plain store.
2999 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
3000 SDValue StackPtr, SDValue Arg,
3001 SDLoc dl, SelectionDAG &DAG,
3002 const CCValAssign &VA,
3003 ISD::ArgFlagsTy Flags) const {
3004 unsigned LocMemOffset = VA.getLocMemOffset();
3005 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3006 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3008 if (Flags.isByVal())
3009 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3011 return DAG.getStore(
3012 Chain, dl, Arg, PtrOff,
3013 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
3017 /// Emit a load of return address if tail call
3018 /// optimization is performed and it is required.
// OutRetAddr is first set to the return-address frame index and then replaced
// by a pointer-typed load from it. The function returns result #1 of that load
// node. IsTailCall, Is64Bit and FPDiff are not referenced in the visible lines.
3020 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
3021 SDValue &OutRetAddr, SDValue Chain,
3022 bool IsTailCall, bool Is64Bit,
3023 int FPDiff, SDLoc dl) const {
3024 // Adjust the Return address stack slot.
3025 EVT VT = getPointerTy(DAG.getDataLayout());
3026 OutRetAddr = getReturnAddressFrameIndex(DAG);
3028 // Load the "old" Return address.
3029 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
3030 false, false, false, 0);
3031 return SDValue(OutRetAddr.getNode(), 1);
3034 /// Emit a store of the return address if tail call
3035 /// optimization is performed and it is required (FPDiff!=0).
// When FPDiff is zero the incoming chain is returned unchanged. Otherwise a
// fixed object of SlotSize bytes is created at offset FPDiff - SlotSize and
// RetAddrFrIdx is stored into it.
// NOTE(review): the trailing store arguments and the final return are not
// visible in this listing.
3036 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3037 SDValue Chain, SDValue RetAddrFrIdx,
3038 EVT PtrVT, unsigned SlotSize,
3039 int FPDiff, SDLoc dl) {
3040 // Store the return address to the appropriate stack slot.
3041 if (!FPDiff) return Chain;
3042 // Calculate the new stack slot for the return address.
3043 int NewReturnAddrFI =
3044 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3046 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3047 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3048 MachinePointerInfo::getFixedStack(
3049 DAG.getMachineFunction(), NewReturnAddrFI),
3054 /// Returns a vector_shuffle mask for an movs{s|d}, movd
3055 /// operation of specified width.
// The mask's first entry is NumElems. The loop covers indices 1 to
// NumElems - 1; its body is not visible here (presumably it appends each
// index; confirm against the full file). The shuffle of V1 and V2 is built
// with that mask.
3056 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
3058 unsigned NumElems = VT.getVectorNumElements();
3059 SmallVector<int, 8> Mask;
3060 Mask.push_back(NumElems);
3061 for (unsigned i = 1; i != NumElems; ++i)
3063 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
// Lowers the outgoing call described by CLI into SelectionDAG nodes. In order,
// the visible code: decides whether the call may be emitted as a tail call or
// sibcall, assigns a location to every outgoing operand (CC_X86), emits
// CALLSEQ_START, promotes/copies/stores the arguments, rewrites the callee
// operand into its target form, builds either X86ISD::TC_RETURN (tail call) or
// X86ISD::CALL followed by CALLSEQ_END, and finally returns the result of
// LowerCallResult, which receives InVals.
// NOTE(review): this listing carries embedded line numbers and is missing a
// number of short source lines (braces, single statements, some declarations
// such as dl, FPDiff, StackPtr and InFlag); compare with the upstream file
// before relying on exact control flow.
3067 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3068 SmallVectorImpl<SDValue> &InVals) const {
// Unpack the call description into locals. isTailCall is a reference, so
// assignments to it are written back into CLI.IsTailCall.
3069 SelectionDAG &DAG = CLI.DAG;
3071 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3072 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3073 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3074 SDValue Chain = CLI.Chain;
3075 SDValue Callee = CLI.Callee;
3076 CallingConv::ID CallConv = CLI.CallConv;
3077 bool &isTailCall = CLI.IsTailCall;
3078 bool isVarArg = CLI.IsVarArg;
3080 MachineFunction &MF = DAG.getMachineFunction();
3081 bool Is64Bit = Subtarget->is64Bit();
3082 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
3083 StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
3084 bool IsSibcall = false;
3085 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3086 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
// A direct call using the X86_INTR calling convention is a fatal error.
3088 if (CallConv == CallingConv::X86_INTR)
3089 report_fatal_error("X86 interrupts may not be called directly");
// NOTE(review): the statement guarded by this attribute check is not present
// in this listing; presumably it clears isTailCall -- confirm against upstream.
3091 if (Attr.getValueAsString() == "true")
3094 if (Subtarget->isPICStyleGOT() &&
3095 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3096 // If we are using a GOT, disable tail calls to external symbols with
3097 // default visibility. Tail calling such a symbol requires using a GOT
3098 // relocation, which forces early binding of the symbol. This breaks code
3099 // that requires lazy function symbol resolution. Using musttail or
3100 // GuaranteedTailCallOpt will override this.
3101 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3102 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3103 G->getGlobal()->hasDefaultVisibility()))
// IsMustTail is true only when a call site is attached to CLI and that call
// site is a musttail call.
3107 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
3109 // Force this to be a tail call. The verifier rules are enough to ensure
3110 // that we can lower this successfully without moving the return address
3113 } else if (isTailCall) {
3114 // Check if it's really possible to do a tail call.
3115 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3116 isVarArg, SR != NotStructReturn,
3117 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3118 Outs, OutVals, Ins, DAG);
3120 // Sibcalls are automatically detected tailcalls which do not require
3122 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3129 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3130 "Var args not supported with calling convention fastcc, ghc or hipe");
3132 // Analyze operands of the call, assigning locations to each operand.
3133 SmallVector<CCValAssign, 16> ArgLocs;
3134 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3136 // Allocate shadow area for Win64
// NOTE(review): the condition guarding this allocation is not visible in this
// listing; per the comment above it is presumably Win64-only.
3138 CCInfo.AllocateStack(32, 8);
3140 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3142 // Get a count of how many bytes are to be pushed on the stack.
3143 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3145 // This is a sibcall. The memory operands are available in caller's
3146 // own caller's stack.
3148 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3149 canGuaranteeTCO(CallConv))
3150 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
// For tail calls that are neither sibcalls nor musttail calls, compute FPDiff:
// the bytes the caller pops on return minus the bytes this call needs.
3153 if (isTailCall && !IsSibcall && !IsMustTail) {
3154 // Lower arguments at fp - stackoffset + fpdiff.
3155 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3157 FPDiff = NumBytesCallerPushed - NumBytes;
3159 // Set the delta of movement of the returnaddr stackslot.
3160 // But only set if delta is greater than previous delta.
3161 if (FPDiff < X86Info->getTCReturnAddrDelta())
3162 X86Info->setTCReturnAddrDelta(FPDiff);
// Both the push and pop amounts start out as the full argument area size.
3165 unsigned NumBytesToPush = NumBytes;
3166 unsigned NumBytesToPop = NumBytes;
3168 // If we have an inalloca argument, all stack space has already been allocated
3169 // for us and is right at the top of the stack. We don't support multiple
3170 // arguments passed in memory when using inalloca.
3171 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
// The inalloca argument must be a memory location at offset 0; anything else
// is a fatal error.
3173 if (!ArgLocs.back().isMemLoc())
3174 report_fatal_error("cannot use inalloca attribute on a register "
3176 if (ArgLocs.back().getLocMemOffset() != 0)
3177 report_fatal_error("any parameter with the inalloca attribute must be "
3178 "the only memory argument");
// Open the call sequence, reserving NumBytesToPush bytes.
3182 Chain = DAG.getCALLSEQ_START(
3183 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3185 SDValue RetAddrFrIdx;
3186 // Load return address for tail calls.
3187 if (isTailCall && FPDiff)
3188 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3189 Is64Bit, FPDiff, dl);
// RegsToPass collects (physical register, value) pairs; MemOpChains collects
// the chains of the stores emitted for memory arguments.
3191 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3192 SmallVector<SDValue, 8> MemOpChains;
3195 // Walk the register/memloc assignments, inserting copies/loads. In the case
3196 // of tail call optimization arguments are handled later.
3197 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3198 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3199 // Skip inalloca arguments, they have already been written.
3200 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3201 if (Flags.isInAlloca())
3204 CCValAssign &VA = ArgLocs[i];
3205 EVT RegVT = VA.getLocVT();
3206 SDValue Arg = OutVals[i];
3207 bool isByVal = Flags.isByVal();
3209 // Promote the value if needed.
3210 switch (VA.getLocInfo()) {
3211 default: llvm_unreachable("Unknown loc info!");
3212 case CCValAssign::Full: break;
3213 case CCValAssign::SExt:
3214 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3216 case CCValAssign::ZExt:
3217 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3219 case CCValAssign::AExt:
// Vectors of i1 are sign-extended rather than any-extended.
3220 if (Arg.getValueType().isVector() &&
3221 Arg.getValueType().getVectorElementType() == MVT::i1)
3222 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3223 else if (RegVT.is128BitVector()) {
3224 // Special case: passing MMX values in XMM registers.
3225 Arg = DAG.getBitcast(MVT::i64, Arg);
3226 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3227 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3229 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3231 case CCValAssign::BCvt:
3232 Arg = DAG.getBitcast(RegVT, Arg);
3234 case CCValAssign::Indirect: {
3235 // Store the argument.
3236 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3237 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3238 Chain = DAG.getStore(
3239 Chain, dl, Arg, SpillSlot,
3240 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
// Register arguments are queued in RegsToPass; the actual copies are emitted
// later, once all memory operations have been chained.
3247 if (VA.isRegLoc()) {
3248 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3249 if (isVarArg && IsWin64) {
3250 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3251 // shadow reg if callee is a varargs function.
3252 unsigned ShadowReg = 0;
3253 switch (VA.getLocReg()) {
3254 case X86::XMM0: ShadowReg = X86::RCX; break;
3255 case X86::XMM1: ShadowReg = X86::RDX; break;
3256 case X86::XMM2: ShadowReg = X86::R8; break;
3257 case X86::XMM3: ShadowReg = X86::R9; break;
3260 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
// Memory arguments are stored here unless this is a sibcall; for other tail
// calls only byval arguments are stored at this point.
3262 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3263 assert(VA.isMemLoc());
// The stack pointer copy is created lazily, the first time it is needed.
3264 if (!StackPtr.getNode())
3265 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3266 getPointerTy(DAG.getDataLayout()));
3267 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3268 dl, DAG, VA, Flags));
// Merge all argument-store chains into a single token.
3272 if (!MemOpChains.empty())
3273 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3275 if (Subtarget->isPICStyleGOT()) {
3276 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3279 RegsToPass.push_back(std::make_pair(
3280 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3281 getPointerTy(DAG.getDataLayout()))));
3283 // If we are tail calling and generating PIC/GOT style code load the
3284 // address of the callee into ECX. The value in ecx is used as target of
3285 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3286 // for tail calls on PIC/GOT architectures. Normally we would just put the
3287 // address of GOT into ebx and then call target@PLT. But for tail calls
3288 // ebx would be restored (since ebx is callee saved) before jumping to the
3291 // Note: The actual moving to ECX is done further down.
3292 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3293 if (G && !G->getGlobal()->hasLocalLinkage() &&
3294 G->getGlobal()->hasDefaultVisibility())
3295 Callee = LowerGlobalAddress(Callee, DAG);
3296 else if (isa<ExternalSymbolSDNode>(Callee))
3297 Callee = LowerExternalSymbol(Callee, DAG);
3301 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3302 // From AMD64 ABI document:
3303 // For calls that may call functions that use varargs or stdargs
3304 // (prototype-less calls or calls to functions containing ellipsis (...) in
3305 // the declaration) %al is used as hidden argument to specify the number
3306 // of SSE registers used. The contents of %al do not need to match exactly
3307 // the number of registers, but must be an upper bound on the number of SSE
3308 // registers used and is in the range 0 - 8 inclusive.
3310 // Count the number of XMM registers allocated.
3311 static const MCPhysReg XMMArgRegs[] = {
3312 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3313 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3315 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3316 assert((Subtarget->hasSSE1() || !NumXMMRegs)
3317 && "SSE registers cannot be used when SSE is disabled");
// The count is passed to the callee in AL.
3319 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3320 DAG.getConstant(NumXMMRegs, dl,
// For variadic musttail calls, every entry recorded in
// getForwardedMustTailRegParms() is copied out of its virtual register and
// queued to be passed in the recorded physical register.
3324 if (isVarArg && IsMustTail) {
3325 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3326 for (const auto &F : Forwards) {
3327 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3328 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3332 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3333 // don't need this because the eligibility check rejects calls that require
3334 // shuffling arguments passed in memory.
3335 if (!IsSibcall && isTailCall) {
3336 // Force all the incoming stack arguments to be loaded from the stack
3337 // before any new outgoing arguments are stored to the stack, because the
3338 // outgoing stack slots may alias the incoming argument stack slots, and
3339 // the alias isn't otherwise explicit. This is slightly more conservative
3340 // than necessary, because it means that each store effectively depends
3341 // on every argument instead of just those arguments it would clobber.
3342 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3344 SmallVector<SDValue, 8> MemOpChains2;
3347 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3348 CCValAssign &VA = ArgLocs[i];
3351 assert(VA.isMemLoc());
3352 SDValue Arg = OutVals[i];
3353 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3354 // Skip inalloca arguments. They don't require any work.
3355 if (Flags.isInAlloca())
3357 // Create frame index.
// The slot offset is shifted by FPDiff; OpSize is the location type's size in
// bytes, rounded up.
3358 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3359 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3360 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3361 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3363 if (Flags.isByVal()) {
3364 // Copy relative to framepointer.
3365 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3366 if (!StackPtr.getNode())
3367 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3368 getPointerTy(DAG.getDataLayout()));
3369 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3372 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3376 // Store relative to framepointer.
3377 MemOpChains2.push_back(DAG.getStore(
3378 ArgChain, dl, Arg, FIN,
3379 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3384 if (!MemOpChains2.empty())
3385 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3387 // Store the return address to the appropriate stack slot.
3388 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3389 getPointerTy(DAG.getDataLayout()),
3390 RegInfo->getSlotSize(), FPDiff, dl);
3393 // Build a sequence of copy-to-reg nodes chained together with token chain
3394 // and flag operands which copy the outgoing args into registers.
3396 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3397 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3398 RegsToPass[i].second, InFlag);
// Result 1 of the copy is used as the glue input of the next node.
3399 InFlag = Chain.getValue(1);
// Rewrite the callee operand into its target-specific form.
3402 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3403 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3404 // In the 64-bit large code model, we have to make all calls
3405 // through a register, since the call instruction's 32-bit
3406 // pc-relative offset may not be large enough to hold the whole
3408 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3409 // If the callee is a GlobalAddress node (quite common, every direct call
3410 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3412 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3414 // We should use extra load for direct calls to dllimported functions in
3416 const GlobalValue *GV = G->getGlobal();
3417 if (!GV->hasDLLImportStorageClass()) {
// OpFlags selects the operand flag for the symbol reference; WrapperKind stays
// ISD::DELETED_NODE unless a wrapper node is required.
3418 unsigned char OpFlags = 0;
3419 bool ExtraLoad = false;
3420 unsigned WrapperKind = ISD::DELETED_NODE;
3422 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
3423 // external symbols must go through the PLT in PIC mode. If the symbol
3424 // has hidden or protected visibility, or if it is static or local, then
3425 // we don't need to use the PLT - we can directly call it.
3426 if (Subtarget->isTargetELF() &&
3427 DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
3428 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
3429 OpFlags = X86II::MO_PLT;
3430 } else if (Subtarget->isPICStyleStubAny() &&
3431 !GV->isStrongDefinitionForLinker() &&
3432 (!Subtarget->getTargetTriple().isMacOSX() ||
3433 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3434 // PC-relative references to external symbols should go through $stub,
3435 // unless we're building with the leopard linker or later, which
3436 // automatically synthesizes these stubs.
3437 OpFlags = X86II::MO_DARWIN_STUB;
3438 } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
3439 cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
3440 // If the function is marked as non-lazy, generate an indirect call
3441 // which loads from the GOT directly. This avoids runtime overhead
3442 // at the cost of eager binding (and one extra byte of encoding).
3443 OpFlags = X86II::MO_GOTPCREL;
3444 WrapperKind = X86ISD::WrapperRIP;
3448 Callee = DAG.getTargetGlobalAddress(
3449 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3451 // Add a wrapper if needed.
3452 if (WrapperKind != ISD::DELETED_NODE)
3453 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3454 getPointerTy(DAG.getDataLayout()), Callee);
3455 // Add extra indirection if needed.
3457 Callee = DAG.getLoad(
3458 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3459 MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false,
3462 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3463 unsigned char OpFlags = 0;
3465 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
3466 // external symbols should go through the PLT.
3467 if (Subtarget->isTargetELF() &&
3468 DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
3469 OpFlags = X86II::MO_PLT;
3470 } else if (Subtarget->isPICStyleStubAny() &&
3471 (!Subtarget->getTargetTriple().isMacOSX() ||
3472 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3473 // PC-relative references to external symbols should go through $stub,
3474 // unless we're building with the leopard linker or later, which
3475 // automatically synthesizes these stubs.
3476 OpFlags = X86II::MO_DARWIN_STUB;
3479 Callee = DAG.getTargetExternalSymbol(
3480 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3481 } else if (Subtarget->isTarget64BitILP32() &&
3482 Callee->getValueType(0) == MVT::i32) {
3483 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3484 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3487 // Returns a chain & a flag for retval copy to use.
3488 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3489 SmallVector<SDValue, 8> Ops;
// For non-sibcall tail calls the call sequence is closed before the call node
// is built.
3491 if (!IsSibcall && isTailCall) {
3492 Chain = DAG.getCALLSEQ_END(Chain,
3493 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3494 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3495 InFlag = Chain.getValue(1);
// Operand list of the call node: chain, callee, then (where emitted) FPDiff,
// the argument registers, the register mask and finally the glue.
3498 Ops.push_back(Chain);
3499 Ops.push_back(Callee);
3502 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3504 // Add argument registers to the end of the list so that they are known live
3506 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3507 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3508 RegsToPass[i].second.getValueType()));
3510 // Add a register mask operand representing the call-preserved registers.
3511 const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
3512 assert(Mask && "Missing call preserved mask for calling convention");
3514 // If this is an invoke in a 32-bit function using a funclet-based
3515 // personality, assume the function clobbers all registers. If an exception
3516 // is thrown, the runtime will not restore CSRs.
3517 // FIXME: Model this more precisely so that we can register allocate across
3518 // the normal edge and spill and fill across the exceptional edge.
3519 if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
3520 const Function *CallerFn = MF.getFunction();
3521 EHPersonality Pers =
3522 CallerFn->hasPersonalityFn()
3523 ? classifyEHPersonality(CallerFn->getPersonalityFn())
3524 : EHPersonality::Unknown;
3525 if (isFuncletEHPersonality(Pers))
3526 Mask = RegInfo->getNoPreservedMask();
3529 Ops.push_back(DAG.getRegisterMask(Mask));
3531 if (InFlag.getNode())
3532 Ops.push_back(InFlag);
3536 //// If this is the first return lowered for this function, add the regs
3537 //// to the liveout set for the function.
3538 // This isn't right, although it's probably harmless on x86; liveouts
3539 // should be computed from returns not tail calls. Consider a void
3540 // function making a tail call to a function returning int.
// Tail-call exit: record that the function has a tail call and return the
// TC_RETURN node directly.
3541 MF.getFrameInfo()->setHasTailCall();
3542 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
// Ordinary call path.
3545 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3546 InFlag = Chain.getValue(1);
3548 // Create the CALLSEQ_END node.
3549 unsigned NumBytesForCalleeToPop;
3550 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3551 DAG.getTarget().Options.GuaranteedTailCallOpt))
3552 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3553 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3554 !Subtarget->getTargetTriple().isOSMSVCRT() &&
3555 SR == StackStructReturn)
3556 // If this is a call to a struct-return function, the callee
3557 // pops the hidden struct pointer, so we have to push it back.
3558 // This is common for Darwin/X86, Linux & Mingw32 targets.
3559 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3560 NumBytesForCalleeToPop = 4;
3562 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3564 // Returns a flag for retval copy to use.
3566 Chain = DAG.getCALLSEQ_END(Chain,
3567 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3568 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3571 InFlag = Chain.getValue(1);
3574 // Handle result values, copying them out of physregs into vregs that we
3576 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3577 Ins, dl, DAG, InVals);
3580 //===----------------------------------------------------------------------===//
3581 // Fast Calling Convention (tail call) implementation
3582 //===----------------------------------------------------------------------===//
3584 // Like stdcall, the callee cleans up the arguments, except that ECX is
3585 // reserved for storing the tail called function address. Only 2 registers are
3586 // free for argument passing (inreg). Tail call optimization is performed
3588 // * tailcallopt is enabled
3589 // * caller/callee are fastcc
3590 // On X86_64 architecture with GOT-style position independent code only local
3591 // (within module) calls are supported at the moment.
3592 // To keep the stack aligned according to platform abi the function
3593 // GetAlignedArgumentStackSize ensures that argument delta is always multiples
3594 // of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3595 // If a tail called function callee has more arguments than the caller the
3596 // caller needs to make sure that there is room to move the RETADDR to. This is
3597 // achieved by reserving an area the size of the argument delta right after the
3598 // original RETADDR, but before the saved framepointer or the spilled registers
3599 // e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
3611 /// Align the stack size to e.g. 16n + 12 for a 16-byte alignment requirement.
///
/// More generally, the visible arithmetic rounds StackSize up to the next
/// value that is congruent to (StackAlignment - SlotSize) modulo
/// StackAlignment. The "12" in the comments below is that quantity for a
/// 16-byte stack alignment with 4-byte slots.
/// NOTE(review): the return-type line, the `else`, and the final return
/// statement are missing from this listing; presumably Offset is returned --
/// confirm against the upstream file. The DAG parameter is not used by any
/// visible line.
3614 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3615 SelectionDAG& DAG) const {
3616 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3617 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
3618 unsigned StackAlignment = TFI.getStackAlignment();
// AlignMask selects the bits of Offset below the alignment boundary.
3619 uint64_t AlignMask = StackAlignment - 1;
3620 int64_t Offset = StackSize;
3621 unsigned SlotSize = RegInfo->getSlotSize();
// Case 1: the remainder within the current alignment unit does not exceed the
// target remainder, so pad up to it within the same unit.
3622 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3623 // Number smaller than 12 so just add the difference.
3624 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
// Case 2: the remainder is already past the target, so move to the next
// alignment unit and add the target remainder there.
3626 // Mask out lower bits, add stackalignment once plus the 12 bytes.
3627 Offset = ((~AlignMask) & Offset) + StackAlignment +
3628 (StackAlignment-SlotSize);
3633 /// Return true if the given stack call argument is already available in the
3634 /// same position (relatively) of the caller's incoming argument stack.
///
/// The function identifies a frame index FI that Arg comes from, then requires
/// that FI be a fixed object whose offset equals Offset and whose size equals
/// the byte count computed for the argument.
/// NOTE(review): the declaration of FI and several early-exit statements are
/// missing from this listing (the assert below implies FI starts as INT_MAX);
/// confirm the exact failure paths against the upstream file.
3636 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3637 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
3638 const X86InstrInfo *TII) {
// Default size: the argument's value type in whole bytes. The byval paths
// below replace it with the byval size.
3639 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
// Case 1: the argument is a copy from a register. Look at the instruction
// that defines the virtual register.
3641 if (Arg.getOpcode() == ISD::CopyFromReg) {
3642 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3643 if (!TargetRegisterInfo::isVirtualRegister(VR))
3645 MachineInstr *Def = MRI->getVRegDef(VR);
// Non-byval: the defining instruction must be a load from a stack slot, which
// also yields FI.
3648 if (!Flags.isByVal()) {
3649 if (!TII->isLoadFromStackSlot(Def, FI))
// Byval: accept an LEA whose operand 1 is a frame index, and use the byval
// size as the byte count.
3652 unsigned Opcode = Def->getOpcode();
3653 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
3654 Opcode == X86::LEA64_32r) &&
3655 Def->getOperand(1).isFI()) {
3656 FI = Def->getOperand(1).getIndex();
3657 Bytes = Flags.getByValSize();
// Case 2: the argument is a load node.
3661 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3662 if (Flags.isByVal())
3663 // ByVal argument is passed in as a pointer but it's now being
3664 // dereferenced. e.g.
3665 // define @foo(%struct.X* %A) {
3666 // tail call @bar(%struct.X* byval %A)
// The frame index is taken from the load's base pointer.
// NOTE(review): the null check on FINode is not visible in this listing.
3669 SDValue Ptr = Ld->getBasePtr();
3670 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
3673 FI = FINode->getIndex();
// Case 3: a byval argument that is itself a frame index.
3674 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
3675 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
3676 FI = FINode->getIndex();
3677 Bytes = Flags.getByValSize();
// By this point one of the cases above must have set FI.
3681 assert(FI != INT_MAX);
3682 if (!MFI->isFixedObjectIndex(FI))
// Match only if both the offset and the size of the fixed object agree.
3684 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
3687 /// Check whether the call is eligible for tail call optimization. Targets
3688 /// that want to do tail call optimization should implement this function.
3689 bool X86TargetLowering::IsEligibleForTailCallOptimization(
3690 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3691 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
3692 const SmallVectorImpl<ISD::OutputArg> &Outs,
3693 const SmallVectorImpl<SDValue> &OutVals,
3694 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3695 if (!mayTailCallThisCC(CalleeCC))
3698 // If -tailcallopt is specified, make fastcc functions tail-callable.
3699 MachineFunction &MF = DAG.getMachineFunction();
3700 const Function *CallerF = MF.getFunction();
3702 // If the function return type is x86_fp80 and the callee return type is not,
3703 // then the FP_EXTEND of the call result is not a nop. It's not safe to
3704 // perform a tailcall optimization here.
3705 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
3708 CallingConv::ID CallerCC = CallerF->getCallingConv();
3709 bool CCMatch = CallerCC == CalleeCC;
3710 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
3711 bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
3713 // Win64 functions have extra shadow space for argument homing. Don't do the
3714 // sibcall if the caller and callee have mismatched expectations for this
3716 if (IsCalleeWin64 != IsCallerWin64)
3719 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
3720 if (canGuaranteeTCO(CalleeCC) && CCMatch)
3725 // Look for obvious safe cases to perform tail call optimization that do not
3726 // require ABI changes. This is what gcc calls sibcall.
3728 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
3729 // emit a special epilogue.
3730 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3731 if (RegInfo->needsStackRealignment(MF))
3734 // Also avoid sibcall optimization if either caller or callee uses struct
3735 // return semantics.
3736 if (isCalleeStructRet || isCallerStructRet)
3739 // Do not sibcall optimize vararg calls unless all arguments are passed via
3741 if (isVarArg && !Outs.empty()) {
3742 // Optimizing for varargs on Win64 is unlikely to be safe without
3743 // additional testing.
3744 if (IsCalleeWin64 || IsCallerWin64)
3747 SmallVector<CCValAssign, 16> ArgLocs;
3748 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
3751 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3752 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
3753 if (!ArgLocs[i].isRegLoc())
3757 // If the call result is in ST0 / ST1, it needs to be popped off the x87
3758 // stack. Therefore, if it's not used by the call it is not safe to optimize
3759 // this into a sibcall.
3760 bool Unused = false;
3761 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3768 SmallVector<CCValAssign, 16> RVLocs;
3769 CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
3771 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3772 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
3773 CCValAssign &VA = RVLocs[i];
3774 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
3779 // If the calling conventions do not match, then we'd better make sure the
3780 // results are returned in the same way as what the caller expects.
3782 SmallVector<CCValAssign, 16> RVLocs1;
3783 CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
3785 CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
3787 SmallVector<CCValAssign, 16> RVLocs2;
3788 CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
3790 CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
3792 if (RVLocs1.size() != RVLocs2.size())
3794 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
3795 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
3797 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
3799 if (RVLocs1[i].isRegLoc()) {
3800 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
3803 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())