1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interfaces that X86 uses to lower LLVM code into a
13 //===----------------------------------------------------------------------===//
15 #include "X86ISelLowering.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "X86CallingConv.h"
18 #include "X86FrameLowering.h"
19 #include "X86InstrBuilder.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86TargetMachine.h"
22 #include "X86TargetObjectFile.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/StringSwitch.h"
28 #include "llvm/Analysis/EHPersonalities.h"
29 #include "llvm/CodeGen/IntrinsicLowering.h"
30 #include "llvm/CodeGen/MachineFrameInfo.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineJumpTableInfo.h"
34 #include "llvm/CodeGen/MachineModuleInfo.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/WinEHFuncInfo.h"
37 #include "llvm/IR/CallSite.h"
38 #include "llvm/IR/CallingConv.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DerivedTypes.h"
41 #include "llvm/IR/Function.h"
42 #include "llvm/IR/GlobalAlias.h"
43 #include "llvm/IR/GlobalVariable.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/MC/MCAsmInfo.h"
47 #include "llvm/MC/MCContext.h"
48 #include "llvm/MC/MCExpr.h"
49 #include "llvm/MC/MCSymbol.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/MathExtras.h"
54 #include "llvm/Target/TargetOptions.h"
55 #include "X86IntrinsicsInfo.h"
// Tag string for this translation unit. Presumably consumed by the debug and
// statistics facilities pulled in via llvm/Support/Debug.h and
// llvm/ADT/Statistic.h above -- NOTE(review): confirm against those headers.
61 #define DEBUG_TYPE "x86-isel"
// Declares the counter NumTailCalls, described as "Number of tail calls".
// It is not updated anywhere in the visible part of this file.
63 STATISTIC(NumTailCalls, "Number of tail calls");
65 static cl::opt<bool> ExperimentalVectorWideningLegalization(
66 "x86-experimental-vector-widening-legalization", cl::init(false),
67 cl::desc("Enable an experimental vector type legalization through widening "
68 "rather than promotion."),
71 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
72 const X86Subtarget &STI)
73 : TargetLowering(TM), Subtarget(&STI) {
74 X86ScalarSSEf64 = Subtarget->hasSSE2();
75 X86ScalarSSEf32 = Subtarget->hasSSE1();
76 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
78 // Set up the TargetLowering object.
80 // X86 is weird. It always uses i8 for shift amounts and setcc results.
81 setBooleanContents(ZeroOrOneBooleanContent);
82 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
83 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
85 // For 64-bit, since we have so many registers, use the ILP scheduler.
86 // For 32-bit, use the register pressure specific scheduling.
87 // For Atom, always use ILP scheduling.
88 if (Subtarget->isAtom())
89 setSchedulingPreference(Sched::ILP);
90 else if (Subtarget->is64Bit())
91 setSchedulingPreference(Sched::ILP);
93 setSchedulingPreference(Sched::RegPressure);
94 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
95 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
97 // Bypass expensive divides on Atom when compiling with O2.
98 if (TM.getOptLevel() >= CodeGenOpt::Default) {
99 if (Subtarget->hasSlowDivide32())
100 addBypassSlowDiv(32, 8);
101 if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
102 addBypassSlowDiv(64, 16);
105 if (Subtarget->isTargetKnownWindowsMSVC()) {
106 // Setup Windows compiler runtime calls.
107 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
108 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
109 setLibcallName(RTLIB::SREM_I64, "_allrem");
110 setLibcallName(RTLIB::UREM_I64, "_aullrem");
111 setLibcallName(RTLIB::MUL_I64, "_allmul");
112 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
113 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
114 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
115 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
116 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
119 if (Subtarget->isTargetDarwin()) {
120 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(false);
122 setUseUnderscoreLongJmp(false);
123 } else if (Subtarget->isTargetWindowsGNU()) {
124 // MS runtime is weird: it exports _setjmp, but longjmp!
125 setUseUnderscoreSetJmp(true);
126 setUseUnderscoreLongJmp(false);
128 setUseUnderscoreSetJmp(true);
129 setUseUnderscoreLongJmp(true);
132 // Set up the register classes.
133 addRegisterClass(MVT::i8, &X86::GR8RegClass);
134 addRegisterClass(MVT::i16, &X86::GR16RegClass);
135 addRegisterClass(MVT::i32, &X86::GR32RegClass);
136 if (Subtarget->is64Bit())
137 addRegisterClass(MVT::i64, &X86::GR64RegClass);
139 for (MVT VT : MVT::integer_valuetypes())
140 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
142 // We don't accept any truncstore of integer registers.
143 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
144 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
145 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
146 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
147 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
148 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
150 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
152 // SETOEQ and SETUNE require checking two conditions.
153 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
154 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
155 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
156 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
157 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
158 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
160 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
162 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
163 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
164 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
166 if (Subtarget->is64Bit()) {
167 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
168 // f32/f64 are legal, f80 is custom.
169 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
171 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
172 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
173 } else if (!Subtarget->useSoftFloat()) {
174 // We have an algorithm for SSE2->double, and we turn this into a
175 // 64-bit FILD followed by conditional FADD for other targets.
176 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
177 // We have an algorithm for SSE2, and we turn this into a 64-bit
178 // FILD or VCVTUSI2SS/SD for other targets.
179 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
182 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
184 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
185 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
187 if (!Subtarget->useSoftFloat()) {
188 // SSE has no i16 to fp conversion, only i32
189 if (X86ScalarSSEf32) {
190 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
191 // f32 and f64 cases are Legal, f80 case is not
192 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
194 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
195 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
198 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
199 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
202 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
204 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
205 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
207 if (!Subtarget->useSoftFloat()) {
208 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
209 // are Legal, f80 is custom lowered.
210 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
211 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
213 if (X86ScalarSSEf32) {
214 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
215 // f32 and f64 cases are Legal, f80 case is not
216 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
218 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
219 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
222 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
223 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
224 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
227 // Handle FP_TO_UINT by promoting the destination to a larger signed
229 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
230 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
231 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
233 if (Subtarget->is64Bit()) {
234 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
235 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
236 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
237 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
239 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
240 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
242 } else if (!Subtarget->useSoftFloat()) {
243 // Since AVX is a superset of SSE3, only check for SSE here.
244 if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
245 // Expand FP_TO_UINT into a select.
246 // FIXME: We would like to use a Custom expander here eventually to do
247 // the optimal thing for SSE vs. the default expansion in the legalizer.
248 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
250 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
251 // With SSE3 we can use fisttpll to convert to a signed i64; without
252 // SSE, we're stuck with a fistpll.
253 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
255 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
258 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
259 if (!X86ScalarSSEf64) {
260 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
261 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
262 if (Subtarget->is64Bit()) {
263 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
264 // Without SSE, i64->f64 goes through memory.
265 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
269 // Scalar integer divide and remainder are lowered to use operations that
270 // produce two results, to match the available instructions. This exposes
271 // the two-result form to trivial CSE, which is able to combine x/y and x%y
272 // into a single instruction.
274 // Scalar integer multiply-high is also lowered to use two-result
275 // operations, to match the available instructions. However, plain multiply
276 // (low) operations are left as Legal, as there are single-result
277 // instructions for this in x86. Using the two-result multiply instructions
278 // when both high and low results are needed must be arranged by dagcombine.
279 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
280 setOperationAction(ISD::MULHS, VT, Expand);
281 setOperationAction(ISD::MULHU, VT, Expand);
282 setOperationAction(ISD::SDIV, VT, Expand);
283 setOperationAction(ISD::UDIV, VT, Expand);
284 setOperationAction(ISD::SREM, VT, Expand);
285 setOperationAction(ISD::UREM, VT, Expand);
287 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
288 setOperationAction(ISD::ADDC, VT, Custom);
289 setOperationAction(ISD::ADDE, VT, Custom);
290 setOperationAction(ISD::SUBC, VT, Custom);
291 setOperationAction(ISD::SUBE, VT, Custom);
294 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
295 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
296 setOperationAction(ISD::BR_CC , MVT::f32, Expand);
297 setOperationAction(ISD::BR_CC , MVT::f64, Expand);
298 setOperationAction(ISD::BR_CC , MVT::f80, Expand);
299 setOperationAction(ISD::BR_CC , MVT::f128, Expand);
300 setOperationAction(ISD::BR_CC , MVT::i8, Expand);
301 setOperationAction(ISD::BR_CC , MVT::i16, Expand);
302 setOperationAction(ISD::BR_CC , MVT::i32, Expand);
303 setOperationAction(ISD::BR_CC , MVT::i64, Expand);
304 setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
305 setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
306 setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
307 setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
308 setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
309 setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
310 setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
311 setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
312 if (Subtarget->is64Bit())
313 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
314 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
315 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
316 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
317 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
319 if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
320 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
321 // is. We should promote the value to 64-bits to solve this.
322 // This is what the CRT headers do - `fmodf` is an inline header
323 // function casting to f64 and calling `fmod`.
324 setOperationAction(ISD::FREM , MVT::f32 , Promote);
326 setOperationAction(ISD::FREM , MVT::f32 , Expand);
329 setOperationAction(ISD::FREM , MVT::f64 , Expand);
330 setOperationAction(ISD::FREM , MVT::f80 , Expand);
331 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
333 // Promote the i8 variants and force them on up to i32 which has a shorter
335 setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
336 AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
337 setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
338 AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
339 if (Subtarget->hasBMI()) {
340 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
341 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
342 if (Subtarget->is64Bit())
343 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
345 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
346 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
347 if (Subtarget->is64Bit())
348 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
351 if (Subtarget->hasLZCNT()) {
352 // When promoting the i8 variants, force them to i32 for a shorter
354 setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
355 AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
356 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
357 AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
358 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
359 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
360 if (Subtarget->is64Bit())
361 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
363 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
364 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
365 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
366 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
367 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
368 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
369 if (Subtarget->is64Bit()) {
370 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
371 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
375 // Special handling for half-precision floating point conversions.
376 // If we don't have F16C support, then lower half float conversions
377 // into library calls.
378 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) {
379 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
380 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
383 // There's never any support for operations beyond MVT::f32.
384 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
385 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
386 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
387 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
389 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
390 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
391 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
392 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
393 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
394 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
396 if (Subtarget->hasPOPCNT()) {
397 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
399 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
400 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
401 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
402 if (Subtarget->is64Bit())
403 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
406 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
408 if (!Subtarget->hasMOVBE())
409 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
411 // These should be promoted to a larger select which is supported.
412 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
413 // X86 wants to expand cmov itself.
414 setOperationAction(ISD::SELECT , MVT::i8 , Custom);
415 setOperationAction(ISD::SELECT , MVT::i16 , Custom);
416 setOperationAction(ISD::SELECT , MVT::i32 , Custom);
417 setOperationAction(ISD::SELECT , MVT::f32 , Custom);
418 setOperationAction(ISD::SELECT , MVT::f64 , Custom);
419 setOperationAction(ISD::SELECT , MVT::f80 , Custom);
420 setOperationAction(ISD::SELECT , MVT::f128 , Custom);
421 setOperationAction(ISD::SETCC , MVT::i8 , Custom);
422 setOperationAction(ISD::SETCC , MVT::i16 , Custom);
423 setOperationAction(ISD::SETCC , MVT::i32 , Custom);
424 setOperationAction(ISD::SETCC , MVT::f32 , Custom);
425 setOperationAction(ISD::SETCC , MVT::f64 , Custom);
426 setOperationAction(ISD::SETCC , MVT::f80 , Custom);
427 setOperationAction(ISD::SETCC , MVT::f128 , Custom);
428 setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
429 setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
430 setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
431 if (Subtarget->is64Bit()) {
432 setOperationAction(ISD::SELECT , MVT::i64 , Custom);
433 setOperationAction(ISD::SETCC , MVT::i64 , Custom);
434 setOperationAction(ISD::SETCCE , MVT::i64 , Custom);
436 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
437 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
438 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
439 // support continuation, user-level threading, etc. As a result, no
440 // other SjLj exception interfaces are implemented and please don't build
441 // your own exception handling based on them.
442 // LLVM/Clang supports zero-cost DWARF exception handling.
443 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
444 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
447 setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
448 setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
449 setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
450 setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
451 if (Subtarget->is64Bit())
452 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
453 setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
454 setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
455 if (Subtarget->is64Bit()) {
456 setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
457 setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
458 setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
459 setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
460 setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
462 // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
463 setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
464 setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
465 setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
466 if (Subtarget->is64Bit()) {
467 setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
468 setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
469 setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
472 if (Subtarget->hasSSE1())
473 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
475 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
477 // Expand certain atomics
478 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
479 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
480 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
481 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
484 if (Subtarget->hasCmpxchg16b()) {
485 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
488 // FIXME - use subtarget debug flags
489 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
490 !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
491 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
494 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
495 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
497 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
498 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
500 setOperationAction(ISD::TRAP, MVT::Other, Legal);
501 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
503 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
504 setOperationAction(ISD::VASTART , MVT::Other, Custom);
505 setOperationAction(ISD::VAEND , MVT::Other, Expand);
506 if (Subtarget->is64Bit()) {
507 setOperationAction(ISD::VAARG , MVT::Other, Custom);
508 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
510 // TargetInfo::CharPtrBuiltinVaList
511 setOperationAction(ISD::VAARG , MVT::Other, Expand);
512 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
515 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
516 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
518 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
520 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
521 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
522 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
524 if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) {
525 // f32 and f64 use SSE.
526 // Set up the FP register classes.
527 addRegisterClass(MVT::f32, &X86::FR32RegClass);
528 addRegisterClass(MVT::f64, &X86::FR64RegClass);
530 // Use ANDPD to simulate FABS.
531 setOperationAction(ISD::FABS , MVT::f64, Custom);
532 setOperationAction(ISD::FABS , MVT::f32, Custom);
534 // Use XORP to simulate FNEG.
535 setOperationAction(ISD::FNEG , MVT::f64, Custom);
536 setOperationAction(ISD::FNEG , MVT::f32, Custom);
538 // Use ANDPD and ORPD to simulate FCOPYSIGN.
539 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
540 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
542 // Lower this to FGETSIGNx86 plus an AND.
543 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
544 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
546 // We don't support sin/cos/fmod
547 setOperationAction(ISD::FSIN , MVT::f64, Expand);
548 setOperationAction(ISD::FCOS , MVT::f64, Expand);
549 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
550 setOperationAction(ISD::FSIN , MVT::f32, Expand);
551 setOperationAction(ISD::FCOS , MVT::f32, Expand);
552 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
554 // Expand FP immediates into loads from the stack, except for the special
556 addLegalFPImmediate(APFloat(+0.0)); // xorpd
557 addLegalFPImmediate(APFloat(+0.0f)); // xorps
558 } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) {
559 // Use SSE for f32, x87 for f64.
560 // Set up the FP register classes.
561 addRegisterClass(MVT::f32, &X86::FR32RegClass);
562 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
564 // Use ANDPS to simulate FABS.
565 setOperationAction(ISD::FABS , MVT::f32, Custom);
567 // Use XORP to simulate FNEG.
568 setOperationAction(ISD::FNEG , MVT::f32, Custom);
570 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
572 // Use ANDPS and ORPS to simulate FCOPYSIGN.
573 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
574 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
576 // We don't support sin/cos/fmod
577 setOperationAction(ISD::FSIN , MVT::f32, Expand);
578 setOperationAction(ISD::FCOS , MVT::f32, Expand);
579 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
581 // Special cases we handle for FP constants.
582 addLegalFPImmediate(APFloat(+0.0f)); // xorps
583 addLegalFPImmediate(APFloat(+0.0)); // FLD0
584 addLegalFPImmediate(APFloat(+1.0)); // FLD1
585 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
586 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
588 if (!TM.Options.UnsafeFPMath) {
589 setOperationAction(ISD::FSIN , MVT::f64, Expand);
590 setOperationAction(ISD::FCOS , MVT::f64, Expand);
591 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
593 } else if (!Subtarget->useSoftFloat()) {
594 // f32 and f64 in x87.
595 // Set up the FP register classes.
596 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
597 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
599 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
600 setOperationAction(ISD::UNDEF, MVT::f32, Expand);
601 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
602 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
604 if (!TM.Options.UnsafeFPMath) {
605 setOperationAction(ISD::FSIN , MVT::f64, Expand);
606 setOperationAction(ISD::FSIN , MVT::f32, Expand);
607 setOperationAction(ISD::FCOS , MVT::f64, Expand);
608 setOperationAction(ISD::FCOS , MVT::f32, Expand);
609 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
610 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
612 addLegalFPImmediate(APFloat(+0.0)); // FLD0
613 addLegalFPImmediate(APFloat(+1.0)); // FLD1
614 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
615 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
616 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
617 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
618 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
619 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
622 // We don't support FMA.
623 setOperationAction(ISD::FMA, MVT::f64, Expand);
624 setOperationAction(ISD::FMA, MVT::f32, Expand);
626 // Long double always uses X87, except f128 in MMX.
627 if (!Subtarget->useSoftFloat()) {
628 if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
629 addRegisterClass(MVT::f128, &X86::FR128RegClass);
630 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
631 setOperationAction(ISD::FABS , MVT::f128, Custom);
632 setOperationAction(ISD::FNEG , MVT::f128, Custom);
633 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
636 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
637 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
638 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
640 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
641 addLegalFPImmediate(TmpFlt); // FLD0
643 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
646 APFloat TmpFlt2(+1.0);
647 TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
649 addLegalFPImmediate(TmpFlt2); // FLD1
650 TmpFlt2.changeSign();
651 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
654 if (!TM.Options.UnsafeFPMath) {
655 setOperationAction(ISD::FSIN , MVT::f80, Expand);
656 setOperationAction(ISD::FCOS , MVT::f80, Expand);
657 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
661 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
662 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
663 setOperationAction(ISD::FRINT, MVT::f80, Expand);
664 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
665 setOperationAction(ISD::FMA, MVT::f80, Expand);
668 // Always use a library call for pow.
669 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
670 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
671 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
673 setOperationAction(ISD::FLOG, MVT::f80, Expand);
674 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
675 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
676 setOperationAction(ISD::FEXP, MVT::f80, Expand);
677 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
678 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
679 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
681 // First set operation action for all vector types to either promote
682 // (for widening) or expand (for scalarization). Then we will selectively
683 // turn on ones that can be effectively codegen'd.
684 for (MVT VT : MVT::vector_valuetypes()) {
685 setOperationAction(ISD::ADD , VT, Expand);
686 setOperationAction(ISD::SUB , VT, Expand);
687 setOperationAction(ISD::FADD, VT, Expand);
688 setOperationAction(ISD::FNEG, VT, Expand);
689 setOperationAction(ISD::FSUB, VT, Expand);
690 setOperationAction(ISD::MUL , VT, Expand);
691 setOperationAction(ISD::FMUL, VT, Expand);
692 setOperationAction(ISD::SDIV, VT, Expand);
693 setOperationAction(ISD::UDIV, VT, Expand);
694 setOperationAction(ISD::FDIV, VT, Expand);
695 setOperationAction(ISD::SREM, VT, Expand);
696 setOperationAction(ISD::UREM, VT, Expand);
697 setOperationAction(ISD::LOAD, VT, Expand);
698 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
699 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
700 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
701 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
702 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
703 setOperationAction(ISD::FABS, VT, Expand);
704 setOperationAction(ISD::FSIN, VT, Expand);
705 setOperationAction(ISD::FSINCOS, VT, Expand);
706 setOperationAction(ISD::FCOS, VT, Expand);
707 setOperationAction(ISD::FSINCOS, VT, Expand);
708 setOperationAction(ISD::FREM, VT, Expand);
709 setOperationAction(ISD::FMA, VT, Expand);
710 setOperationAction(ISD::FPOWI, VT, Expand);
711 setOperationAction(ISD::FSQRT, VT, Expand);
712 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
713 setOperationAction(ISD::FFLOOR, VT, Expand);
714 setOperationAction(ISD::FCEIL, VT, Expand);
715 setOperationAction(ISD::FTRUNC, VT, Expand);
716 setOperationAction(ISD::FRINT, VT, Expand);
717 setOperationAction(ISD::FNEARBYINT, VT, Expand);
718 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
719 setOperationAction(ISD::MULHS, VT, Expand);
720 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
721 setOperationAction(ISD::MULHU, VT, Expand);
722 setOperationAction(ISD::SDIVREM, VT, Expand);
723 setOperationAction(ISD::UDIVREM, VT, Expand);
724 setOperationAction(ISD::FPOW, VT, Expand);
725 setOperationAction(ISD::CTPOP, VT, Expand);
726 setOperationAction(ISD::CTTZ, VT, Expand);
727 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
728 setOperationAction(ISD::CTLZ, VT, Expand);
729 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
730 setOperationAction(ISD::SHL, VT, Expand);
731 setOperationAction(ISD::SRA, VT, Expand);
732 setOperationAction(ISD::SRL, VT, Expand);
733 setOperationAction(ISD::ROTL, VT, Expand);
734 setOperationAction(ISD::ROTR, VT, Expand);
735 setOperationAction(ISD::BSWAP, VT, Expand);
736 setOperationAction(ISD::SETCC, VT, Expand);
737 setOperationAction(ISD::FLOG, VT, Expand);
738 setOperationAction(ISD::FLOG2, VT, Expand);
739 setOperationAction(ISD::FLOG10, VT, Expand);
740 setOperationAction(ISD::FEXP, VT, Expand);
741 setOperationAction(ISD::FEXP2, VT, Expand);
742 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
743 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
744 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
745 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
746 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
747 setOperationAction(ISD::TRUNCATE, VT, Expand);
748 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
749 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
750 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
751 setOperationAction(ISD::VSELECT, VT, Expand);
752 setOperationAction(ISD::SELECT_CC, VT, Expand);
753 for (MVT InnerVT : MVT::vector_valuetypes()) {
754 setTruncStoreAction(InnerVT, VT, Expand);
756 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
757 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
759 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
760 // types, we have to deal with them whether we ask for Expansion or not.
761 // Setting Expand causes its own optimisation problems though, so leave
763 if (VT.getVectorElementType() == MVT::i1)
764 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
766 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
767 // split/scalarized right now.
768 if (VT.getVectorElementType() == MVT::f16)
769 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
773 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
774 // with -msoft-float, disable use of MMX as well.
775 if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) {
776 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
777 // No operations on x86mmx supported, everything uses intrinsics.
780 // MMX-sized vectors (other than x86mmx) are expected to be expanded
781 // into smaller operations.
782 for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
783 setOperationAction(ISD::MULHS, MMXTy, Expand);
784 setOperationAction(ISD::AND, MMXTy, Expand);
785 setOperationAction(ISD::OR, MMXTy, Expand);
786 setOperationAction(ISD::XOR, MMXTy, Expand);
787 setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
788 setOperationAction(ISD::SELECT, MMXTy, Expand);
789 setOperationAction(ISD::BITCAST, MMXTy, Expand);
791 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
793 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) {
794 addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
796 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
797 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
798 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
799 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
800 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
801 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
802 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
803 setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
804 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
805 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
806 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
807 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
808 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
809 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
812 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) {
813 addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
815 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
816 // registers cannot be used even for integer operations.
817 addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
818 addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
819 addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
820 addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
822 setOperationAction(ISD::ADD, MVT::v16i8, Legal);
823 setOperationAction(ISD::ADD, MVT::v8i16, Legal);
824 setOperationAction(ISD::ADD, MVT::v4i32, Legal);
825 setOperationAction(ISD::ADD, MVT::v2i64, Legal);
826 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
827 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
828 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
829 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
830 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
831 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
832 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
833 setOperationAction(ISD::SUB, MVT::v16i8, Legal);
834 setOperationAction(ISD::SUB, MVT::v8i16, Legal);
835 setOperationAction(ISD::SUB, MVT::v4i32, Legal);
836 setOperationAction(ISD::SUB, MVT::v2i64, Legal);
837 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
838 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
839 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
840 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
841 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
842 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
843 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
844 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
846 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
847 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
848 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
849 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
851 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
852 setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
853 setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
854 setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
856 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
857 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
858 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
859 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
860 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
862 setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
863 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
864 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
865 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
867 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
868 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
869 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
870 // ISD::CTTZ v2i64 - scalarization is faster.
871 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
872 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
873 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
874 // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
876 // Custom lower build_vector, vector_shuffle, vselect, and extract_vector_elt.
877 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
878 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
879 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
880 setOperationAction(ISD::VSELECT, VT, Custom);
881 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
884 // We support custom legalizing of sext and anyext loads for specific
885 // memory vector types which we can load as a scalar (or sequence of
886 // scalars) and extend in-register to a legal 128-bit vector type. For sext
887 // loads these must work with a single scalar load.
888 for (MVT VT : MVT::integer_vector_valuetypes()) {
889 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
890 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
891 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
892 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
893 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
894 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
895 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
896 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
897 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
900 setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
901 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
902 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
903 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
904 setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
905 setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
906 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
907 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
909 if (Subtarget->is64Bit()) {
910 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
911 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
914 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
915 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
916 setOperationAction(ISD::AND, VT, Promote);
917 AddPromotedToType (ISD::AND, VT, MVT::v2i64);
918 setOperationAction(ISD::OR, VT, Promote);
919 AddPromotedToType (ISD::OR, VT, MVT::v2i64);
920 setOperationAction(ISD::XOR, VT, Promote);
921 AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
922 setOperationAction(ISD::LOAD, VT, Promote);
923 AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
924 setOperationAction(ISD::SELECT, VT, Promote);
925 AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
928 // Custom lower v2i64 and v2f64 selects.
929 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
930 setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
931 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
932 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
934 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
935 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
937 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
939 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
940 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
941 // As there is no 64-bit GPR available, we need to build a special custom
942 // sequence to convert from v2i32 to v2f32.
943 if (!Subtarget->is64Bit())
944 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
946 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
947 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
949 for (MVT VT : MVT::fp_vector_valuetypes())
950 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
952 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
953 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
954 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
957 if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) {
958 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
959 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
960 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
961 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
962 setOperationAction(ISD::FRINT, RoundedTy, Legal);
963 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
966 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
967 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
968 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
969 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
970 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
971 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
972 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
973 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
975 // FIXME: Do we need to handle scalar-to-vector here?
976 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
978 // We directly match byte blends in the backend as they match the VSELECT
980 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
982 // SSE41 brings specific instructions for doing vector sign extend even in
983 // cases where we don't have SRA.
984 for (MVT VT : MVT::integer_vector_valuetypes()) {
985 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
986 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
987 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
990 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
991 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
992 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
993 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
994 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
995 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
996 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
998 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
999 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1000 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1001 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1002 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1003 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1005 // i8 and i16 vectors are custom because the source register and source
1006 // memory operand types are not the same width. f32 vectors are
1007 // custom since the immediate controlling the insert encodes additional
1009 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1010 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1011 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1012 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1014 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
1015 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
1016 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
1017 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
1019 // FIXME: these should be Legal, but that's only for the case where
1020 // the index is constant. For now custom expand to deal with that.
1021 if (Subtarget->is64Bit()) {
1022 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1023 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
1027 if (Subtarget->hasSSE2()) {
1028 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1029 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1030 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1032 setOperationAction(ISD::SRL, MVT::v8i16, Custom);
1033 setOperationAction(ISD::SRL, MVT::v16i8, Custom);
1035 setOperationAction(ISD::SHL, MVT::v8i16, Custom);
1036 setOperationAction(ISD::SHL, MVT::v16i8, Custom);
1038 setOperationAction(ISD::SRA, MVT::v8i16, Custom);
1039 setOperationAction(ISD::SRA, MVT::v16i8, Custom);
1041 // In the customized shift lowering, the legal cases in AVX2 will be
1043 setOperationAction(ISD::SRL, MVT::v2i64, Custom);
1044 setOperationAction(ISD::SRL, MVT::v4i32, Custom);
1046 setOperationAction(ISD::SHL, MVT::v2i64, Custom);
1047 setOperationAction(ISD::SHL, MVT::v4i32, Custom);
1049 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1050 setOperationAction(ISD::SRA, MVT::v4i32, Custom);
1053 if (Subtarget->hasXOP()) {
1054 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1055 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1056 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1057 setOperationAction(ISD::ROTL, MVT::v2i64, Custom);
1058 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1059 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1060 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1061 setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
1064 if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
1065 addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
1066 addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
1067 addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
1068 addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
1069 addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
1070 addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
1072 setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
1073 setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
1074 setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
1076 setOperationAction(ISD::FADD, MVT::v8f32, Legal);
1077 setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
1078 setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
1079 setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
1080 setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
1081 setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
1082 setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
1083 setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
1084 setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
1085 setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
1086 setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
1087 setOperationAction(ISD::FABS, MVT::v8f32, Custom);
1089 setOperationAction(ISD::FADD, MVT::v4f64, Legal);
1090 setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
1091 setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
1092 setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
1093 setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
1094 setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
1095 setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
1096 setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
1097 setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
1098 setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
1099 setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
1100 setOperationAction(ISD::FABS, MVT::v4f64, Custom);
1102 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1103 // even though v8i16 is a legal type.
1104 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1105 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1106 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1108 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1109 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1110 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1112 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1113 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1115 for (MVT VT : MVT::fp_vector_valuetypes())
1116 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1118 setOperationAction(ISD::SRL, MVT::v16i16, Custom);
1119 setOperationAction(ISD::SRL, MVT::v32i8, Custom);
1121 setOperationAction(ISD::SHL, MVT::v16i16, Custom);
1122 setOperationAction(ISD::SHL, MVT::v32i8, Custom);
1124 setOperationAction(ISD::SRA, MVT::v16i16, Custom);
1125 setOperationAction(ISD::SRA, MVT::v32i8, Custom);
1127 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1128 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1129 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1130 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1132 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1133 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1134 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1136 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1137 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1138 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1139 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1140 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1141 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1142 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1143 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1144 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1145 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1146 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1147 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1149 setOperationAction(ISD::CTPOP, MVT::v32i8, Custom);
1150 setOperationAction(ISD::CTPOP, MVT::v16i16, Custom);
1151 setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
1152 setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
1154 setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
1155 setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
1156 setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
1157 setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
1158 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
1159 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
1160 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1161 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1163 if (Subtarget->hasAnyFMA()) {
1164 setOperationAction(ISD::FMA, MVT::v8f32, Legal);
1165 setOperationAction(ISD::FMA, MVT::v4f64, Legal);
1166 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
1167 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1168 setOperationAction(ISD::FMA, MVT::f32, Legal);
1169 setOperationAction(ISD::FMA, MVT::f64, Legal);
1172 if (Subtarget->hasInt256()) {
1173 setOperationAction(ISD::ADD, MVT::v4i64, Legal);
1174 setOperationAction(ISD::ADD, MVT::v8i32, Legal);
1175 setOperationAction(ISD::ADD, MVT::v16i16, Legal);
1176 setOperationAction(ISD::ADD, MVT::v32i8, Legal);
1178 setOperationAction(ISD::SUB, MVT::v4i64, Legal);
1179 setOperationAction(ISD::SUB, MVT::v8i32, Legal);
1180 setOperationAction(ISD::SUB, MVT::v16i16, Legal);
1181 setOperationAction(ISD::SUB, MVT::v32i8, Legal);
1183 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1184 setOperationAction(ISD::MUL, MVT::v8i32, Legal);
1185 setOperationAction(ISD::MUL, MVT::v16i16, Legal);
1186 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1188 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1189 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1190 setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
1191 setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
1193 setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
1194 setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
1195 setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
1196 setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
1197 setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
1198 setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
1199 setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
1200 setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
1201 setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
1202 setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
1203 setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
1204 setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
1206 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1207 // when we have a 256bit-wide blend with immediate.
1208 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1210 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1211 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1212 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1213 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1214 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1215 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1216 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1218 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1219 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1220 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1221 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1222 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1223 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1225 setOperationAction(ISD::ADD, MVT::v4i64, Custom);
1226 setOperationAction(ISD::ADD, MVT::v8i32, Custom);
1227 setOperationAction(ISD::ADD, MVT::v16i16, Custom);
1228 setOperationAction(ISD::ADD, MVT::v32i8, Custom);
1230 setOperationAction(ISD::SUB, MVT::v4i64, Custom);
1231 setOperationAction(ISD::SUB, MVT::v8i32, Custom);
1232 setOperationAction(ISD::SUB, MVT::v16i16, Custom);
1233 setOperationAction(ISD::SUB, MVT::v32i8, Custom);
1235 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1236 setOperationAction(ISD::MUL, MVT::v8i32, Custom);
1237 setOperationAction(ISD::MUL, MVT::v16i16, Custom);
1238 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1240 setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
1241 setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
1242 setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
1243 setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
1244 setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
1245 setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
1246 setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
1247 setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
1248 setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
1249 setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
1250 setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
1251 setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
1254 // In the customized shift lowering, the legal cases in AVX2 will be
1256 setOperationAction(ISD::SRL, MVT::v4i64, Custom);
1257 setOperationAction(ISD::SRL, MVT::v8i32, Custom);
1259 setOperationAction(ISD::SHL, MVT::v4i64, Custom);
1260 setOperationAction(ISD::SHL, MVT::v8i32, Custom);
1262 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1263 setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1265 // Custom lower several nodes for 256-bit types.
1266 for (MVT VT : MVT::vector_valuetypes()) {
1267 if (VT.getScalarSizeInBits() >= 32) {
1268 setOperationAction(ISD::MLOAD, VT, Legal);
1269 setOperationAction(ISD::MSTORE, VT, Legal);
1271 // Extract subvector is special because the value type
1272 // (result) is 128-bit but the source is 256-bit wide.
1273 if (VT.is128BitVector()) {
1274 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1276 // Do not attempt to custom lower other non-256-bit vectors
1277 if (!VT.is256BitVector())
1280 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1281 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1282 setOperationAction(ISD::VSELECT, VT, Custom);
1283 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1284 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1285 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1286 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1287 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1290 if (Subtarget->hasInt256())
1291 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1293 // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
1294 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1295 setOperationAction(ISD::AND, VT, Promote);
1296 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1297 setOperationAction(ISD::OR, VT, Promote);
1298 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1299 setOperationAction(ISD::XOR, VT, Promote);
1300 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1301 setOperationAction(ISD::LOAD, VT, Promote);
1302 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1303 setOperationAction(ISD::SELECT, VT, Promote);
1304 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1308 if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
1309 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1310 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1311 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1312 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1314 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1315 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1316 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1318 for (MVT VT : MVT::fp_vector_valuetypes())
1319 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1321 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1322 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
1323 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1324 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
1325 setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1326 setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
1327 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1328 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
1329 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1330 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
1331 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1332 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
1334 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1335 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1336 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
1337 setOperationAction(ISD::XOR, MVT::i1, Legal);
1338 setOperationAction(ISD::OR, MVT::i1, Legal);
1339 setOperationAction(ISD::AND, MVT::i1, Legal);
1340 setOperationAction(ISD::SUB, MVT::i1, Custom);
1341 setOperationAction(ISD::ADD, MVT::i1, Custom);
1342 setOperationAction(ISD::MUL, MVT::i1, Custom);
1343 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1344 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1345 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1346 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1347 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1349 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1350 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1351 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1352 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1353 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1354 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1355 setOperationAction(ISD::FABS, MVT::v16f32, Custom);
1357 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1358 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1359 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1360 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1361 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1362 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1363 setOperationAction(ISD::FABS, MVT::v8f64, Custom);
1364 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1365 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1367 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1368 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1369 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1370 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1371 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1372 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1373 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1374 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1375 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1376 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1377 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1378 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1379 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1380 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1381 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1382 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1384 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1385 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1386 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1387 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1388 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1389 if (Subtarget->hasVLX()){
1390 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1391 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1392 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1393 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1394 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1396 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1397 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1398 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1399 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1400 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1402 setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
1403 setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
1404 setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
1405 setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
1407 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1408 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1409 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1410 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
1411 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
1412 if (Subtarget->hasDQI()) {
1413 setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
1414 setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
1416 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1417 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1418 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1419 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1420 if (Subtarget->hasVLX()) {
1421 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
1422 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1423 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
1424 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1425 setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
1426 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1427 setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
1428 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1431 if (Subtarget->hasVLX()) {
1432 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1433 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1434 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1435 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1436 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1437 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1438 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1439 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1441 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1442 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1443 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1444 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1445 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1446 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1447 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1448 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1449 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1450 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1451 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1452 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1453 if (Subtarget->hasDQI()) {
1454 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1455 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1457 setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal);
1458 setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal);
1459 setOperationAction(ISD::FCEIL, MVT::v16f32, Legal);
1460 setOperationAction(ISD::FCEIL, MVT::v8f64, Legal);
1461 setOperationAction(ISD::FTRUNC, MVT::v16f32, Legal);
1462 setOperationAction(ISD::FTRUNC, MVT::v8f64, Legal);
1463 setOperationAction(ISD::FRINT, MVT::v16f32, Legal);
1464 setOperationAction(ISD::FRINT, MVT::v8f64, Legal);
1465 setOperationAction(ISD::FNEARBYINT, MVT::v16f32, Legal);
1466 setOperationAction(ISD::FNEARBYINT, MVT::v8f64, Legal);
1468 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1469 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1470 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1471 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1472 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1474 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1475 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1477 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1479 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1480 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1481 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1482 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1483 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1484 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1485 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1486 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1487 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1488 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1489 setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
1490 setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
1492 setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
1493 setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
1494 setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
1495 setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
1496 setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
1497 setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
1498 setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
1499 setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
1501 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1502 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1504 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1505 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1507 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1509 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1510 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1512 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1513 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1515 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1516 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1518 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1519 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1520 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1521 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1522 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1523 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1525 if (Subtarget->hasCDI()) {
1526 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1527 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1528 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Legal);
1529 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Legal);
1531 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1532 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1533 setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
1534 setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1535 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Custom);
1536 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Custom);
1537 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Custom);
1538 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Custom);
1540 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
1541 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
1543 if (Subtarget->hasVLX()) {
1544 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
1545 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
1546 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
1547 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
1548 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Legal);
1549 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Legal);
1550 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Legal);
1551 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Legal);
1553 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1554 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1555 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
1556 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
1558 setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
1559 setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
1560 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1561 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1562 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Custom);
1563 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Custom);
1564 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Custom);
1565 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Custom);
1567 } // Subtarget->hasCDI()
1569 if (Subtarget->hasDQI()) {
1570 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1571 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1572 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1574 // Custom lower several nodes.
1575 for (MVT VT : MVT::vector_valuetypes()) {
1576 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1578 setOperationAction(ISD::AND, VT, Legal);
1579 setOperationAction(ISD::OR, VT, Legal);
1580 setOperationAction(ISD::XOR, VT, Legal);
1582 if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
1583 setOperationAction(ISD::MGATHER, VT, Custom);
1584 setOperationAction(ISD::MSCATTER, VT, Custom);
1586 // Extract subvector is special because the value type
1587 // (result) is 256/128-bit but the source is 512-bit wide.
1588 if (VT.is128BitVector() || VT.is256BitVector()) {
1589 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1591 if (VT.getVectorElementType() == MVT::i1)
1592 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1594 // Do not attempt to custom lower other non-512-bit vectors
1595 if (!VT.is512BitVector())
1598 if (EltSize >= 32) {
1599 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1600 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1601 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1602 setOperationAction(ISD::VSELECT, VT, Legal);
1603 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1604 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1605 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1606 setOperationAction(ISD::MLOAD, VT, Legal);
1607 setOperationAction(ISD::MSTORE, VT, Legal);
1608 setOperationAction(ISD::MGATHER, VT, Legal);
1609 setOperationAction(ISD::MSCATTER, VT, Custom);
1612 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1613 setOperationAction(ISD::SELECT, VT, Promote);
1614 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1618 if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) {
1619 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1620 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1622 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1623 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1625 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1626 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1627 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1628 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1629 setOperationAction(ISD::ADD, MVT::v32i16, Legal);
1630 setOperationAction(ISD::ADD, MVT::v64i8, Legal);
1631 setOperationAction(ISD::SUB, MVT::v32i16, Legal);
1632 setOperationAction(ISD::SUB, MVT::v64i8, Legal);
1633 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1634 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1635 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1636 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1637 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1638 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1639 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1640 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1641 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1642 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
1643 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
1644 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1645 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1646 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1647 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1648 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1649 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1650 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1651 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1652 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1653 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1654 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1655 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1656 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1657 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1658 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1659 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1660 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
1661 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
1662 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1663 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1664 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1665 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1666 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1668 setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
1669 setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
1670 setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
1671 setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
1672 setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
1673 setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
1674 setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
1675 setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
1677 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1678 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1679 if (Subtarget->hasVLX())
1680 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1682 if (Subtarget->hasCDI()) {
1683 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1684 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1685 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Custom);
1686 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Custom);
1689 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1690 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1691 setOperationAction(ISD::VSELECT, VT, Legal);
1695 if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) {
1696 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1697 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1699 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1700 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1701 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1702 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1703 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1704 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1705 setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
1706 setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
1707 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
1708 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
1709 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
1710 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
1712 setOperationAction(ISD::AND, MVT::v8i32, Legal);
1713 setOperationAction(ISD::OR, MVT::v8i32, Legal);
1714 setOperationAction(ISD::XOR, MVT::v8i32, Legal);
1715 setOperationAction(ISD::AND, MVT::v4i32, Legal);
1716 setOperationAction(ISD::OR, MVT::v4i32, Legal);
1717 setOperationAction(ISD::XOR, MVT::v4i32, Legal);
1718 setOperationAction(ISD::SRA, MVT::v2i64, Custom);
1719 setOperationAction(ISD::SRA, MVT::v4i64, Custom);
1721 setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
1722 setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
1723 setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
1724 setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
1725 setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
1726 setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
1727 setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
1728 setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
1731 // We want to custom lower some of our intrinsics.
1732 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1733 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1734 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1735 if (!Subtarget->is64Bit()) {
1736 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1737 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1740 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1741 // handle type legalization for these operations here.
1743 // FIXME: We really should do custom legalization for addition and
1744 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1745 // than generic legalization for 64-bit multiplication-with-overflow, though.
1746 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1747 if (VT == MVT::i64 && !Subtarget->is64Bit())
1749 // Add/Sub/Mul with overflow operations are custom lowered.
1750 setOperationAction(ISD::SADDO, VT, Custom);
1751 setOperationAction(ISD::UADDO, VT, Custom);
1752 setOperationAction(ISD::SSUBO, VT, Custom);
1753 setOperationAction(ISD::USUBO, VT, Custom);
1754 setOperationAction(ISD::SMULO, VT, Custom);
1755 setOperationAction(ISD::UMULO, VT, Custom);
1758 if (!Subtarget->is64Bit()) {
1759 // These libcalls are not available in 32-bit.
1760 setLibcallName(RTLIB::SHL_I128, nullptr);
1761 setLibcallName(RTLIB::SRL_I128, nullptr);
1762 setLibcallName(RTLIB::SRA_I128, nullptr);
1765 // Combine sin / cos into one node or libcall if possible.
1766 if (Subtarget->hasSinCos()) {
1767 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1768 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1769 if (Subtarget->isTargetDarwin()) {
1770 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1771 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1772 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1773 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1777 if (Subtarget->isTargetWin64()) {
1778 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1779 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1780 setOperationAction(ISD::SREM, MVT::i128, Custom);
1781 setOperationAction(ISD::UREM, MVT::i128, Custom);
1782 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1783 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1786 // We have target-specific dag combine patterns for the following nodes:
1787 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1788 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1789 setTargetDAGCombine(ISD::BITCAST);
1790 setTargetDAGCombine(ISD::VSELECT);
1791 setTargetDAGCombine(ISD::SELECT);
1792 setTargetDAGCombine(ISD::SHL);
1793 setTargetDAGCombine(ISD::SRA);
1794 setTargetDAGCombine(ISD::SRL);
1795 setTargetDAGCombine(ISD::OR);
1796 setTargetDAGCombine(ISD::AND);
1797 setTargetDAGCombine(ISD::ADD);
1798 setTargetDAGCombine(ISD::FADD);
1799 setTargetDAGCombine(ISD::FSUB);
1800 setTargetDAGCombine(ISD::FNEG);
1801 setTargetDAGCombine(ISD::FMA);
1802 setTargetDAGCombine(ISD::FMAXNUM);
1803 setTargetDAGCombine(ISD::SUB);
1804 setTargetDAGCombine(ISD::LOAD);
1805 setTargetDAGCombine(ISD::MLOAD);
1806 setTargetDAGCombine(ISD::STORE);
1807 setTargetDAGCombine(ISD::MSTORE);
1808 setTargetDAGCombine(ISD::TRUNCATE);
1809 setTargetDAGCombine(ISD::ZERO_EXTEND);
1810 setTargetDAGCombine(ISD::ANY_EXTEND);
1811 setTargetDAGCombine(ISD::SIGN_EXTEND);
1812 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1813 setTargetDAGCombine(ISD::SINT_TO_FP);
1814 setTargetDAGCombine(ISD::UINT_TO_FP);
1815 setTargetDAGCombine(ISD::SETCC);
1816 setTargetDAGCombine(ISD::BUILD_VECTOR);
1817 setTargetDAGCombine(ISD::MUL);
1818 setTargetDAGCombine(ISD::XOR);
1819 setTargetDAGCombine(ISD::MSCATTER);
1820 setTargetDAGCombine(ISD::MGATHER);
1822 computeRegisterProperties(Subtarget->getRegisterInfo());
1824 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1825 MaxStoresPerMemsetOptSize = 8;
1826 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1827 MaxStoresPerMemcpyOptSize = 4;
1828 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1829 MaxStoresPerMemmoveOptSize = 4;
1830 setPrefLoopAlignment(4); // 2^4 bytes.
1832 // A predictable cmov does not hurt on an in-order CPU.
1833 // FIXME: Use a CPU attribute to trigger this, not a CPU model.
1834 PredictableSelectIsExpensive = !Subtarget->isAtom();
1835 EnableExtLdPromotion = true;
1836 setPrefFunctionAlignment(4); // 2^4 bytes.
1838 verifyIntrinsicTables();
/// Returns true exactly when the subtarget is both a MachO target and 64-bit.
1841 // This has so far only been implemented for 64-bit MachO.
1842 bool X86TargetLowering::useLoadStackGuardNode() const {
1843 return Subtarget->isTargetMachO() && Subtarget->is64Bit();
/// Chooses the type-legalization action for the vector type \p VT.
///
/// When ExperimentalVectorWideningLegalization is set, and \p VT has an
/// element count other than 1 and an element type other than i1, the result
/// is TypeWidenVector. Every other case is delegated to
/// TargetLoweringBase::getPreferredVectorAction.
1846 TargetLoweringBase::LegalizeTypeAction
1847 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1848 if (ExperimentalVectorWideningLegalization &&
1849 VT.getVectorNumElements() != 1 &&
1850 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1851 return TypeWidenVector;
// Single-element vectors, i1 vectors, and the non-experimental mode use the
// generic policy.
1853 return TargetLoweringBase::getPreferredVectorAction(VT);
/// Picks the value type produced by a SETCC, based on the operand type VT and
/// on the AVX-512 feature set of the subtarget.
///
/// For simple vector types the visible cases return an iN-element mask type
/// (vNi1) when the matching AVX-512 feature is present:
///  - 512-bit, 32/64-bit int or FP elements, hasAVX512: v8i1 / v16i1.
///  - 512-bit, i8/i16 elements, hasBWI: v32i1 / v64i1.
///  - 128/256-bit, 32/64-bit int or FP elements, hasVLX: v2i1 / v4i1 / v8i1.
///  - 128/256-bit, i8/i16 elements, hasBWI and hasVLX: v8i1 / v16i1 / v32i1.
/// Anything that does not return earlier yields VT with its element type
/// changed to an integer type.
1856 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
// i1 with AVX-512, i8 otherwise.
// NOTE(review): presumably this is the non-vector (scalar) case -- confirm
// against the guarding condition.
1859 return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
1861 if (VT.isSimple()) {
1862 MVT VVT = VT.getSimpleVT();
1863 const unsigned NumElts = VVT.getVectorNumElements();
1864 const MVT EltVT = VVT.getVectorElementType();
1865 if (VVT.is512BitVector()) {
1866 if (Subtarget->hasAVX512())
1867 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1868 EltVT == MVT::f32 || EltVT == MVT::f64)
// NOTE(review): the case labels below presumably switch on NumElts.
1870 case 8: return MVT::v8i1;
1871 case 16: return MVT::v16i1;
// Byte and word elements additionally require BWI.
1873 if (Subtarget->hasBWI())
1874 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1876 case 32: return MVT::v32i1;
1877 case 64: return MVT::v64i1;
// 128-bit and 256-bit vectors only get mask results with VLX.
1881 if (VVT.is256BitVector() || VVT.is128BitVector()) {
1882 if (Subtarget->hasVLX())
1883 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1884 EltVT == MVT::f32 || EltVT == MVT::f64)
1886 case 2: return MVT::v2i1;
1887 case 4: return MVT::v4i1;
1888 case 8: return MVT::v8i1;
1890 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1891 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1893 case 8: return MVT::v8i1;
1894 case 16: return MVT::v16i1;
1895 case 32: return MVT::v32i1;
// Fallback: same shape as VT, with integer elements.
1900 return VT.changeVectorElementTypeToInteger();
1903 /// Helper for getByValTypeAlignment to determine
1904 /// the desired ByVal argument alignment.
///
/// \param Ty       Type to inspect; array element types and struct member
///                 types are visited recursively.
/// \param MaxAlign In/out running maximum. It is only ever raised here, to
///                 the largest alignment computed for a nested element.
1905 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
// 128-bit vectors are the special case that drives the alignment.
1908 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1909 if (VTy->getBitWidth() == 128)
1911 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
// An array needs whatever its element type needs.
1912 unsigned EltAlign = 0;
1913 getMaxByValAlign(ATy->getElementType(), EltAlign);
1914 if (EltAlign > MaxAlign)
1915 MaxAlign = EltAlign;
1916 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
// A struct needs the largest requirement among its members; each member is
// evaluated starting from a fresh zero.
1917 for (auto *EltTy : STy->elements()) {
1918 unsigned EltAlign = 0;
1919 getMaxByValAlign(EltTy, EltAlign);
1920 if (EltAlign > MaxAlign)
1921 MaxAlign = EltAlign;
1928 /// Return the desired alignment for ByVal aggregate
1929 /// function arguments in the caller parameter area. For X86, aggregates
1930 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1931 /// are at 4-byte boundaries.
///
/// \param Ty The type of the ByVal argument.
/// \param DL Data layout, queried for the ABI alignment of \p Ty on 64-bit
///           subtargets.
1932 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1933 const DataLayout &DL) const {
1934 if (Subtarget->is64Bit()) {
1935 // Max of 8 and alignment of type.
1936 unsigned TyAlign = DL.getABITypeAlignment(Ty);
// Not 64-bit: the type is only scanned for a larger requirement when SSE1 is
// available.
1943 if (Subtarget->hasSSE1())
1944 getMaxByValAlign(Ty, Align);
1948 /// Returns the target specific optimal type for load
1949 /// and store operations as a result of memset, memcpy, and memmove
1950 /// lowering. If DstAlign is zero that means the destination
1951 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1952 /// means there isn't a need to check it against alignment requirement,
1953 /// probably because the source does not need to be loaded. If 'IsMemset' is
1954 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1955 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1956 /// source is constant so it does not need to be loaded.
1957 /// It returns EVT::Other if the type should be determined using generic
1958 /// target-independent logic.
1960 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1961 unsigned DstAlign, unsigned SrcAlign,
1962 bool IsMemset, bool ZeroMemset,
1964 MachineFunction &MF) const {
1965 const Function *F = MF.getFunction();
// The feature-dependent choices below are only considered for copies/moves
// and zero memsets, and only when the function does not carry the
// NoImplicitFloat attribute.
1966 if ((!IsMemset || ZeroMemset) &&
1967 !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
// Either unaligned 16-byte accesses are not slow on this subtarget, or both
// alignments are unconstrained (0) or at least 16 bytes.
1969 (!Subtarget->isUnalignedMem16Slow() ||
1970 ((DstAlign == 0 || DstAlign >= 16) &&
1971 (SrcAlign == 0 || SrcAlign >= 16)))) {
1973 // FIXME: Check if unaligned 32-byte accesses are slow.
// Feature checks run from the most capable (Int256) to the least (SSE1).
1974 if (Subtarget->hasInt256())
1976 if (Subtarget->hasFp256())
1979 if (Subtarget->hasSSE2())
1981 if (Subtarget->hasSSE1())
// Otherwise: at least 8 bytes, a non-string-constant source, a 32-bit
// subtarget and SSE2.
1983 } else if (!MemcpyStrSrc && Size >= 8 &&
1984 !Subtarget->is64Bit() &&
1985 Subtarget->hasSSE2()) {
1986 // Do not use f64 to lower memcpy if source is string constant. It's
1987 // better to use i32 to avoid the loads.
1991 // This is a compromise. If we reach here, unaligned accesses may be slow on
1992 // this target. However, creating smaller, aligned accesses could be even
1993 // slower and would certainly be a lot more code.
1994 if (Subtarget->is64Bit() && Size >= 8)
/// Reports whether \p VT is safe to use as a memory-operation type.
/// For MVT::f64 the answer is X86ScalarSSEf64.
1999 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
// NOTE(review): presumably the MVT::f32 branch, mirroring the f64 check
// below -- confirm against the guarding condition.
2001 return X86ScalarSSEf32;
2002 else if (VT == MVT::f64)
2003 return X86ScalarSSEf64;
/// Reports whether misaligned accesses of type \p VT are allowed, and writes
/// through the Fast pointer whether such an access is fast. Speed is decided
/// by the access size in bits and the subtarget's slow-unaligned-access flags.
2008 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
2013 switch (VT.getSizeInBits()) {
2015 // 8-byte and under are always assumed to be fast.
// Fast unless the subtarget flags unaligned 16-byte accesses as slow.
2019 *Fast = !Subtarget->isUnalignedMem16Slow();
// Fast unless the subtarget flags unaligned 32-byte accesses as slow.
2022 *Fast = !Subtarget->isUnalignedMem32Slow();
2024 // TODO: What about AVX-512 (512-bit) accesses?
2027 // Misaligned accesses of any size are always allowed.
2031 /// Return the entry encoding for a jump table in the
2032 /// current function. The returned value is a member of the
2033 /// MachineJumpTableInfo::JTEntryKind enum.
///
/// EK_Custom32 is returned when the relocation model is PIC and the subtarget
/// uses the GOT PIC style; otherwise the TargetLowering default is used.
2034 unsigned X86TargetLowering::getJumpTableEncoding() const {
2035 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2037 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2038 Subtarget->isPICStyleGOT())
2039 return MachineJumpTableInfo::EK_Custom32;
2041 // Otherwise, use the normal jump table encoding heuristics.
2042 return TargetLowering::getJumpTableEncoding();
/// Forwards the subtarget's soft-float setting.
2045 bool X86TargetLowering::useSoftFloat() const {
2046 return Subtarget->useSoftFloat();
/// Builds the expression for one custom-encoded jump table entry: a symbol
/// reference to \p MBB's symbol with the VK_GOTOFF variant kind, created in
/// \p Ctx. \p MJTI and \p uid are not used by the visible code.
///
/// Asserts that the relocation model is PIC and the subtarget uses the GOT
/// PIC style.
2050 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2051 const MachineBasicBlock *MBB,
2052 unsigned uid,MCContext &Ctx) const{
2053 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
2054 Subtarget->isPICStyleGOT());
2055 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2057 return MCSymbolRefExpr::create(MBB->getSymbol(),
2058 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2061 /// Returns relocation base for the given PIC jumptable.
///
/// On subtargets that are not 64-bit, the base is an X86ISD::GlobalBaseReg
/// node of pointer type, created with a default-constructed SDLoc.
2062 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2063 SelectionDAG &DAG) const {
2064 if (!Subtarget->is64Bit())
2065 // This doesn't have SDLoc associated with it, but is not really the
2066 // same as a Register.
2067 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2068 getPointerTy(DAG.getDataLayout()));
2072 /// This returns the relocation base for the given PIC jumptable,
2073 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
///
/// With the RIP-relative PIC style the generic TargetLowering expression is
/// used; otherwise the result is a reference to the function's PIC base
/// symbol.
2074 const MCExpr *X86TargetLowering::
2075 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2076 MCContext &Ctx) const {
2077 // X86-64 uses RIP relative addressing based on the jump table label.
2078 if (Subtarget->isPICStyleRIPRel())
2079 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2081 // Otherwise, the reference is relative to the PIC base.
2082 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
/// Maps a value type to a representative register class (RRC) and an
/// associated cost, returned as a pair.
///
/// Visible mappings:
///  - i8/i16/i32/i64: GR64 on 64-bit subtargets, GR32 otherwise.
///  - f32/f64 and the listed 128-bit and 256-bit vector types: VR128.
///  - one further case selects VR64.
/// The remaining path defers to TargetLowering::findRepresentativeClass.
2085 std::pair<const TargetRegisterClass *, uint8_t>
2086 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2088 const TargetRegisterClass *RRC = nullptr;
2090 switch (VT.SimpleTy) {
// NOTE(review): presumably the default label -- types without a special
// mapping use the generic implementation.
2092 return TargetLowering::findRepresentativeClass(TRI, VT);
2093 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2094 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
// NOTE(review): presumably the MMX (x86mmx) case -- confirm the case label.
2097 RRC = &X86::VR64RegClass;
2099 case MVT::f32: case MVT::f64:
2100 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2101 case MVT::v4f32: case MVT::v2f64:
2102 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
2104 RRC = &X86::VR128RegClass;
2107 return std::make_pair(RRC, Cost);
/// Reports the location of the stack cookie through the \p AddressSpace and
/// \p Offset reference parameters. The location depends on whether the
/// subtarget is 64-bit and, on 64-bit, on whether the Kernel code model is
/// in use.
2110 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
2111 unsigned &Offset) const {
// Targets other than Linux take this early branch.
2112 if (!Subtarget->isTargetLinux())
2115 if (Subtarget->is64Bit()) {
2116 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
2118 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
/// Returns the location of the SafeStack pointer.
///
/// Non-Android targets use the generic TargetLowering implementation. On
/// Android the result is a constant: the 32-bit integer Offset converted
/// (inttoptr) to a pointer to i8* in address space AddressSpace.
2130 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2131 if (!Subtarget->isTargetAndroid())
2132 return TargetLowering::getSafeStackPointerLocation(IRB);
2134 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2135 // definition of TLS_SLOT_SAFESTACK in
2136 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2137 unsigned AddressSpace, Offset;
2138 if (Subtarget->is64Bit()) {
2139 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2141 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
// Materialize the slot address as a constant expression.
2151 return ConstantExpr::getIntToPtr(
2152 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2153 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
/// Returns true when a cast between \p SrcAS and \p DestAS is a no-op, which
/// is the case when both address space numbers are below 256.
/// Callers must pass two different address spaces (asserted).
2156 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2157 unsigned DestAS) const {
2158 assert(SrcAS != DestAS && "Expected different address spaces!");
2160 return SrcAS < 256 && DestAS < 256;
2163 //===----------------------------------------------------------------------===//
2164 // Return Value Calling Convention Implementation
2165 //===----------------------------------------------------------------------===//
2167 #include "X86GenCallingConv.inc"
/// Checks whether the return values described by \p Outs can be lowered,
/// by running CCState::CheckReturn with the RetCC_X86 return convention.
2169 bool X86TargetLowering::CanLowerReturn(
2170 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2171 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
// RVLocs receives the locations assigned by the calling-convention state.
2172 SmallVector<CCValAssign, 16> RVLocs;
2173 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2174 return CCInfo.CheckReturn(Outs, RetCC_X86);
/// Scratch register list. The calling-convention argument is unnamed and
/// therefore unused.
2177 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
// Static list holding R11 followed by a 0 terminator.
2178 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2183 X86TargetLowering::LowerReturn(SDValue Chain,
2184 CallingConv::ID CallConv, bool isVarArg,
2185 const SmallVectorImpl<ISD::OutputArg> &Outs,
2186 const SmallVectorImpl<SDValue> &OutVals,
2187 SDLoc dl, SelectionDAG &DAG) const {
2188 MachineFunction &MF = DAG.getMachineFunction();
2189 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2191 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2192 report_fatal_error("X86 interrupts may not return any value");
2194 SmallVector<CCValAssign, 16> RVLocs;
2195 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2196 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2199 SmallVector<SDValue, 6> RetOps;
2200 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2201 // Operand #1 = Bytes To Pop
2202 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2205 // Copy the result values into the output registers.
2206 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2207 CCValAssign &VA = RVLocs[i];
2208 assert(VA.isRegLoc() && "Can only return in registers!");
2209 SDValue ValToCopy = OutVals[i];
2210 EVT ValVT = ValToCopy.getValueType();
2212 // Promote values to the appropriate types.
2213 if (VA.getLocInfo() == CCValAssign::SExt)
2214 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2215 else if (VA.getLocInfo() == CCValAssign::ZExt)
2216 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2217 else if (VA.getLocInfo() == CCValAssign::AExt) {
2218 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2219 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2221 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2223 else if (VA.getLocInfo() == CCValAssign::BCvt)
2224 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2226 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2227 "Unexpected FP-extend for return value.");
2229 // If this is x86-64, and we disabled SSE, we can't return FP values,
2230 // or SSE or MMX vectors.
2231 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2232 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2233 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2234 report_fatal_error("SSE register return with SSE disabled");
2236 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2237 // llvm-gcc has never done it right and no one has noticed, so this
2238 // should be OK for now.
2239 if (ValVT == MVT::f64 &&
2240 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2241 report_fatal_error("SSE2 register return with SSE2 disabled");
2243 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2244 // the RET instruction and handled by the FP Stackifier.
2245 if (VA.getLocReg() == X86::FP0 ||
2246 VA.getLocReg() == X86::FP1) {
2247 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2248 // change the value to the FP stack register class.
2249 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2250 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2251 RetOps.push_back(ValToCopy);
2252 // Don't emit a copytoreg.
2256 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2257 // which is returned in RAX / RDX.
2258 if (Subtarget->is64Bit()) {
2259 if (ValVT == MVT::x86mmx) {
2260 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2261 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2262 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2264 // If we don't have SSE2 available, convert to v4f32 so the generated
2265 // register is legal.
2266 if (!Subtarget->hasSSE2())
2267 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2272 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2273 Flag = Chain.getValue(1);
2274 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2277 // All x86 ABIs require that for returning structs by value we copy
2278 // the sret argument into %rax/%eax (depending on ABI) for the return.
2279 // We saved the argument into a virtual register in the entry block,
2280 // so now we copy the value out and into %rax/%eax.
2282 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2283 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2284 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2285 // either case FuncInfo->setSRetReturnReg() will have been called.
2286 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2287 SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
2288 getPointerTy(MF.getDataLayout()));
2291 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2292 X86::RAX : X86::EAX;
2293 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2294 Flag = Chain.getValue(1);
2296 // RAX/EAX now acts like a return value.
2298 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2301 RetOps[0] = Chain; // Update chain.
2303 // Add the flag if we have it.
2305 RetOps.push_back(Flag);
2307 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2308 if (CallConv == CallingConv::X86_INTR)
2309 opcode = X86ISD::IRET;
2310 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
// Inspects how the value produced by N is consumed, looking for the pattern
//   N -> (CopyToReg | FP_EXTEND) -> X86ISD::RET_FLAG.
// Only nodes with exactly one result value, and for which
// hasNUsesOfValue(1, 0) holds, are examined further. Chain is taken by
// reference and seeds the local TCChain; when the user is a CopyToReg,
// TCChain is replaced by that copy's operand 0.
// NOTE(review): the statements guarded by several conditions below, and the
// end of the function, are not visible in this excerpt, so the exact return
// values and any write-back to Chain are intentionally not documented here.
2313 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2314 if (N->getNumValues() != 1)
2316 if (!N->hasNUsesOfValue(1, 0))
2319 SDValue TCChain = Chain;
// The (single) user of N; it must be either a CopyToReg or an FP_EXTEND.
2320 SDNode *Copy = *N->use_begin();
2321 if (Copy->getOpcode() == ISD::CopyToReg) {
2322 // If the copy has a glue operand, we conservatively assume it isn't safe to
2323 // perform a tail call.
2324 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2326 TCChain = Copy->getOperand(0);
2327 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
// Walk every user of the copy/extend node; each one is checked against
// X86ISD::RET_FLAG and against the operand-count limits below.
2330 bool HasRet = false;
2331 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2333 if (UI->getOpcode() != X86ISD::RET_FLAG)
2335 // If we are returning more than one value, we can definitely
2336 // not make a tail call; see PR19530.
2337 if (UI->getNumOperands() > 4)
// With exactly four operands, the last one is required to be glue.
2339 if (UI->getNumOperands() == 4 &&
2340 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
// Chooses the type to which an extended argument or return value of type VT
// is widened. A minimum type (ReturnMVT) is selected first, converted to its
// register type via getRegisterType(), and the result is whichever of VT and
// that register type is wider (VT is returned unchanged unless it is strictly
// narrower).
//   Context    - LLVM context passed through to getRegisterType().
//   VT         - the value type being extended.
//   ExtendKind - the kind of extension; only ISD::ZERO_EXTEND is special-cased.
2353 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2354 ISD::NodeType ExtendKind) const {
2356 // TODO: Is this also valid on 32-bit?
// A zero-extended i1 on a 64-bit subtarget only needs to reach i8.
2357 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2358 ReturnMVT = MVT::i8;
// NOTE(review): presumably the else branch (the `else` line is not visible
// in this excerpt): every other case uses i32 as the minimum.
2360 ReturnMVT = MVT::i32;
2362 EVT MinVT = getRegisterType(Context, ReturnMVT);
2363 return VT.bitsLT(MinVT) ? MinVT : VT;
2366 /// Lower the result values of a call into the
2367 /// appropriate copies out of appropriate physical registers.
///
/// The return locations are computed with RetCC_X86. For each location a
/// CopyFromReg is emitted (threaded through Chain and InFlag) and the
/// resulting value is appended to InVals.
///   Chain    - incoming chain; updated after every register copy.
///   InFlag   - incoming glue; updated after every register copy.
///   CallConv - calling convention of the call being lowered.
///   isVarArg - whether the callee is variadic (forwarded to CCState).
///   Ins      - descriptions of the values returned by the call.
///   InVals   - output list receiving one SDValue per return location.
2370 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2371 CallingConv::ID CallConv, bool isVarArg,
2372 const SmallVectorImpl<ISD::InputArg> &Ins,
2373 SDLoc dl, SelectionDAG &DAG,
2374 SmallVectorImpl<SDValue> &InVals) const {
2376 // Assign locations to each value returned by this call.
2377 SmallVector<CCValAssign, 16> RVLocs;
2378 bool Is64Bit = Subtarget->is64Bit();
2379 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2381 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2383 // Copy all of the result registers out of their specified physreg.
2384 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2385 CCValAssign &VA = RVLocs[i];
2386 EVT CopyVT = VA.getLocVT();
2388 // If this is x86-64, and we disabled SSE, we can't return FP values
// (the same restriction is applied to results flagged inreg on 32-bit).
2389 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2390 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2391 report_fatal_error("SSE register return with SSE disabled");
2394 // If we prefer to use the value in xmm registers, copy it out as f80 and
2395 // use a truncate to move it from fp stack reg to xmm reg.
2396 bool RoundAfterCopy = false;
2397 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2398 isScalarFPTypeInSSEReg(VA.getValVT())) {
// NOTE(review): a line between the condition and this assignment is not
// visible here; presumably it changes CopyVT, otherwise this comparison
// would always be false.
2400 RoundAfterCopy = (CopyVT != VA.getLocVT());
// Result #1 of the CopyFromReg becomes the new chain; result #0 is the value
// and result #2 is the glue used for the next copy.
2403 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2404 CopyVT, InFlag).getValue(1);
2405 SDValue Val = Chain.getValue(0);
2408 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2409 // This truncation won't change the value.
2410 DAG.getIntPtrConstant(1, dl));
// i1 (scalar or vector-of-i1) results that were extended in their location
// are truncated back to the declared value type.
2412 if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
2413 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2415 InFlag = Chain.getValue(2);
2416 InVals.push_back(Val);
2422 //===----------------------------------------------------------------------===//
2423 // C & StdCall & Fast Calling Convention implementation
2424 //===----------------------------------------------------------------------===//
2425 // The StdCall calling convention seems to be standard for many Windows API
2426 // routines and related code. It differs from the C calling convention only
2427 // slightly: the callee cleans up the stack, not the caller. Symbols should
2428 // also be decorated in a special way. It doesn't support any vector arguments.
2429 // For info on fast calling convention see Fast Calling Convention (tail call)
2430 // implementation LowerX86_32FastCCCallTo.
2432 /// CallIsStructReturn - Determines whether a call uses struct return
/// semantics. The enumeration below is the result type of that
/// classification. The enumerators used elsewhere in this file are
/// NotStructReturn, RegStructReturn and StackStructReturn.
/// NOTE(review): the enumerator list itself is not visible in this excerpt.
2434 enum StructReturnType {
// Classifies a call by the flags of its first outgoing argument (Outs[0]):
//   - not marked sret           -> NotStructReturn
//   - marked sret and inreg     -> RegStructReturn
//   - marked sret, not inreg    -> StackStructReturn
// NOTE(review): the guard in front of the first return is not visible in this
// excerpt; presumably it handles an empty Outs list before Outs[0] is read.
2439 static StructReturnType
2440 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
2442 return NotStructReturn;
2444 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2445 if (!Flags.isSRet())
2446 return NotStructReturn;
2447 if (Flags.isInReg())
2448 return RegStructReturn;
2449 return StackStructReturn;
2452 /// Determines whether a function uses struct return semantics.
/// The decision is based on the flags of the first incoming argument
/// (Ins[0]): without sret the result is NotStructReturn; with sret it is
/// RegStructReturn when the argument is also inreg and StackStructReturn
/// otherwise.
/// NOTE(review): the guard in front of the first return is not visible in
/// this excerpt; presumably it handles an empty Ins list.
2453 static StructReturnType
2454 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
2456 return NotStructReturn;
2458 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2459 if (!Flags.isSRet())
2460 return NotStructReturn;
2461 if (Flags.isInReg())
2462 return RegStructReturn;
2463 return StackStructReturn;
2466 /// Make a copy of an aggregate at address specified by "Src" to address
2467 /// "Dst" with size and alignment information specified by the specific
2468 /// parameter attribute. The copy will be passed as a byval function parameter.
///
/// The size comes from Flags.getByValSize() (materialized as an i32
/// constant) and the alignment from Flags.getByValAlign(). The memcpy is
/// requested as non-volatile, always-inline and not a tail call, with
/// default-constructed MachinePointerInfo for both source and destination.
/// Returns the value produced by DAG.getMemcpy().
2470 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2471 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2473 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2475 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2476 /*isVolatile*/false, /*AlwaysInline=*/true,
2477 /*isTailCall*/false,
2478 MachinePointerInfo(), MachinePointerInfo());
2481 /// Return true if the calling convention is one that we can guarantee TCO for.
/// (TCO = tail call optimization.) The qualifying conventions are exactly
/// CallingConv::Fast, CallingConv::GHC, CallingConv::HiPE and
/// CallingConv::HHVM; every other convention yields false.
2482 static bool canGuaranteeTCO(CallingConv::ID CC) {
2483 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2484 CC == CallingConv::HiPE || CC == CallingConv::HHVM);
2487 /// Return true if we might ever do TCO for calls with this calling convention.
/// The C-style and callee-pop conventions listed below are handled by
/// explicit case labels; the remaining return defers to canGuaranteeTCO(CC).
/// NOTE(review): the switch header, the statement shared by the listed cases
/// and the label in front of the final return are not visible in this excerpt.
2488 static bool mayTailCallThisCC(CallingConv::ID CC) {
2490 // C calling conventions:
2491 case CallingConv::C:
2492 case CallingConv::X86_64_Win64:
2493 case CallingConv::X86_64_SysV:
2494 // Callee pop conventions:
2495 case CallingConv::X86_ThisCall:
2496 case CallingConv::X86_StdCall:
2497 case CallingConv::X86_VectorCall:
2498 case CallingConv::X86_FastCall:
2501 return canGuaranteeTCO(CC);
2505 /// Return true if the function is being made into a tailcall target by
2506 /// changing its ABI.
/// This is the case only when both conditions hold: the caller-supplied
/// GuaranteedTailCallOpt flag is set and canGuaranteeTCO(CC) accepts the
/// calling convention.
2507 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2508 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
// Decides whether the call instruction CI is a candidate for tail-call
// emission. The visible checks are: CI must be marked as a tail call, the
// enclosing function's "disable-tail-calls" attribute must not have the
// string value "true", and the callee's calling convention must be accepted
// by mayTailCallThisCC().
// NOTE(review): the declarations of Attr and CS and all return statements are
// not visible in this excerpt.
2511 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
// Attribute is looked up on the function containing CI (block -> function).
2513 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2514 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2518 CallingConv::ID CalleeCC = CS.getCallingConv();
2519 if (!mayTailCallThisCC(CalleeCC))
// Lowers one incoming argument that was assigned a stack location (VA).
// A fixed frame object is created at VA.getLocMemOffset(); for byval
// arguments the frame index itself is returned, otherwise a load from the
// slot is returned (truncated back to the value type when an i1-based value
// was extended in memory).
//   Chain    - chain used for the emitted load.
//   CallConv - calling convention; X86_INTR gets special slot offsets.
//   Ins      - incoming-argument descriptions; Ins[i] supplies the flags.
//   VA       - the location assignment for this argument.
//   MFI      - frame info in which the fixed object is created.
// NOTE(review): the final parameter (used as `i` below) and the declarations
// of ValVT and Offset are on lines not visible in this excerpt.
2526 X86TargetLowering::LowerMemArgument(SDValue Chain,
2527 CallingConv::ID CallConv,
2528 const SmallVectorImpl<ISD::InputArg> &Ins,
2529 SDLoc dl, SelectionDAG &DAG,
2530 const CCValAssign &VA,
2531 MachineFrameInfo *MFI,
2533 // Create the nodes corresponding to a load from this parameter slot.
2534 ISD::ArgFlagsTy Flags = Ins[i].Flags;
// The slot is immutable unless guaranteed TCO is in effect for this
// convention or the argument is byval.
2535 bool AlwaysUseMutable = shouldGuaranteeTCO(
2536 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2537 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2540 // If value is passed by pointer we have address passed instead of the value
// Values whose scalar type is i1 and that were extended in their location are
// loaded with the (wider) location type and truncated at the end.
2542 bool ExtendedInMem = VA.isExtInLoc() &&
2543 VA.getValVT().getScalarType() == MVT::i1;
2545 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2546 ValVT = VA.getLocVT();
2548 ValVT = VA.getValVT();
2550 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2551 // taken by a return address.
2553 if (CallConv == CallingConv::X86_INTR) {
2554 const X86Subtarget& Subtarget =
2555 static_cast<const X86Subtarget&>(DAG.getSubtarget());
2556 // X86 interrupts may take one or two arguments.
2557 // On the stack there will be no return address as in regular call.
2558 // Offset of last argument needs to be set to -4/-8 bytes.
2559 // Where offset of the first argument out of two, should be set to 0 bytes.
// ((i + 1) % Ins.size() - 1) is 0 for the first of two arguments and -1 for
// the last argument, scaled by the 4- or 8-byte slot size.
// NOTE(review): if `i` is unsigned, the "- 1" relies on modular wraparound
// before the result is stored into Offset; confirm the declared types.
2560 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2563 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2564 // changed with more analysis.
2565 // In case of tail call optimization mark all arguments mutable. Since they
2566 // could be overwritten by lowering of arguments in case of a tail call.
2567 if (Flags.isByVal()) {
2568 unsigned Bytes = Flags.getByValSize();
2569 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2570 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2571 // Adjust SP offset of interrupt parameter.
2572 if (CallConv == CallingConv::X86_INTR) {
2573 MFI->setObjectOffset(FI, Offset);
// byval: the caller of this function receives the address of the slot.
2575 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// Non-byval: create a slot sized for ValVT (in bytes) and load from it.
2577 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2578 VA.getLocMemOffset(), isImmutable);
2579 // Adjust SP offset of interrupt parameter.
2580 if (CallConv == CallingConv::X86_INTR) {
2581 MFI->setObjectOffset(FI, Offset);
2584 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2585 SDValue Val = DAG.getLoad(
2586 ValVT, dl, Chain, FIN,
2587 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
2589 return ExtendedInMem ?
2590 DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
2594 // FIXME: Get this from tablegen.
// Returns the list of general-purpose registers used for integer argument
// passing on a 64-bit subtarget (asserted). For conventions that
// isCallingConvWin64() accepts the list is RCX, RDX, R8, R9; otherwise it is
// RDI, RSI, RDX, RCX, R8, R9. Both lists are function-local static arrays, so
// the returned ArrayRef stays valid after the call.
2595 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2596 const X86Subtarget *Subtarget) {
2597 assert(Subtarget->is64Bit());
2599 if (Subtarget->isCallingConvWin64(CallConv)) {
2600 static const MCPhysReg GPR64ArgRegsWin64[] = {
2601 X86::RCX, X86::RDX, X86::R8, X86::R9
2603 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2606 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2607 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2609 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2612 // FIXME: Get this from tablegen.
// Returns the list of XMM registers relevant to argument passing on a 64-bit
// subtarget (asserted). In the general case this is XMM0..XMM7, held in a
// function-local static array. Win64 conventions, and functions for which
// soft-float is in use, the NoImplicitFloat attribute is set, or SSE1 is
// unavailable, are handled by earlier branches.
// NOTE(review): the return statements of those earlier branches are not
// visible in this excerpt; presumably they return an empty list.
2613 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2614 CallingConv::ID CallConv,
2615 const X86Subtarget *Subtarget) {
2616 assert(Subtarget->is64Bit());
2617 if (Subtarget->isCallingConvWin64(CallConv)) {
2618 // The XMM registers which might contain var arg parameters are shadowed
2619 // in their paired GPR. So we only need to save the GPR to their home
2621 // TODO: __vectorcall will change this.
2625 const Function *Fn = MF.getFunction();
2626 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2627 bool isSoftFloat = Subtarget->useSoftFloat();
// Soft-float and NoImplicitFloat are asserted never to be set together.
2628 assert(!(isSoftFloat && NoImplicitFloatOps) &&
2629 "SSE register cannot be used when SSE is disabled!");
2630 if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
2631 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2635 static const MCPhysReg XMMArgRegs64Bit[] = {
2636 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2637 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2639 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
// Lowers the incoming (formal) arguments of the current function.
//
// The visible steps are:
//   1. Validate X86_INTR signatures and assign a location to every argument
//      with CC_X86 (after reserving a 32-byte shadow area for Win64).
//   2. For each location, either copy the value out of its live-in register
//      (choosing a register class from the location type) or lower it from
//      its stack slot through LowerMemArgument(); indirect values are then
//      loaded. Each result is appended to InVals.
//   3. Save any sret argument into a virtual register recorded in FuncInfo.
//   4. For variadic functions that use va_start, create the varargs frame
//      index and, on 64-bit, spill the unallocated argument registers.
//   5. For variadic functions containing musttail calls, copy the forwarded
//      registers into virtual registers.
//   6. Record bytes-to-pop-on-return and the argument stack size in FuncInfo,
//      and create the PSPSym stack slot for the CoreCLR EH personality.
//
//   Chain    - incoming chain; updated as copies/stores are emitted.
//   CallConv - calling convention of the function being lowered.
//   isVarArg - whether the function is variadic.
//   Ins      - descriptions of the incoming arguments.
//   InVals   - output list receiving one SDValue per argument location.
// NOTE(review): several lines (some declarations, `if`/`else` headers and the
// final return) are not visible in this excerpt.
2642 SDValue X86TargetLowering::LowerFormalArguments(
2643 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2644 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
2645 SmallVectorImpl<SDValue> &InVals) const {
2646 MachineFunction &MF = DAG.getMachineFunction();
2647 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2648 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
// An externally linked function named "main" on Cygwin/MinGW targets is
// forced to use a frame pointer.
2650 const Function* Fn = MF.getFunction();
2651 if (Fn->hasExternalLinkage() &&
2652 Subtarget->isTargetCygMing() &&
2653 Fn->getName() == "main")
2654 FuncInfo->setForceFramePointer(true);
2656 MachineFrameInfo *MFI = MF.getFrameInfo();
2657 bool Is64Bit = Subtarget->is64Bit();
2658 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2660 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2661 "Var args not supported with calling convention fastcc, ghc or hipe");
// An interrupt handler is legal with one argument, or with two when the
// second has the pointer-sized integer type (i64 on 64-bit, i32 otherwise).
2663 if (CallConv == CallingConv::X86_INTR) {
2664 bool isLegal = Ins.size() == 1 ||
2665 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2666 (!Is64Bit && Ins[1].VT == MVT::i32)));
2668 report_fatal_error("X86 interrupts may take one or two arguments");
2671 // Assign locations to all of the incoming arguments.
2672 SmallVector<CCValAssign, 16> ArgLocs;
2673 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2675 // Allocate shadow area for Win64
2677 CCInfo.AllocateStack(32, 8);
2679 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
// LastVal tracks the value number of the previous location so that a value
// assigned to more than one location is caught by the assert below.
2681 unsigned LastVal = ~0U;
2683 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2684 CCValAssign &VA = ArgLocs[i];
2685 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
2687 assert(VA.getValNo() != LastVal &&
2688 "Don't support value assigned to multiple locs yet");
2690 LastVal = VA.getValNo();
2692 if (VA.isRegLoc()) {
// Pick the register class that matches the location type.
2693 EVT RegVT = VA.getLocVT();
2694 const TargetRegisterClass *RC;
2695 if (RegVT == MVT::i32)
2696 RC = &X86::GR32RegClass;
2697 else if (Is64Bit && RegVT == MVT::i64)
2698 RC = &X86::GR64RegClass;
2699 else if (RegVT == MVT::f32)
2700 RC = &X86::FR32RegClass;
2701 else if (RegVT == MVT::f64)
2702 RC = &X86::FR64RegClass;
2703 else if (RegVT == MVT::f128)
2704 RC = &X86::FR128RegClass;
2705 else if (RegVT.is512BitVector())
2706 RC = &X86::VR512RegClass;
2707 else if (RegVT.is256BitVector())
2708 RC = &X86::VR256RegClass;
2709 else if (RegVT.is128BitVector())
2710 RC = &X86::VR128RegClass;
2711 else if (RegVT == MVT::x86mmx)
2712 RC = &X86::VR64RegClass;
2713 else if (RegVT == MVT::i1)
2714 RC = &X86::VK1RegClass;
2715 else if (RegVT == MVT::v8i1)
2716 RC = &X86::VK8RegClass;
2717 else if (RegVT == MVT::v16i1)
2718 RC = &X86::VK16RegClass;
2719 else if (RegVT == MVT::v32i1)
2720 RC = &X86::VK32RegClass;
2721 else if (RegVT == MVT::v64i1)
2722 RC = &X86::VK64RegClass;
2724 llvm_unreachable("Unknown argument type!");
// Mark the physical register live-in and read it through a virtual register.
2726 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2727 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2729 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2730 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2732 if (VA.getLocInfo() == CCValAssign::SExt)
2733 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2734 DAG.getValueType(VA.getValVT()));
2735 else if (VA.getLocInfo() == CCValAssign::ZExt)
2736 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2737 DAG.getValueType(VA.getValVT()));
2738 else if (VA.getLocInfo() == CCValAssign::BCvt)
2739 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
2741 if (VA.isExtInLoc()) {
2742 // Handle MMX values passed in XMM regs.
2743 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
2744 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2746 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
// Not a register location: the argument lives in a stack slot.
2749 assert(VA.isMemLoc());
2750 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2753 // If value is passed via pointer - do a load.
2754 if (VA.getLocInfo() == CCValAssign::Indirect)
2755 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2756 MachinePointerInfo(), false, false, false, 0);
2758 InVals.push_back(ArgValue);
2761 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2762 // All x86 ABIs require that for returning structs by value we copy the
2763 // sret argument into %rax/%eax (depending on ABI) for the return. Save
2764 // the argument into a virtual register so that we can access it from the
2766 if (Ins[i].Flags.isSRet()) {
2767 unsigned Reg = FuncInfo->getSRetReturnReg();
2769 MVT PtrTy = getPointerTy(DAG.getDataLayout());
2770 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2771 FuncInfo->setSRetReturnReg(Reg);
// The copy is chained from the entry node and merged into Chain with a
// TokenFactor.
2773 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2774 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2779 unsigned StackSize = CCInfo.getNextStackOffset();
2780 // Align stack specially for tail calls.
2781 if (shouldGuaranteeTCO(CallConv,
2782 MF.getTarget().Options.GuaranteedTailCallOpt))
2783 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2785 // If the function takes variable number of arguments, make a frame index for
2786 // the start of the first vararg value... for expansion of llvm.va_start. We
2787 // can skip this if there are no va_start calls.
2788 if (MFI->hasVAStart() &&
2789 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2790 CallConv != CallingConv::X86_ThisCall))) {
2791 FuncInfo->setVarArgsFrameIndex(
2792 MFI->CreateFixedObject(1, StackSize, true));
2795 // Figure out if XMM registers are in use.
2796 assert(!(Subtarget->useSoftFloat() &&
2797 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
2798 "SSE register cannot be used when SSE is disabled!");
2800 // 64-bit calling conventions support varargs and register parameters, so we
2801 // have to do extra work to spill them in the prologue.
2802 if (Is64Bit && isVarArg && MFI->hasVAStart()) {
2803 // Find the first unallocated argument registers.
2804 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2805 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2806 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
2807 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
2808 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2809 "SSE register cannot be used when SSE is disabled!");
2811 // Gather all the live in physical registers.
2812 SmallVector<SDValue, 6> LiveGPRs;
2813 SmallVector<SDValue, 8> LiveXMMRegs;
// Only the registers not already consumed by named arguments are gathered
// (the slices start at the first unallocated register).
2815 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2816 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2818 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
// AL is read as a live-in whenever XMM argument registers exist; it is later
// passed as an operand of VASTART_SAVE_XMM_REGS.
2820 if (!ArgXMMs.empty()) {
2821 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2822 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2823 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2824 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2825 LiveXMMRegs.push_back(
2826 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2831 // Get to the caller-allocated home save location. Add 8 to account
2832 // for the return address.
2833 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2834 FuncInfo->setRegSaveFrameIndex(
2835 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2836 // Fixup to set vararg frame on shadow area (4 x i64).
2838 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2840 // For X86-64, if there are vararg parameters that are passed via
2841 // registers, then we must store them to their spots on the stack so
2842 // they may be loaded by dereferencing the result of va_next.
// The save area is laid out as all GPR slots (8 bytes each) followed by all
// XMM slots (16 bytes each); the offsets point at the first unused slot of
// each kind.
2843 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2844 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2845 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2846 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2849 // Store the integer parameter registers.
2850 SmallVector<SDValue, 8> MemOps;
2851 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2852 getPointerTy(DAG.getDataLayout()));
2853 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2854 for (SDValue Val : LiveGPRs) {
2855 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2856 RSFIN, DAG.getIntPtrConstant(Offset, dl));
// Each store is chained on result #1 of the CopyFromReg that produced Val.
2858 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2859 MachinePointerInfo::getFixedStack(
2860 DAG.getMachineFunction(),
2861 FuncInfo->getRegSaveFrameIndex(), Offset),
2863 MemOps.push_back(Store);
2867 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2868 // Now store the XMM (fp + vector) parameter registers.
// Operand order: chain, AL value, register-save frame index, FP offset, then
// the live XMM register values.
2869 SmallVector<SDValue, 12> SaveXMMOps;
2870 SaveXMMOps.push_back(Chain);
2871 SaveXMMOps.push_back(ALVal);
2872 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2873 FuncInfo->getRegSaveFrameIndex(), dl));
2874 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2875 FuncInfo->getVarArgsFPOffset(), dl));
2876 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2878 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2879 MVT::Other, SaveXMMOps));
// Merge all register-save operations into the chain.
2882 if (!MemOps.empty())
2883 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2886 if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
2887 // Find the largest legal vector type.
2888 MVT VecVT = MVT::Other;
2889 // FIXME: Only some x86_32 calling conventions support AVX512.
2890 if (Subtarget->hasAVX512() &&
2891 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
2892 CallConv == CallingConv::Intel_OCL_BI)))
2893 VecVT = MVT::v16f32;
2894 else if (Subtarget->hasAVX())
2896 else if (Subtarget->hasSSE2())
2899 // We forward some GPRs and some vector types.
2900 SmallVector<MVT, 2> RegParmTypes;
2901 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
2902 RegParmTypes.push_back(IntVT);
2903 if (VecVT != MVT::Other)
2904 RegParmTypes.push_back(VecVT);
2906 // Compute the set of forwarded registers. The rest are scratch.
2907 SmallVectorImpl<ForwardedRegister> &Forwards =
2908 FuncInfo->getForwardedMustTailRegParms();
2909 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
2911 // Conservatively forward AL on x86_64, since it might be used for varargs.
2912 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
2913 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2914 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
2917 // Copy all forwards from physical to virtual registers.
2918 for (ForwardedRegister &F : Forwards) {
2919 // FIXME: Can we use a less constrained schedule?
// F.VReg is replaced by a fresh virtual register that receives the value, so
// later users of the forward list see the new register.
2920 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2921 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
2922 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
2926 // Some CCs need callee pop.
2927 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2928 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2929 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2930 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
2931 // X86 interrupts must pop the error code if present
2932 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
2934 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2935 // If this is an sret function, the return should pop the hidden pointer.
2936 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
2937 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2938 argsAreStructReturn(Ins) == StackStructReturn)
2939 FuncInfo->setBytesToPopOnReturn(4);
2943 // RegSaveFrameIndex is X86-64 only.
2944 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2945 if (CallConv == CallingConv::X86_FastCall ||
2946 CallConv == CallingConv::X86_ThisCall)
2947 // fastcc functions can't have varargs.
2948 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2951 FuncInfo->setArgumentStackSize(StackSize);
// Functions with WinEH info and the CoreCLR personality get an 8-byte,
// 8-byte-aligned stack object whose frame index is stored as PSPSymFrameIdx.
2953 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
2954 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
2955 if (Personality == EHPersonality::CoreCLR) {
2957 // TODO: Add a mechanism to frame lowering that will allow us to indicate
2958 // that we'd prefer this slot be allocated towards the bottom of the frame
2959 // (i.e. near the stack pointer after allocating the frame). Every
2960 // funclet needs a copy of this slot in its (mostly empty) frame, and the
2961 // offset from the bottom of this and each funclet's frame must be the
2962 // same, so the size of funclets' (mostly empty) frames is dictated by
2963 // how far this slot is from the bottom (since they allocate just enough
2964 // space to accommodate holding this slot at the correct offset).
2965 int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
2966 EHInfo->PSPSymFrameIdx = PSPSymFI;
// Emits the node that places one outgoing call argument into its stack
// location. The destination address is StackPtr-relative, built from
// VA.getLocMemOffset(). byval arguments are copied with
// CreateCopyOfByValArgument(); all other arguments are written with a plain
// store.
//   Chain    - chain the copy/store is attached to.
//   StackPtr - base pointer for the outgoing argument area.
//   Arg      - the argument value (or source address for byval).
//   VA       - location assignment supplying the stack offset.
//   Flags    - argument flags; only isByVal() is consulted here.
// NOTE(review): the second operand line of the ADD is not visible in this
// excerpt; presumably it combines StackPtr with PtrOff.
2974 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
2975 SDValue StackPtr, SDValue Arg,
2976 SDLoc dl, SelectionDAG &DAG,
2977 const CCValAssign &VA,
2978 ISD::ArgFlagsTy Flags) const {
2979 unsigned LocMemOffset = VA.getLocMemOffset();
2980 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
2981 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2983 if (Flags.isByVal())
2984 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
2986 return DAG.getStore(
2987 Chain, dl, Arg, PtrOff,
2988 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
2992 /// Emit a load of return address if tail call
2993 /// optimization is performed and it is required.
///
/// OutRetAddr is an output parameter: it is first set to the return-address
/// frame index and then overwritten with the pointer-typed load from that
/// slot. The function returns result #1 of that load node.
/// NOTE(review): result #1 is presumably the load's output chain; confirm.
/// IsTailCall, Is64Bit and FPDiff are not referenced by the visible lines.
2995 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
2996 SDValue &OutRetAddr, SDValue Chain,
2997 bool IsTailCall, bool Is64Bit,
2998 int FPDiff, SDLoc dl) const {
2999 // Adjust the Return address stack slot.
3000 EVT VT = getPointerTy(DAG.getDataLayout());
3001 OutRetAddr = getReturnAddressFrameIndex(DAG);
3003 // Load the "old" Return address.
3004 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
3005 false, false, false, 0);
3006 return SDValue(OutRetAddr.getNode(), 1);
3009 /// Emit a store of the return address if tail call
3010 /// optimization is performed and it is required (FPDiff!=0).
///
/// When FPDiff is zero nothing is emitted and Chain is returned unchanged.
/// Otherwise a fixed frame object of SlotSize bytes is created at offset
/// FPDiff - SlotSize, and RetAddrFrIdx is stored to it.
///   RetAddrFrIdx - the value stored into the new slot.
///   PtrVT        - pointer type used for the new frame index node.
///   SlotSize     - size in bytes of the return-address slot.
///   FPDiff       - displacement of the return-address slot; 0 means no move.
/// NOTE(review): the end of the function is not visible in this excerpt;
/// presumably the updated Chain is returned.
3011 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3012 SDValue Chain, SDValue RetAddrFrIdx,
3013 EVT PtrVT, unsigned SlotSize,
3014 int FPDiff, SDLoc dl) {
3015 // Store the return address to the appropriate stack slot.
3016 if (!FPDiff) return Chain;
3017 // Calculate the new stack slot for the return address.
// FPDiff is widened to int64_t before the unsigned SlotSize is subtracted.
3018 int NewReturnAddrFI =
3019 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3021 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3022 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3023 MachinePointerInfo::getFixedStack(
3024 DAG.getMachineFunction(), NewReturnAddrFI),
3029 /// Returns a vector_shuffle mask for an movs{s|d}, movd
3030 /// operation of specified width.
///
/// More precisely, the function builds the mask and returns the shuffle node
/// created by DAG.getVectorShuffle(VT, dl, V1, V2, mask). The first mask
/// entry is NumElems (the element count of VT); the loop fills in entries for
/// positions 1 .. NumElems-1.
/// NOTE(review): the loop body and the V2 parameter line are not visible in
/// this excerpt; presumably each remaining entry is its own index i.
3031 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
3033 unsigned NumElems = VT.getVectorNumElements();
3034 SmallVector<int, 8> Mask;
3035 Mask.push_back(NumElems);
3036 for (unsigned i = 1; i != NumElems; ++i)
3038 return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
3042 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3043 SmallVectorImpl<SDValue> &InVals) const {
3044 SelectionDAG &DAG = CLI.DAG;
3046 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3047 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3048 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3049 SDValue Chain = CLI.Chain;
3050 SDValue Callee = CLI.Callee;
3051 CallingConv::ID CallConv = CLI.CallConv;
3052 bool &isTailCall = CLI.IsTailCall;
3053 bool isVarArg = CLI.IsVarArg;
3055 MachineFunction &MF = DAG.getMachineFunction();
3056 bool Is64Bit = Subtarget->is64Bit();
3057 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
3058 StructReturnType SR = callIsStructReturn(Outs);
3059 bool IsSibcall = false;
3060 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3061 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3063 if (CallConv == CallingConv::X86_INTR)
3064 report_fatal_error("X86 interrupts may not be called directly");
3066 if (Attr.getValueAsString() == "true")
3069 if (Subtarget->isPICStyleGOT() &&
3070 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3071 // If we are using a GOT, disable tail calls to external symbols with
3072 // default visibility. Tail calling such a symbol requires using a GOT
3073 // relocation, which forces early binding of the symbol. This breaks code
3074 // that requires lazy function symbol resolution. Using musttail or
3075 // GuaranteedTailCallOpt will override this.
3076 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3077 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3078 G->getGlobal()->hasDefaultVisibility()))
3082 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
3084 // Force this to be a tail call. The verifier rules are enough to ensure
3085 // that we can lower this successfully without moving the return address
3088 } else if (isTailCall) {
3089 // Check if it's really possible to do a tail call.
3090 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3091 isVarArg, SR != NotStructReturn,
3092 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3093 Outs, OutVals, Ins, DAG);
3095 // Sibcalls are automatically detected tailcalls which do not require
3097 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3104 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
3105 "Var args not supported with calling convention fastcc, ghc or hipe");
3107 // Analyze operands of the call, assigning locations to each operand.
3108 SmallVector<CCValAssign, 16> ArgLocs;
3109 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3111 // Allocate shadow area for Win64
3113 CCInfo.AllocateStack(32, 8);
3115 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
3117 // Get a count of how many bytes are to be pushed on the stack.
3118 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3120 // This is a sibcall. The memory operands are available in caller's
3121 // own caller's stack.
3123 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3124 canGuaranteeTCO(CallConv))
3125 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3128 if (isTailCall && !IsSibcall && !IsMustTail) {
3129 // Lower arguments at fp - stackoffset + fpdiff.
3130 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3132 FPDiff = NumBytesCallerPushed - NumBytes;
3134 // Set the delta of movement of the returnaddr stackslot.
3135 // But only set if delta is greater than previous delta.
3136 if (FPDiff < X86Info->getTCReturnAddrDelta())
3137 X86Info->setTCReturnAddrDelta(FPDiff);
3140 unsigned NumBytesToPush = NumBytes;
3141 unsigned NumBytesToPop = NumBytes;
3143 // If we have an inalloca argument, all stack space has already been allocated
3144 // for us and is right at the top of the stack. We don't support multiple
3145 // arguments passed in memory when using inalloca.
3146 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3148 if (!ArgLocs.back().isMemLoc())
3149 report_fatal_error("cannot use inalloca attribute on a register "
3151 if (ArgLocs.back().getLocMemOffset() != 0)
3152 report_fatal_error("any parameter with the inalloca attribute must be "
3153 "the only memory argument");
3157 Chain = DAG.getCALLSEQ_START(
3158 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3160 SDValue RetAddrFrIdx;
3161 // Load return address for tail calls.
3162 if (isTailCall && FPDiff)
3163 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3164 Is64Bit, FPDiff, dl);
3166 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3167 SmallVector<SDValue, 8> MemOpChains;
3170 // Walk the register/memloc assignments, inserting copies/loads. In the case
3171 // of tail call optimization arguments are handled later.
3172 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3173 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3174 // Skip inalloca arguments, they have already been written.
3175 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3176 if (Flags.isInAlloca())
3179 CCValAssign &VA = ArgLocs[i];
3180 EVT RegVT = VA.getLocVT();
3181 SDValue Arg = OutVals[i];
3182 bool isByVal = Flags.isByVal();
3184 // Promote the value if needed.
3185 switch (VA.getLocInfo()) {
3186 default: llvm_unreachable("Unknown loc info!");
3187 case CCValAssign::Full: break;
3188 case CCValAssign::SExt:
3189 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3191 case CCValAssign::ZExt:
3192 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3194 case CCValAssign::AExt:
3195 if (Arg.getValueType().isVector() &&
3196 Arg.getValueType().getVectorElementType() == MVT::i1)
3197 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3198 else if (RegVT.is128BitVector()) {
3199 // Special case: passing MMX values in XMM registers.
3200 Arg = DAG.getBitcast(MVT::i64, Arg);
3201 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3202 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3204 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3206 case CCValAssign::BCvt:
3207 Arg = DAG.getBitcast(RegVT, Arg);
3209 case CCValAssign::Indirect: {
3210 // Store the argument.
3211 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3212 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3213 Chain = DAG.getStore(
3214 Chain, dl, Arg, SpillSlot,
3215 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3222 if (VA.isRegLoc()) {
3223 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3224 if (isVarArg && IsWin64) {
3225 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3226 // shadow reg if callee is a varargs function.
3227 unsigned ShadowReg = 0;
3228 switch (VA.getLocReg()) {
3229 case X86::XMM0: ShadowReg = X86::RCX; break;
3230 case X86::XMM1: ShadowReg = X86::RDX; break;
3231 case X86::XMM2: ShadowReg = X86::R8; break;
3232 case X86::XMM3: ShadowReg = X86::R9; break;
3235 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3237 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3238 assert(VA.isMemLoc());
3239 if (!StackPtr.getNode())
3240 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3241 getPointerTy(DAG.getDataLayout()));
3242 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3243 dl, DAG, VA, Flags));
3247 if (!MemOpChains.empty())
3248 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3250 if (Subtarget->isPICStyleGOT()) {
3251 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3254 RegsToPass.push_back(std::make_pair(
3255 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3256 getPointerTy(DAG.getDataLayout()))));
3258 // If we are tail calling and generating PIC/GOT style code load the
3259 // address of the callee into ECX. The value in ecx is used as target of
3260 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3261 // for tail calls on PIC/GOT architectures. Normally we would just put the
3262 // address of GOT into ebx and then call target@PLT. But for tail calls
3263 // ebx would be restored (since ebx is callee saved) before jumping to the
3266 // Note: The actual moving to ECX is done further down.
3267 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3268 if (G && !G->getGlobal()->hasLocalLinkage() &&
3269 G->getGlobal()->hasDefaultVisibility())
3270 Callee = LowerGlobalAddress(Callee, DAG);
3271 else if (isa<ExternalSymbolSDNode>(Callee))
3272 Callee = LowerExternalSymbol(Callee, DAG);