//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
15 #include "X86ISelLowering.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "X86CallingConv.h"
18 #include "X86InstrBuilder.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86TargetMachine.h"
21 #include "X86TargetObjectFile.h"
22 #include "llvm/ADT/SmallBitVector.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/VariadicFunction.h"
28 #include "llvm/CodeGen/IntrinsicLowering.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineFunction.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineJumpTableInfo.h"
33 #include "llvm/CodeGen/MachineModuleInfo.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/IR/CallSite.h"
36 #include "llvm/IR/CallingConv.h"
37 #include "llvm/IR/Constants.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalAlias.h"
41 #include "llvm/IR/GlobalVariable.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/MC/MCAsmInfo.h"
45 #include "llvm/MC/MCContext.h"
46 #include "llvm/MC/MCExpr.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/MathExtras.h"
52 #include "llvm/Target/TargetOptions.h"
53 #include "X86IntrinsicsInfo.h"
59 #define DEBUG_TYPE "x86-isel"
61 STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> ExperimentalVectorWideningLegalization(
    "x86-experimental-vector-widening-legalization", cl::init(false),
    cl::desc("Enable an experimental vector type legalization through widening "
             "rather than promotion."),
    cl::Hidden);

static cl::opt<bool> ExperimentalVectorShuffleLowering(
    "x86-experimental-vector-shuffle-lowering", cl::init(true),
    cl::desc("Enable an experimental vector shuffle lowering code path."),
    cl::Hidden);

// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                       SDValue V2);
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
                                SelectionDAG &DAG, SDLoc dl,
                                unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / vectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract from UNDEF is UNDEF.
  if (Vec.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(ResultVT);

  // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR.
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();

  // This is the index of the first element of the vectorWidth-bit chunk
  // we want.
  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
                               * ElemsPerChunk);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
                       makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
                                    ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
  SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
                               VecIdx);

  return Result;
}
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
/// instructions or a simple subregister reference. Idx is an index in the
/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
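///
/// For example, for a v8i32 source, IdxVal 5 falls in the upper 128-bit half:
/// ElemsPerChunk is 4, the index is normalized down to 4, and elements 4..7
/// come back as a v4i32.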
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
                                   SelectionDAG &DAG, SDLoc dl) {
  assert((Vec.getValueType().is256BitVector() ||
          Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
  return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
}

/// Generate a DAG to grab 256-bits from a 512-bit vector.
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
                                   SelectionDAG &DAG, SDLoc dl) {
  assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
  return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
}
static SDValue InsertSubVector(SDValue Result, SDValue Vec,
                               unsigned IdxVal, SelectionDAG &DAG,
                               SDLoc dl, unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");
  // Inserting UNDEF is Result
  if (Vec.getOpcode() == ISD::UNDEF)
    return Result;
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  EVT ResultVT = Result.getValueType();

  // Insert the relevant vectorWidth bits.
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();

  // This is the index of the first element of the vectorWidth-bit chunk
  // we want.
  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
                               * ElemsPerChunk);

  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
                     VecIdx);
}

/// Generate a DAG to put 128-bits into a vector > 128 bits. This
/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
/// simple superregister reference. Idx is an index in the 128 bits
/// we want. It need not be aligned to a 128-bit boundary. That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
                                  unsigned IdxVal, SelectionDAG &DAG,
                                  SDLoc dl) {
  assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}

static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
                                  unsigned IdxVal, SelectionDAG &DAG,
                                  SDLoc dl) {
  assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
/// large BUILD_VECTORS.
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
  return Insert128BitVector(V, V2, NumElems / 2, DAG, dl);
}

static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
  return Insert256BitVector(V, V2, NumElems / 2, DAG, dl);
}
static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO()) {
    if (TT.getArch() == Triple::x86_64)
      return new X86_64MachoTargetObjectFile();
    return new TargetLoweringObjectFileMachO();
  }

  if (TT.isOSLinux())
    return new X86LinuxTargetObjectFile();
  if (TT.isOSBinFormatELF())
    return new TargetLoweringObjectFileELF();
  if (TT.isKnownWindowsMSVCEnvironment())
    return new X86WindowsTargetObjectFile();
  if (TT.isOSBinFormatCOFF())
    return new TargetLoweringObjectFileCOFF();
  llvm_unreachable("unknown subtarget type");
}
// FIXME: This should stop caching the target machine as soon as
// we can remove resetOperationActions et al.
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM)
    : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  TD = getDataLayout();

  resetOperationActions();
}

void X86TargetLowering::resetOperationActions() {
  const TargetMachine &TM = getTargetMachine();
  static bool FirstTimeThrough = true;

  // If none of the target options have changed, then we don't need to reset
  // the operation actions.
  if (!FirstTimeThrough && TO == TM.Options) return;

  if (!FirstTimeThrough) {
    // Reinitialize the actions.
    initActions();
    FirstTimeThrough = false;
  }

  TO = TM.Options;
  // Set up the TargetLowering object.
  static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };

  // X86 is weird. It always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
  // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
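  // (The 0/-1 convention matches what the SSE compare instructions produce:
  // PCMPEQD, for example, sets each equal lane to all ones and every other
  // lane to zero, so vector setcc results are directly usable as masks.)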
  // For 64-bit, since we have so many registers, use the ILP scheduler.
  // For 32-bit, use the register pressure specific scheduling.
  // For Atom, always use ILP scheduling.
  if (Subtarget->isAtom())
    setSchedulingPreference(Sched::ILP);
  else if (Subtarget->is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  const X86RegisterInfo *RegInfo =
      TM.getSubtarget<X86Subtarget>().getRegisterInfo();
  setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());

  // Bypass expensive divides on Atom when compiling with -O2.
  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
    addBypassSlowDiv(32, 8);
    if (Subtarget->is64Bit())
      addBypassSlowDiv(64, 16);
  }
  if (Subtarget->isTargetKnownWindowsMSVC()) {
    // Set up Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::SREM_I64, "_allrem");
    setLibcallName(RTLIB::UREM_I64, "_aullrem");
    setLibcallName(RTLIB::MUL_I64, "_allmul");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);

    // The _ftol2 runtime function has an unusual calling conv, which
    // is modeled by a special pseudo-instruction.
    setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
    setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
  }

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetWindowsGNU()) {
    // MS runtime is weird: it exports _setjmp, but longjmp (no underscore)!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }
  // Set up the register classes.
  addRegisterClass(MVT::i8, &X86::GR8RegClass);
  addRegisterClass(MVT::i16, &X86::GR16RegClass);
  addRegisterClass(MVT::i32, &X86::GR32RegClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, &X86::GR64RegClass);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8,  Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8,  Expand);
  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
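  // (After a UCOMISS/UCOMISD, an unordered result sets ZF, PF and CF, so
  // "ordered equal" needs ZF set *and* PF clear, while "unordered not-equal"
  // needs ZF clear *or* PF set -- hence the two-condition expansion above.)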
  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  } else if (!TM.Options.UseSoftFloat) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD for other targets.
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);

  if (!TM.Options.UseSoftFloat) {
    // SSE has no i16 to fp conversion, only i32.
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
      // f32 and f64 cases are Legal, f80 case is not.
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Promote);
  }
  // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else if (!TM.Options.UseSoftFloat) {
    // Since AVX is a superset of SSE3, only check for SSE here.
    if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  }

  if (isTargetFTOL()) {
    // Use the _ftol2 runtime function, which has a pseudo-instruction
    // to handle its weird calling convention.
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  }
  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::BITCAST, MVT::f64, Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    }
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
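  //
  // For example, a single 32-bit DIV/IDIV leaves the quotient in EAX and the
  // remainder in EDX, so an x/y and x%y over the same operands can share one
  // division once they are expressed as SDIVREM/UDIVREM.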
  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
  setOperationAction(ISD::BR_CC, MVT::f80, Expand);
  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::f80, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::FP_ROUND_INREG, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f80, Expand);
  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  // Promote the i8 variants and force them on up to i32 which has a shorter
  // encoding.
  setOperationAction(ISD::CTTZ, MVT::i8, Promote);
  AddPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Promote);
  AddPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32);
  if (Subtarget->hasBMI()) {
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i16, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  }

  if (Subtarget->hasLZCNT()) {
    // When promoting the i8 variants, force them to i32 for a shorter
    // encoding.
    setOperationAction(ISD::CTLZ, MVT::i8, Promote);
    AddPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Promote);
    AddPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
  } else {
    setOperationAction(ISD::CTLZ, MVT::i8, Custom);
    setOperationAction(ISD::CTLZ, MVT::i16, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Custom);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::CTLZ, MVT::i64, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    }
  }
  // Special handling for half-precision floating point conversions.
  // If we don't have F16C support, then lower half float conversions
  // into library calls.
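  // (Without F16C there is no VCVTPH2PS/VCVTPS2PH, so the Expand action below
  // makes the legalizer emit calls to the f16<->f32 conversion helpers from
  // the runtime library instead.)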
  if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  }

  // There's never any support for operations beyond MVT::f32.
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f80, MVT::f16, Expand);

  if (Subtarget->hasPOPCNT()) {
    setOperationAction(ISD::CTPOP, MVT::i8, Promote);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i8, Expand);
    setOperationAction(ISD::CTPOP, MVT::i16, Expand);
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  if (!Subtarget->hasMOVBE())
    setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT, MVT::i8, Custom);
  setOperationAction(ISD::SELECT, MVT::i16, Custom);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT, MVT::f80, Custom);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  setOperationAction(ISD::SETCC, MVT::f80, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
    setOperationAction(ISD::SETCC, MVT::i64, Custom);
  }
  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
  // handling; they are a light-weight setjmp/longjmp replacement used for
  // continuations, user-level threading, and the like. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
    setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86).
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  // Expand certain atomics.
  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  if (Subtarget->hasCmpxchg16b()) {
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
    // TargetInfo::X86_64ABIBuiltinVaList
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  } else {
    // TargetInfo::CharPtrBuiltinVaList
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
  if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::FR64RegClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f64, Custom);
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f64, Custom);
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // Lower this to FGETSIGNx86 plus an AND.
    setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, &X86::FR32RegClass);
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS, MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG, MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    }
  } else if (!TM.Options.UseSoftFloat) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
    setOperationAction(ISD::UNDEF, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f64, Expand);
      setOperationAction(ISD::FSIN, MVT::f32, Expand);
      setOperationAction(ISD::FCOS, MVT::f64, Expand);
      setOperationAction(ISD::FCOS, MVT::f32, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // We don't support FMA.
  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Long double always uses X87.
  if (!TM.Options.UseSoftFloat) {
    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    setOperationAction(ISD::UNDEF, MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
      addLegalFPImmediate(TmpFlt); // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt); // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2); // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
    }

    if (!TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FSIN, MVT::f80, Expand);
      setOperationAction(ISD::FCOS, MVT::f80, Expand);
      setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
    }

    setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
    setOperationAction(ISD::FCEIL, MVT::f80, Expand);
    setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
    setOperationAction(ISD::FRINT, MVT::f80, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    setOperationAction(ISD::FMA, MVT::f80, Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f80, Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);
  setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
  setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    setOperationAction(ISD::ADD, VT, Expand);
    setOperationAction(ISD::SUB, VT, Expand);
    setOperationAction(ISD::FADD, VT, Expand);
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FSUB, VT, Expand);
    setOperationAction(ISD::MUL, VT, Expand);
    setOperationAction(ISD::FMUL, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::LOAD, VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FSINCOS, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOWI, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FFLOOR, VT, Expand);
    setOperationAction(ISD::FCEIL, VT, Expand);
    setOperationAction(ISD::FTRUNC, VT, Expand);
    setOperationAction(ISD::FRINT, VT, Expand);
    setOperationAction(ISD::FNEARBYINT, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::SHL, VT, Expand);
    setOperationAction(ISD::SRA, VT, Expand);
    setOperationAction(ISD::SRL, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    setOperationAction(ISD::TRUNCATE, VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    setOperationAction(ISD::VSELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction(VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);

    // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
    // types; we have to deal with them whether we ask for Expansion or not.
    // Setting Expand causes its own optimisation problems though, so leave
    // them legal.
    if (VT.getVectorElementType() == MVT::i1)
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
  }
  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
    addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
    // No operations on x86mmx supported, everything uses intrinsics.
  }

  // MMX-sized vectors (other than x86mmx) are expected to be expanded
  // into smaller operations.
  setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
  setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
  setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
  setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
  setOperationAction(ISD::AND, MVT::v8i8, Expand);
  setOperationAction(ISD::AND, MVT::v4i16, Expand);
  setOperationAction(ISD::AND, MVT::v2i32, Expand);
  setOperationAction(ISD::AND, MVT::v1i64, Expand);
  setOperationAction(ISD::OR, MVT::v8i8, Expand);
  setOperationAction(ISD::OR, MVT::v4i16, Expand);
  setOperationAction(ISD::OR, MVT::v2i32, Expand);
  setOperationAction(ISD::OR, MVT::v1i64, Expand);
  setOperationAction(ISD::XOR, MVT::v8i8, Expand);
  setOperationAction(ISD::XOR, MVT::v4i16, Expand);
  setOperationAction(ISD::XOR, MVT::v2i32, Expand);
  setOperationAction(ISD::XOR, MVT::v1i64, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
  setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
  setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
  setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, &X86::VR128RegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
    setOperationAction(ISD::FABS, MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
  }
  if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, &X86::VR128RegClass);

    // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
    addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
    addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
    addRegisterClass(MVT::v2i64, &X86::VR128RegClass);

    setOperationAction(ISD::ADD, MVT::v16i8, Legal);
    setOperationAction(ISD::ADD, MVT::v8i16, Legal);
    setOperationAction(ISD::ADD, MVT::v4i32, Legal);
    setOperationAction(ISD::ADD, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
    setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
    setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v16i8, Legal);
    setOperationAction(ISD::SUB, MVT::v8i16, Legal);
    setOperationAction(ISD::SUB, MVT::v4i32, Legal);
    setOperationAction(ISD::SUB, MVT::v2i64, Legal);
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
    setOperationAction(ISD::FABS, MVT::v2f64, Custom);

    setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors.
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      // Do not attempt to custom lower non-128-bit vectors.
      if (!VT.is128BitVector())
        continue;
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    // We support custom legalizing of sext and anyext loads for specific
    // memory vector types which we can load as a scalar (or sequence of
    // scalars) and extend in-register to a legal 128-bit vector type. For sext
    // loads these must work with a single scalar load.
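    // For instance, a sign-extending load of a v4i8 can be done as a single
    // i32 scalar load, placed into a vector register, and then sign-extended
    // in-register up to v4i32.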
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Custom);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v8i8, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // Do not attempt to promote non-128-bit vectors.
      if (!VT.is128BitVector())
        continue;

      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v2i64);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v2i64);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v2i64);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);

    setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    // As there is no 64-bit GPR available, we need to build a special custom
    // sequence to convert from v2i32 to v2f32.
    if (!Subtarget->is64Bit())
      setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);

    setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);

    setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);

    setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
    setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
  }
  if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FRINT, MVT::f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FRINT, MVT::f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL, MVT::v4i32, Legal);

    setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
    // There is no BLENDI for byte vectors. We don't need to custom lower
    // some vselects for now.
    setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);

    // SSE41 brings specific instructions for doing vector sign extend even in
    // cases where we don't have SRA.
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Custom);
    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, Custom);

    // i8 and i16 vectors are custom because the source register and source
    // memory operand types are not the same width. f32 vectors are custom
    // since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    // FIXME: these should be Legal, but that's only for the case where
    // the index is constant. For now custom expand to deal with that.
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }
  }
  if (Subtarget->hasSSE2()) {
    setOperationAction(ISD::SRL, MVT::v8i16, Custom);
    setOperationAction(ISD::SRL, MVT::v16i8, Custom);

    setOperationAction(ISD::SHL, MVT::v8i16, Custom);
    setOperationAction(ISD::SHL, MVT::v16i8, Custom);

    setOperationAction(ISD::SRA, MVT::v8i16, Custom);
    setOperationAction(ISD::SRA, MVT::v16i8, Custom);

    // In the customized shift lowering, the legal cases in AVX2 will be
    // recognized.
    setOperationAction(ISD::SRL, MVT::v2i64, Custom);
    setOperationAction(ISD::SRL, MVT::v4i32, Custom);

    setOperationAction(ISD::SHL, MVT::v2i64, Custom);
    setOperationAction(ISD::SHL, MVT::v4i32, Custom);

    setOperationAction(ISD::SRA, MVT::v4i32, Custom);
  }
  if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
    addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
    addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
    addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
    addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
    addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
    addRegisterClass(MVT::v4f64, &X86::VR256RegClass);

    setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i64, Legal);

    setOperationAction(ISD::FADD, MVT::v8f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
    setOperationAction(ISD::FABS, MVT::v8f32, Custom);

    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
    setOperationAction(ISD::FABS, MVT::v4f64, Custom);

    // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
    // even though v8i16 is a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);

    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
    setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);

    setOperationAction(ISD::SRL, MVT::v16i16, Custom);
    setOperationAction(ISD::SRL, MVT::v32i8, Custom);

    setOperationAction(ISD::SHL, MVT::v16i16, Custom);
    setOperationAction(ISD::SHL, MVT::v32i8, Custom);

    setOperationAction(ISD::SRA, MVT::v16i16, Custom);
    setOperationAction(ISD::SRA, MVT::v32i8, Custom);

    setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
    setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
    setOperationAction(ISD::SETCC, MVT::v4i64, Custom);

    setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
    setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
    setOperationAction(ISD::SELECT, MVT::v8f32, Custom);

    setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
    setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
    setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);

    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);

    if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
      setOperationAction(ISD::FMA, MVT::v8f32, Legal);
      setOperationAction(ISD::FMA, MVT::v4f64, Legal);
      setOperationAction(ISD::FMA, MVT::v4f32, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::f32, Legal);
      setOperationAction(ISD::FMA, MVT::f64, Legal);
    }
    if (Subtarget->hasInt256()) {
      setOperationAction(ISD::ADD, MVT::v4i64, Legal);
      setOperationAction(ISD::ADD, MVT::v8i32, Legal);
      setOperationAction(ISD::ADD, MVT::v16i16, Legal);
      setOperationAction(ISD::ADD, MVT::v32i8, Legal);

      setOperationAction(ISD::SUB, MVT::v4i64, Legal);
      setOperationAction(ISD::SUB, MVT::v8i32, Legal);
      setOperationAction(ISD::SUB, MVT::v16i16, Legal);
      setOperationAction(ISD::SUB, MVT::v32i8, Legal);

      setOperationAction(ISD::MUL, MVT::v4i64, Custom);
      setOperationAction(ISD::MUL, MVT::v8i32, Legal);
      setOperationAction(ISD::MUL, MVT::v16i16, Legal);
      // Don't lower v32i8 because there is no 128-bit byte mul.

      setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
      setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
      setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
      setOperationAction(ISD::MULHS, MVT::v16i16, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
      setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
    } else {
      setOperationAction(ISD::ADD, MVT::v4i64, Custom);
      setOperationAction(ISD::ADD, MVT::v8i32, Custom);
      setOperationAction(ISD::ADD, MVT::v16i16, Custom);
      setOperationAction(ISD::ADD, MVT::v32i8, Custom);

      setOperationAction(ISD::SUB, MVT::v4i64, Custom);
      setOperationAction(ISD::SUB, MVT::v8i32, Custom);
      setOperationAction(ISD::SUB, MVT::v16i16, Custom);
      setOperationAction(ISD::SUB, MVT::v32i8, Custom);

      setOperationAction(ISD::MUL, MVT::v4i64, Custom);
      setOperationAction(ISD::MUL, MVT::v8i32, Custom);
      setOperationAction(ISD::MUL, MVT::v16i16, Custom);
      // Don't lower v32i8 because there is no 128-bit byte mul.
    }

    // In the customized shift lowering, the legal cases in AVX2 will be
    // recognized.
    setOperationAction(ISD::SRL, MVT::v4i64, Custom);
    setOperationAction(ISD::SRL, MVT::v8i32, Custom);

    setOperationAction(ISD::SHL, MVT::v4i64, Custom);
    setOperationAction(ISD::SHL, MVT::v8i32, Custom);

    setOperationAction(ISD::SRA, MVT::v8i32, Custom);
1323 // Custom lower several nodes for 256-bit types.
1324 for (int i = MVT::FIRST_VECTOR_VALUETYPE;
1325 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
1326 MVT VT = (MVT::SimpleValueType)i;
1328 // Extract subvector is special because the value type
1329 // (result) is 128-bit but the source is 256-bit wide.
1330 if (VT.is128BitVector())
1331 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1333 // Do not attempt to custom lower other non-256-bit vectors
1334 if (!VT.is256BitVector())
1337 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1338 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1339 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1340 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1341 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1342 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1343 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1346 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
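// (Promotion here means the operation is bitcast to v4i64, performed on that
// type, and the result bitcast back; AddPromotedToType records the v4i64
// destination type for each promoted opcode.)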
1347 for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
1348 MVT VT = (MVT::SimpleValueType)i;
1350 // Do not attempt to promote non-256-bit vectors
1351 if (!VT.is256BitVector())
1354 setOperationAction(ISD::AND, VT, Promote);
1355 AddPromotedToType (ISD::AND, VT, MVT::v4i64);
1356 setOperationAction(ISD::OR, VT, Promote);
1357 AddPromotedToType (ISD::OR, VT, MVT::v4i64);
1358 setOperationAction(ISD::XOR, VT, Promote);
1359 AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
1360 setOperationAction(ISD::LOAD, VT, Promote);
1361 AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
1362 setOperationAction(ISD::SELECT, VT, Promote);
1363 AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
1367 if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
1368 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1369 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1370 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1371 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1373 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1374 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1375 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1377 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1378 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1379 setOperationAction(ISD::XOR, MVT::i1, Legal);
1380 setOperationAction(ISD::OR, MVT::i1, Legal);
1381 setOperationAction(ISD::AND, MVT::i1, Legal);
1382 setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal);
1383 setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
1384 setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
1385 setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
1386 setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
1387 setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
1389 setOperationAction(ISD::FADD, MVT::v16f32, Legal);
1390 setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
1391 setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
1392 setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
1393 setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
1394 setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
1396 setOperationAction(ISD::FADD, MVT::v8f64, Legal);
1397 setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
1398 setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
1399 setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
1400 setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
1401 setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
1402 setOperationAction(ISD::FMA, MVT::v8f64, Legal);
1403 setOperationAction(ISD::FMA, MVT::v16f32, Legal);
1405 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
1406 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
1407 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
1408 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
1409 if (Subtarget->is64Bit()) {
1410 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
1411 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
1412 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
1413 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
1415 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1416 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1417 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1418 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1419 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1420 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1421 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1422 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1423 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1424 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1426 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1427 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1428 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1429 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1430 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1431 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1432 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1433 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1434 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1435 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1436 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1437 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1438 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1440 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1441 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1442 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1443 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1444 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1445 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
1447 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1448 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1450 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1452 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1453 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1454 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1455 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1456 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1457 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1458 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1459 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1460 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1462 setOperationAction(ISD::ADD, MVT::v8i64, Legal);
1463 setOperationAction(ISD::ADD, MVT::v16i32, Legal);
1465 setOperationAction(ISD::SUB, MVT::v8i64, Legal);
1466 setOperationAction(ISD::SUB, MVT::v16i32, Legal);
1468 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1470 setOperationAction(ISD::SRL, MVT::v8i64, Custom);
1471 setOperationAction(ISD::SRL, MVT::v16i32, Custom);
1473 setOperationAction(ISD::SHL, MVT::v8i64, Custom);
1474 setOperationAction(ISD::SHL, MVT::v16i32, Custom);
1476 setOperationAction(ISD::SRA, MVT::v8i64, Custom);
1477 setOperationAction(ISD::SRA, MVT::v16i32, Custom);
1479 setOperationAction(ISD::AND, MVT::v8i64, Legal);
1480 setOperationAction(ISD::OR, MVT::v8i64, Legal);
1481 setOperationAction(ISD::XOR, MVT::v8i64, Legal);
1482 setOperationAction(ISD::AND, MVT::v16i32, Legal);
1483 setOperationAction(ISD::OR, MVT::v16i32, Legal);
1484 setOperationAction(ISD::XOR, MVT::v16i32, Legal);
1486 if (Subtarget->hasCDI()) {
1487 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1488 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1491 // Custom lower several nodes.
1492 for (int i = MVT::FIRST_VECTOR_VALUETYPE;
1493 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
1494 MVT VT = (MVT::SimpleValueType)i;
1496 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1497 // Extract subvector is special because the value type
1498 // (result) is 256/128-bit but the source is 512-bit wide.
1499 if (VT.is128BitVector() || VT.is256BitVector())
1500 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1502 if (VT.getVectorElementType() == MVT::i1)
1503 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1505 // Do not attempt to custom lower other non-512-bit vectors
1506 if (!VT.is512BitVector())
1509 if (EltSize >= 32) {
1510 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1511 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1512 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1513 setOperationAction(ISD::VSELECT, VT, Legal);
1514 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1515 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1516 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1519 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1520 MVT VT = (MVT::SimpleValueType)i;
1522 // Do not attempt to promote non-512-bit vectors.
1523 if (!VT.is512BitVector())
1526 setOperationAction(ISD::SELECT, VT, Promote);
1527 AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
1531 if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
1532 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1533 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1535 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1536 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1538 setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
1539 setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
1540 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1541 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1543 for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
1544 const MVT VT = (MVT::SimpleValueType)i;
1546 const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
1548 // Do not attempt to promote non-512-bit vectors.
1549 if (!VT.is512BitVector())
1552 if (EltSize < 32) {
1553 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1554 setOperationAction(ISD::VSELECT, VT, Legal);
1559 if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
1560 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1561 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1563 setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
1564 setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
1565 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
1568 // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
1569 // of this type with custom code.
1570 for (int VT = MVT::FIRST_VECTOR_VALUETYPE;
1571 VT != MVT::LAST_VECTOR_VALUETYPE; VT++) {
1572 setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
1576 // We want to custom lower some of our intrinsics.
1577 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1578 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1579 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1580 if (!Subtarget->is64Bit())
1581 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1583 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1584 // handle type legalization for these operations here.
1586 // FIXME: We really should do custom legalization for addition and
1587 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1588 // than generic legalization for 64-bit multiplication-with-overflow, though.
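// Rough illustration of the mapping: a call such as
//   { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// becomes an ADD that sets EFLAGS plus a read of the overflow flag (SETO for
// signed, SETB/SETC for unsigned); this is a sketch, not the exact DAG.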
1589 for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
1590 // Add/Sub/Mul with overflow operations are custom lowered.
1592 setOperationAction(ISD::SADDO, VT, Custom);
1593 setOperationAction(ISD::UADDO, VT, Custom);
1594 setOperationAction(ISD::SSUBO, VT, Custom);
1595 setOperationAction(ISD::USUBO, VT, Custom);
1596 setOperationAction(ISD::SMULO, VT, Custom);
1597 setOperationAction(ISD::UMULO, VT, Custom);
1601 if (!Subtarget->is64Bit()) {
1602 // These libcalls are not available in 32-bit.
1603 setLibcallName(RTLIB::SHL_I128, nullptr);
1604 setLibcallName(RTLIB::SRL_I128, nullptr);
1605 setLibcallName(RTLIB::SRA_I128, nullptr);
1608 // Combine sin / cos into one node or libcall if possible.
1609 if (Subtarget->hasSinCos()) {
1610 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1611 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1612 if (Subtarget->isTargetDarwin()) {
1613 // For MacOSX, we don't want the normal expansion of a libcall to
1614 // sincos. We want to issue a libcall to __sincos_stret to avoid memory traffic.
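// Rough illustration: a pair of calls
//   %s = call double @sin(double %x)
//   %c = call double @cos(double %x)
// on the same argument can be combined into one FSINCOS node that lowers to a
// single __sincos_stret call returning both results (a sketch; the exact ABI
// handling lives in the custom lowering).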
1616 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1617 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1621 if (Subtarget->isTargetWin64()) {
1622 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1623 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1624 setOperationAction(ISD::SREM, MVT::i128, Custom);
1625 setOperationAction(ISD::UREM, MVT::i128, Custom);
1626 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1627 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1630 // We have target-specific dag combine patterns for the following nodes:
1631 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1632 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1633 setTargetDAGCombine(ISD::VSELECT);
1634 setTargetDAGCombine(ISD::SELECT);
1635 setTargetDAGCombine(ISD::SHL);
1636 setTargetDAGCombine(ISD::SRA);
1637 setTargetDAGCombine(ISD::SRL);
1638 setTargetDAGCombine(ISD::OR);
1639 setTargetDAGCombine(ISD::AND);
1640 setTargetDAGCombine(ISD::ADD);
1641 setTargetDAGCombine(ISD::FADD);
1642 setTargetDAGCombine(ISD::FSUB);
1643 setTargetDAGCombine(ISD::FMA);
1644 setTargetDAGCombine(ISD::SUB);
1645 setTargetDAGCombine(ISD::LOAD);
1646 setTargetDAGCombine(ISD::STORE);
1647 setTargetDAGCombine(ISD::ZERO_EXTEND);
1648 setTargetDAGCombine(ISD::ANY_EXTEND);
1649 setTargetDAGCombine(ISD::SIGN_EXTEND);
1650 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1651 setTargetDAGCombine(ISD::TRUNCATE);
1652 setTargetDAGCombine(ISD::SINT_TO_FP);
1653 setTargetDAGCombine(ISD::SETCC);
1654 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1655 setTargetDAGCombine(ISD::BUILD_VECTOR);
1656 if (Subtarget->is64Bit())
1657 setTargetDAGCombine(ISD::MUL);
1658 setTargetDAGCombine(ISD::XOR);
1660 computeRegisterProperties();
1662 // On Darwin, -Os means optimize for size without hurting performance, so
1663 // do not reduce the limit.
1664 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1665 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
1666 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1667 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1668 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1669 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
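// For example, with MaxStoresPerMemset == 16 a small @llvm.memset may be
// expanded inline into up to 16 stores rather than a call to memset
// (illustrative; the store type is chosen by getOptimalMemOpType below).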
1670 setPrefLoopAlignment(4); // 2^4 bytes.
1672 // Predictable cmovs don't hurt on Atom because it's in-order.
1673 PredictableSelectIsExpensive = !Subtarget->isAtom();
1675 setPrefFunctionAlignment(4); // 2^4 bytes.
1677 verifyIntrinsicTables();
1680 // This has so far only been implemented for 64-bit MachO.
1681 bool X86TargetLowering::useLoadStackGuardNode() const {
1682 return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO &&
1683 Subtarget->is64Bit();
1686 TargetLoweringBase::LegalizeTypeAction
1687 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1688 if (ExperimentalVectorWideningLegalization &&
1689 VT.getVectorNumElements() != 1 &&
1690 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1691 return TypeWidenVector;
1693 return TargetLoweringBase::getPreferredVectorAction(VT);
1696 EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
1698 return Subtarget->hasAVX512() ? MVT::i1 : MVT::i8;
1700 const unsigned NumElts = VT.getVectorNumElements();
1701 const EVT EltVT = VT.getVectorElementType();
1702 if (VT.is512BitVector()) {
1703 if (Subtarget->hasAVX512())
1704 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1705 EltVT == MVT::f32 || EltVT == MVT::f64)
1707 case 8: return MVT::v8i1;
1708 case 16: return MVT::v16i1;
1710 if (Subtarget->hasBWI())
1711 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1713 case 32: return MVT::v32i1;
1714 case 64: return MVT::v64i1;
1718 if (VT.is256BitVector() || VT.is128BitVector()) {
1719 if (Subtarget->hasVLX())
1720 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1721 EltVT == MVT::f32 || EltVT == MVT::f64)
1723 case 2: return MVT::v2i1;
1724 case 4: return MVT::v4i1;
1725 case 8: return MVT::v8i1;
1727 if (Subtarget->hasBWI() && Subtarget->hasVLX())
1728 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1730 case 8: return MVT::v8i1;
1731 case 16: return MVT::v16i1;
1732 case 32: return MVT::v32i1;
1736 return VT.changeVectorElementTypeToInteger();
1739 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1740 /// the desired ByVal argument alignment.
1741 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1744 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1745 if (VTy->getBitWidth() == 128)
1747 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1748 unsigned EltAlign = 0;
1749 getMaxByValAlign(ATy->getElementType(), EltAlign);
1750 if (EltAlign > MaxAlign)
1751 MaxAlign = EltAlign;
1752 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1753 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1754 unsigned EltAlign = 0;
1755 getMaxByValAlign(STy->getElementType(i), EltAlign);
1756 if (EltAlign > MaxAlign)
1757 MaxAlign = EltAlign;
1764 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1765 /// function arguments in the caller parameter area. For X86, aggregates
1766 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
1767 /// are at 4-byte boundaries.
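/// For example, on a 32-bit target with SSE1 a struct containing a
/// <4 x float> member is reported as 16-byte aligned, while a struct of plain
/// i32 fields keeps the 4-byte default (illustrative).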
1768 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
1769 if (Subtarget->is64Bit()) {
1770 // Max of 8 and alignment of type.
1771 unsigned TyAlign = TD->getABITypeAlignment(Ty);
1778 if (Subtarget->hasSSE1())
1779 getMaxByValAlign(Ty, Align);
1783 /// getOptimalMemOpType - Returns the target specific optimal type for load
1784 /// and store operations as a result of memset, memcpy, and memmove
1785 /// lowering. If DstAlign is zero, the destination alignment can satisfy any
1786 /// constraint. Similarly, if SrcAlign is zero there is no need to check it
1787 /// against an alignment requirement,
1788 /// probably because the source does not need to be loaded. If 'IsMemset' is
1789 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1790 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1791 /// source is constant so it does not need to be loaded.
1792 /// It returns EVT::Other if the type should be determined using generic
1793 /// target-independent logic.
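/// For instance, a 32-byte zero memset on a target with AVX and fast
/// unaligned accesses may be done with 256-bit vector stores, whereas without
/// any SSE the generic i32/i64 expansion is used (an illustrative sketch of
/// the policy below, not an exhaustive description).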
1795 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1796 unsigned DstAlign, unsigned SrcAlign,
1797 bool IsMemset, bool ZeroMemset,
1799 MachineFunction &MF) const {
1800 const Function *F = MF.getFunction();
1801 if ((!IsMemset || ZeroMemset) &&
1802 !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
1803 Attribute::NoImplicitFloat)) {
1805 (Subtarget->isUnalignedMemAccessFast() ||
1806 ((DstAlign == 0 || DstAlign >= 16) &&
1807 (SrcAlign == 0 || SrcAlign >= 16)))) {
1809 if (Subtarget->hasInt256())
1811 if (Subtarget->hasFp256())
1814 if (Subtarget->hasSSE2())
1816 if (Subtarget->hasSSE1())
1818 } else if (!MemcpyStrSrc && Size >= 8 &&
1819 !Subtarget->is64Bit() &&
1820 Subtarget->hasSSE2()) {
1821 // Do not use f64 to lower memcpy if source is string constant. It's
1822 // better to use i32 to avoid the loads.
1826 if (Subtarget->is64Bit() && Size >= 8)
1831 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1833 return X86ScalarSSEf32;
1834 else if (VT == MVT::f64)
1835 return X86ScalarSSEf64;
1840 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1845 *Fast = Subtarget->isUnalignedMemAccessFast();
1849 /// getJumpTableEncoding - Return the entry encoding for a jump table in the
1850 /// current function. The returned value is a member of the
1851 /// MachineJumpTableInfo::JTEntryKind enum.
1852 unsigned X86TargetLowering::getJumpTableEncoding() const {
1853 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF symbol.
1855 if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1856 Subtarget->isPICStyleGOT())
1857 return MachineJumpTableInfo::EK_Custom32;
1859 // Otherwise, use the normal jump table encoding heuristics.
1860 return TargetLowering::getJumpTableEncoding();
1864 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1865 const MachineBasicBlock *MBB,
1866 unsigned uid,MCContext &Ctx) const{
1867 assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
1868 Subtarget->isPICStyleGOT());
1869 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF entries.
1871 return MCSymbolRefExpr::Create(MBB->getSymbol(),
1872 MCSymbolRefExpr::VK_GOTOFF, Ctx);
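// The emitted entry then looks roughly like ".long LBB0_1@GOTOFF", i.e. an
// offset from the GOT base rather than an absolute address (illustrative).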
1875 /// getPICJumpTableRelocBase - Returns the relocation base for the given PIC jumptable.
1877 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1878 SelectionDAG &DAG) const {
1879 if (!Subtarget->is64Bit())
1880 // This doesn't have SDLoc associated with it, but is not really the
1881 // same as a Register.
1882 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
1886 /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
1887 /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an MCExpr.
1889 const MCExpr *X86TargetLowering::
1890 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1891 MCContext &Ctx) const {
1892 // X86-64 uses RIP relative addressing based on the jump table label.
1893 if (Subtarget->isPICStyleRIPRel())
1894 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1896 // Otherwise, the reference is relative to the PIC base.
1897 return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
1900 // FIXME: Why is this routine here? Move it to RegInfo!
1901 std::pair<const TargetRegisterClass*, uint8_t>
1902 X86TargetLowering::findRepresentativeClass(MVT VT) const{
1903 const TargetRegisterClass *RRC = nullptr;
1905 switch (VT.SimpleTy) {
1907 return TargetLowering::findRepresentativeClass(VT);
1908 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
1909 RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
1912 RRC = &X86::VR64RegClass;
1914 case MVT::f32: case MVT::f64:
1915 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1916 case MVT::v4f32: case MVT::v2f64:
1917 case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
1919 RRC = &X86::VR128RegClass;
1922 return std::make_pair(RRC, Cost);
1925 bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
1926 unsigned &Offset) const {
1927 if (!Subtarget->isTargetLinux())
1930 if (Subtarget->is64Bit()) {
1931 // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
1933 if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
1945 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
1946 unsigned DestAS) const {
1947 assert(SrcAS != DestAS && "Expected different address spaces!");
1949 return SrcAS < 256 && DestAS < 256;
1952 //===----------------------------------------------------------------------===//
1953 // Return Value Calling Convention Implementation
1954 //===----------------------------------------------------------------------===//
1956 #include "X86GenCallingConv.inc"
1959 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
1960 MachineFunction &MF, bool isVarArg,
1961 const SmallVectorImpl<ISD::OutputArg> &Outs,
1962 LLVMContext &Context) const {
1963 SmallVector<CCValAssign, 16> RVLocs;
1964 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
1965 return CCInfo.CheckReturn(Outs, RetCC_X86);
1968 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
1969 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
1974 X86TargetLowering::LowerReturn(SDValue Chain,
1975 CallingConv::ID CallConv, bool isVarArg,
1976 const SmallVectorImpl<ISD::OutputArg> &Outs,
1977 const SmallVectorImpl<SDValue> &OutVals,
1978 SDLoc dl, SelectionDAG &DAG) const {
1979 MachineFunction &MF = DAG.getMachineFunction();
1980 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1982 SmallVector<CCValAssign, 16> RVLocs;
1983 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
1984 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
1987 SmallVector<SDValue, 6> RetOps;
1988 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
1989 // Operand #1 = Bytes To Pop
1990 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
1993 // Copy the result values into the output registers.
1994 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1995 CCValAssign &VA = RVLocs[i];
1996 assert(VA.isRegLoc() && "Can only return in registers!");
1997 SDValue ValToCopy = OutVals[i];
1998 EVT ValVT = ValToCopy.getValueType();
2000 // Promote values to the appropriate types
2001 if (VA.getLocInfo() == CCValAssign::SExt)
2002 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2003 else if (VA.getLocInfo() == CCValAssign::ZExt)
2004 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2005 else if (VA.getLocInfo() == CCValAssign::AExt)
2006 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2007 else if (VA.getLocInfo() == CCValAssign::BCvt)
2008 ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
2010 assert(VA.getLocInfo() != CCValAssign::FPExt &&
2011 "Unexpected FP-extend for return value.");
2013 // If this is x86-64, and we disabled SSE, we can't return FP values,
2014 // or SSE or MMX vectors.
2015 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2016 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2017 (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
2018 report_fatal_error("SSE register return with SSE disabled");
2020 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2021 // llvm-gcc has never done it right and no one has noticed, so this
2022 // should be OK for now.
2023 if (ValVT == MVT::f64 &&
2024 (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
2025 report_fatal_error("SSE2 register return with SSE2 disabled");
2027 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2028 // the RET instruction and handled by the FP Stackifier.
2029 if (VA.getLocReg() == X86::FP0 ||
2030 VA.getLocReg() == X86::FP1) {
2031 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2032 // change the value to the FP stack register class.
2033 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2034 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2035 RetOps.push_back(ValToCopy);
2036 // Don't emit a copytoreg.
2040 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2041 // which is returned in RAX / RDX.
2042 if (Subtarget->is64Bit()) {
2043 if (ValVT == MVT::x86mmx) {
2044 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2045 ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
2046 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2048 // If we don't have SSE2 available, convert to v4f32 so the generated
2049 // register is legal.
2050 if (!Subtarget->hasSSE2())
2051 ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, ValToCopy);
2056 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
2057 Flag = Chain.getValue(1);
2058 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2061 // The x86-64 ABIs require that for returning structs by value we copy
2062 // the sret argument into %rax/%eax (depending on ABI) for the return.
2063 // Win32 requires us to put the sret argument in %eax as well.
2064 // We saved the argument into a virtual register in the entry block,
2065 // so now we copy the value out and into %rax/%eax.
2066 if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() &&
2067 (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
2068 MachineFunction &MF = DAG.getMachineFunction();
2069 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2070 unsigned Reg = FuncInfo->getSRetReturnReg();
2072 "SRetReturnReg should have been set in LowerFormalArguments().");
2073 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
2076 = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
2077 X86::RAX : X86::EAX;
2078 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2079 Flag = Chain.getValue(1);
2081 // RAX/EAX now acts like a return value.
2082 RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
2085 RetOps[0] = Chain; // Update chain.
2087 // Add the flag if we have it.
2089 RetOps.push_back(Flag);
2091 return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
2094 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2095 if (N->getNumValues() != 1)
2097 if (!N->hasNUsesOfValue(1, 0))
2100 SDValue TCChain = Chain;
2101 SDNode *Copy = *N->use_begin();
2102 if (Copy->getOpcode() == ISD::CopyToReg) {
2103 // If the copy has a glue operand, we conservatively assume it isn't safe to
2104 // perform a tail call.
2105 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2107 TCChain = Copy->getOperand(0);
2108 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2111 bool HasRet = false;
2112 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2114 if (UI->getOpcode() != X86ISD::RET_FLAG)
2116 // If we are returning more than one value, we can definitely
2117 // not make a tail call; see PR19530.
2118 if (UI->getNumOperands() > 4)
2120 if (UI->getNumOperands() == 4 &&
2121 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2134 X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2135 ISD::NodeType ExtendKind) const {
2137 // TODO: Is this also valid on 32-bit?
2138 if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2139 ReturnMVT = MVT::i8;
2141 ReturnMVT = MVT::i32;
2143 EVT MinVT = getRegisterType(Context, ReturnMVT);
2144 return VT.bitsLT(MinVT) ? MinVT : VT;
2147 /// LowerCallResult - Lower the result values of a call into the
2148 /// appropriate copies out of appropriate physical registers.
2151 X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
2152 CallingConv::ID CallConv, bool isVarArg,
2153 const SmallVectorImpl<ISD::InputArg> &Ins,
2154 SDLoc dl, SelectionDAG &DAG,
2155 SmallVectorImpl<SDValue> &InVals) const {
2157 // Assign locations to each value returned by this call.
2158 SmallVector<CCValAssign, 16> RVLocs;
2159 bool Is64Bit = Subtarget->is64Bit();
2160 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2162 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2164 // Copy all of the result registers out of their specified physreg.
2165 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
2166 CCValAssign &VA = RVLocs[i];
2167 EVT CopyVT = VA.getValVT();
2169 // If this is x86-64, and we disabled SSE, we can't return FP values
2170 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
2171 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
2172 report_fatal_error("SSE register return with SSE disabled");
2175 // If we prefer to use the value in xmm registers, copy it out as f80 and
2176 // use a truncate to move it from fp stack reg to xmm reg.
2177 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2178 isScalarFPTypeInSSEReg(VA.getValVT()))
2181 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
2182 CopyVT, InFlag).getValue(1);
2183 SDValue Val = Chain.getValue(0);
2185 if (CopyVT != VA.getValVT())
2186 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2187 // This truncation won't change the value.
2188 DAG.getIntPtrConstant(1));
2190 InFlag = Chain.getValue(2);
2191 InVals.push_back(Val);
2197 //===----------------------------------------------------------------------===//
2198 // C & StdCall & Fast Calling Convention implementation
2199 //===----------------------------------------------------------------------===//
2200 // The StdCall calling convention is the standard for many Windows API
2201 // routines. It differs from the C calling convention only slightly: the
2202 // callee cleans up the stack instead of the caller, and symbols are
2203 // decorated in a particular way :) It doesn't support any vector arguments.
2204 // For info on fast calling convention see Fast Calling Convention (tail call)
2205 // implementation LowerX86_32FastCCCallTo.
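// Illustrative IR: a stdcall callee is declared as
//   define x86_stdcallcc void @f(i32 %a, i32 %b)
// and with callee cleanup it returns with something like "retl $8" to pop its
// own arguments (a sketch; the exact symbol decoration, e.g. _f@8, depends on
// the target).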
2207 /// CallIsStructReturn - Determines whether a call uses struct return semantics.
2209 enum StructReturnType {
2214 static StructReturnType
2215 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
2217 return NotStructReturn;
2219 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2220 if (!Flags.isSRet())
2221 return NotStructReturn;
2222 if (Flags.isInReg())
2223 return RegStructReturn;
2224 return StackStructReturn;
2227 /// ArgsAreStructReturn - Determines whether a function uses struct
2228 /// return semantics.
2229 static StructReturnType
2230 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
2232 return NotStructReturn;
2234 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2235 if (!Flags.isSRet())
2236 return NotStructReturn;
2237 if (Flags.isInReg())
2238 return RegStructReturn;
2239 return StackStructReturn;
2242 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2243 /// by "Src" to address "Dst" with size and alignment information specified by
2244 /// the specific parameter attribute. The copy will be passed as a byval
2245 /// function parameter.
2247 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2248 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
2250 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2252 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2253 /*isVolatile*/false, /*AlwaysInline=*/true,
2254 MachinePointerInfo(), MachinePointerInfo());
2257 /// IsTailCallConvention - Return true if the calling convention is one that
2258 /// supports tail call optimization.
2259 static bool IsTailCallConvention(CallingConv::ID CC) {
2260 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2261 CC == CallingConv::HiPE);
2264 /// \brief Return true if the calling convention is a C calling convention.
2265 static bool IsCCallConvention(CallingConv::ID CC) {
2266 return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
2267 CC == CallingConv::X86_64_SysV);
2270 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2271 if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
2275 CallingConv::ID CalleeCC = CS.getCallingConv();
2276 if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
2282 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
2283 /// a tailcall target by changing its ABI.
2284 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
2285 bool GuaranteedTailCallOpt) {
2286 return GuaranteedTailCallOpt && IsTailCallConvention(CC);
2290 X86TargetLowering::LowerMemArgument(SDValue Chain,
2291 CallingConv::ID CallConv,
2292 const SmallVectorImpl<ISD::InputArg> &Ins,
2293 SDLoc dl, SelectionDAG &DAG,
2294 const CCValAssign &VA,
2295 MachineFrameInfo *MFI,
2297 // Create the nodes corresponding to a load from this parameter slot.
2298 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2299 bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
2300 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2301 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2304 // If the value is passed by pointer, we have the address passed instead of the value itself.
2306 if (VA.getLocInfo() == CCValAssign::Indirect)
2307 ValVT = VA.getLocVT();
2309 ValVT = VA.getValVT();
2311 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2312 // changed with more analysis.
2313 // In case of tail call optimization, mark all arguments mutable, since they
2314 // could be overwritten when the arguments of a tail call are lowered.
2315 if (Flags.isByVal()) {
2316 unsigned Bytes = Flags.getByValSize();
2317 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2318 int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2319 return DAG.getFrameIndex(FI, getPointerTy());
2321 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
2322 VA.getLocMemOffset(), isImmutable);
2323 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
2324 return DAG.getLoad(ValVT, dl, Chain, FIN,
2325 MachinePointerInfo::getFixedStack(FI),
2326 false, false, false, 0);
2330 // FIXME: Get this from tablegen.
2331 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2332 const X86Subtarget *Subtarget) {
2333 assert(Subtarget->is64Bit());
2335 if (Subtarget->isCallingConvWin64(CallConv)) {
2336 static const MCPhysReg GPR64ArgRegsWin64[] = {
2337 X86::RCX, X86::RDX, X86::R8, X86::R9
2339 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2342 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2343 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2345 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2348 // FIXME: Get this from tablegen.
2349 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2350 CallingConv::ID CallConv,
2351 const X86Subtarget *Subtarget) {
2352 assert(Subtarget->is64Bit());
2353 if (Subtarget->isCallingConvWin64(CallConv)) {
2354 // The XMM registers which might contain var arg parameters are shadowed
2355 // by their paired GPRs, so we only need to save the GPRs to their home registers.
2357 // TODO: __vectorcall will change this.
2361 const Function *Fn = MF.getFunction();
2362 bool NoImplicitFloatOps = Fn->getAttributes().
2363 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
2364 assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) &&
2365 "SSE register cannot be used when SSE is disabled!");
2366 if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
2367 !Subtarget->hasSSE1())
2368 // Kernel mode asks for SSE to be disabled, so there are no XMM argument registers.
2372 static const MCPhysReg XMMArgRegs64Bit[] = {
2373 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2374 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2376 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2380 X86TargetLowering::LowerFormalArguments(SDValue Chain,
2381 CallingConv::ID CallConv,
2383 const SmallVectorImpl<ISD::InputArg> &Ins,
2386 SmallVectorImpl<SDValue> &InVals)
2388 MachineFunction &MF = DAG.getMachineFunction();
2389 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2391 const Function* Fn = MF.getFunction();
2392 if (Fn->hasExternalLinkage() &&
2393 Subtarget->isTargetCygMing() &&
2394 Fn->getName() == "main")
2395 FuncInfo->setForceFramePointer(true);
2397 MachineFrameInfo *MFI = MF.getFrameInfo();
2398 bool Is64Bit = Subtarget->is64Bit();
2399 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2401 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2402 "Var args not supported with calling convention fastcc, ghc or hipe");
2404 // Assign locations to all of the incoming arguments.
2405 SmallVector<CCValAssign, 16> ArgLocs;
2406 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2408 // Allocate shadow area for Win64
2410 CCInfo.AllocateStack(32, 8);
2412 CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
2414 unsigned LastVal = ~0U;
2416 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2417 CCValAssign &VA = ArgLocs[i];
2418 // TODO: If an arg is passed in two places (e.g. reg and stack), skip the later occurrences.
2420 assert(VA.getValNo() != LastVal &&
2421 "Don't support value assigned to multiple locs yet");
2423 LastVal = VA.getValNo();
2425 if (VA.isRegLoc()) {
2426 EVT RegVT = VA.getLocVT();
2427 const TargetRegisterClass *RC;
2428 if (RegVT == MVT::i32)
2429 RC = &X86::GR32RegClass;
2430 else if (Is64Bit && RegVT == MVT::i64)
2431 RC = &X86::GR64RegClass;
2432 else if (RegVT == MVT::f32)
2433 RC = &X86::FR32RegClass;
2434 else if (RegVT == MVT::f64)
2435 RC = &X86::FR64RegClass;
2436 else if (RegVT.is512BitVector())
2437 RC = &X86::VR512RegClass;
2438 else if (RegVT.is256BitVector())
2439 RC = &X86::VR256RegClass;
2440 else if (RegVT.is128BitVector())
2441 RC = &X86::VR128RegClass;
2442 else if (RegVT == MVT::x86mmx)
2443 RC = &X86::VR64RegClass;
2444 else if (RegVT == MVT::i1)
2445 RC = &X86::VK1RegClass;
2446 else if (RegVT == MVT::v8i1)
2447 RC = &X86::VK8RegClass;
2448 else if (RegVT == MVT::v16i1)
2449 RC = &X86::VK16RegClass;
2450 else if (RegVT == MVT::v32i1)
2451 RC = &X86::VK32RegClass;
2452 else if (RegVT == MVT::v64i1)
2453 RC = &X86::VK64RegClass;
2455 llvm_unreachable("Unknown argument type!");
2457 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2458 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2460 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2461 // bits. Insert an assert[sz]ext to capture this, then truncate to the right size.
2463 if (VA.getLocInfo() == CCValAssign::SExt)
2464 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2465 DAG.getValueType(VA.getValVT()));
2466 else if (VA.getLocInfo() == CCValAssign::ZExt)
2467 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2468 DAG.getValueType(VA.getValVT()));
2469 else if (VA.getLocInfo() == CCValAssign::BCvt)
2470 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
2472 if (VA.isExtInLoc()) {
2473 // Handle MMX values passed in XMM regs.
2474 if (RegVT.isVector())
2475 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2477 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
2480 assert(VA.isMemLoc());
2481 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
2484 // If value is passed via pointer - do a load.
2485 if (VA.getLocInfo() == CCValAssign::Indirect)
2486 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
2487 MachinePointerInfo(), false, false, false, 0);
2489 InVals.push_back(ArgValue);
2492 if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
2493 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2494 // The x86-64 ABIs require that for returning structs by value we copy
2495 // the sret argument into %rax/%eax (depending on ABI) for the return.
2496 // Win32 requires us to put the sret argument in %eax as well.
2497 // Save the argument into a virtual register so that we can access it
2498 // from the return points.
2499 if (Ins[i].Flags.isSRet()) {
2500 unsigned Reg = FuncInfo->getSRetReturnReg();
2502 MVT PtrTy = getPointerTy();
2503 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
2504 FuncInfo->setSRetReturnReg(Reg);
2506 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
2507 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
2513 unsigned StackSize = CCInfo.getNextStackOffset();
2514 // Align stack specially for tail calls.
2515 if (FuncIsMadeTailCallSafe(CallConv,
2516 MF.getTarget().Options.GuaranteedTailCallOpt))
2517 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
2519 // If the function takes a variable number of arguments, make a frame index for
2520 // the start of the first vararg value... for expansion of llvm.va_start. We
2521 // can skip this if there are no va_start calls.
2522 if (MFI->hasVAStart() &&
2523 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
2524 CallConv != CallingConv::X86_ThisCall))) {
2525 FuncInfo->setVarArgsFrameIndex(
2526 MFI->CreateFixedObject(1, StackSize, true));
2529 // 64-bit calling conventions support varargs and register parameters, so we
2530 // have to do extra work to spill them in the prologue or forward them to musttail calls.
2532 if (Is64Bit && isVarArg &&
2533 (MFI->hasVAStart() || MFI->hasMustTailInVarArgFunc())) {
2534 // Find the first unallocated argument registers.
2535 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
2536 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
2537 unsigned NumIntRegs =
2538 CCInfo.getFirstUnallocated(ArgGPRs.data(), ArgGPRs.size());
2539 unsigned NumXMMRegs =
2540 CCInfo.getFirstUnallocated(ArgXMMs.data(), ArgXMMs.size());
2541 assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
2542 "SSE register cannot be used when SSE is disabled!");
2544 // Gather all the live in physical registers.
2545 SmallVector<SDValue, 6> LiveGPRs;
2546 SmallVector<SDValue, 8> LiveXMMRegs;
2548 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
2549 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
2551 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
2553 if (!ArgXMMs.empty()) {
2554 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
2555 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
2556 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
2557 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
2558 LiveXMMRegs.push_back(
2559 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
2563 // Store them to the va_list returned by va_start.
2564 if (MFI->hasVAStart()) {
2566 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
2567 // Get to the caller-allocated home save location. Add 8 to account
2568 // for the return address.
2569 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
2570 FuncInfo->setRegSaveFrameIndex(
2571 MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
2572 // Fixup to set vararg frame on shadow area (4 x i64).
2574 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
2576 // For X86-64, if there are vararg parameters that are passed via
2577 // registers, then we must store them to their spots on the stack so
2578 // they may be loaded by dereferencing the result of va_next.
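// Sketch of the SysV register save area being set up here: 6 GPRs (48 bytes)
// followed by 8 XMM registers (128 bytes); the gp_offset/fp_offset fields of
// the va_list then index into this block (assuming the standard x86-64 SysV
// va_list layout).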
2579 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
2580 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
2581 FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
2582 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
2585 // Store the integer parameter registers.
2586 SmallVector<SDValue, 8> MemOps;
2587 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
2589 unsigned Offset = FuncInfo->getVarArgsGPOffset();
2590 for (SDValue Val : LiveGPRs) {
2591 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
2592 DAG.getIntPtrConstant(Offset));
2594 DAG.getStore(Val.getValue(1), dl, Val, FIN,
2595 MachinePointerInfo::getFixedStack(
2596 FuncInfo->getRegSaveFrameIndex(), Offset),
2598 MemOps.push_back(Store);
2602 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
2603 // Now store the XMM (fp + vector) parameter registers.
2604 SmallVector<SDValue, 12> SaveXMMOps;
2605 SaveXMMOps.push_back(Chain);
2606 SaveXMMOps.push_back(ALVal);
2607 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2608 FuncInfo->getRegSaveFrameIndex()));
2609 SaveXMMOps.push_back(DAG.getIntPtrConstant(
2610 FuncInfo->getVarArgsFPOffset()));
2611 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
2613 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
2614 MVT::Other, SaveXMMOps));
2617 if (!MemOps.empty())
2618 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2620 // Add all GPRs, al, and XMMs to the list of forwards. We will add them
2621 // to the liveout set on a musttail call.
2622 assert(MFI->hasMustTailInVarArgFunc());
2623 auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
2624 typedef X86MachineFunctionInfo::Forward Forward;
2626 for (unsigned I = 0, E = LiveGPRs.size(); I != E; ++I) {
2628 MF.getRegInfo().createVirtualRegister(&X86::GR64RegClass);
2629 Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveGPRs[I]);
2630 Forwards.push_back(Forward(VReg, ArgGPRs[NumIntRegs + I], MVT::i64));
2633 if (!ArgXMMs.empty()) {
2635 MF.getRegInfo().createVirtualRegister(&X86::GR8RegClass);
2636 Chain = DAG.getCopyToReg(Chain, dl, ALVReg, ALVal);
2637 Forwards.push_back(Forward(ALVReg, X86::AL, MVT::i8));
2639 for (unsigned I = 0, E = LiveXMMRegs.size(); I != E; ++I) {
2641 MF.getRegInfo().createVirtualRegister(&X86::VR128RegClass);
2642 Chain = DAG.getCopyToReg(Chain, dl, VReg, LiveXMMRegs[I]);
2644 Forward(VReg, ArgXMMs[NumXMMRegs + I], MVT::v4f32));
2650 // Some CCs need callee pop.
2651 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2652 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2653 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
2655 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
2656 // If this is an sret function, the return should pop the hidden pointer.
2657 if (!Is64Bit && !IsTailCallConvention(CallConv) &&
2658 !Subtarget->getTargetTriple().isOSMSVCRT() &&
2659 argsAreStructReturn(Ins) == StackStructReturn)
2660 FuncInfo->setBytesToPopOnReturn(4);
2664 // RegSaveFrameIndex is X86-64 only.
2665 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
2666 if (CallConv == CallingConv::X86_FastCall ||
2667 CallConv == CallingConv::X86_ThisCall)
2668 // fastcc functions can't have varargs.
2669 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
2672 FuncInfo->setArgumentStackSize(StackSize);
2678 X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
2679 SDValue StackPtr, SDValue Arg,
2680 SDLoc dl, SelectionDAG &DAG,
2681 const CCValAssign &VA,
2682 ISD::ArgFlagsTy Flags) const {
2683 unsigned LocMemOffset = VA.getLocMemOffset();
2684 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2685 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
2686 if (Flags.isByVal())
2687 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
2689 return DAG.getStore(Chain, dl, Arg, PtrOff,
2690 MachinePointerInfo::getStack(LocMemOffset),
2694 /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
2695 /// optimization is performed and it is required.
2697 X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
2698 SDValue &OutRetAddr, SDValue Chain,
2699 bool IsTailCall, bool Is64Bit,
2700 int FPDiff, SDLoc dl) const {
2701 // Adjust the Return address stack slot.
2702 EVT VT = getPointerTy();
2703 OutRetAddr = getReturnAddressFrameIndex(DAG);
2705 // Load the "old" Return address.
2706 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
2707 false, false, false, 0);
2708 return SDValue(OutRetAddr.getNode(), 1);
2711 /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
2712 /// optimization is performed and it is required (FPDiff!=0).
2713 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
2714 SDValue Chain, SDValue RetAddrFrIdx,
2715 EVT PtrVT, unsigned SlotSize,
2716 int FPDiff, SDLoc dl) {
2717 // Store the return address to the appropriate stack slot.
2718 if (!FPDiff) return Chain;
2719 // Calculate the new stack slot for the return address.
2720 int NewReturnAddrFI =
2721 MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2723 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
2724 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2725 MachinePointerInfo::getFixedStack(NewReturnAddrFI),
2731 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2732 SmallVectorImpl<SDValue> &InVals) const {
2733 SelectionDAG &DAG = CLI.DAG;
2735 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2736 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2737 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2738 SDValue Chain = CLI.Chain;
2739 SDValue Callee = CLI.Callee;
2740 CallingConv::ID CallConv = CLI.CallConv;
2741 bool &isTailCall = CLI.IsTailCall;
2742 bool isVarArg = CLI.IsVarArg;
2744 MachineFunction &MF = DAG.getMachineFunction();
2745 bool Is64Bit = Subtarget->is64Bit();
2746 bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
2747 StructReturnType SR = callIsStructReturn(Outs);
2748 bool IsSibcall = false;
2749 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2751 if (MF.getTarget().Options.DisableTailCalls)
2754 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
2756 // Force this to be a tail call. The verifier rules are enough to ensure
2757 // that we can lower this successfully without moving the return address around.
2760 } else if (isTailCall) {
2761 // Check if it's really possible to do a tail call.
2762 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
2763 isVarArg, SR != NotStructReturn,
2764 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
2765 Outs, OutVals, Ins, DAG);
2767 // Sibcalls are automatically detected tailcalls which do not require ABI changes.
2769 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
2776 assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
2777 "Var args not supported with calling convention fastcc, ghc or hipe");
2779 // Analyze operands of the call, assigning locations to each operand.
2780 SmallVector<CCValAssign, 16> ArgLocs;
2781 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2783 // Allocate shadow area for Win64
2785 CCInfo.AllocateStack(32, 8);
2787 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
2789 // Get a count of how many bytes are to be pushed on the stack.
2790 unsigned NumBytes = CCInfo.getNextStackOffset();
2792 // This is a sibcall. The memory operands are available in the caller's
2793 // own stack.
2795 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2796 IsTailCallConvention(CallConv))
2797 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2800 if (isTailCall && !IsSibcall && !IsMustTail) {
2801 // Lower arguments at fp - stackoffset + fpdiff.
2802 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2804 FPDiff = NumBytesCallerPushed - NumBytes;
2806 // Set the delta of movement of the return address stack slot, but only
2807 // set it if the delta is greater than the previous delta.
2808 if (FPDiff < X86Info->getTCReturnAddrDelta())
2809 X86Info->setTCReturnAddrDelta(FPDiff);
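// Example: if the caller pushed 16 bytes of arguments but this call needs
// only 8, FPDiff is 8 and the return address is stored to a correspondingly
// shifted slot (illustrative; see EmitTailCallStoreRetAddr above).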
2812 unsigned NumBytesToPush = NumBytes;
2813 unsigned NumBytesToPop = NumBytes;
2815 // If we have an inalloca argument, all stack space has already been allocated
2816 // for us and is right at the top of the stack. We don't support multiple
2817 // arguments passed in memory when using inalloca.
2818 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2820 if (!ArgLocs.back().isMemLoc())
2821 report_fatal_error("cannot use inalloca attribute on a register "
2823 if (ArgLocs.back().getLocMemOffset() != 0)
2824 report_fatal_error("any parameter with the inalloca attribute must be "
2825 "the only memory argument");
2829 Chain = DAG.getCALLSEQ_START(
2830 Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
2832 SDValue RetAddrFrIdx;
2833 // Load return address for tail calls.
2834 if (isTailCall && FPDiff)
2835 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2836 Is64Bit, FPDiff, dl);
2838 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2839 SmallVector<SDValue, 8> MemOpChains;
2842 // Walk the register/memloc assignments, inserting copies/loads. In the case
2843 // of tail call optimization, arguments are handled later.
2844 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
2845 DAG.getSubtarget().getRegisterInfo());
2846 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2847 // Skip inalloca arguments, they have already been written.
2848 ISD::ArgFlagsTy Flags = Outs[i].Flags;
2849 if (Flags.isInAlloca())
2852 CCValAssign &VA = ArgLocs[i];
2853 EVT RegVT = VA.getLocVT();
2854 SDValue Arg = OutVals[i];
2855 bool isByVal = Flags.isByVal();
2857 // Promote the value if needed.
2858 switch (VA.getLocInfo()) {
2859 default: llvm_unreachable("Unknown loc info!");
2860 case CCValAssign::Full: break;
2861 case CCValAssign::SExt:
2862 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2864 case CCValAssign::ZExt:
2865 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2867 case CCValAssign::AExt:
2868 if (RegVT.is128BitVector()) {
2869 // Special case: passing MMX values in XMM registers.
2870 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
2871 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2872 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2874 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2876 case CCValAssign::BCvt:
2877 Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
2879 case CCValAssign::Indirect: {
2880 // Store the argument.
2881 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2882 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2883 Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
2884 MachinePointerInfo::getFixedStack(FI),
2891 if (VA.isRegLoc()) {
2892 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2893 if (isVarArg && IsWin64) {
2894 // The Win64 ABI requires an argument passed in an XMM register to also be
2895 // copied to the corresponding shadow GPR when the callee is a varargs function.
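// For instance, a double passed in XMM1 to a varargs callee is also copied
// into RDX below, so the callee's va_arg machinery (which walks the spilled
// integer "home" registers) can still find the value.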
2896 unsigned ShadowReg = 0;
2897 switch (VA.getLocReg()) {
2898 case X86::XMM0: ShadowReg = X86::RCX; break;
2899 case X86::XMM1: ShadowReg = X86::RDX; break;
2900 case X86::XMM2: ShadowReg = X86::R8; break;
2901 case X86::XMM3: ShadowReg = X86::R9; break;
2904 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2906 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2907 assert(VA.isMemLoc());
2908 if (!StackPtr.getNode())
2909 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2911 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2912 dl, DAG, VA, Flags));
2916 if (!MemOpChains.empty())
2917 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2919 if (Subtarget->isPICStyleGOT()) {
2920 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2923 RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
2924 DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
2926 // If we are tail calling and generating PIC/GOT style code load the
2927 // address of the callee into ECX. The value in ecx is used as target of
2928 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2929 // for tail calls on PIC/GOT architectures. Normally we would just put the
2930 // address of GOT into ebx and then call target@PLT. But for tail calls
2931 // ebx would be restored (since ebx is callee saved) before jumping to the callee.
2934 // Note: The actual moving to ECX is done further down.
2935 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2936 if (G && !G->getGlobal()->hasHiddenVisibility() &&
2937 !G->getGlobal()->hasProtectedVisibility())
2938 Callee = LowerGlobalAddress(Callee, DAG);
2939 else if (isa<ExternalSymbolSDNode>(Callee))
2940 Callee = LowerExternalSymbol(Callee, DAG);
2944 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
2945 // From AMD64 ABI document:
2946 // For calls that may call functions that use varargs or stdargs
2947 // (prototype-less calls or calls to functions containing ellipsis (...) in
2948 // the declaration) %al is used as hidden argument to specify the number
2949 // of SSE registers used. The contents of %al do not need to match exactly
2950 // the number of registers, but must be an upper bound on the number of SSE
2951 // registers used and is in the range 0 - 8 inclusive.
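// For example, a call such as printf("%g\n", x) with a double x passes one
// value in %xmm0, so %al would be set to 1 here (any upper bound up to 8
// would be equally acceptable to the ABI).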
2953 // Count the number of XMM registers allocated.
2954 static const MCPhysReg XMMArgRegs[] = {
2955 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2956 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2958 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
2959 assert((Subtarget->hasSSE1() || !NumXMMRegs)
2960 && "SSE registers cannot be used when SSE is disabled");
2962 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
2963 DAG.getConstant(NumXMMRegs, MVT::i8)));
2966 if (Is64Bit && isVarArg && IsMustTail) {
2967 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2968 for (const auto &F : Forwards) {
2969 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2970 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
2974 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2975 // don't need this because the eligibility check rejects calls that require
2976 // shuffling arguments passed in memory.
2977 if (!IsSibcall && isTailCall) {
2978 // Force all the incoming stack arguments to be loaded from the stack
2979 // before any new outgoing arguments are stored to the stack, because the
2980 // outgoing stack slots may alias the incoming argument stack slots, and
2981 // the alias isn't otherwise explicit. This is slightly more conservative
2982 // than necessary, because it means that each store effectively depends
2983 // on every argument instead of just those arguments it would clobber.
2984 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
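// Consider, e.g., caller(a, b) making a tail call to callee(b, a): the
// outgoing slot for b is the incoming slot holding a, so a must be loaded
// before the store of b clobbers it (and vice versa).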
2986 SmallVector<SDValue, 8> MemOpChains2;
2989 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2990 CCValAssign &VA = ArgLocs[i];
2993 assert(VA.isMemLoc());
2994 SDValue Arg = OutVals[i];
2995 ISD::ArgFlagsTy Flags = Outs[i].Flags;
2996 // Skip inalloca arguments. They don't require any work.
2997 if (Flags.isInAlloca())
2999 // Create frame index.
3000 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3001 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3002 FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3003 FIN = DAG.getFrameIndex(FI, getPointerTy());
3005 if (Flags.isByVal()) {
3006 // Copy relative to framepointer.
3007 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
3008 if (!StackPtr.getNode())
3009 StackPtr = DAG.getCopyFromReg(Chain, dl,
3010 RegInfo->getStackRegister(),
3012 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
3014 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3018 // Store relative to framepointer.
3019 MemOpChains2.push_back(
3020 DAG.getStore(ArgChain, dl, Arg, FIN,
3021 MachinePointerInfo::getFixedStack(FI),
3026 if (!MemOpChains2.empty())
3027 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3029 // Store the return address to the appropriate stack slot.
3030 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3031 getPointerTy(), RegInfo->getSlotSize(),
3035 // Build a sequence of copy-to-reg nodes chained together with token chain
3036 // and flag operands which copy the outgoing args into registers.
3038 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3039 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3040 RegsToPass[i].second, InFlag);
3041 InFlag = Chain.getValue(1);
3044 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3045 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
3046 // In the 64-bit large code model, we have to make all calls
3047 // through a register, since the call instruction's 32-bit
3048 // pc-relative offset may not be large enough to hold the whole address.
3050 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3051 // If the callee is a GlobalAddress node (quite common, every direct call
3052 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
3055 // We should use extra load for direct calls to dllimported functions in
3057 const GlobalValue *GV = G->getGlobal();
3058 if (!GV->hasDLLImportStorageClass()) {
3059 unsigned char OpFlags = 0;
3060 bool ExtraLoad = false;
3061 unsigned WrapperKind = ISD::DELETED_NODE;
3063 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
3064 // external symbols must go through the PLT in PIC mode. If the symbol
3065 // has hidden or protected visibility, or if it is static or local, then
3066 // we don't need to use the PLT - we can directly call it.
3067 if (Subtarget->isTargetELF() &&
3068 DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
3069 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
3070 OpFlags = X86II::MO_PLT;
3071 } else if (Subtarget->isPICStyleStubAny() &&
3072 (GV->isDeclaration() || GV->isWeakForLinker()) &&
3073 (!Subtarget->getTargetTriple().isMacOSX() ||
3074 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3075 // PC-relative references to external symbols should go through $stub,
3076 // unless we're building with the leopard linker or later, which
3077 // automatically synthesizes these stubs.
3078 OpFlags = X86II::MO_DARWIN_STUB;
3079 } else if (Subtarget->isPICStyleRIPRel() &&
3080 isa<Function>(GV) &&
3081 cast<Function>(GV)->getAttributes().
3082 hasAttribute(AttributeSet::FunctionIndex,
3083 Attribute::NonLazyBind)) {
3084 // If the function is marked as non-lazy, generate an indirect call
3085 // which loads from the GOT directly. This avoids runtime overhead
3086 // at the cost of eager binding (and one extra byte of encoding).
3087 OpFlags = X86II::MO_GOTPCREL;
3088 WrapperKind = X86ISD::WrapperRIP;
3092 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
3093 G->getOffset(), OpFlags);
3095 // Add a wrapper if needed.
3096 if (WrapperKind != ISD::DELETED_NODE)
3097 Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
3098 // Add extra indirection if needed.
3100 Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
3101 MachinePointerInfo::getGOT(),
3102 false, false, false, 0);
3104 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3105 unsigned char OpFlags = 0;
3107 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
3108 // external symbols should go through the PLT.
3109 if (Subtarget->isTargetELF() &&
3110 DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
3111 OpFlags = X86II::MO_PLT;
3112 } else if (Subtarget->isPICStyleStubAny() &&
3113 (!Subtarget->getTargetTriple().isMacOSX() ||
3114 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
3115 // PC-relative references to external symbols should go through $stub,
3116 // unless we're building with the leopard linker or later, which
3117 // automatically synthesizes these stubs.
3118 OpFlags = X86II::MO_DARWIN_STUB;
3121 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
3123 } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) {
3124 // Zero-extend the 32-bit Callee address to 64 bits, as the x32 ABI requires.
3125 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3128 // Returns a chain & a flag for retval copy to use.
3129 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3130 SmallVector<SDValue, 8> Ops;
3132 if (!IsSibcall && isTailCall) {
3133 Chain = DAG.getCALLSEQ_END(Chain,
3134 DAG.getIntPtrConstant(NumBytesToPop, true),
3135 DAG.getIntPtrConstant(0, true), InFlag, dl);
3136 InFlag = Chain.getValue(1);
3139 Ops.push_back(Chain);
3140 Ops.push_back(Callee);
3143 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
3145 // Add argument registers to the end of the list so that they are known live into the call.
3147 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3148 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3149 RegsToPass[i].second.getValueType()));
3151 // Add a register mask operand representing the call-preserved registers.
3152 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
3153 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3154 assert(Mask && "Missing call preserved mask for calling convention");
3155 Ops.push_back(DAG.getRegisterMask(Mask));
3157 if (InFlag.getNode())
3158 Ops.push_back(InFlag);
3162 // We used to add the result registers to the function's liveout set here
3163 // (on the first return lowered for the function).
3164 // This isn't right, although it's probably harmless on x86; liveouts
3165 // should be computed from returns, not tail calls. Consider a void
3166 // function making a tail call to a function returning int.
3167 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3170 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3171 InFlag = Chain.getValue(1);
3173 // Create the CALLSEQ_END node.
3174 unsigned NumBytesForCalleeToPop;
3175 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3176 DAG.getTarget().Options.GuaranteedTailCallOpt))
3177 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3178 else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
3179 !Subtarget->getTargetTriple().isOSMSVCRT() &&
3180 SR == StackStructReturn)
3181 // If this is a call to a struct-return function, the callee
3182 // pops the hidden struct pointer, so we have to push it back.
3183 // This is common for Darwin/X86, Linux & Mingw32 targets.
3184 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3185 NumBytesForCalleeToPop = 4;
3187 NumBytesForCalleeToPop = 0; // Callee pops nothing.
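// E.g. a 32-bit __stdcall callee pops its whole argument area; a cdecl call
// that returns a struct via the hidden pointer on a non-MSVC 32-bit target
// pops only the 4-byte hidden sret pointer; in every other case the caller
// pops everything and the callee owes nothing.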
3189 // Returns a flag for retval copy to use.
3191 Chain = DAG.getCALLSEQ_END(Chain,
3192 DAG.getIntPtrConstant(NumBytesToPop, true),
3193 DAG.getIntPtrConstant(NumBytesForCalleeToPop,
3196 InFlag = Chain.getValue(1);
3199 // Handle result values, copying them out of physregs into vregs that we return.
3201 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3202 Ins, dl, DAG, InVals);
3205 //===----------------------------------------------------------------------===//
3206 // Fast Calling Convention (tail call) implementation
3207 //===----------------------------------------------------------------------===//
3209 // Like the stdcall convention, the callee cleans up the arguments, except that
3210 // ECX is reserved for storing the address of the tail-called function. Only 2
3211 // registers are free for argument passing (inreg). Tail call optimization is performed provided:
3213 // * tailcallopt is enabled
3214 // * caller/callee are fastcc
3215 // On X86_64 architecture with GOT-style position independent code only local
3216 // (within module) calls are supported at the moment.
3217 // To keep the stack aligned according to the platform ABI, the function
3218 // GetAlignedArgumentStackSize ensures that the argument delta is always a
3219 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld, for example.)
3220 // If the tail-called callee has more arguments than the caller, the caller
3221 // needs to make sure that there is room to move the RETADDR to. This is
3222 // achieved by reserving an area the size of the argument delta right after the
3223 // original RETADDR, but before the saved frame pointer or the spilled registers,
3224 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
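// A rough sketch of the resulting caller frame (illustrative only; the exact
// contents depend on the target and on which registers get spilled):
//   arg1
//   arg2
//   RETADDR
//   [ reserved area the size of the argument delta,
//     into which RETADDR can be moved ]
//   (possible saved EBP)
//   callee-saved registers / locals ...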
3236 /// GetAlignedArgumentStackSize - Round the argument stack size up so that the
3237 /// stack stays aligned, e.g. to 16n + 12 for a 16-byte alignment requirement.
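/// For instance, on 32-bit x86 (4-byte return-address slot, 16-byte stack
/// alignment) a StackSize of 20 becomes 28 (16 + 12) and a StackSize of 30
/// becomes 44 (32 + 12), so that after the return address is pushed the
/// stack is 16-byte aligned again.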
3239 X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3240 SelectionDAG& DAG) const {
3241 MachineFunction &MF = DAG.getMachineFunction();
3242 const TargetMachine &TM = MF.getTarget();
3243 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
3244 TM.getSubtargetImpl()->getRegisterInfo());
3245 const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
3246 unsigned StackAlignment = TFI.getStackAlignment();
3247 uint64_t AlignMask = StackAlignment - 1;
3248 int64_t Offset = StackSize;
3249 unsigned SlotSize = RegInfo->getSlotSize();
3250 if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
3251 // The misalignment is at most (StackAlignment - SlotSize), so just add the difference.
3252 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3254 // Mask out the lower bits, then add the stack alignment once plus (StackAlignment - SlotSize).
3255 Offset = ((~AlignMask) & Offset) + StackAlignment +
3256 (StackAlignment-SlotSize);
3261 /// MatchingStackOffset - Return true if the given stack call argument is
3262 /// already available in the same relative position in the caller's
3263 /// incoming argument stack.
3265 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3266 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
3267 const X86InstrInfo *TII) {
3268 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
3270 if (Arg.getOpcode() == ISD::CopyFromReg) {
3271 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3272 if (!TargetRegisterInfo::isVirtualRegister(VR))
3274 MachineInstr *Def = MRI->getVRegDef(VR);
3277 if (!Flags.isByVal()) {
3278 if (!TII->isLoadFromStackSlot(Def, FI))
3281 unsigned Opcode = Def->getOpcode();
3282 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
3283 Def->getOperand(1).isFI()) {
3284 FI = Def->getOperand(1).getIndex();
3285 Bytes = Flags.getByValSize();
3289 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3290 if (Flags.isByVal())
3291 // ByVal argument is passed in as a pointer but it's now being