1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 /// \brief TargetLowering functions borrowed from AMDIL.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUISelLowering.h"
16 #include "AMDGPURegisterInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "AMDILIntrinsicInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/PseudoSourceValue.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetOptions.h"
34 //===----------------------------------------------------------------------===//
35 // TargetLowering Implementation Help Functions End
36 //===----------------------------------------------------------------------===//
38 //===----------------------------------------------------------------------===//
39 // TargetLowering Class Implementation Begins
40 //===----------------------------------------------------------------------===//
// Installs the operation actions (Expand / Custom / Legal) and the
// code-generation preferences that this lowering takes over from AMDIL.
// Everything is configured through setOperationAction() and the
// TargetLowering setters / MaxStoresPer* fields at the end of the function.
41 void AMDGPUTargetLowering::InitAMDILLowering() {
// Value-type tables. They are stored as int and cast back to
// MVT::SimpleValueType inside the loops that walk them.
42 static const int types[] = {
61 static const int IntTypes[] = {
68 static const int FloatTypes[] = {
73 static const int VectorTypes[] = {
85 const size_t NumTypes = array_lengthof(types);
86 const size_t NumFloatTypes = array_lengthof(FloatTypes);
87 const size_t NumIntTypes = array_lengthof(IntTypes);
88 const size_t NumVectorTypes = array_lengthof(VectorTypes);
// Subtarget handle; queried below for hardware f64 support.
90 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
91 // These are the current register classes that are
// Actions shared by every entry of types[]: the carry-using add/sub nodes,
// BR_JT, BRIND, SREM and the *MUL_LOHI nodes are expanded, while BRCOND is
// custom lowered.
94 for (unsigned int x = 0; x < NumTypes; ++x) {
95 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
97 setOperationAction(ISD::SUBE, VT, Expand);
98 setOperationAction(ISD::SUBC, VT, Expand);
99 setOperationAction(ISD::ADDE, VT, Expand);
100 setOperationAction(ISD::ADDC, VT, Expand);
101 setOperationAction(ISD::BRCOND, VT, Custom);
102 setOperationAction(ISD::BR_JT, VT, Expand);
103 setOperationAction(ISD::BRIND, VT, Expand);
104 // TODO: Implement custom UREM/SREM routines
105 setOperationAction(ISD::SREM, VT, Expand);
106 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
107 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// SDIV is custom lowered for every listed type except i64 and v2i64.
108 if (VT != MVT::i64 && VT != MVT::v2i64) {
109 setOperationAction(ISD::SDIV, VT, Custom);
// Actions for each entry of FloatTypes[].
// NOTE(review): SETOLT..SETULE are condition codes (the same kind of value
// this file passes to getSetCC), yet they are registered here through
// setOperationAction() — confirm whether setCondCodeAction() was intended.
112 for (unsigned int x = 0; x < NumFloatTypes; ++x) {
113 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
115 // IL does not have these operations for floating point types
116 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
117 setOperationAction(ISD::SETOLT, VT, Expand);
118 setOperationAction(ISD::SETOGE, VT, Expand);
119 setOperationAction(ISD::SETOGT, VT, Expand);
120 setOperationAction(ISD::SETOLE, VT, Expand);
121 setOperationAction(ISD::SETULT, VT, Expand);
122 setOperationAction(ISD::SETUGE, VT, Expand);
123 setOperationAction(ISD::SETUGT, VT, Expand);
124 setOperationAction(ISD::SETULE, VT, Expand);
// Actions for each entry of IntTypes[].
127 for (unsigned int x = 0; x < NumIntTypes; ++x) {
128 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
130 // GPU also does not have divrem function for signed or unsigned
131 setOperationAction(ISD::SDIVREM, VT, Expand);
133 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
134 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
135 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
137 setOperationAction(ISD::BSWAP, VT, Expand);
139 // GPU doesn't have any counting operators
140 setOperationAction(ISD::CTPOP, VT, Expand);
141 setOperationAction(ISD::CTTZ, VT, Expand);
142 setOperationAction(ISD::CTLZ, VT, Expand);
// Actions for each entry of VectorTypes[]: shuffles, SDIVREM, SMUL_LOHI and
// SELECT_CC are expanded.
145 for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
146 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
148 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
149 setOperationAction(ISD::SDIVREM, VT, Expand);
150 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
151 // setOperationAction(ISD::VSETCC, VT, Expand);
152 setOperationAction(ISD::SELECT_CC, VT, Expand);
// 64-bit integer handling: high-half multiplies on i64/v2i64 and the listed
// v2i64 arithmetic and conversion nodes are expanded; i64 constants are Legal.
155 setOperationAction(ISD::MULHU, MVT::i64, Expand);
156 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
157 setOperationAction(ISD::MULHS, MVT::i64, Expand);
158 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
159 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
160 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
161 setOperationAction(ISD::Constant , MVT::i64 , Legal);
162 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
163 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
164 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
165 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
166 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
// Settings that depend on hardware double-precision support.
167 if (STM.hasHWFP64()) {
168 // we support loading/storing v2f64 but not operations on the type
169 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
170 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
171 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
172 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
173 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
174 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
175 // We want to expand vector conversions into their scalar
177 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
178 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
179 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
180 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
181 setOperationAction(ISD::FABS, MVT::f64, Expand);
182 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
184 // TODO: Fix the UDIV24 algorithm so it works for these
185 // types correctly. This needs vector comparisons
186 // for this to work correctly.
187 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
188 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
189 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
190 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
// The same carry / branch actions, registered for MVT::Other.
191 setOperationAction(ISD::SUBC, MVT::Other, Expand);
192 setOperationAction(ISD::ADDE, MVT::Other, Expand);
193 setOperationAction(ISD::ADDC, MVT::Other, Expand);
194 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
195 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
196 setOperationAction(ISD::BRIND, MVT::Other, Expand);
199 // Use the default implementation.
200 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
201 setOperationAction(ISD::Constant , MVT::i32 , Legal);
// Scheduling preference and cost hints handed to the generic code generator.
203 setSchedulingPreference(Sched::RegPressure);
204 setPow2DivIsCheap(false);
205 setSelectIsExpensive(true);
206 setJumpIsExpensive(true);
// All three inline-expansion store limits are set to the same value, 4096.
208 MaxStoresPerMemcpy = 4096;
209 MaxStoresPerMemmove = 4096;
210 MaxStoresPerMemset = 4096;
// Hook taking an IntrinsicInfo record, the call instruction and the
// intrinsic ID.
// NOTE(review): only the signature is visible here; presumably Info is
// filled in for memory-touching intrinsics — confirm against the body.
215 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
216 const CallInst &I, unsigned Intrinsic) const {
220 // The backend supports 32 and 64 bit floating point immediates
// The visible condition inspects only VT: it holds when the scalar type of VT
// is f32 or f64, and does not look at the value of Imm.
// NOTE(review): getSimpleVT() is meant for simple value types — confirm that
// no extended EVT can reach this query.
222 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
223 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
224 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
// Query about shrinking floating point constants of type VT. The visible
// condition holds when the scalar type of VT is f32 or f64, the same test
// isFPImmLegal uses.
// NOTE(review): the value returned for each outcome is not visible here —
// confirm against the body.
232 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
233 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
234 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
242 // computeMaskedBitsForTargetNode - Compute which bits of Op are known to
243 // be zero or one. Op is expected to be a target specific node. Used by DAG
247 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
251 const SelectionDAG &DAG,
252 unsigned Depth) const {
// Start from "nothing known": both masks are cleared at KnownOne's bit width.
255 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
256 switch (Op.getOpcode()) {
// Two known-bits queries are issued; their results end up in
// KnownZero/KnownOne and KnownZero2/KnownOne2, which are merged below.
259 DAG.ComputeMaskedBits(
265 DAG.ComputeMaskedBits(
// Sanity check: a bit cannot be known-zero and known-one at the same time.
270 assert((KnownZero & KnownOne) == 0
271 && "Bits known to be one AND zero?");
272 assert((KnownZero2 & KnownOne2) == 0
273 && "Bits known to be one AND zero?");
274 // Only known if known in both the LHS and RHS
275 KnownOne &= KnownOne2;
276 KnownZero &= KnownZero2;
281 //===----------------------------------------------------------------------===//
282 // Other Lowering Hooks
283 //===----------------------------------------------------------------------===//
// Dispatches a signed division to a width-specific lowering routine, chosen
// by the scalar type of the result: i64 -> LowerSDIV64, i32 -> LowerSDIV32,
// i16 and i8 -> LowerSDIV24.
286 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
287 EVT OVT = Op.getValueType();
289 if (OVT.getScalarType() == MVT::i64) {
290 DST = LowerSDIV64(Op, DAG);
291 } else if (OVT.getScalarType() == MVT::i32) {
292 DST = LowerSDIV32(Op, DAG);
293 } else if (OVT.getScalarType() == MVT::i16
294 || OVT.getScalarType() == MVT::i8) {
295 DST = LowerSDIV24(Op, DAG);
// Presumably the fallback for any other type: hand back the original node
// unchanged.
297 DST = SDValue(Op.getNode(), 0);
// Dispatches a signed remainder to a width-specific lowering routine, chosen
// by the scalar type of the result: i64 -> LowerSREM64, i32 -> LowerSREM32,
// i16 -> LowerSREM16, i8 -> LowerSREM8.
303 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
304 EVT OVT = Op.getValueType();
306 if (OVT.getScalarType() == MVT::i64) {
307 DST = LowerSREM64(Op, DAG);
308 } else if (OVT.getScalarType() == MVT::i32) {
309 DST = LowerSREM32(Op, DAG);
310 } else if (OVT.getScalarType() == MVT::i16) {
311 DST = LowerSREM16(Op, DAG);
312 } else if (OVT.getScalarType() == MVT::i8) {
313 DST = LowerSREM8(Op, DAG);
// Presumably the fallback for any other type: hand back the original node
// unchanged.
315 DST = SDValue(Op.getNode(), 0);
// Produces an integer EVT built from i64 or i32 for numEle elements of
// `size` bits each. The result is either the scalar type itself or a vector
// of vEle such elements.
// NOTE(review): the conditions selecting among the four returns are not
// visible here — confirm against the full body.
321 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
// Total number of bits requested.
322 int iSize = (size * numEle);
// Number of 64-bit (when size == 64) or 32-bit units needed to hold iSize.
323 int vEle = (iSize >> ((size == 64) ? 6 : 5));
329 return EVT(MVT::i64);
331 return EVT(MVT::getVectorVT(MVT::i64, vEle));
335 return EVT(MVT::i32);
337 return EVT(MVT::getVectorVT(MVT::i32, vEle));
// Custom lowering for BRCOND: reads operand 0 as Chain, operand 1 as Cond
// and operand 2 as Jump, then builds an AMDGPUISD::BRANCH_COND node.
343 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
344 SDValue Chain = Op.getOperand(0);
345 SDValue Cond = Op.getOperand(1);
346 SDValue Jump = Op.getOperand(2);
348 Result = DAG.getNode(
349 AMDGPUISD::BRANCH_COND,
// Lowers a signed division on narrow integer types (LowerSDIV routes i8 and
// i16 here) through floating point: both operands are sign-extended to INTTY,
// converted to FLTTY, divided and truncated, and the integer quotient is then
// adjusted by jq, which is +1 or -1 depending on the operand signs.
//
// Op is the division node; operand 0 is the dividend, operand 1 the divisor.
//
// Fix: the product inside the mad(fqneg, fb, fa) step was created with the
// integer opcode ISD::MUL although both operands have the float type FLTTY;
// it now uses ISD::FMUL.
357 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
359 EVT OVT = Op.getValueType();
360 SDValue LHS = Op.getOperand(0);
361 SDValue RHS = Op.getOperand(1);
// INTTY / FLTTY are chosen by element count: scalar, 2 or 4 elements.
364 if (!OVT.isVector()) {
367 } else if (OVT.getVectorNumElements() == 2) {
370 } else if (OVT.getVectorNumElements() == 4) {
374 unsigned bitsize = OVT.getScalarType().getSizeInBits();
375 // char|short jq = ia ^ ib;
376 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
378 // jq = jq >> (bitsize - 2)
379 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
// jq = jq | 1, which leaves +1 for equal operand signs and -1 otherwise.
382 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
// NOTE(review): jq is converted to INTTY here, but the SELECT and ADD at the
// end of the function are typed OVT — confirm the intended type of jq.
385 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
387 // int ia = (int)LHS;
388 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
391 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
393 // float fa = (float)ia;
394 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
396 // float fb = (float)ib;
397 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
399 // float fq = native_divide(fa, fb);
400 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
// Drop the fractional part of the quotient.
403 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
405 // float fqneg = -fq;
406 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
408 // float fr = mad(fqneg, fb, fa);
// The operands are floating point, so the multiply must be FMUL.
409 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
410 DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
// Integer form of the truncated quotient.
413 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
// Compare the magnitudes of the remainder and the divisor.
416 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
419 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
421 // int cv = fr >= fb;
// NOTE(review): the two getSetCC calls below are identical, so the test on
// INTTY has no effect on the node that is built.
423 if (INTTY == MVT::i32) {
424 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
426 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
428 // jq = (cv ? jq : 0);
429 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
430 DAG.getConstant(0, OVT));
// Final quotient: the truncated quotient plus the selected correction.
432 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
433 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
// Lowers a 32-bit signed division onto an unsigned division: both operands
// are transformed with (x + mask) ^ mask, divided with ISD::UDIV, and the
// quotient is transformed again with the XOR of the two masks.
438 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
440 EVT OVT = Op.getValueType();
441 SDValue LHS = Op.getOperand(0);
442 SDValue RHS = Op.getOperand(1);
443 // The LowerSDIV32 function generates code equivalent to the following IL.
453 // ixor r10, r10, r11
// r10 / r11 select between -1 and 0 from a comparison of each operand with 0.
// NOTE(review): the condition code is not visible here; presumably "less
// than", which would make these sign masks. The selected constants are typed
// MVT::i32 while the compared constant uses OVT — confirm for vector OVT.
464 SDValue r10 = DAG.getSelectCC(DL,
465 r0, DAG.getConstant(0, OVT),
466 DAG.getConstant(-1, MVT::i32),
467 DAG.getConstant(0, MVT::i32),
471 SDValue r11 = DAG.getSelectCC(DL,
472 r1, DAG.getConstant(0, OVT),
473 DAG.getConstant(-1, MVT::i32),
474 DAG.getConstant(0, MVT::i32),
// (x + mask) ^ mask negates x when mask is -1 and leaves it unchanged when
// mask is 0.
478 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
481 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
484 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
487 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// Unsigned division of the transformed operands.
490 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
492 // ixor r10, r10, r11
493 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
// Apply the combined mask to the quotient with the same add/xor pattern.
496 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
499 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// No custom lowering is performed for 64-bit signed division: the node that
// was passed in is returned as-is (result number 0); DAG is not used.
504 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
505 return SDValue(Op.getNode(), 0);
// Lowers an 8-bit signed remainder by widening: both operands are converted
// to INTTY with getSExtOrTrunc, an ISD::SREM node is built in INTTY, and the
// result is converted back to the original type.
509 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
511 EVT OVT = Op.getValueType();
// INTTY defaults to i32; the v2i8 / v4i8 cases presumably switch it to the
// matching vector type (the assignments are not visible here).
512 MVT INTTY = MVT::i32;
513 if (OVT == MVT::v2i8) {
515 } else if (OVT == MVT::v4i8) {
518 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
519 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
520 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
// Convert the remainder back to the original type.
521 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
// Lowers a 16-bit signed remainder by widening: both operands are converted
// to INTTY with getSExtOrTrunc, an ISD::SREM node is built in INTTY, and the
// result is converted back to the original type.
526 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
528 EVT OVT = Op.getValueType();
// INTTY defaults to i32; the v2i16 / v4i16 cases presumably switch it to the
// matching vector type (the assignments are not visible here).
529 MVT INTTY = MVT::i32;
530 if (OVT == MVT::v2i16) {
532 } else if (OVT == MVT::v4i16) {
535 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
536 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
537 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
// Convert the remainder back to the original type.
538 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
// Lowers a 32-bit signed remainder onto unsigned arithmetic: both operands
// are transformed with (x + mask) ^ mask, the remainder is formed as
// r0 - (r0 / r1) * r1, and the result is transformed again with the
// dividend's mask.
//
// Op is the remainder node; operand 0 is the dividend, operand 1 the divisor.
//
// Fix: the quotient step was built with ISD::UREM, which made the
// multiply/subtract sequence compute r0 - (r0 % r1) * r1 (for example 4
// instead of 1 for 7 rem 3). It now uses ISD::UDIV.
543 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
545 EVT OVT = Op.getValueType();
546 SDValue LHS = Op.getOperand(0);
547 SDValue RHS = Op.getOperand(1);
548 // The LowerSREM32 function generates code equivalent to the following IL.
// r10 / r11 are the results of "operand < 0".
// NOTE(review): the add/xor steps below rely on a true setcc being all-ones
// (-1) — confirm the target's boolean contents.
570 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
573 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// (x + mask) ^ mask negates x when mask is -1 and leaves it unchanged when
// mask is 0.
576 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
579 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
582 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
585 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// Quotient of the transformed operands.
588 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
// Quotient times divisor.
591 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
// Remainder = dividend - quotient * divisor.
594 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
// Apply the dividend's mask to the remainder with the same add/xor pattern.
597 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
600 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// No custom lowering is performed for 64-bit signed remainder: the node that
// was passed in is returned as-is (result number 0); DAG is not used.
605 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
606 return SDValue(Op.getNode(), 0);