1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 /// \brief TargetLowering functions borrowed from AMDIL.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUISelLowering.h"
16 #include "AMDGPURegisterInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "AMDILIntrinsicInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/PseudoSourceValue.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetOptions.h"
34 //===----------------------------------------------------------------------===//
35 // TargetLowering Implementation Help Functions End
36 //===----------------------------------------------------------------------===//
38 //===----------------------------------------------------------------------===//
39 // TargetLowering Class Implementation Begins
40 //===----------------------------------------------------------------------===//
// Registers the AMDIL-inherited lowering configuration on this
// AMDGPUTargetLowering: marks DAG operations as Expand/Custom/Legal per
// value type and sets scheduling/codegen preferences.
// NOTE(review): this listing is incomplete — the embedded original line
// numbers jump (e.g. 42 -> 61), so the type-array initializers and several
// closing braces are not visible here. Code is kept byte-identical.
41 void AMDGPUTargetLowering::InitAMDILLowering() {
// Value-type tables; initializer lists are missing from this listing.
42 static const MVT::SimpleValueType types[] = {
61 static const MVT::SimpleValueType IntTypes[] = {
68 static const MVT::SimpleValueType FloatTypes[] = {
73 static const MVT::SimpleValueType VectorTypes[] = {
86 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
87 // These are the current register classes that are
// Per-type actions for every type in `types`: no carry-using add/sub,
// custom conditional branch, no jump tables or indirect branches.
90 for (MVT VT : types) {
91 setOperationAction(ISD::SUBE, VT, Expand);
92 setOperationAction(ISD::SUBC, VT, Expand);
93 setOperationAction(ISD::ADDE, VT, Expand);
94 setOperationAction(ISD::ADDC, VT, Expand);
95 setOperationAction(ISD::BRCOND, VT, Custom);
96 setOperationAction(ISD::BR_JT, VT, Expand);
97 setOperationAction(ISD::BRIND, VT, Expand);
98 // TODO: Implement custom UREM/SREM routines
99 setOperationAction(ISD::SREM, VT, Expand);
100 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
101 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// Custom SDIV only for sub-64-bit types; 64-bit division is handled
// elsewhere (LowerSDIV64 below currently just returns the node).
102 if (VT != MVT::i64 && VT != MVT::v2i64) {
103 setOperationAction(ISD::SDIV, VT, Custom);
106 for (MVT VT : FloatTypes) {
107 // IL does not have these operations for floating point types
108 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
109 setOperationAction(ISD::SETOLT, VT, Expand);
110 setOperationAction(ISD::SETOGE, VT, Expand);
111 setOperationAction(ISD::SETOGT, VT, Expand);
112 setOperationAction(ISD::SETOLE, VT, Expand);
113 setOperationAction(ISD::SETULT, VT, Expand);
114 setOperationAction(ISD::SETUGE, VT, Expand);
115 setOperationAction(ISD::SETUGT, VT, Expand);
116 setOperationAction(ISD::SETULE, VT, Expand);
119 for (MVT VT : IntTypes) {
120 // GPU also does not have divrem function for signed or unsigned
121 setOperationAction(ISD::SDIVREM, VT, Expand);
123 // GPU does not have [S|U]MUL_LOHI functions as a single instruction
124 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
125 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
127 setOperationAction(ISD::BSWAP, VT, Expand);
130 for (MVT VT : VectorTypes) {
131 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
132 setOperationAction(ISD::SDIVREM, VT, Expand);
133 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
134 // setOperationAction(ISD::VSETCC, VT, Expand);
135 setOperationAction(ISD::SELECT_CC, VT, Expand);
// 64-bit scalar/vector integer ops the hardware lacks.
138 setOperationAction(ISD::MULHU, MVT::i64, Expand);
139 setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
140 setOperationAction(ISD::MULHS, MVT::i64, Expand);
141 setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
142 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
143 setOperationAction(ISD::SREM, MVT::v2i64, Expand);
144 setOperationAction(ISD::Constant , MVT::i64 , Legal);
145 setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
146 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
147 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
148 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
149 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
// Extra f64/v2f64 actions only when the subtarget has hardware FP64.
150 if (STM.hasHWFP64()) {
151 // we support loading/storing v2f64 but not operations on the type
152 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
153 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
154 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
155 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
156 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
157 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
158 // We want to expand vector conversions into their scalar
160 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
161 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
162 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
163 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
164 setOperationAction(ISD::FABS, MVT::f64, Expand);
165 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
167 // TODO: Fix the UDIV24 algorithm so it works for these
168 // types correctly. This needs vector comparisons
169 // for this to work correctly.
170 setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
171 setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
172 setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
173 setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
// Chain-typed (MVT::Other) versions of the control-flow/carry actions.
174 setOperationAction(ISD::SUBC, MVT::Other, Expand);
175 setOperationAction(ISD::ADDE, MVT::Other, Expand);
176 setOperationAction(ISD::ADDC, MVT::Other, Expand);
177 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
178 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
179 setOperationAction(ISD::BRIND, MVT::Other, Expand);
182 // Use the default implementation.
183 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
184 setOperationAction(ISD::Constant , MVT::i32 , Legal);
// Global codegen preferences: schedule for register pressure; selects,
// jumps and pow2 division are all treated as expensive on this target.
186 setSchedulingPreference(Sched::RegPressure);
187 setPow2DivIsCheap(false);
188 setSelectIsExpensive(true);
189 setJumpIsExpensive(true);
// Very high store-expansion limits: effectively always inline
// memcpy/memmove/memset rather than calling a library routine.
191 MaxStoresPerMemcpy = 4096;
192 MaxStoresPerMemmove = 4096;
193 MaxStoresPerMemset = 4096;
// Hook describing target memory intrinsics to the SelectionDAG builder.
// NOTE(review): only the signature survives in this listing — the return
// type line and the body (original lines ~200+) are missing.
198 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
199 const CallInst &I, unsigned Intrinsic) const {
203 // The backend supports 32 and 64 bit floating point immediates
// Returns whether an FP immediate of type VT can be materialized directly.
// NOTE(review): the return statements (original lines ~208-212) are missing
// from this listing; presumably f32/f64 scalar types return true — confirm.
205 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
206 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
207 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
// Whether an FP constant of type VT should be shrunk to a smaller type in
// the constant pool. Mirrors the f32/f64 test in isFPImmLegal above.
// NOTE(review): return statements are missing from this listing.
215 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
216 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
217 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
225 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
226 // be zero. Op is expected to be a target specific node. Used by DAG
229 //===----------------------------------------------------------------------===//
230 // Other Lowering Hooks
231 //===----------------------------------------------------------------------===//
// Custom lowering entry point for ISD::SDIV: dispatches on the scalar
// width of the operand type to a width-specific helper (i64 -> LowerSDIV64,
// i32 -> LowerSDIV32, i16/i8 -> LowerSDIV24); anything else is passed
// through unchanged.
// NOTE(review): the declaration of DST and the final return (original
// lines 236, 246-248) are missing from this listing.
234 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
235 EVT OVT = Op.getValueType();
237 if (OVT.getScalarType() == MVT::i64) {
238 DST = LowerSDIV64(Op, DAG);
239 } else if (OVT.getScalarType() == MVT::i32) {
240 DST = LowerSDIV32(Op, DAG);
241 } else if (OVT.getScalarType() == MVT::i16
242 || OVT.getScalarType() == MVT::i8) {
243 DST = LowerSDIV24(Op, DAG);
// Fallback: leave the node untouched.
245 DST = SDValue(Op.getNode(), 0);
// Custom lowering entry point for ISD::SREM: dispatches by scalar width to
// LowerSREM64/32/16/8; unknown widths fall through unchanged.
// NOTE(review): the DST declaration and final return are missing from this
// listing (gaps at original lines 253, 262-266).
251 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
252 EVT OVT = Op.getValueType();
254 if (OVT.getScalarType() == MVT::i64) {
255 DST = LowerSREM64(Op, DAG);
256 } else if (OVT.getScalarType() == MVT::i32) {
257 DST = LowerSREM32(Op, DAG);
258 } else if (OVT.getScalarType() == MVT::i16) {
259 DST = LowerSREM16(Op, DAG);
260 } else if (OVT.getScalarType() == MVT::i8) {
261 DST = LowerSREM8(Op, DAG);
// Fallback: leave the node untouched.
263 DST = SDValue(Op.getNode(), 0);
// Builds an integer EVT of total width `size * numEle` bits, expressed as
// scalar i32/i64 or a vector thereof: vEle is the element count after
// dividing the total bit size by 64 (size == 64) or 32 (otherwise).
// NOTE(review): the branch structure selecting among the four returns is
// missing from this listing (gaps at original lines 272-276, 278, 280-284).
269 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
270 int iSize = (size * numEle);
271 int vEle = (iSize >> ((size == 64) ? 6 : 5));
277 return EVT(MVT::i64);
279 return EVT(MVT::getVectorVT(MVT::i64, vEle));
283 return EVT(MVT::i32);
285 return EVT(MVT::getVectorVT(MVT::i32, vEle));
// Custom lowering for ISD::BRCOND: rebuilds the node as a target
// AMDGPUISD::BRANCH_COND from the (chain, condition, target-block) operands.
// NOTE(review): the getNode argument list and the return are cut off in
// this listing (original lines 298-302 missing).
291 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
292 SDValue Chain = Op.getOperand(0);
293 SDValue Cond = Op.getOperand(1);
294 SDValue Jump = Op.getOperand(2);
296 Result = DAG.getNode(
297 AMDGPUISD::BRANCH_COND,
// Lowers signed division of i8/i16 (scalar or vector) by computing the
// quotient in 32-bit float: convert both operands to float, divide with the
// target's fast reciprocal divide (DIV_INF), truncate, then apply a +/-1
// correction (jq) when the rounded-down quotient is one off, selected by
// comparing |fr| >= |fb|.
// NOTE(review): the INTTY/FLTTY type selection for the vector cases and the
// final return are missing from this listing (gaps at original lines
// 310-321, 328-334, 382-384).
305 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
307 EVT OVT = Op.getValueType();
308 SDValue LHS = Op.getOperand(0);
309 SDValue RHS = Op.getOperand(1);
// Pick the 32-bit working int/float types matching OVT's element count.
312 if (!OVT.isVector()) {
315 } else if (OVT.getVectorNumElements() == 2) {
318 } else if (OVT.getVectorNumElements() == 4) {
322 unsigned bitsize = OVT.getScalarType().getSizeInBits();
323 // char|short jq = ia ^ ib;
324 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
326 // jq = jq >> (bitsize - 2)
// Arithmetic shift keeps only the sign-disagreement bit pattern.
327 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
// Force the correction term to +/-1.
330 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
333 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
335 // int ia = (int)LHS;
336 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
339 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
341 // float fa = (float)ia;
342 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
344 // float fb = (float)ib;
345 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
347 // float fq = native_divide(fa, fb);
348 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
// Truncate toward zero to get an integral quotient candidate.
351 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
353 // float fqneg = -fq;
354 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
356 // float fr = mad(fqneg, fb, fa);
// NOTE(review): uses ISD::MUL (integer) on a float type here rather than
// ISD::FMUL — looks suspicious, but the code is kept as-is.
357 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
358 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
361 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
// Compare magnitudes of the residual and divisor.
364 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
367 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
369 // int cv = fr >= fb;
// Both branches emit the same SETOGE comparison; the i32 special case
// appears vestigial.
371 if (INTTY == MVT::i32) {
372 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
374 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
376 // jq = (cv ? jq : 0);
377 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
378 DAG.getConstant(0, OVT));
// Apply the correction and narrow back to the original type.
380 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
381 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
// Lowers 32-bit signed division as sign(compensated) unsigned division:
// compute sign masks r10/r11 for each operand, take absolute values via the
// add-then-xor trick, do an unsigned divide, then re-apply the combined
// sign (r10 ^ r11) to the quotient with the inverse xor-then... sequence.
// NOTE(review): most of the IL pseudocode comment block (original lines
// 392-411) and the final return are missing from this listing.
386 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
388 EVT OVT = Op.getValueType();
389 SDValue LHS = Op.getOperand(0);
390 SDValue RHS = Op.getOperand(1);
391 // The LowerSDIV32 function generates equivalent to the following IL.
401 // ixor r10, r10, r11
// r10 = (LHS < 0) ? -1 : 0  — all-ones sign mask of the dividend.
412 SDValue r10 = DAG.getSelectCC(DL,
413 r0, DAG.getConstant(0, OVT),
414 DAG.getConstant(-1, MVT::i32),
415 DAG.getConstant(0, MVT::i32),
// r11 = (RHS < 0) ? -1 : 0  — all-ones sign mask of the divisor.
419 SDValue r11 = DAG.getSelectCC(DL,
420 r1, DAG.getConstant(0, OVT),
421 DAG.getConstant(-1, MVT::i32),
422 DAG.getConstant(0, MVT::i32),
// |x| = (x + mask) ^ mask for two's-complement absolute value.
426 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
429 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
432 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
435 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// Unsigned divide of the magnitudes.
438 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
440 // ixor r10, r10, r11
// Combined result-sign mask; negate quotient iff signs differed.
441 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
444 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
447 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// 64-bit signed division is not custom-lowered: returns the original node
// unchanged (i64 SDIV is excluded from the Custom action in
// InitAMDILLowering).
452 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
453 return SDValue(Op.getNode(), 0);
// Lowers i8 (scalar or vector) signed remainder by sign-extending both
// operands to a 32-bit integer type, doing the SREM there, and truncating
// the result back to the original type.
// NOTE(review): the v2i8/v4i8 branches assigning INTTY and the final return
// are missing from this listing (gaps at original lines 462, 464-465,
// 470-472); presumably they select v2i32/v4i32 — confirm against history.
457 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
459 EVT OVT = Op.getValueType();
460 MVT INTTY = MVT::i32;
461 if (OVT == MVT::v2i8) {
463 } else if (OVT == MVT::v4i8) {
466 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
467 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
468 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
469 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
// Lowers i16 (scalar or vector) signed remainder the same way as
// LowerSREM8: widen to 32-bit, SREM, truncate back.
// NOTE(review): the v2i16/v4i16 branches assigning INTTY and the final
// return are missing from this listing (gaps at original lines 479,
// 481-482, 487-489).
474 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
476 EVT OVT = Op.getValueType();
477 MVT INTTY = MVT::i32;
478 if (OVT == MVT::v2i16) {
480 } else if (OVT == MVT::v4i16) {
483 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
484 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
485 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
486 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
// Lowers 32-bit signed remainder via unsigned remainder of magnitudes:
// take absolute values with the sign-mask add/xor trick, UREM, reconstruct
// LHS - (LHS/RHS)*RHS via UMUL and SUB, then re-apply the dividend's sign
// (C-style remainder: result takes the sign of the dividend, r10 only).
// NOTE(review): the IL pseudocode comment block (original lines 497-517)
// and the final return are missing from this listing.
491 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
493 EVT OVT = Op.getValueType();
494 SDValue LHS = Op.getOperand(0);
495 SDValue RHS = Op.getOperand(1);
496 // The LowerSREM32 function generates equivalent to the following IL.
// Sign masks: all-ones when the operand is negative.
518 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
521 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// |x| = (x + mask) ^ mask.
524 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
527 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
530 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
533 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// Unsigned remainder of the magnitudes, then r0 - (r20 * r1)... the UREM
// result is multiplied back and subtracted per the original IL sequence.
536 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
539 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
542 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
// Re-apply the dividend's sign only (remainder follows the dividend).
545 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
548 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// 64-bit signed remainder is not custom-lowered: returns the original node
// unchanged, mirroring LowerSDIV64.
553 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
554 return SDValue(Op.getNode(), 0);