1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //==-----------------------------------------------------------------------===//
11 /// \brief TargetLowering functions borrowed from AMDIL.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUISelLowering.h"
16 #include "AMDGPURegisterInfo.h"
17 #include "AMDGPUSubtarget.h"
18 #include "AMDILIntrinsicInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/PseudoSourceValue.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetOptions.h"
34 //===----------------------------------------------------------------------===//
35 // TargetLowering Implementation Help Functions End
36 //===----------------------------------------------------------------------===//
38 //===----------------------------------------------------------------------===//
39 // TargetLowering Class Implementation Begins
40 //===----------------------------------------------------------------------===//
// Registers the operation actions (Expand / Custom / Legal) that this lowering
// borrows from AMDIL by calling setOperationAction() for a fixed set of
// opcodes and value types.
// NOTE(review): this listing omits several original lines (the contents of the
// three type tables, some closing braces, and at least one line between the
// UMUL_LOHI and SDIV calls), so the exact extent of each loop / if below
// cannot be confirmed from here.
41 void AMDGPUTargetLowering::InitAMDILLowering() {
// Type tables iterated by the three range-for loops below.
42 static const MVT::SimpleValueType types[] = {
53 static const MVT::SimpleValueType FloatTypes[] = {
58 static const MVT::SimpleValueType VectorTypes[] = {
// Subtarget handle; only queried for hasHWFP64() further down.
65 const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
// Per-type actions for every entry of `types`: carry/borrow arithmetic,
// indirect/jump-table branches, SREM and the *_LOHI multiplies are expanded;
// BRCOND and SDIV are custom-lowered.
67 for (MVT VT : types) {
68 setOperationAction(ISD::SUBE, VT, Expand);
69 setOperationAction(ISD::SUBC, VT, Expand);
70 setOperationAction(ISD::ADDE, VT, Expand);
71 setOperationAction(ISD::ADDC, VT, Expand);
72 setOperationAction(ISD::BRCOND, VT, Custom);
73 setOperationAction(ISD::BR_JT, VT, Expand);
74 setOperationAction(ISD::BRIND, VT, Expand);
75 // TODO: Implement custom UREM/SREM routines
76 setOperationAction(ISD::SREM, VT, Expand);
77 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
78 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// NOTE(review): the original line just before this one is not shown; the SDIV
// registration may be guarded by a condition -- confirm against the full file.
80 setOperationAction(ISD::SDIV, VT, Custom);
// Per-type actions for every entry of `FloatTypes`.
83 for (MVT VT : FloatTypes) {
84 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
// Per-type actions for every entry of `VectorTypes`.
87 for (MVT VT : VectorTypes) {
88 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
89 setOperationAction(ISD::SELECT_CC, VT, Expand);
// 64-bit high-half multiplies are expanded.
92 setOperationAction(ISD::MULHU, MVT::i64, Expand);
93 setOperationAction(ISD::MULHS, MVT::i64, Expand);
// With hardware f64 support: f64 constants are legal, f64 FABS is expanded.
94 if (STM.hasHWFP64()) {
95 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
96 setOperationAction(ISD::FABS, MVT::f64, Expand);
// The same carry / branch actions registered for MVT::Other.
99 setOperationAction(ISD::SUBC, MVT::Other, Expand);
100 setOperationAction(ISD::ADDE, MVT::Other, Expand);
101 setOperationAction(ISD::ADDC, MVT::Other, Expand);
102 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
103 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
104 setOperationAction(ISD::BRIND, MVT::Other, Expand);
// Integer (i32/i64) and f32 constants are legal.
106 setOperationAction(ISD::Constant, MVT::i32, Legal);
107 setOperationAction(ISD::Constant, MVT::i64, Legal);
108 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
// Cost hints passed to the generic lowering code.
110 setPow2DivIsCheap(false);
111 setSelectIsExpensive(true); // FIXME: This makes no sense at all
// Takes a mutable IntrinsicInfo reference, the call instruction and the
// intrinsic ID.
// NOTE(review): the return type and the body are not visible in this listing;
// confirm what is written to Info (if anything) against the full file.
115 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
116 const CallInst &I, unsigned Intrinsic) const {
120 // The backend supports 32 and 64 bit floating point immediates
// Tests whether the scalar (element) type of VT is f32 or f64; Imm is not
// inspected by the visible lines.
// NOTE(review): the values returned by each branch are not visible in this
// listing. getSimpleVT() is called without an isSimple() check -- confirm
// callers never pass an extended value type.
122 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
123 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
124 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
// Tests whether the scalar (element) type of VT is f32 or f64, using the same
// condition as isFPImmLegal.
// NOTE(review): the values returned by each branch are not visible in this
// listing; getSimpleVT() is called without an isSimple() check -- confirm
// callers never pass an extended value type.
132 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
133 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
134 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
142 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
143 // be zero. Op is expected to be a target specific node. Used by the DAG combiner.
146 //===----------------------------------------------------------------------===//
147 // Other Lowering Hooks
148 //===----------------------------------------------------------------------===//
// Dispatches a signed-division node to a width-specific helper, chosen by the
// scalar (element) type of the node's value type:
//   i64        -> LowerSDIV64
//   i32        -> LowerSDIV32
//   i16 or i8  -> LowerSDIV24
// NOTE(review): the declaration of DST, the final `else` line and the return
// statement are not visible in this listing; the last assignment below
// presumably is the fallback that hands the node back unchanged.
151 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
152 EVT OVT = Op.getValueType();
154 if (OVT.getScalarType() == MVT::i64) {
155 DST = LowerSDIV64(Op, DAG);
156 } else if (OVT.getScalarType() == MVT::i32) {
157 DST = LowerSDIV32(Op, DAG);
158 } else if (OVT.getScalarType() == MVT::i16
159 || OVT.getScalarType() == MVT::i8) {
160 DST = LowerSDIV24(Op, DAG);
// Result 0 of the original node, i.e. Op itself.
162 DST = SDValue(Op.getNode(), 0);
// Dispatches a signed-remainder node to a width-specific helper, chosen by the
// scalar (element) type of the node's value type:
//   i64 -> LowerSREM64, i32 -> LowerSREM32, i16 -> LowerSREM16, i8 -> LowerSREM8
// NOTE(review): the declaration of DST, the final `else` line and the return
// statement are not visible in this listing; the last assignment below
// presumably is the fallback that hands the node back unchanged.
168 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
169 EVT OVT = Op.getValueType();
171 if (OVT.getScalarType() == MVT::i64) {
172 DST = LowerSREM64(Op, DAG);
173 } else if (OVT.getScalarType() == MVT::i32) {
174 DST = LowerSREM32(Op, DAG);
175 } else if (OVT.getScalarType() == MVT::i16) {
176 DST = LowerSREM16(Op, DAG);
177 } else if (OVT.getScalarType() == MVT::i8) {
178 DST = LowerSREM8(Op, DAG);
// Result 0 of the original node, i.e. Op itself.
180 DST = SDValue(Op.getNode(), 0);
// Builds an integer value type (scalar or vector of i32 / i64) from an element
// bit width `size` and an element count `numEle`.
// NOTE(review): the conditions selecting among the four returns are not
// visible in this listing.
186 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
// Total number of bits requested.
187 int iSize = (size * numEle);
// Total bits divided by 64 when size == 64, otherwise by 32.
188 int vEle = (iSize >> ((size == 64) ? 6 : 5));
194 return EVT(MVT::i64);
196 return EVT(MVT::getVectorVT(MVT::i64, vEle));
200 return EVT(MVT::i32);
202 return EVT(MVT::getVectorVT(MVT::i32, vEle));
// Custom lowering for BRCOND: reads operands 0, 1 and 2 of the node as the
// chain, the condition and the jump target, and starts building an
// AMDGPUISD::BRANCH_COND node.
// NOTE(review): the declaration of Result, the remaining getNode() arguments
// and the return statement are not visible in this listing.
208 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
209 SDValue Chain = Op.getOperand(0);
210 SDValue Cond = Op.getOperand(1);
211 SDValue Jump = Op.getOperand(2);
213 Result = DAG.getNode(
214 AMDGPUISD::BRANCH_COND,
// Lowers a narrow (i8 / i16, per the dispatcher LowerSDIV) signed division by
// going through floating point: both operands are sign-extended to INTTY,
// converted to FLTTY, divided, truncated, and the integer quotient is then
// corrected by +/-1 (jq) when |remainder| >= |divisor|.
// Fix: the multiply inside the mad() step operates on FLTTY values, so it must
// use the floating-point opcode ISD::FMUL rather than the integer ISD::MUL.
// NOTE(review): this listing omits several original lines (the SDLoc, the
// INTTY / FLTTY selection inside the branches below, the declaration of cv,
// the `else` line and the return), so those parts cannot be documented here.
222 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
224 EVT OVT = Op.getValueType();
225 SDValue LHS = Op.getOperand(0);
226 SDValue RHS = Op.getOperand(1);
// Scalar / 2-element / 4-element cases; presumably each picks matching
// INTTY and FLTTY types (bodies not visible -- confirm).
229 if (!OVT.isVector()) {
232 } else if (OVT.getVectorNumElements() == 2) {
235 } else if (OVT.getVectorNumElements() == 4) {
239 unsigned bitsize = OVT.getScalarType().getSizeInBits();
240 // char|short jq = ia ^ ib;
241 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
243 // jq = jq >> (bitsize - 2)
244 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
// jq = jq | 1;  (makes jq either +1 or -1 depending on the operand signs)
247 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
// jq = (int)jq;
250 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
252 // int ia = (int)LHS;
253 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
// int ib = (int)RHS;
256 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
258 // float fa = (float)ia;
259 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
261 // float fb = (float)ib;
262 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
264 // float fq = native_divide(fa, fb);
265 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
// fq = trunc(fq);
268 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
270 // float fqneg = -fq;
271 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
273 // float fr = mad(fqneg, fb, fa);
// Floating-point multiply-then-add: both nodes are typed FLTTY.
274 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
275 DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
// int iq = (int)fq;
278 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
// fr = fabs(fr);
281 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
// fb = fabs(fb);
284 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
286 // int cv = fr >= fb;
// NOTE(review): both visible assignments to cv are identical, so the
// INTTY == MVT::i32 test does not change the result as written.
288 if (INTTY == MVT::i32) {
289 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
291 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
293 // jq = (cv ? jq : 0);
294 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
295 DAG.getConstant(0, OVT));
// dst = iq + jq, computed in the original value type.
297 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
298 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
// Lowers a 32-bit signed division to an unsigned division plus sign fix-ups:
// sign masks r10 / r11 are derived from the operands, each operand is
// transformed with (x + mask) ^ mask, the results are divided with UDIV, and
// the quotient is transformed the same way with the combined mask r10 ^ r11.
// NOTE(review): this listing omits the SDLoc, the definitions of r0 / r1
// (presumably copies of LHS / RHS), most of the IL comment, the condition-code
// arguments of both getSelectCC calls and the return statement.
303 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
305 EVT OVT = Op.getValueType();
306 SDValue LHS = Op.getOperand(0);
307 SDValue RHS = Op.getOperand(1);
308 // The LowerSDIV32 function generates equivalent to the following IL.
318 // ixor r10, r10, r11
// r10 = select_cc(r0, 0, -1, 0, <cond not visible>)
// NOTE(review): the true/false constants are built as MVT::i32 while the
// compared constant uses OVT -- confirm this is intended for vector OVT.
329 SDValue r10 = DAG.getSelectCC(DL,
330 r0, DAG.getConstant(0, OVT),
331 DAG.getConstant(-1, MVT::i32),
332 DAG.getConstant(0, MVT::i32),
// r11 = select_cc(r1, 0, -1, 0, <cond not visible>)
336 SDValue r11 = DAG.getSelectCC(DL,
337 r1, DAG.getConstant(0, OVT),
338 DAG.getConstant(-1, MVT::i32),
339 DAG.getConstant(0, MVT::i32),
// r0 = r0 + r10
343 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// r1 = r1 + r11
346 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
// r0 = r0 ^ r10
349 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// r1 = r1 ^ r11
352 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// r0 = r0 udiv r1
355 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
357 // ixor r10, r10, r11
358 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
// r0 = r0 + r10
361 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// DST = r0 ^ r10
364 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// 64-bit signed division is not lowered here: the function returns result 0
// of the incoming node unchanged. DAG is unused.
369 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
370 return SDValue(Op.getNode(), 0);
// Lowers an 8-bit signed remainder by widening: both operands are
// sign-extended (or truncated) to INTTY, an SREM node is built at that type,
// and the result is converted back to the original value type.
// NOTE(review): the SDLoc, the bodies of the v2i8 / v4i8 branches (presumably
// they switch INTTY to a matching vector type) and the return statement are
// not visible in this listing.
374 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
376 EVT OVT = Op.getValueType();
// Working type; i32 unless one of the vector branches below changes it.
377 MVT INTTY = MVT::i32;
378 if (OVT == MVT::v2i8) {
380 } else if (OVT == MVT::v4i8) {
383 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
384 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
// Remainder computed at the working type, then converted back to OVT.
385 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
386 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
// Lowers a 16-bit signed remainder by widening: both operands are
// sign-extended (or truncated) to INTTY, an SREM node is built at that type,
// and the result is converted back to the original value type.
// NOTE(review): the SDLoc, the bodies of the v2i16 / v4i16 branches
// (presumably they switch INTTY to a matching vector type) and the return
// statement are not visible in this listing.
391 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
393 EVT OVT = Op.getValueType();
// Working type; i32 unless one of the vector branches below changes it.
394 MVT INTTY = MVT::i32;
395 if (OVT == MVT::v2i16) {
397 } else if (OVT == MVT::v4i16) {
400 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
401 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
// Remainder computed at the working type, then converted back to OVT.
402 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
403 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
// Lowers a 32-bit signed remainder to unsigned arithmetic plus sign fix-ups:
// masks r10 / r11 are derived from "operand < 0", each operand is transformed
// with (x + mask) ^ mask, the remainder is formed as r0 - (r0 / r1) * r1, and
// the result is transformed again with the dividend's mask r10.
// Fix: the quotient step must be ISD::UDIV. With ISD::UREM the following
// multiply/subtract computed r0 - (r0 % r1) * r1, which is not the remainder
// (e.g. 7 and 3 gave 7 - 1 * 3 = 4 instead of 1).
// NOTE(review): this listing omits the SDLoc, the definitions of r0 / r1
// (presumably copies of LHS / RHS), the IL comment body and the return
// statement. The (x + mask) ^ mask step negates only if the setcc result is
// all-ones for "true" -- confirm the target's boolean contents.
408 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
410 EVT OVT = Op.getValueType();
411 SDValue LHS = Op.getOperand(0);
412 SDValue RHS = Op.getOperand(1);
413 // The LowerSREM32 function generates equivalent to the following IL.
// r10 = (r0 < 0)
435 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
// r11 = (r1 < 0)
438 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// r0 = r0 + r10
441 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// r1 = r1 + r11
444 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
// r0 = r0 ^ r10
447 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// r1 = r1 ^ r11
450 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
// r20 = r0 udiv r1  (unsigned quotient)
453 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
// r20 = r20 * r1
456 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
// r0 = r0 - r20  (unsigned remainder)
459 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
// r0 = r0 + r10
462 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
// DST = r0 ^ r10
465 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
// 64-bit signed remainder is not lowered here: the function returns result 0
// of the incoming node unchanged. DAG is unused.
470 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
471 return SDValue(Op.getNode(), 0);