R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
- setOperationAction(ISD::MUL, MVT::i64, Expand);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
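+ // R600 has no native vector integer ALU operations, so the vector ops
+ // below are marked Expand and legalization unrolls them into per-channel
+ // scalar operations.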
setOperationAction(ISD::AND, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i32, Expand);
+ setOperationAction(ISD::MUL, MVT::v4i32, Expand);
+ setOperationAction(ISD::OR, MVT::v4i32, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::SHL, MVT::v4i32, Expand);
+ setOperationAction(ISD::SHL, MVT::v2i32, Expand);
+ setOperationAction(ISD::SRL, MVT::v4i32, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i32, Expand);
+ setOperationAction(ISD::SRA, MVT::v4i32, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i32, Expand);
+ setOperationAction(ISD::SUB, MVT::v4i32, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
setOperationAction(ISD::UREM, MVT::v4i32, Expand);
setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
- setOperationAction(ISD::FPOW, MVT::f32, Custom);
-
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
+
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SELECT_CC);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
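+ // R600 compares produce 0 or -1 for false/true, so advertising
+ // ZeroOrNegativeOneBooleanContent lets SETCC and BR_CC expand through
+ // SELECT_CC without an extra AND-with-1 to normalize the result.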
setSchedulingPreference(Sched::VLIW);
}
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
- case AMDGPU::SHADER_TYPE: break;
case AMDGPU::CLAMP_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
break;
}
case AMDGPU::MOV_IMM_I32:
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                 MI->getOperand(1).getImm());
break;
-
+ case AMDGPU::CONST_COPY: {
+ MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
+ MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
+ TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
+ MI->getOperand(1).getImm());
+ break;
+ }
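+ // CONST_COPY reads from the constant cache: it lowers to a MOV whose
+ // source is the ALU_CONST pseudo-register, with the constant's address
+ // carried in the SRC0_SEL immediate operand.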
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
case AMDGPU::TXD: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
-
+ MachineOperand &RID = MI->getOperand(4);
+ MachineOperand &SID = MI->getOperand(5);
+ unsigned TextureId = MI->getOperand(6).getImm();
+ unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
+ unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
+
+ switch (TextureId) {
+ case 5: // Rect
+ CTX = CTY = 0;
+ break;
+ case 6: // Shadow1D
+ SrcW = SrcZ;
+ break;
+ case 7: // Shadow2D
+ SrcW = SrcZ;
+ break;
+ case 8: // ShadowRect
+ CTX = CTY = 0;
+ SrcW = SrcZ;
+ break;
+ case 9: // 1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 10: // 2DArray
+ CTZ = 0;
+ break;
+ case 11: // Shadow1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 12: // Shadow2DArray
+ CTZ = 0;
+ break;
+ }
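+ // A sketch of what the switch above encodes, assuming the usual R600 TEX
+ // conventions: Src* choose the coordinate swizzle and CT* mark each
+ // coordinate as normalized (1) or unnormalized (0). Rect targets use
+ // unnormalized X/Y, shadow targets copy the compare value into W, and
+ // array targets keep the layer index unnormalized. The immediates below
+ // then follow the TEX operand order: source swizzle (SrcX..SrcW), three
+ // texel offsets (0,0,0), destination swizzle (0,1,2,3), resource and
+ // sampler IDs, and the four coordinate-type flags.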
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
.addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
.addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW)
.addReg(T0, RegState::Implicit)
.addReg(T1, RegState::Implicit);
break;
}
case AMDGPU::TXD_SHADOW: {
unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ MachineOperand &RID = MI->getOperand(4);
+ MachineOperand &SID = MI->getOperand(5);
+ unsigned TextureId = MI->getOperand(6).getImm();
+ unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
+ unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
+
+ switch (TextureId) {
+ case 5: // Rect
+ CTX = CTY = 0;
+ break;
+ case 6: // Shadow1D
+ SrcW = SrcZ;
+ break;
+ case 7: // Shadow2D
+ SrcW = SrcZ;
+ break;
+ case 8: // ShadowRect
+ CTX = CTY = 0;
+ SrcW = SrcZ;
+ break;
+ case 9: // 1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 10: // 2DArray
+ CTZ = 0;
+ break;
+ case 11: // Shadow1DArray
+ SrcZ = SrcY;
+ CTZ = 0;
+ break;
+ case 12: // Shadow2DArray
+ CTZ = 0;
+ break;
+ }
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
.addOperand(MI->getOperand(3))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
.addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6));
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
- .addOperand(MI->getOperand(4))
- .addOperand(MI->getOperand(5))
- .addOperand(MI->getOperand(6))
+ .addImm(SrcX)
+ .addImm(SrcY)
+ .addImm(SrcZ)
+ .addImm(SrcW)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(1)
+ .addImm(2)
+ .addImm(3)
+ .addOperand(RID)
+ .addOperand(SID)
+ .addImm(CTX)
+ .addImm(CTY)
+ .addImm(CTZ)
+ .addImm(CTW)
.addReg(T0, RegState::Implicit)
.addReg(T1, RegState::Implicit);
break;
}
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI->getOperand(0))
- .addReg(0);
+ .addOperand(MI->getOperand(0));
break;
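+ // Conditional branches are emitted in two parts: an ALU compare that sets
+ // PREDICATE_BIT (flagged with MO_FLAG_PUSH), followed by a JUMP_COND that
+ // consumes the predicate. The unconditional JUMP above no longer carries
+ // a dummy predicate operand.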
case AMDGPU::BRANCH_COND_f32: {
.addImm(OPCODE_IS_NOT_ZERO)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
.addImm(OPCODE_IS_NOT_ZERO_INT)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
-using namespace llvm::Intrinsic;
-using namespace llvm::AMDGPUIntrinsic;
-
-static SDValue
-InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
- unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
- SDValue Scalar, SDValue Chain) {
- if (!ExportMap[Slot]) {
- SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
- DL, MVT::v4f32,
- DAG.getUNDEF(MVT::v4f32),
- Scalar,
- DAG.getConstant(Channel, MVT::i32));
-
- unsigned Mask = 1 << Channel;
-
- const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
- DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
- DAG.getConstant(Mask, MVT::i32)};
-
- SDValue Res = DAG.getNode(
- AMDGPUISD::EXPORT,
- DL,
- MVT::Other,
- Ops, 6);
- ExportMap[Slot] = Res.getNode();
- return Res;
- }
-
- SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
- SDValue PreviousVector = ExportInstruction->getOperand(1);
- SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
- DL, MVT::v4f32,
- PreviousVector,
- Scalar,
- DAG.getConstant(Channel, MVT::i32));
-
- unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
- ->getZExtValue();
- Mask |= (1 << Channel);
-
- const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
- DAG.getConstant(Inst, MVT::i32),
- DAG.getConstant(Type, MVT::i32),
- DAG.getConstant(Slot, MVT::i32),
- DAG.getConstant(Mask, MVT::i32)};
-
- DAG.UpdateNodeOperands(ExportInstruction,
- Ops, 6);
-
- return Chain;
-
-}
-
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::FPOW: return LowerFPOW(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
MFI->LiveOuts.push_back(Reg);
- return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
+ return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
}
- case AMDGPUIntrinsic::R600_store_pixel_color: {
- MachineFunction &MF = DAG.getMachineFunction();
- R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
- int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-
- SDNode **OutputsMap = MFI->Outputs;
- return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
- RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
- Chain);
-
+ case AMDGPUIntrinsic::R600_store_swizzle: {
+ const SDValue Args[8] = {
+ Chain,
+ Op.getOperand(2), // Export Value
+ Op.getOperand(3), // ArrayBase
+ Op.getOperand(4), // Type
+ DAG.getConstant(0, MVT::i32), // SWZ_X
+ DAG.getConstant(1, MVT::i32), // SWZ_Y
+ DAG.getConstant(2, MVT::i32), // SWZ_Z
+ DAG.getConstant(3, MVT::i32) // SWZ_W
+ };
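+ // An EXPORT node takes the chain, the vector to export, the export array
+ // base and type, and one swizzle select per channel; constants 0-3 form
+ // the identity swizzle (X, Y, Z, W). A DAG combine may later replace
+ // constant 0.0/1.0 channels with the SEL_0/SEL_1 selects.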
+ return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
+ Args, 8);
}
// default for switch(IntrinsicID)
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::R600_load_input: {
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
- return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
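+ // Shader inputs are pre-loaded into T-registers before execution starts,
+ // so mark the register live-in and copy from it at the function entry,
+ // letting every use share that single copy.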
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(Reg);
+ return DAG.getCopyFromReg(DAG.getEntryNode(),
+ SDLoc(DAG.getEntryNode()), Reg, VT);
}
case AMDGPUIntrinsic::R600_interp_input: {
return SDValue(interp, slot % 2);
}
+ case AMDGPUIntrinsic::R600_tex:
+ case AMDGPUIntrinsic::R600_texc:
+ case AMDGPUIntrinsic::R600_txl:
+ case AMDGPUIntrinsic::R600_txlc:
+ case AMDGPUIntrinsic::R600_txb:
+ case AMDGPUIntrinsic::R600_txbc:
+ case AMDGPUIntrinsic::R600_txf:
+ case AMDGPUIntrinsic::R600_txq:
+ case AMDGPUIntrinsic::R600_ddx:
+ case AMDGPUIntrinsic::R600_ddy: {
+ unsigned TextureOp;
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::R600_tex:
+ TextureOp = 0;
+ break;
+ case AMDGPUIntrinsic::R600_texc:
+ TextureOp = 1;
+ break;
+ case AMDGPUIntrinsic::R600_txl:
+ TextureOp = 2;
+ break;
+ case AMDGPUIntrinsic::R600_txlc:
+ TextureOp = 3;
+ break;
+ case AMDGPUIntrinsic::R600_txb:
+ TextureOp = 4;
+ break;
+ case AMDGPUIntrinsic::R600_txbc:
+ TextureOp = 5;
+ break;
+ case AMDGPUIntrinsic::R600_txf:
+ TextureOp = 6;
+ break;
+ case AMDGPUIntrinsic::R600_txq:
+ TextureOp = 7;
+ break;
+ case AMDGPUIntrinsic::R600_ddx:
+ TextureOp = 8;
+ break;
+ case AMDGPUIntrinsic::R600_ddy:
+ TextureOp = 9;
+ break;
+ default:
+ llvm_unreachable("Unknow Texture Operation");
+ }
+
+ SDValue TexArgs[19] = {
+ DAG.getConstant(TextureOp, MVT::i32),
+ Op.getOperand(1),
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(3, MVT::i32),
+ Op.getOperand(2),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(1, MVT::i32),
+ DAG.getConstant(2, MVT::i32),
+ DAG.getConstant(3, MVT::i32),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10)
+ };
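+ // Assumed layout, mirroring the TEX machine-instruction operands: texture
+ // opcode, coordinate vector, identity source swizzle (0-3), the three
+ // texel offsets (operands 2-4), identity destination swizzle (0-3),
+ // resource ID, sampler ID, then the four coordinate-type flags.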
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
+ }
+ case AMDGPUIntrinsic::AMDGPU_dp4: {
+ SDValue Args[8] = {
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(0, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(1, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(2, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
+ DAG.getConstant(3, MVT::i32))
+ };
+ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
+ }
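+ // DOT4 takes the eight scalar lanes interleaved per channel
+ // (a.x, b.x, a.y, b.y, ...), so the node computes
+ // a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w across the four VLIW slots.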
- case r600_read_ngroups_x:
+ case Intrinsic::r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
- case r600_read_ngroups_y:
+ case Intrinsic::r600_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
- case r600_read_ngroups_z:
+ case Intrinsic::r600_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
- case r600_read_global_size_x:
+ case Intrinsic::r600_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
- case r600_read_global_size_y:
+ case Intrinsic::r600_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
- case r600_read_global_size_z:
+ case Intrinsic::r600_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
- case r600_read_local_size_x:
+ case Intrinsic::r600_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
- case r600_read_local_size_y:
+ case Intrinsic::r600_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
- case r600_read_local_size_z:
+ case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case r600_read_tgid_x:
+ case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
- case r600_read_tgid_y:
+ case Intrinsic::r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
- case r600_read_tgid_z:
+ case Intrinsic::r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
- case r600_read_tidig_x:
+ case Intrinsic::r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
- case r600_read_tidig_y:
+ case Intrinsic::r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
- case r600_read_tidig_z:
+ case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
}
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::SETCC,
- Op.getDebugLoc(),
+ SDLoc(Op),
MVT::i1,
Op, DAG.getConstantFP(0.0f, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
}
-SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue CC = Op.getOperand(1);
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue JumpT = Op.getOperand(4);
- SDValue CmpValue;
- SDValue Result;
-
- if (LHS.getValueType() == MVT::i32) {
- CmpValue = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::i32,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- } else if (LHS.getValueType() == MVT::f32) {
- CmpValue = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::f32,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- } else {
- assert(0 && "Not valid type for br_cc");
- }
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- CmpValue.getDebugLoc(),
- MVT::Other, Chain,
- JumpT, CmpValue);
- return Result;
-}
-
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
- DebugLoc DL,
+ SDLoc DL,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
}
-SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
- Op.getOperand(0),
- Op.getOperand(0),
- DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(32, MVT::i32),
- Op.getOperand(1)));
-}
-
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
// Check if we can lower this to a native operation.
+ // Try to lower to a SET* instruction:
+ //
+ // SET* can match the following patterns:
+ //
+ // select_cc f32, f32, -1, 0, cc_any
+ // select_cc f32, f32, 1.0f, 0.0f, cc_any
+ // select_cc i32, i32, -1, 0, cc_any
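+ //
+ // e.g. (select_cc f32 %a, f32 %b, 1.0, 0.0, setgt) passes through
+ // unchanged here and is later matched by a SETGT-style instruction.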
+ //
+
+ // Move hardware True/False values to the correct operand.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ std::swap(False, True);
+ CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ }
+
+ if (isHWTrueValue(True) && isHWFalseValue(False) &&
+ (CompareVT == VT || VT == MVT::i32)) {
+ // This can be matched by a SET* instruction.
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
// Try to lower to a CND* instruction:
- // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
- // can be lowered to CND* instructions can also be lowered to SET*
- // instructions. CND* instructions are cheaper, because they dont't
- // require additional instructions to convert their result to the correct
- // value type, so this check should be first.
+ //
+ // CND* can match the following patterns:
+ //
+ // select_cc f32, 0.0, f32, f32, cc_any
+ // select_cc f32, 0.0, i32, i32, cc_any
+ // select_cc i32, 0, f32, f32, cc_any
+ // select_cc i32, 0, i32, i32, cc_any
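+ //
+ // e.g. (select_cc f32 %x, 0.0, f32 %a, f32 %b, setgt) can be selected
+ // directly as CNDGT %x, %a, %b.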
+ //
if (isZero(LHS) || isZero(RHS)) {
SDValue Cond = (isZero(LHS) ? RHS : LHS);
SDValue Zero = (isZero(LHS) ? LHS : RHS);
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
}
- // Try to lower to a SET* instruction:
- //
- // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
- // but for the other case where CompareVT != VT, all operands of
- // SELECT_CC need to have the same value type, so we need to change True and
- // False to be the same type as LHS and RHS, and then convert the result of
- // the select_cc back to the correct type.
-
- // Move hardware True/False values to the correct operand.
- if (isHWTrueValue(False) && isHWFalseValue(True)) {
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- std::swap(False, True);
- CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
- }
-
- if (isHWTrueValue(True) && isHWFalseValue(False)) {
- if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {
- SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- // Convert integer values of true (-1) and false (0) to fp values of
- // true (1.0f) and false (0.0f).
- SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
- DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
- } else {
- // This SELECT_CC is already legal.
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
- }
- }
// Possible Min/Max pattern
SDValue MinMax = LowerMinMax(Op, DAG);
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::SELECT_CC,
- Op.getDebugLoc(),
+ SDLoc(Op),
Op.getValueType(),
Op.getOperand(0),
DAG.getConstant(0, MVT::i32),
DAG.getCondCode(ISD::SETNE));
}
-SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Cond;
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
- DebugLoc DL = Op.getDebugLoc();
- assert(Op.getValueType() == MVT::i32);
- if (LHS.getValueType() == MVT::i32) {
- Cond = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::i32,
- LHS, RHS,
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- CC);
- } else if (LHS.getValueType() == MVT::f32) {
- Cond = DAG.getNode(
- ISD::SELECT_CC,
- Op.getDebugLoc(),
- MVT::f32,
- LHS, RHS,
- DAG.getConstantFP(1.0f, MVT::f32),
- DAG.getConstantFP(0.0f, MVT::f32),
- CC);
- Cond = DAG.getNode(
- ISD::FP_TO_SINT,
- DL,
- MVT::i32,
- Cond);
- } else {
- assert(0 && "Not valid type for set_cc");
- }
- Cond = DAG.getNode(
- ISD::AND,
- DL,
- MVT::i32,
- DAG.getConstant(1, MVT::i32),
- Cond);
- return Cond;
-}
-
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
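+ /// For example, assuming a stack width of one register, a private-space
+ /// byte address of 48 maps to register index 48 >> 4 = 3.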
default: llvm_unreachable("Invalid stack width");
}
- return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
+ return DAG.getNode(ISD::SRL, SDLoc(Ptr), Ptr.getValueType(), Ptr,
DAG.getConstant(SRLPad, MVT::i32));
}
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Value = Op.getOperand(1);
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
- DAG.getTargetConstant(0, MVT::i32)); // Channel
+ DAG.getTargetConstant(0, MVT::i32)); // Channel
}
return Chain;
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Ptr = Op.getOperand(1);
if (ConstantBlock > -1) {
SDValue Result;
if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
- dyn_cast<Constant>(LoadNode->getSrcValue())) {
+ dyn_cast<Constant>(LoadNode->getSrcValue()) ||
+ dyn_cast<ConstantSDNode>(Ptr)) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
// We want Const position encoded with the following formula :
} else {
// A non-constant ptr can't be folded; keep it as a v4f32 load.
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
- DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
+ DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
+ DAG.getConstant(LoadNode->getAddressSpace() -
+ AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
);
}
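+ // The extra CONST_ADDRESS operand is the constant-buffer index, recovered
+ // here from the load's address space relative to CONSTANT_BUFFER_0.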
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue R600TargetLowering::LowerFPOW(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
- SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
- SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
- return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
-}
-
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
+ SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned ParamOffsetBytes = 36;
Function::const_arg_iterator FuncArg =
AMDGPUAS::PARAM_I_ADDRESS);
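+ // Note: building the MachinePointerInfo from UndefValue::get(PtrTy) below
+ // replaces the old `new Argument(PtrTy)`, which leaked; presumably only a
+ // placeholder pointer value is needed here.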
SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
DAG.getConstant(ParamOffsetBytes, MVT::i32),
- MachinePointerInfo(new Argument(PtrTy)),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
ArgVT, false, false, ArgBytes);
InVals.push_back(Arg);
ParamOffsetBytes += ArgBytes;
return Chain;
}
-EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
+EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector()) return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
- return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
Arg.getOperand(0));
}
break;
return SDValue();
}
- return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
DAG.getConstant(-1, MVT::i32), // True
Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned Element = Const->getZExtValue();
- return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
Arg->getOperand(0).getOperand(Element));
}
}
case ISD::SELECT_CC: {
// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
// selectcc x, y, a, b, inv(cc)
+ //
+ // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
+ // selectcc x, y, a, b, cc
SDValue LHS = N->getOperand(0);
if (LHS.getOpcode() != ISD::SELECT_CC) {
return SDValue();
SDValue RHS = N->getOperand(1);
SDValue True = N->getOperand(2);
SDValue False = N->getOperand(3);
+ ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
if (LHS.getOperand(2).getNode() != True.getNode() ||
LHS.getOperand(3).getNode() != False.getNode() ||
- RHS.getNode() != False.getNode() ||
- cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
+ RHS.getNode() != False.getNode()) {
return SDValue();
}
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
- CCOpcode = ISD::getSetCCInverse(
- CCOpcode, LHS.getOperand(0).getValueType().isInteger());
- return DAG.getSelectCC(N->getDebugLoc(),
- LHS.getOperand(0),
- LHS.getOperand(1),
- LHS.getOperand(2),
- LHS.getOperand(3),
- CCOpcode);
-
+ switch (NCC) {
+ default: return SDValue();
+ case ISD::SETNE: return LHS;
+ case ISD::SETEQ: {
+ ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
+ LHSCC = ISD::getSetCCInverse(LHSCC,
+ LHS.getOperand(0).getValueType().isInteger());
+ return DAG.getSelectCC(SDLoc(N),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ }
+ }
+ }
+ case AMDGPUISD::EXPORT: {
+ SDValue Arg = N->getOperand(1);
+ if (Arg.getOpcode() != ISD::BUILD_VECTOR)
+ break;
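+ // Channels of the exported vector that are constant 0.0 or 1.0 need not
+ // occupy a register: they can be encoded directly in the export swizzle
+ // as SEL_0 (4) or SEL_1 (5), and the remaining channels are kept in a
+ // rebuilt BUILD_VECTOR.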
+ SDValue NewBldVec[4] = {
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32),
+ DAG.getUNDEF(MVT::f32)
+ };
+ SDValue NewArgs[8] = {
+ N->getOperand(0), // Chain
+ SDValue(),
+ N->getOperand(2), // ArrayBase
+ N->getOperand(3), // Type
+ N->getOperand(4), // SWZ_X
+ N->getOperand(5), // SWZ_Y
+ N->getOperand(6), // SWZ_Z
+ N->getOperand(7) // SWZ_W
+ };
+ for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
+ if (C->isZero()) {
+ NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
+ } else if (C->isExactlyValue(1.0)) {
+ NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_1
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ } else {
+ NewBldVec[i] = Arg.getOperand(i);
+ }
+ }
+ SDLoc DL(N);
+ NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
}
}
return SDValue();