lib/Target/R600/R600ISelLowering.cpp

   1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Custom DAG lowering for R600
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "R600ISelLowering.h"
  16 #include "R600Defines.h"
  17 #include "R600InstrInfo.h"
  18 #include "R600MachineFunctionInfo.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineInstrBuilder.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/SelectionDAG.h"
  23 #include "llvm/IR/Argument.h"
  24 #include "llvm/IR/Function.h"
  25
  26 using namespace llvm;
  27
  28 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
  29     AMDGPUTargetLowering(TM),
  30     TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  31   addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  32   addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  33   addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  34   addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  35   computeRegisterProperties();
  36
  37   setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  38   setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  39   setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  40   setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  41
  42   setOperationAction(ISD::ADD,  MVT::v4i32, Expand);
  43   setOperationAction(ISD::AND,  MVT::v4i32, Expand);
  44   setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  45   setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  46   setOperationAction(ISD::OR, MVT::v4i32, Expand);
  47   setOperationAction(ISD::OR, MVT::v2i32, Expand);
  48   setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  49   setOperationAction(ISD::SHL, MVT::v4i32, Expand);
  50   setOperationAction(ISD::SHL, MVT::v2i32, Expand);
  51   setOperationAction(ISD::SRL, MVT::v4i32, Expand);
  52   setOperationAction(ISD::SRL, MVT::v2i32, Expand);
  53   setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  54   setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  55   setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  56   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  57   setOperationAction(ISD::XOR, MVT::v4i32, Expand);
  58   setOperationAction(ISD::XOR, MVT::v2i32, Expand);
  59
  60   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  61   setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  62
  63   setOperationAction(ISD::FSUB, MVT::f32, Expand);
  64
  65   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  66   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  67   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  68
  69   setOperationAction(ISD::ROTL, MVT::i32, Custom);
  70
  71   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  72   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  73
  74   setOperationAction(ISD::SETCC, MVT::i32, Expand);
  75   setOperationAction(ISD::SETCC, MVT::f32, Expand);
  76   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  77
  78   setOperationAction(ISD::SELECT, MVT::i32, Custom);
  79   setOperationAction(ISD::SELECT, MVT::f32, Custom);
  80
  81   // Legalize loads and stores to the private address space.
  82   setOperationAction(ISD::LOAD, MVT::i32, Custom);
  83   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  84   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  85   setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
  86   setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  87   setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  88   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
  89   setOperationAction(ISD::STORE, MVT::i8, Custom);
  90   setOperationAction(ISD::STORE, MVT::i32, Custom);
  91   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  92   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  93
  94   setOperationAction(ISD::LOAD, MVT::i32, Custom);
  95   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  96   setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  97
  98   setTargetDAGCombine(ISD::FP_ROUND);
  99   setTargetDAGCombine(ISD::FP_TO_SINT);
 100   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
 101   setTargetDAGCombine(ISD::SELECT_CC);
 102
 103   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 104   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 105   setSchedulingPreference(Sched::VLIW);
 106 }
 107
 108 MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
 109     MachineInstr * MI, MachineBasicBlock * BB) const {
 110   MachineFunction * MF = BB->getParent();
 111   MachineRegisterInfo &MRI = MF->getRegInfo();
 112   MachineBasicBlock::iterator I = *MI;
 113
 114   switch (MI->getOpcode()) {
 115   default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
 116   case AMDGPU::CLAMP_R600: {
 117     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 118                                                    AMDGPU::MOV,
 119                                                    MI->getOperand(0).getReg(),
 120                                                    MI->getOperand(1).getReg());
 121     TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
 122     break;
 123   }
 124
 125   case AMDGPU::FABS_R600: {
 126     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 127                                                     AMDGPU::MOV,
 128                                                     MI->getOperand(0).getReg(),
 129                                                     MI->getOperand(1).getReg());
 130     TII->addFlag(NewMI, 0, MO_FLAG_ABS);
 131     break;
 132   }
 133
 134   case AMDGPU::FNEG_R600: {
 135     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 136                                                     AMDGPU::MOV,
 137                                                     MI->getOperand(0).getReg(),
 138                                                     MI->getOperand(1).getReg());
 139     TII->addFlag(NewMI, 0, MO_FLAG_NEG);
 140     break;
 141   }
 142
 143   case AMDGPU::MASK_WRITE: {
 144     unsigned maskedRegister = MI->getOperand(0).getReg();
 145     assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
 146     MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
 147     TII->addFlag(defInstr, 0, MO_FLAG_MASK);
 148     break;
 149   }
 150
 151   case AMDGPU::MOV_IMM_F32:
 152     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
 153                      MI->getOperand(1).getFPImm()->getValueAPF()
 154                          .bitcastToAPInt().getZExtValue());
 155     break;
 156   case AMDGPU::MOV_IMM_I32:
 157     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
 158                      MI->getOperand(1).getImm());
 159     break;
 160   case AMDGPU::CONST_COPY: {
 161     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
 162         MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
 163     TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
 164         MI->getOperand(1).getImm());
 165     break;
 166   }
 167
 168   case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
 169   case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
 170     unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
 171
 172     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
 173             .addOperand(MI->getOperand(0))
 174             .addOperand(MI->getOperand(1))
 175             .addImm(EOP); // Set End of program bit
 176     break;
 177   }
 178
 179   case AMDGPU::TXD: {
 180     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 181     unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 182
 183     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
 184             .addOperand(MI->getOperand(3))
 185             .addOperand(MI->getOperand(4))
 186             .addOperand(MI->getOperand(5))
 187             .addOperand(MI->getOperand(6));
 188     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
 189             .addOperand(MI->getOperand(2))
 190             .addOperand(MI->getOperand(4))
 191             .addOperand(MI->getOperand(5))
 192             .addOperand(MI->getOperand(6));
 193     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
 194             .addOperand(MI->getOperand(0))
 195             .addOperand(MI->getOperand(1))
 196             .addOperand(MI->getOperand(4))
 197             .addOperand(MI->getOperand(5))
 198             .addOperand(MI->getOperand(6))
 199             .addReg(T0, RegState::Implicit)
 200             .addReg(T1, RegState::Implicit);
 201     break;
 202   }
 203
 204   case AMDGPU::TXD_SHADOW: {
 205     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 206     unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 207
 208     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
 209             .addOperand(MI->getOperand(3))
 210             .addOperand(MI->getOperand(4))
 211             .addOperand(MI->getOperand(5))
 212             .addOperand(MI->getOperand(6));
 213     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
 214             .addOperand(MI->getOperand(2))
 215             .addOperand(MI->getOperand(4))
 216             .addOperand(MI->getOperand(5))
 217             .addOperand(MI->getOperand(6));
 218     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
 219             .addOperand(MI->getOperand(0))
 220             .addOperand(MI->getOperand(1))
 221             .addOperand(MI->getOperand(4))
 222             .addOperand(MI->getOperand(5))
 223             .addOperand(MI->getOperand(6))
 224             .addReg(T0, RegState::Implicit)
 225             .addReg(T1, RegState::Implicit);
 226     break;
 227   }
 228
 229   case AMDGPU::BRANCH:
 230       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 231               .addOperand(MI->getOperand(0));
 232       break;
 233
 234   case AMDGPU::BRANCH_COND_f32: {
 235     MachineInstr *NewMI =
 236       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
 237               AMDGPU::PREDICATE_BIT)
 238               .addOperand(MI->getOperand(1))
 239               .addImm(OPCODE_IS_NOT_ZERO)
 240               .addImm(0); // Flags
 241     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
 242     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
 243             .addOperand(MI->getOperand(0))
 244             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 245     break;
 246   }
 247
 248   case AMDGPU::BRANCH_COND_i32: {
 249     MachineInstr *NewMI =
 250       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
 251             AMDGPU::PREDICATE_BIT)
 252             .addOperand(MI->getOperand(1))
 253             .addImm(OPCODE_IS_NOT_ZERO_INT)
 254             .addImm(0); // Flags
 255     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
 256     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
 257            .addOperand(MI->getOperand(0))
 258             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 259     break;
 260   }
 261
 262   case AMDGPU::EG_ExportSwz:
 263   case AMDGPU::R600_ExportSwz: {
 264     // Instruction is left unmodified if its not the last one of its type
 265     bool isLastInstructionOfItsType = true;
 266     unsigned InstExportType = MI->getOperand(1).getImm();
 267     for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
 268          EndBlock = BB->end(); NextExportInst != EndBlock;
 269          NextExportInst = llvm::next(NextExportInst)) {
 270       if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
 271           NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
 272         unsigned CurrentInstExportType = NextExportInst->getOperand(1)
 273             .getImm();
 274         if (CurrentInstExportType == InstExportType) {
 275           isLastInstructionOfItsType = false;
 276           break;
 277         }
 278       }
 279     }
 280     bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
 281     if (!EOP && !isLastInstructionOfItsType)
 282       return BB;
 283     unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
 284     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
 285             .addOperand(MI->getOperand(0))
 286             .addOperand(MI->getOperand(1))
 287             .addOperand(MI->getOperand(2))
 288             .addOperand(MI->getOperand(3))
 289             .addOperand(MI->getOperand(4))
 290             .addOperand(MI->getOperand(5))
 291             .addOperand(MI->getOperand(6))
 292             .addImm(CfInst)
 293             .addImm(EOP);
 294     break;
 295   }
 296   case AMDGPU::RETURN: {
 297     // RETURN instructions must have the live-out registers as implicit uses,
 298     // otherwise they appear dead.
 299     R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
 300     MachineInstrBuilder MIB(*MF, MI);
 301     for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
 302       MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
 303     return BB;
 304   }
 305   }
 306
 307   MI->eraseFromParent();
 308   return BB;
 309 }
 310
 311 //===----------------------------------------------------------------------===//
 312 // Custom DAG Lowering Operations
 313 //===----------------------------------------------------------------------===//
 314
 315 using namespace llvm::Intrinsic;
 316 using namespace llvm::AMDGPUIntrinsic;
 317
 318 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 319   switch (Op.getOpcode()) {
 320   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 321   case ISD::ROTL: return LowerROTL(Op, DAG);
 322   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
 323   case ISD::SELECT: return LowerSELECT(Op, DAG);
 324   case ISD::STORE: return LowerSTORE(Op, DAG);
 325   case ISD::LOAD: return LowerLOAD(Op, DAG);
 326   case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
 327   case ISD::INTRINSIC_VOID: {
 328     SDValue Chain = Op.getOperand(0);
 329     unsigned IntrinsicID =
 330                          cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 331     switch (IntrinsicID) {
 332     case AMDGPUIntrinsic::AMDGPU_store_output: {
 333       MachineFunction &MF = DAG.getMachineFunction();
 334       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 335       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
 336       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
 337       MFI->LiveOuts.push_back(Reg);
 338       return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
 339     }
 340     case AMDGPUIntrinsic::R600_store_swizzle: {
 341       const SDValue Args[8] = {
 342         Chain,
 343         Op.getOperand(2), // Export Value
 344         Op.getOperand(3), // ArrayBase
 345         Op.getOperand(4), // Type
 346         DAG.getConstant(0, MVT::i32), // SWZ_X
 347         DAG.getConstant(1, MVT::i32), // SWZ_Y
 348         DAG.getConstant(2, MVT::i32), // SWZ_Z
 349         DAG.getConstant(3, MVT::i32) // SWZ_W
 350       };
 351       return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
 352           Args, 8);
 353     }
 354
 355     // default for switch(IntrinsicID)
 356     default: break;
 357     }
 358     // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
 359     break;
 360   }
 361   case ISD::INTRINSIC_WO_CHAIN: {
 362     unsigned IntrinsicID =
 363                          cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 364     EVT VT = Op.getValueType();
 365     DebugLoc DL = Op.getDebugLoc();
 366     switch(IntrinsicID) {
 367     default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 368     case AMDGPUIntrinsic::R600_load_input: {
 369       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 370       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
 371       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
 372     }
 373
 374     case AMDGPUIntrinsic::R600_interp_input: {
 375       int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 376       int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
 377       MachineSDNode *interp;
 378       if (ijb < 0) {
 379         interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
 380             MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
 381         return DAG.getTargetExtractSubreg(
 382             TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
 383             DL, MVT::f32, SDValue(interp, 0));
 384       }
 385
 386       if (slot % 4 < 2)
 387         interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
 388             MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
 389             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 390                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
 391             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 392                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
 393       else
 394         interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
 395             MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
 396             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 397                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
 398             CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 399                 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
 400
 401       return SDValue(interp, slot % 2);
 402     }
 403
 404     case r600_read_ngroups_x:
 405       return LowerImplicitParameter(DAG, VT, DL, 0);
 406     case r600_read_ngroups_y:
 407       return LowerImplicitParameter(DAG, VT, DL, 1);
 408     case r600_read_ngroups_z:
 409       return LowerImplicitParameter(DAG, VT, DL, 2);
 410     case r600_read_global_size_x:
 411       return LowerImplicitParameter(DAG, VT, DL, 3);
 412     case r600_read_global_size_y:
 413       return LowerImplicitParameter(DAG, VT, DL, 4);
 414     case r600_read_global_size_z:
 415       return LowerImplicitParameter(DAG, VT, DL, 5);
 416     case r600_read_local_size_x:
 417       return LowerImplicitParameter(DAG, VT, DL, 6);
 418     case r600_read_local_size_y:
 419       return LowerImplicitParameter(DAG, VT, DL, 7);
 420     case r600_read_local_size_z:
 421       return LowerImplicitParameter(DAG, VT, DL, 8);
 422
 423     case r600_read_tgid_x:
 424       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 425                                   AMDGPU::T1_X, VT);
 426     case r600_read_tgid_y:
 427       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 428                                   AMDGPU::T1_Y, VT);
 429     case r600_read_tgid_z:
 430       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 431                                   AMDGPU::T1_Z, VT);
 432     case r600_read_tidig_x:
 433       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 434                                   AMDGPU::T0_X, VT);
 435     case r600_read_tidig_y:
 436       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 437                                   AMDGPU::T0_Y, VT);
 438     case r600_read_tidig_z:
 439       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 440                                   AMDGPU::T0_Z, VT);
 441     }
 442     // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
 443     break;
 444   }
 445   } // end switch(Op.getOpcode())
 446   return SDValue();
 447 }
 448
 449 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
 450                                             SmallVectorImpl<SDValue> &Results,
 451                                             SelectionDAG &DAG) const {
 452   switch (N->getOpcode()) {
 453   default: return;
 454   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
 455     return;
 456   case ISD::LOAD: {
 457     SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
 458     Results.push_back(SDValue(Node, 0));
 459     Results.push_back(SDValue(Node, 1));
 460     // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
 461     // function
 462     DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
 463     return;
 464   }
 465   case ISD::STORE:
 466     SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
 467     Results.push_back(SDValue(Node, 0));
 468     return;
 469   }
 470 }
 471
 472 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
 473   return DAG.getNode(
 474       ISD::SETCC,
 475       Op.getDebugLoc(),
 476       MVT::i1,
 477       Op, DAG.getConstantFP(0.0f, MVT::f32),
 478       DAG.getCondCode(ISD::SETNE)
 479       );
 480 }
 481
 482 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
 483                                                    DebugLoc DL,
 484                                                    unsigned DwordOffset) const {
 485   unsigned ByteOffset = DwordOffset * 4;
 486   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
 487                                       AMDGPUAS::PARAM_I_ADDRESS);
 488
 489   // We shouldn't be using an offset wider than 16-bits for implicit parameters.
 490   assert(isInt<16>(ByteOffset));
 491
 492   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
 493                      DAG.getConstant(ByteOffset, MVT::i32), // PTR
 494                      MachinePointerInfo(ConstantPointerNull::get(PtrType)),
 495                      false, false, false, 0);
 496 }
 497
 498 SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
 499
 500   MachineFunction &MF = DAG.getMachineFunction();
 501   const AMDGPUFrameLowering *TFL =
 502    static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
 503
 504   FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
 505   assert(FIN);
 506
 507   unsigned FrameIndex = FIN->getIndex();
 508   unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
 509   return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
 510 }
 511
 512 SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
 513   DebugLoc DL = Op.getDebugLoc();
 514   EVT VT = Op.getValueType();
 515
 516   return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
 517                      Op.getOperand(0),
 518                      Op.getOperand(0),
 519                      DAG.getNode(ISD::SUB, DL, VT,
 520                                  DAG.getConstant(32, MVT::i32),
 521                                  Op.getOperand(1)));
 522 }
 523
 524 bool R600TargetLowering::isZero(SDValue Op) const {
 525   if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
 526     return Cst->isNullValue();
 527   } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
 528     return CstFP->isZero();
 529   } else {
 530     return false;
 531   }
 532 }
 533
 534 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 535   DebugLoc DL = Op.getDebugLoc();
 536   EVT VT = Op.getValueType();
 537
 538   SDValue LHS = Op.getOperand(0);
 539   SDValue RHS = Op.getOperand(1);
 540   SDValue True = Op.getOperand(2);
 541   SDValue False = Op.getOperand(3);
 542   SDValue CC = Op.getOperand(4);
 543   SDValue Temp;
 544
 545   // LHS and RHS are guaranteed to be the same value type
 546   EVT CompareVT = LHS.getValueType();
 547
 548   // Check if we can lower this to a native operation.
 549
 550   // Try to lower to a SET* instruction:
 551   //
 552   // SET* can match the following patterns:
 553   //
 554   // select_cc f32, f32, -1,  0, cc_any
 555   // select_cc f32, f32, 1.0f, 0.0f, cc_any
 556   // select_cc i32, i32, -1,  0, cc_any
 557   //
 558
 559   // Move hardware True/False values to the correct operand.
 560   if (isHWTrueValue(False) && isHWFalseValue(True)) {
 561     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 562     std::swap(False, True);
 563     CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
 564   }
 565
 566   if (isHWTrueValue(True) && isHWFalseValue(False) &&
 567       (CompareVT == VT || VT == MVT::i32)) {
 568     // This can be matched by a SET* instruction.
 569     return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
 570   }
 571
 572   // Try to lower to a CND* instruction:
 573   //
 574   // CND* can match the following patterns:
 575   //
 576   // select_cc f32, 0.0, f32, f32, cc_any
 577   // select_cc f32, 0.0, i32, i32, cc_any
 578   // select_cc i32, 0,   f32, f32, cc_any
 579   // select_cc i32, 0,   i32, i32, cc_any
 580   //
 581   if (isZero(LHS) || isZero(RHS)) {
 582     SDValue Cond = (isZero(LHS) ? RHS : LHS);
 583     SDValue Zero = (isZero(LHS) ? LHS : RHS);
 584     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 585     if (CompareVT != VT) {
 586       // Bitcast True / False to the correct types.  This will end up being
 587       // a nop, but it allows us to define only a single pattern in the
 588       // .TD files for each CND* instruction rather than having to have
 589       // one pattern for integer True/False and one for fp True/False
 590       True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
 591       False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
 592     }
 593     if (isZero(LHS)) {
 594       CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
 595     }
 596
 597     switch (CCOpcode) {
 598     case ISD::SETONE:
 599     case ISD::SETUNE:
 600     case ISD::SETNE:
 601     case ISD::SETULE:
 602     case ISD::SETULT:
 603     case ISD::SETOLE:
 604     case ISD::SETOLT:
 605     case ISD::SETLE:
 606     case ISD::SETLT:
 607       CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
 608       Temp = True;
 609       True = False;
 610       False = Temp;
 611       break;
 612     default:
 613       break;
 614     }
 615     SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 616         Cond, Zero,
 617         True, False,
 618         DAG.getCondCode(CCOpcode));
 619     return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
 620   }
 621
 622
 623   // Possible Min/Max pattern
 624   SDValue MinMax = LowerMinMax(Op, DAG);
 625   if (MinMax.getNode()) {
 626     return MinMax;
 627   }
 628
 629   // If we make it this for it means we have no native instructions to handle
 630   // this SELECT_CC, so we must lower it.
 631   SDValue HWTrue, HWFalse;
 632
 633   if (CompareVT == MVT::f32) {
 634     HWTrue = DAG.getConstantFP(1.0f, CompareVT);
 635     HWFalse = DAG.getConstantFP(0.0f, CompareVT);
 636   } else if (CompareVT == MVT::i32) {
 637     HWTrue = DAG.getConstant(-1, CompareVT);
 638     HWFalse = DAG.getConstant(0, CompareVT);
 639   }
 640   else {
 641     assert(!"Unhandled value type in LowerSELECT_CC");
 642   }
 643
 644   // Lower this unsupported SELECT_CC into a combination of two supported
 645   // SELECT_CC operations.
 646   SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
 647
 648   return DAG.getNode(ISD::SELECT_CC, DL, VT,
 649       Cond, HWFalse,
 650       True, False,
 651       DAG.getCondCode(ISD::SETNE));
 652 }
 653
 654 SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
 655   return DAG.getNode(ISD::SELECT_CC,
 656       Op.getDebugLoc(),
 657       Op.getValueType(),
 658       Op.getOperand(0),
 659       DAG.getConstant(0, MVT::i32),
 660       Op.getOperand(1),
 661       Op.getOperand(2),
 662       DAG.getCondCode(ISD::SETNE));
 663 }
 664
 665 /// LLVM generates byte-addresed pointers.  For indirect addressing, we need to
 666 /// convert these pointers to a register index.  Each register holds
 667 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
 668 /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
 669 /// for indirect addressing.
 670 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
 671                                                unsigned StackWidth,
 672                                                SelectionDAG &DAG) const {
 673   unsigned SRLPad;
 674   switch(StackWidth) {
 675   case 1:
 676     SRLPad = 2;
 677     break;
 678   case 2:
 679     SRLPad = 3;
 680     break;
 681   case 4:
 682     SRLPad = 4;
 683     break;
 684   default: llvm_unreachable("Invalid stack width");
 685   }
 686
 687   return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
 688                      DAG.getConstant(SRLPad, MVT::i32));
 689 }
 690
 691 void R600TargetLowering::getStackAddress(unsigned StackWidth,
 692                                          unsigned ElemIdx,
 693                                          unsigned &Channel,
 694                                          unsigned &PtrIncr) const {
 695   switch (StackWidth) {
 696   default:
 697   case 1:
 698     Channel = 0;
 699     if (ElemIdx > 0) {
 700       PtrIncr = 1;
 701     } else {
 702       PtrIncr = 0;
 703     }
 704     break;
 705   case 2:
 706     Channel = ElemIdx % 2;
 707     if (ElemIdx == 2) {
 708       PtrIncr = 1;
 709     } else {
 710       PtrIncr = 0;
 711     }
 712     break;
 713   case 4:
 714     Channel = ElemIdx;
 715     PtrIncr = 0;
 716     break;
 717   }
 718 }
 719
 720 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 721   DebugLoc DL = Op.getDebugLoc();
 722   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
 723   SDValue Chain = Op.getOperand(0);
 724   SDValue Value = Op.getOperand(1);
 725   SDValue Ptr = Op.getOperand(2);
 726
 727   if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
 728       Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
 729     // Convert pointer from byte address to dword address.
 730     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
 731                       DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
 732                                   Ptr, DAG.getConstant(2, MVT::i32)));
 733
 734     if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
 735       assert(!"Truncated and indexed stores not supported yet");
 736     } else {
 737       Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
 738     }
 739     return Chain;
 740   }
 741
 742   EVT ValueVT = Value.getValueType();
 743
 744   if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
 745     return SDValue();
 746   }
 747
 748   // Lowering for indirect addressing
 749
 750   const MachineFunction &MF = DAG.getMachineFunction();
 751   const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
 752                                          getTargetMachine().getFrameLowering());
 753   unsigned StackWidth = TFL->getStackWidth(MF);
 754
 755   Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
 756
 757   if (ValueVT.isVector()) {
 758     unsigned NumElemVT = ValueVT.getVectorNumElements();
 759     EVT ElemVT = ValueVT.getVectorElementType();
 760     SDValue Stores[4];
 761
 762     assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
 763                                       "vector width in load");
 764
 765     for (unsigned i = 0; i < NumElemVT; ++i) {
 766       unsigned Channel, PtrIncr;
 767       getStackAddress(StackWidth, i, Channel, PtrIncr);
 768       Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
 769                         DAG.getConstant(PtrIncr, MVT::i32));
 770       SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
 771                                  Value, DAG.getConstant(i, MVT::i32));
 772
 773       Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
 774                               Chain, Elem, Ptr,
 775                               DAG.getTargetConstant(Channel, MVT::i32));
 776     }
 777      Chain =  DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
 778    } else {
 779     if (ValueVT == MVT::i8) {
 780       Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
 781     }
 782     Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
 783     DAG.getTargetConstant(0, MVT::i32)); // Channel
 784   }
 785
 786   return Chain;
 787 }
 788
 789 // return (512 + (kc_bank << 12)
 790 static int
 791 ConstantAddressBlock(unsigned AddressSpace) {
 792   switch (AddressSpace) {
 793   case AMDGPUAS::CONSTANT_BUFFER_0:
 794     return 512;
 795   case AMDGPUAS::CONSTANT_BUFFER_1:
 796     return 512 + 4096;
 797   case AMDGPUAS::CONSTANT_BUFFER_2:
 798     return 512 + 4096 * 2;
 799   case AMDGPUAS::CONSTANT_BUFFER_3:
 800     return 512 + 4096 * 3;
 801   case AMDGPUAS::CONSTANT_BUFFER_4:
 802     return 512 + 4096 * 4;
 803   case AMDGPUAS::CONSTANT_BUFFER_5:
 804     return 512 + 4096 * 5;
 805   case AMDGPUAS::CONSTANT_BUFFER_6:
 806     return 512 + 4096 * 6;
 807   case AMDGPUAS::CONSTANT_BUFFER_7:
 808     return 512 + 4096 * 7;
 809   case AMDGPUAS::CONSTANT_BUFFER_8:
 810     return 512 + 4096 * 8;
 811   case AMDGPUAS::CONSTANT_BUFFER_9:
 812     return 512 + 4096 * 9;
 813   case AMDGPUAS::CONSTANT_BUFFER_10:
 814     return 512 + 4096 * 10;
 815   case AMDGPUAS::CONSTANT_BUFFER_11:
 816     return 512 + 4096 * 11;
 817   case AMDGPUAS::CONSTANT_BUFFER_12:
 818     return 512 + 4096 * 12;
 819   case AMDGPUAS::CONSTANT_BUFFER_13:
 820     return 512 + 4096 * 13;
 821   case AMDGPUAS::CONSTANT_BUFFER_14:
 822     return 512 + 4096 * 14;
 823   case AMDGPUAS::CONSTANT_BUFFER_15:
 824     return 512 + 4096 * 15;
 825   default:
 826     return -1;
 827   }
 828 }
 829
 830 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
 831 {
 832   EVT VT = Op.getValueType();
 833   DebugLoc DL = Op.getDebugLoc();
 834   LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
 835   SDValue Chain = Op.getOperand(0);
 836   SDValue Ptr = Op.getOperand(1);
 837   SDValue LoweredLoad;
 838
 839   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
 840   if (ConstantBlock > -1) {
 841     SDValue Result;
 842     if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
 843         dyn_cast<Constant>(LoadNode->getSrcValue()) ||
 844         dyn_cast<ConstantSDNode>(Ptr)) {
 845       SDValue Slots[4];
 846       for (unsigned i = 0; i < 4; i++) {
 847         // We want Const position encoded with the following formula :
 848         // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
 849         // const_index is Ptr computed by llvm using an alignment of 16.
 850         // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
 851         // then div by 4 at the ISel step
 852         SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
 853             DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
 854         Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
 855       }
 856       Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
 857     } else {
 858       // non constant ptr cant be folded, keeps it as a v4f32 load
 859       Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
 860           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
 861           DAG.getConstant(LoadNode->getAddressSpace() -
 862                           AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
 863           );
 864     }
 865
 866     if (!VT.isVector()) {
 867       Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
 868           DAG.getConstant(0, MVT::i32));
 869     }
 870
 871     SDValue MergedValues[2] = {
 872         Result,
 873         Chain
 874     };
 875     return DAG.getMergeValues(MergedValues, 2, DL);
 876   }
 877
 878   if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
 879     return SDValue();
 880   }
 881
 882   // Lowering for indirect addressing
 883   const MachineFunction &MF = DAG.getMachineFunction();
 884   const AMDGPUFrameLowering *TFL = static_cast<const AMDGPUFrameLowering*>(
 885                                          getTargetMachine().getFrameLowering());
 886   unsigned StackWidth = TFL->getStackWidth(MF);
 887
 888   Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
 889
 890   if (VT.isVector()) {
 891     unsigned NumElemVT = VT.getVectorNumElements();
 892     EVT ElemVT = VT.getVectorElementType();
 893     SDValue Loads[4];
 894
 895     assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
 896                                       "vector width in load");
 897
 898     for (unsigned i = 0; i < NumElemVT; ++i) {
 899       unsigned Channel, PtrIncr;
 900       getStackAddress(StackWidth, i, Channel, PtrIncr);
 901       Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
 902                         DAG.getConstant(PtrIncr, MVT::i32));
 903       Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
 904                              Chain, Ptr,
 905                              DAG.getTargetConstant(Channel, MVT::i32),
 906                              Op.getOperand(2));
 907     }
 908     for (unsigned i = NumElemVT; i < 4; ++i) {
 909       Loads[i] = DAG.getUNDEF(ElemVT);
 910     }
 911     EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
 912     LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
 913   } else {
 914     LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
 915                               Chain, Ptr,
 916                               DAG.getTargetConstant(0, MVT::i32), // Channel
 917                               Op.getOperand(2));
 918   }
 919
 920   SDValue Ops[2];
 921   Ops[0] = LoweredLoad;
 922   Ops[1] = Chain;
 923
 924   return DAG.getMergeValues(Ops, 2, DL);
 925 }
 926
 927 /// XXX Only kernel functions are supported, so we can assume for now that
 928 /// every function is a kernel function, but in the future we should use
 929 /// separate calling conventions for kernel and non-kernel functions.
 930 SDValue R600TargetLowering::LowerFormalArguments(
 931                                       SDValue Chain,
 932                                       CallingConv::ID CallConv,
 933                                       bool isVarArg,
 934                                       const SmallVectorImpl<ISD::InputArg> &Ins,
 935                                       DebugLoc DL, SelectionDAG &DAG,
 936                                       SmallVectorImpl<SDValue> &InVals) const {
 937   unsigned ParamOffsetBytes = 36;
 938   Function::const_arg_iterator FuncArg =
 939                             DAG.getMachineFunction().getFunction()->arg_begin();
 940   for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
 941     EVT VT = Ins[i].VT;
 942     Type *ArgType = FuncArg->getType();
 943     unsigned ArgSizeInBits = ArgType->isPointerTy() ?
 944                              32 : ArgType->getPrimitiveSizeInBits();
 945     unsigned ArgBytes = ArgSizeInBits >> 3;
 946     EVT ArgVT;
 947     if (ArgSizeInBits < VT.getSizeInBits()) {
 948       assert(!ArgType->isFloatTy() &&
 949              "Extending floating point arguments not supported yet");
 950       ArgVT = MVT::getIntegerVT(ArgSizeInBits);
 951     } else {
 952       ArgVT = VT;
 953     }
 954     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
 955                                                     AMDGPUAS::PARAM_I_ADDRESS);
 956     SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
 957                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
 958                                        MachinePointerInfo(UndefValue::get(PtrTy)),
 959                                        ArgVT, false, false, ArgBytes);
 960     InVals.push_back(Arg);
 961     ParamOffsetBytes += ArgBytes;
 962   }
 963   return Chain;
 964 }
 965
 966 EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
 967    if (!VT.isVector()) return MVT::i32;
 968    return VT.changeVectorElementTypeToInteger();
 969 }
 970
 971 //===----------------------------------------------------------------------===//
 972 // Custom DAG Optimizations
 973 //===----------------------------------------------------------------------===//
 974
 975 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
 976                                               DAGCombinerInfo &DCI) const {
 977   SelectionDAG &DAG = DCI.DAG;
 978
 979   switch (N->getOpcode()) {
 980   // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
 981   case ISD::FP_ROUND: {
 982       SDValue Arg = N->getOperand(0);
 983       if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
 984         return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
 985                            Arg.getOperand(0));
 986       }
 987       break;
 988     }
 989
 990   // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
 991   // (i32 select_cc f32, f32, -1, 0 cc)
 992   //
 993   // Mesa's GLSL frontend generates the above pattern a lot and we can lower
 994   // this to one of the SET*_DX10 instructions.
 995   case ISD::FP_TO_SINT: {
 996     SDValue FNeg = N->getOperand(0);
 997     if (FNeg.getOpcode() != ISD::FNEG) {
 998       return SDValue();
 999     }
1000     SDValue SelectCC = FNeg.getOperand(0);
1001     if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1002         SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1003         SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1004         !isHWTrueValue(SelectCC.getOperand(2)) ||
1005         !isHWFalseValue(SelectCC.getOperand(3))) {
1006       return SDValue();
1007     }
1008
1009     return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
1010                            SelectCC.getOperand(0), // LHS
1011                            SelectCC.getOperand(1), // RHS
1012                            DAG.getConstant(-1, MVT::i32), // True
1013                            DAG.getConstant(0, MVT::i32),  // Flase
1014                            SelectCC.getOperand(4)); // CC
1015
1016     break;
1017   }
1018   // Extract_vec (Build_vector) generated by custom lowering
1019   // also needs to be customly combined
1020   case ISD::EXTRACT_VECTOR_ELT: {
1021     SDValue Arg = N->getOperand(0);
1022     if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1023       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1024         unsigned Element = Const->getZExtValue();
1025         return Arg->getOperand(Element);
1026       }
1027     }
1028     if (Arg.getOpcode() == ISD::BITCAST &&
1029         Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1030       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1031         unsigned Element = Const->getZExtValue();
1032         return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
1033             Arg->getOperand(0).getOperand(Element));
1034       }
1035     }
1036   }
1037
1038   case ISD::SELECT_CC: {
1039     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1040     //      selectcc x, y, a, b, inv(cc)
1041     //
1042     // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1043     //      selectcc x, y, a, b, cc
1044     SDValue LHS = N->getOperand(0);
1045     if (LHS.getOpcode() != ISD::SELECT_CC) {
1046       return SDValue();
1047     }
1048
1049     SDValue RHS = N->getOperand(1);
1050     SDValue True = N->getOperand(2);
1051     SDValue False = N->getOperand(3);
1052     ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1053
1054     if (LHS.getOperand(2).getNode() != True.getNode() ||
1055         LHS.getOperand(3).getNode() != False.getNode() ||
1056         RHS.getNode() != False.getNode()) {
1057       return SDValue();
1058     }
1059
1060     switch (NCC) {
1061     default: return SDValue();
1062     case ISD::SETNE: return LHS;
1063     case ISD::SETEQ: {
1064       ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1065       LHSCC = ISD::getSetCCInverse(LHSCC,
1066                                   LHS.getOperand(0).getValueType().isInteger());
1067       return DAG.getSelectCC(N->getDebugLoc(),
1068                              LHS.getOperand(0),
1069                              LHS.getOperand(1),
1070                              LHS.getOperand(2),
1071                              LHS.getOperand(3),
1072                              LHSCC);
1073     }
1074     }
1075   }
1076   case AMDGPUISD::EXPORT: {
1077     SDValue Arg = N->getOperand(1);
1078     if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1079       break;
1080     SDValue NewBldVec[4] = {
1081         DAG.getUNDEF(MVT::f32),
1082         DAG.getUNDEF(MVT::f32),
1083         DAG.getUNDEF(MVT::f32),
1084         DAG.getUNDEF(MVT::f32)
1085       };
1086     SDValue NewArgs[8] = {
1087       N->getOperand(0), // Chain
1088       SDValue(),
1089       N->getOperand(2), // ArrayBase
1090       N->getOperand(3), // Type
1091       N->getOperand(4), // SWZ_X
1092       N->getOperand(5), // SWZ_Y
1093       N->getOperand(6), // SWZ_Z
1094       N->getOperand(7) // SWZ_W
1095     };
1096     for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
1097       if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
1098         if (C->isZero()) {
1099           NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
1100         } else if (C->isExactlyValue(1.0)) {
1101           NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
1102         } else {
1103           NewBldVec[i] = Arg.getOperand(i);
1104         }
1105       } else {
1106         NewBldVec[i] = Arg.getOperand(i);
1107       }
1108     }
1109     DebugLoc DL = N->getDebugLoc();
1110     NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
1111     return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
1112   }
1113   }
1114   return SDValue();
1115 }