lib/Target/R600/R600ISelLowering.cpp

   1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief Custom DAG lowering for R600
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "R600ISelLowering.h"
  16 #include "R600Defines.h"
  17 #include "R600InstrInfo.h"
  18 #include "R600MachineFunctionInfo.h"
  19 #include "llvm/CodeGen/MachineInstrBuilder.h"
  20 #include "llvm/CodeGen/MachineRegisterInfo.h"
  21 #include "llvm/CodeGen/SelectionDAG.h"
  22 #include "llvm/IR/Argument.h"
  23 #include "llvm/IR/Function.h"
  24
  25 using namespace llvm;
  26
  27 R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
  28     AMDGPUTargetLowering(TM),
  29     TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  30   setOperationAction(ISD::MUL, MVT::i64, Expand);
  31   addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  32   addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  33   addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  34   addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  35   computeRegisterProperties();
  36
  37   setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  38   setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  39   setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  40   setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
  41
  42   setOperationAction(ISD::ADD,  MVT::v4i32, Expand);
  43   setOperationAction(ISD::AND,  MVT::v4i32, Expand);
  44   setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  45   setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  46   setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  47   setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  48   setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  49   setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  50   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  51
  52   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  53   setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  54
  55   setOperationAction(ISD::FSUB, MVT::f32, Expand);
  56
  57   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  58   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  59   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  60   setOperationAction(ISD::FPOW, MVT::f32, Custom);
  61
  62   setOperationAction(ISD::ROTL, MVT::i32, Custom);
  63
  64   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  65   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  66
  67   setOperationAction(ISD::SETCC, MVT::i32, Custom);
  68   setOperationAction(ISD::SETCC, MVT::f32, Custom);
  69   setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  70
  71   setOperationAction(ISD::SELECT, MVT::i32, Custom);
  72   setOperationAction(ISD::SELECT, MVT::f32, Custom);
  73
  74   setOperationAction(ISD::STORE, MVT::i32, Custom);
  75   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  76
  77   setOperationAction(ISD::LOAD, MVT::i32, Custom);
  78   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  79   setTargetDAGCombine(ISD::FP_ROUND);
  80   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  81
  82   setSchedulingPreference(Sched::VLIW);
  83 }
  84
  85 MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
  86     MachineInstr * MI, MachineBasicBlock * BB) const {
  87   MachineFunction * MF = BB->getParent();
  88   MachineRegisterInfo &MRI = MF->getRegInfo();
  89   MachineBasicBlock::iterator I = *MI;
  90
  91   switch (MI->getOpcode()) {
  92   default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  93   case AMDGPU::SHADER_TYPE: break;
  94   case AMDGPU::CLAMP_R600: {
  95     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
  96                                                    AMDGPU::MOV,
  97                                                    MI->getOperand(0).getReg(),
  98                                                    MI->getOperand(1).getReg());
  99     TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
 100     break;
 101   }
 102
 103   case AMDGPU::FABS_R600: {
 104     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 105                                                     AMDGPU::MOV,
 106                                                     MI->getOperand(0).getReg(),
 107                                                     MI->getOperand(1).getReg());
 108     TII->addFlag(NewMI, 0, MO_FLAG_ABS);
 109     break;
 110   }
 111
 112   case AMDGPU::FNEG_R600: {
 113     MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
 114                                                     AMDGPU::MOV,
 115                                                     MI->getOperand(0).getReg(),
 116                                                     MI->getOperand(1).getReg());
 117     TII->addFlag(NewMI, 0, MO_FLAG_NEG);
 118     break;
 119   }
 120
 121   case AMDGPU::MASK_WRITE: {
 122     unsigned maskedRegister = MI->getOperand(0).getReg();
 123     assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
 124     MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
 125     TII->addFlag(defInstr, 0, MO_FLAG_MASK);
 126     break;
 127   }
 128
 129   case AMDGPU::MOV_IMM_F32:
 130     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
 131                      MI->getOperand(1).getFPImm()->getValueAPF()
 132                          .bitcastToAPInt().getZExtValue());
 133     break;
 134   case AMDGPU::MOV_IMM_I32:
 135     TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
 136                      MI->getOperand(1).getImm());
 137     break;
 138
 139
 140   case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
 141   case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
 142     unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
 143
 144     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
 145             .addOperand(MI->getOperand(0))
 146             .addOperand(MI->getOperand(1))
 147             .addImm(EOP); // Set End of program bit
 148     break;
 149   }
 150
 151   case AMDGPU::RESERVE_REG: {
 152     R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
 153     int64_t ReservedIndex = MI->getOperand(0).getImm();
 154     unsigned ReservedReg =
 155                          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
 156     MFI->ReservedRegs.push_back(ReservedReg);
 157     unsigned SuperReg =
 158           AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
 159     MFI->ReservedRegs.push_back(SuperReg);
 160     break;
 161   }
 162
 163   case AMDGPU::TXD: {
 164     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 165     unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 166
 167     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
 168             .addOperand(MI->getOperand(3))
 169             .addOperand(MI->getOperand(4))
 170             .addOperand(MI->getOperand(5))
 171             .addOperand(MI->getOperand(6));
 172     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
 173             .addOperand(MI->getOperand(2))
 174             .addOperand(MI->getOperand(4))
 175             .addOperand(MI->getOperand(5))
 176             .addOperand(MI->getOperand(6));
 177     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
 178             .addOperand(MI->getOperand(0))
 179             .addOperand(MI->getOperand(1))
 180             .addOperand(MI->getOperand(4))
 181             .addOperand(MI->getOperand(5))
 182             .addOperand(MI->getOperand(6))
 183             .addReg(T0, RegState::Implicit)
 184             .addReg(T1, RegState::Implicit);
 185     break;
 186   }
 187
 188   case AMDGPU::TXD_SHADOW: {
 189     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 190     unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
 191
 192     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
 193             .addOperand(MI->getOperand(3))
 194             .addOperand(MI->getOperand(4))
 195             .addOperand(MI->getOperand(5))
 196             .addOperand(MI->getOperand(6));
 197     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
 198             .addOperand(MI->getOperand(2))
 199             .addOperand(MI->getOperand(4))
 200             .addOperand(MI->getOperand(5))
 201             .addOperand(MI->getOperand(6));
 202     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
 203             .addOperand(MI->getOperand(0))
 204             .addOperand(MI->getOperand(1))
 205             .addOperand(MI->getOperand(4))
 206             .addOperand(MI->getOperand(5))
 207             .addOperand(MI->getOperand(6))
 208             .addReg(T0, RegState::Implicit)
 209             .addReg(T1, RegState::Implicit);
 210     break;
 211   }
 212
 213   case AMDGPU::BRANCH:
 214       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 215               .addOperand(MI->getOperand(0))
 216               .addReg(0);
 217       break;
 218
 219   case AMDGPU::BRANCH_COND_f32: {
 220     MachineInstr *NewMI =
 221       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
 222               AMDGPU::PREDICATE_BIT)
 223               .addOperand(MI->getOperand(1))
 224               .addImm(OPCODE_IS_NOT_ZERO)
 225               .addImm(0); // Flags
 226     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
 227     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 228             .addOperand(MI->getOperand(0))
 229             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 230     break;
 231   }
 232
 233   case AMDGPU::BRANCH_COND_i32: {
 234     MachineInstr *NewMI =
 235       BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
 236             AMDGPU::PREDICATE_BIT)
 237             .addOperand(MI->getOperand(1))
 238             .addImm(OPCODE_IS_NOT_ZERO_INT)
 239             .addImm(0); // Flags
 240     TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
 241     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
 242            .addOperand(MI->getOperand(0))
 243             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
 244     break;
 245   }
 246
 247   case AMDGPU::input_perspective: {
 248     R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
 249
 250     // XXX Be more fine about register reservation
 251     for (unsigned i = 0; i < 4; i ++) {
 252       unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
 253       MFI->ReservedRegs.push_back(ReservedReg);
 254     }
 255
 256     switch (MI->getOperand(1).getImm()) {
 257     case 0:// Perspective
 258       MFI->HasPerspectiveInterpolation = true;
 259       break;
 260     case 1:// Linear
 261       MFI->HasLinearInterpolation = true;
 262       break;
 263     default:
 264       assert(0 && "Unknow ij index");
 265     }
 266
 267     return BB;
 268   }
 269
 270   case AMDGPU::EG_ExportSwz:
 271   case AMDGPU::R600_ExportSwz: {
 272     bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
 273     if (!EOP)
 274       return BB;
 275     unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
 276     BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
 277             .addOperand(MI->getOperand(0))
 278             .addOperand(MI->getOperand(1))
 279             .addOperand(MI->getOperand(2))
 280             .addOperand(MI->getOperand(3))
 281             .addOperand(MI->getOperand(4))
 282             .addOperand(MI->getOperand(5))
 283             .addOperand(MI->getOperand(6))
 284             .addImm(CfInst)
 285             .addImm(1);
 286     break;
 287   }
 288   }
 289
 290   MI->eraseFromParent();
 291   return BB;
 292 }
 293
 294 //===----------------------------------------------------------------------===//
 295 // Custom DAG Lowering Operations
 296 //===----------------------------------------------------------------------===//
 297
 298 using namespace llvm::Intrinsic;
 299 using namespace llvm::AMDGPUIntrinsic;
 300
 301 static SDValue
 302 InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
 303     unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
 304     SDValue Scalar, SDValue Chain) {
 305   if (!ExportMap[Slot]) {
 306     SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
 307       DL, MVT::v4f32,
 308       DAG.getUNDEF(MVT::v4f32),
 309       Scalar,
 310       DAG.getConstant(Channel, MVT::i32));
 311
 312     unsigned Mask = 1 << Channel;
 313
 314     const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
 315         DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
 316         DAG.getConstant(Mask, MVT::i32)};
 317
 318     SDValue Res =  DAG.getNode(
 319         AMDGPUISD::EXPORT,
 320         DL,
 321         MVT::Other,
 322         Ops, 6);
 323      ExportMap[Slot] = Res.getNode();
 324      return Res;
 325   }
 326
 327   SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ;
 328   SDValue PreviousVector = ExportInstruction->getOperand(1);
 329   SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
 330       DL, MVT::v4f32,
 331       PreviousVector,
 332       Scalar,
 333       DAG.getConstant(Channel, MVT::i32));
 334
 335   unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
 336       ->getZExtValue();
 337   Mask |= (1 << Channel);
 338
 339   const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
 340       DAG.getConstant(Inst, MVT::i32),
 341       DAG.getConstant(Type, MVT::i32),
 342       DAG.getConstant(Slot, MVT::i32),
 343       DAG.getConstant(Mask, MVT::i32)};
 344
 345   DAG.UpdateNodeOperands(ExportInstruction,
 346       Ops, 6);
 347
 348   return Chain;
 349
 350 }
 351
 352 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 353   switch (Op.getOpcode()) {
 354   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 355   case ISD::BR_CC: return LowerBR_CC(Op, DAG);
 356   case ISD::ROTL: return LowerROTL(Op, DAG);
 357   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
 358   case ISD::SELECT: return LowerSELECT(Op, DAG);
 359   case ISD::SETCC: return LowerSETCC(Op, DAG);
 360   case ISD::STORE: return LowerSTORE(Op, DAG);
 361   case ISD::LOAD: return LowerLOAD(Op, DAG);
 362   case ISD::FPOW: return LowerFPOW(Op, DAG);
 363   case ISD::INTRINSIC_VOID: {
 364     SDValue Chain = Op.getOperand(0);
 365     unsigned IntrinsicID =
 366                          cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 367     switch (IntrinsicID) {
 368     case AMDGPUIntrinsic::AMDGPU_store_output: {
 369       MachineFunction &MF = DAG.getMachineFunction();
 370       MachineRegisterInfo &MRI = MF.getRegInfo();
 371       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
 372       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
 373       if (!MRI.isLiveOut(Reg)) {
 374         MRI.addLiveOut(Reg);
 375       }
 376       return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
 377     }
 378     case AMDGPUIntrinsic::R600_store_pixel_color: {
 379       MachineFunction &MF = DAG.getMachineFunction();
 380       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 381       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
 382
 383       SDNode **OutputsMap = MFI->Outputs;
 384       return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
 385           RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
 386           Chain);
 387
 388     }
 389     case AMDGPUIntrinsic::R600_store_stream_output : {
 390       MachineFunction &MF = DAG.getMachineFunction();
 391       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 392       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
 393       int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
 394
 395       SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
 396       unsigned Inst;
 397       switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue()  ) {
 398       // STREAM3
 399       case 3:
 400         Inst = 4;
 401         break;
 402       // STREAM2
 403       case 2:
 404         Inst = 3;
 405         break;
 406       // STREAM1
 407       case 1:
 408         Inst = 2;
 409         break;
 410       // STREAM0
 411       case 0:
 412         Inst = 1;
 413         break;
 414       default:
 415         llvm_unreachable("Wrong buffer id for stream outputs !");
 416       }
 417
 418       return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
 419           RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
 420           Chain);
 421     }
 422     // default for switch(IntrinsicID)
 423     default: break;
 424     }
 425     // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
 426     break;
 427   }
 428   case ISD::INTRINSIC_WO_CHAIN: {
 429     unsigned IntrinsicID =
 430                          cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
 431     EVT VT = Op.getValueType();
 432     DebugLoc DL = Op.getDebugLoc();
 433     switch(IntrinsicID) {
 434     default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 435     case AMDGPUIntrinsic::R600_load_input: {
 436       int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 437       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
 438       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
 439     }
 440     case AMDGPUIntrinsic::R600_load_input_perspective: {
 441       int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 442       if (slot < 0)
 443         return DAG.getUNDEF(MVT::f32);
 444       SDValue FullVector = DAG.getNode(
 445           AMDGPUISD::INTERP,
 446           DL, MVT::v4f32,
 447           DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
 448       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
 449         DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
 450     }
 451     case AMDGPUIntrinsic::R600_load_input_linear: {
 452       int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 453       if (slot < 0)
 454         return DAG.getUNDEF(MVT::f32);
 455       SDValue FullVector = DAG.getNode(
 456         AMDGPUISD::INTERP,
 457         DL, MVT::v4f32,
 458         DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
 459       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
 460         DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
 461     }
 462     case AMDGPUIntrinsic::R600_load_input_constant: {
 463       int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
 464       if (slot < 0)
 465         return DAG.getUNDEF(MVT::f32);
 466       SDValue FullVector = DAG.getNode(
 467         AMDGPUISD::INTERP_P0,
 468         DL, MVT::v4f32,
 469         DAG.getConstant(slot / 4 , MVT::i32));
 470       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
 471           DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
 472     }
 473
 474     case r600_read_ngroups_x:
 475       return LowerImplicitParameter(DAG, VT, DL, 0);
 476     case r600_read_ngroups_y:
 477       return LowerImplicitParameter(DAG, VT, DL, 1);
 478     case r600_read_ngroups_z:
 479       return LowerImplicitParameter(DAG, VT, DL, 2);
 480     case r600_read_global_size_x:
 481       return LowerImplicitParameter(DAG, VT, DL, 3);
 482     case r600_read_global_size_y:
 483       return LowerImplicitParameter(DAG, VT, DL, 4);
 484     case r600_read_global_size_z:
 485       return LowerImplicitParameter(DAG, VT, DL, 5);
 486     case r600_read_local_size_x:
 487       return LowerImplicitParameter(DAG, VT, DL, 6);
 488     case r600_read_local_size_y:
 489       return LowerImplicitParameter(DAG, VT, DL, 7);
 490     case r600_read_local_size_z:
 491       return LowerImplicitParameter(DAG, VT, DL, 8);
 492
 493     case r600_read_tgid_x:
 494       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 495                                   AMDGPU::T1_X, VT);
 496     case r600_read_tgid_y:
 497       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 498                                   AMDGPU::T1_Y, VT);
 499     case r600_read_tgid_z:
 500       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 501                                   AMDGPU::T1_Z, VT);
 502     case r600_read_tidig_x:
 503       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 504                                   AMDGPU::T0_X, VT);
 505     case r600_read_tidig_y:
 506       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 507                                   AMDGPU::T0_Y, VT);
 508     case r600_read_tidig_z:
 509       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
 510                                   AMDGPU::T0_Z, VT);
 511     }
 512     // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
 513     break;
 514   }
 515   } // end switch(Op.getOpcode())
 516   return SDValue();
 517 }
 518
 519 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
 520                                             SmallVectorImpl<SDValue> &Results,
 521                                             SelectionDAG &DAG) const {
 522   switch (N->getOpcode()) {
 523   default: return;
 524   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
 525     return;
 526   case ISD::LOAD: {
 527     SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
 528     Results.push_back(SDValue(Node, 0));
 529     Results.push_back(SDValue(Node, 1));
 530     // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
 531     // function
 532     DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
 533     return;
 534   }
 535   }
 536 }
 537
 538 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
 539   return DAG.getNode(
 540       ISD::SETCC,
 541       Op.getDebugLoc(),
 542       MVT::i1,
 543       Op, DAG.getConstantFP(0.0f, MVT::f32),
 544       DAG.getCondCode(ISD::SETNE)
 545       );
 546 }
 547
 548 SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 549   SDValue Chain = Op.getOperand(0);
 550   SDValue CC = Op.getOperand(1);
 551   SDValue LHS   = Op.getOperand(2);
 552   SDValue RHS   = Op.getOperand(3);
 553   SDValue JumpT  = Op.getOperand(4);
 554   SDValue CmpValue;
 555   SDValue Result;
 556
 557   if (LHS.getValueType() == MVT::i32) {
 558     CmpValue = DAG.getNode(
 559         ISD::SELECT_CC,
 560         Op.getDebugLoc(),
 561         MVT::i32,
 562         LHS, RHS,
 563         DAG.getConstant(-1, MVT::i32),
 564         DAG.getConstant(0, MVT::i32),
 565         CC);
 566   } else if (LHS.getValueType() == MVT::f32) {
 567     CmpValue = DAG.getNode(
 568         ISD::SELECT_CC,
 569         Op.getDebugLoc(),
 570         MVT::f32,
 571         LHS, RHS,
 572         DAG.getConstantFP(1.0f, MVT::f32),
 573         DAG.getConstantFP(0.0f, MVT::f32),
 574         CC);
 575   } else {
 576     assert(0 && "Not valid type for br_cc");
 577   }
 578   Result = DAG.getNode(
 579       AMDGPUISD::BRANCH_COND,
 580       CmpValue.getDebugLoc(),
 581       MVT::Other, Chain,
 582       JumpT, CmpValue);
 583   return Result;
 584 }
 585
 586 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
 587                                                    DebugLoc DL,
 588                                                    unsigned DwordOffset) const {
 589   unsigned ByteOffset = DwordOffset * 4;
 590   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
 591                                       AMDGPUAS::PARAM_I_ADDRESS);
 592
 593   // We shouldn't be using an offset wider than 16-bits for implicit parameters.
 594   assert(isInt<16>(ByteOffset));
 595
 596   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
 597                      DAG.getConstant(ByteOffset, MVT::i32), // PTR
 598                      MachinePointerInfo(ConstantPointerNull::get(PtrType)),
 599                      false, false, false, 0);
 600 }
 601
 602 SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
 603   DebugLoc DL = Op.getDebugLoc();
 604   EVT VT = Op.getValueType();
 605
 606   return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
 607                      Op.getOperand(0),
 608                      Op.getOperand(0),
 609                      DAG.getNode(ISD::SUB, DL, VT,
 610                                  DAG.getConstant(32, MVT::i32),
 611                                  Op.getOperand(1)));
 612 }
 613
 614 bool R600TargetLowering::isZero(SDValue Op) const {
 615   if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
 616     return Cst->isNullValue();
 617   } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
 618     return CstFP->isZero();
 619   } else {
 620     return false;
 621   }
 622 }
 623
 624 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 625   DebugLoc DL = Op.getDebugLoc();
 626   EVT VT = Op.getValueType();
 627
 628   SDValue LHS = Op.getOperand(0);
 629   SDValue RHS = Op.getOperand(1);
 630   SDValue True = Op.getOperand(2);
 631   SDValue False = Op.getOperand(3);
 632   SDValue CC = Op.getOperand(4);
 633   SDValue Temp;
 634
 635   // LHS and RHS are guaranteed to be the same value type
 636   EVT CompareVT = LHS.getValueType();
 637
 638   // Check if we can lower this to a native operation.
 639
 640   // Try to lower to a CND* instruction:
 641   // CND* instructions requires RHS to be zero.  Some SELECT_CC nodes that
 642   // can be lowered to CND* instructions can also be lowered to SET*
 643   // instructions.  CND* instructions are cheaper, because they dont't
 644   // require additional instructions to convert their result to the correct
 645   // value type, so this check should be first.
 646   if (isZero(LHS) || isZero(RHS)) {
 647     SDValue Cond = (isZero(LHS) ? RHS : LHS);
 648     SDValue Zero = (isZero(LHS) ? LHS : RHS);
 649     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 650     if (CompareVT != VT) {
 651       // Bitcast True / False to the correct types.  This will end up being
 652       // a nop, but it allows us to define only a single pattern in the
 653       // .TD files for each CND* instruction rather than having to have
 654       // one pattern for integer True/False and one for fp True/False
 655       True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
 656       False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
 657     }
 658     if (isZero(LHS)) {
 659       CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
 660     }
 661
 662     switch (CCOpcode) {
 663     case ISD::SETONE:
 664     case ISD::SETUNE:
 665     case ISD::SETNE:
 666     case ISD::SETULE:
 667     case ISD::SETULT:
 668     case ISD::SETOLE:
 669     case ISD::SETOLT:
 670     case ISD::SETLE:
 671     case ISD::SETLT:
 672       CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
 673       Temp = True;
 674       True = False;
 675       False = Temp;
 676       break;
 677     default:
 678       break;
 679     }
 680     SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 681         Cond, Zero,
 682         True, False,
 683         DAG.getCondCode(CCOpcode));
 684     return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
 685   }
 686
 687   // Try to lower to a SET* instruction:
 688   // We need all the operands of SELECT_CC to have the same value type, so if
 689   // necessary we need to change True and False to be the same type as LHS and
 690   // RHS, and then convert the result of the select_cc back to the correct type.
 691
 692   // Move hardware True/False values to the correct operand.
 693   if (isHWTrueValue(False) && isHWFalseValue(True)) {
 694     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
 695     std::swap(False, True);
 696     CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
 697   }
 698
 699   if (isHWTrueValue(True) && isHWFalseValue(False)) {
 700     if (CompareVT !=  VT) {
 701       if (VT == MVT::f32 && CompareVT == MVT::i32) {
 702         SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 703             LHS, RHS,
 704             DAG.getConstant(-1, MVT::i32),
 705             DAG.getConstant(0, MVT::i32),
 706             CC);
 707         // Convert integer values of true (-1) and false (0) to fp values of
 708         // true (1.0f) and false (0.0f).
 709         SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
 710                                                   DAG.getConstant(1, MVT::i32));
 711         return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
 712       } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
 713         SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
 714             LHS, RHS,
 715             DAG.getConstantFP(1.0f, MVT::f32),
 716             DAG.getConstantFP(0.0f, MVT::f32),
 717             CC);
 718         // Convert fp values of true (1.0f) and false (0.0f) to integer values
 719         // of true (-1) and false (0).
 720         SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
 721         return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
 722       } else {
 723         // I don't think there will be any other type pairings.
 724         assert(!"Unhandled operand type parings in SELECT_CC");
 725       }
 726     } else {
 727       // This SELECT_CC is already legal.
 728       return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
 729     }
 730   }
 731
 732   // Possible Min/Max pattern
 733   SDValue MinMax = LowerMinMax(Op, DAG);
 734   if (MinMax.getNode()) {
 735     return MinMax;
 736   }
 737
 738   // If we make it this for it means we have no native instructions to handle
 739   // this SELECT_CC, so we must lower it.
 740   SDValue HWTrue, HWFalse;
 741
 742   if (CompareVT == MVT::f32) {
 743     HWTrue = DAG.getConstantFP(1.0f, CompareVT);
 744     HWFalse = DAG.getConstantFP(0.0f, CompareVT);
 745   } else if (CompareVT == MVT::i32) {
 746     HWTrue = DAG.getConstant(-1, CompareVT);
 747     HWFalse = DAG.getConstant(0, CompareVT);
 748   }
 749   else {
 750     assert(!"Unhandled value type in LowerSELECT_CC");
 751   }
 752
 753   // Lower this unsupported SELECT_CC into a combination of two supported
 754   // SELECT_CC operations.
 755   SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
 756
 757   return DAG.getNode(ISD::SELECT_CC, DL, VT,
 758       Cond, HWFalse,
 759       True, False,
 760       DAG.getCondCode(ISD::SETNE));
 761 }
 762
 763 SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
 764   return DAG.getNode(ISD::SELECT_CC,
 765       Op.getDebugLoc(),
 766       Op.getValueType(),
 767       Op.getOperand(0),
 768       DAG.getConstant(0, MVT::i32),
 769       Op.getOperand(1),
 770       Op.getOperand(2),
 771       DAG.getCondCode(ISD::SETNE));
 772 }
 773
 774 SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 775   SDValue Cond;
 776   SDValue LHS = Op.getOperand(0);
 777   SDValue RHS = Op.getOperand(1);
 778   SDValue CC  = Op.getOperand(2);
 779   DebugLoc DL = Op.getDebugLoc();
 780   assert(Op.getValueType() == MVT::i32);
 781   if (LHS.getValueType() == MVT::i32) {
 782     Cond = DAG.getNode(
 783         ISD::SELECT_CC,
 784         Op.getDebugLoc(),
 785         MVT::i32,
 786         LHS, RHS,
 787         DAG.getConstant(-1, MVT::i32),
 788         DAG.getConstant(0, MVT::i32),
 789         CC);
 790   } else if (LHS.getValueType() == MVT::f32) {
 791     Cond = DAG.getNode(
 792         ISD::SELECT_CC,
 793         Op.getDebugLoc(),
 794         MVT::f32,
 795         LHS, RHS,
 796         DAG.getConstantFP(1.0f, MVT::f32),
 797         DAG.getConstantFP(0.0f, MVT::f32),
 798         CC);
 799     Cond = DAG.getNode(
 800         ISD::FP_TO_SINT,
 801         DL,
 802         MVT::i32,
 803         Cond);
 804   } else {
 805     assert(0 && "Not valid type for set_cc");
 806   }
 807   Cond = DAG.getNode(
 808       ISD::AND,
 809       DL,
 810       MVT::i32,
 811       DAG.getConstant(1, MVT::i32),
 812       Cond);
 813   return Cond;
 814 }
 815
 816 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
 817   DebugLoc DL = Op.getDebugLoc();
 818   StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
 819   SDValue Chain = Op.getOperand(0);
 820   SDValue Value = Op.getOperand(1);
 821   SDValue Ptr = Op.getOperand(2);
 822
 823   if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
 824       Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
 825     // Convert pointer from byte address to dword address.
 826     Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
 827                       DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
 828                                   Ptr, DAG.getConstant(2, MVT::i32)));
 829
 830     if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
 831       assert(!"Truncated and indexed stores not supported yet");
 832     } else {
 833       Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
 834     }
 835     return Chain;
 836   }
 837   return SDValue();
 838 }
 839
 840 // return (512 + (kc_bank << 12)
 841 static int
 842 ConstantAddressBlock(unsigned AddressSpace) {
 843   switch (AddressSpace) {
 844   case AMDGPUAS::CONSTANT_BUFFER_0:
 845     return 512;
 846   case AMDGPUAS::CONSTANT_BUFFER_1:
 847     return 512 + 4096;
 848   case AMDGPUAS::CONSTANT_BUFFER_2:
 849     return 512 + 4096 * 2;
 850   case AMDGPUAS::CONSTANT_BUFFER_3:
 851     return 512 + 4096 * 3;
 852   case AMDGPUAS::CONSTANT_BUFFER_4:
 853     return 512 + 4096 * 4;
 854   case AMDGPUAS::CONSTANT_BUFFER_5:
 855     return 512 + 4096 * 5;
 856   case AMDGPUAS::CONSTANT_BUFFER_6:
 857     return 512 + 4096 * 6;
 858   case AMDGPUAS::CONSTANT_BUFFER_7:
 859     return 512 + 4096 * 7;
 860   case AMDGPUAS::CONSTANT_BUFFER_8:
 861     return 512 + 4096 * 8;
 862   case AMDGPUAS::CONSTANT_BUFFER_9:
 863     return 512 + 4096 * 9;
 864   case AMDGPUAS::CONSTANT_BUFFER_10:
 865     return 512 + 4096 * 10;
 866   case AMDGPUAS::CONSTANT_BUFFER_11:
 867     return 512 + 4096 * 11;
 868   case AMDGPUAS::CONSTANT_BUFFER_12:
 869     return 512 + 4096 * 12;
 870   case AMDGPUAS::CONSTANT_BUFFER_13:
 871     return 512 + 4096 * 13;
 872   case AMDGPUAS::CONSTANT_BUFFER_14:
 873     return 512 + 4096 * 14;
 874   case AMDGPUAS::CONSTANT_BUFFER_15:
 875     return 512 + 4096 * 15;
 876   default:
 877     return -1;
 878   }
 879 }
 880
 881 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
 882 {
 883   EVT VT = Op.getValueType();
 884   DebugLoc DL = Op.getDebugLoc();
 885   LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
 886   SDValue Chain = Op.getOperand(0);
 887   SDValue Ptr = Op.getOperand(1);
 888   SDValue LoweredLoad;
 889
 890   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
 891   if (ConstantBlock > -1) {
 892     SDValue Result;
 893     if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
 894         dyn_cast<Constant>(LoadNode->getSrcValue())) {
 895       SDValue Slots[4];
 896       for (unsigned i = 0; i < 4; i++) {
 897         // We want Const position encoded with the following formula :
 898         // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
 899         // const_index is Ptr computed by llvm using an alignment of 16.
 900         // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
 901         // then div by 4 at the ISel step
 902         SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
 903             DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
 904         Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
 905       }
 906       Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
 907     } else {
 908       // non constant ptr cant be folded, keeps it as a v4f32 load
 909       Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
 910           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
 911           );
 912     }
 913
 914     if (!VT.isVector()) {
 915       Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
 916           DAG.getConstant(0, MVT::i32));
 917     }
 918
 919     SDValue MergedValues[2] = {
 920         Result,
 921         Chain
 922     };
 923     return DAG.getMergeValues(MergedValues, 2, DL);
 924   }
 925
 926   return SDValue();
 927 }
 928
 929 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
 930     SelectionDAG &DAG) const {
 931   DebugLoc DL = Op.getDebugLoc();
 932   EVT VT = Op.getValueType();
 933   SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
 934   SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
 935   return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
 936 }
 937
 938 /// XXX Only kernel functions are supported, so we can assume for now that
 939 /// every function is a kernel function, but in the future we should use
 940 /// separate calling conventions for kernel and non-kernel functions.
 941 SDValue R600TargetLowering::LowerFormalArguments(
 942                                       SDValue Chain,
 943                                       CallingConv::ID CallConv,
 944                                       bool isVarArg,
 945                                       const SmallVectorImpl<ISD::InputArg> &Ins,
 946                                       DebugLoc DL, SelectionDAG &DAG,
 947                                       SmallVectorImpl<SDValue> &InVals) const {
 948   unsigned ParamOffsetBytes = 36;
 949   Function::const_arg_iterator FuncArg =
 950                             DAG.getMachineFunction().getFunction()->arg_begin();
 951   for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
 952     EVT VT = Ins[i].VT;
 953     Type *ArgType = FuncArg->getType();
 954     unsigned ArgSizeInBits = ArgType->isPointerTy() ?
 955                              32 : ArgType->getPrimitiveSizeInBits();
 956     unsigned ArgBytes = ArgSizeInBits >> 3;
 957     EVT ArgVT;
 958     if (ArgSizeInBits < VT.getSizeInBits()) {
 959       assert(!ArgType->isFloatTy() &&
 960              "Extending floating point arguments not supported yet");
 961       ArgVT = MVT::getIntegerVT(ArgSizeInBits);
 962     } else {
 963       ArgVT = VT;
 964     }
 965     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
 966                                                     AMDGPUAS::PARAM_I_ADDRESS);
 967     SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
 968                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
 969                                        MachinePointerInfo(new Argument(PtrTy)),
 970                                        ArgVT, false, false, ArgBytes);
 971     InVals.push_back(Arg);
 972     ParamOffsetBytes += ArgBytes;
 973   }
 974   return Chain;
 975 }
 976
 977 EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
 978    if (!VT.isVector()) return MVT::i32;
 979    return VT.changeVectorElementTypeToInteger();
 980 }
 981
 982 //===----------------------------------------------------------------------===//
 983 // Custom DAG Optimizations
 984 //===----------------------------------------------------------------------===//
 985
 986 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
 987                                               DAGCombinerInfo &DCI) const {
 988   SelectionDAG &DAG = DCI.DAG;
 989
 990   switch (N->getOpcode()) {
 991   // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
 992   case ISD::FP_ROUND: {
 993       SDValue Arg = N->getOperand(0);
 994       if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
 995         return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
 996                            Arg.getOperand(0));
 997       }
 998       break;
 999     }
1000   // Extract_vec (Build_vector) generated by custom lowering
1001   // also needs to be customly combined
1002   case ISD::EXTRACT_VECTOR_ELT: {
1003     SDValue Arg = N->getOperand(0);
1004     if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1005       if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1006         unsigned Element = Const->getZExtValue();
1007         return Arg->getOperand(Element);
1008       }
1009     }
1010   }
1011   }
1012   return SDValue();
1013 }