R600/SI: Add pattern for bswap

[oota-llvm.git] / lib / Target / R600 / SIISelLowering.cpp
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index 1a73d9bda587eef48af5aaeb5381ee776b917cc7..039282939f17a10d3ee2a9e8aa2432b41d898974 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -117,6 +117,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
    setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
    setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
  
+  setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
@@ -257,6 +259,13 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
  // TargetLowering queries
  //===----------------------------------------------------------------------===//
  
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+                                          EVT) const {
+  // SI has some legal vector types, but no legal vector operations. Say no
+  // shuffles are legal in order to prefer scalarizing some vector operations.
+  return false;
+}
+
  // FIXME: This really needs an address space argument. The immediate offset
  // size is different for different sets of memory instruction sets.
  
@@ -519,11 +528,11 @@ SDValue SITargetLowering::LowerFormalArguments(
      if (VA.isMemLoc()) {
        VT = Ins[i].VT;
        EVT MemVT = Splits[i].VT;
+      const unsigned Offset = 36 + VA.getLocMemOffset();
        // The first 36 bytes of the input buffer contains information about
        // thread group and global sizes.
        SDValue Arg = LowerParameter(DAG, VT, MemVT,  DL, DAG.getRoot(),
-                                   36 + VA.getLocMemOffset(),
-                                   Ins[i].Flags.isSExt());
+                                   Offset, Ins[i].Flags.isSExt());
  
        const PointerType *ParamTy =
            dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
@@ -537,6 +546,7 @@ SDValue SITargetLowering::LowerFormalArguments(
        }
  
        InVals.push_back(Arg);
+      Info->ABIArgOffset = Offset + MemVT.getStoreSize();
        continue;
      }
      assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -622,36 +632,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
      MI->eraseFromParent();
      break;
    }
-  case AMDGPU::SI_BUFFER_RSRC: {
-    unsigned SuperReg = MI->getOperand(0).getReg();
-    unsigned Args[4];
-    for (unsigned i = 0, e = 4; i < e; ++i) {
-      MachineOperand &Arg = MI->getOperand(i + 1);
-
-      if (Arg.isReg()) {
-        Args[i] = Arg.getReg();
-        continue;
-      }
-
-      assert(Arg.isImm());
-      unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
-      BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), Reg)
-              .addImm(Arg.getImm());
-      Args[i] = Reg;
-    }
-    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE),
-            SuperReg)
-            .addReg(Args[0])
-            .addImm(AMDGPU::sub0)
-            .addReg(Args[1])
-            .addImm(AMDGPU::sub1)
-            .addReg(Args[2])
-            .addImm(AMDGPU::sub2)
-            .addReg(Args[3])
-            .addImm(AMDGPU::sub3);
-    MI->eraseFromParent();
-    break;
-  }
    case AMDGPU::V_SUB_F64: {
      unsigned DestReg = MI->getOperand(0).getReg();
      BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
@@ -927,6 +907,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    case Intrinsic::r600_read_local_size_z:
      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
                            SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
+
+  case Intrinsic::AMDGPU_read_workdim:
+    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
+                          false);
+
    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
        TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
@@ -1920,28 +1906,26 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
    }
  }
  
-/// \brief Legalize INSERT_SUBREG instructions with frame index operands.
-/// LLVM assumes that all INSERT_SUBREG inputs are registers.
-static void legalizeInsertSubreg(MachineSDNode *InsertSubreg,
-                                 SelectionDAG &DAG) {
-
-  assert(InsertSubreg->getMachineOpcode() == AMDGPU::INSERT_SUBREG);
+/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
+/// with frame index operands.
+/// LLVM assumes that inputs to these instructions are registers.
+void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
+                                                     SelectionDAG &DAG) const {
  
    SmallVector<SDValue, 8> Ops;
-  for (unsigned i = 0; i < 2; ++i) {
-    if (!isa<FrameIndexSDNode>(InsertSubreg->getOperand(i))) {
-      Ops.push_back(InsertSubreg->getOperand(i));
+  for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
+    if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+      Ops.push_back(Node->getOperand(i));
        continue;
      }
  
-    SDLoc DL(InsertSubreg);
+    SDLoc DL(Node);
      Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
-                                     InsertSubreg->getOperand(i).getValueType(),
-                                     InsertSubreg->getOperand(i)), 0));
+                                     Node->getOperand(i).getValueType(),
+                                     Node->getOperand(i)), 0));
    }
  
-  DAG.UpdateNodeOperands(InsertSubreg, Ops[0], Ops[1],
-                         InsertSubreg->getOperand(2));
+  DAG.UpdateNodeOperands(Node, Ops);
  }
  
  /// \brief Fold the instructions after selecting them.