R600/SI: Set the ATC bit on all resource descriptors for the HSA runtime

author Tom Stellard <thomas.stellard@amd.com>

Tue, 2 Dec 2014 17:05:41 +0000 (17:05 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Tue, 2 Dec 2014 17:05:41 +0000 (17:05 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Tue, 2 Dec 2014 17:05:41 +0000 (17:05 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Tue, 2 Dec 2014 17:05:41 +0000 (17:05 +0000)
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp

index 90b66725c17a647b57e8eeb099b4ac6eb82671d2..ad5a5417ee3caabb18a1201a9657e5cd422ade86 100644 (file)
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -1012,6 +1012,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                             SDValue &GLC, SDValue &SLC,
                                             SDValue &TFE) const {
    SDValue Ptr, VAddr, Offen, Idxen, Addr64;
+  const SIInstrInfo *TII =
+    static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
  
    SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                GLC, SLC, TFE);
@@ -1019,7 +1021,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
    if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
        !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
        !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
-    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
+    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                      APInt::getAllOnesValue(32).getZExtValue(); // Size
      SDLoc DL(Addr);
  
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp

index 9d09a1963709670b81c18be1ec5447c1d40a6a64..0d693c8c9c65fb686930d5dbbb328d06a3d9497d 100644 (file)
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -84,7 +84,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
        FrameLowering(TargetFrameLowering::StackGrowsUp,
                      64 * 16, // Maximum stack alignment (long16)
                      0),
-      InstrItins(getInstrItineraryForCPU(GPU)) {
+      InstrItins(getInstrItineraryForCPU(GPU)),
+      TargetTriple(TT) {
    if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      InstrInfo.reset(new R600InstrInfo(*this));
      TLInfo.reset(new R600TargetLowering(TM));
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h

index f71d80a8ba0c353170bf2f60516c1d39af225ff0..79adf55c5c8a9eb4c726eafd3bdf1d90215eacc5 100644 (file)
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -68,6 +68,7 @@ private:
    std::unique_ptr<AMDGPUTargetLowering> TLInfo;
    std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
    InstrItineraryData InstrItins;
+  Triple TargetTriple;
  
  public:
    AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
@@ -217,6 +218,9 @@ public:
    bool r600ALUEncoding() const {
      return R600ALUInst;
    }
+  bool isAmdHsaOS() const { // True when the target triple's OS is AMDHSA.
+    return TargetTriple.getOS() == Triple::AMDHSA;
+  }
  };
  
  } // End namespace llvm
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index adc017866f0c3523705487021c167d963c64eb69..2733e09cd10f579d283b882d9b671b3749e9bf91 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -2030,6 +2030,8 @@ static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
  MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
                                                  SDLoc DL,
                                                  SDValue Ptr) const {
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
  #if 1
      // XXX - Workaround for moveToVALU not handling different register class
      // inserts for REG_SEQUENCE.
@@ -2039,7 +2041,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
        DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
        buildSMovImm32(DAG, DL, 0),
        DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
-      buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
+      buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
        DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
      };
  
@@ -2063,7 +2065,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
        DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
        buildSMovImm32(DAG, DL, 0),
        DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
-      buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
+      buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
        DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
      };
  
@@ -2110,7 +2112,9 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
  MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
                                                    SDLoc DL,
                                                    SDValue Ptr) const {
-  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
+  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
                    0xffffffff; // Size
  
    return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index 21aadea1e9361285b72cb2741b7892dd68736a3b..931f351157f7a36c3fce874fc50fa3477e64d971 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -1580,6 +1580,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
  
      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
@@ -1589,12 +1590,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
-            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+            .addImm(RsrcDataFormat & 0xFFFFFFFF);
  
      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
-            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+            .addImm(RsrcDataFormat >> 32);
  
      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
@@ -1798,13 +1799,14 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
        unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
        unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
        unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
  
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
                .addImm(0);
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
-              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+              .addImm(RsrcDataFormat & 0xFFFFFFFF);
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
-              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+              .addImm(RsrcDataFormat >> 32);
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
                .addReg(DWord0)
                .addImm(AMDGPU::sub0)
@@ -2413,3 +2415,11 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
  
    return &MI.getOperand(Idx);
  }
+
+uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { // Default rsrc bits.
+  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
+  if (ST.isAmdHsaOS()) // OS in the target triple is AMDHSA.
+    RsrcDataFormat |= (1ULL << 56); // Set the ATC bit (bit 56).
+
+  return RsrcDataFormat;
+}
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h

index 5295606c548919713c5d6e7a62a1b768069e0063..32881c745fafd26611696756d07f1abcc6e4038f 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -308,6 +308,9 @@ public:
                                          unsigned OpName) const {
      return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
    }
+
+  uint64_t getDefaultRsrcDataFormat() const;
+
  };
  
  namespace AMDGPU {
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll

new file mode 100644 (file)

index 0000000..2e79866
--- /dev/null
+++ b/test/CodeGen/R600/hsa.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+
+; HSA: {{^}}simple:
+; Make sure we are setting the ATC bit:
+; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
+
+define void @simple(i32 addrspace(1)* %out) { ; minimal kernel: one i32 store
+entry:
+  store i32 0, i32 addrspace(1)* %out ; the store the buffer_store_dword check above targets
+  ret void
+}
author	Tom Stellard <thomas.stellard@amd.com>
	Tue, 2 Dec 2014 17:05:41 +0000 (17:05 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Tue, 2 Dec 2014 17:05:41 +0000 (17:05 +0000)
lib/Target/R600/AMDGPUISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUSubtarget.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUSubtarget.h		patch \| blob \| history
lib/Target/R600/SIISelLowering.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.h		patch \| blob \| history
test/CodeGen/R600/hsa.ll	[new file with mode: 0644]	patch \| blob