def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64]>;
+def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+ [Feature64BitPtr, FeatureFP64]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
}
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
OutStreamer.EmitIntValue(RsrcReg, 4);
OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+ unsigned LDSAlignShift;
+ if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ // LDS is allocated in 64 dword blocks
+ LDSAlignShift = 8;
+ } else {
+ // LDS is allocated in 128 dword blocks
+ LDSAlignShift = 9;
+ }
+ unsigned LDSBlocks =
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+
if (MFI->ShaderType == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
+ OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
}
if (MFI->ShaderType == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
+ OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() == "
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
+ ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
]>;
R700,
EVERGREEN,
NORTHERN_ISLANDS,
- SOUTHERN_ISLANDS
+ SOUTHERN_ISLANDS,
+ SEA_ISLANDS
};
private:
def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"bonaire", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"kabini", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>;
+def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
}
def isSI : Predicate<"Subtarget.getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def WAIT_FLAG : InstFlag<"printWaitFlag">;
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s
-@local_memory.local_mem = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 4
+@local_memory.local_mem = internal addrspace(3) unnamed_addr global [128 x i32] zeroinitializer, align 4
-; EG-CHECK: @local_memory
-; SI-CHECK: @local_memory
+; EG-CHECK-LABEL: @local_memory
+; SI-CHECK-LABEL: @local_memory
+; CI-CHECK-LABEL: @local_memory
; Check that the LDS size emitted correctly
; EG-CHECK: .long 166120
-; EG-CHECK-NEXT: .long 16
+; EG-CHECK-NEXT: .long 128
; SI-CHECK: .long 47180
-; SI-CHECK-NEXT: .long 32768
+; SI-CHECK-NEXT: .long 65536
+; CI-CHECK: .long 47180
+; CI-CHECK-NEXT: .long 32768
; EG-CHECK: LDS_WRITE
; SI-CHECK_NOT: S_WQM_B64
define void @local_memory(i32 addrspace(1)* %out) {
entry:
%y.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [16 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
+ %arrayidx = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
%add = add nsw i32 %y.i, 1
%cmp = icmp eq i32 %add, 16
%.add = select i1 %cmp, i32 0, i32 %add
call void @llvm.AMDGPU.barrier.local()
- %arrayidx1 = getelementptr inbounds [16 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
+ %arrayidx1 = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
%0 = load i32 addrspace(3)* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i32 %y.i
store i32 %0, i32 addrspace(1)* %arrayidx2, align 4