From 704b06ce61eb4623b97956d8112b3d9a38e1133f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 14 Oct 2014 17:21:19 +0000 Subject: [PATCH] R600/SI: Use DS offsets for constant addresses Use 0 as the base address for a constant address, so if we have a constant address we can save moves and form read2/write2s. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219698 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 12 ++++++++++++ test/CodeGen/R600/ds_read2.ll | 24 ++++++++++++++++++++++++ test/CodeGen/R600/ds_write2.ll | 21 +++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 989fc938c08..a1bbfa65530 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -787,6 +787,18 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, } } + // If we have a constant address, prefer to put the constant into the + // offset. This can save moves to load the constant address since multiple + // operations can share the zero base address register, and enables merging + // into read2 / write2 instructions. 
+ if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { + if (isUInt<16>(CAddr->getZExtValue())) { + Base = CurDAG->getConstant(0, MVT::i32); + Offset = Addr; + return true; + } + } + // default case Base = Addr; Offset = CurDAG->getTargetConstant(0, MVT::i16); diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll index 4c193987d2e..74d3a598b96 100644 --- a/test/CodeGen/R600/ds_read2.ll +++ b/test/CodeGen/R600/ds_read2.ll @@ -382,6 +382,30 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3) ret void } +@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4 + +; SI-LABEL: @load_constant_adjacent_offsets +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1 +define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { + %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 + %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 + %sum = add i32 %val0, %val1 + store i32 %sum, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @load_constant_disjoint_offsets +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2 +define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { + %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 + %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 + %sum = add i32 %val0, %val1 + store i32 %sum, i32 addrspace(1)* %out, align 4 + ret void +} + @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4 diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll index 
5dc4b6d7325..6e5bcffb621 100644 --- a/test/CodeGen/R600/ds_write2.ll +++ b/test/CodeGen/R600/ds_write2.ll @@ -320,6 +320,27 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace ret void } +@foo = addrspace(3) global [4 x i32] zeroinitializer, align 4 + +; SI-LABEL: @store_constant_adjacent_offsets +; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1 +define void @store_constant_adjacent_offsets() { + store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 + store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 + ret void +} + +; SI-LABEL: @store_constant_disjoint_offsets +; SI-DAG: V_MOV_B32_e32 [[VAL:v[0-9]+]], 0x7b{{$}} +; SI-DAG: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; SI: DS_WRITE2_B32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2 +define void @store_constant_disjoint_offsets() { + store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 + store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 + ret void +} + @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4 -- 2.34.1