projects
/
oota-llvm.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
AMDGPU/SI: Select constant loads with non-uniform addresses to MUBUF instructions
[oota-llvm.git]
/
lib
/
Target
/
AMDGPU
/
SIInstructions.td
diff --git
a/lib/Target/AMDGPU/SIInstructions.td
b/lib/Target/AMDGPU/SIInstructions.td
index 8163419cd483c11d229aeb1ef8b5840129402d87..6f653c70aca06cb489bc456c21f0491bbb570ea6 100644
(file)
--- a/
lib/Target/AMDGPU/SIInstructions.td
+++ b/
lib/Target/AMDGPU/SIInstructions.td
@@
-953,13
+953,13
@@
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32,
global
_load
+ mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32,
mubuf
_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32,
global
_load
+ mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32,
mubuf
_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32,
global
_load
+ mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32,
mubuf
_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
@@
-2087,24
+2087,29
@@
multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 1. IMM offset
def : Pat <
// 1. IMM offset
def : Pat <
- (
constant
_load (SMRDImm i64:$sbase, i32:$offset)),
+ (
smrd
_load (SMRDImm i64:$sbase, i32:$offset)),
(vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
>;
// 2. SGPR offset
def : Pat <
(vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
>;
// 2. SGPR offset
def : Pat <
- (
constant
_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (
smrd
_load (SMRDSgpr i64:$sbase, i32:$offset)),
(vt (!cast<SMRD>(Instr#"_SGPR") $sbase, $offset))
>;
def : Pat <
(vt (!cast<SMRD>(Instr#"_SGPR") $sbase, $offset))
>;
def : Pat <
- (
constant
_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (
smrd
_load (SMRDImm32 i64:$sbase, i32:$offset)),
(vt (!cast<SMRD>(Instr#"_IMM_ci") $sbase, $offset))
> {
let Predicates = [isCIOnly];
}
}
(vt (!cast<SMRD>(Instr#"_IMM_ci") $sbase, $offset))
> {
let Predicates = [isCIOnly];
}
}
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
+
defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
@@
-2133,6
+2138,8
@@
def : Pat <
} // End Predicates = [isCI]
} // End Predicates = [isCI]
+} // End let AddedComplexity = 100
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//