mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 21:00:16 +01:00
radeonsi: enable scratch coalescing
This makes one particular compute shader 8x faster. The latest LLVM git is required.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
0c0f841e5d
commit
6e1b12c788
1 changed file with 10 additions and 2 deletions
|
|
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
|
|||
unsigned i;
|
||||
uint32_t scratch_rsrc_dword0 = scratch_va;
|
||||
uint32_t scratch_rsrc_dword1 =
|
||||
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
|
||||
| S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
|
||||
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
|
||||
|
||||
/* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
|
||||
* correctly.
|
||||
*/
|
||||
if (HAVE_LLVM >= 0x0309)
|
||||
scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
|
||||
else
|
||||
scratch_rsrc_dword1 |=
|
||||
S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
|
||||
|
||||
for (i = 0 ; i < shader->binary.reloc_count; i++) {
|
||||
const struct radeon_shader_reloc *reloc =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue