radeonsi: don't update compute scratch if the compute shader doesn't use it

We need to save the last COMPUTE_TMPRING_SIZE value in si_context because
it's no longer computed when compute scratch isn't used.

Fixes: 3b0bfd254f - radeonsi/gfx11: make flat_scratch changes for compute

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30071>
(cherry picked from commit bc4382348d)
This commit is contained in:
Marek Olšák 2024-07-12 17:11:32 -04:00 committed by Eric Engestrom
parent 78639a95cc
commit 94e41cd24c
4 changed files with 26 additions and 23 deletions

View file

@ -1054,7 +1054,7 @@
"description": "radeonsi: don't update compute scratch if the compute shader doesn't use it",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "3b0bfd254f722e5773f70c6cb367e859876a4208",
"notes": null

View file

@ -381,17 +381,13 @@ static void si_set_global_binding(struct pipe_context *ctx, unsigned first, unsi
}
}
static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader,
const struct ac_shader_config *config)
static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader)
{
uint64_t scratch_bo_size, scratch_needed;
scratch_bo_size = 0;
scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave * sctx->screen->info.max_scratch_waves;
if (sctx->compute_scratch_buffer)
scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
if (!scratch_needed)
return true;
uint64_t scratch_bo_size =
sctx->compute_scratch_buffer ? sctx->compute_scratch_buffer->b.b.width0 : 0;
uint64_t scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave *
sctx->screen->info.max_scratch_waves;
assert(scratch_needed);
if (scratch_bo_size < scratch_needed) {
si_resource_reference(&sctx->compute_scratch_buffer, NULL);
@ -408,8 +404,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
}
/* Set the scratch address in the shader binary. */
if (config->scratch_bytes_per_wave && sctx->gfx_level < GFX11 &&
(sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) {
if (sctx->gfx_level < GFX11 && (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) {
uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
if (shader->scratch_va != scratch_va) {
@ -482,15 +477,16 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
}
unsigned tmpring_size;
ac_get_scratch_tmpring_size(&sctx->screen->info,
config->scratch_bytes_per_wave,
&sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size);
if (!si_setup_compute_scratch_buffer(sctx, shader, config))
return false;
if (config->scratch_bytes_per_wave) {
/* Update max_seen_compute_scratch_bytes_per_wave and compute_tmpring_size. */
ac_get_scratch_tmpring_size(&sctx->screen->info,
config->scratch_bytes_per_wave,
&sctx->max_seen_compute_scratch_bytes_per_wave,
&sctx->compute_tmpring_size);
if (!si_setup_compute_scratch_buffer(sctx, shader))
return false;
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->compute_scratch_buffer,
RADEON_USAGE_READWRITE | RADEON_PRIO_SCRATCH_BUFFER);
}
@ -515,7 +511,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
SI_TRACKED_COMPUTE_PGM_RSRC3,
S_00B8A0_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)));
gfx11_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE,
SI_TRACKED_COMPUTE_TMPRING_SIZE, tmpring_size);
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
if (config->scratch_bytes_per_wave) {
gfx11_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
@ -531,7 +527,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
SI_TRACKED_COMPUTE_PGM_RSRC1,
config->rsrc1, rsrc2);
radeon_opt_set_sh_reg(sctx, R_00B860_COMPUTE_TMPRING_SIZE,
SI_TRACKED_COMPUTE_TMPRING_SIZE, tmpring_size);
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
if (config->scratch_bytes_per_wave &&
(sctx->gfx_level >= GFX11 ||

View file

@ -11,6 +11,7 @@
#include "radeon_uvd.h"
#include "si_public.h"
#include "sid.h"
#include "ac_shader_util.h"
#include "ac_shadowed_regs.h"
#include "compiler/nir/nir.h"
#include "util/disk_cache.h"
@ -879,6 +880,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
if (!sctx->cs_blit_shaders)
goto fail;
/* Initialize compute_tmpring_size. */
ac_get_scratch_tmpring_size(&sctx->screen->info, 0,
&sctx->max_seen_compute_scratch_bytes_per_wave,
&sctx->compute_tmpring_size);
return &sctx->b;
fail:
fprintf(stderr, "radeonsi: Failed to create a context.\n");

View file

@ -1129,6 +1129,7 @@ struct si_context {
struct si_vertex_elements *vertex_elements;
unsigned num_vertex_elements;
unsigned cs_max_waves_per_sh;
uint32_t compute_tmpring_size;
bool uses_nontrivial_vs_inputs;
bool force_trivial_vs_inputs;
bool do_update_shaders;