mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 20:08:06 +02:00
radeonsi: don't update compute scratch if the compute shader doesn't use it
We need to save the last COMPUTE_TMPRING_SIZE value in si_context because it's no longer computed when compute scratch isn't used.
Fixes: 3b0bfd254f - radeonsi/gfx11: make flat_scratch changes for compute
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30071>
(cherry picked from commit bc4382348d)
This commit is contained in:
parent
78639a95cc
commit
94e41cd24c
4 changed files with 26 additions and 23 deletions
|
|
@ -1054,7 +1054,7 @@
|
|||
"description": "radeonsi: don't update compute scratch if the compute shader doesn't use it",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "3b0bfd254f722e5773f70c6cb367e859876a4208",
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -381,17 +381,13 @@ static void si_set_global_binding(struct pipe_context *ctx, unsigned first, unsi
|
|||
}
|
||||
}
|
||||
|
||||
static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader,
|
||||
const struct ac_shader_config *config)
|
||||
static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader)
|
||||
{
|
||||
uint64_t scratch_bo_size, scratch_needed;
|
||||
scratch_bo_size = 0;
|
||||
scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave * sctx->screen->info.max_scratch_waves;
|
||||
if (sctx->compute_scratch_buffer)
|
||||
scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0;
|
||||
|
||||
if (!scratch_needed)
|
||||
return true;
|
||||
uint64_t scratch_bo_size =
|
||||
sctx->compute_scratch_buffer ? sctx->compute_scratch_buffer->b.b.width0 : 0;
|
||||
uint64_t scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave *
|
||||
sctx->screen->info.max_scratch_waves;
|
||||
assert(scratch_needed);
|
||||
|
||||
if (scratch_bo_size < scratch_needed) {
|
||||
si_resource_reference(&sctx->compute_scratch_buffer, NULL);
|
||||
|
|
@ -408,8 +404,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s
|
|||
}
|
||||
|
||||
/* Set the scratch address in the shader binary. */
|
||||
if (config->scratch_bytes_per_wave && sctx->gfx_level < GFX11 &&
|
||||
(sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) {
|
||||
if (sctx->gfx_level < GFX11 && (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) {
|
||||
uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
|
||||
|
||||
if (shader->scratch_va != scratch_va) {
|
||||
|
|
@ -482,15 +477,16 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
|
|||
rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
|
||||
}
|
||||
|
||||
unsigned tmpring_size;
|
||||
ac_get_scratch_tmpring_size(&sctx->screen->info,
|
||||
config->scratch_bytes_per_wave,
|
||||
&sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size);
|
||||
|
||||
if (!si_setup_compute_scratch_buffer(sctx, shader, config))
|
||||
return false;
|
||||
|
||||
if (config->scratch_bytes_per_wave) {
|
||||
/* Update max_seen_compute_scratch_bytes_per_wave and compute_tmpring_size. */
|
||||
ac_get_scratch_tmpring_size(&sctx->screen->info,
|
||||
config->scratch_bytes_per_wave,
|
||||
&sctx->max_seen_compute_scratch_bytes_per_wave,
|
||||
&sctx->compute_tmpring_size);
|
||||
|
||||
if (!si_setup_compute_scratch_buffer(sctx, shader))
|
||||
return false;
|
||||
|
||||
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->compute_scratch_buffer,
|
||||
RADEON_USAGE_READWRITE | RADEON_PRIO_SCRATCH_BUFFER);
|
||||
}
|
||||
|
|
@ -515,7 +511,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
|
|||
SI_TRACKED_COMPUTE_PGM_RSRC3,
|
||||
S_00B8A0_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)));
|
||||
gfx11_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE,
|
||||
SI_TRACKED_COMPUTE_TMPRING_SIZE, tmpring_size);
|
||||
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
|
||||
if (config->scratch_bytes_per_wave) {
|
||||
gfx11_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
|
||||
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
|
||||
|
|
@ -531,7 +527,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
|
|||
SI_TRACKED_COMPUTE_PGM_RSRC1,
|
||||
config->rsrc1, rsrc2);
|
||||
radeon_opt_set_sh_reg(sctx, R_00B860_COMPUTE_TMPRING_SIZE,
|
||||
SI_TRACKED_COMPUTE_TMPRING_SIZE, tmpring_size);
|
||||
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
|
||||
|
||||
if (config->scratch_bytes_per_wave &&
|
||||
(sctx->gfx_level >= GFX11 ||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "radeon_uvd.h"
|
||||
#include "si_public.h"
|
||||
#include "sid.h"
|
||||
#include "ac_shader_util.h"
|
||||
#include "ac_shadowed_regs.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "util/disk_cache.h"
|
||||
|
|
@ -879,6 +880,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
|
|||
if (!sctx->cs_blit_shaders)
|
||||
goto fail;
|
||||
|
||||
/* Initialize compute_tmpring_size. */
|
||||
ac_get_scratch_tmpring_size(&sctx->screen->info, 0,
|
||||
&sctx->max_seen_compute_scratch_bytes_per_wave,
|
||||
&sctx->compute_tmpring_size);
|
||||
|
||||
return &sctx->b;
|
||||
fail:
|
||||
fprintf(stderr, "radeonsi: Failed to create a context.\n");
|
||||
|
|
|
|||
|
|
@ -1129,6 +1129,7 @@ struct si_context {
|
|||
struct si_vertex_elements *vertex_elements;
|
||||
unsigned num_vertex_elements;
|
||||
unsigned cs_max_waves_per_sh;
|
||||
uint32_t compute_tmpring_size;
|
||||
bool uses_nontrivial_vs_inputs;
|
||||
bool force_trivial_vs_inputs;
|
||||
bool do_update_shaders;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue