diff --git a/.pick_status.json b/.pick_status.json index c344072d8de..8bf3ad97977 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1054,7 +1054,7 @@ "description": "radeonsi: don't update compute scratch if the compute shader doesn't use it", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "3b0bfd254f722e5773f70c6cb367e859876a4208", "notes": null diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index b8562eaca9c..153d2889762 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -381,17 +381,13 @@ static void si_set_global_binding(struct pipe_context *ctx, unsigned first, unsi } } -static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader, - const struct ac_shader_config *config) +static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_shader *shader) { - uint64_t scratch_bo_size, scratch_needed; - scratch_bo_size = 0; - scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave * sctx->screen->info.max_scratch_waves; - if (sctx->compute_scratch_buffer) - scratch_bo_size = sctx->compute_scratch_buffer->b.b.width0; - - if (!scratch_needed) - return true; + uint64_t scratch_bo_size = + sctx->compute_scratch_buffer ? sctx->compute_scratch_buffer->b.b.width0 : 0; + uint64_t scratch_needed = sctx->max_seen_compute_scratch_bytes_per_wave * + sctx->screen->info.max_scratch_waves; + assert(scratch_needed); if (scratch_bo_size < scratch_needed) { si_resource_reference(&sctx->compute_scratch_buffer, NULL); @@ -408,8 +404,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s } /* Set the scratch address in the shader binary. */ - if (config->scratch_bytes_per_wave && sctx->gfx_level < GFX11 && - (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) { + if (sctx->gfx_level < GFX11 && (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) { uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address; if (shader->scratch_va != scratch_va) { @@ -482,15 +477,16 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks); } - unsigned tmpring_size; - ac_get_scratch_tmpring_size(&sctx->screen->info, - config->scratch_bytes_per_wave, - &sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size); - - if (!si_setup_compute_scratch_buffer(sctx, shader, config)) - return false; - if (config->scratch_bytes_per_wave) { + /* Update max_seen_compute_scratch_bytes_per_wave and compute_tmpring_size. */ + ac_get_scratch_tmpring_size(&sctx->screen->info, + config->scratch_bytes_per_wave, + &sctx->max_seen_compute_scratch_bytes_per_wave, + &sctx->compute_tmpring_size); + + if (!si_setup_compute_scratch_buffer(sctx, shader)) + return false; + radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->compute_scratch_buffer, RADEON_USAGE_READWRITE | RADEON_PRIO_SCRATCH_BUFFER); } @@ -515,7 +511,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute SI_TRACKED_COMPUTE_PGM_RSRC3, S_00B8A0_INST_PREF_SIZE(si_get_shader_prefetch_size(shader))); gfx11_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE, - SI_TRACKED_COMPUTE_TMPRING_SIZE, tmpring_size); + SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size); if (config->scratch_bytes_per_wave) { gfx11_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, @@ -531,7 +527,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute SI_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1, rsrc2); radeon_opt_set_sh_reg(sctx, R_00B860_COMPUTE_TMPRING_SIZE, - SI_TRACKED_COMPUTE_TMPRING_SIZE, tmpring_size); + SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size); if (config->scratch_bytes_per_wave && (sctx->gfx_level >= GFX11 || diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index bb4dd703407..85f9d3943fa 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -11,6 +11,7 @@ #include "radeon_uvd.h" #include "si_public.h" #include "sid.h" +#include "ac_shader_util.h" #include "ac_shadowed_regs.h" #include "compiler/nir/nir.h" #include "util/disk_cache.h" @@ -879,6 +880,11 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign if (!sctx->cs_blit_shaders) goto fail; + /* Initialize compute_tmpring_size. */ + ac_get_scratch_tmpring_size(&sctx->screen->info, 0, + &sctx->max_seen_compute_scratch_bytes_per_wave, + &sctx->compute_tmpring_size); + return &sctx->b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d6cfffac5e6..f4efe0c9446 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1129,6 +1129,7 @@ struct si_context { struct si_vertex_elements *vertex_elements; unsigned num_vertex_elements; unsigned cs_max_waves_per_sh; + uint32_t compute_tmpring_size; bool uses_nontrivial_vs_inputs; bool force_trivial_vs_inputs; bool do_update_shaders;