mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
radeonsi: pack the variable block size in one SGPR, 10 bits per component
A side effect of this change is that the compute copy image shader now has
enough free user SGPRs to receive the src image via user SGPRs, which
removes the first s_load_dwordx8 (see below) and results in lower wave lifetime.
Previous copy shader:
s_load_dwordx8
image_load
s_load_dwordx8
s_waitcnt
image_store
Current copy shader:
image_load
s_load_dwordx8
s_waitcnt
image_store
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9795>
This commit is contained in:
parent
034c1e4845
commit
95940459be
3 changed files with 17 additions and 9 deletions
|
|
@ -132,8 +132,11 @@ static void si_create_compute_state_async(void *job, int thread_index)
|
|||
|
||||
program->shader.is_monolithic = true;
|
||||
|
||||
/* Variable block sizes need 10 bits (1 + log2(SI_MAX_VARIABLE_THREADS_PER_BLOCK)) per dim.
|
||||
* We pack them into a single user SGPR.
|
||||
*/
|
||||
unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel->info.uses_grid_size ? 3 : 0) +
|
||||
(sel->info.uses_variable_block_size ? 3 : 0) +
|
||||
(sel->info.uses_variable_block_size ? 1 : 0) +
|
||||
sel->info.base.cs.user_data_components_amd;
|
||||
|
||||
/* Fast path for compute shaders - some descriptors passed via user SGPRs. */
|
||||
|
|
@ -707,7 +710,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
unsigned block_size_reg = grid_size_reg +
|
||||
/* 12 bytes = 3 dwords. */
|
||||
12 * sel->info.uses_grid_size;
|
||||
unsigned cs_user_data_reg = block_size_reg + 12 * program->sel.info.uses_variable_block_size;
|
||||
unsigned cs_user_data_reg = block_size_reg + 4 * program->sel.info.uses_variable_block_size;
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -730,10 +733,8 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
}
|
||||
|
||||
if (sel->info.uses_variable_block_size) {
|
||||
radeon_set_sh_reg_seq(cs, block_size_reg, 3);
|
||||
radeon_emit(cs, info->block[0]);
|
||||
radeon_emit(cs, info->block[1]);
|
||||
radeon_emit(cs, info->block[2]);
|
||||
radeon_set_sh_reg(cs, block_size_reg,
|
||||
info->block[0] | (info->block[1] << 10) | (info->block[2] << 20));
|
||||
}
|
||||
|
||||
if (sel->info.base.cs.user_data_components_amd) {
|
||||
|
|
|
|||
|
|
@ -702,7 +702,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
if (shader->selector->info.uses_grid_size)
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->args.num_work_groups);
|
||||
if (shader->selector->info.uses_variable_block_size)
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->block_size);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->block_size);
|
||||
|
||||
unsigned cs_user_data_dwords =
|
||||
shader->selector->info.base.cs.user_data_components_amd;
|
||||
|
|
|
|||
|
|
@ -412,8 +412,15 @@ static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
|
|||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
assert(ctx->shader->selector->info.base.cs.local_size_variable);
|
||||
return ac_get_arg(&ctx->ac, ctx->block_size);
|
||||
assert(ctx->shader->selector->info.base.cs.local_size_variable &&
|
||||
ctx->shader->selector->info.uses_variable_block_size);
|
||||
|
||||
LLVMValueRef chan[3] = {
|
||||
si_unpack_param(ctx, ctx->block_size, 0, 10),
|
||||
si_unpack_param(ctx, ctx->block_size, 10, 10),
|
||||
si_unpack_param(ctx, ctx->block_size, 20, 10),
|
||||
};
|
||||
return ac_build_gather_values(&ctx->ac, chan, 3);
|
||||
}
|
||||
|
||||
static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue