radeonsi: move CS sysval si_shader_info fields into si_shader_variant_info

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38802>
This commit is contained in:
Marek Olšák 2025-11-21 14:36:18 -05:00 committed by Marge Bot
parent 04e6e70e69
commit 3cc5517925
8 changed files with 92 additions and 62 deletions

View file

@ -81,13 +81,13 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
shader->config.rsrc2 = S_00B84C_USER_SGPR(shader->info.cs_num_user_sgprs) |
S_00B84C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
S_00B84C_TGID_X_EN(sel->info.uses_sysval_workgroup_id[0]) |
S_00B84C_TGID_Y_EN(sel->info.uses_sysval_workgroup_id[1]) |
S_00B84C_TGID_Z_EN(sel->info.uses_sysval_workgroup_id[2]) |
S_00B84C_TG_SIZE_EN(sel->info.uses_sgpr_tg_size) |
S_00B84C_TIDIG_COMP_CNT(sel->info.uses_sysval_local_invocation_id[2]
S_00B84C_TGID_X_EN(shader->info.uses_sysval_workgroup_id_x) |
S_00B84C_TGID_Y_EN(shader->info.uses_sysval_workgroup_id_y) |
S_00B84C_TGID_Z_EN(shader->info.uses_sysval_workgroup_id_z) |
S_00B84C_TG_SIZE_EN(shader->info.uses_sgpr_tg_size) |
S_00B84C_TIDIG_COMP_CNT(shader->info.uses_sysval_local_invocation_id_z
? 2
: sel->info.uses_sysval_local_invocation_id[1] ? 1 : 0) |
: shader->info.uses_sysval_local_invocation_id_y ? 1 : 0) |
S_00B84C_LDS_SIZE(ac_shader_encode_lds_size(shader->config.lds_size, sscreen->info.gfx_level, sel->stage));
/* COMPUTE_PGM_RSRC3 is only present on GFX10+ and GFX940+. */
@ -436,10 +436,10 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 + 4 * SI_NUM_RESOURCE_SGPRS;
unsigned block_size_reg = grid_size_reg +
/* 12 bytes = 3 dwords. */
12 * sel->info.uses_sysval_num_workgroups;
unsigned cs_user_data_reg = block_size_reg + 4 * program->sel.info.uses_sysval_workgroup_size;
12 * program->shader.info.uses_sysval_num_workgroups;
unsigned cs_user_data_reg = block_size_reg + 4 * program->shader.info.uses_sysval_workgroup_size;
if (sel->info.uses_sysval_num_workgroups && info->indirect) {
if (program->shader.info.uses_sysval_num_workgroups && info->indirect) {
for (unsigned i = 0; i < 3; ++i) {
si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
COPY_DATA_SRC_MEM, si_resource(info->indirect),
@ -448,13 +448,13 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
}
if (sctx->gfx_level >= GFX12) {
if (sel->info.uses_sysval_num_workgroups && !info->indirect) {
if (program->shader.info.uses_sysval_num_workgroups && !info->indirect) {
gfx12_push_compute_sh_reg(grid_size_reg, info->grid[0]);
gfx12_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]);
gfx12_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]);
}
if (sel->info.uses_sysval_workgroup_size) {
if (program->shader.info.uses_sysval_workgroup_size) {
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
gfx12_push_compute_sh_reg(block_size_reg, value);
}
@ -465,13 +465,13 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
gfx12_push_compute_sh_reg(cs_user_data_reg + i * 4, sctx->cs_user_data[i]);
}
} else if (sctx->screen->info.has_set_sh_pairs_packed) {
if (sel->info.uses_sysval_num_workgroups && !info->indirect) {
if (program->shader.info.uses_sysval_num_workgroups && !info->indirect) {
gfx11_push_compute_sh_reg(grid_size_reg, info->grid[0]);
gfx11_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]);
gfx11_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]);
}
if (sel->info.uses_sysval_workgroup_size) {
if (program->shader.info.uses_sysval_workgroup_size) {
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
gfx11_push_compute_sh_reg(block_size_reg, value);
}
@ -484,14 +484,14 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
} else {
radeon_begin(cs);
if (sel->info.uses_sysval_num_workgroups && !info->indirect) {
if (program->shader.info.uses_sysval_num_workgroups && !info->indirect) {
radeon_set_sh_reg_seq(grid_size_reg, 3);
radeon_emit(info->grid[0]);
radeon_emit(info->grid[1]);
radeon_emit(info->grid[2]);
}
if (sel->info.uses_sysval_workgroup_size) {
if (program->shader.info.uses_sysval_workgroup_size) {
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
radeon_set_sh_reg(block_size_reg, value);
}

View file

@ -141,7 +141,7 @@ static void si_emit_draw_mesh_tasks_ace_packets(struct si_context *sctx,
struct radeon_cmdbuf *cs = sctx->gfx_cs.gang_cs;
struct si_shader *shader = &sctx->ts_shader_state.program->shader;
bool uses_draw_id = shader->info.uses_sysval_draw_id;
bool uses_grid_size = shader->selector->info.uses_sysval_num_workgroups;
bool uses_grid_size = shader->info.uses_sysval_num_workgroups;
unsigned sh_base_reg = R_00B900_COMPUTE_USER_DATA_0;
unsigned reg = sh_base_reg + 4 * GFX10_SGPR_TS_TASK_RING_ENTRY;
@ -279,7 +279,7 @@ static void si_emit_draw_mesh_tasks_gfx_packets(struct si_context *sctx,
unsigned sh_base_reg = sctx->shader_pointers.sh_base[MESA_SHADER_MESH];
struct si_shader *shader = sctx->ms_shader_state.current;
struct si_shader_selector *sel = shader->selector;
bool uses_grid_size = sel->info.uses_sysval_num_workgroups;
bool uses_grid_size = shader->info.uses_sysval_num_workgroups;
int offset = GFX11_SGPR_MS_ATTRIBUTE_RING_ADDR;
if (sctx->gfx_level >= GFX11)
@ -354,7 +354,7 @@ static void si_emit_draw_mesh_shader_only_packets(struct si_context *sctx,
struct si_shader *shader = sctx->ms_shader_state.current;
struct si_shader_selector *sel = shader->selector;
bool uses_draw_id = shader->info.uses_sysval_draw_id;
bool uses_grid_size = sel->info.uses_sysval_num_workgroups;
bool uses_grid_size = shader->info.uses_sysval_num_workgroups;
unsigned sh_base_reg = sctx->shader_pointers.sh_base[MESA_SHADER_MESH];
int offset = GFX11_SGPR_MS_ATTRIBUTE_RING_ADDR;

View file

@ -316,7 +316,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
break;
}
case nir_intrinsic_load_workgroup_size: {
assert(b->shader->info.workgroup_size_variable && sel->info.uses_sysval_workgroup_size);
assert(b->shader->info.workgroup_size_variable && shader->info.uses_sysval_workgroup_size);
nir_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size);
nir_def *comp[] = {

View file

@ -522,9 +522,9 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
}
if (shader->info.uses_sysval_draw_id)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.draw_id);
if (shader->selector->info.uses_sysval_num_workgroups)
if (shader->info.uses_sysval_num_workgroups)
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_VALUE, &args->ac.num_work_groups);
if (shader->selector->info.uses_sysval_workgroup_size)
if (shader->info.uses_sysval_workgroup_size)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->block_size);
unsigned cs_user_data_dwords =
@ -557,18 +557,25 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
}
/* Hardware SGPRs. */
for (i = 0; i < 3; i++) {
if (shader->selector->info.uses_sysval_workgroup_id[i]) {
/* GFX12 loads workgroup IDs into ttmp registers, so they are not input SGPRs, but we
* still need to set this to indicate that they are enabled (for ac_nir_to_llvm).
*/
if (sel->screen->info.gfx_level >= GFX12)
args->ac.workgroup_ids[i].used = true;
else
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[i]);
}
if (sel->screen->info.gfx_level >= GFX12) {
/* GFX12 loads workgroup IDs into ttmp registers, so they are not input SGPRs, but we
* still need to set this to indicate that they are enabled (for ac_nir_to_llvm).
*/
if (shader->info.uses_sysval_workgroup_id_x)
args->ac.workgroup_ids[0].used = true;
if (shader->info.uses_sysval_workgroup_id_y)
args->ac.workgroup_ids[1].used = true;
if (shader->info.uses_sysval_workgroup_id_z)
args->ac.workgroup_ids[2].used = true;
} else {
if (shader->info.uses_sysval_workgroup_id_x)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[0]);
if (shader->info.uses_sysval_workgroup_id_y)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[1]);
if (shader->info.uses_sysval_workgroup_id_z)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[2]);
}
if (shader->selector->info.uses_sgpr_tg_size)
if (shader->info.uses_sgpr_tg_size)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.tg_size);
/* GFX11 set FLAT_SCRATCH directly instead of using this arg. */
@ -617,7 +624,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
if (shader->info.uses_sysval_draw_id)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.draw_id);
if (sel->info.uses_sysval_num_workgroups)
if (shader->info.uses_sysval_num_workgroups)
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_VALUE, &args->ac.num_work_groups);
else if (sel->screen->info.gfx_level < GFX11)
/* GFX10 always write grid size to SGPR, reserve space for it */

View file

@ -293,19 +293,6 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in
}
switch (intr->intrinsic) {
case nir_intrinsic_load_local_invocation_id:
case nir_intrinsic_load_workgroup_id: {
unsigned mask = nir_def_components_read(&intr->def);
while (mask) {
unsigned i = u_bit_scan(&mask);
if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
info->uses_sysval_workgroup_id[i] = true;
else
info->uses_sysval_local_invocation_id[i] = true;
}
break;
}
case nir_intrinsic_load_color0:
case nir_intrinsic_load_color1: {
unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
@ -558,14 +545,6 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir,
info->uses_sysval_front_face = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN);
info->uses_sysval_invocation_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
info->uses_sysval_num_workgroups = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
info->uses_sgpr_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
if (sscreen->info.gfx_level < GFX12) {
info->uses_sgpr_tg_size |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
si_should_clear_lds(sscreen, nir);
}
info->uses_sysval_workgroup_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
info->uses_sysval_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
info->uses_sysval_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);

View file

@ -151,11 +151,6 @@ struct si_shader_info {
bool uses_sysval_primitive_id;
bool uses_sysval_front_face;
bool uses_sysval_invocation_id;
bool uses_sysval_local_invocation_id[3];
bool uses_sysval_workgroup_id[3];
bool uses_sysval_workgroup_size;
bool uses_sysval_num_workgroups;
bool uses_sgpr_tg_size;
bool uses_atomic_ordered_add;
bool writes_psize;
bool writes_primid;
@ -222,6 +217,15 @@ struct si_shader_variant_info {
bool writes_sample_mask : 1;
bool uses_discard : 1;
bool uses_mesh_scratch_ring : 1;
bool uses_sysval_local_invocation_id_x : 1;
bool uses_sysval_local_invocation_id_y : 1;
bool uses_sysval_local_invocation_id_z : 1;
bool uses_sysval_workgroup_id_x : 1;
bool uses_sysval_workgroup_id_y : 1;
bool uses_sysval_workgroup_id_z : 1;
bool uses_sysval_workgroup_size : 1;
bool uses_sysval_num_workgroups : 1;
bool uses_sgpr_tg_size : 1;
uint8_t nr_pos_exports;
uint8_t nr_param_exports;
uint8_t nr_prim_param_exports;

View file

@ -40,11 +40,46 @@ void si_get_shader_variant_info(struct si_shader *shader,
nir_foreach_block(block, nir_shader_get_entrypoint(nir)) {
nir_foreach_instr(instr, block) {
unsigned mask;
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_local_invocation_id:
mask = nir_def_components_read(&intr->def);
if (mask & BITFIELD_BIT(0))
shader->info.uses_sysval_local_invocation_id_x = true;
if (mask & BITFIELD_BIT(1))
shader->info.uses_sysval_local_invocation_id_y = true;
if (mask & BITFIELD_BIT(2))
shader->info.uses_sysval_local_invocation_id_z = true;
break;
case nir_intrinsic_load_workgroup_id:
mask = nir_def_components_read(&intr->def);
if (mask & BITFIELD_BIT(0))
shader->info.uses_sysval_workgroup_id_x = true;
if (mask & BITFIELD_BIT(1))
shader->info.uses_sysval_workgroup_id_y = true;
if (mask & BITFIELD_BIT(2))
shader->info.uses_sysval_workgroup_id_z = true;
break;
case nir_intrinsic_load_workgroup_size:
shader->info.uses_sysval_workgroup_size = true;
break;
case nir_intrinsic_load_num_workgroups:
shader->info.uses_sysval_num_workgroups = true;
break;
case nir_intrinsic_load_num_subgroups:
shader->info.uses_sgpr_tg_size = true;
break;
case nir_intrinsic_load_local_invocation_index:
case nir_intrinsic_load_subgroup_id:
/* GFX12 computes these using subgroup_id from ttmp8. */
if (shader->selector->screen->info.gfx_level < GFX12)
shader->info.uses_sgpr_tg_size = true;
break;
case nir_intrinsic_load_instance_id:
shader->info.uses_sysval_instance_id = true;
break;
@ -308,13 +343,18 @@ void si_get_shader_variant_info(struct si_shader *shader,
if (nir->info.stage == MESA_SHADER_COMPUTE ||
nir->info.stage == MESA_SHADER_KERNEL ||
nir->info.stage == MESA_SHADER_TASK) {
/* nir_clear_shared_memory uses local_invocation_index. */
if (shader->selector->screen->info.gfx_level < GFX12 &&
si_should_clear_lds(shader->selector->screen, nir))
shader->info.uses_sgpr_tg_size = true;
/* Determine user SGPRs for compute shader. This includes descriptors in user SGPRs.
*
* Variable block sizes need 10 bits (1 + log2(SI_MAX_VARIABLE_THREADS_PER_BLOCK)) per dim.
* We pack them into a single user SGPR.
*/
unsigned num_user_sgprs = SI_NUM_RESOURCE_SGPRS + (shader->selector->info.uses_sysval_num_workgroups ? 3 : 0) +
(shader->selector->info.uses_sysval_workgroup_size ? 1 : 0) +
unsigned num_user_sgprs = SI_NUM_RESOURCE_SGPRS + (shader->info.uses_sysval_num_workgroups ? 3 : 0) +
(shader->info.uses_sysval_workgroup_size ? 1 : 0) +
shader->selector->nir->info.cs.user_data_components_amd;
if (nir->info.stage == MESA_SHADER_TASK) {

View file

@ -1485,7 +1485,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
num_user_sgprs++;
if (shader->info.uses_sysval_draw_id)
num_user_sgprs++;
if (gs_sel->info.uses_sysval_num_workgroups || sscreen->info.gfx_level < GFX11)
if (shader->info.uses_sysval_num_workgroups || sscreen->info.gfx_level < GFX11)
num_user_sgprs += 3;
if (shader->info.uses_mesh_scratch_ring)
num_user_sgprs++;