mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
radeonsi: move CS sysval si_shader_info fields into si_shader_variant_info
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38802>
This commit is contained in:
parent
04e6e70e69
commit
3cc5517925
8 changed files with 92 additions and 62 deletions
|
|
@ -81,13 +81,13 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
|
|||
|
||||
shader->config.rsrc2 = S_00B84C_USER_SGPR(shader->info.cs_num_user_sgprs) |
|
||||
S_00B84C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
|
||||
S_00B84C_TGID_X_EN(sel->info.uses_sysval_workgroup_id[0]) |
|
||||
S_00B84C_TGID_Y_EN(sel->info.uses_sysval_workgroup_id[1]) |
|
||||
S_00B84C_TGID_Z_EN(sel->info.uses_sysval_workgroup_id[2]) |
|
||||
S_00B84C_TG_SIZE_EN(sel->info.uses_sgpr_tg_size) |
|
||||
S_00B84C_TIDIG_COMP_CNT(sel->info.uses_sysval_local_invocation_id[2]
|
||||
S_00B84C_TGID_X_EN(shader->info.uses_sysval_workgroup_id_x) |
|
||||
S_00B84C_TGID_Y_EN(shader->info.uses_sysval_workgroup_id_y) |
|
||||
S_00B84C_TGID_Z_EN(shader->info.uses_sysval_workgroup_id_z) |
|
||||
S_00B84C_TG_SIZE_EN(shader->info.uses_sgpr_tg_size) |
|
||||
S_00B84C_TIDIG_COMP_CNT(shader->info.uses_sysval_local_invocation_id_z
|
||||
? 2
|
||||
: sel->info.uses_sysval_local_invocation_id[1] ? 1 : 0) |
|
||||
: shader->info.uses_sysval_local_invocation_id_y ? 1 : 0) |
|
||||
S_00B84C_LDS_SIZE(ac_shader_encode_lds_size(shader->config.lds_size, sscreen->info.gfx_level, sel->stage));
|
||||
|
||||
/* COMPUTE_PGM_RSRC3 is only present on GFX10+ and GFX940+. */
|
||||
|
|
@ -436,10 +436,10 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 + 4 * SI_NUM_RESOURCE_SGPRS;
|
||||
unsigned block_size_reg = grid_size_reg +
|
||||
/* 12 bytes = 3 dwords. */
|
||||
12 * sel->info.uses_sysval_num_workgroups;
|
||||
unsigned cs_user_data_reg = block_size_reg + 4 * program->sel.info.uses_sysval_workgroup_size;
|
||||
12 * program->shader.info.uses_sysval_num_workgroups;
|
||||
unsigned cs_user_data_reg = block_size_reg + 4 * program->shader.info.uses_sysval_workgroup_size;
|
||||
|
||||
if (sel->info.uses_sysval_num_workgroups && info->indirect) {
|
||||
if (program->shader.info.uses_sysval_num_workgroups && info->indirect) {
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
si_cp_copy_data(sctx, &sctx->gfx_cs, COPY_DATA_REG, NULL, (grid_size_reg >> 2) + i,
|
||||
COPY_DATA_SRC_MEM, si_resource(info->indirect),
|
||||
|
|
@ -448,13 +448,13 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
}
|
||||
|
||||
if (sctx->gfx_level >= GFX12) {
|
||||
if (sel->info.uses_sysval_num_workgroups && !info->indirect) {
|
||||
if (program->shader.info.uses_sysval_num_workgroups && !info->indirect) {
|
||||
gfx12_push_compute_sh_reg(grid_size_reg, info->grid[0]);
|
||||
gfx12_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]);
|
||||
gfx12_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]);
|
||||
}
|
||||
|
||||
if (sel->info.uses_sysval_workgroup_size) {
|
||||
if (program->shader.info.uses_sysval_workgroup_size) {
|
||||
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
|
||||
gfx12_push_compute_sh_reg(block_size_reg, value);
|
||||
}
|
||||
|
|
@ -465,13 +465,13 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
gfx12_push_compute_sh_reg(cs_user_data_reg + i * 4, sctx->cs_user_data[i]);
|
||||
}
|
||||
} else if (sctx->screen->info.has_set_sh_pairs_packed) {
|
||||
if (sel->info.uses_sysval_num_workgroups && !info->indirect) {
|
||||
if (program->shader.info.uses_sysval_num_workgroups && !info->indirect) {
|
||||
gfx11_push_compute_sh_reg(grid_size_reg, info->grid[0]);
|
||||
gfx11_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]);
|
||||
gfx11_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]);
|
||||
}
|
||||
|
||||
if (sel->info.uses_sysval_workgroup_size) {
|
||||
if (program->shader.info.uses_sysval_workgroup_size) {
|
||||
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
|
||||
gfx11_push_compute_sh_reg(block_size_reg, value);
|
||||
}
|
||||
|
|
@ -484,14 +484,14 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
|
|||
} else {
|
||||
radeon_begin(cs);
|
||||
|
||||
if (sel->info.uses_sysval_num_workgroups && !info->indirect) {
|
||||
if (program->shader.info.uses_sysval_num_workgroups && !info->indirect) {
|
||||
radeon_set_sh_reg_seq(grid_size_reg, 3);
|
||||
radeon_emit(info->grid[0]);
|
||||
radeon_emit(info->grid[1]);
|
||||
radeon_emit(info->grid[2]);
|
||||
}
|
||||
|
||||
if (sel->info.uses_sysval_workgroup_size) {
|
||||
if (program->shader.info.uses_sysval_workgroup_size) {
|
||||
uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
|
||||
radeon_set_sh_reg(block_size_reg, value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ static void si_emit_draw_mesh_tasks_ace_packets(struct si_context *sctx,
|
|||
struct radeon_cmdbuf *cs = sctx->gfx_cs.gang_cs;
|
||||
struct si_shader *shader = &sctx->ts_shader_state.program->shader;
|
||||
bool uses_draw_id = shader->info.uses_sysval_draw_id;
|
||||
bool uses_grid_size = shader->selector->info.uses_sysval_num_workgroups;
|
||||
bool uses_grid_size = shader->info.uses_sysval_num_workgroups;
|
||||
unsigned sh_base_reg = R_00B900_COMPUTE_USER_DATA_0;
|
||||
|
||||
unsigned reg = sh_base_reg + 4 * GFX10_SGPR_TS_TASK_RING_ENTRY;
|
||||
|
|
@ -279,7 +279,7 @@ static void si_emit_draw_mesh_tasks_gfx_packets(struct si_context *sctx,
|
|||
unsigned sh_base_reg = sctx->shader_pointers.sh_base[MESA_SHADER_MESH];
|
||||
struct si_shader *shader = sctx->ms_shader_state.current;
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
bool uses_grid_size = sel->info.uses_sysval_num_workgroups;
|
||||
bool uses_grid_size = shader->info.uses_sysval_num_workgroups;
|
||||
|
||||
int offset = GFX11_SGPR_MS_ATTRIBUTE_RING_ADDR;
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
|
|
@ -354,7 +354,7 @@ static void si_emit_draw_mesh_shader_only_packets(struct si_context *sctx,
|
|||
struct si_shader *shader = sctx->ms_shader_state.current;
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
bool uses_draw_id = shader->info.uses_sysval_draw_id;
|
||||
bool uses_grid_size = sel->info.uses_sysval_num_workgroups;
|
||||
bool uses_grid_size = shader->info.uses_sysval_num_workgroups;
|
||||
unsigned sh_base_reg = sctx->shader_pointers.sh_base[MESA_SHADER_MESH];
|
||||
|
||||
int offset = GFX11_SGPR_MS_ATTRIBUTE_RING_ADDR;
|
||||
|
|
|
|||
|
|
@ -316,7 +316,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_workgroup_size: {
|
||||
assert(b->shader->info.workgroup_size_variable && sel->info.uses_sysval_workgroup_size);
|
||||
assert(b->shader->info.workgroup_size_variable && shader->info.uses_sysval_workgroup_size);
|
||||
|
||||
nir_def *block_size = ac_nir_load_arg(b, &args->ac, args->block_size);
|
||||
nir_def *comp[] = {
|
||||
|
|
|
|||
|
|
@ -522,9 +522,9 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
|
|||
}
|
||||
if (shader->info.uses_sysval_draw_id)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.draw_id);
|
||||
if (shader->selector->info.uses_sysval_num_workgroups)
|
||||
if (shader->info.uses_sysval_num_workgroups)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_VALUE, &args->ac.num_work_groups);
|
||||
if (shader->selector->info.uses_sysval_workgroup_size)
|
||||
if (shader->info.uses_sysval_workgroup_size)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->block_size);
|
||||
|
||||
unsigned cs_user_data_dwords =
|
||||
|
|
@ -557,18 +557,25 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
|
|||
}
|
||||
|
||||
/* Hardware SGPRs. */
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (shader->selector->info.uses_sysval_workgroup_id[i]) {
|
||||
/* GFX12 loads workgroup IDs into ttmp registers, so they are not input SGPRs, but we
|
||||
* still need to set this to indicate that they are enabled (for ac_nir_to_llvm).
|
||||
*/
|
||||
if (sel->screen->info.gfx_level >= GFX12)
|
||||
args->ac.workgroup_ids[i].used = true;
|
||||
else
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[i]);
|
||||
}
|
||||
if (sel->screen->info.gfx_level >= GFX12) {
|
||||
/* GFX12 loads workgroup IDs into ttmp registers, so they are not input SGPRs, but we
|
||||
* still need to set this to indicate that they are enabled (for ac_nir_to_llvm).
|
||||
*/
|
||||
if (shader->info.uses_sysval_workgroup_id_x)
|
||||
args->ac.workgroup_ids[0].used = true;
|
||||
if (shader->info.uses_sysval_workgroup_id_y)
|
||||
args->ac.workgroup_ids[1].used = true;
|
||||
if (shader->info.uses_sysval_workgroup_id_z)
|
||||
args->ac.workgroup_ids[2].used = true;
|
||||
} else {
|
||||
if (shader->info.uses_sysval_workgroup_id_x)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[0]);
|
||||
if (shader->info.uses_sysval_workgroup_id_y)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[1]);
|
||||
if (shader->info.uses_sysval_workgroup_id_z)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.workgroup_ids[2]);
|
||||
}
|
||||
if (shader->selector->info.uses_sgpr_tg_size)
|
||||
if (shader->info.uses_sgpr_tg_size)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.tg_size);
|
||||
|
||||
/* GFX11 set FLAT_SCRATCH directly instead of using this arg. */
|
||||
|
|
@ -617,7 +624,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
|
|||
if (shader->info.uses_sysval_draw_id)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.draw_id);
|
||||
|
||||
if (sel->info.uses_sysval_num_workgroups)
|
||||
if (shader->info.uses_sysval_num_workgroups)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_VALUE, &args->ac.num_work_groups);
|
||||
else if (sel->screen->info.gfx_level < GFX11)
|
||||
/* GFX10 always write grid size to SGPR, reserve space for it */
|
||||
|
|
|
|||
|
|
@ -293,19 +293,6 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in
|
|||
}
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
case nir_intrinsic_load_workgroup_id: {
|
||||
unsigned mask = nir_def_components_read(&intr->def);
|
||||
while (mask) {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_workgroup_id)
|
||||
info->uses_sysval_workgroup_id[i] = true;
|
||||
else
|
||||
info->uses_sysval_local_invocation_id[i] = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_color0:
|
||||
case nir_intrinsic_load_color1: {
|
||||
unsigned index = intr->intrinsic == nir_intrinsic_load_color1;
|
||||
|
|
@ -558,14 +545,6 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir,
|
|||
info->uses_sysval_front_face = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE) |
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE_FSIGN);
|
||||
info->uses_sysval_invocation_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
|
||||
info->uses_sysval_num_workgroups = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS);
|
||||
info->uses_sgpr_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS);
|
||||
if (sscreen->info.gfx_level < GFX12) {
|
||||
info->uses_sgpr_tg_size |= BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) ||
|
||||
si_should_clear_lds(sscreen, nir);
|
||||
}
|
||||
info->uses_sysval_workgroup_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE);
|
||||
info->uses_sysval_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
|
||||
nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
|
||||
info->uses_sysval_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
|
||||
|
|
|
|||
|
|
@ -151,11 +151,6 @@ struct si_shader_info {
|
|||
bool uses_sysval_primitive_id;
|
||||
bool uses_sysval_front_face;
|
||||
bool uses_sysval_invocation_id;
|
||||
bool uses_sysval_local_invocation_id[3];
|
||||
bool uses_sysval_workgroup_id[3];
|
||||
bool uses_sysval_workgroup_size;
|
||||
bool uses_sysval_num_workgroups;
|
||||
bool uses_sgpr_tg_size;
|
||||
bool uses_atomic_ordered_add;
|
||||
bool writes_psize;
|
||||
bool writes_primid;
|
||||
|
|
@ -222,6 +217,15 @@ struct si_shader_variant_info {
|
|||
bool writes_sample_mask : 1;
|
||||
bool uses_discard : 1;
|
||||
bool uses_mesh_scratch_ring : 1;
|
||||
bool uses_sysval_local_invocation_id_x : 1;
|
||||
bool uses_sysval_local_invocation_id_y : 1;
|
||||
bool uses_sysval_local_invocation_id_z : 1;
|
||||
bool uses_sysval_workgroup_id_x : 1;
|
||||
bool uses_sysval_workgroup_id_y : 1;
|
||||
bool uses_sysval_workgroup_id_z : 1;
|
||||
bool uses_sysval_workgroup_size : 1;
|
||||
bool uses_sysval_num_workgroups : 1;
|
||||
bool uses_sgpr_tg_size : 1;
|
||||
uint8_t nr_pos_exports;
|
||||
uint8_t nr_param_exports;
|
||||
uint8_t nr_prim_param_exports;
|
||||
|
|
|
|||
|
|
@ -40,11 +40,46 @@ void si_get_shader_variant_info(struct si_shader *shader,
|
|||
|
||||
nir_foreach_block(block, nir_shader_get_entrypoint(nir)) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
unsigned mask;
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
mask = nir_def_components_read(&intr->def);
|
||||
if (mask & BITFIELD_BIT(0))
|
||||
shader->info.uses_sysval_local_invocation_id_x = true;
|
||||
if (mask & BITFIELD_BIT(1))
|
||||
shader->info.uses_sysval_local_invocation_id_y = true;
|
||||
if (mask & BITFIELD_BIT(2))
|
||||
shader->info.uses_sysval_local_invocation_id_z = true;
|
||||
break;
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
mask = nir_def_components_read(&intr->def);
|
||||
if (mask & BITFIELD_BIT(0))
|
||||
shader->info.uses_sysval_workgroup_id_x = true;
|
||||
if (mask & BITFIELD_BIT(1))
|
||||
shader->info.uses_sysval_workgroup_id_y = true;
|
||||
if (mask & BITFIELD_BIT(2))
|
||||
shader->info.uses_sysval_workgroup_id_z = true;
|
||||
break;
|
||||
case nir_intrinsic_load_workgroup_size:
|
||||
shader->info.uses_sysval_workgroup_size = true;
|
||||
break;
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
shader->info.uses_sysval_num_workgroups = true;
|
||||
break;
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
shader->info.uses_sgpr_tg_size = true;
|
||||
break;
|
||||
case nir_intrinsic_load_local_invocation_index:
|
||||
case nir_intrinsic_load_subgroup_id:
|
||||
/* GFX12 computes these using subgroup_id from ttmp8. */
|
||||
if (shader->selector->screen->info.gfx_level < GFX12)
|
||||
shader->info.uses_sgpr_tg_size = true;
|
||||
break;
|
||||
case nir_intrinsic_load_instance_id:
|
||||
shader->info.uses_sysval_instance_id = true;
|
||||
break;
|
||||
|
|
@ -308,13 +343,18 @@ void si_get_shader_variant_info(struct si_shader *shader,
|
|||
if (nir->info.stage == MESA_SHADER_COMPUTE ||
|
||||
nir->info.stage == MESA_SHADER_KERNEL ||
|
||||
nir->info.stage == MESA_SHADER_TASK) {
|
||||
/* nir_clear_shared_memory uses local_invocation_index. */
|
||||
if (shader->selector->screen->info.gfx_level < GFX12 &&
|
||||
si_should_clear_lds(shader->selector->screen, nir))
|
||||
shader->info.uses_sgpr_tg_size = true;
|
||||
|
||||
/* Determine user SGPRs for compute shader. This includes descriptors in user SGPRs.
|
||||
*
|
||||
* Variable block sizes need 10 bits (1 + log2(SI_MAX_VARIABLE_THREADS_PER_BLOCK)) per dim.
|
||||
* We pack them into a single user SGPR.
|
||||
*/
|
||||
unsigned num_user_sgprs = SI_NUM_RESOURCE_SGPRS + (shader->selector->info.uses_sysval_num_workgroups ? 3 : 0) +
|
||||
(shader->selector->info.uses_sysval_workgroup_size ? 1 : 0) +
|
||||
unsigned num_user_sgprs = SI_NUM_RESOURCE_SGPRS + (shader->info.uses_sysval_num_workgroups ? 3 : 0) +
|
||||
(shader->info.uses_sysval_workgroup_size ? 1 : 0) +
|
||||
shader->selector->nir->info.cs.user_data_components_amd;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_TASK) {
|
||||
|
|
|
|||
|
|
@ -1485,7 +1485,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
num_user_sgprs++;
|
||||
if (shader->info.uses_sysval_draw_id)
|
||||
num_user_sgprs++;
|
||||
if (gs_sel->info.uses_sysval_num_workgroups || sscreen->info.gfx_level < GFX11)
|
||||
if (shader->info.uses_sysval_num_workgroups || sscreen->info.gfx_level < GFX11)
|
||||
num_user_sgprs += 3;
|
||||
if (shader->info.uses_mesh_scratch_ring)
|
||||
num_user_sgprs++;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue