radeonsi: use si_shader_encode_{sgprs|vgprs} in si_compute.c

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
Daniel Schürmann 2025-12-12 12:44:45 +01:00 committed by Marge Bot
parent d94e90df25
commit b9c7cea719
4 changed files with 23 additions and 21 deletions

View file

@ -120,19 +120,14 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
if (sel->stage == MESA_SHADER_TASK)
user_sgprs += shader->info.uses_draw_id ? 3 : 2;
shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) /
((shader->wave_size == 32 ||
sscreen->info.wave64_vgpr_alloc_granularity == 8) ? 8 : 4)) |
shader->config.rsrc1 = S_00B848_VGPRS(si_shader_encode_vgprs(shader)) |
S_00B848_SGPRS(si_shader_encode_sgprs(shader)) |
S_00B848_DX10_CLAMP(sscreen->info.gfx_level < GFX12) |
S_00B848_MEM_ORDERED(si_shader_mem_ordered(shader)) |
S_00B848_FLOAT_MODE(shader->config.float_mode) |
/* This is needed for CWSR, but it causes halts to work differently. */
S_00B848_PRIV(sscreen->info.gfx_level == GFX11);
if (sscreen->info.gfx_level < GFX10) {
shader->config.rsrc1 |= S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8);
}
shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) |
S_00B84C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
S_00B84C_TGID_X_EN(sel->info.uses_block_id[0]) |

View file

@ -216,6 +216,25 @@ unsigned si_calculate_needed_lds_size(enum amd_gfx_level gfx_level, struct si_sh
return lds_size;
}
unsigned si_shader_encode_vgprs(struct si_shader *shader)
{
struct radeon_info *info = &shader->selector->screen->info;
unsigned encode_granularity = !info->has_graphics && info->family >= CHIP_MI200 ? 8 : 4;
assert(info->gfx_level >= GFX10 || shader->wave_size == 64);
if (shader->wave_size == 32)
encode_granularity *= 2;
return shader->config.num_vgprs / encode_granularity - 1;
}
unsigned si_shader_encode_sgprs(struct si_shader *shader)
{
if (shader->selector->screen->info.gfx_level >= GFX10)
return 0; /* Gfx10+ don't have the SGPRS field and always allocate 128 SGPRs. */
return shader->config.num_sgprs / 8 - 1;
}
static void si_calculate_max_simd_waves(struct si_shader *shader)
{

View file

@ -911,6 +911,8 @@ struct nir_shader *si_deserialize_shader(struct si_shader_selector *sel);
unsigned si_get_ps_num_interp(struct si_shader *ps);
unsigned si_get_shader_prefetch_size(struct si_shader *shader);
unsigned si_get_max_workgroup_size(const struct si_shader *shader);
unsigned si_shader_encode_vgprs(struct si_shader *shader);
unsigned si_shader_encode_sgprs(struct si_shader *shader);
/* si_shader_info.c */
void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,

View file

@ -497,20 +497,6 @@ void si_destroy_shader_cache(struct si_screen *sscreen)
/* SHADER STATES */
unsigned si_shader_encode_vgprs(struct si_shader *shader)
{
assert(shader->selector->screen->info.gfx_level >= GFX10 || shader->wave_size == 64);
return shader->config.num_vgprs / (shader->wave_size == 32 ? 8 : 4) - 1;
}
unsigned si_shader_encode_sgprs(struct si_shader *shader)
{
if (shader->selector->screen->info.gfx_level >= GFX10)
return 0; /* Gfx10+ don't have the SGPRS field and always allocate 128 SGPRs. */
return shader->config.num_sgprs / 8 - 1;
}
bool si_shader_mem_ordered(struct si_shader *shader)
{
struct si_screen *sscreen = shader->selector->screen;