diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index d9472329c4d..426ac1c2102 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -120,19 +120,14 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind if (sel->stage == MESA_SHADER_TASK) user_sgprs += shader->info.uses_draw_id ? 3 : 2; - shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) / - ((shader->wave_size == 32 || - sscreen->info.wave64_vgpr_alloc_granularity == 8) ? 8 : 4)) | + shader->config.rsrc1 = S_00B848_VGPRS(si_shader_encode_vgprs(shader)) | + S_00B848_SGPRS(si_shader_encode_sgprs(shader)) | S_00B848_DX10_CLAMP(sscreen->info.gfx_level < GFX12) | S_00B848_MEM_ORDERED(si_shader_mem_ordered(shader)) | S_00B848_FLOAT_MODE(shader->config.float_mode) | /* This is needed for CWSR, but it causes halts to work differently. */ S_00B848_PRIV(sscreen->info.gfx_level == GFX11); - if (sscreen->info.gfx_level < GFX10) { - shader->config.rsrc1 |= S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8); - } - shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) | S_00B84C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) | S_00B84C_TGID_X_EN(sel->info.uses_block_id[0]) | diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 728984304c5..e6cd77a6bbb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -216,6 +216,25 @@ unsigned si_calculate_needed_lds_size(enum amd_gfx_level gfx_level, struct si_sh return lds_size; } +unsigned si_shader_encode_vgprs(struct si_shader *shader) +{ + struct radeon_info *info = &shader->selector->screen->info; + unsigned encode_granularity = !info->has_graphics && info->family >= CHIP_MI200 ? 8 : 4; + + assert(info->gfx_level >= GFX10 || shader->wave_size == 64); + if (shader->wave_size == 32) + encode_granularity *= 2; + + return shader->config.num_vgprs / encode_granularity - 1; +} + +unsigned si_shader_encode_sgprs(struct si_shader *shader) +{ + if (shader->selector->screen->info.gfx_level >= GFX10) + return 0; /* Gfx10+ don't have the SGPRS field and always allocate 128 SGPRs. */ + + return shader->config.num_sgprs / 8 - 1; +} static void si_calculate_max_simd_waves(struct si_shader *shader) { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 4fd55b90eb6..dca75a5f5fc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -911,6 +911,8 @@ struct nir_shader *si_deserialize_shader(struct si_shader_selector *sel); unsigned si_get_ps_num_interp(struct si_shader *ps); unsigned si_get_shader_prefetch_size(struct si_shader *shader); unsigned si_get_max_workgroup_size(const struct si_shader *shader); +unsigned si_shader_encode_vgprs(struct si_shader *shader); +unsigned si_shader_encode_sgprs(struct si_shader *shader); /* si_shader_info.c */ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 2d3e85e8b20..7bc03653f40 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -497,20 +497,6 @@ void si_destroy_shader_cache(struct si_screen *sscreen) /* SHADER STATES */ -unsigned si_shader_encode_vgprs(struct si_shader *shader) -{ - assert(shader->selector->screen->info.gfx_level >= GFX10 || shader->wave_size == 64); - return shader->config.num_vgprs / (shader->wave_size == 32 ? 8 : 4) - 1; -} - -unsigned si_shader_encode_sgprs(struct si_shader *shader) -{ - if (shader->selector->screen->info.gfx_level >= GFX10) - return 0; /* Gfx10+ don't have the SGPRS field and always allocate 128 SGPRs. */ - - return shader->config.num_sgprs / 8 - 1; -} - bool si_shader_mem_ordered(struct si_shader *shader) { struct si_screen *sscreen = shader->selector->screen;