radv: only call radv_should_use_wgp_mode() once

This will let the compiler choose between CU and WGP mode.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37791>
This commit is contained in:
Rhys Perry 2025-10-09 14:52:06 +01:00
parent b2c44e3a65
commit 3e9921f52e
4 changed files with 9 additions and 8 deletions

View file

@ -28,6 +28,7 @@ struct ac_shader_config {
unsigned spi_ps_input_addr;
unsigned float_mode;
unsigned scratch_bytes_per_wave;
bool wgp_mode;
unsigned rsrc1;
unsigned rsrc2;
unsigned rsrc3;

View file

@ -1830,6 +1830,7 @@ emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct a
program->config->scratch_bytes_per_wave =
align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
program->config->wgp_mode = program->wgp_mode;
return exec_size;
}

View file

@ -390,6 +390,7 @@ ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_modu
rbin->base.type = RADV_BINARY_TYPE_RTLD;
rbin->base.total_size = alloc_size;
rbin->base.config.wgp_mode = options->wgp_mode;
rbin->elf_size = elf_size;
rbin->llvm_ir_size = llvm_ir_size;
*rbinary = &rbin->base;

View file

@ -2161,7 +2161,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
}
}
bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
assert(config->lds_size <= pdev->info.lds_size_per_workgroup);
unsigned lds_alloc = ac_shader_encode_lds_size(config->lds_size, pdev->info.gfx_level, stage);
@ -2204,7 +2203,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
} else {
config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
}
config->rsrc1 |= S_00B428_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B428_WGP_MODE(wgp_mode);
config->rsrc1 |= S_00B428_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B428_WGP_MODE(config->wgp_mode);
config->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
break;
case MESA_SHADER_VERTEX:
@ -2264,7 +2263,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
case MESA_SHADER_ANY_HIT:
case MESA_SHADER_COMPUTE:
case MESA_SHADER_TASK:
config->rsrc1 |= S_00B848_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B848_WGP_MODE(wgp_mode);
config->rsrc1 |= S_00B848_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B848_WGP_MODE(config->wgp_mode);
config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2
@ -2349,7 +2348,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
* happened on VanGogh) Let's disable it on all chips that
* disable exactly 1 CU per SA for GS.
*/
config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(config->wgp_mode);
config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_LDS_SIZE(lds_alloc) |
S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
} else if (pdev->info.gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) {
@ -2381,7 +2380,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
}
config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(config->wgp_mode);
config->rsrc2 |=
S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
} else if (pdev->info.gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) {
@ -2764,13 +2763,12 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf
max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
}
bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
unsigned simd_per_cu_wgp = gpu_info->num_simd_per_compute_unit;
if (wgp_mode)
if (conf->wgp_mode)
simd_per_cu_wgp *= 2;
if (lds_per_workgroup) {
unsigned lds_per_cu_wgp = gpu_info->lds_size_per_workgroup * (gfx_level >= GFX10 && wgp_mode ? 2 : 1);
unsigned lds_per_cu_wgp = gpu_info->lds_size_per_workgroup * (gfx_level >= GFX10 && conf->wgp_mode ? 2 : 1);
unsigned max_cu_wgp_waves = lds_per_cu_wgp / lds_per_workgroup * waves_per_workgroup;
max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_cu_wgp_waves, simd_per_cu_wgp));
}