diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h index 2e18ff72ed8..f28dd1fd7d6 100644 --- a/src/amd/common/ac_binary.h +++ b/src/amd/common/ac_binary.h @@ -28,6 +28,7 @@ struct ac_shader_config { unsigned spi_ps_input_addr; unsigned float_mode; unsigned scratch_bytes_per_wave; + bool wgp_mode; unsigned rsrc1; unsigned rsrc2; unsigned rsrc3; diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 3e37b11970d..e5c1d5e8fcf 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -1830,6 +1830,7 @@ emit_program(Program* program, std::vector& code, std::vectorconfig->scratch_bytes_per_wave = align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule); + program->config->wgp_mode = program->wgp_mode; return exec_size; } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index e899df6ff19..f9214c18031 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -390,6 +390,7 @@ ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_modu rbin->base.type = RADV_BINARY_TYPE_RTLD; rbin->base.total_size = alloc_size; + rbin->base.config.wgp_mode = options->wgp_mode; rbin->elf_size = elf_size; rbin->llvm_ir_size = llvm_ir_size; *rbinary = &rbin->base; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 76b688257c9..3c1542ec1cd 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2161,7 +2161,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } } - bool wgp_mode = radv_should_use_wgp_mode(device, stage, info); assert(config->lds_size <= pdev->info.lds_size_per_workgroup); unsigned lds_alloc = ac_shader_encode_lds_size(config->lds_size, pdev->info.gfx_level, stage); @@ -2204,7 +2203,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } else { config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en); } - config->rsrc1 |= S_00B428_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B428_WGP_MODE(wgp_mode); + config->rsrc1 |= S_00B428_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B428_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_VERTEX: @@ -2264,7 +2263,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi case MESA_SHADER_ANY_HIT: case MESA_SHADER_COMPUTE: case MESA_SHADER_TASK: - config->rsrc1 |= S_00B848_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B848_WGP_MODE(wgp_mode); + config->rsrc1 |= S_00B848_MEM_ORDERED(radv_mem_ordered(pdev)) | S_00B848_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) | S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 @@ -2349,7 +2348,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi * happened on VanGogh) Let's disable it on all chips that * disable exactly 1 CU per SA for GS. */ - config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode); + config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_LDS_SIZE(lds_alloc) | S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL); } else if (pdev->info.gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) { @@ -2381,7 +2380,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ } - config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode); + config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL); } else if (pdev->info.gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) { @@ -2764,13 +2763,12 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs); } - bool wgp_mode = radv_should_use_wgp_mode(device, stage, info); unsigned simd_per_cu_wgp = gpu_info->num_simd_per_compute_unit; - if (wgp_mode) + if (conf->wgp_mode) simd_per_cu_wgp *= 2; if (lds_per_workgroup) { - unsigned lds_per_cu_wgp = gpu_info->lds_size_per_workgroup * (gfx_level >= GFX10 && wgp_mode ? 2 : 1); + unsigned lds_per_cu_wgp = gpu_info->lds_size_per_workgroup * (gfx_level >= GFX10 && conf->wgp_mode ? 2 : 1); unsigned max_cu_wgp_waves = lds_per_cu_wgp / lds_per_workgroup * waves_per_workgroup; max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_cu_wgp_waves, simd_per_cu_wgp)); }