From 39448c8e9c0ca376a413287fb45871bba4425536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Tue, 10 Jan 2023 12:58:52 +0100 Subject: [PATCH] radv, aco: Add uses_full_subgroups to compute shader info. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the compiler to assume that the shader always has full subgroups, meaning that the initial EXEC mask is -1 in all waves (all lanes enabled). This assumption is incorrect for ray tracing and internal (meta) shaders because they can use unaligned dispatch. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_shader_info.h | 1 + src/amd/vulkan/radv_aco_shader_info.h | 1 + src/amd/vulkan/radv_pipeline.c | 3 ++- src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_shader.h | 1 + src/amd/vulkan/radv_shader_info.c | 11 +++++++++++ 6 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index e292262261d..a3f1872f475 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -117,6 +117,7 @@ struct aco_shader_info { } ps; struct { uint8_t subgroup_size; + bool uses_full_subgroups; } cs; uint32_t gfx9_gs_ring_lds_size; diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index 8394b5ec71f..02ca932e6b5 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -87,6 +87,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, ASSIGN_FIELD(ps.num_interp); ASSIGN_FIELD(ps.spi_ps_input); ASSIGN_FIELD(cs.subgroup_size); + ASSIGN_FIELD(cs.uses_full_subgroups); aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size; } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index c12da38c550..e9257c64747 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2471,6 +2471,7 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, radv_nir_shader_info_init(&stages[i].info); radv_nir_shader_info_pass(device, stages[i].nir, pipeline_layout, pipeline_key, + pipeline->type, &stages[i].info); } @@ -2998,7 +2999,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline, nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); struct radv_shader_info info = {0}; - radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, &info); + radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, pipeline->type, &info); info.wave_size = 64; /* Wave32 not supported. */ info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */ info.so = gs_info->so; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index a719c4caaff..eb2ea9e97b4 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2848,6 +2848,7 @@ struct radv_shader_info; void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *pipeline_key, + const enum radv_pipeline_type pipeline_type, struct radv_shader_info *info); void radv_nir_shader_info_init(struct radv_shader_info *info); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 7d70544eeb0..fec4b8c1774 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -359,6 +359,7 @@ struct radv_shader_info { bool uses_ray_launch_size; bool uses_dynamic_rt_callable_stack; bool uses_rt; + bool uses_full_subgroups; } cs; struct { uint64_t tes_inputs_read; diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 6c7ccf24a8b..3e25671552b 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -683,6 +683,7 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *pipeline_key, + const enum radv_pipeline_type pipeline_type, struct radv_shader_info *info) { struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions); @@ -823,6 +824,16 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n case MESA_SHADER_TASK: info->workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); + + /* Allow the compiler to assume that the shader always has full subgroups, + * meaning that the initial EXEC mask is -1 in all waves (all lanes enabled). + * This assumption is incorrect for ray tracing and internal (meta) shaders + * because they can use unaligned dispatch. + */ + info->cs.uses_full_subgroups = + pipeline_type != RADV_PIPELINE_RAY_TRACING && + !nir->info.internal && + (info->workgroup_size % info->wave_size) == 0; break; case MESA_SHADER_MESH: /* Already computed in gather_shader_info_mesh(). */