From d86f60d21e31718120b633745e53768a537ef4a3 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Sat, 3 Dec 2022 16:48:03 +0100 Subject: [PATCH] radv: Use wave32 for ray queries inside compute shaders Results in a 6% performance improvement with Quake II RTX. Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_shader.h | 1 + src/amd/vulkan/radv_shader_info.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 08792195856..13c5104225f 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -348,6 +348,7 @@ struct radv_shader_info { bool uses_sbt; bool uses_ray_launch_size; bool uses_dynamic_rt_callable_stack; + bool uses_rt; } cs; struct { uint64_t tes_inputs_read; diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 6893cd99d39..013ef763eb1 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -217,6 +217,9 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd: info->cs.uses_dynamic_rt_callable_stack = true; break; + case nir_intrinsic_bvh64_intersect_ray_amd: + info->cs.uses_rt = true; + break; default: break; } @@ -619,8 +622,12 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, unsigned req_subgroup_size = subgroup_size; bool require_full_subgroups = pipeline_key->cs.require_full_subgroups; + unsigned default_wave_size = device->physical_device->cs_wave_size; + if (info->cs.uses_rt) + default_wave_size = device->physical_device->rt_wave_size; + if (!subgroup_size) - subgroup_size = device->physical_device->cs_wave_size; + subgroup_size = default_wave_size; unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; @@ -628,8 +635,8 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, /* Games don't always request full subgroups when they should, which can cause bugs if cswave32 * is enabled. */ - if (device->physical_device->cs_wave_size == 32 && nir->info.uses_wide_subgroup_intrinsics && - !req_subgroup_size && local_size % RADV_SUBGROUP_SIZE == 0) + if (default_wave_size == 32 && nir->info.uses_wide_subgroup_intrinsics && !req_subgroup_size && + local_size % RADV_SUBGROUP_SIZE == 0) require_full_subgroups = true; if (require_full_subgroups && !req_subgroup_size) {