radv: Use wave32 for ray queries inside compute shaders

Results in a 6% performance improvement with Quake II RTX.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20143>
This commit is contained in:
Konstantin Seurer 2022-12-03 16:48:03 +01:00 committed by Marge Bot
parent 712fcaba1f
commit d86f60d21e
2 changed files with 11 additions and 3 deletions

View file

@ -348,6 +348,7 @@ struct radv_shader_info {
bool uses_sbt;
bool uses_ray_launch_size;
bool uses_dynamic_rt_callable_stack;
bool uses_rt;
} cs;
struct {
uint64_t tes_inputs_read;

View file

@ -217,6 +217,9 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd:
info->cs.uses_dynamic_rt_callable_stack = true;
break;
case nir_intrinsic_bvh64_intersect_ray_amd:
info->cs.uses_rt = true;
break;
default:
break;
}
@ -619,8 +622,12 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir,
unsigned req_subgroup_size = subgroup_size;
bool require_full_subgroups = pipeline_key->cs.require_full_subgroups;
unsigned default_wave_size = device->physical_device->cs_wave_size;
if (info->cs.uses_rt)
default_wave_size = device->physical_device->rt_wave_size;
if (!subgroup_size)
subgroup_size = device->physical_device->cs_wave_size;
subgroup_size = default_wave_size;
unsigned local_size =
nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];
@ -628,8 +635,8 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir,
/* Games don't always request full subgroups when they should, which can cause bugs if cswave32
* is enabled.
*/
if (device->physical_device->cs_wave_size == 32 && nir->info.uses_wide_subgroup_intrinsics &&
!req_subgroup_size && local_size % RADV_SUBGROUP_SIZE == 0)
if (default_wave_size == 32 && nir->info.uses_wide_subgroup_intrinsics && !req_subgroup_size &&
local_size % RADV_SUBGROUP_SIZE == 0)
require_full_subgroups = true;
if (require_full_subgroups && !req_subgroup_size) {