radv/gfx11+: add rtwave32 perftest option

Useful for testing compiler changes and for evaluating performance.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27584>
This commit is contained in:
Georg Lehmann 2024-02-12 23:50:09 +01:00 committed by Marge Bot
parent 90eae30bcb
commit e136a0629d
4 changed files with 12 additions and 6 deletions

View file

@ -1358,8 +1358,10 @@ RADV driver environment variables
disable optimizations that get enabled when all VRAM is CPU visible.
``pswave32``
enable wave32 for pixel shaders (GFX10+)
``rtwave32``
enable wave32 for ray tracing shaders (GFX11+)
``rtwave64``
enable wave64 for ray tracing shaders (GFX10+)
enable wave64 for ray tracing shaders (GFX10-10.3)
``sam``
enable optimizations to move more driver internal objects to VRAM.
``shader_object``

View file

@ -93,6 +93,7 @@ enum {
RADV_PERFTEST_TRANSFER_QUEUE = 1u << 14,
RADV_PERFTEST_SHADER_OBJECT = 1u << 15,
RADV_PERFTEST_NIR_CACHE = 1u << 16,
RADV_PERFTEST_RT_WAVE_32 = 1u << 17,
};
bool radv_init_trace(struct radv_device *device);

View file

@ -104,6 +104,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P
{"transfer_queue", RADV_PERFTEST_TRANSFER_QUEUE},
{"shader_object", RADV_PERFTEST_SHADER_OBJECT},
{"nircache", RADV_PERFTEST_NIR_CACHE},
{"rtwave32", RADV_PERFTEST_RT_WAVE_32},
{NULL, 0}};
const char *

View file

@ -2012,13 +2012,15 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
device->ge_wave_size = 32;
/* Default to 32 on RDNA1-2 as that gives better perf due to fewer issues with divergence.
* However, on GFX11 default to wave64 as ACO does not support VOPD yet, and with the VALU
* dependence wave32 would likely be a net-loss (as well as the SALU count becoming more
* problematic)
* However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than
* wave32 VOPD for VALU dependent code.
* (as well as the SALU count becoming more problematic with wave32)
*/
if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64) &&
!(device->instance->drirc.force_rt_wave64) && device->rad_info.gfx_level < GFX11)
if (device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || device->rad_info.gfx_level < GFX11)
device->rt_wave_size = 32;
if (device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64 || device->instance->drirc.force_rt_wave64)
device->rt_wave_size = 64;
}
device->max_shared_size = device->rad_info.gfx_level >= GFX7 ? 65536 : 32768;