From 7bb3f93fa96a64ae4237ab7aec0887bbeeb3722a Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Mon, 9 Dec 2024 13:52:32 +0100
Subject: [PATCH] radv: use nir_opt_clip_cull_const

This reduces the fixed function hardware bottleneck in vertex shading heavy
sections with d3d9 DXVK.

DXVK always declares 6 clip distances, and with
https://github.com/doitsujin/dxvk/pull/4508 it writes 0 for disabled clip
planes.

But while working on this, I also discovered that other games also write
constant clip/cull distances:

Foz-DB Navi21:
Totals from 442 (0.56% of 79206) affected shaders:
MaxWaves: 10880 -> 10900 (+0.18%)
Instrs: 283569 -> 282493 (-0.38%); split: -0.38%, +0.01%
CodeSize: 1497596 -> 1491948 (-0.38%); split: -0.38%, +0.00%
VGPRs: 18928 -> 18888 (-0.21%)
Outputs: 3026 -> 2970 (-1.85%)
Latency: 1051990 -> 1048878 (-0.30%); split: -0.47%, +0.18%
InvThroughput: 184190 -> 183366 (-0.45%); split: -0.45%, +0.01%
VClause: 6071 -> 6077 (+0.10%); split: -0.38%, +0.48%
SClause: 6724 -> 6725 (+0.01%); split: -0.12%, +0.13%
Copies: 19624 -> 19160 (-2.36%); split: -2.44%, +0.08%
PreSGPRs: 18634 -> 18526 (-0.58%)
PreVGPRs: 15522 -> 15465 (-0.37%)
VALU: 156964 -> 156330 (-0.40%); split: -0.41%, +0.01%
SALU: 41802 -> 41748 (-0.13%); split: -0.13%, +0.00%

Reviewed-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/radv_pipeline_graphics.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index b65a80faec0..82d2890e43f 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -2730,6 +2730,21 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
    /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
    radv_graphics_shaders_link_varyings(stages);
 
+   /* Optimize constant clip/cull distance after linking to operate on scalar io in the last
+    * pre raster stage.
+    */
+   radv_foreach_stage(i, active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
+   {
+      if (stages[i].key.optimisations_disabled)
+         continue;
+
+      int64_t stage_start = os_time_get_nano();
+
+      NIR_PASS(_, stages[i].nir, nir_opt_clip_cull_const);
+
+      stages[i].feedback.duration += os_time_get_nano() - stage_start;
+   }
+
    radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
 
    radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages);