From 6cd04ca6e55e08b19e766d46f9b434d0790df801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 17 May 2026 17:13:15 -0400 Subject: [PATCH] radv: add a pass that selects either frag_coord_xy or pixel_coord, but not both to reduce the number of initialized PS VGPRs, increasing the PS wave launch rate. The pass will have more RADV-specific stuff. Reviewed-by: Samuel Pitoiset Reviewed-by: Georg Lehmann Part-of: --- src/amd/compiler/tests/test_d3d11_derivs.cpp | 12 +- src/amd/vulkan/meson.build | 1 + src/amd/vulkan/nir/radv_nir.h | 2 + .../nir/radv_nir_lower_opt_fs_frag_pos.c | 118 ++++++++++++++++++ src/amd/vulkan/radv_pipeline_graphics.c | 6 +- src/amd/vulkan/radv_shader.h | 1 + 6 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 src/amd/vulkan/nir/radv_nir_lower_opt_fs_frag_pos.c diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp index 57276e9e4d8..176bbda910e 100644 --- a/src/amd/compiler/tests/test_d3d11_derivs.cpp +++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp @@ -145,10 +145,12 @@ BEGIN_TEST(d3d11_derivs.bias) PipelineBuilder pbld(get_vk_device(GFX10_3)); pbld.add_vsfs(vs, fs); - //>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm + //>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias1:v[2] = p_startpgm //>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_ + //>> v1: %bias2 = v_cvt_f32_u32 (kill)%bias1 dst_sel:dword src0_sel:uword0 + //>> v1: %bias3 = v_add_f32 0.5, (kill)%bias2 //>> BB1 - //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d + //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias3 2d //>> BB2 //>> BB6 //>> p_end_linear_vgpr (kill)%wqm @@ -279,11 +281,13 @@ BEGIN_TEST(d3d11_derivs.bias_array) PipelineBuilder pbld(get_vk_device(GFX10_3)); pbld.add_vsfs(vs, fs); - //>> s2: %_:s[0-1], s1: %_:s[2], s1: 
%_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm + //>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias1:v[2] = p_startpgm //>> v1: %layer = v_rndne_f32 (kill)%_ //>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer + //>> v1: %bias2 = v_cvt_f32_u32 (kill)%bias1 dst_sel:dword src0_sel:uword0 + //>> v1: %bias3 = v_add_f32 0.5, (kill)%bias2 //>> BB1 - //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da + //>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias3 2darray da //>> BB2 //>> BB6 //>> p_end_linear_vgpr (kill)%wqm diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index b938803fe78..4d0da31a263 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -89,6 +89,7 @@ libradv_files = files( 'nir/radv_nir_lower_immediate_samplers.c', 'nir/radv_nir_lower_intrinsics_early.c', 'nir/radv_nir_lower_io.c', + 'nir/radv_nir_lower_opt_fs_frag_pos.c', 'nir/radv_nir_lower_primitive_shading_rate.c', 'nir/radv_nir_lower_printf.c', 'nir/radv_nir_lower_ray_queries.c', diff --git a/src/amd/vulkan/nir/radv_nir.h b/src/amd/vulkan/nir/radv_nir.h index b54e45d7eec..a17a99ca132 100644 --- a/src/amd/vulkan/nir/radv_nir.h +++ b/src/amd/vulkan/nir/radv_nir.h @@ -96,6 +96,8 @@ bool radv_nir_opt_tid_function(nir_shader *shader, const radv_nir_opt_tid_functi bool radv_nir_opt_fs_builtins(nir_shader *shader, const struct radv_graphics_state_key *gfx_state, unsigned vgt_outprim_type); +bool radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord); + bool radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info, const struct radv_shader_stage *stage); diff --git a/src/amd/vulkan/nir/radv_nir_lower_opt_fs_frag_pos.c b/src/amd/vulkan/nir/radv_nir_lower_opt_fs_frag_pos.c new file mode 100644 index 00000000000..62f05646802 --- /dev/null +++ 
b/src/amd/vulkan/nir/radv_nir_lower_opt_fs_frag_pos.c @@ -0,0 +1,118 @@ +/* Copyright © 2026 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +/* Lower frag_coord_xy to pixel_coord if either force_pixel_coord=true or all uses of all instances + * of frag_coord_xy ignore the fractional part, and lower pixel_coord to frag_coord_xy if both + * pixel_coord and frag_coord_xy exist and frag_coord_xy has at least one use that doesn't ignore + * the fractional part. At the end of the pass, only frag_coord_xy or pixel_coord can be present, + * not both. + * + * sample_pos counts as a frag_coord_xy use and is lowered here to fract(frag_coord_xy), or to 0 when pixel_coord is selected. + */ + +#include "nir_builder.h" +#include "radv_nir.h" +#include "radv_shader_info.h" + +typedef struct { + /* Written by gather_fs_frag_pos. */ + bool has_frag_coord_xy; + bool has_frag_coord_xy_float_use; + bool has_pixel_coord; + bool has_sample_pos; + + /* Read by lower_fs_frag_pos; set by radv_nir_lower_opt_fs_frag_pos. */ + bool lower_to_pixel_coord; + bool lower_to_frag_coord_xy; +} opt_fs_frag_coord_and_pixel_coord_state; +/* Intrinsic callback: records which position intrinsics are present and whether any frag_coord_xy or sample_pos use is not integer-only. Always returns false because it never changes the IR. */ +static bool +gather_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + opt_fs_frag_coord_and_pixel_coord_state *state = (opt_fs_frag_coord_and_pixel_coord_state *)data; + + switch (intr->intrinsic) { + case nir_intrinsic_load_frag_coord: + UNREACHABLE("only frag_coord_xy is expected"); + + case nir_intrinsic_load_frag_coord_xy: + case nir_intrinsic_load_sample_pos: + assert(!nir_def_is_unused(&intr->def)); + /* The flag is sticky: once it is set the use query is skipped. NOTE(review): 0x3 presumably selects components x and y; confirm against nir_all_uses_of_float_are_integer. */ + if (!state->has_frag_coord_xy_float_use && !nir_all_uses_of_float_are_integer(&intr->def, 0x3)) + state->has_frag_coord_xy_float_use = true; + + state->has_frag_coord_xy |= intr->intrinsic == nir_intrinsic_load_frag_coord_xy; + state->has_sample_pos |= intr->intrinsic == nir_intrinsic_load_sample_pos; + return false; + + case nir_intrinsic_load_pixel_coord: + state->has_pixel_coord = true; + return false; + + default: + return false; + } +} +/* Intrinsic callback: rewrites frag_coord_xy, sample_pos and pixel_coord according to the lower_to_* flags in the state. Returns true when it replaced the intrinsic, false otherwise. */ +static bool +lower_fs_frag_pos(nir_builder *b, 
nir_intrinsic_instr *intr, void *data) +{ + opt_fs_frag_coord_and_pixel_coord_state *state = (opt_fs_frag_coord_and_pixel_coord_state *)data; + + b->cursor = nir_before_instr(&intr->instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_frag_coord_xy: + if (state->lower_to_pixel_coord) { + nir_def_replace(&intr->def, nir_fadd_imm(b, nir_u2f32(b, nir_load_pixel_coord(b)), 0.5)); + return true; + } + return false; + + case nir_intrinsic_load_sample_pos: + if (state->lower_to_pixel_coord) { + /* Unlikely. On the gathered path this is only reached when every frag_coord_xy and sample_pos use is integer-only. NOTE(review): with force_pixel_coord=true no use analysis has run before this replacement; confirm the caller rules out float uses of sample_pos. */ + nir_def_replace(&intr->def, nir_imm_vec2(b, 0, 0)); + } else { + nir_def_replace(&intr->def, nir_ffract(b, nir_load_frag_coord_xy(b))); + } + return true; + + case nir_intrinsic_load_pixel_coord: + if (state->lower_to_frag_coord_xy) { + nir_def_replace(&intr->def, nir_f2u16(b, nir_load_frag_coord_xy(b))); + return true; + } + return false; + + default: + return false; + } +} +/* Entry point. With force_pixel_coord=true the gather step is skipped and everything is lowered to pixel_coord. Otherwise the direction is derived from the gathered flags. Returns the result of the lowering pass, or false when neither direction is selected. */ +bool +radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord) +{ + if (force_pixel_coord) { + opt_fs_frag_coord_and_pixel_coord_state state = { + .lower_to_pixel_coord = true, + }; + + return nir_shader_intrinsics_pass(shader, lower_fs_frag_pos, nir_metadata_control_flow, &state); + } else { + opt_fs_frag_coord_and_pixel_coord_state state = {0}; + /* Analysis only: gather_fs_frag_pos returns false on every path, so the result is ignored. */ + nir_shader_intrinsics_pass(shader, gather_fs_frag_pos, nir_metadata_all, &state); + state.lower_to_pixel_coord = + (state.has_frag_coord_xy || state.has_sample_pos) && !state.has_frag_coord_xy_float_use; + state.lower_to_frag_coord_xy = + (state.has_pixel_coord || state.has_sample_pos) && state.has_frag_coord_xy_float_use; + + if (!state.lower_to_pixel_coord && !state.lower_to_frag_coord_xy) + return false; + + return nir_shader_intrinsics_pass(shader, lower_fs_frag_pos, nir_metadata_control_flow, &state); + } +} diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index fd3a0732944..04ca57eb8ea 100644 --- 
a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -1714,6 +1714,7 @@ radv_generate_graphics_state_key(const struct radv_compiler_info *compiler_info, } key.ps.force_vrs_enabled = compiler_info->force_vrs_enabled && !radv_is_static_vrs_enabled(state); + key.vrs_may_be_enabled = radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled; if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && compiler_info->ac->has_vrs_frag_pos_z_bug) key.adjust_frag_coord_z = true; @@ -2563,7 +2564,10 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse); NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop); NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce); - NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord); + + NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_opt_fs_frag_pos, + !gfx_state->vrs_may_be_enabled && !gfx_state->ms.sample_shading_enable && + !stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading); /* Lower the view index to map on the layer. */ NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_view_index); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index b030a1c19e4..88153bae79d 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -133,6 +133,7 @@ struct radv_graphics_state_key { uint32_t lib_flags : 4; /* VkGraphicsPipelineLibraryFlagBitsEXT */ uint32_t has_multiview_view_index : 1; + uint32_t vrs_may_be_enabled : 1; uint32_t adjust_frag_coord_z : 1; uint32_t dynamic_rasterization_samples : 1; uint32_t dynamic_provoking_vtx_mode : 1;