radv: add a pass that selects either frag_coord_xy or pixel_coord, but not both

to reduce the number of initialized PS VGPRs, increasing the PS wave launch
rate.

More RADV-specific logic will be added to this pass later.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41689>
This commit is contained in:
Marek Olšák 2026-05-17 17:13:15 -04:00 committed by Marge Bot
parent a5ba7694b5
commit 6cd04ca6e5
6 changed files with 135 additions and 5 deletions

View file

@ -145,10 +145,12 @@ BEGIN_TEST(d3d11_derivs.bias)
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias1:v[2] = p_startpgm
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
//>> v1: %bias2 = v_cvt_f32_u32 (kill)%bias1 dst_sel:dword src0_sel:uword0
//>> v1: %bias3 = v_add_f32 0.5, (kill)%bias2
//>> BB1
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias3 2d
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm
@ -279,11 +281,13 @@ BEGIN_TEST(d3d11_derivs.bias_array)
PipelineBuilder pbld(get_vk_device(GFX10_3));
pbld.add_vsfs(vs, fs);
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias1:v[2] = p_startpgm
//>> v1: %layer = v_rndne_f32 (kill)%_
//>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer
//>> v1: %bias2 = v_cvt_f32_u32 (kill)%bias1 dst_sel:dword src0_sel:uword0
//>> v1: %bias3 = v_add_f32 0.5, (kill)%bias2
//>> BB1
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias3 2darray da
//>> BB2
//>> BB6
//>> p_end_linear_vgpr (kill)%wqm

View file

@ -89,6 +89,7 @@ libradv_files = files(
'nir/radv_nir_lower_immediate_samplers.c',
'nir/radv_nir_lower_intrinsics_early.c',
'nir/radv_nir_lower_io.c',
'nir/radv_nir_lower_opt_fs_frag_pos.c',
'nir/radv_nir_lower_primitive_shading_rate.c',
'nir/radv_nir_lower_printf.c',
'nir/radv_nir_lower_ray_queries.c',

View file

@ -96,6 +96,8 @@ bool radv_nir_opt_tid_function(nir_shader *shader, const radv_nir_opt_tid_functi
bool radv_nir_opt_fs_builtins(nir_shader *shader, const struct radv_graphics_state_key *gfx_state,
unsigned vgt_outprim_type);
bool radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord);
bool radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *stage);

View file

@ -0,0 +1,118 @@
/* Copyright © 2026 Valve Corporation
* SPDX-License-Identifier: MIT
*/
/* Lower frag_coord_xy to pixel_coord if either force_pixel_coord=true or all uses of all instances
* of frag_coord_xy ignore the fractional part, and lower pixel_coord to frag_coord_xy if both
* pixel_coord and frag_coord_xy exist and frag_coord_xy has at least one use that doesn't ignore
* the fractional part. At the end of the pass, only frag_coord_xy or pixel_coord can be present,
* not both.
*
* sample_pos counts as a frag_coord_xy use and is lowered to frag_coord_xy here.
*/
#include "nir_builder.h"
#include "radv_nir.h"
#include "radv_shader_info.h"
/* State shared by the two intrinsic callbacks of this pass: gather_fs_frag_pos
 * records what the shader contains, and lower_fs_frag_pos reads the lowering
 * decision derived from it.
 */
typedef struct {
/* Written by gather_fs_frag_pos. */
/* At least one load_frag_coord_xy intrinsic is present. */
bool has_frag_coord_xy;
/* At least one load_frag_coord_xy or load_sample_pos result did not pass the
 * nir_all_uses_of_float_are_integer() check, i.e. some use needs the
 * fractional part.
 */
bool has_frag_coord_xy_float_use;
/* At least one load_pixel_coord intrinsic is present. */
bool has_pixel_coord;
/* At least one load_sample_pos intrinsic is present. */
bool has_sample_pos;
/* Read by lower_fs_frag_pos; set by radv_nir_lower_opt_fs_frag_pos. */
/* Rewrite load_frag_coord_xy (and load_sample_pos) in terms of pixel_coord. */
bool lower_to_pixel_coord;
/* Rewrite load_pixel_coord in terms of frag_coord_xy. */
bool lower_to_frag_coord_xy;
} opt_fs_frag_coord_and_pixel_coord_state;
/* Intrinsic callback that records which fragment-position intrinsics the
 * shader contains and whether any frag_coord_xy/sample_pos result has a use
 * that needs the fractional part.
 *
 * The shader is never modified here, so the callback always returns false.
 * `data` points to an opt_fs_frag_coord_and_pixel_coord_state.
 */
static bool
gather_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   opt_fs_frag_coord_and_pixel_coord_state *info = data;

   /* The full vec4 frag_coord must not be present when this pass runs. */
   if (intr->intrinsic == nir_intrinsic_load_frag_coord) {
      UNREACHABLE("only frag_coord_xy is expected");
   }

   if (intr->intrinsic == nir_intrinsic_load_pixel_coord) {
      info->has_pixel_coord = true;
      return false;
   }

   if (intr->intrinsic != nir_intrinsic_load_frag_coord_xy &&
       intr->intrinsic != nir_intrinsic_load_sample_pos)
      return false;

   assert(!nir_def_is_unused(&intr->def));

   /* Once a float use has been found there is no need to inspect further
    * instances; the flag is sticky.
    */
   if (!info->has_frag_coord_xy_float_use)
      info->has_frag_coord_xy_float_use = !nir_all_uses_of_float_are_integer(&intr->def, 0x3);

   if (intr->intrinsic == nir_intrinsic_load_frag_coord_xy)
      info->has_frag_coord_xy = true;
   else
      info->has_sample_pos = true;

   return false;
}
/* Intrinsic callback that performs the rewrite selected in the pass state.
 *
 * - load_frag_coord_xy becomes u2f32(pixel_coord) + 0.5 when lowering to
 *   pixel_coord.
 * - load_sample_pos is always replaced: by a zero vec2 when lowering to
 *   pixel_coord, otherwise by ffract(frag_coord_xy).
 * - load_pixel_coord becomes f2u16(frag_coord_xy) when lowering to
 *   frag_coord_xy.
 *
 * Returns true if the instruction was replaced. `data` points to an
 * opt_fs_frag_coord_and_pixel_coord_state.
 */
static bool
lower_fs_frag_pos(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   opt_fs_frag_coord_and_pixel_coord_state *opts = data;

   /* New instructions are emitted right before the intrinsic being replaced. */
   b->cursor = nir_before_instr(&intr->instr);

   if (intr->intrinsic == nir_intrinsic_load_frag_coord_xy) {
      if (!opts->lower_to_pixel_coord)
         return false;

      nir_def_replace(&intr->def, nir_fadd_imm(b, nir_u2f32(b, nir_load_pixel_coord(b)), 0.5));
      return true;
   }

   if (intr->intrinsic == nir_intrinsic_load_sample_pos) {
      if (opts->lower_to_pixel_coord) {
         /* Unlikely; expected only when every use of sample_pos is integer-only. */
         nir_def_replace(&intr->def, nir_imm_vec2(b, 0, 0));
      } else {
         nir_def_replace(&intr->def, nir_ffract(b, nir_load_frag_coord_xy(b)));
      }
      return true;
   }

   if (intr->intrinsic == nir_intrinsic_load_pixel_coord) {
      if (!opts->lower_to_frag_coord_xy)
         return false;

      nir_def_replace(&intr->def, nir_f2u16(b, nir_load_frag_coord_xy(b)));
      return true;
   }

   return false;
}
/* Make the fragment shader use either frag_coord_xy or pixel_coord, not both.
 *
 * shader:            fragment shader to process.
 * force_pixel_coord: when true, skip the analysis and unconditionally lower
 *                    frag_coord_xy (and sample_pos) to pixel_coord.
 *
 * When not forced, the shader is scanned first: if no frag_coord_xy/sample_pos
 * use needs the fractional part, those are lowered to pixel_coord; otherwise
 * pixel_coord and sample_pos are lowered to frag_coord_xy.
 *
 * Returns true if any instruction was changed.
 */
bool
radv_nir_lower_opt_fs_frag_pos(nir_shader *shader, bool force_pixel_coord)
{
   opt_fs_frag_coord_and_pixel_coord_state state = {0};

   if (force_pixel_coord) {
      state.lower_to_pixel_coord = true;
   } else {
      /* Analysis only: nothing is modified, so all metadata stays valid. */
      nir_shader_intrinsics_pass(shader, gather_fs_frag_pos, nir_metadata_all, &state);

      if (state.has_frag_coord_xy_float_use) {
         /* The fractional part is needed somewhere: unify on frag_coord_xy. */
         state.lower_to_frag_coord_xy = state.has_pixel_coord || state.has_sample_pos;
      } else {
         /* Integer-only uses: unify on pixel_coord. */
         state.lower_to_pixel_coord = state.has_frag_coord_xy || state.has_sample_pos;
      }

      if (!state.lower_to_pixel_coord && !state.lower_to_frag_coord_xy)
         return false;
   }

   return nir_shader_intrinsics_pass(shader, lower_fs_frag_pos, nir_metadata_control_flow, &state);
}

View file

@ -1714,6 +1714,7 @@ radv_generate_graphics_state_key(const struct radv_compiler_info *compiler_info,
}
key.ps.force_vrs_enabled = compiler_info->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
key.vrs_may_be_enabled = radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled;
if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && compiler_info->ac->has_vrs_frag_pos_z_bug)
key.adjust_frag_coord_z = true;
@ -2563,7 +2564,10 @@ radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, st
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_cse);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_frag_coord_to_pixel_coord);
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_opt_fs_frag_pos,
!gfx_state->vrs_may_be_enabled && !gfx_state->ms.sample_shading_enable &&
!stages[MESA_SHADER_FRAGMENT].nir->info.fs.uses_sample_shading);
/* Lower the view index to map on the layer. */
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_view_index);

View file

@ -133,6 +133,7 @@ struct radv_graphics_state_key {
uint32_t lib_flags : 4; /* VkGraphicsPipelineLibraryFlagBitsEXT */
uint32_t has_multiview_view_index : 1;
uint32_t vrs_may_be_enabled : 1;
uint32_t adjust_frag_coord_z : 1;
uint32_t dynamic_rasterization_samples : 1;
uint32_t dynamic_provoking_vtx_mode : 1;