From a5edf2ef48f79a8d460e16a607fa2d0bb55fd7cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 25 Apr 2026 13:41:56 -0400 Subject: [PATCH] radeonsi: declare prolog LINE_STIPPLE_TEX_ENA VGPR only if needed Small PS have their VGPR usage equal to the number of input VGPRs, and this reduces it. 1 input VGPR removed from the PS prolog in most cases. --- src/gallium/drivers/radeonsi/si_shader.c | 2 ++ src/gallium/drivers/radeonsi/si_shader.h | 1 + src/gallium/drivers/radeonsi/si_shader_args.c | 3 ++- src/gallium/drivers/radeonsi/si_shader_info.c | 4 ++++ src/gallium/drivers/radeonsi/si_shader_info.h | 1 + .../drivers/radeonsi/si_shader_variant_info.c | 17 ++++++++++++++++- 6 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fdaf1ed7fe4..1805691e228 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1748,6 +1748,8 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_ G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ key->ps_prolog.uses_linear_centroid = G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ + key->ps_prolog.reserve_line_stipple_tex_ena = + G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr); /* unused but may need to be reserved */ key->ps_prolog.fragcoord_usage_mask = G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_ena) | (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_ena) << 1) | diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d9a14876db6..67573890b79 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -566,6 +566,7 @@ union si_shader_part_key { unsigned num_interp_inputs : 5; /* BCOLOR is at this location */ unsigned uses_persp_centroid : 1; unsigned uses_linear_centroid : 1; + unsigned reserve_line_stipple_tex_ena : 1; /* only reserve the VGPR, don't use it */ unsigned fragcoord_usage_mask : 4; unsigned uses_ancillary : 1; unsigned uses_sample_coverage : 1; diff --git a/src/gallium/drivers/radeonsi/si_shader_args.c b/src/gallium/drivers/radeonsi/si_shader_args.c index 3407b3a2fc1..334a8c14204 100644 --- a/src/gallium/drivers/radeonsi/si_shader_args.c +++ b/src/gallium/drivers/radeonsi/si_shader_args.c @@ -684,7 +684,8 @@ void si_get_ps_prolog_args(struct si_shader_args *args, if (key->ps_prolog.uses_linear_centroid) ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena); + if (key->ps_prolog.reserve_line_stipple_tex_ena) + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena); /* POS_X|Y|Z|W_FLOAT */ u_foreach_bit(i, key->ps_prolog.fragcoord_usage_mask) diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index a813c62a06f..9b595b1ccda 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -369,6 +369,10 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample) info->uses_interp_at_sample = true; break; + case nir_intrinsic_load_frag_coord: + if (nir_def_components_read(&intr->def) & BITFIELD_BIT(3)) + info->uses_sysval_frag_coord_w = true; + break; case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_primitive_input: diff --git a/src/gallium/drivers/radeonsi/si_shader_info.h b/src/gallium/drivers/radeonsi/si_shader_info.h index 9398cdad7b9..ef08e1d3784 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.h +++ b/src/gallium/drivers/radeonsi/si_shader_info.h @@ -150,6 +150,7 @@ struct si_shader_info { bool uses_sysval_primitive_id; bool uses_sysval_front_face; bool uses_sysval_invocation_id; + bool uses_sysval_frag_coord_w; bool uses_atomic_ordered_add; bool writes_psize; bool writes_primid; diff --git a/src/gallium/drivers/radeonsi/si_shader_variant_info.c b/src/gallium/drivers/radeonsi/si_shader_variant_info.c index eb2e43bc47a..a88171077eb 100644 --- a/src/gallium/drivers/radeonsi/si_shader_variant_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_variant_info.c @@ -614,7 +614,6 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel) S_0286D0_PERSP_CENTER_ENA(1) | S_0286D0_LINEAR_SAMPLE_ENA(1) | S_0286D0_LINEAR_CENTER_ENA(1) | - S_0286D0_LINE_STIPPLE_TEX_ENA(1) | S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1); @@ -627,6 +626,22 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel) if (sel->info.uses_sysval_linear_centroid) spi_ps_input_addr |= S_0286D0_LINEAR_CENTROID_ENA(1); + /* If barycentrics and pos.w aren't used, we may need LINE_STIPPLE_TEX_ENA as the filler + * input VGPR. See si_fixup_spi_ps_input_config for more information. + */ + if (!sel->info.uses_sysval_persp_sample && + !sel->info.uses_sysval_persp_center && + !sel->info.uses_sysval_persp_centroid && + !sel->info.uses_sysval_linear_sample && + !sel->info.uses_sysval_linear_center && + !sel->info.uses_sysval_linear_centroid && + !sel->info.uses_interp_color && + !sel->info.uses_sysval_frag_coord_w && + /* We don't set LINE_STIPPLE_TEX_ENA with LLVM, and never on GFX12. */ + sel->info.base.use_aco_amd && + sel->screen->info.gfx_level != GFX12) + spi_ps_input_addr |= S_0286D0_LINE_STIPPLE_TEX_ENA(1); + if (sel->info.uses_sysval_ancillary) spi_ps_input_addr |= S_0286D0_ANCILLARY_ENA(1);