radeonsi: declare prolog LINE_STIPPLE_TEX_ENA VGPR only if needed

Small pixel shaders have their VGPR usage equal to the number of input VGPRs,
so not declaring an unneeded input VGPR reduces their VGPR usage.

1 input VGPR removed from the PS prolog in most cases.
This commit is contained in:
Marek Olšák 2026-04-25 13:41:56 -04:00
parent 96a073664b
commit a5edf2ef48
6 changed files with 26 additions and 2 deletions

View file

@ -1748,6 +1748,8 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
key->ps_prolog.uses_linear_centroid =
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
key->ps_prolog.reserve_line_stipple_tex_ena =
G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr); /* unused but may need to be reserved */
key->ps_prolog.fragcoord_usage_mask =
G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_ena) |
(G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_ena) << 1) |

View file

@ -566,6 +566,7 @@ union si_shader_part_key {
unsigned num_interp_inputs : 5; /* BCOLOR is at this location */
unsigned uses_persp_centroid : 1;
unsigned uses_linear_centroid : 1;
unsigned reserve_line_stipple_tex_ena : 1; /* only reserve the VGPR, don't use it */
unsigned fragcoord_usage_mask : 4;
unsigned uses_ancillary : 1;
unsigned uses_sample_coverage : 1;

View file

@ -684,7 +684,8 @@ void si_get_ps_prolog_args(struct si_shader_args *args,
if (key->ps_prolog.uses_linear_centroid)
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena);
if (key->ps_prolog.reserve_line_stipple_tex_ena)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena);
/* POS_X|Y|Z|W_FLOAT */
u_foreach_bit(i, key->ps_prolog.fragcoord_usage_mask)

View file

@ -369,6 +369,10 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in
if (intr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
info->uses_interp_at_sample = true;
break;
case nir_intrinsic_load_frag_coord:
if (nir_def_components_read(&intr->def) & BITFIELD_BIT(3))
info->uses_sysval_frag_coord_w = true;
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_per_primitive_input:

View file

@ -150,6 +150,7 @@ struct si_shader_info {
bool uses_sysval_primitive_id;
bool uses_sysval_front_face;
bool uses_sysval_invocation_id;
bool uses_sysval_frag_coord_w;
bool uses_atomic_ordered_add;
bool writes_psize;
bool writes_primid;

View file

@ -614,7 +614,6 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel)
S_0286D0_PERSP_CENTER_ENA(1) |
S_0286D0_LINEAR_SAMPLE_ENA(1) |
S_0286D0_LINEAR_CENTER_ENA(1) |
S_0286D0_LINE_STIPPLE_TEX_ENA(1) |
S_0286D0_FRONT_FACE_ENA(1) |
S_0286D0_POS_FIXED_PT_ENA(1);
@ -627,6 +626,22 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel)
if (sel->info.uses_sysval_linear_centroid)
spi_ps_input_addr |= S_0286D0_LINEAR_CENTROID_ENA(1);
/* If barycentrics and pos.w aren't used, we may need LINE_STIPPLE_TEX_ENA as the filler
* input VGPR. See si_fixup_spi_ps_input_config for more information.
*/
if (!sel->info.uses_sysval_persp_sample &&
!sel->info.uses_sysval_persp_center &&
!sel->info.uses_sysval_persp_centroid &&
!sel->info.uses_sysval_linear_sample &&
!sel->info.uses_sysval_linear_center &&
!sel->info.uses_sysval_linear_centroid &&
!sel->info.uses_interp_color &&
!sel->info.uses_sysval_frag_coord_w &&
/* We don't set LINE_STIPPLE_TEX_ENA with LLVM, and never on GFX12. */
sel->info.base.use_aco_amd &&
sel->screen->info.gfx_level != GFX12)
spi_ps_input_addr |= S_0286D0_LINE_STIPPLE_TEX_ENA(1);
if (sel->info.uses_sysval_ancillary)
spi_ps_input_addr |= S_0286D0_ANCILLARY_ENA(1);