radeonsi: declare prolog ANCILLARY & SAMPLE_COVERAGE VGPRs only if used

For small pixel shaders, total VGPR usage equals the number of input VGPRs,
so declaring these two prolog inputs only when they are used reduces it.

In most cases, this removes 2 input VGPRs from the PS prolog.
This commit is contained in:
Marek Olšák 2026-04-25 16:03:17 -04:00
parent 7fea12b686
commit 96a073664b
6 changed files with 27 additions and 4 deletions

View file

@ -1753,6 +1753,10 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_
(G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_ena) << 1) |
(G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_ena) << 2) |
(G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) << 3);
key->ps_prolog.uses_ancillary =
G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
key->ps_prolog.uses_sample_coverage =
G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
if (shader->key.ps.part.prolog.poly_stipple)
shader->info.uses_vmem_load_other = true;

View file

@ -567,6 +567,8 @@ union si_shader_part_key {
unsigned uses_persp_centroid : 1;
unsigned uses_linear_centroid : 1;
unsigned fragcoord_usage_mask : 4;
unsigned uses_ancillary : 1;
unsigned uses_sample_coverage : 1;
unsigned wqm : 1;
uint8_t color_attr_index[2];
uint8_t color_interp[2]; /* AC_COLOR_INTERP_* */

View file

@ -691,8 +691,13 @@ void si_get_ps_prolog_args(struct si_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[i]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.front_face);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage);
if (key->ps_prolog.uses_ancillary)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary);
if (key->ps_prolog.uses_sample_coverage)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.pos_fixed_pt);
}

View file

@ -588,6 +588,13 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir,
info->uses_sysval_invocation_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
info->uses_sysval_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
info->uses_sysval_ancillary = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LAYER_ID) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE) ||
/* The PS prolog uses LAYER_ID for fbfetch. */
(nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) ||
/* The PS prolog uses SAMPLE_ID for SAMPLE_MASK_IN. */
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
info->uses_sysval_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
info->uses_sysval_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
info->uses_sysval_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);

View file

@ -125,6 +125,7 @@ struct si_shader_info {
uint8_t colors_written;
uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */
bool color0_writes_all_cbufs; /**< gl_FragColor */
bool uses_sysval_ancillary;
bool uses_sysval_sample_mask_in; /**< does fragment shader read sample mask? */
bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */
bool writes_z; /**< does fragment shader write Z value? */

View file

@ -616,8 +616,6 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel)
S_0286D0_LINEAR_CENTER_ENA(1) |
S_0286D0_LINE_STIPPLE_TEX_ENA(1) |
S_0286D0_FRONT_FACE_ENA(1) |
S_0286D0_ANCILLARY_ENA(1) |
S_0286D0_SAMPLE_COVERAGE_ENA(1) |
S_0286D0_POS_FIXED_PT_ENA(1);
/* This includes color interpolation at centroid even if the main shader part doesn't
@ -629,5 +627,11 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel)
if (sel->info.uses_sysval_linear_centroid)
spi_ps_input_addr |= S_0286D0_LINEAR_CENTROID_ENA(1);
if (sel->info.uses_sysval_ancillary)
spi_ps_input_addr |= S_0286D0_ANCILLARY_ENA(1);
if (sel->info.uses_sysval_sample_mask_in)
spi_ps_input_addr |= S_0286D0_SAMPLE_COVERAGE_ENA(1);
return spi_ps_input_addr;
}