From 96a073664b65e5b73574c8334a9498d8d6a12909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 25 Apr 2026 16:03:17 -0400 Subject: [PATCH] radeonsi: declare prolog ANCILLARY & SAMPLE_COVERAGE VGPRs only if used Small PS have their VGPR usage equal to the number of input VGPRs, and this reduces it. 2 input VGPRs removed from the PS prolog in most cases. --- src/gallium/drivers/radeonsi/si_shader.c | 4 ++++ src/gallium/drivers/radeonsi/si_shader.h | 2 ++ src/gallium/drivers/radeonsi/si_shader_args.c | 9 +++++++-- src/gallium/drivers/radeonsi/si_shader_info.c | 7 +++++++ src/gallium/drivers/radeonsi/si_shader_info.h | 1 + src/gallium/drivers/radeonsi/si_shader_variant_info.c | 8 ++++++-- 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 93be4b593e1..fdaf1ed7fe4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1753,6 +1753,10 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_ (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_ena) << 1) | (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_ena) << 2) | (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) << 3); + key->ps_prolog.uses_ancillary = + G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ + key->ps_prolog.uses_sample_coverage = + G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ if (shader->key.ps.part.prolog.poly_stipple) shader->info.uses_vmem_load_other = true; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index fc3f914d66d..d9a14876db6 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -567,6 +567,8 @@ union si_shader_part_key { unsigned uses_persp_centroid : 1; unsigned uses_linear_centroid : 1; unsigned fragcoord_usage_mask : 4; + unsigned uses_ancillary : 1; + unsigned uses_sample_coverage : 1; unsigned wqm : 1; uint8_t color_attr_index[2]; uint8_t color_interp[2]; /* AC_COLOR_INTERP_* */ diff --git a/src/gallium/drivers/radeonsi/si_shader_args.c b/src/gallium/drivers/radeonsi/si_shader_args.c index 3ae18e1b329..3407b3a2fc1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_args.c +++ b/src/gallium/drivers/radeonsi/si_shader_args.c @@ -691,8 +691,13 @@ void si_get_ps_prolog_args(struct si_shader_args *args, ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[i]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.front_face); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage); + + if (key->ps_prolog.uses_ancillary) + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary); + + if (key->ps_prolog.uses_sample_coverage) + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.pos_fixed_pt); } diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 2bc8fe29d9d..a813c62a06f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -588,6 +588,13 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir, info->uses_sysval_invocation_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID); info->uses_sysval_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID; + info->uses_sysval_ancillary = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LAYER_ID) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE) || + /* The PS prolog uses LAYER_ID for fbfetch. */ + (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) || + /* The PS prolog uses SAMPLE_ID for SAMPLE_MASK_IN. */ + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); info->uses_sysval_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); info->uses_sysval_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE); info->uses_sysval_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID); diff --git a/src/gallium/drivers/radeonsi/si_shader_info.h b/src/gallium/drivers/radeonsi/si_shader_info.h index fe6b388ff96..9398cdad7b9 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.h +++ b/src/gallium/drivers/radeonsi/si_shader_info.h @@ -125,6 +125,7 @@ struct si_shader_info { uint8_t colors_written; uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */ bool color0_writes_all_cbufs; /**< gl_FragColor */ + bool uses_sysval_ancillary; bool uses_sysval_sample_mask_in; /**< does fragment shader read sample mask? */ bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */ bool writes_z; /**< does fragment shader write Z value? */ diff --git a/src/gallium/drivers/radeonsi/si_shader_variant_info.c b/src/gallium/drivers/radeonsi/si_shader_variant_info.c index 94c542b5541..eb2e43bc47a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_variant_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_variant_info.c @@ -616,8 +616,6 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel) S_0286D0_LINEAR_CENTER_ENA(1) | S_0286D0_LINE_STIPPLE_TEX_ENA(1) | S_0286D0_FRONT_FACE_ENA(1) | - S_0286D0_ANCILLARY_ENA(1) | - S_0286D0_SAMPLE_COVERAGE_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1); /* This includes color interpolation at centroid even if the main shader part doesn't @@ -629,5 +627,11 @@ unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel) if (sel->info.uses_sysval_linear_centroid) spi_ps_input_addr |= S_0286D0_LINEAR_CENTROID_ENA(1); + if (sel->info.uses_sysval_ancillary) + spi_ps_input_addr |= S_0286D0_ANCILLARY_ENA(1); + + if (sel->info.uses_sysval_sample_mask_in) + spi_ps_input_addr |= S_0286D0_SAMPLE_COVERAGE_ENA(1); + return spi_ps_input_addr; }