radeonsi: move spi_shader_*_format to si_shader_variant_info

Variants can affect these values, so it's best to store them
in this struct.

No functional changes.

Cc: mesa-stable
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40372>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2026-03-11 15:41:41 +01:00 committed by Marge Bot
parent 8d3a223eed
commit da7c515783
6 changed files with 70 additions and 64 deletions

View file

@ -2020,6 +2020,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->info.writes_sample_mask &= !shader->key.ps.part.epilog.kill_samplemask;
shader->info.uses_discard |= shader->key.ps.part.prolog.poly_stipple ||
shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS;
si_shader_update_spi_shader_formats(shader);
break;
default:;
}

View file

@ -858,8 +858,6 @@ struct si_shader {
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;
unsigned spi_ps_in_control;
unsigned spi_shader_z_format;
unsigned spi_shader_col_format;
unsigned cb_shader_mask;
unsigned db_shader_control;
unsigned num_interp;

View file

@ -205,6 +205,8 @@ union si_ps_input_info {
struct si_shader_variant_info {
uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
union si_ps_input_info ps_inputs[SI_NUM_INTERP];
uint32_t spi_shader_col_format;
uint8_t spi_shader_z_format;
uint8_t num_ps_inputs;
uint8_t num_ps_per_primitive_inputs;
uint8_t num_ps_maybe_per_primitive_inputs;

View file

@ -159,5 +159,6 @@ void si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_
nir_shader *nir);
void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader);
void si_fixup_spi_ps_input_config(struct si_shader *shader);
void si_shader_update_spi_shader_formats(struct si_shader *shader);
#endif

View file

@ -8,6 +8,64 @@
#include "sid.h"
#include "si_pipe.h"
/* Recompute shader->info.spi_shader_z_format and
 * shader->info.spi_shader_col_format.
 *
 * Both fields depend on the framebuffer state and the NIR shader
 * (monolithic or main part). The inputs read here are the PS epilog key
 * and the writes_z / writes_stencil / writes_sample_mask / uses_discard
 * bits of shader->info.
 */
void si_shader_update_spi_shader_formats(struct si_shader *shader)
{
   const unsigned key_col_format = shader->key.ps.part.epilog.spi_shader_col_format;
   unsigned packed = 0;
   unsigned out_shift = 0;

   shader->info.spi_shader_z_format =
      ac_get_spi_shader_z_format(shader->info.writes_z, shader->info.writes_stencil,
                                 shader->info.writes_sample_mask,
                                 shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz);

   /* Remove holes in spi_shader_col_format: walk the key 4 bits at a time
    * and append every non-zero entry right after the previous one.
    */
   for (unsigned rest = key_col_format; rest; rest >>= 4) {
      const unsigned fmt = rest & 0xf;

      if (!fmt)
         continue;

      packed |= fmt << out_shift;
      out_shift += 4;
   }

   /* Some export memory must always be allocated, for two reasons:
    *
    * 1) Correctness: when no export memory is allocated the hardware
    *    ignores the EXEC mask, so KILL and alpha test do not work
    *    correctly.
    * 2) Performance: every shader needs at least a NULL export, even if it
    *    writes no color/depth output, and the NULL export instruction
    *    stalls without this setting.
    *
    * This must not be added to CB_SHADER_MASK.
    *
    * GFX10 supports pixel shaders without exports by setting both the
    * color and Z formats to SPI_SHADER_ZERO; the hw will skip export
    * instructions if any are present.
    *
    * RB+ depth-only rendering requires SPI_SHADER_32_R.
    */
   if (!packed) {
      const bool no_mrtz = shader->info.spi_shader_z_format == V_028710_SPI_SHADER_ZERO;

      /* Short-circuit order matters: the screen info is only consulted when
       * the RB+ depth-only path is off and there is no MRTZ export.
       */
      if (shader->key.ps.part.epilog.rbplus_depth_only_opt ||
          (no_mrtz &&
           (shader->selector->screen->info.gfx_level < GFX10 || shader->info.uses_discard)))
         packed = V_028714_SPI_SHADER_32_R;
   }

   shader->info.spi_shader_col_format = packed;
}
void si_get_shader_variant_info(struct si_shader *shader,
struct si_temp_shader_variant_info *temp_info, nir_shader *nir)
{
@ -288,6 +346,8 @@ void si_get_shader_variant_info(struct si_shader *shader,
}
}
si_shader_update_spi_shader_formats(shader);
/* ACO needs spi_ps_input_ena before si_init_shader_args. */
shader->config.spi_ps_input_ena =
S_0286CC_PERSP_SAMPLE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE)) |

View file

@ -1983,25 +1983,6 @@ static void si_shader_vs_legacy(struct si_screen *sscreen, struct si_shader *sha
ac_pm4_finalize(&pm4->base);
}
/* Return the PS epilog key's spi_shader_col_format with its zero 4-bit
 * entries squeezed out, so that the non-zero entries end up contiguous
 * starting at bit 0 and keep their relative order.
 */
static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
{
   const unsigned key_format = shader->key.ps.part.epilog.spi_shader_col_format;
   /* Number of 4-bit entries up to and including the highest non-zero one. */
   const unsigned num_slots = (util_last_bit(key_format) + 3) / 4;
   unsigned compacted = 0;
   unsigned dst_shift = 0;

   for (unsigned slot = 0; slot < num_slots; slot++) {
      const unsigned fmt = (key_format >> (slot * 4)) & 0xf;

      /* Holes (unused targets) are skipped and take no space in the result. */
      if (fmt == 0)
         continue;

      compacted |= fmt << dst_shift;
      dst_shift += 4;
   }

   return compacted;
}
static void gfx6_emit_shader_ps(struct si_context *sctx, unsigned index)
{
struct si_shader *shader = sctx->queued.named.ps;
@ -2011,8 +1992,8 @@ static void gfx6_emit_shader_ps(struct si_context *sctx, unsigned index)
shader->ps.spi_ps_input_ena,
shader->ps.spi_ps_input_addr);
radeon_opt_set_context_reg2(R_028710_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT,
shader->ps.spi_shader_z_format,
shader->ps.spi_shader_col_format);
shader->info.spi_shader_z_format,
shader->info.spi_shader_col_format);
radeon_opt_set_context_reg(R_02823C_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK,
shader->ps.cb_shader_mask);
radeon_end_update_context_roll();
@ -2029,9 +2010,9 @@ static void gfx11_dgpu_emit_shader_ps(struct si_context *sctx, unsigned index)
gfx11_opt_set_context_reg(R_0286D0_SPI_PS_INPUT_ADDR, AC_TRACKED_SPI_PS_INPUT_ADDR,
shader->ps.spi_ps_input_addr);
gfx11_opt_set_context_reg(R_028710_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT,
shader->ps.spi_shader_z_format);
shader->info.spi_shader_z_format);
gfx11_opt_set_context_reg(R_028714_SPI_SHADER_COL_FORMAT, AC_TRACKED_SPI_SHADER_COL_FORMAT,
shader->ps.spi_shader_col_format);
shader->info.spi_shader_col_format);
gfx11_opt_set_context_reg(R_02823C_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK,
shader->ps.cb_shader_mask);
gfx11_end_packed_context_regs();
@ -2045,9 +2026,9 @@ static void gfx12_emit_shader_ps(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028650_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT,
shader->ps.spi_shader_z_format);
shader->info.spi_shader_z_format);
gfx12_opt_set_context_reg(R_028654_SPI_SHADER_COL_FORMAT, AC_TRACKED_SPI_SHADER_COL_FORMAT,
shader->ps.spi_shader_col_format);
shader->info.spi_shader_col_format);
gfx12_opt_set_context_reg(R_02865C_SPI_PS_INPUT_ENA, AC_TRACKED_SPI_PS_INPUT_ENA,
shader->ps.spi_ps_input_ena);
gfx12_opt_set_context_reg(R_028660_SPI_PS_INPUT_ADDR, AC_TRACKED_SPI_PS_INPUT_ADDR,
@ -2180,47 +2161,10 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
if (sscreen->info.has_rbplus && !sscreen->info.rbplus_allowed)
shader->ps.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
shader->ps.spi_shader_col_format = si_get_spi_shader_col_format(shader);
shader->ps.cb_shader_mask = ac_get_cb_shader_mask(shader->key.ps.part.epilog.spi_shader_col_format);
shader->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena;
shader->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
shader->ps.num_interp = si_get_ps_num_interp(shader);
shader->ps.spi_shader_z_format =
ac_get_spi_shader_z_format(shader->info.writes_z, shader->info.writes_stencil,
shader->info.writes_sample_mask,
shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz);
/* Ensure that some export memory is always allocated, for two reasons:
*
* 1) Correctness: The hardware ignores the EXEC mask if no export
* memory is allocated, so KILL and alpha test do not work correctly
* without this.
* 2) Performance: Every shader needs at least a NULL export, even when
* it writes no color/depth output. The NULL export instruction
* stalls without this setting.
*
* Don't add this to CB_SHADER_MASK.
*
* GFX10 supports pixel shaders without exports by setting both
* the color and Z formats to SPI_SHADER_ZERO. The hw will skip export
* instructions if any are present.
*
* RB+ depth-only rendering requires SPI_SHADER_32_R.
*/
bool has_mrtz = shader->ps.spi_shader_z_format != V_028710_SPI_SHADER_ZERO;
if (!shader->ps.spi_shader_col_format) {
if (shader->key.ps.part.epilog.rbplus_depth_only_opt) {
shader->ps.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
} else if (!has_mrtz) {
if (sscreen->info.gfx_level >= GFX10) {
if (G_02880C_KILL_ENABLE(shader->ps.db_shader_control))
shader->ps.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
} else {
shader->ps.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
}
}
}
if (sscreen->info.gfx_level >= GFX12) {
shader->ps.spi_ps_in_control = S_028640_PARAM_GEN(shader->key.ps.mono.point_smoothing) |