radeonsi: move xfb fields from si_shader_info to shader variant info

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
Marek Olšák 2025-04-11 15:47:08 -04:00 committed by Marge Bot
parent 9edcf19f7d
commit 100f9a1624
4 changed files with 12 additions and 12 deletions

View file

@ -2978,6 +2978,15 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
SI_SPI_PS_INPUT_ADDR_FOR_PROLOG;
}
}
if (nir->info.stage <= MESA_SHADER_GEOMETRY && nir->xfb_info &&
!shader->key.ge.as_ls && !shader->key.ge.as_es) {
unsigned num_streamout_dwords = 0;
for (unsigned i = 0; i < 4; i++)
num_streamout_dwords += nir->info.xfb_stride[i];
shader->info.num_streamout_vec4s = DIV_ROUND_UP(num_streamout_dwords, 4);
}
}
/* Late shader variant info for AMD-specific intrinsics. */
@ -3073,6 +3082,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
shader->selector = gs_selector;
shader->is_gs_copy_shader = true;
shader->wave_size = si_determine_wave_size(sscreen, shader);
shader->info.num_streamout_vec4s = gs_shader->info.num_streamout_vec4s;
STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,

View file

@ -478,9 +478,7 @@ struct si_shader_info {
uint8_t output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
uint8_t output_streams[PIPE_MAX_SHADER_OUTPUTS];
uint8_t output_type[PIPE_MAX_SHADER_OUTPUTS]; /* enum nir_alu_type */
uint8_t output_xfb_writemask[PIPE_MAX_SHADER_OUTPUTS];
uint8_t num_streamout_components;
uint8_t num_vs_inputs;
uint8_t num_vbos_in_user_sgprs;
uint8_t num_stream_output_components[4]; /* for GS streams, not streamout */
@ -883,6 +881,7 @@ struct si_shader_binary_info {
bool uses_discard : 1;
uint8_t nr_pos_exports;
uint8_t nr_param_exports;
uint8_t num_streamout_vec4s;
unsigned private_mem_vgprs;
unsigned max_simd_waves;
};

View file

@ -184,8 +184,6 @@ static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
info->enabled_streamout_buffer_mask |=
BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
}
info->output_xfb_writemask[loc] |= nir_instr_xfb_write_mask(intr);
}
}
@ -506,13 +504,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
scan_instruction(nir, info, instr, colors_lowered);
}
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
nir->info.stage == MESA_SHADER_GEOMETRY) {
info->num_streamout_components = 0;
for (unsigned i = 0; i < info->num_outputs; i++)
info->num_streamout_components += util_bitcount(info->output_xfb_writemask[i]);
}
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
/* Add the PrimitiveID output, but don't increment num_outputs.
* The driver inserts PrimitiveID only when it's used by the pixel shader,

View file

@ -1633,7 +1633,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
/* This tuning adds up to 50% streamout performance. */
if (si_shader_uses_streamout(shader)) {
unsigned num_streamout_vec4s = DIV_ROUND_UP(shader->selector->info.num_streamout_components, 4);
unsigned num_streamout_vec4s = shader->info.num_streamout_vec4s;
/* TODO: Tested on a pre-production chip. Re-test on the final chip. */
if (num_streamout_vec4s <= 4)