radeonsi: use info.num_streamout_vec4s instead of si_shader_uses_streamout

It's identical now.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
Marek Olšák 2025-04-11 16:09:28 -04:00 committed by Marge Bot
parent 759de230de
commit 180f320e69
5 changed files with 13 additions and 21 deletions

View file

@ -41,7 +41,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
return ac_ngg_get_scratch_lds_size(sel->stage,
si_get_max_workgroup_size(shader),
shader->wave_size,
si_shader_uses_streamout(shader),
shader->info.num_streamout_vec4s != 0,
si_shader_culling_enabled(shader),
false) / 4;
}
@ -117,7 +117,7 @@ retry_select_mode:
esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size(
gs_stage, gs_sel->info.num_outputs,
si_shader_uses_streamout(shader),
shader->info.num_streamout_vec4s != 0,
shader->key.ge.mono.u.vs_export_prim_id,
gfx10_ngg_writes_user_edgeflags(shader),
si_shader_culling_enabled(shader),

View file

@ -132,7 +132,7 @@ static void declare_streamout_params(struct si_shader_args *args, struct si_shad
}
/* Streamout SGPRs. */
if (si_shader_uses_streamout(shader)) {
if (shader->info.num_streamout_vec4s) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
@ -160,7 +160,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
case MESA_SHADER_TESS_EVAL:
/* Use the largest workgroup size for streamout */
if (shader->key.ge.as_ngg)
return si_shader_uses_streamout(shader) ? 256 : 128;
return shader->info.num_streamout_vec4s ? 256 : 128;
/* As part of merged shader. */
return shader->selector->screen->info.gfx_level >= GFX9 &&
@ -1826,7 +1826,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
.max_workgroup_size = si_get_max_workgroup_size(shader),
.wave_size = shader->wave_size,
.can_cull = si_shader_culling_enabled(shader),
.disable_streamout = !si_shader_uses_streamout(shader),
.disable_streamout = !shader->info.num_streamout_vec4s,
.vs_output_param_offset = shader->info.vs_output_param_offset,
.has_param_exports = shader->info.nr_param_exports,
.clip_cull_dist_mask = clip_cull_dist_mask,
@ -2462,7 +2462,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
shader->info.vs_output_param_offset,
shader->info.nr_param_exports,
shader->key.ge.mono.u.vs_export_prim_id,
!si_shader_uses_streamout(shader),
!shader->info.num_streamout_vec4s,
key->ge.opt.kill_pointsize,
key->ge.opt.kill_layer,
sel->screen->options.vrs2x2);
@ -3139,7 +3139,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
clip_cull_mask,
shader->info.vs_output_param_offset,
shader->info.nr_param_exports,
!si_shader_uses_streamout(gs_shader),
!gs_shader->info.num_streamout_vec4s,
gskey->ge.opt.kill_pointsize,
gskey->ge.opt.kill_layer,
sscreen->options.vrs2x2,

View file

@ -1132,14 +1132,6 @@ static inline bool gfx10_has_variable_edgeflags(struct si_shader *shader)
(output_prim == MESA_PRIM_TRIANGLES || output_prim == MESA_PRIM_UNKNOWN);
}
static inline bool si_shader_uses_streamout(const struct si_shader *shader)
{
return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
shader->selector->info.enabled_streamout_buffer_mask &&
!shader->key.ge.opt.remove_streamout &&
!shader->key.ge.mono.remove_streamout;
}
static inline bool si_shader_culling_enabled(struct si_shader *shader)
{
/* Legacy VS/TES/GS and ES don't cull in the shader. */

View file

@ -604,7 +604,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
* compaction is enabled.
*/
if (is_nogs_ngg_stage &&
(si_shader_uses_streamout(shader) || si_shader_culling_enabled(shader))) {
(shader->info.num_streamout_vec4s || si_shader_culling_enabled(shader))) {
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
ctx->gs_ngg_scratch = (struct ac_llvm_pointer) {
.value = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch",

View file

@ -1475,7 +1475,7 @@ unsigned si_shader_num_alloc_param_exports(struct si_shader *shader)
* The recommended solution is to use the alloc/dealloc mechanism of the attribute ring to limit
* the number of workgroups in flight and thus the number of ordered IDs in flight.
*/
if (shader->selector->screen->info.gfx_level >= GFX12 && si_shader_uses_streamout(shader))
if (shader->selector->screen->info.gfx_level >= GFX12 && shader->info.num_streamout_vec4s)
num_params = MAX2(num_params, 8);
return num_params;
@ -1632,7 +1632,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
unsigned wave_limit_per_se = 0x3ff;
/* This tuning adds up to 50% streamout performance. */
if (si_shader_uses_streamout(shader)) {
if (shader->info.num_streamout_vec4s) {
unsigned num_streamout_vec4s = shader->info.num_streamout_vec4s;
/* TODO: Tested on a pre-production chip. Re-test on the final chip. */
@ -1758,7 +1758,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
S_028A98_GS_EN(gs_stage == MESA_SHADER_GEOMETRY) |
S_028A98_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader)) |
S_028A98_GS_W32_EN(shader->wave_size == 32) |
S_028A98_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader));
S_028A98_NGG_WAVE_ID_EN(shader->info.num_streamout_vec4s != 0);
} else {
shader->ngg.vgt_shader_stages_en =
S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ?
@ -1768,7 +1768,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
S_028B54_PRIMGEN_PASSTHRU_EN(gfx10_is_ngg_passthrough(shader)) |
S_028B54_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader) &&
sscreen->info.family >= CHIP_NAVI23) |
S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) |
S_028B54_NGG_WAVE_ID_EN(shader->info.num_streamout_vec4s != 0) |
S_028B54_GS_W32_EN(shader->wave_size == 32) |
S_028B54_MAX_PRIMGRP_IN_WAVE(2);
}
@ -1957,7 +1957,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
else if (sscreen->info.gfx_level == GFX9)
rsrc2 |= S_00B12C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
if (si_shader_uses_streamout(shader)) {
if (shader->info.num_streamout_vec4s) {
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |