mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
radeonsi: use info.num_streamout_vec4s instead of si_shader_uses_streamout
It's identical now. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
parent
759de230de
commit
180f320e69
5 changed files with 13 additions and 21 deletions
|
|
@ -41,7 +41,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
|
|||
return ac_ngg_get_scratch_lds_size(sel->stage,
|
||||
si_get_max_workgroup_size(shader),
|
||||
shader->wave_size,
|
||||
si_shader_uses_streamout(shader),
|
||||
shader->info.num_streamout_vec4s != 0,
|
||||
si_shader_culling_enabled(shader),
|
||||
false) / 4;
|
||||
}
|
||||
|
|
@ -117,7 +117,7 @@ retry_select_mode:
|
|||
|
||||
esvert_lds_size = ac_ngg_nogs_get_pervertex_lds_size(
|
||||
gs_stage, gs_sel->info.num_outputs,
|
||||
si_shader_uses_streamout(shader),
|
||||
shader->info.num_streamout_vec4s != 0,
|
||||
shader->key.ge.mono.u.vs_export_prim_id,
|
||||
gfx10_ngg_writes_user_edgeflags(shader),
|
||||
si_shader_culling_enabled(shader),
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ static void declare_streamout_params(struct si_shader_args *args, struct si_shad
|
|||
}
|
||||
|
||||
/* Streamout SGPRs. */
|
||||
if (si_shader_uses_streamout(shader)) {
|
||||
if (shader->info.num_streamout_vec4s) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
|
||||
|
||||
|
|
@ -160,7 +160,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
|
|||
case MESA_SHADER_TESS_EVAL:
|
||||
/* Use the largest workgroup size for streamout */
|
||||
if (shader->key.ge.as_ngg)
|
||||
return si_shader_uses_streamout(shader) ? 256 : 128;
|
||||
return shader->info.num_streamout_vec4s ? 256 : 128;
|
||||
|
||||
/* As part of merged shader. */
|
||||
return shader->selector->screen->info.gfx_level >= GFX9 &&
|
||||
|
|
@ -1826,7 +1826,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
|||
.max_workgroup_size = si_get_max_workgroup_size(shader),
|
||||
.wave_size = shader->wave_size,
|
||||
.can_cull = si_shader_culling_enabled(shader),
|
||||
.disable_streamout = !si_shader_uses_streamout(shader),
|
||||
.disable_streamout = !shader->info.num_streamout_vec4s,
|
||||
.vs_output_param_offset = shader->info.vs_output_param_offset,
|
||||
.has_param_exports = shader->info.nr_param_exports,
|
||||
.clip_cull_dist_mask = clip_cull_dist_mask,
|
||||
|
|
@ -2462,7 +2462,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
|
|||
shader->info.vs_output_param_offset,
|
||||
shader->info.nr_param_exports,
|
||||
shader->key.ge.mono.u.vs_export_prim_id,
|
||||
!si_shader_uses_streamout(shader),
|
||||
!shader->info.num_streamout_vec4s,
|
||||
key->ge.opt.kill_pointsize,
|
||||
key->ge.opt.kill_layer,
|
||||
sel->screen->options.vrs2x2);
|
||||
|
|
@ -3139,7 +3139,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
clip_cull_mask,
|
||||
shader->info.vs_output_param_offset,
|
||||
shader->info.nr_param_exports,
|
||||
!si_shader_uses_streamout(gs_shader),
|
||||
!gs_shader->info.num_streamout_vec4s,
|
||||
gskey->ge.opt.kill_pointsize,
|
||||
gskey->ge.opt.kill_layer,
|
||||
sscreen->options.vrs2x2,
|
||||
|
|
|
|||
|
|
@ -1132,14 +1132,6 @@ static inline bool gfx10_has_variable_edgeflags(struct si_shader *shader)
|
|||
(output_prim == MESA_PRIM_TRIANGLES || output_prim == MESA_PRIM_UNKNOWN);
|
||||
}
|
||||
|
||||
static inline bool si_shader_uses_streamout(const struct si_shader *shader)
|
||||
{
|
||||
return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
|
||||
shader->selector->info.enabled_streamout_buffer_mask &&
|
||||
!shader->key.ge.opt.remove_streamout &&
|
||||
!shader->key.ge.mono.remove_streamout;
|
||||
}
|
||||
|
||||
static inline bool si_shader_culling_enabled(struct si_shader *shader)
|
||||
{
|
||||
/* Legacy VS/TES/GS and ES don't cull in the shader. */
|
||||
|
|
|
|||
|
|
@ -604,7 +604,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
* compaction is enabled.
|
||||
*/
|
||||
if (is_nogs_ngg_stage &&
|
||||
(si_shader_uses_streamout(shader) || si_shader_culling_enabled(shader))) {
|
||||
(shader->info.num_streamout_vec4s || si_shader_culling_enabled(shader))) {
|
||||
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
|
||||
ctx->gs_ngg_scratch = (struct ac_llvm_pointer) {
|
||||
.value = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch",
|
||||
|
|
|
|||
|
|
@ -1475,7 +1475,7 @@ unsigned si_shader_num_alloc_param_exports(struct si_shader *shader)
|
|||
* The recommended solution is to use the alloc/dealloc mechanism of the attribute ring to limit
|
||||
* the number of workgroups in flight and thus the number of ordered IDs in flight.
|
||||
*/
|
||||
if (shader->selector->screen->info.gfx_level >= GFX12 && si_shader_uses_streamout(shader))
|
||||
if (shader->selector->screen->info.gfx_level >= GFX12 && shader->info.num_streamout_vec4s)
|
||||
num_params = MAX2(num_params, 8);
|
||||
|
||||
return num_params;
|
||||
|
|
@ -1632,7 +1632,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
unsigned wave_limit_per_se = 0x3ff;
|
||||
|
||||
/* This tuning adds up to 50% streamout performance. */
|
||||
if (si_shader_uses_streamout(shader)) {
|
||||
if (shader->info.num_streamout_vec4s) {
|
||||
unsigned num_streamout_vec4s = shader->info.num_streamout_vec4s;
|
||||
|
||||
/* TODO: Tested on a pre-production chip. Re-test on the final chip. */
|
||||
|
|
@ -1758,7 +1758,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_028A98_GS_EN(gs_stage == MESA_SHADER_GEOMETRY) |
|
||||
S_028A98_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader)) |
|
||||
S_028A98_GS_W32_EN(shader->wave_size == 32) |
|
||||
S_028A98_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader));
|
||||
S_028A98_NGG_WAVE_ID_EN(shader->info.num_streamout_vec4s != 0);
|
||||
} else {
|
||||
shader->ngg.vgt_shader_stages_en =
|
||||
S_028B54_ES_EN(es_stage == MESA_SHADER_TESS_EVAL ?
|
||||
|
|
@ -1768,7 +1768,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_028B54_PRIMGEN_PASSTHRU_EN(gfx10_is_ngg_passthrough(shader)) |
|
||||
S_028B54_PRIMGEN_PASSTHRU_NO_MSG(gfx10_is_ngg_passthrough(shader) &&
|
||||
sscreen->info.family >= CHIP_NAVI23) |
|
||||
S_028B54_NGG_WAVE_ID_EN(si_shader_uses_streamout(shader)) |
|
||||
S_028B54_NGG_WAVE_ID_EN(shader->info.num_streamout_vec4s != 0) |
|
||||
S_028B54_GS_W32_EN(shader->wave_size == 32) |
|
||||
S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||
}
|
||||
|
|
@ -1957,7 +1957,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
|||
else if (sscreen->info.gfx_level == GFX9)
|
||||
rsrc2 |= S_00B12C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
|
||||
|
||||
if (si_shader_uses_streamout(shader)) {
|
||||
if (shader->info.num_streamout_vec4s) {
|
||||
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
|
||||
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
|
||||
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue