radeonsi: add helper si_shader_culling_enabled

The helper will contain more logic in the future.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32257>
Marek Olšák 2024-11-19 10:19:05 -05:00 committed by Marge Bot
parent d7415d3717
commit 06292538ae
7 changed files with 16 additions and 11 deletions
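
In short: call sites that read shader->key.ge.opt.ngg_culling directly now go through the new si_shader_culling_enabled() helper, so future culling conditions only need to be added in one place. A minimal standalone sketch of the pattern follows; the struct definitions are simplified stand-ins for the real radeonsi types (not the actual headers) and model only the one field the helper reads.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the real radeonsi types; only the field used
 * by the helper is modeled here. */
struct si_shader_key {
   struct {
      struct {
         unsigned ngg_culling; /* bitmask of SI_NGG_CULL_* flags */
      } opt;
   } ge;
};

struct si_shader {
   struct si_shader_key key;
};

/* The helper added by this commit: a single place to decide whether NGG
 * culling is enabled, so more logic can be added later without touching
 * every call site. */
static inline bool si_shader_culling_enabled(struct si_shader *shader)
{
   return !!shader->key.ge.opt.ngg_culling;
}

int main(void)
{
   struct si_shader shader = { .key.ge.opt.ngg_culling = 0x1 };

   /* Call sites now ask the helper instead of reading the key directly. */
   printf("culling enabled: %d\n", si_shader_culling_enabled(&shader));
   return 0;
}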

@@ -35,7 +35,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
 si_get_max_workgroup_size(shader),
 shader->wave_size,
 si_shader_uses_streamout(shader),
-shader->key.ge.opt.ngg_culling) / 4;
+si_shader_culling_enabled(shader)) / 4;
 }
 /**
@@ -119,7 +119,7 @@ retry_select_mode:
 si_shader_uses_streamout(shader),
 shader->key.ge.mono.u.vs_export_prim_id,
 gfx10_ngg_writes_user_edgeflags(shader),
-shader->key.ge.opt.ngg_culling,
+si_shader_culling_enabled(shader),
 uses_instance_id,
 uses_primitive_id) / 4;
 }

@@ -417,7 +417,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
 replacement = nir_imm_false(b);
 break;
 case nir_intrinsic_load_cull_any_enabled_amd:
-replacement = nir_imm_bool(b, !!key->ge.opt.ngg_culling);
+replacement = nir_imm_bool(b, si_shader_culling_enabled(shader));
 break;
 case nir_intrinsic_load_cull_back_face_enabled_amd:
 replacement = nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE);

@@ -1911,7 +1911,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
 .gfx_level = sel->screen->info.gfx_level,
 .max_workgroup_size = si_get_max_workgroup_size(shader),
 .wave_size = shader->wave_size,
-.can_cull = !!key->ge.opt.ngg_culling,
+.can_cull = si_shader_culling_enabled(shader),
 .disable_streamout = !si_shader_uses_streamout(shader),
 .vs_output_param_offset = shader->info.vs_output_param_offset,
 .has_param_exports = shader->info.nr_param_exports,

@@ -1150,6 +1150,11 @@ static inline bool si_shader_uses_discard(struct si_shader *shader)
 shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS;
 }
+
+static inline bool si_shader_culling_enabled(struct si_shader *shader)
+{
+   return !!shader->key.ge.opt.ngg_culling;
+}
 #ifdef __cplusplus
 }
 #endif

@@ -80,7 +80,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
 info->hw_stage = si_select_hw_stage(stage, key, gfx_level);
 if (stage <= MESA_SHADER_GEOMETRY && key->ge.as_ngg && !key->ge.as_es) {
-info->has_ngg_culling = key->ge.opt.ngg_culling;
+info->has_ngg_culling = si_shader_culling_enabled(shader);
 info->has_ngg_early_prim_export = gfx10_ngg_export_prim_early(shader);
 }

@@ -623,7 +623,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
 * compaction is enabled.
 */
 if (is_nogs_ngg_stage &&
-(si_shader_uses_streamout(shader) || shader->key.ge.opt.ngg_culling)) {
+(si_shader_uses_streamout(shader) || si_shader_culling_enabled(shader))) {
 LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
 ctx->gs_ngg_scratch = (struct ac_llvm_pointer) {
 .value = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch",
@@ -659,7 +659,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
 */
 if (ctx->screen->info.gfx_level == GFX10 &&
 (ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
-shader->key.ge.as_ngg && !shader->key.ge.as_es && !shader->key.ge.opt.ngg_culling)
+shader->key.ge.as_ngg && !shader->key.ge.as_es && !si_shader_culling_enabled(shader))
 ac_build_s_barrier(&ctx->ac, ctx->stage);
 LLVMValueRef thread_enabled = NULL;

@@ -95,7 +95,7 @@ unsigned si_determine_wave_size(struct si_screen *sscreen, struct si_shader *sha
 */
 if (stage <= MESA_SHADER_GEOMETRY &&
 (sscreen->info.gfx_level == GFX10 || sscreen->info.gfx_level == GFX10_3) &&
-!(sscreen->info.gfx_level == GFX10 && shader->key.ge.opt.ngg_culling))
+!(sscreen->info.gfx_level == GFX10 && si_shader_culling_enabled(shader)))
 return 32;
 /* Divergent loops in Wave64 can end up having too many iterations in one half of the wave
@@ -1186,7 +1186,7 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader)
 *
 * NGG passthrough still allows the use of LDS.
 */
-return sel->stage != MESA_SHADER_GEOMETRY && !shader->key.ge.opt.ngg_culling;
+return sel->stage != MESA_SHADER_GEOMETRY && !si_shader_culling_enabled(shader);
 }
 template <enum si_has_tess HAS_TESS>
@@ -1624,14 +1624,14 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
 } else {
 unsigned late_alloc_wave64, cu_mask;
-ac_compute_late_alloc(&sscreen->info, true, shader->key.ge.opt.ngg_culling,
+ac_compute_late_alloc(&sscreen->info, true, si_shader_culling_enabled(shader),
 shader->config.scratch_bytes_per_wave > 0,
 &late_alloc_wave64, &cu_mask);
 /* Oversubscribe PC. This improves performance when there are too many varyings. */
 unsigned oversub_pc_lines, oversub_pc_factor = 1;
-if (shader->key.ge.opt.ngg_culling) {
+if (si_shader_culling_enabled(shader)) {
 /* Be more aggressive with NGG culling. */
 if (shader->info.nr_param_exports > 4)
 oversub_pc_factor = 4;