mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-18 23:30:30 +01:00
radeonsi: add helper si_shader_culling_enabled
it will contain more logic Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32257>
This commit is contained in:
parent
d7415d3717
commit
06292538ae
7 changed files with 16 additions and 11 deletions
|
|
@ -35,7 +35,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
|
|||
si_get_max_workgroup_size(shader),
|
||||
shader->wave_size,
|
||||
si_shader_uses_streamout(shader),
|
||||
shader->key.ge.opt.ngg_culling) / 4;
|
||||
si_shader_culling_enabled(shader)) / 4;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -119,7 +119,7 @@ retry_select_mode:
|
|||
si_shader_uses_streamout(shader),
|
||||
shader->key.ge.mono.u.vs_export_prim_id,
|
||||
gfx10_ngg_writes_user_edgeflags(shader),
|
||||
shader->key.ge.opt.ngg_culling,
|
||||
si_shader_culling_enabled(shader),
|
||||
uses_instance_id,
|
||||
uses_primitive_id) / 4;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -417,7 +417,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||
replacement = nir_imm_false(b);
|
||||
break;
|
||||
case nir_intrinsic_load_cull_any_enabled_amd:
|
||||
replacement = nir_imm_bool(b, !!key->ge.opt.ngg_culling);
|
||||
replacement = nir_imm_bool(b, si_shader_culling_enabled(shader));
|
||||
break;
|
||||
case nir_intrinsic_load_cull_back_face_enabled_amd:
|
||||
replacement = nir_imm_bool(b, key->ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE);
|
||||
|
|
|
|||
|
|
@ -1911,7 +1911,7 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
|
|||
.gfx_level = sel->screen->info.gfx_level,
|
||||
.max_workgroup_size = si_get_max_workgroup_size(shader),
|
||||
.wave_size = shader->wave_size,
|
||||
.can_cull = !!key->ge.opt.ngg_culling,
|
||||
.can_cull = si_shader_culling_enabled(shader),
|
||||
.disable_streamout = !si_shader_uses_streamout(shader),
|
||||
.vs_output_param_offset = shader->info.vs_output_param_offset,
|
||||
.has_param_exports = shader->info.nr_param_exports,
|
||||
|
|
|
|||
|
|
@ -1150,6 +1150,11 @@ static inline bool si_shader_uses_discard(struct si_shader *shader)
|
|||
shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS;
|
||||
}
|
||||
|
||||
static inline bool si_shader_culling_enabled(struct si_shader *shader)
|
||||
{
|
||||
return !!shader->key.ge.opt.ngg_culling;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
|
|||
info->hw_stage = si_select_hw_stage(stage, key, gfx_level);
|
||||
|
||||
if (stage <= MESA_SHADER_GEOMETRY && key->ge.as_ngg && !key->ge.as_es) {
|
||||
info->has_ngg_culling = key->ge.opt.ngg_culling;
|
||||
info->has_ngg_culling = si_shader_culling_enabled(shader);
|
||||
info->has_ngg_early_prim_export = gfx10_ngg_export_prim_early(shader);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -623,7 +623,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
* compaction is enabled.
|
||||
*/
|
||||
if (is_nogs_ngg_stage &&
|
||||
(si_shader_uses_streamout(shader) || shader->key.ge.opt.ngg_culling)) {
|
||||
(si_shader_uses_streamout(shader) || si_shader_culling_enabled(shader))) {
|
||||
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
|
||||
ctx->gs_ngg_scratch = (struct ac_llvm_pointer) {
|
||||
.value = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch",
|
||||
|
|
@ -659,7 +659,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
*/
|
||||
if (ctx->screen->info.gfx_level == GFX10 &&
|
||||
(ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
|
||||
shader->key.ge.as_ngg && !shader->key.ge.as_es && !shader->key.ge.opt.ngg_culling)
|
||||
shader->key.ge.as_ngg && !shader->key.ge.as_es && !si_shader_culling_enabled(shader))
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
LLVMValueRef thread_enabled = NULL;
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ unsigned si_determine_wave_size(struct si_screen *sscreen, struct si_shader *sha
|
|||
*/
|
||||
if (stage <= MESA_SHADER_GEOMETRY &&
|
||||
(sscreen->info.gfx_level == GFX10 || sscreen->info.gfx_level == GFX10_3) &&
|
||||
!(sscreen->info.gfx_level == GFX10 && shader->key.ge.opt.ngg_culling))
|
||||
!(sscreen->info.gfx_level == GFX10 && si_shader_culling_enabled(shader)))
|
||||
return 32;
|
||||
|
||||
/* Divergent loops in Wave64 can end up having too many iterations in one half of the wave
|
||||
|
|
@ -1186,7 +1186,7 @@ bool gfx10_is_ngg_passthrough(struct si_shader *shader)
|
|||
*
|
||||
* NGG passthrough still allows the use of LDS.
|
||||
*/
|
||||
return sel->stage != MESA_SHADER_GEOMETRY && !shader->key.ge.opt.ngg_culling;
|
||||
return sel->stage != MESA_SHADER_GEOMETRY && !si_shader_culling_enabled(shader);
|
||||
}
|
||||
|
||||
template <enum si_has_tess HAS_TESS>
|
||||
|
|
@ -1624,14 +1624,14 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
} else {
|
||||
unsigned late_alloc_wave64, cu_mask;
|
||||
|
||||
ac_compute_late_alloc(&sscreen->info, true, shader->key.ge.opt.ngg_culling,
|
||||
ac_compute_late_alloc(&sscreen->info, true, si_shader_culling_enabled(shader),
|
||||
shader->config.scratch_bytes_per_wave > 0,
|
||||
&late_alloc_wave64, &cu_mask);
|
||||
|
||||
/* Oversubscribe PC. This improves performance when there are too many varyings. */
|
||||
unsigned oversub_pc_lines, oversub_pc_factor = 1;
|
||||
|
||||
if (shader->key.ge.opt.ngg_culling) {
|
||||
if (si_shader_culling_enabled(shader)) {
|
||||
/* Be more aggressive with NGG culling. */
|
||||
if (shader->info.nr_param_exports > 4)
|
||||
oversub_pc_factor = 4;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue