mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 20:00:11 +01:00
radeonsi: rewrite how occlusion query precision is determined for performance
The precision of occlusion queries is now determined from the set of active queries, and the register programming is then determined from that precision and other states. As a result, we no longer set PERFECT_ZPASS_COUNTS for PIPE_QUERY_OCCLUSION_PREDICATE in some cases, which gives higher performance.

This also disables conservative occlusion queries on gfx11. They are not recommended with late Z, and detecting late Z vs. early Z would be more complicated, so they are simply never used there, which results in better performance with late Z.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24732>
This commit is contained in:
parent
5d50e77207
commit
a7e6d5bad1
4 changed files with 89 additions and 50 deletions
|
|
@ -165,6 +165,13 @@ enum si_clear_code
|
|||
#define SI_IMAGE_ACCESS_ALLOW_DCC_STORE (1 << 9)
|
||||
#define SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT (1 << 10) /* for compressed/subsampled images */
|
||||
|
||||
/* How occlusion queries are currently counted, derived from which query types
 * are active. When several types are active at once, the mode is picked in
 * this priority: integer counter, then boolean predicate, then conservative
 * predicate; with no active query the mode is DISABLE.
 */
enum si_occlusion_query_mode {
|
||||
SI_OCCLUSION_QUERY_MODE_DISABLE, /* no occlusion query is active; Z-pass counting is turned off */
|
||||
SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER, /* a PIPE_QUERY_OCCLUSION_COUNTER is active; PERFECT_ZPASS_COUNTS is always set */
|
||||
SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN, /* PIPE_QUERY_OCCLUSION_PREDICATE; PERFECT_ZPASS_COUNTS depends on DSA/blend state. Also replaces CONSERVATIVE_BOOLEAN below GFX10 and on GFX11+ */
|
||||
SI_OCCLUSION_QUERY_MODE_CONSERVATIVE_BOOLEAN, /* only PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE is active; conservative Z-pass counts stay enabled */
|
||||
};
|
||||
|
||||
/* Debug flags. */
|
||||
enum
|
||||
{
|
||||
|
|
@ -1286,8 +1293,10 @@ struct si_context {
|
|||
|
||||
/* Queries. */
|
||||
/* Maintain the list of active queries for pausing between IBs. */
|
||||
int num_occlusion_queries;
|
||||
int num_perfect_occlusion_queries;
|
||||
enum si_occlusion_query_mode occlusion_query_mode;
|
||||
int num_integer_occlusion_queries;
|
||||
int num_boolean_occlusion_queries;
|
||||
int num_conservative_occlusion_queries;
|
||||
int num_pipeline_stat_queries;
|
||||
int num_pipeline_stat_emulated_queries;
|
||||
int num_hw_pipestat_streamout_queries;
|
||||
|
|
|
|||
|
|
@ -707,23 +707,45 @@ static void si_update_occlusion_query_state(struct si_context *sctx, unsigned ty
|
|||
{
|
||||
if (type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_OCCLUSION_PREDICATE ||
|
||||
type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
|
||||
bool old_enable = sctx->num_occlusion_queries != 0;
|
||||
bool old_perfect_enable = sctx->num_perfect_occlusion_queries != 0;
|
||||
bool enable, perfect_enable;
|
||||
|
||||
sctx->num_occlusion_queries += diff;
|
||||
assert(sctx->num_occlusion_queries >= 0);
|
||||
|
||||
if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
|
||||
sctx->num_perfect_occlusion_queries += diff;
|
||||
assert(sctx->num_perfect_occlusion_queries >= 0);
|
||||
switch (type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
sctx->num_integer_occlusion_queries += diff;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
sctx->num_boolean_occlusion_queries += diff;
|
||||
break;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
|
||||
sctx->num_conservative_occlusion_queries += diff;
|
||||
break;
|
||||
}
|
||||
|
||||
enable = sctx->num_occlusion_queries != 0;
|
||||
perfect_enable = sctx->num_perfect_occlusion_queries != 0;
|
||||
assert(sctx->num_integer_occlusion_queries >= 0);
|
||||
assert(sctx->num_boolean_occlusion_queries >= 0);
|
||||
assert(sctx->num_conservative_occlusion_queries >= 0);
|
||||
|
||||
if (enable != old_enable || perfect_enable != old_perfect_enable) {
|
||||
si_set_occlusion_query_state(sctx, old_perfect_enable);
|
||||
enum si_occlusion_query_mode new_mode =
|
||||
sctx->num_integer_occlusion_queries ? SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER :
|
||||
sctx->num_boolean_occlusion_queries ? SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN :
|
||||
sctx->num_conservative_occlusion_queries ? SI_OCCLUSION_QUERY_MODE_CONSERVATIVE_BOOLEAN :
|
||||
SI_OCCLUSION_QUERY_MODE_DISABLE;
|
||||
|
||||
/* Conservative queries are only available on gfx10+. On gfx11+, they perform worse
|
||||
* with late Z, but not early Z. Instead of trying to detect late Z, never enable
|
||||
* conservative queries to keep it simple. This is the recommended programming.
|
||||
*/
|
||||
if (new_mode == SI_OCCLUSION_QUERY_MODE_CONSERVATIVE_BOOLEAN &&
|
||||
(sctx->gfx_level < GFX10 || sctx ->gfx_level >= GFX11))
|
||||
new_mode = SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN;
|
||||
|
||||
if (sctx->occlusion_query_mode != new_mode) {
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
|
||||
|
||||
if (sctx->screen->info.has_out_of_order_rast &&
|
||||
(sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER) !=
|
||||
(new_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
||||
|
||||
sctx->occlusion_query_mode = new_mode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -739,8 +739,10 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
|||
sctx->framebuffer.has_dcc_msaa))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
|
||||
|
||||
if (sctx->screen->info.has_export_conflict_bug &&
|
||||
old_blend->blend_enable_4bit != blend->blend_enable_4bit)
|
||||
if ((sctx->screen->info.has_export_conflict_bug &&
|
||||
old_blend->blend_enable_4bit != blend->blend_enable_4bit) ||
|
||||
(sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN &&
|
||||
!!old_blend->cb_target_mask != !!blend->cb_target_enabled_4bit))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
|
||||
|
||||
if (old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
|
||||
|
|
@ -1454,6 +1456,11 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
|
|||
sctx->do_update_shaders = true;
|
||||
}
|
||||
|
||||
if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN &&
|
||||
(old_dsa->depth_enabled != dsa->depth_enabled ||
|
||||
old_dsa->depth_write_enabled != dsa->depth_write_enabled))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
|
||||
|
||||
if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled ||
|
||||
old_dsa->stencil_enabled != dsa->stencil_enabled ||
|
||||
old_dsa->db_can_write != dsa->db_can_write)))
|
||||
|
|
@ -1511,16 +1518,6 @@ static void si_set_active_query_state(struct pipe_context *ctx, bool enable)
|
|||
}
|
||||
}
|
||||
|
||||
/* Flag the state atoms that depend on occlusion queries for re-emission.
 *
 * sctx:               context whose atoms are marked dirty.
 * old_perfect_enable: whether num_perfect_occlusion_queries was non-zero
 *                     before the caller changed the query counters.
 *
 * NOTE(review): this hunk shrinks from 16 to 6 lines and the matching
 * prototype disappears from the header hunk, so this helper appears to be
 * deleted by the commit rather than kept - confirm against the full diff.
 */
void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable)
|
||||
{
|
||||
/* Marked dirty unconditionally on every call. */
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
|
||||

||||
bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;
|
||||

||||
/* The MSAA config only needs re-emitting when "perfect" queries were switched on or off. */
if (perfect_enable != old_perfect_enable)
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
||||
}
|
||||
|
||||
void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
|
||||
{
|
||||
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
|
||||
|
|
@ -1574,30 +1571,41 @@ static void si_emit_db_render_state(struct si_context *sctx, unsigned index)
|
|||
}
|
||||
|
||||
/* DB_COUNT_CONTROL (occlusion queries) */
|
||||
if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) {
|
||||
bool perfect = sctx->num_perfect_occlusion_queries > 0;
|
||||
bool gfx10_perfect = sctx->gfx_level >= GFX10 && perfect;
|
||||
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
unsigned log_sample_rate = sctx->framebuffer.log_samples;
|
||||
|
||||
db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
|
||||
S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
|
||||
S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) |
|
||||
S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1);
|
||||
} else {
|
||||
db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
|
||||
S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
|
||||
}
|
||||
} else {
|
||||
/* Disable occlusion queries. */
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
db_count_control = 0;
|
||||
} else {
|
||||
if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_DISABLE ||
|
||||
sctx->occlusion_queries_disabled) {
|
||||
/* Occlusion queries disabled. */
|
||||
if (sctx->gfx_level >= GFX7)
|
||||
db_count_control = S_028004_ZPASS_ENABLE(0);
|
||||
else
|
||||
db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
|
||||
} else {
|
||||
/* Occlusion queries enabled. */
|
||||
db_count_control = S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
|
||||
|
||||
if (sctx->gfx_level >= GFX7) {
|
||||
db_count_control |= S_028004_ZPASS_ENABLE(1) |
|
||||
S_028004_SLICE_EVEN_ENABLE(1) |
|
||||
S_028004_SLICE_ODD_ENABLE(1);
|
||||
}
|
||||
|
||||
if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER ||
|
||||
/* Boolean occlusion queries must set PERFECT_ZPASS_COUNTS for depth-only rendering
|
||||
* without depth writes or when depth testing is disabled. */
|
||||
(sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_BOOLEAN &&
|
||||
(!sctx->queued.named.dsa->depth_enabled ||
|
||||
(!sctx->queued.named.blend->cb_target_mask &&
|
||||
!sctx->queued.named.dsa->depth_write_enabled))))
|
||||
db_count_control |= S_028004_PERFECT_ZPASS_COUNTS(1);
|
||||
|
||||
if (sctx->gfx_level >= GFX10 &&
|
||||
sctx->occlusion_query_mode != SI_OCCLUSION_QUERY_MODE_CONSERVATIVE_BOOLEAN)
|
||||
db_count_control |= S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(1);
|
||||
}
|
||||
|
||||
/* This should always be set on GFX11. */
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
db_count_control |= S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(1);
|
||||
|
||||
db_shader_control = sctx->ps_db_shader_control;
|
||||
|
||||
if (sctx->screen->info.has_export_conflict_bug &&
|
||||
|
|
@ -3597,7 +3605,8 @@ static bool si_out_of_order_rasterization(struct si_context *sctx)
|
|||
!dsa_order_invariant.pass_set)
|
||||
return false;
|
||||
|
||||
if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set)
|
||||
if (sctx->occlusion_query_mode == SI_OCCLUSION_QUERY_MODE_PRECISE_INTEGER &&
|
||||
!dsa_order_invariant.pass_set)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -580,7 +580,6 @@ void si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex);
|
|||
void si_update_ps_iter_samples(struct si_context *sctx);
|
||||
void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st);
|
||||
void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st);
|
||||
void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable);
|
||||
unsigned gfx103_get_cu_mask_ps(struct si_screen *sscreen);
|
||||
|
||||
struct si_fast_udiv_info32 {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue