radeonsi/gfx12: adjust HiZ/HiS logic

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32653>
This commit is contained in:
Marek Olšák 2024-12-16 03:59:13 -05:00 committed by Marge Bot
parent e3cef02c24
commit cdecbee922
7 changed files with 110 additions and 27 deletions

View file

@ -555,6 +555,7 @@ static void handle_env_var_force_family(struct radeon_info *info)
get_radeon_info(info, &ac_fake_hw_db[i]);
info->name = "NOOP";
info->family_overridden = true;
info->chip_rev = 1;
return;
}
}

View file

@ -3035,7 +3035,7 @@ static bool gfx12_compute_hiz_his_info(struct ac_addrlib *addrlib, const struct
{
assert(surf_in->flags.depth != surf_in->flags.stencil);
if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev <= 1))
if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev == 0))
return true;
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
@ -3109,7 +3109,11 @@ static bool gfx12_compute_miptree(struct ac_addrlib *addrlib, const struct radeo
surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;
return gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in);
if (info->chip_rev >= 2 &&
!gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in))
return false;
return true;
}
surf->u.gfx9.surf_slice_size = out.sliceSize;

View file

@ -514,6 +514,20 @@
__event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \
} while (0)
/* Emit the extra packet required by the alternative HiZ logic.
 *
 * Must be expanded in a scope where GFX_VERSION and ALT_HIZ_LOGIC are
 * compile-time constants (they are used inside a static_assert). The
 * static_assert rejects any instantiation that enables ALT_HIZ_LOGIC on a
 * gfx level other than GFX12.
 *
 * When GFX_VERSION == GFX12 and ALT_HIZ_LOGIC is non-zero, this emits one
 * PKT3_RELEASE_MEM packet (header + 7 dwords) carrying a BOTTOM_OF_PIPE_TS
 * event with EVENT_INDEX = 5; every other dword is zero, i.e. no address and
 * no data are programmed. In all other cases nothing is emitted.
 */
#define radeon_emit_alt_hiz_logic() do { \
static_assert(GFX_VERSION == GFX12 || !ALT_HIZ_LOGIC, ""); \
if (GFX_VERSION == GFX12 && ALT_HIZ_LOGIC) { \
radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \
radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); \
radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \
radeon_emit(0); /* ADDRESS_LO */ \
radeon_emit(0); /* ADDRESS_HI */ \
radeon_emit(0); /* DATA_LO */ \
radeon_emit(0); /* DATA_HI */ \
radeon_emit(0); /* INT_CTXID */ \
} \
} while (0)
/* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,

View file

@ -22,6 +22,7 @@ OPT_BOOL(dcc_msaa, true, "Enable DCC for MSAA for GFX10-10.3")
OPT_BOOL(zerovram, false, "Zero all VRAM allocations")
OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease performance.")
OPT_BOOL(cache_rb_gl2, false, "Enable GL2 caching for CB and DB.")
OPT_BOOL(alt_hiz_logic, false, "Enable alternative HiZ logic")
#undef OPT_BOOL
#undef OPT_INT

View file

@ -382,6 +382,7 @@ struct si_texture {
bool can_sample_z : 1;
bool can_sample_s : 1;
bool need_flush_after_depth_decompression: 1;
bool force_disable_hiz_his : 1;
/* We need to track DCC dirtiness, because st/dri usually calls
* flush_resource twice per frame (not a bug) and we don't wanna

View file

@ -1694,6 +1694,20 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
}
struct pipe_surface *zssurf = sctx->framebuffer.state.zsbuf;
struct si_texture *zstex = (struct si_texture*)(zssurf ? zssurf->texture : NULL);
if (sctx->gfx_level == GFX12 && !sctx->screen->options.alt_hiz_logic &&
sctx->framebuffer.has_stencil && dsa->stencil_enabled && !zstex->force_disable_hiz_his) {
zstex->force_disable_hiz_his = true;
si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
if (sctx->framebuffer.has_hiz_his) {
sctx->framebuffer.has_hiz_his = false;
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
}
}
if (old_dsa->alpha_func != dsa->alpha_func) {
si_ps_key_update_dsa(sctx);
si_update_ps_inputs_read_or_disabled(sctx);
@ -2690,9 +2704,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (util_format_has_stencil(util_format_description(zstex->buffer.b.b.format)))
sctx->framebuffer.has_stencil = true;
if (sctx->gfx_level == GFX12 && !sctx->screen->options.alt_hiz_logic &&
sctx->framebuffer.has_stencil && sctx->queued.named.dsa->stencil_enabled)
zstex->force_disable_hiz_his = true;
if (sctx->gfx_level >= GFX12) {
sctx->framebuffer.has_hiz_his = zstex->surface.u.gfx9.zs.hiz.offset ||
zstex->surface.u.gfx9.zs.his.offset;
sctx->framebuffer.has_hiz_his = (zstex->surface.u.gfx9.zs.hiz.offset ||
zstex->surface.u.gfx9.zs.his.offset) &&
!zstex->force_disable_hiz_his;
}
}
@ -3319,18 +3338,24 @@ static void gfx12_emit_framebuffer_state(struct si_context *sctx, unsigned index
gfx12_set_context_reg(R_028034_DB_STENCIL_READ_BASE_HI, zb->ds.db_stencil_base >> 32);
gfx12_set_context_reg(R_028038_DB_STENCIL_WRITE_BASE, zb->ds.db_stencil_base);
gfx12_set_context_reg(R_02803C_DB_STENCIL_WRITE_BASE_HI, zb->ds.db_stencil_base >> 32);
gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, zb->ds.u.gfx12.hiz_info);
gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, zb->ds.u.gfx12.his_info);
if (zb->ds.u.gfx12.hiz_info) {
gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, zb->ds.u.gfx12.hiz_base);
gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, zb->ds.u.gfx12.hiz_base >> 32);
gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, zb->ds.u.gfx12.hiz_size_xy);
}
if (zb->ds.u.gfx12.his_info) {
gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, zb->ds.u.gfx12.his_base);
gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, zb->ds.u.gfx12.his_base >> 32);
gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, zb->ds.u.gfx12.his_size_xy);
if (tex->force_disable_hiz_his) {
gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, S_028B94_SURFACE_ENABLE(0));
gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, S_028B98_SURFACE_ENABLE(0));
} else {
gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, zb->ds.u.gfx12.hiz_info);
gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, zb->ds.u.gfx12.his_info);
if (zb->ds.u.gfx12.hiz_info) {
gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, zb->ds.u.gfx12.hiz_base);
gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, zb->ds.u.gfx12.hiz_base >> 32);
gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, zb->ds.u.gfx12.hiz_size_xy);
}
if (zb->ds.u.gfx12.his_info) {
gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, zb->ds.u.gfx12.his_base);
gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, zb->ds.u.gfx12.his_base >> 32);
gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, zb->ds.u.gfx12.his_size_xy);
}
}
} else if (sctx->framebuffer.dirty_zsbuf) {
gfx12_set_context_reg(R_028018_DB_Z_INFO,

View file

@ -837,6 +837,11 @@ enum si_has_sh_pairs_packed {
HAS_SH_PAIRS_PACKED_ON,
};
/* Compile-time switch passed as a template parameter to the draw functions to
 * select whether they are built with the alternative HiZ logic.
 *
 * The value is tested as a boolean by radeon_emit_alt_hiz_logic()
 * ("!ALT_HIZ_LOGIC" / "ALT_HIZ_LOGIC"), so ALT_HIZ_LOGIC_OFF must remain the
 * first enumerator (value 0).
 */
enum si_alt_hiz_logic {
ALT_HIZ_LOGIC_OFF,
ALT_HIZ_LOGIC_ON,
};
template <si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *indirect,
enum mesa_prim prim,
@ -1231,7 +1236,8 @@ void si_emit_buffered_compute_sh_regs(struct si_context *sctx)
#endif
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED> ALWAYS_INLINE
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED,
si_alt_hiz_logic ALT_HIZ_LOGIC> ALWAYS_INLINE
static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info,
unsigned drawid_base,
const struct pipe_draw_indirect_info *indirect,
@ -1423,6 +1429,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(di_src_sel);
radeon_emit_alt_hiz_logic();
} else {
uint64_t count_va = 0;
@ -1448,6 +1456,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(count_va >> 32);
radeon_emit(indirect->stride);
radeon_emit(di_src_sel);
radeon_emit_alt_hiz_logic();
}
} else {
if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
@ -1567,6 +1577,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(va >> 32);
radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
radeon_emit_alt_hiz_logic();
}
if (num_draws > 1) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -1586,6 +1598,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(va >> 32);
radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
radeon_emit_alt_hiz_logic();
}
if (num_draws > 1) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
@ -1606,6 +1620,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(va >> 32);
radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
radeon_emit_alt_hiz_logic();
}
if (num_draws > 1) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -1633,6 +1649,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_DMA |
S_0287F0_NOT_EOP(GFX_VERSION >= GFX10 && GFX_VERSION < GFX12 &&
i < num_draws - 1));
radeon_emit_alt_hiz_logic();
}
}
}
@ -1646,6 +1664,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(0);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_emit_alt_hiz_logic();
for (unsigned i = 0; i < 3; i++)
radeon_event_write(V_028A90_SQ_NON_EVENT);
} else if (increment_draw_id) {
@ -1661,6 +1680,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_emit_alt_hiz_logic();
}
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -1674,6 +1695,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_emit_alt_hiz_logic();
}
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -2030,7 +2053,7 @@ static void si_emit_all_states(struct si_context *sctx, uint64_t skip_atom_mask)
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED,
util_popcnt POPCNT> ALWAYS_INLINE
util_popcnt POPCNT, si_alt_hiz_logic ALT_HIZ_LOGIC> ALWAYS_INLINE
static void si_draw(struct pipe_context *ctx,
const struct pipe_draw_info *info,
unsigned drawid_offset,
@ -2348,7 +2371,8 @@ static void si_draw(struct pipe_context *ctx,
return;
}
si_emit_draw_packets<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE, HAS_SH_PAIRS_PACKED>
si_emit_draw_packets<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE,
HAS_SH_PAIRS_PACKED, ALT_HIZ_LOGIC>
(sctx, info, drawid_offset, indirect, draws, num_draws, indexbuf,
index_size, index_offset, instance_count);
/* <-- CUs start to get busy here if we waited. */
@ -2400,7 +2424,7 @@ static void si_draw(struct pipe_context *ctx,
}
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED>
si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, si_alt_hiz_logic ALT_HIZ_LOGIC>
static void si_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info,
unsigned drawid_offset,
@ -2408,12 +2432,14 @@ static void si_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws)
{
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_OFF, HAS_SH_PAIRS_PACKED, POPCNT_NO>
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_OFF, HAS_SH_PAIRS_PACKED,
POPCNT_NO, ALT_HIZ_LOGIC>
(ctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
}
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, util_popcnt POPCNT>
si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, util_popcnt POPCNT,
si_alt_hiz_logic ALT_HIZ_LOGIC>
static void si_draw_vertex_state(struct pipe_context *ctx,
struct pipe_vertex_state *vstate,
uint32_t partial_velem_mask,
@ -2429,7 +2455,8 @@ static void si_draw_vertex_state(struct pipe_context *ctx,
dinfo.instance_count = 1;
dinfo.index.resource = state->b.input.indexbuf;
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_ON, HAS_SH_PAIRS_PACKED, POPCNT>
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_ON, HAS_SH_PAIRS_PACKED, POPCNT,
ALT_HIZ_LOGIC>
(ctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
if (info.take_vertex_state_ownership)
@ -2491,18 +2518,28 @@ static void si_init_draw_vbo(struct si_context *sctx)
if (!NGG && GFX_VERSION >= GFX11)
return;
if (GFX_VERSION >= GFX11 && GFX_VERSION < GFX12 && sctx->screen->info.has_set_sh_pairs_packed) {
if (GFX_VERSION == GFX12 && sctx->screen->options.alt_hiz_logic) {
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON>;
si_draw_vbo<GFX12, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, ALT_HIZ_LOGIC_ON>;
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON, POPCNT>;
si_draw_vertex_state<GFX12, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, POPCNT,
ALT_HIZ_LOGIC_ON>;
} else if (GFX_VERSION >= GFX11 && GFX_VERSION < GFX12 &&
sctx->screen->info.has_set_sh_pairs_packed) {
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON, ALT_HIZ_LOGIC_OFF>;
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON, POPCNT,
ALT_HIZ_LOGIC_OFF>;
} else {
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF>;
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, ALT_HIZ_LOGIC_OFF>;
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, POPCNT>;
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, POPCNT,
ALT_HIZ_LOGIC_OFF>;
}
}