radeonsi/gfx12: adjust HiZ/HiS logic

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32653>
This commit is contained in:
Marek Olšák 2024-12-16 03:59:13 -05:00 committed by Marge Bot
parent e3cef02c24
commit cdecbee922
7 changed files with 110 additions and 27 deletions

View file

@ -555,6 +555,7 @@ static void handle_env_var_force_family(struct radeon_info *info)
get_radeon_info(info, &ac_fake_hw_db[i]); get_radeon_info(info, &ac_fake_hw_db[i]);
info->name = "NOOP"; info->name = "NOOP";
info->family_overridden = true; info->family_overridden = true;
info->chip_rev = 1;
return; return;
} }
} }

View file

@ -3035,7 +3035,7 @@ static bool gfx12_compute_hiz_his_info(struct ac_addrlib *addrlib, const struct
{ {
assert(surf_in->flags.depth != surf_in->flags.stencil); assert(surf_in->flags.depth != surf_in->flags.stencil);
if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev <= 1)) if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev == 0))
return true; return true;
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
@ -3109,7 +3109,11 @@ static bool gfx12_compute_miptree(struct ac_addrlib *addrlib, const struct radeo
surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign)); surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize; surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;
return gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in); if (info->chip_rev >= 2 &&
!gfx12_compute_hiz_his_info(addrlib, info, surf, &surf->u.gfx9.zs.his, in))
return false;
return true;
} }
surf->u.gfx9.surf_slice_size = out.sliceSize; surf->u.gfx9.surf_slice_size = out.sliceSize;

View file

@ -514,6 +514,20 @@
__event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \ __event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \
} while (0) } while (0)
/* Emit one RELEASE_MEM packet carrying a BOTTOM_OF_PIPE_TS event (event index 5)
 * with every other field zeroed, i.e. no memory write and no interrupt is requested.
 *
 * The packet is only emitted when both GFX_VERSION == GFX12 and ALT_HIZ_LOGIC are
 * true; otherwise the macro expands to nothing at run time. The static_assert
 * rejects any expansion where ALT_HIZ_LOGIC is set for a non-GFX12 GFX_VERSION.
 *
 * GFX_VERSION and ALT_HIZ_LOGIC are not parameters: they must be visible at the
 * expansion site (e.g. as template parameters of the enclosing function).
 *
 * The PKT3 header is followed by exactly 7 payload dwords; keep the count argument
 * and the number of radeon_emit() calls in sync if this packet is ever changed.
 *
 * NOTE(review): presumably meant to be placed right after each draw packet as part
 * of the "alt_hiz_logic" option -- confirm the hardware rationale with the author.
 */
#define radeon_emit_alt_hiz_logic() do { \
static_assert(GFX_VERSION == GFX12 || !ALT_HIZ_LOGIC, ""); \
if (GFX_VERSION == GFX12 && ALT_HIZ_LOGIC) { \
radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \
radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); \
radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \
radeon_emit(0); /* ADDRESS_LO */ \
radeon_emit(0); /* ADDRESS_HI */ \
radeon_emit(0); /* DATA_LO */ \
radeon_emit(0); /* DATA_HI */ \
radeon_emit(0); /* INT_CTXID */ \
} \
} while (0)
/* This should be evaluated at compile time if all parameters are constants. */ /* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess, si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,

View file

@ -22,6 +22,7 @@ OPT_BOOL(dcc_msaa, true, "Enable DCC for MSAA for GFX10-10.3")
OPT_BOOL(zerovram, false, "Zero all VRAM allocations") OPT_BOOL(zerovram, false, "Zero all VRAM allocations")
OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease performance.") OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease performance.")
OPT_BOOL(cache_rb_gl2, false, "Enable GL2 caching for CB and DB.") OPT_BOOL(cache_rb_gl2, false, "Enable GL2 caching for CB and DB.")
OPT_BOOL(alt_hiz_logic, false, "Enable alternative HiZ logic")
#undef OPT_BOOL #undef OPT_BOOL
#undef OPT_INT #undef OPT_INT

View file

@ -382,6 +382,7 @@ struct si_texture {
bool can_sample_z : 1; bool can_sample_z : 1;
bool can_sample_s : 1; bool can_sample_s : 1;
bool need_flush_after_depth_decompression: 1; bool need_flush_after_depth_decompression: 1;
bool force_disable_hiz_his : 1;
/* We need to track DCC dirtiness, because st/dri usually calls /* We need to track DCC dirtiness, because st/dri usually calls
* flush_resource twice per frame (not a bug) and we don't wanna * flush_resource twice per frame (not a bug) and we don't wanna

View file

@ -1694,6 +1694,20 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref);
} }
struct pipe_surface *zssurf = sctx->framebuffer.state.zsbuf;
struct si_texture *zstex = (struct si_texture*)(zssurf ? zssurf->texture : NULL);
if (sctx->gfx_level == GFX12 && !sctx->screen->options.alt_hiz_logic &&
sctx->framebuffer.has_stencil && dsa->stencil_enabled && !zstex->force_disable_hiz_his) {
zstex->force_disable_hiz_his = true;
si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
if (sctx->framebuffer.has_hiz_his) {
sctx->framebuffer.has_hiz_his = false;
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
}
}
if (old_dsa->alpha_func != dsa->alpha_func) { if (old_dsa->alpha_func != dsa->alpha_func) {
si_ps_key_update_dsa(sctx); si_ps_key_update_dsa(sctx);
si_update_ps_inputs_read_or_disabled(sctx); si_update_ps_inputs_read_or_disabled(sctx);
@ -2690,9 +2704,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (util_format_has_stencil(util_format_description(zstex->buffer.b.b.format))) if (util_format_has_stencil(util_format_description(zstex->buffer.b.b.format)))
sctx->framebuffer.has_stencil = true; sctx->framebuffer.has_stencil = true;
if (sctx->gfx_level == GFX12 && !sctx->screen->options.alt_hiz_logic &&
sctx->framebuffer.has_stencil && sctx->queued.named.dsa->stencil_enabled)
zstex->force_disable_hiz_his = true;
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
sctx->framebuffer.has_hiz_his = zstex->surface.u.gfx9.zs.hiz.offset || sctx->framebuffer.has_hiz_his = (zstex->surface.u.gfx9.zs.hiz.offset ||
zstex->surface.u.gfx9.zs.his.offset; zstex->surface.u.gfx9.zs.his.offset) &&
!zstex->force_disable_hiz_his;
} }
} }
@ -3319,18 +3338,24 @@ static void gfx12_emit_framebuffer_state(struct si_context *sctx, unsigned index
gfx12_set_context_reg(R_028034_DB_STENCIL_READ_BASE_HI, zb->ds.db_stencil_base >> 32); gfx12_set_context_reg(R_028034_DB_STENCIL_READ_BASE_HI, zb->ds.db_stencil_base >> 32);
gfx12_set_context_reg(R_028038_DB_STENCIL_WRITE_BASE, zb->ds.db_stencil_base); gfx12_set_context_reg(R_028038_DB_STENCIL_WRITE_BASE, zb->ds.db_stencil_base);
gfx12_set_context_reg(R_02803C_DB_STENCIL_WRITE_BASE_HI, zb->ds.db_stencil_base >> 32); gfx12_set_context_reg(R_02803C_DB_STENCIL_WRITE_BASE_HI, zb->ds.db_stencil_base >> 32);
gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, zb->ds.u.gfx12.hiz_info);
gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, zb->ds.u.gfx12.his_info);
if (zb->ds.u.gfx12.hiz_info) { if (tex->force_disable_hiz_his) {
gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, zb->ds.u.gfx12.hiz_base); gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, S_028B94_SURFACE_ENABLE(0));
gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, zb->ds.u.gfx12.hiz_base >> 32); gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, S_028B98_SURFACE_ENABLE(0));
gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, zb->ds.u.gfx12.hiz_size_xy); } else {
} gfx12_set_context_reg(R_028B94_PA_SC_HIZ_INFO, zb->ds.u.gfx12.hiz_info);
if (zb->ds.u.gfx12.his_info) { gfx12_set_context_reg(R_028B98_PA_SC_HIS_INFO, zb->ds.u.gfx12.his_info);
gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, zb->ds.u.gfx12.his_base);
gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, zb->ds.u.gfx12.his_base >> 32); if (zb->ds.u.gfx12.hiz_info) {
gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, zb->ds.u.gfx12.his_size_xy); gfx12_set_context_reg(R_028B9C_PA_SC_HIZ_BASE, zb->ds.u.gfx12.hiz_base);
gfx12_set_context_reg(R_028BA0_PA_SC_HIZ_BASE_EXT, zb->ds.u.gfx12.hiz_base >> 32);
gfx12_set_context_reg(R_028BA4_PA_SC_HIZ_SIZE_XY, zb->ds.u.gfx12.hiz_size_xy);
}
if (zb->ds.u.gfx12.his_info) {
gfx12_set_context_reg(R_028BA8_PA_SC_HIS_BASE, zb->ds.u.gfx12.his_base);
gfx12_set_context_reg(R_028BAC_PA_SC_HIS_BASE_EXT, zb->ds.u.gfx12.his_base >> 32);
gfx12_set_context_reg(R_028BB0_PA_SC_HIS_SIZE_XY, zb->ds.u.gfx12.his_size_xy);
}
} }
} else if (sctx->framebuffer.dirty_zsbuf) { } else if (sctx->framebuffer.dirty_zsbuf) {
gfx12_set_context_reg(R_028018_DB_Z_INFO, gfx12_set_context_reg(R_028018_DB_Z_INFO,

View file

@ -837,6 +837,11 @@ enum si_has_sh_pairs_packed {
HAS_SH_PAIRS_PACKED_ON, HAS_SH_PAIRS_PACKED_ON,
}; };
/* Compile-time switch selecting whether the draw code is instantiated with the
 * alternative HiZ logic. The values are used in boolean context, so OFF must
 * stay 0 and ON must stay non-zero.
 */
enum si_alt_hiz_logic {
   ALT_HIZ_LOGIC_OFF = 0,
   ALT_HIZ_LOGIC_ON = 1,
};
template <si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE template <si_is_draw_vertex_state IS_DRAW_VERTEX_STATE> ALWAYS_INLINE
static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *indirect, static bool num_instanced_prims_less_than(const struct pipe_draw_indirect_info *indirect,
enum mesa_prim prim, enum mesa_prim prim,
@ -1231,7 +1236,8 @@ void si_emit_buffered_compute_sh_regs(struct si_context *sctx)
#endif #endif
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG, template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED> ALWAYS_INLINE si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED,
si_alt_hiz_logic ALT_HIZ_LOGIC> ALWAYS_INLINE
static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info, static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info,
unsigned drawid_base, unsigned drawid_base,
const struct pipe_draw_indirect_info *indirect, const struct pipe_draw_indirect_info *indirect,
@ -1423,6 +1429,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(di_src_sel); radeon_emit(di_src_sel);
radeon_emit_alt_hiz_logic();
} else { } else {
uint64_t count_va = 0; uint64_t count_va = 0;
@ -1448,6 +1456,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(count_va >> 32); radeon_emit(count_va >> 32);
radeon_emit(indirect->stride); radeon_emit(indirect->stride);
radeon_emit(di_src_sel); radeon_emit(di_src_sel);
radeon_emit_alt_hiz_logic();
} }
} else { } else {
if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN || if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
@ -1567,6 +1577,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(va >> 32); radeon_emit(va >> 32);
radeon_emit(draws[i].count); radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
radeon_emit_alt_hiz_logic();
} }
if (num_draws > 1) { if (num_draws > 1) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -1586,6 +1598,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(va >> 32); radeon_emit(va >> 32);
radeon_emit(draws[i].count); radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
radeon_emit_alt_hiz_logic();
} }
if (num_draws > 1) { if (num_draws > 1) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */ BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
@ -1606,6 +1620,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(va >> 32); radeon_emit(va >> 32);
radeon_emit(draws[i].count); radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */ radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
radeon_emit_alt_hiz_logic();
} }
if (num_draws > 1) { if (num_draws > 1) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -1633,6 +1649,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_DMA | radeon_emit(V_0287F0_DI_SRC_SEL_DMA |
S_0287F0_NOT_EOP(GFX_VERSION >= GFX10 && GFX_VERSION < GFX12 && S_0287F0_NOT_EOP(GFX_VERSION >= GFX10 && GFX_VERSION < GFX12 &&
i < num_draws - 1)); i < num_draws - 1));
radeon_emit_alt_hiz_logic();
} }
} }
} }
@ -1646,6 +1664,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(0); radeon_emit(0);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_emit_alt_hiz_logic();
for (unsigned i = 0; i < 3; i++) for (unsigned i = 0; i < 3; i++)
radeon_event_write(V_028A90_SQ_NON_EVENT); radeon_event_write(V_028A90_SQ_NON_EVENT);
} else if (increment_draw_id) { } else if (increment_draw_id) {
@ -1661,6 +1680,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(draws[i].count); radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_emit_alt_hiz_logic();
} }
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) { if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -1674,6 +1695,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); radeon_emit(PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
radeon_emit(draws[i].count); radeon_emit(draws[i].count);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
radeon_emit_alt_hiz_logic();
} }
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) { if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */ BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
@ -2030,7 +2053,7 @@ static void si_emit_all_states(struct si_context *sctx, uint64_t skip_atom_mask)
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG, template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED,
util_popcnt POPCNT> ALWAYS_INLINE util_popcnt POPCNT, si_alt_hiz_logic ALT_HIZ_LOGIC> ALWAYS_INLINE
static void si_draw(struct pipe_context *ctx, static void si_draw(struct pipe_context *ctx,
const struct pipe_draw_info *info, const struct pipe_draw_info *info,
unsigned drawid_offset, unsigned drawid_offset,
@ -2348,7 +2371,8 @@ static void si_draw(struct pipe_context *ctx,
return; return;
} }
si_emit_draw_packets<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE, HAS_SH_PAIRS_PACKED> si_emit_draw_packets<GFX_VERSION, HAS_TESS, HAS_GS, NGG, IS_DRAW_VERTEX_STATE,
HAS_SH_PAIRS_PACKED, ALT_HIZ_LOGIC>
(sctx, info, drawid_offset, indirect, draws, num_draws, indexbuf, (sctx, info, drawid_offset, indirect, draws, num_draws, indexbuf,
index_size, index_offset, instance_count); index_size, index_offset, instance_count);
/* <-- CUs start to get busy here if we waited. */ /* <-- CUs start to get busy here if we waited. */
@ -2400,7 +2424,7 @@ static void si_draw(struct pipe_context *ctx,
} }
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG, template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED> si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, si_alt_hiz_logic ALT_HIZ_LOGIC>
static void si_draw_vbo(struct pipe_context *ctx, static void si_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info, const struct pipe_draw_info *info,
unsigned drawid_offset, unsigned drawid_offset,
@ -2408,12 +2432,14 @@ static void si_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_start_count_bias *draws, const struct pipe_draw_start_count_bias *draws,
unsigned num_draws) unsigned num_draws)
{ {
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_OFF, HAS_SH_PAIRS_PACKED, POPCNT_NO> si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_OFF, HAS_SH_PAIRS_PACKED,
POPCNT_NO, ALT_HIZ_LOGIC>
(ctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0); (ctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
} }
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG, template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, util_popcnt POPCNT> si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED, util_popcnt POPCNT,
si_alt_hiz_logic ALT_HIZ_LOGIC>
static void si_draw_vertex_state(struct pipe_context *ctx, static void si_draw_vertex_state(struct pipe_context *ctx,
struct pipe_vertex_state *vstate, struct pipe_vertex_state *vstate,
uint32_t partial_velem_mask, uint32_t partial_velem_mask,
@ -2429,7 +2455,8 @@ static void si_draw_vertex_state(struct pipe_context *ctx,
dinfo.instance_count = 1; dinfo.instance_count = 1;
dinfo.index.resource = state->b.input.indexbuf; dinfo.index.resource = state->b.input.indexbuf;
si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_ON, HAS_SH_PAIRS_PACKED, POPCNT> si_draw<GFX_VERSION, HAS_TESS, HAS_GS, NGG, DRAW_VERTEX_STATE_ON, HAS_SH_PAIRS_PACKED, POPCNT,
ALT_HIZ_LOGIC>
(ctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask); (ctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
if (info.take_vertex_state_ownership) if (info.take_vertex_state_ownership)
@ -2491,18 +2518,28 @@ static void si_init_draw_vbo(struct si_context *sctx)
if (!NGG && GFX_VERSION >= GFX11) if (!NGG && GFX_VERSION >= GFX11)
return; return;
if (GFX_VERSION >= GFX11 && GFX_VERSION < GFX12 && sctx->screen->info.has_set_sh_pairs_packed) { if (GFX_VERSION == GFX12 && sctx->screen->options.alt_hiz_logic) {
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON>; si_draw_vbo<GFX12, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, ALT_HIZ_LOGIC_ON>;
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] = sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON, POPCNT>; si_draw_vertex_state<GFX12, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, POPCNT,
ALT_HIZ_LOGIC_ON>;
} else if (GFX_VERSION >= GFX11 && GFX_VERSION < GFX12 &&
sctx->screen->info.has_set_sh_pairs_packed) {
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON, ALT_HIZ_LOGIC_OFF>;
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_ON, POPCNT,
ALT_HIZ_LOGIC_OFF>;
} else { } else {
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF>; si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, ALT_HIZ_LOGIC_OFF>;
sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] = sctx->draw_vertex_state[HAS_TESS][HAS_GS][NGG] =
si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, POPCNT>; si_draw_vertex_state<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_SH_PAIRS_PACKED_OFF, POPCNT,
ALT_HIZ_LOGIC_OFF>;
} }
} }