radeonsi: handle GE_CNTL and IA_MULTI_VGT_PARAM as tracked registers

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23687>
This commit is contained in:
Marek Olšák 2023-06-06 13:20:14 -04:00 committed by Marge Bot
parent 12b123fdb7
commit 283be8ac3b
5 changed files with 33 additions and 26 deletions

View file

@ -334,6 +334,16 @@
} \
} while (0)
/* Set an indexed uconfig register through radeon_set_uconfig_reg_idx, but only
 * when the tracked state says it is needed: either the bit for "reg" is not
 * set in tracked_regs.other_reg_saved_mask, or the value recorded in
 * tracked_regs.other_reg_value[reg] differs from "val". After emitting, the
 * saved bit and the recorded value are updated.
 *
 *   sctx      - pointer to the context holding tracked_regs and screen
 *   gfx_level - forwarded unchanged to radeon_set_uconfig_reg_idx
 *   offset    - forwarded unchanged to radeon_set_uconfig_reg_idx
 *   reg       - slot in other_reg_value[] / bit number in other_reg_saved_mask
 *   idx       - forwarded unchanged to radeon_set_uconfig_reg_idx
 *   val       - register value; evaluated exactly once
 *
 * Every argument use is parenthesized so that expressions such as "&ctx" or
 * "a ? b : c" can be passed safely. Note that "sctx" and "reg" are still
 * evaluated more than once, so they must not have side effects.
 */
#define radeon_opt_set_uconfig_reg_idx(sctx, gfx_level, offset, reg, idx, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.other_reg_value[(reg)] != __value) { \
      radeon_set_uconfig_reg_idx((sctx)->screen, (gfx_level), (offset), (idx), __value); \
      (sctx)->tracked_regs.other_reg_saved_mask |= 0x1ull << (reg); \
      (sctx)->tracked_regs.other_reg_value[(reg)] = __value; \
   } \
} while (0)
#define radeon_set_privileged_config_reg(reg, value) do { \
assert((reg) < CIK_UCONFIG_REG_OFFSET); \
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \

View file

@ -283,6 +283,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_REUSE_OFF] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_IA_MULTI_VGT_PARAM] = 0xff;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL] = 0;
@ -545,7 +546,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
ctx->last_primitive_restart_en = ctx->gfx_level >= GFX11 ? false : -1;
ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_vs_state = ~0;
ctx->last_gs_state = ~0;
ctx->last_ls = NULL;

View file

@ -1158,7 +1158,6 @@ struct si_context {
int last_primitive_restart_en;
unsigned last_restart_index;
unsigned last_prim;
unsigned last_multi_vgt_param;
unsigned current_vs_state; /* all VS bits including LS bits */
unsigned current_gs_state; /* only GS and NGG bits */
unsigned last_vs_state;

View file

@ -293,6 +293,7 @@ enum si_tracked_context_reg
/* The slots below can be reused by other generations. */
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, /* GFX6-8 (GFX9+ can reuse this slot) */
SI_TRACKED_VGT_REUSE_OFF, /* GFX6-8 (GFX9+ can reuse this slot) */
SI_TRACKED_IA_MULTI_VGT_PARAM, /* GFX6-8 (GFX9+ can reuse this slot) */
SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP, /* GFX9-10 - the slots above can be reused */
SI_TRACKED_VGT_GS_ONCHIP_CNTL, /* GFX9-10 - the slots above can be reused */
@ -328,6 +329,9 @@ enum si_tracked_other_reg {
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, /* GFX10+ */
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG, /* GFX11+ */
SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX9 only */
SI_TRACKED_GE_CNTL = SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX10+ */
SI_TRACKED_COMPUTE_RESOURCE_LIMITS,
SI_TRACKED_COMPUTE_NUM_THREAD_X,
SI_TRACKED_COMPUTE_NUM_THREAD_Y,

View file

@ -1303,24 +1303,23 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
(sctx, indirect, prim, num_patches, instance_count, primitive_restart,
min_vertex_count);
/* Draw state. */
if (ia_multi_vgt_param != sctx->last_multi_vgt_param ||
/* Workaround for SpecviewPerf13 Catia hang on GFX9. */
(GFX_VERSION == GFX9 && prim != sctx->last_prim)) {
radeon_begin(cs);
radeon_begin(cs);
if (GFX_VERSION == GFX9) {
/* Workaround for SpecviewPerf13 Catia hang on GFX9: when the primitive type
 * changes, drop the "saved" bit so the tracked-register check below re-emits
 * IA_MULTI_VGT_PARAM even if its value is unchanged. On GFX9 this register is
 * tracked in the "other" set under SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, so
 * that is the bit that must be cleared in other_reg_saved_mask (the
 * SI_TRACKED_IA_MULTI_VGT_PARAM enumerator belongs to the context-register
 * set and would clear an unrelated bit here).
 */
if (prim != sctx->last_prim)
   sctx->tracked_regs.other_reg_saved_mask &= ~BITFIELD64_BIT(SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG);
if (GFX_VERSION == GFX9)
radeon_set_uconfig_reg_idx(sctx->screen, GFX_VERSION,
R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
else if (GFX_VERSION >= GFX7)
radeon_set_context_reg_idx(R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
else
radeon_set_context_reg(R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
radeon_end();
sctx->last_multi_vgt_param = ia_multi_vgt_param;
radeon_opt_set_uconfig_reg_idx(sctx, GFX_VERSION, R_030960_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG,
4, ia_multi_vgt_param);
} else if (GFX_VERSION >= GFX7) {
radeon_opt_set_context_reg_idx(sctx, R_028AA8_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
} else {
radeon_opt_set_context_reg(sctx, R_028AA8_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
}
radeon_end();
}
/* GFX10 removed IA_MULTI_VGT_PARAM in exchange for GE_CNTL.
@ -1371,14 +1370,9 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
* Since we don't use that, we don't have to do anything.
*/
if (ge_cntl != sctx->last_multi_vgt_param) {
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
radeon_begin(cs);
radeon_set_uconfig_reg(R_03096C_GE_CNTL, ge_cntl);
radeon_end();
sctx->last_multi_vgt_param = ge_cntl;
}
radeon_begin(&sctx->gfx_cs);
radeon_opt_set_uconfig_reg(sctx, R_03096C_GE_CNTL, SI_TRACKED_GE_CNTL, ge_cntl);
radeon_end();
}
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,