diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 044516a90ff..9d0506a5ccd 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -334,6 +334,16 @@ } \ } while (0) +#define radeon_opt_set_uconfig_reg_idx(sctx, gfx_level, offset, reg, idx, val) do { \ + unsigned __value = val; \ + if (((sctx->tracked_regs.other_reg_saved_mask >> (reg)) & 0x1) != 0x1 || \ + sctx->tracked_regs.other_reg_value[reg] != __value) { \ + radeon_set_uconfig_reg_idx((sctx)->screen, gfx_level, offset, idx, __value); \ + sctx->tracked_regs.other_reg_saved_mask |= 0x1ull << (reg); \ + sctx->tracked_regs.other_reg_value[reg] = __value; \ + } \ +} while (0) + #define radeon_set_privileged_config_reg(reg, value) do { \ assert((reg) < CIK_UCONFIG_REG_OFFSET); \ radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \ diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 9ce16488647..75db3581283 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -283,6 +283,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0; ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_REUSE_OFF] = 0; + ctx->tracked_regs.context_reg_value[SI_TRACKED_IA_MULTI_VGT_PARAM] = 0xff; ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0; ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL] = 0; @@ -545,7 +546,6 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) ctx->last_primitive_restart_en = ctx->gfx_level >= GFX11 ? 
false : -1; ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN; ctx->last_prim = -1; - ctx->last_multi_vgt_param = -1; ctx->last_vs_state = ~0; ctx->last_gs_state = ~0; ctx->last_ls = NULL; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6d8f567baa9..f2d0b21b52b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1158,7 +1158,6 @@ struct si_context { int last_primitive_restart_en; unsigned last_restart_index; unsigned last_prim; - unsigned last_multi_vgt_param; unsigned current_vs_state; /* all VS bits including LS bits */ unsigned current_gs_state; /* only GS and NGG bits */ unsigned last_vs_state; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index b2af33747dc..271dbeadc89 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -293,6 +293,7 @@ enum si_tracked_context_reg /* The slots below can be reused by other generations. 
*/ SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, /* GFX6-8 (GFX9+ can reuse this slot) */ SI_TRACKED_VGT_REUSE_OFF, /* GFX6-8 (GFX9+ can reuse this slot) */ + SI_TRACKED_IA_MULTI_VGT_PARAM, /* GFX6-8 (GFX9+ can reuse this slot) */ SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP, /* GFX9-10 - the slots above can be reused */ SI_TRACKED_VGT_GS_ONCHIP_CNTL, /* GFX9-10 - the slots above can be reused */ @@ -328,6 +329,9 @@ enum si_tracked_other_reg { SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, /* GFX10+ */ SI_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG, /* GFX11+ */ + SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX9 only */ + SI_TRACKED_GE_CNTL = SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX10+ */ + SI_TRACKED_COMPUTE_RESOURCE_LIMITS, SI_TRACKED_COMPUTE_NUM_THREAD_X, SI_TRACKED_COMPUTE_NUM_THREAD_Y, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 684249657f9..992cd6cbc17 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1303,24 +1303,23 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx, (sctx, indirect, prim, num_patches, instance_count, primitive_restart, min_vertex_count); - /* Draw state. */ - if (ia_multi_vgt_param != sctx->last_multi_vgt_param || - /* Workaround for SpecviewPerf13 Catia hang on GFX9. */ - (GFX_VERSION == GFX9 && prim != sctx->last_prim)) { - radeon_begin(cs); + radeon_begin(cs); + if (GFX_VERSION == GFX9) { + /* Workaround for SpecviewPerf13 Catia hang on GFX9. 
*/ + if (prim != sctx->last_prim) + sctx->tracked_regs.other_reg_saved_mask &= ~BITFIELD64_BIT(SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG); - if (GFX_VERSION == GFX9) - radeon_set_uconfig_reg_idx(sctx->screen, GFX_VERSION, - R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); - else if (GFX_VERSION >= GFX7) - radeon_set_context_reg_idx(R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); - else - radeon_set_context_reg(R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); - - radeon_end(); - - sctx->last_multi_vgt_param = ia_multi_vgt_param; + radeon_opt_set_uconfig_reg_idx(sctx, GFX_VERSION, R_030960_IA_MULTI_VGT_PARAM, + SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, + 4, ia_multi_vgt_param); + } else if (GFX_VERSION >= GFX7) { + radeon_opt_set_context_reg_idx(sctx, R_028AA8_IA_MULTI_VGT_PARAM, + SI_TRACKED_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); + } else { + radeon_opt_set_context_reg(sctx, R_028AA8_IA_MULTI_VGT_PARAM, + SI_TRACKED_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); } + radeon_end(); } /* GFX10 removed IA_MULTI_VGT_PARAM in exchange for GE_CNTL. @@ -1371,14 +1370,9 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches) * Since we don't use that, we don't have to do anything. */ - if (ge_cntl != sctx->last_multi_vgt_param) { - struct radeon_cmdbuf *cs = &sctx->gfx_cs; - - radeon_begin(cs); - radeon_set_uconfig_reg(R_03096C_GE_CNTL, ge_cntl); - radeon_end(); - sctx->last_multi_vgt_param = ge_cntl; - } + radeon_begin(&sctx->gfx_cs); + radeon_opt_set_uconfig_reg(sctx, R_03096C_GE_CNTL, SI_TRACKED_GE_CNTL, ge_cntl); + radeon_end(); } template