ac,radeonsi: move si_tracked_reg to common code

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38740>
This commit is contained in:
Samuel Pitoiset 2025-12-04 13:00:27 +01:00 committed by Marge Bot
parent 6bda88bfdb
commit 18bdb76408
11 changed files with 526 additions and 524 deletions

View file

@ -56,6 +56,180 @@ struct ac_buffered_sh_regs {
}; };
}; };
/**
 * Slots for tracked registers.
 *
 * radeonsi uses each enumerator both as an index into
 * tracked_regs.reg_value[] and as a bit position in
 * tracked_regs.reg_saved_mask, so the enumerator order and the explicit
 * aliases below are significant.
 *
 * Layout:
 *   [0, AC_NUM_TRACKED_CONTEXT_REGS)                       CONTEXT registers
 *   [AC_FIRST_TRACKED_OTHER_REG, AC_NUM_ALL_TRACKED_REGS)  SH and UCONFIG registers
 *
 * An "N consecutive registers" banner applies to the N enumerators that
 * follow it; presumably such a run can be programmed with one multi-register
 * write starting at its first slot, which is why the run must stay contiguous.
 */
enum ac_tracked_reg {
   /* ---- CONTEXT registers ---- */

   /* 2 consecutive registers (GFX6-11), or separate registers (GFX12) */
   AC_TRACKED_DB_RENDER_CONTROL,
   AC_TRACKED_DB_COUNT_CONTROL,

   AC_TRACKED_DB_DEPTH_CONTROL,
   AC_TRACKED_DB_STENCIL_CONTROL,

   /* 2 consecutive registers */
   AC_TRACKED_DB_DEPTH_BOUNDS_MIN,
   AC_TRACKED_DB_DEPTH_BOUNDS_MAX,

   AC_TRACKED_SPI_INTERP_CONTROL_0,
   AC_TRACKED_PA_SU_POINT_SIZE,
   AC_TRACKED_PA_SU_POINT_MINMAX,
   AC_TRACKED_PA_SU_LINE_CNTL,
   AC_TRACKED_PA_SC_MODE_CNTL_0,
   AC_TRACKED_PA_SU_SC_MODE_CNTL,
   AC_TRACKED_PA_SC_EDGERULE,

   /* 6 consecutive registers */
   AC_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
   AC_TRACKED_PA_SU_POLY_OFFSET_CLAMP,
   AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE,
   AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET,
   AC_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE,
   AC_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET,

   /* 2 consecutive registers */
   AC_TRACKED_PA_SC_LINE_CNTL,
   AC_TRACKED_PA_SC_AA_CONFIG,

   /* 5 consecutive registers (GFX6-11) */
   AC_TRACKED_PA_SU_VTX_CNTL,
   /* 4 consecutive registers (GFX12) */
   AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
   AC_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
   AC_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ,
   AC_TRACKED_PA_CL_GB_HORZ_DISC_ADJ,

   /* Non-consecutive register */
   AC_TRACKED_SPI_SHADER_POS_FORMAT,

   /* 5 consecutive registers (GFX12), or 2 consecutive registers (GFX6-11) */
   AC_TRACKED_SPI_SHADER_Z_FORMAT,
   AC_TRACKED_SPI_SHADER_COL_FORMAT,
   /* 2 consecutive registers. */
   AC_TRACKED_SPI_PS_INPUT_ENA,
   AC_TRACKED_SPI_PS_INPUT_ADDR,

   AC_TRACKED_DB_EQAA,
   AC_TRACKED_DB_RENDER_OVERRIDE2,
   AC_TRACKED_DB_SHADER_CONTROL,
   AC_TRACKED_CB_SHADER_MASK,
   AC_TRACKED_CB_TARGET_MASK,
   AC_TRACKED_PA_CL_CLIP_CNTL,
   AC_TRACKED_PA_CL_VS_OUT_CNTL,
   AC_TRACKED_PA_CL_VTE_CNTL,
   AC_TRACKED_PA_SC_CLIPRECT_RULE,
   AC_TRACKED_PA_SC_LINE_STIPPLE,
   AC_TRACKED_PA_SC_MODE_CNTL_1,
   AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
   AC_TRACKED_SPI_PS_IN_CONTROL,
   AC_TRACKED_VGT_GS_INSTANCE_CNT,
   AC_TRACKED_VGT_GS_MAX_VERT_OUT,
   AC_TRACKED_VGT_SHADER_STAGES_EN,
   AC_TRACKED_VGT_LS_HS_CONFIG,
   AC_TRACKED_VGT_TF_PARAM,
   AC_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,  /* GFX8-9 (only with has_small_prim_filter_sample_loc_bug) */
   AC_TRACKED_PA_SC_BINNER_CNTL_0,           /* GFX9+ */
   AC_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,    /* GFX10+ - the SMALL_PRIM_FILTER slot above can be reused */
   AC_TRACKED_GE_NGG_SUBGRP_CNTL,            /* GFX10+ */
   AC_TRACKED_PA_CL_NGG_CNTL,                /* GFX10+ */
   AC_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL,    /* GFX10.3+ */

   /* 3 consecutive registers */
   AC_TRACKED_SX_PS_DOWNCONVERT,             /* GFX8+ */
   AC_TRACKED_SX_BLEND_OPT_EPSILON,          /* GFX8+ */
   AC_TRACKED_SX_BLEND_OPT_CONTROL,          /* GFX8+ */

   /* The slots below can be reused by other generations. */
   AC_TRACKED_VGT_ESGS_RING_ITEMSIZE,        /* GFX6-8 (GFX9+ can reuse this slot) */
   AC_TRACKED_VGT_REUSE_OFF,                 /* GFX6-8,10.3 */
   AC_TRACKED_IA_MULTI_VGT_PARAM,            /* GFX6-8 (GFX9+ can reuse this slot) */

   AC_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP, /* GFX9 - the slots above can be reused */
   AC_TRACKED_VGT_GS_ONCHIP_CNTL,            /* GFX9-10 - the slots above can be reused */

   AC_TRACKED_VGT_GSVS_RING_ITEMSIZE,        /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GS_MODE,                   /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,   /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GS_OUT_PRIM_TYPE,          /* GFX6-10 (GFX11+ can reuse this slot) */

   /* 3 consecutive registers */
   AC_TRACKED_VGT_GSVS_RING_OFFSET_1,        /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GSVS_RING_OFFSET_2,        /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GSVS_RING_OFFSET_3,        /* GFX6-10 (GFX11+ can reuse this slot) */

   /* 4 consecutive registers */
   AC_TRACKED_VGT_GS_VERT_ITEMSIZE,          /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GS_VERT_ITEMSIZE_1,        /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GS_VERT_ITEMSIZE_2,        /* GFX6-10 (GFX11+ can reuse this slot) */
   AC_TRACKED_VGT_GS_VERT_ITEMSIZE_3,        /* GFX6-10 (GFX11+ can reuse this slot) */

   AC_TRACKED_SPI_VS_OUT_CONFIG,             /* GFX6-11 */
   /* Alias: shares the SPI_VS_OUT_CONFIG slot instead of taking a new one. */
   AC_TRACKED_DB_RENDER_OVERRIDE = AC_TRACKED_SPI_VS_OUT_CONFIG, /* GFX12+ (slot reused) */
   AC_TRACKED_VGT_PRIMITIVEID_EN,            /* GFX6-11 */
   AC_TRACKED_CB_DCC_CONTROL,                /* GFX8-11 */
   AC_TRACKED_DB_STENCIL_READ_MASK,          /* GFX12+ */
   AC_TRACKED_DB_STENCIL_WRITE_MASK,         /* GFX12+ */
   AC_TRACKED_PA_SC_HISZ_CONTROL,            /* GFX12+ */
   AC_TRACKED_PA_SC_LINE_STIPPLE_RESET,      /* GFX12+ */

   /* Number of CONTEXT slots; also the first non-CONTEXT slot. */
   AC_NUM_TRACKED_CONTEXT_REGS,
   AC_FIRST_TRACKED_OTHER_REG = AC_NUM_TRACKED_CONTEXT_REGS,

   /* ---- SH and UCONFIG registers ---- */

   AC_TRACKED_GE_PC_ALLOC = AC_FIRST_TRACKED_OTHER_REG, /* GFX10-11 */
   AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,       /* GFX7-11 */
   AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,       /* GFX10+ */
   AC_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG,  /* GFX11+ */
   AC_TRACKED_SPI_SHADER_GS_OUT_CONFIG_PS,   /* GFX12+ */
   AC_TRACKED_VGT_PRIMITIVEID_EN_UCONFIG,    /* GFX12+ */

   AC_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG,    /* GFX9 only */
   /* Alias: shares the IA_MULTI_VGT_PARAM_UCONFIG slot. */
   AC_TRACKED_GE_CNTL = AC_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX10+ */

   AC_TRACKED_SPI_SHADER_PGM_RSRC2_HS,       /* GFX9+ (not tracked on previous chips) */

   AC_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,

   /* 3 consecutive registers. */
   AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT,
   AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR,
   AC_TRACKED_SPI_SHADER_USER_DATA_HS__VS_STATE_BITS, /* GFX6-8 */

   AC_TRACKED_SPI_SHADER_USER_DATA_LS__BASE_VERTEX,
   AC_TRACKED_SPI_SHADER_USER_DATA_LS__DRAWID,
   AC_TRACKED_SPI_SHADER_USER_DATA_LS__START_INSTANCE,

   /* radeonsi derives a slot as ES__BASE_VERTEX + (sgpr - SI_SGPR_BASE_VERTEX),
    * so the three ES entries must remain adjacent and in this order.
    */
   AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
   AC_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID,
   AC_TRACKED_SPI_SHADER_USER_DATA_ES__START_INSTANCE,

   AC_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX,    /* GFX6-10 */
   AC_TRACKED_SPI_SHADER_USER_DATA_VS__DRAWID,         /* GFX6-10 */
   AC_TRACKED_SPI_SHADER_USER_DATA_VS__START_INSTANCE, /* GFX6-10 */

   AC_TRACKED_COMPUTE_RESOURCE_LIMITS,
   AC_TRACKED_COMPUTE_DISPATCH_INTERLEAVE,   /* GFX12+ (not tracked on previous chips) */
   AC_TRACKED_COMPUTE_NUM_THREAD_X,
   AC_TRACKED_COMPUTE_NUM_THREAD_Y,
   AC_TRACKED_COMPUTE_NUM_THREAD_Z,
   AC_TRACKED_COMPUTE_TMPRING_SIZE,
   AC_TRACKED_COMPUTE_PGM_RSRC3,             /* GFX11+ */

   /* 2 consecutive registers. */
   AC_TRACKED_COMPUTE_PGM_RSRC1,
   AC_TRACKED_COMPUTE_PGM_RSRC2,

   /* 2 consecutive registers. */
   AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, /* GFX11+ */
   AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI, /* GFX11+ */

   /* 3 consecutive registers. */
   AC_TRACKED_SPI_SHADER_GS_MESHLET_DIM,       /* GFX11+ */
   AC_TRACKED_SPI_SHADER_GS_MESHLET_EXP_ALLOC, /* GFX11+ */
   AC_TRACKED_SPI_SHADER_GS_MESHLET_CTRL,      /* GFX12+ */

   /* Total number of tracked slots (CONTEXT + SH + UCONFIG). */
   AC_NUM_ALL_TRACKED_REGS,
};
#define ac_cmdbuf_begin(cs) struct ac_cmdbuf *__cs = (cs); \ #define ac_cmdbuf_begin(cs) struct ac_cmdbuf *__cs = (cs); \
uint32_t __cs_num = __cs->cdw; \ uint32_t __cs_num = __cs->cdw; \
UNUSED uint32_t __cs_num_initial = __cs_num; \ UNUSED uint32_t __cs_num_initial = __cs_num; \

View file

@ -414,58 +414,58 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
gfx12_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8); gfx12_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
gfx12_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1, gfx12_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1,
SI_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1); AC_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1);
gfx12_opt_push_compute_sh_reg(R_00B84C_COMPUTE_PGM_RSRC2, gfx12_opt_push_compute_sh_reg(R_00B84C_COMPUTE_PGM_RSRC2,
SI_TRACKED_COMPUTE_PGM_RSRC2, rsrc2); AC_TRACKED_COMPUTE_PGM_RSRC2, rsrc2);
gfx12_opt_push_compute_sh_reg(R_00B8A0_COMPUTE_PGM_RSRC3, gfx12_opt_push_compute_sh_reg(R_00B8A0_COMPUTE_PGM_RSRC3,
SI_TRACKED_COMPUTE_PGM_RSRC3, config->rsrc3); AC_TRACKED_COMPUTE_PGM_RSRC3, config->rsrc3);
gfx12_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE, gfx12_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE,
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size); AC_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
if (config->scratch_bytes_per_wave) { if (config->scratch_bytes_per_wave) {
gfx12_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, gfx12_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
sctx->compute_scratch_buffer->gpu_address >> 8); sctx->compute_scratch_buffer->gpu_address >> 8);
gfx12_opt_push_compute_sh_reg(R_00B844_COMPUTE_DISPATCH_SCRATCH_BASE_HI, gfx12_opt_push_compute_sh_reg(R_00B844_COMPUTE_DISPATCH_SCRATCH_BASE_HI,
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI, AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI,
sctx->compute_scratch_buffer->gpu_address >> 40); sctx->compute_scratch_buffer->gpu_address >> 40);
} }
} else if (sctx->screen->info.has_set_sh_pairs_packed) { } else if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8); gfx11_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
gfx11_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1, gfx11_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1,
SI_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1); AC_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1);
gfx11_opt_push_compute_sh_reg(R_00B84C_COMPUTE_PGM_RSRC2, gfx11_opt_push_compute_sh_reg(R_00B84C_COMPUTE_PGM_RSRC2,
SI_TRACKED_COMPUTE_PGM_RSRC2, rsrc2); AC_TRACKED_COMPUTE_PGM_RSRC2, rsrc2);
gfx11_opt_push_compute_sh_reg(R_00B8A0_COMPUTE_PGM_RSRC3, gfx11_opt_push_compute_sh_reg(R_00B8A0_COMPUTE_PGM_RSRC3,
SI_TRACKED_COMPUTE_PGM_RSRC3, config->rsrc3); AC_TRACKED_COMPUTE_PGM_RSRC3, config->rsrc3);
gfx11_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE, gfx11_opt_push_compute_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE,
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size); AC_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
if (config->scratch_bytes_per_wave) { if (config->scratch_bytes_per_wave) {
gfx11_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, gfx11_opt_push_compute_sh_reg(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
sctx->compute_scratch_buffer->gpu_address >> 8); sctx->compute_scratch_buffer->gpu_address >> 8);
gfx11_opt_push_compute_sh_reg(R_00B844_COMPUTE_DISPATCH_SCRATCH_BASE_HI, gfx11_opt_push_compute_sh_reg(R_00B844_COMPUTE_DISPATCH_SCRATCH_BASE_HI,
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI, AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI,
sctx->compute_scratch_buffer->gpu_address >> 40); sctx->compute_scratch_buffer->gpu_address >> 40);
} }
} else { } else {
radeon_begin(cs); radeon_begin(cs);
radeon_set_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8); radeon_set_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
radeon_opt_set_sh_reg2(R_00B848_COMPUTE_PGM_RSRC1, radeon_opt_set_sh_reg2(R_00B848_COMPUTE_PGM_RSRC1,
SI_TRACKED_COMPUTE_PGM_RSRC1, AC_TRACKED_COMPUTE_PGM_RSRC1,
config->rsrc1, rsrc2); config->rsrc1, rsrc2);
radeon_opt_set_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE, radeon_opt_set_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE,
SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size); AC_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size);
if (config->scratch_bytes_per_wave && sctx->screen->info.has_scratch_base_registers) { if (config->scratch_bytes_per_wave && sctx->screen->info.has_scratch_base_registers) {
radeon_opt_set_sh_reg2(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, radeon_opt_set_sh_reg2(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, AC_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO,
sctx->compute_scratch_buffer->gpu_address >> 8, sctx->compute_scratch_buffer->gpu_address >> 8,
sctx->compute_scratch_buffer->gpu_address >> 40); sctx->compute_scratch_buffer->gpu_address >> 40);
} }
if (sctx->gfx_level >= GFX10) { if (sctx->gfx_level >= GFX10) {
radeon_opt_set_sh_reg(R_00B8A0_COMPUTE_PGM_RSRC3, radeon_opt_set_sh_reg(R_00B8A0_COMPUTE_PGM_RSRC3,
SI_TRACKED_COMPUTE_PGM_RSRC3, config->rsrc3); AC_TRACKED_COMPUTE_PGM_RSRC3, config->rsrc3);
} }
radeon_end(); radeon_end();
} }
@ -662,15 +662,15 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
gfx12_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, gfx12_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS,
SI_TRACKED_COMPUTE_RESOURCE_LIMITS, AC_TRACKED_COMPUTE_RESOURCE_LIMITS,
compute_resource_limits); compute_resource_limits);
} else if (sctx->screen->info.has_set_sh_pairs_packed) { } else if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, gfx11_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS,
SI_TRACKED_COMPUTE_RESOURCE_LIMITS, AC_TRACKED_COMPUTE_RESOURCE_LIMITS,
compute_resource_limits); compute_resource_limits);
} else { } else {
radeon_opt_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, radeon_opt_set_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS,
SI_TRACKED_COMPUTE_RESOURCE_LIMITS, AC_TRACKED_COMPUTE_RESOURCE_LIMITS,
compute_resource_limits); compute_resource_limits);
} }
@ -776,30 +776,30 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
if (sctx->is_gfx_queue) { if (sctx->is_gfx_queue) {
radeon_opt_set_sh_reg_idx(R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, radeon_opt_set_sh_reg_idx(R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE,
SI_TRACKED_COMPUTE_DISPATCH_INTERLEAVE, 2, dispatch_interleave); AC_TRACKED_COMPUTE_DISPATCH_INTERLEAVE, 2, dispatch_interleave);
} else { } else {
gfx12_opt_push_compute_sh_reg(R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, gfx12_opt_push_compute_sh_reg(R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE,
SI_TRACKED_COMPUTE_DISPATCH_INTERLEAVE, dispatch_interleave); AC_TRACKED_COMPUTE_DISPATCH_INTERLEAVE, dispatch_interleave);
} }
} }
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
gfx12_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, gfx12_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X,
SI_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]); AC_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]);
gfx12_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, gfx12_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y,
SI_TRACKED_COMPUTE_NUM_THREAD_Y, num_threads[1]); AC_TRACKED_COMPUTE_NUM_THREAD_Y, num_threads[1]);
gfx12_opt_push_compute_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z, gfx12_opt_push_compute_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z,
SI_TRACKED_COMPUTE_NUM_THREAD_Z, num_threads[2]); AC_TRACKED_COMPUTE_NUM_THREAD_Z, num_threads[2]);
} else if (sctx->screen->info.has_set_sh_pairs_packed) { } else if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, gfx11_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X,
SI_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]); AC_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]);
gfx11_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, gfx11_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y,
SI_TRACKED_COMPUTE_NUM_THREAD_Y, num_threads[1]); AC_TRACKED_COMPUTE_NUM_THREAD_Y, num_threads[1]);
gfx11_opt_push_compute_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z, gfx11_opt_push_compute_sh_reg(R_00B824_COMPUTE_NUM_THREAD_Z,
SI_TRACKED_COMPUTE_NUM_THREAD_Z, num_threads[2]); AC_TRACKED_COMPUTE_NUM_THREAD_Z, num_threads[2]);
} else { } else {
radeon_opt_set_sh_reg3(R_00B81C_COMPUTE_NUM_THREAD_X, radeon_opt_set_sh_reg3(R_00B81C_COMPUTE_NUM_THREAD_X,
SI_TRACKED_COMPUTE_NUM_THREAD_X, AC_TRACKED_COMPUTE_NUM_THREAD_X,
num_threads[0], num_threads[1], num_threads[2]); num_threads[0], num_threads[1], num_threads[2]);
} }
@ -963,7 +963,7 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
/* Skipping setting redundant registers on compute queues breaks compute. */ /* Skipping setting redundant registers on compute queues breaks compute. */
if (!sctx->is_gfx_queue) { if (!sctx->is_gfx_queue) {
BITSET_CLEAR_RANGE(sctx->tracked_regs.reg_saved_mask, BITSET_CLEAR_RANGE(sctx->tracked_regs.reg_saved_mask,
SI_FIRST_TRACKED_OTHER_REG, SI_NUM_ALL_TRACKED_REGS - 1); AC_FIRST_TRACKED_OTHER_REG, AC_NUM_ALL_TRACKED_REGS - 1);
} }
/* First emit registers. */ /* First emit registers. */

View file

@ -272,108 +272,108 @@ static void si_begin_gfx_cs_debug(struct si_context *ctx)
void si_set_tracked_regs_to_clear_state(struct si_context *ctx) void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
{ {
assert(ctx->gfx_level < GFX12); assert(ctx->gfx_level < GFX12);
STATIC_ASSERT(SI_NUM_ALL_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved_mask) * 8); STATIC_ASSERT(AC_NUM_ALL_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved_mask) * 8);
ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_RENDER_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_COUNT_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DEPTH_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_DEPTH_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_STENCIL_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_STENCIL_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DEPTH_BOUNDS_MIN] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_DEPTH_BOUNDS_MIN] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DEPTH_BOUNDS_MAX] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_DEPTH_BOUNDS_MAX] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_INTERP_CONTROL_0] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_INTERP_CONTROL_0] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POINT_SIZE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POINT_SIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POINT_MINMAX] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POINT_MINMAX] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_LINE_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_LINE_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_0] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_MODE_CNTL_0] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SC_MODE_CNTL] = 0x4; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_SC_MODE_CNTL] = 0x4;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_EDGERULE] = 0xaa99aaaa; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_EDGERULE] = 0xaa99aaaa;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POLY_OFFSET_CLAMP] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POLY_OFFSET_CLAMP] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_CNTL] = 0x1000; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_LINE_CNTL] = 0x1000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_AA_CONFIG] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_VTX_CNTL] = 0x5; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_VTX_CNTL] = 0x5;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_SHADER_POS_FORMAT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_SHADER_Z_FORMAT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_SHADER_COL_FORMAT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_PS_INPUT_ENA] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_PS_INPUT_ADDR] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_EQAA] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_RENDER_OVERRIDE2] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_SHADER_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; ctx->tracked_regs.reg_value[AC_TRACKED_CB_SHADER_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff; ctx->tracked_regs.reg_value[AC_TRACKED_CB_TARGET_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x90000; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_CLIP_CNTL] = 0x90000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_VS_OUT_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_VTE_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_STIPPLE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_LINE_STIPPLE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_MODE_CNTL_1] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x2; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_PS_IN_CONTROL] = 0x2;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_INSTANCE_CNT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_MAX_VERT_OUT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_SHADER_STAGES_EN] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_SHADER_STAGES_EN] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_LS_HS_CONFIG] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_LS_HS_CONFIG] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_TF_PARAM] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x3; ctx->tracked_regs.reg_value[AC_TRACKED_PA_SC_BINNER_CNTL_0] = 0x3;
ctx->tracked_regs.reg_value[SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_GE_NGG_SUBGRP_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_GE_NGG_SUBGRP_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_NGG_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_PA_CL_NGG_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SX_PS_DOWNCONVERT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SX_BLEND_OPT_EPSILON] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SX_BLEND_OPT_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_REUSE_OFF] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_REUSE_OFF] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_IA_MULTI_VGT_PARAM] = 0xff; ctx->tracked_regs.reg_value[AC_TRACKED_IA_MULTI_VGT_PARAM] = 0xff;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_ONCHIP_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MODE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_MODE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x1e; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x1e;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_VERT_ITEMSIZE_1] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_VERT_ITEMSIZE_2] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_GS_VERT_ITEMSIZE_3] = 0;
if (ctx->gfx_level >= GFX12) if (ctx->gfx_level >= GFX12)
ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_DB_RENDER_OVERRIDE] = 0;
else else
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_SPI_VS_OUT_CONFIG] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_PRIMITIVEID_EN] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_VGT_PRIMITIVEID_EN] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0; ctx->tracked_regs.reg_value[AC_TRACKED_CB_DCC_CONTROL] = 0;
/* Set all cleared context registers to saved. */ /* Set all cleared context registers to saved. */
BITSET_SET_COUNT(ctx->tracked_regs.reg_saved_mask, 0, BITSET_SET_COUNT(ctx->tracked_regs.reg_saved_mask, 0,
SI_NUM_TRACKED_CONTEXT_REGS); AC_NUM_TRACKED_CONTEXT_REGS);
} }
void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_func wrapper, void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_func wrapper,

View file

@ -245,7 +245,7 @@ static void clear_reg_saved_mask(struct si_context *sctx, unsigned reg)
{ {
if (reg >= SI_SGPR_BASE_VERTEX && reg <= SI_SGPR_START_INSTANCE) { if (reg >= SI_SGPR_BASE_VERTEX && reg <= SI_SGPR_START_INSTANCE) {
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX + AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX +
(reg - SI_SGPR_BASE_VERTEX)); (reg - SI_SGPR_BASE_VERTEX));
} }
} }
@ -254,7 +254,7 @@ static void clear_reg_saved_mask(struct si_context *sctx, unsigned reg)
do { \ do { \
unsigned addr = sh_base_reg + (reg) * 4; \ unsigned addr = sh_base_reg + (reg) * 4; \
if ((reg) >= SI_SGPR_BASE_VERTEX && (reg) <= SI_SGPR_START_INSTANCE) { \ if ((reg) >= SI_SGPR_BASE_VERTEX && (reg) <= SI_SGPR_START_INSTANCE) { \
unsigned tracked_reg = SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX; \ unsigned tracked_reg = AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX; \
tracked_reg += (reg) - SI_SGPR_BASE_VERTEX; \ tracked_reg += (reg) - SI_SGPR_BASE_VERTEX; \
if (sctx->gfx_level >= GFX12) \ if (sctx->gfx_level >= GFX12) \
gfx12_opt_push_gfx_sh_reg(addr, tracked_reg, value); \ gfx12_opt_push_gfx_sh_reg(addr, tracked_reg, value); \

View file

@ -263,41 +263,41 @@ static void si_emit_cb_render_state(struct si_context *sctx, unsigned index)
radeon_begin(cs); radeon_begin(cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028850_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, gfx12_opt_set_context_reg(R_028850_CB_TARGET_MASK, AC_TRACKED_CB_TARGET_MASK,
cb_target_mask); cb_target_mask);
gfx12_opt_set_context_reg(R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, gfx12_opt_set_context_reg(R_028754_SX_PS_DOWNCONVERT, AC_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert); sx_ps_downconvert);
gfx12_opt_set_context_reg(R_028758_SX_BLEND_OPT_EPSILON, SI_TRACKED_SX_BLEND_OPT_EPSILON, gfx12_opt_set_context_reg(R_028758_SX_BLEND_OPT_EPSILON, AC_TRACKED_SX_BLEND_OPT_EPSILON,
sx_blend_opt_epsilon); sx_blend_opt_epsilon);
gfx12_opt_set_context_reg(R_02875C_SX_BLEND_OPT_CONTROL, SI_TRACKED_SX_BLEND_OPT_CONTROL, gfx12_opt_set_context_reg(R_02875C_SX_BLEND_OPT_CONTROL, AC_TRACKED_SX_BLEND_OPT_CONTROL,
sx_blend_opt_control); sx_blend_opt_control);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(cs); radeon_begin(cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, gfx11_opt_set_context_reg(R_028238_CB_TARGET_MASK, AC_TRACKED_CB_TARGET_MASK,
cb_target_mask); cb_target_mask);
gfx11_opt_set_context_reg(R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, gfx11_opt_set_context_reg(R_028424_CB_DCC_CONTROL, AC_TRACKED_CB_DCC_CONTROL,
cb_dcc_control); cb_dcc_control);
gfx11_opt_set_context_reg(R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, gfx11_opt_set_context_reg(R_028754_SX_PS_DOWNCONVERT, AC_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert); sx_ps_downconvert);
gfx11_opt_set_context_reg(R_028758_SX_BLEND_OPT_EPSILON, SI_TRACKED_SX_BLEND_OPT_EPSILON, gfx11_opt_set_context_reg(R_028758_SX_BLEND_OPT_EPSILON, AC_TRACKED_SX_BLEND_OPT_EPSILON,
sx_blend_opt_epsilon); sx_blend_opt_epsilon);
gfx11_opt_set_context_reg(R_02875C_SX_BLEND_OPT_CONTROL, SI_TRACKED_SX_BLEND_OPT_CONTROL, gfx11_opt_set_context_reg(R_02875C_SX_BLEND_OPT_CONTROL, AC_TRACKED_SX_BLEND_OPT_CONTROL,
sx_blend_opt_control); sx_blend_opt_control);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
} else { } else {
radeon_begin(cs); radeon_begin(cs);
radeon_opt_set_context_reg(R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, radeon_opt_set_context_reg(R_028238_CB_TARGET_MASK, AC_TRACKED_CB_TARGET_MASK,
cb_target_mask); cb_target_mask);
if (sctx->gfx_level >= GFX8) { if (sctx->gfx_level >= GFX8) {
radeon_opt_set_context_reg(R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, radeon_opt_set_context_reg(R_028424_CB_DCC_CONTROL, AC_TRACKED_CB_DCC_CONTROL,
cb_dcc_control); cb_dcc_control);
} }
if (sctx->screen->info.rbplus_allowed) { if (sctx->screen->info.rbplus_allowed) {
radeon_opt_set_context_reg3(R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, radeon_opt_set_context_reg3(R_028754_SX_PS_DOWNCONVERT, AC_TRACKED_SX_PS_DOWNCONVERT,
sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control);
} }
radeon_end_update_context_roll(); radeon_end_update_context_roll();
@ -954,26 +954,26 @@ static void si_emit_clip_regs(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, gfx12_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, AC_TRACKED_PA_CL_CLIP_CNTL,
pa_cl_clip_cntl); pa_cl_clip_cntl);
gfx12_opt_set_context_reg(R_028818_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, gfx12_opt_set_context_reg(R_028818_PA_CL_VS_OUT_CNTL, AC_TRACKED_PA_CL_VS_OUT_CNTL,
pa_cl_vs_out_cntl); pa_cl_vs_out_cntl);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, gfx11_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, AC_TRACKED_PA_CL_CLIP_CNTL,
pa_cl_clip_cntl); pa_cl_clip_cntl);
gfx11_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, gfx11_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, AC_TRACKED_PA_CL_VS_OUT_CNTL,
pa_cl_vs_out_cntl); pa_cl_vs_out_cntl);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
} else { } else {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, radeon_opt_set_context_reg(R_028810_PA_CL_CLIP_CNTL, AC_TRACKED_PA_CL_CLIP_CNTL,
pa_cl_clip_cntl); pa_cl_clip_cntl);
radeon_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, radeon_opt_set_context_reg(R_02881C_PA_CL_VS_OUT_CNTL, AC_TRACKED_PA_CL_VS_OUT_CNTL,
pa_cl_vs_out_cntl); pa_cl_vs_out_cntl);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }
@ -1174,7 +1174,7 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
{ {
struct si_state_rasterizer *state = sctx->queued.named.rasterizer; struct si_state_rasterizer *state = sctx->queued.named.rasterizer;
const unsigned cull_bits = S_028814_CULL_FRONT(1) | S_028814_CULL_BACK(1); const unsigned cull_bits = S_028814_CULL_FRONT(1) | S_028814_CULL_BACK(1);
unsigned last_pa_su_sc_mode_nctl = sctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SC_MODE_CNTL]; unsigned last_pa_su_sc_mode_nctl = sctx->tracked_regs.reg_value[AC_TRACKED_PA_SU_SC_MODE_CNTL];
unsigned pa_su_sc_mode_cntl; unsigned pa_su_sc_mode_cntl;
if (!sctx->fixed_func_face_culling_has_effect && if (!sctx->fixed_func_face_culling_has_effect &&
@ -1194,25 +1194,25 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
if (state->line_stipple_enable) { if (state->line_stipple_enable) {
gfx12_opt_set_context_reg(R_028A0C_PA_SC_LINE_STIPPLE, SI_TRACKED_PA_SC_LINE_STIPPLE, gfx12_opt_set_context_reg(R_028A0C_PA_SC_LINE_STIPPLE, AC_TRACKED_PA_SC_LINE_STIPPLE,
state->pa_sc_line_stipple); state->pa_sc_line_stipple);
} }
gfx12_opt_set_context_reg(R_028644_SPI_INTERP_CONTROL_0, SI_TRACKED_SPI_INTERP_CONTROL_0, gfx12_opt_set_context_reg(R_028644_SPI_INTERP_CONTROL_0, AC_TRACKED_SPI_INTERP_CONTROL_0,
state->spi_interp_control_0); state->spi_interp_control_0);
gfx12_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, SI_TRACKED_PA_SU_POINT_SIZE, gfx12_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, AC_TRACKED_PA_SU_POINT_SIZE,
state->pa_su_point_size); state->pa_su_point_size);
gfx12_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, SI_TRACKED_PA_SU_POINT_MINMAX, gfx12_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, AC_TRACKED_PA_SU_POINT_MINMAX,
state->pa_su_point_minmax); state->pa_su_point_minmax);
gfx12_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, SI_TRACKED_PA_SU_LINE_CNTL, gfx12_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, AC_TRACKED_PA_SU_LINE_CNTL,
state->pa_su_line_cntl); state->pa_su_line_cntl);
gfx12_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0, gfx12_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, AC_TRACKED_PA_SC_MODE_CNTL_0,
state->pa_sc_mode_cntl_0); state->pa_sc_mode_cntl_0);
gfx12_opt_set_context_reg(R_02881C_PA_SU_SC_MODE_CNTL, SI_TRACKED_PA_SU_SC_MODE_CNTL, gfx12_opt_set_context_reg(R_02881C_PA_SU_SC_MODE_CNTL, AC_TRACKED_PA_SU_SC_MODE_CNTL,
pa_su_sc_mode_cntl); pa_su_sc_mode_cntl);
gfx12_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL, gfx12_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, AC_TRACKED_PA_CL_NGG_CNTL,
state->pa_cl_ngg_cntl); state->pa_cl_ngg_cntl);
gfx12_opt_set_context_reg(R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE, gfx12_opt_set_context_reg(R_028230_PA_SC_EDGERULE, AC_TRACKED_PA_SC_EDGERULE,
state->pa_sc_edgerule); state->pa_sc_edgerule);
if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf.texture) { if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf.texture) {
@ -1220,22 +1220,22 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
((struct si_surface *)sctx->framebuffer.fb_zsbuf)->db_format_index; ((struct si_surface *)sctx->framebuffer.fb_zsbuf)->db_format_index;
gfx12_opt_set_context_reg(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, gfx12_opt_set_context_reg(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL, AC_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
state->pa_su_poly_offset_db_fmt_cntl[db_format_index]); state->pa_su_poly_offset_db_fmt_cntl[db_format_index]);
gfx12_opt_set_context_reg(R_028B7C_PA_SU_POLY_OFFSET_CLAMP, gfx12_opt_set_context_reg(R_028B7C_PA_SU_POLY_OFFSET_CLAMP,
SI_TRACKED_PA_SU_POLY_OFFSET_CLAMP, AC_TRACKED_PA_SU_POLY_OFFSET_CLAMP,
state->pa_su_poly_offset_clamp); state->pa_su_poly_offset_clamp);
gfx12_opt_set_context_reg(R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, gfx12_opt_set_context_reg(R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE, AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE,
state->pa_su_poly_offset_frontback_scale); state->pa_su_poly_offset_frontback_scale);
gfx12_opt_set_context_reg(R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, gfx12_opt_set_context_reg(R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET, AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET,
state->pa_su_poly_offset_frontback_offset[db_format_index]); state->pa_su_poly_offset_frontback_offset[db_format_index]);
gfx12_opt_set_context_reg(R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, gfx12_opt_set_context_reg(R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
SI_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE, AC_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE,
state->pa_su_poly_offset_frontback_scale); state->pa_su_poly_offset_frontback_scale);
gfx12_opt_set_context_reg(R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, gfx12_opt_set_context_reg(R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
SI_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET, AC_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET,
state->pa_su_poly_offset_frontback_offset[db_format_index]); state->pa_su_poly_offset_frontback_offset[db_format_index]);
} }
gfx12_end_context_regs(); gfx12_end_context_regs();
@ -1243,21 +1243,21 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_0286D4_SPI_INTERP_CONTROL_0, SI_TRACKED_SPI_INTERP_CONTROL_0, gfx11_opt_set_context_reg(R_0286D4_SPI_INTERP_CONTROL_0, AC_TRACKED_SPI_INTERP_CONTROL_0,
state->spi_interp_control_0); state->spi_interp_control_0);
gfx11_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, SI_TRACKED_PA_SU_POINT_SIZE, gfx11_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, AC_TRACKED_PA_SU_POINT_SIZE,
state->pa_su_point_size); state->pa_su_point_size);
gfx11_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, SI_TRACKED_PA_SU_POINT_MINMAX, gfx11_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, AC_TRACKED_PA_SU_POINT_MINMAX,
state->pa_su_point_minmax); state->pa_su_point_minmax);
gfx11_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, SI_TRACKED_PA_SU_LINE_CNTL, gfx11_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, AC_TRACKED_PA_SU_LINE_CNTL,
state->pa_su_line_cntl); state->pa_su_line_cntl);
gfx11_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0, gfx11_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, AC_TRACKED_PA_SC_MODE_CNTL_0,
state->pa_sc_mode_cntl_0); state->pa_sc_mode_cntl_0);
gfx11_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL, SI_TRACKED_PA_SU_SC_MODE_CNTL, gfx11_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL, AC_TRACKED_PA_SU_SC_MODE_CNTL,
pa_su_sc_mode_cntl); pa_su_sc_mode_cntl);
gfx11_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL, gfx11_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, AC_TRACKED_PA_CL_NGG_CNTL,
state->pa_cl_ngg_cntl); state->pa_cl_ngg_cntl);
gfx11_opt_set_context_reg(R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE, gfx11_opt_set_context_reg(R_028230_PA_SC_EDGERULE, AC_TRACKED_PA_SC_EDGERULE,
state->pa_sc_edgerule); state->pa_sc_edgerule);
if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf.texture) { if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf.texture) {
@ -1265,22 +1265,22 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
((struct si_surface *)sctx->framebuffer.fb_zsbuf)->db_format_index; ((struct si_surface *)sctx->framebuffer.fb_zsbuf)->db_format_index;
gfx11_opt_set_context_reg(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, gfx11_opt_set_context_reg(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL, AC_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
state->pa_su_poly_offset_db_fmt_cntl[db_format_index]); state->pa_su_poly_offset_db_fmt_cntl[db_format_index]);
gfx11_opt_set_context_reg(R_028B7C_PA_SU_POLY_OFFSET_CLAMP, gfx11_opt_set_context_reg(R_028B7C_PA_SU_POLY_OFFSET_CLAMP,
SI_TRACKED_PA_SU_POLY_OFFSET_CLAMP, AC_TRACKED_PA_SU_POLY_OFFSET_CLAMP,
state->pa_su_poly_offset_clamp); state->pa_su_poly_offset_clamp);
gfx11_opt_set_context_reg(R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, gfx11_opt_set_context_reg(R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE,
SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE, AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE,
state->pa_su_poly_offset_frontback_scale); state->pa_su_poly_offset_frontback_scale);
gfx11_opt_set_context_reg(R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, gfx11_opt_set_context_reg(R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET,
SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET, AC_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET,
state->pa_su_poly_offset_frontback_offset[db_format_index]); state->pa_su_poly_offset_frontback_offset[db_format_index]);
gfx11_opt_set_context_reg(R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, gfx11_opt_set_context_reg(R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE,
SI_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE, AC_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE,
state->pa_su_poly_offset_frontback_scale); state->pa_su_poly_offset_frontback_scale);
gfx11_opt_set_context_reg(R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, gfx11_opt_set_context_reg(R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET,
SI_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET, AC_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET,
state->pa_su_poly_offset_frontback_offset[db_format_index]); state->pa_su_poly_offset_frontback_offset[db_format_index]);
} }
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
@ -1288,23 +1288,23 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
} else { } else {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_0286D4_SPI_INTERP_CONTROL_0, radeon_opt_set_context_reg(R_0286D4_SPI_INTERP_CONTROL_0,
SI_TRACKED_SPI_INTERP_CONTROL_0, AC_TRACKED_SPI_INTERP_CONTROL_0,
state->spi_interp_control_0); state->spi_interp_control_0);
radeon_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, SI_TRACKED_PA_SU_POINT_SIZE, radeon_opt_set_context_reg(R_028A00_PA_SU_POINT_SIZE, AC_TRACKED_PA_SU_POINT_SIZE,
state->pa_su_point_size); state->pa_su_point_size);
radeon_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, SI_TRACKED_PA_SU_POINT_MINMAX, radeon_opt_set_context_reg(R_028A04_PA_SU_POINT_MINMAX, AC_TRACKED_PA_SU_POINT_MINMAX,
state->pa_su_point_minmax); state->pa_su_point_minmax);
radeon_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, SI_TRACKED_PA_SU_LINE_CNTL, radeon_opt_set_context_reg(R_028A08_PA_SU_LINE_CNTL, AC_TRACKED_PA_SU_LINE_CNTL,
state->pa_su_line_cntl); state->pa_su_line_cntl);
radeon_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, SI_TRACKED_PA_SC_MODE_CNTL_0, radeon_opt_set_context_reg(R_028A48_PA_SC_MODE_CNTL_0, AC_TRACKED_PA_SC_MODE_CNTL_0,
state->pa_sc_mode_cntl_0); state->pa_sc_mode_cntl_0);
radeon_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL, radeon_opt_set_context_reg(R_028814_PA_SU_SC_MODE_CNTL,
SI_TRACKED_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl); AC_TRACKED_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl);
if (sctx->gfx_level >= GFX10) { if (sctx->gfx_level >= GFX10) {
radeon_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL, radeon_opt_set_context_reg(R_028838_PA_CL_NGG_CNTL, AC_TRACKED_PA_CL_NGG_CNTL,
state->pa_cl_ngg_cntl); state->pa_cl_ngg_cntl);
} }
radeon_opt_set_context_reg(R_028230_PA_SC_EDGERULE, SI_TRACKED_PA_SC_EDGERULE, radeon_opt_set_context_reg(R_028230_PA_SC_EDGERULE, AC_TRACKED_PA_SC_EDGERULE,
state->pa_sc_edgerule); state->pa_sc_edgerule);
if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf.texture) { if (state->uses_poly_offset && sctx->framebuffer.state.zsbuf.texture) {
@ -1312,7 +1312,7 @@ static void si_pm4_emit_rasterizer(struct si_context *sctx, unsigned index)
((struct si_surface *)sctx->framebuffer.fb_zsbuf)->db_format_index; ((struct si_surface *)sctx->framebuffer.fb_zsbuf)->db_format_index;
radeon_opt_set_context_reg6(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, radeon_opt_set_context_reg6(R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL, AC_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
state->pa_su_poly_offset_db_fmt_cntl[db_format_index], state->pa_su_poly_offset_db_fmt_cntl[db_format_index],
state->pa_su_poly_offset_clamp, state->pa_su_poly_offset_clamp,
state->pa_su_poly_offset_frontback_scale, state->pa_su_poly_offset_frontback_scale,
@ -1642,22 +1642,22 @@ static void si_pm4_emit_dsa(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_02800C_DB_RENDER_OVERRIDE, SI_TRACKED_DB_RENDER_OVERRIDE, gfx12_opt_set_context_reg(R_02800C_DB_RENDER_OVERRIDE, AC_TRACKED_DB_RENDER_OVERRIDE,
state->db_render_override); state->db_render_override);
gfx12_opt_set_context_reg(R_028070_DB_DEPTH_CONTROL, SI_TRACKED_DB_DEPTH_CONTROL, gfx12_opt_set_context_reg(R_028070_DB_DEPTH_CONTROL, AC_TRACKED_DB_DEPTH_CONTROL,
state->db_depth_control); state->db_depth_control);
if (state->stencil_enabled) { if (state->stencil_enabled) {
gfx12_opt_set_context_reg(R_028074_DB_STENCIL_CONTROL, SI_TRACKED_DB_STENCIL_CONTROL, gfx12_opt_set_context_reg(R_028074_DB_STENCIL_CONTROL, AC_TRACKED_DB_STENCIL_CONTROL,
state->db_stencil_control); state->db_stencil_control);
gfx12_opt_set_context_reg(R_028090_DB_STENCIL_READ_MASK, SI_TRACKED_DB_STENCIL_READ_MASK, gfx12_opt_set_context_reg(R_028090_DB_STENCIL_READ_MASK, AC_TRACKED_DB_STENCIL_READ_MASK,
state->db_stencil_read_mask); state->db_stencil_read_mask);
gfx12_opt_set_context_reg(R_028094_DB_STENCIL_WRITE_MASK, SI_TRACKED_DB_STENCIL_WRITE_MASK, gfx12_opt_set_context_reg(R_028094_DB_STENCIL_WRITE_MASK, AC_TRACKED_DB_STENCIL_WRITE_MASK,
state->db_stencil_write_mask); state->db_stencil_write_mask);
} }
if (state->depth_bounds_enabled) { if (state->depth_bounds_enabled) {
gfx12_opt_set_context_reg(R_028050_DB_DEPTH_BOUNDS_MIN, SI_TRACKED_DB_DEPTH_BOUNDS_MIN, gfx12_opt_set_context_reg(R_028050_DB_DEPTH_BOUNDS_MIN, AC_TRACKED_DB_DEPTH_BOUNDS_MIN,
state->db_depth_bounds_min); state->db_depth_bounds_min);
gfx12_opt_set_context_reg(R_028054_DB_DEPTH_BOUNDS_MAX, SI_TRACKED_DB_DEPTH_BOUNDS_MAX, gfx12_opt_set_context_reg(R_028054_DB_DEPTH_BOUNDS_MAX, AC_TRACKED_DB_DEPTH_BOUNDS_MAX,
state->db_depth_bounds_max); state->db_depth_bounds_max);
} }
gfx12_end_context_regs(); gfx12_end_context_regs();
@ -1665,22 +1665,22 @@ static void si_pm4_emit_dsa(struct si_context *sctx, unsigned index)
if (state->alpha_func != PIPE_FUNC_ALWAYS && state->alpha_func != PIPE_FUNC_NEVER) { if (state->alpha_func != PIPE_FUNC_ALWAYS && state->alpha_func != PIPE_FUNC_NEVER) {
gfx12_opt_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, gfx12_opt_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF, AC_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
state->spi_shader_user_data_ps_alpha_ref); state->spi_shader_user_data_ps_alpha_ref);
} }
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028800_DB_DEPTH_CONTROL, SI_TRACKED_DB_DEPTH_CONTROL, gfx11_opt_set_context_reg(R_028800_DB_DEPTH_CONTROL, AC_TRACKED_DB_DEPTH_CONTROL,
state->db_depth_control); state->db_depth_control);
if (state->stencil_enabled) { if (state->stencil_enabled) {
gfx11_opt_set_context_reg(R_02842C_DB_STENCIL_CONTROL, SI_TRACKED_DB_STENCIL_CONTROL, gfx11_opt_set_context_reg(R_02842C_DB_STENCIL_CONTROL, AC_TRACKED_DB_STENCIL_CONTROL,
state->db_stencil_control); state->db_stencil_control);
} }
if (state->depth_bounds_enabled) { if (state->depth_bounds_enabled) {
gfx11_opt_set_context_reg(R_028020_DB_DEPTH_BOUNDS_MIN, SI_TRACKED_DB_DEPTH_BOUNDS_MIN, gfx11_opt_set_context_reg(R_028020_DB_DEPTH_BOUNDS_MIN, AC_TRACKED_DB_DEPTH_BOUNDS_MIN,
state->db_depth_bounds_min); state->db_depth_bounds_min);
gfx11_opt_set_context_reg(R_028024_DB_DEPTH_BOUNDS_MAX, SI_TRACKED_DB_DEPTH_BOUNDS_MAX, gfx11_opt_set_context_reg(R_028024_DB_DEPTH_BOUNDS_MAX, AC_TRACKED_DB_DEPTH_BOUNDS_MAX,
state->db_depth_bounds_max); state->db_depth_bounds_max);
} }
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
@ -1688,26 +1688,26 @@ static void si_pm4_emit_dsa(struct si_context *sctx, unsigned index)
if (state->alpha_func != PIPE_FUNC_ALWAYS && state->alpha_func != PIPE_FUNC_NEVER) { if (state->alpha_func != PIPE_FUNC_ALWAYS && state->alpha_func != PIPE_FUNC_NEVER) {
if (sctx->screen->info.has_set_sh_pairs_packed) { if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_opt_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, gfx11_opt_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF, AC_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
state->spi_shader_user_data_ps_alpha_ref); state->spi_shader_user_data_ps_alpha_ref);
} else { } else {
radeon_opt_set_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, radeon_opt_set_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF, AC_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
state->spi_shader_user_data_ps_alpha_ref); state->spi_shader_user_data_ps_alpha_ref);
} }
} }
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
} else { } else {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_028800_DB_DEPTH_CONTROL, SI_TRACKED_DB_DEPTH_CONTROL, radeon_opt_set_context_reg(R_028800_DB_DEPTH_CONTROL, AC_TRACKED_DB_DEPTH_CONTROL,
state->db_depth_control); state->db_depth_control);
if (state->stencil_enabled) { if (state->stencil_enabled) {
radeon_opt_set_context_reg(R_02842C_DB_STENCIL_CONTROL, SI_TRACKED_DB_STENCIL_CONTROL, radeon_opt_set_context_reg(R_02842C_DB_STENCIL_CONTROL, AC_TRACKED_DB_STENCIL_CONTROL,
state->db_stencil_control); state->db_stencil_control);
} }
if (state->depth_bounds_enabled) { if (state->depth_bounds_enabled) {
radeon_opt_set_context_reg2(R_028020_DB_DEPTH_BOUNDS_MIN, radeon_opt_set_context_reg2(R_028020_DB_DEPTH_BOUNDS_MIN,
SI_TRACKED_DB_DEPTH_BOUNDS_MIN, AC_TRACKED_DB_DEPTH_BOUNDS_MIN,
state->db_depth_bounds_min, state->db_depth_bounds_min,
state->db_depth_bounds_max); state->db_depth_bounds_max);
} }
@ -1716,7 +1716,7 @@ static void si_pm4_emit_dsa(struct si_context *sctx, unsigned index)
if (state->alpha_func != PIPE_FUNC_ALWAYS && state->alpha_func != PIPE_FUNC_NEVER) { if (state->alpha_func != PIPE_FUNC_ALWAYS && state->alpha_func != PIPE_FUNC_NEVER) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, radeon_opt_set_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF, AC_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
state->spi_shader_user_data_ps_alpha_ref); state->spi_shader_user_data_ps_alpha_ref);
radeon_end(); radeon_end();
} }
@ -1981,49 +1981,49 @@ static void si_emit_db_render_state(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, gfx12_opt_set_context_reg(R_028000_DB_RENDER_CONTROL, AC_TRACKED_DB_RENDER_CONTROL,
db_render_control); db_render_control);
gfx12_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, gfx12_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, AC_TRACKED_DB_RENDER_OVERRIDE2,
S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
S_028010_CENTROID_COMPUTATION_MODE(1)); S_028010_CENTROID_COMPUTATION_MODE(1));
gfx12_opt_set_context_reg(R_028060_DB_COUNT_CONTROL, SI_TRACKED_DB_COUNT_CONTROL, gfx12_opt_set_context_reg(R_028060_DB_COUNT_CONTROL, AC_TRACKED_DB_COUNT_CONTROL,
db_count_control); db_count_control);
gfx12_opt_set_context_reg(R_02806C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, gfx12_opt_set_context_reg(R_02806C_DB_SHADER_CONTROL, AC_TRACKED_DB_SHADER_CONTROL,
db_shader_control); db_shader_control);
gfx12_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, gfx12_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl); AC_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, gfx11_opt_set_context_reg(R_028000_DB_RENDER_CONTROL, AC_TRACKED_DB_RENDER_CONTROL,
db_render_control); db_render_control);
gfx11_opt_set_context_reg(R_028004_DB_COUNT_CONTROL, SI_TRACKED_DB_COUNT_CONTROL, gfx11_opt_set_context_reg(R_028004_DB_COUNT_CONTROL, AC_TRACKED_DB_COUNT_CONTROL,
db_count_control); db_count_control);
gfx11_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, gfx11_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, AC_TRACKED_DB_RENDER_OVERRIDE2,
db_render_override2); db_render_override2);
gfx11_opt_set_context_reg(R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, gfx11_opt_set_context_reg(R_02880C_DB_SHADER_CONTROL, AC_TRACKED_DB_SHADER_CONTROL,
db_shader_control); db_shader_control);
gfx11_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, gfx11_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl); AC_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
} else { } else {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg2(R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, radeon_opt_set_context_reg2(R_028000_DB_RENDER_CONTROL, AC_TRACKED_DB_RENDER_CONTROL,
db_render_control, db_count_control); db_render_control, db_count_control);
radeon_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2, radeon_opt_set_context_reg(R_028010_DB_RENDER_OVERRIDE2,
SI_TRACKED_DB_RENDER_OVERRIDE2, db_render_override2); AC_TRACKED_DB_RENDER_OVERRIDE2, db_render_override2);
radeon_opt_set_context_reg(R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, radeon_opt_set_context_reg(R_02880C_DB_SHADER_CONTROL, AC_TRACKED_DB_SHADER_CONTROL,
db_shader_control); db_shader_control);
if (sctx->gfx_level >= GFX11) { if (sctx->gfx_level >= GFX11) {
radeon_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, radeon_opt_set_context_reg(R_0283D0_PA_SC_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl); AC_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
} else if (sctx->gfx_level >= GFX10_3) { } else if (sctx->gfx_level >= GFX10_3) {
radeon_opt_set_context_reg(R_028064_DB_VRS_OVERRIDE_CNTL, radeon_opt_set_context_reg(R_028064_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl); AC_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, vrs_override_cntl);
} }
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }
@ -3585,33 +3585,33 @@ static void si_emit_msaa_config(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
radeon_begin(cs); radeon_begin(cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, gfx12_opt_set_context_reg(R_028BDC_PA_SC_LINE_CNTL, AC_TRACKED_PA_SC_LINE_CNTL,
sc_line_cntl); sc_line_cntl);
gfx12_opt_set_context_reg(R_028BE0_PA_SC_AA_CONFIG, SI_TRACKED_PA_SC_AA_CONFIG, gfx12_opt_set_context_reg(R_028BE0_PA_SC_AA_CONFIG, AC_TRACKED_PA_SC_AA_CONFIG,
sc_aa_config); sc_aa_config);
gfx12_opt_set_context_reg(R_028078_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); gfx12_opt_set_context_reg(R_028078_DB_EQAA, AC_TRACKED_DB_EQAA, db_eqaa);
gfx12_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, gfx12_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, AC_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1); sc_mode_cntl_1);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(cs); radeon_begin(cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, gfx11_opt_set_context_reg(R_028BDC_PA_SC_LINE_CNTL, AC_TRACKED_PA_SC_LINE_CNTL,
sc_line_cntl); sc_line_cntl);
gfx11_opt_set_context_reg(R_028BE0_PA_SC_AA_CONFIG, SI_TRACKED_PA_SC_AA_CONFIG, gfx11_opt_set_context_reg(R_028BE0_PA_SC_AA_CONFIG, AC_TRACKED_PA_SC_AA_CONFIG,
sc_aa_config); sc_aa_config);
gfx11_opt_set_context_reg(R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); gfx11_opt_set_context_reg(R_028804_DB_EQAA, AC_TRACKED_DB_EQAA, db_eqaa);
gfx11_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, gfx11_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, AC_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1); sc_mode_cntl_1);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
} else { } else {
radeon_begin(cs); radeon_begin(cs);
radeon_opt_set_context_reg2(R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, radeon_opt_set_context_reg2(R_028BDC_PA_SC_LINE_CNTL, AC_TRACKED_PA_SC_LINE_CNTL,
sc_line_cntl, sc_aa_config); sc_line_cntl, sc_aa_config);
radeon_opt_set_context_reg(R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); radeon_opt_set_context_reg(R_028804_DB_EQAA, AC_TRACKED_DB_EQAA, db_eqaa);
radeon_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, radeon_opt_set_context_reg(R_028A4C_PA_SC_MODE_CNTL_1, AC_TRACKED_PA_SC_MODE_CNTL_1,
sc_mode_cntl_1); sc_mode_cntl_1);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }

View file

@ -11,6 +11,8 @@
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "util/bitset.h" #include "util/bitset.h"
#include "common/ac_cmdbuf.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
@ -264,180 +266,6 @@ struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS]; uint32_t sh_base[SI_NUM_SHADERS];
}; };
/* Registers whose values are tracked by si_context.
 *
 * Each enumerator names one tracking slot. The slots form two contiguous
 * ranges:
 *   - [0, SI_NUM_TRACKED_CONTEXT_REGS): CONTEXT registers
 *   - [SI_FIRST_TRACKED_OTHER_REG, SI_NUM_ALL_TRACKED_REGS): SH and UCONFIG
 *     registers
 *
 * SI_NUM_ALL_TRACKED_REGS is the total slot count; struct si_tracked_regs
 * uses it to size both reg_saved_mask and reg_value.
 *
 * Enumerators written as "A = B" are aliases: they share B's slot and do not
 * add a new one. They are used where different hardware generations never
 * need both registers at the same time (see the per-entry GFX notes).
 *
 * Do not reorder entries inside a group marked "N consecutive registers".
 * NOTE(review): presumably the multi-register setters (e.g.
 * radeon_opt_set_context_reg2/3/4) take only the first enumerator of a group
 * and rely on the following slots being adjacent -- confirm against the
 * macro definitions before changing the order.
 */
enum si_tracked_reg
{
/* CONTEXT registers. */
/* 2 consecutive registers (GFX6-11), or separate registers (GFX12) */
SI_TRACKED_DB_RENDER_CONTROL,
SI_TRACKED_DB_COUNT_CONTROL,
SI_TRACKED_DB_DEPTH_CONTROL,
SI_TRACKED_DB_STENCIL_CONTROL,
/* 2 consecutive registers */
SI_TRACKED_DB_DEPTH_BOUNDS_MIN,
SI_TRACKED_DB_DEPTH_BOUNDS_MAX,
SI_TRACKED_SPI_INTERP_CONTROL_0,
SI_TRACKED_PA_SU_POINT_SIZE,
SI_TRACKED_PA_SU_POINT_MINMAX,
SI_TRACKED_PA_SU_LINE_CNTL,
SI_TRACKED_PA_SC_MODE_CNTL_0,
SI_TRACKED_PA_SU_SC_MODE_CNTL,
SI_TRACKED_PA_SC_EDGERULE,
/* 6 consecutive registers */
SI_TRACKED_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
SI_TRACKED_PA_SU_POLY_OFFSET_CLAMP,
SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_SCALE,
SI_TRACKED_PA_SU_POLY_OFFSET_FRONT_OFFSET,
SI_TRACKED_PA_SU_POLY_OFFSET_BACK_SCALE,
SI_TRACKED_PA_SU_POLY_OFFSET_BACK_OFFSET,
/* 2 consecutive registers */
SI_TRACKED_PA_SC_LINE_CNTL,
SI_TRACKED_PA_SC_AA_CONFIG,
/* 5 consecutive registers (GFX6-11) */
SI_TRACKED_PA_SU_VTX_CNTL,
/* 4 consecutive registers (GFX12) */
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ,
/* Non-consecutive register */
SI_TRACKED_SPI_SHADER_POS_FORMAT,
/* 5 consecutive registers (GFX12), or 2 consecutive registers (GFX6-11) */
SI_TRACKED_SPI_SHADER_Z_FORMAT,
SI_TRACKED_SPI_SHADER_COL_FORMAT,
/* 2 consecutive registers. */
SI_TRACKED_SPI_PS_INPUT_ENA,
SI_TRACKED_SPI_PS_INPUT_ADDR,
SI_TRACKED_DB_EQAA,
SI_TRACKED_DB_RENDER_OVERRIDE2,
SI_TRACKED_DB_SHADER_CONTROL,
SI_TRACKED_CB_SHADER_MASK,
SI_TRACKED_CB_TARGET_MASK,
SI_TRACKED_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_VTE_CNTL,
SI_TRACKED_PA_SC_CLIPRECT_RULE,
SI_TRACKED_PA_SC_LINE_STIPPLE,
SI_TRACKED_PA_SC_MODE_CNTL_1,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_SPI_PS_IN_CONTROL,
SI_TRACKED_VGT_GS_INSTANCE_CNT,
SI_TRACKED_VGT_GS_MAX_VERT_OUT,
SI_TRACKED_VGT_SHADER_STAGES_EN,
SI_TRACKED_VGT_LS_HS_CONFIG,
SI_TRACKED_VGT_TF_PARAM,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, /* GFX8-9 (only with has_small_prim_filter_sample_loc_bug) */
SI_TRACKED_PA_SC_BINNER_CNTL_0, /* GFX9+ */
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, /* GFX10+ - the SMALL_PRIM_FILTER slot above can be reused */
SI_TRACKED_GE_NGG_SUBGRP_CNTL, /* GFX10+ */
SI_TRACKED_PA_CL_NGG_CNTL, /* GFX10+ */
SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, /* GFX10.3+ */
/* 3 consecutive registers */
SI_TRACKED_SX_PS_DOWNCONVERT, /* GFX8+ */
SI_TRACKED_SX_BLEND_OPT_EPSILON, /* GFX8+ */
SI_TRACKED_SX_BLEND_OPT_CONTROL, /* GFX8+ */
/* The slots below can be reused by other generations. */
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, /* GFX6-8 (GFX9+ can reuse this slot) */
SI_TRACKED_VGT_REUSE_OFF, /* GFX6-8,10.3 */
SI_TRACKED_IA_MULTI_VGT_PARAM, /* GFX6-8 (GFX9+ can reuse this slot) */
SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP, /* GFX9 - the slots above can be reused */
SI_TRACKED_VGT_GS_ONCHIP_CNTL, /* GFX9-10 - the slots above can be reused */
SI_TRACKED_VGT_GSVS_RING_ITEMSIZE, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GS_MODE, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE, /* GFX6-10 (GFX11+ can reuse this slot) */
/* 3 consecutive registers */
SI_TRACKED_VGT_GSVS_RING_OFFSET_1, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GSVS_RING_OFFSET_2, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GSVS_RING_OFFSET_3, /* GFX6-10 (GFX11+ can reuse this slot) */
/* 4 consecutive registers */
SI_TRACKED_VGT_GS_VERT_ITEMSIZE, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3, /* GFX6-10 (GFX11+ can reuse this slot) */
SI_TRACKED_SPI_VS_OUT_CONFIG, /* GFX6-11 */
SI_TRACKED_DB_RENDER_OVERRIDE = SI_TRACKED_SPI_VS_OUT_CONFIG, /* GFX12+ (slot reused) */
SI_TRACKED_VGT_PRIMITIVEID_EN, /* GFX6-11 */
SI_TRACKED_CB_DCC_CONTROL, /* GFX8-11 */
SI_TRACKED_DB_STENCIL_READ_MASK, /* GFX12+ */
SI_TRACKED_DB_STENCIL_WRITE_MASK, /* GFX12+ */
SI_TRACKED_PA_SC_HISZ_CONTROL, /* GFX12+ */
SI_TRACKED_PA_SC_LINE_STIPPLE_RESET, /* GFX12+ */
/* Number of CONTEXT register slots; also the first non-context slot index. */
SI_NUM_TRACKED_CONTEXT_REGS,
SI_FIRST_TRACKED_OTHER_REG = SI_NUM_TRACKED_CONTEXT_REGS,
/* SH and UCONFIG registers. */
SI_TRACKED_GE_PC_ALLOC = SI_FIRST_TRACKED_OTHER_REG, /* GFX10-11 */
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, /* GFX7-11 */
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, /* GFX10+ */
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG, /* GFX11+ */
SI_TRACKED_SPI_SHADER_GS_OUT_CONFIG_PS, /* GFX12+ */
SI_TRACKED_VGT_PRIMITIVEID_EN_UCONFIG, /* GFX12+ */
SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX9 only */
SI_TRACKED_GE_CNTL = SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, /* GFX10+ */
SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, /* GFX9+ (not tracked on previous chips) */
SI_TRACKED_SPI_SHADER_USER_DATA_PS__ALPHA_REF,
/* 3 consecutive registers. */
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__VS_STATE_BITS, /* GFX6-8 */
SI_TRACKED_SPI_SHADER_USER_DATA_LS__BASE_VERTEX,
SI_TRACKED_SPI_SHADER_USER_DATA_LS__DRAWID,
SI_TRACKED_SPI_SHADER_USER_DATA_LS__START_INSTANCE,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__START_INSTANCE,
SI_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX, /* GFX6-10 */
SI_TRACKED_SPI_SHADER_USER_DATA_VS__DRAWID, /* GFX6-10 */
SI_TRACKED_SPI_SHADER_USER_DATA_VS__START_INSTANCE, /* GFX6-10 */
SI_TRACKED_COMPUTE_RESOURCE_LIMITS,
SI_TRACKED_COMPUTE_DISPATCH_INTERLEAVE, /* GFX12+ (not tracked on previous chips) */
SI_TRACKED_COMPUTE_NUM_THREAD_X,
SI_TRACKED_COMPUTE_NUM_THREAD_Y,
SI_TRACKED_COMPUTE_NUM_THREAD_Z,
SI_TRACKED_COMPUTE_TMPRING_SIZE,
SI_TRACKED_COMPUTE_PGM_RSRC3, /* GFX11+ */
/* 2 consecutive registers. */
SI_TRACKED_COMPUTE_PGM_RSRC1,
SI_TRACKED_COMPUTE_PGM_RSRC2,
/* 2 consecutive registers. */
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, /* GFX11+ */
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI, /* GFX11+ */
/* 3 consecutive registers. */
SI_TRACKED_SPI_SHADER_GS_MESHLET_DIM, /* GFX11+ */
SI_TRACKED_SPI_SHADER_GS_MESHLET_EXP_ALLOC, /* GFX11+ */
SI_TRACKED_SPI_SHADER_GS_MESHLET_CTRL, /* GFX12+ */
/* Total number of tracked slots (context + SH/UCONFIG). */
SI_NUM_ALL_TRACKED_REGS,
};
/* For 3 draw constants: BaseVertex, DrawID, StartInstance */ /* For 3 draw constants: BaseVertex, DrawID, StartInstance */
#define BASEVERTEX_MASK 0x1 #define BASEVERTEX_MASK 0x1
#define DRAWID_MASK 0x2 #define DRAWID_MASK 0x2
@ -446,8 +274,8 @@ enum si_tracked_reg
#define BASEVERTEX_DRAWID_STARTINSTANCE_MASK (BASEVERTEX_MASK | DRAWID_MASK | STARTINSTANCE_MASK) #define BASEVERTEX_DRAWID_STARTINSTANCE_MASK (BASEVERTEX_MASK | DRAWID_MASK | STARTINSTANCE_MASK)
struct si_tracked_regs { struct si_tracked_regs {
BITSET_DECLARE(reg_saved_mask, SI_NUM_ALL_TRACKED_REGS); BITSET_DECLARE(reg_saved_mask, AC_NUM_ALL_TRACKED_REGS);
uint32_t reg_value[SI_NUM_ALL_TRACKED_REGS]; uint32_t reg_value[AC_NUM_ALL_TRACKED_REGS];
uint32_t spi_ps_input_cntl[32]; uint32_t spi_ps_input_cntl[32];
}; };

View file

@ -394,7 +394,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
struct uvec2 bin_size = {128, 128}; struct uvec2 bin_size = {128, 128};
radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0, radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0,
SI_TRACKED_PA_SC_BINNER_CNTL_0, AC_TRACKED_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_BINNING_DISABLED) | S_028C44_BINNING_MODE(V_028C44_BINNING_DISABLED) |
S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(bin_size.x) - 5) | S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(bin_size.x) - 5) |
S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(bin_size.y) - 5) | S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(bin_size.y) - 5) |
@ -418,7 +418,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
bin_size_extend.y = util_logbase2(bin_size.y) - 5; bin_size_extend.y = util_logbase2(bin_size.y) - 5;
radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0, radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0,
SI_TRACKED_PA_SC_BINNER_CNTL_0, AC_TRACKED_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(binning_disabled) | S_028C44_BINNING_MODE(binning_disabled) |
S_028C44_BIN_SIZE_X(bin_size.x == 16) | S_028C44_BIN_SIZE_X(bin_size.x == 16) |
S_028C44_BIN_SIZE_Y(bin_size.y == 16) | S_028C44_BIN_SIZE_Y(bin_size.y == 16) |
@ -430,7 +430,7 @@ static void si_emit_dpbb_disable(struct si_context *sctx)
S_028C44_FLUSH_ON_BINNING_TRANSITION(1)); S_028C44_FLUSH_ON_BINNING_TRANSITION(1));
} else { } else {
radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0, radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0,
SI_TRACKED_PA_SC_BINNER_CNTL_0, AC_TRACKED_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_DISABLE_START_OF_PRIM(1) |
S_028C44_FLUSH_ON_BINNING_TRANSITION(sctx->family == CHIP_VEGA12 || S_028C44_FLUSH_ON_BINNING_TRANSITION(sctx->family == CHIP_VEGA12 ||
@ -512,7 +512,7 @@ void si_emit_dpbb_state(struct si_context *sctx, unsigned index)
bin_size_extend.y = util_logbase2(bin_size.y) - 5; bin_size_extend.y = util_logbase2(bin_size.y) - 5;
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0, SI_TRACKED_PA_SC_BINNER_CNTL_0, radeon_opt_set_context_reg(R_028C44_PA_SC_BINNER_CNTL_0, AC_TRACKED_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
S_028C44_BIN_SIZE_X(bin_size.x == 16) | S_028C44_BIN_SIZE_X(bin_size.x == 16) |
S_028C44_BIN_SIZE_Y(bin_size.y == 16) | S_028C44_BIN_SIZE_Y(bin_size.y == 16) |

View file

@ -1018,13 +1018,13 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
/* 0 = no reset, 1 = reset per prim, 2 = reset per packet */ /* 0 = no reset, 1 = reset per prim, 2 = reset per packet */
if (GFX_VERSION >= GFX12) { if (GFX_VERSION >= GFX12) {
radeon_opt_set_context_reg(R_028A44_PA_SC_LINE_STIPPLE_RESET, radeon_opt_set_context_reg(R_028A44_PA_SC_LINE_STIPPLE_RESET,
SI_TRACKED_PA_SC_LINE_STIPPLE_RESET, AC_TRACKED_PA_SC_LINE_STIPPLE_RESET,
S_028A44_AUTO_RESET_CNTL(reset_per_prim ? 1 : 2)); S_028A44_AUTO_RESET_CNTL(reset_per_prim ? 1 : 2));
} else { } else {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
radeon_opt_set_context_reg(R_028A0C_PA_SC_LINE_STIPPLE, radeon_opt_set_context_reg(R_028A0C_PA_SC_LINE_STIPPLE,
SI_TRACKED_PA_SC_LINE_STIPPLE, AC_TRACKED_PA_SC_LINE_STIPPLE,
rs->pa_sc_line_stipple | rs->pa_sc_line_stipple |
S_028A0C_AUTO_RESET_CNTL(reset_per_prim ? 1 : 2)); S_028A0C_AUTO_RESET_CNTL(reset_per_prim ? 1 : 2));
} }
@ -1033,10 +1033,10 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
if (NGG || HAS_GS) { if (NGG || HAS_GS) {
if (GFX_VERSION >= GFX11) { if (GFX_VERSION >= GFX11) {
radeon_opt_set_uconfig_reg(R_030998_VGT_GS_OUT_PRIM_TYPE, radeon_opt_set_uconfig_reg(R_030998_VGT_GS_OUT_PRIM_TYPE,
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG, sctx->gs_out_prim); AC_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG, sctx->gs_out_prim);
} else { } else {
radeon_opt_set_context_reg(R_028A6C_VGT_GS_OUT_PRIM_TYPE, radeon_opt_set_context_reg(R_028A6C_VGT_GS_OUT_PRIM_TYPE,
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE, sctx->gs_out_prim); AC_TRACKED_VGT_GS_OUT_PRIM_TYPE, sctx->gs_out_prim);
} }
} }
@ -1178,17 +1178,17 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
if (GFX_VERSION == GFX9) { if (GFX_VERSION == GFX9) {
/* Workaround for SpecviewPerf13 Catia hang on GFX9. */ /* Workaround for SpecviewPerf13 Catia hang on GFX9. */
if (prim != sctx->last_prim) if (prim != sctx->last_prim)
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG); BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, AC_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG);
radeon_opt_set_uconfig_reg_idx(R_030960_IA_MULTI_VGT_PARAM, radeon_opt_set_uconfig_reg_idx(R_030960_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG, AC_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG,
4, ia_multi_vgt_param); 4, ia_multi_vgt_param);
} else if (GFX_VERSION >= GFX7) { } else if (GFX_VERSION >= GFX7) {
radeon_opt_set_context_reg_idx(R_028AA8_IA_MULTI_VGT_PARAM, radeon_opt_set_context_reg_idx(R_028AA8_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); AC_TRACKED_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
} else { } else {
radeon_opt_set_context_reg(R_028AA8_IA_MULTI_VGT_PARAM, radeon_opt_set_context_reg(R_028AA8_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); AC_TRACKED_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
} }
radeon_end(); radeon_end();
} }
@ -1498,9 +1498,9 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
MESA_SHADER_VERTEX); MESA_SHADER_VERTEX);
bool render_cond_bit = sctx->render_cond_enabled; bool render_cond_bit = sctx->render_cond_enabled;
const unsigned tracked_base_vertex_reg = const unsigned tracked_base_vertex_reg =
HAS_TESS ? SI_TRACKED_SPI_SHADER_USER_DATA_LS__BASE_VERTEX : HAS_TESS ? AC_TRACKED_SPI_SHADER_USER_DATA_LS__BASE_VERTEX :
HAS_GS || NGG ? SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX : HAS_GS || NGG ? AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX :
SI_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX; AC_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX;
if (!IS_DRAW_VERTEX_STATE && indirect) { if (!IS_DRAW_VERTEX_STATE && indirect) {
assert(num_draws == 1); assert(num_draws == 1);

View file

@ -310,7 +310,7 @@ static void si_emit_sample_locations(struct si_context *sctx, unsigned index)
radeon_begin(cs); radeon_begin(cs);
radeon_opt_set_context_reg(R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, radeon_opt_set_context_reg(R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, AC_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
S_028830_SMALL_PRIM_FILTER_ENABLE(small_prim_filter_enable) | S_028830_SMALL_PRIM_FILTER_ENABLE(small_prim_filter_enable) |
/* Small line culling doesn't work on Polaris10-12. */ /* Small line culling doesn't work on Polaris10-12. */
S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12)); S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12));

View file

@ -781,16 +781,16 @@ static void si_emit_shader_es(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_028AAC_VGT_ESGS_RING_ITEMSIZE, radeon_opt_set_context_reg(R_028AAC_VGT_ESGS_RING_ITEMSIZE,
SI_TRACKED_VGT_ESGS_RING_ITEMSIZE, AC_TRACKED_VGT_ESGS_RING_ITEMSIZE,
shader->selector->info.esgs_vertex_stride / 4); shader->selector->info.esgs_vertex_stride / 4);
if (shader->selector->stage == MESA_SHADER_TESS_EVAL) if (shader->selector->stage == MESA_SHADER_TESS_EVAL)
radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, AC_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
if (shader->vgt_vertex_reuse_block_cntl) if (shader->vgt_vertex_reuse_block_cntl)
radeon_opt_set_context_reg(R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, radeon_opt_set_context_reg(R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, AC_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl); shader->vgt_vertex_reuse_block_cntl);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }
@ -867,47 +867,47 @@ static void si_emit_shader_gs(struct si_context *sctx, unsigned index)
/* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2 /* R_028A60_VGT_GSVS_RING_OFFSET_1, R_028A64_VGT_GSVS_RING_OFFSET_2
* R_028A68_VGT_GSVS_RING_OFFSET_3 */ * R_028A68_VGT_GSVS_RING_OFFSET_3 */
radeon_opt_set_context_reg3( radeon_opt_set_context_reg3(
R_028A60_VGT_GSVS_RING_OFFSET_1, SI_TRACKED_VGT_GSVS_RING_OFFSET_1, R_028A60_VGT_GSVS_RING_OFFSET_1, AC_TRACKED_VGT_GSVS_RING_OFFSET_1,
shader->gs.vgt_gsvs_ring_offset_1, shader->gs.vgt_gsvs_ring_offset_2, shader->gs.vgt_gsvs_ring_offset_1, shader->gs.vgt_gsvs_ring_offset_2,
shader->gs.vgt_gsvs_ring_offset_3); shader->gs.vgt_gsvs_ring_offset_3);
/* R_028AB0_VGT_GSVS_RING_ITEMSIZE */ /* R_028AB0_VGT_GSVS_RING_ITEMSIZE */
radeon_opt_set_context_reg(R_028AB0_VGT_GSVS_RING_ITEMSIZE, radeon_opt_set_context_reg(R_028AB0_VGT_GSVS_RING_ITEMSIZE,
SI_TRACKED_VGT_GSVS_RING_ITEMSIZE, AC_TRACKED_VGT_GSVS_RING_ITEMSIZE,
shader->gs.vgt_gsvs_ring_itemsize); shader->gs.vgt_gsvs_ring_itemsize);
/* R_028B38_VGT_GS_MAX_VERT_OUT */ /* R_028B38_VGT_GS_MAX_VERT_OUT */
radeon_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT, radeon_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, AC_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->gs.vgt_gs_max_vert_out); shader->gs.vgt_gs_max_vert_out);
/* R_028B5C_VGT_GS_VERT_ITEMSIZE, R_028B60_VGT_GS_VERT_ITEMSIZE_1 /* R_028B5C_VGT_GS_VERT_ITEMSIZE, R_028B60_VGT_GS_VERT_ITEMSIZE_1
* R_028B64_VGT_GS_VERT_ITEMSIZE_2, R_028B68_VGT_GS_VERT_ITEMSIZE_3 */ * R_028B64_VGT_GS_VERT_ITEMSIZE_2, R_028B68_VGT_GS_VERT_ITEMSIZE_3 */
radeon_opt_set_context_reg4( radeon_opt_set_context_reg4(
R_028B5C_VGT_GS_VERT_ITEMSIZE, SI_TRACKED_VGT_GS_VERT_ITEMSIZE, R_028B5C_VGT_GS_VERT_ITEMSIZE, AC_TRACKED_VGT_GS_VERT_ITEMSIZE,
shader->gs.vgt_gs_vert_itemsize, shader->gs.vgt_gs_vert_itemsize_1, shader->gs.vgt_gs_vert_itemsize, shader->gs.vgt_gs_vert_itemsize_1,
shader->gs.vgt_gs_vert_itemsize_2, shader->gs.vgt_gs_vert_itemsize_3); shader->gs.vgt_gs_vert_itemsize_2, shader->gs.vgt_gs_vert_itemsize_3);
/* R_028B90_VGT_GS_INSTANCE_CNT */ /* R_028B90_VGT_GS_INSTANCE_CNT */
radeon_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, SI_TRACKED_VGT_GS_INSTANCE_CNT, radeon_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, AC_TRACKED_VGT_GS_INSTANCE_CNT,
shader->gs.vgt_gs_instance_cnt); shader->gs.vgt_gs_instance_cnt);
if (sctx->gfx_level >= GFX9) { if (sctx->gfx_level >= GFX9) {
/* R_028A44_VGT_GS_ONCHIP_CNTL */ /* R_028A44_VGT_GS_ONCHIP_CNTL */
radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, SI_TRACKED_VGT_GS_ONCHIP_CNTL, radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, AC_TRACKED_VGT_GS_ONCHIP_CNTL,
shader->gs.vgt_gs_onchip_cntl); shader->gs.vgt_gs_onchip_cntl);
/* R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP */ /* R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP */
if (sctx->gfx_level == GFX9) { if (sctx->gfx_level == GFX9) {
radeon_opt_set_context_reg(R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, radeon_opt_set_context_reg(R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP, AC_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
shader->gs.vgt_gs_max_prims_per_subgroup); shader->gs.vgt_gs_max_prims_per_subgroup);
} }
if (shader->key.ge.part.gs.es->stage == MESA_SHADER_TESS_EVAL) if (shader->key.ge.part.gs.es->stage == MESA_SHADER_TESS_EVAL)
radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, AC_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
if (shader->vgt_vertex_reuse_block_cntl) if (shader->vgt_vertex_reuse_block_cntl)
radeon_opt_set_context_reg(R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, radeon_opt_set_context_reg(R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, AC_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl); shader->vgt_vertex_reuse_block_cntl);
} }
radeon_end_update_context_roll(); radeon_end_update_context_roll();
@ -917,22 +917,22 @@ static void si_emit_shader_gs(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX7) { if (sctx->gfx_level >= GFX7) {
if (sctx->screen->info.uses_kernel_cu_mask) { if (sctx->screen->info.uses_kernel_cu_mask) {
radeon_opt_set_sh_reg_idx(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, radeon_opt_set_sh_reg_idx(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
3, shader->gs.spi_shader_pgm_rsrc3_gs); 3, shader->gs.spi_shader_pgm_rsrc3_gs);
} else { } else {
radeon_opt_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, radeon_opt_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
shader->gs.spi_shader_pgm_rsrc3_gs); shader->gs.spi_shader_pgm_rsrc3_gs);
} }
} }
if (sctx->gfx_level >= GFX10) { if (sctx->gfx_level >= GFX10) {
if (sctx->screen->info.uses_kernel_cu_mask) { if (sctx->screen->info.uses_kernel_cu_mask) {
radeon_opt_set_sh_reg_idx(R_00B204_SPI_SHADER_PGM_RSRC4_GS, radeon_opt_set_sh_reg_idx(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
3, shader->gs.spi_shader_pgm_rsrc4_gs); 3, shader->gs.spi_shader_pgm_rsrc4_gs);
} else { } else {
radeon_opt_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS, radeon_opt_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
shader->gs.spi_shader_pgm_rsrc4_gs); shader->gs.spi_shader_pgm_rsrc4_gs);
} }
} }
@ -1124,30 +1124,30 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
if (HAS_TESS) { if (HAS_TESS) {
radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, AC_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
} }
radeon_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, radeon_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, AC_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
shader->ngg.ge_max_output_per_subgroup); shader->ngg.ge_max_output_per_subgroup);
radeon_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, SI_TRACKED_GE_NGG_SUBGRP_CNTL, radeon_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, AC_TRACKED_GE_NGG_SUBGRP_CNTL,
shader->ngg.ge_ngg_subgrp_cntl); shader->ngg.ge_ngg_subgrp_cntl);
radeon_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN, radeon_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, AC_TRACKED_VGT_PRIMITIVEID_EN,
shader->ngg.vgt_primitiveid_en); shader->ngg.vgt_primitiveid_en);
if (sctx->gfx_level < GFX11) { if (sctx->gfx_level < GFX11) {
radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, SI_TRACKED_VGT_GS_ONCHIP_CNTL, radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, AC_TRACKED_VGT_GS_ONCHIP_CNTL,
shader->ngg.vgt_gs_onchip_cntl); shader->ngg.vgt_gs_onchip_cntl);
} }
radeon_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT, radeon_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, AC_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->ngg.vgt_gs_max_vert_out); shader->ngg.vgt_gs_max_vert_out);
radeon_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, SI_TRACKED_VGT_GS_INSTANCE_CNT, radeon_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, AC_TRACKED_VGT_GS_INSTANCE_CNT,
shader->ngg.vgt_gs_instance_cnt); shader->ngg.vgt_gs_instance_cnt);
radeon_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, SI_TRACKED_SPI_VS_OUT_CONFIG, radeon_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, AC_TRACKED_SPI_VS_OUT_CONFIG,
shader->ngg.spi_vs_out_config); shader->ngg.spi_vs_out_config);
radeon_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT, radeon_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT,
SI_TRACKED_SPI_SHADER_POS_FORMAT, AC_TRACKED_SPI_SHADER_POS_FORMAT,
shader->ngg.spi_shader_pos_format); shader->ngg.spi_shader_pos_format);
radeon_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, SI_TRACKED_PA_CL_VTE_CNTL, radeon_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, AC_TRACKED_PA_CL_VTE_CNTL,
shader->ngg.pa_cl_vte_cntl); shader->ngg.pa_cl_vte_cntl);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
@ -1155,20 +1155,20 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
radeon_begin_again(&sctx->gfx_cs); radeon_begin_again(&sctx->gfx_cs);
if (sctx->screen->info.uses_kernel_cu_mask) { if (sctx->screen->info.uses_kernel_cu_mask) {
radeon_opt_set_sh_reg_idx(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, radeon_opt_set_sh_reg_idx(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
3, shader->ngg.spi_shader_pgm_rsrc3_gs); 3, shader->ngg.spi_shader_pgm_rsrc3_gs);
radeon_opt_set_sh_reg_idx(R_00B204_SPI_SHADER_PGM_RSRC4_GS, radeon_opt_set_sh_reg_idx(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
3, shader->ngg.spi_shader_pgm_rsrc4_gs); 3, shader->ngg.spi_shader_pgm_rsrc4_gs);
} else { } else {
radeon_opt_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, radeon_opt_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
shader->ngg.spi_shader_pgm_rsrc3_gs); shader->ngg.spi_shader_pgm_rsrc3_gs);
radeon_opt_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS, radeon_opt_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
shader->ngg.spi_shader_pgm_rsrc4_gs); shader->ngg.spi_shader_pgm_rsrc4_gs);
} }
radeon_opt_set_uconfig_reg(R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC, radeon_opt_set_uconfig_reg(R_030980_GE_PC_ALLOC, AC_TRACKED_GE_PC_ALLOC,
shader->ngg.ge_pc_alloc); shader->ngg.ge_pc_alloc);
radeon_end(); radeon_end();
} }
@ -1184,69 +1184,69 @@ static void gfx11_dgpu_emit_shader_ngg(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
if (HAS_TESS) { if (HAS_TESS) {
gfx11_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, gfx11_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, AC_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
} }
gfx11_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, gfx11_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, AC_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
shader->ngg.ge_max_output_per_subgroup); shader->ngg.ge_max_output_per_subgroup);
gfx11_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, SI_TRACKED_GE_NGG_SUBGRP_CNTL, gfx11_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, AC_TRACKED_GE_NGG_SUBGRP_CNTL,
shader->ngg.ge_ngg_subgrp_cntl); shader->ngg.ge_ngg_subgrp_cntl);
gfx11_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN, gfx11_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, AC_TRACKED_VGT_PRIMITIVEID_EN,
shader->ngg.vgt_primitiveid_en); shader->ngg.vgt_primitiveid_en);
gfx11_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT, gfx11_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, AC_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->ngg.vgt_gs_max_vert_out); shader->ngg.vgt_gs_max_vert_out);
gfx11_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, SI_TRACKED_VGT_GS_INSTANCE_CNT, gfx11_opt_set_context_reg(R_028B90_VGT_GS_INSTANCE_CNT, AC_TRACKED_VGT_GS_INSTANCE_CNT,
shader->ngg.vgt_gs_instance_cnt); shader->ngg.vgt_gs_instance_cnt);
gfx11_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, SI_TRACKED_SPI_VS_OUT_CONFIG, gfx11_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, AC_TRACKED_SPI_VS_OUT_CONFIG,
shader->ngg.spi_vs_out_config); shader->ngg.spi_vs_out_config);
gfx11_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT, SI_TRACKED_SPI_SHADER_POS_FORMAT, gfx11_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT, AC_TRACKED_SPI_SHADER_POS_FORMAT,
shader->ngg.spi_shader_pos_format); shader->ngg.spi_shader_pos_format);
gfx11_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, SI_TRACKED_PA_CL_VTE_CNTL, gfx11_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, AC_TRACKED_PA_CL_VTE_CNTL,
shader->ngg.pa_cl_vte_cntl); shader->ngg.pa_cl_vte_cntl);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
assert(!sctx->screen->info.uses_kernel_cu_mask); assert(!sctx->screen->info.uses_kernel_cu_mask);
if (sctx->screen->info.has_set_sh_pairs_packed) { if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_opt_push_gfx_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, gfx11_opt_push_gfx_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
shader->ngg.spi_shader_pgm_rsrc3_gs); shader->ngg.spi_shader_pgm_rsrc3_gs);
gfx11_opt_push_gfx_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS, gfx11_opt_push_gfx_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
shader->ngg.spi_shader_pgm_rsrc4_gs); shader->ngg.spi_shader_pgm_rsrc4_gs);
if (HAS_MS) { if (HAS_MS) {
gfx11_opt_push_gfx_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, gfx11_opt_push_gfx_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM,
SI_TRACKED_SPI_SHADER_GS_MESHLET_DIM, AC_TRACKED_SPI_SHADER_GS_MESHLET_DIM,
shader->ngg.spi_shader_gs_meshlet_dim); shader->ngg.spi_shader_gs_meshlet_dim);
gfx11_opt_push_gfx_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, gfx11_opt_push_gfx_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC,
SI_TRACKED_SPI_SHADER_GS_MESHLET_EXP_ALLOC, AC_TRACKED_SPI_SHADER_GS_MESHLET_EXP_ALLOC,
shader->ngg.spi_shader_gs_meshlet_exp_alloc); shader->ngg.spi_shader_gs_meshlet_exp_alloc);
} }
} else { } else {
if (sctx->screen->info.uses_kernel_cu_mask) { if (sctx->screen->info.uses_kernel_cu_mask) {
radeon_opt_set_sh_reg_idx(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, radeon_opt_set_sh_reg_idx(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
3, shader->ngg.spi_shader_pgm_rsrc3_gs); 3, shader->ngg.spi_shader_pgm_rsrc3_gs);
radeon_opt_set_sh_reg_idx(R_00B204_SPI_SHADER_PGM_RSRC4_GS, radeon_opt_set_sh_reg_idx(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
3, shader->ngg.spi_shader_pgm_rsrc4_gs); 3, shader->ngg.spi_shader_pgm_rsrc4_gs);
} else { } else {
radeon_opt_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, radeon_opt_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
shader->ngg.spi_shader_pgm_rsrc3_gs); shader->ngg.spi_shader_pgm_rsrc3_gs);
radeon_opt_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS, radeon_opt_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
shader->ngg.spi_shader_pgm_rsrc4_gs); shader->ngg.spi_shader_pgm_rsrc4_gs);
} }
if (HAS_MS) { if (HAS_MS) {
radeon_opt_set_sh_reg2(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, radeon_opt_set_sh_reg2(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM,
SI_TRACKED_SPI_SHADER_GS_MESHLET_DIM, AC_TRACKED_SPI_SHADER_GS_MESHLET_DIM,
shader->ngg.spi_shader_gs_meshlet_dim, shader->ngg.spi_shader_gs_meshlet_dim,
shader->ngg.spi_shader_gs_meshlet_exp_alloc); shader->ngg.spi_shader_gs_meshlet_exp_alloc);
} }
} }
radeon_opt_set_uconfig_reg(R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC, radeon_opt_set_uconfig_reg(R_030980_GE_PC_ALLOC, AC_TRACKED_GE_PC_ALLOC,
shader->ngg.ge_pc_alloc); shader->ngg.ge_pc_alloc);
radeon_end(); radeon_end();
} }
@ -1262,42 +1262,42 @@ static void gfx12_emit_shader_ngg(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
if (HAS_TESS) { if (HAS_TESS) {
gfx12_opt_set_context_reg(R_028AA4_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, gfx12_opt_set_context_reg(R_028AA4_VGT_TF_PARAM, AC_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
} }
gfx12_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, gfx12_opt_set_context_reg(R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP, AC_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP,
shader->ngg.ge_max_output_per_subgroup); shader->ngg.ge_max_output_per_subgroup);
gfx12_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, SI_TRACKED_GE_NGG_SUBGRP_CNTL, gfx12_opt_set_context_reg(R_028B4C_GE_NGG_SUBGRP_CNTL, AC_TRACKED_GE_NGG_SUBGRP_CNTL,
shader->ngg.ge_ngg_subgrp_cntl); shader->ngg.ge_ngg_subgrp_cntl);
gfx12_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, SI_TRACKED_VGT_GS_MAX_VERT_OUT, gfx12_opt_set_context_reg(R_028B38_VGT_GS_MAX_VERT_OUT, AC_TRACKED_VGT_GS_MAX_VERT_OUT,
shader->ngg.vgt_gs_max_vert_out); shader->ngg.vgt_gs_max_vert_out);
gfx12_opt_set_context_reg(R_028B3C_VGT_GS_INSTANCE_CNT, SI_TRACKED_VGT_GS_INSTANCE_CNT, gfx12_opt_set_context_reg(R_028B3C_VGT_GS_INSTANCE_CNT, AC_TRACKED_VGT_GS_INSTANCE_CNT,
shader->ngg.vgt_gs_instance_cnt); shader->ngg.vgt_gs_instance_cnt);
gfx12_opt_set_context_reg(R_02864C_SPI_SHADER_POS_FORMAT, SI_TRACKED_SPI_SHADER_POS_FORMAT, gfx12_opt_set_context_reg(R_02864C_SPI_SHADER_POS_FORMAT, AC_TRACKED_SPI_SHADER_POS_FORMAT,
shader->ngg.spi_shader_pos_format); shader->ngg.spi_shader_pos_format);
gfx12_opt_set_context_reg(R_028814_PA_CL_VTE_CNTL, SI_TRACKED_PA_CL_VTE_CNTL, gfx12_opt_set_context_reg(R_028814_PA_CL_VTE_CNTL, AC_TRACKED_PA_CL_VTE_CNTL,
shader->ngg.pa_cl_vte_cntl); shader->ngg.pa_cl_vte_cntl);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_opt_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN, radeon_opt_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN,
SI_TRACKED_VGT_PRIMITIVEID_EN_UCONFIG, AC_TRACKED_VGT_PRIMITIVEID_EN_UCONFIG,
shader->ngg.vgt_primitiveid_en); shader->ngg.vgt_primitiveid_en);
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
assert(!sctx->screen->info.uses_kernel_cu_mask); assert(!sctx->screen->info.uses_kernel_cu_mask);
gfx12_opt_push_gfx_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS, gfx12_opt_push_gfx_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS,
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, AC_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
shader->ngg.spi_shader_pgm_rsrc4_gs); shader->ngg.spi_shader_pgm_rsrc4_gs);
if (HAS_MS) { if (HAS_MS) {
gfx12_opt_push_gfx_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, gfx12_opt_push_gfx_sh_reg(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM,
SI_TRACKED_SPI_SHADER_GS_MESHLET_DIM, AC_TRACKED_SPI_SHADER_GS_MESHLET_DIM,
shader->ngg.spi_shader_gs_meshlet_dim); shader->ngg.spi_shader_gs_meshlet_dim);
gfx12_opt_push_gfx_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, gfx12_opt_push_gfx_sh_reg(R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC,
SI_TRACKED_SPI_SHADER_GS_MESHLET_EXP_ALLOC, AC_TRACKED_SPI_SHADER_GS_MESHLET_EXP_ALLOC,
shader->ngg.spi_shader_gs_meshlet_exp_alloc); shader->ngg.spi_shader_gs_meshlet_exp_alloc);
gfx12_opt_push_gfx_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, gfx12_opt_push_gfx_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL,
SI_TRACKED_SPI_SHADER_GS_MESHLET_CTRL, AC_TRACKED_SPI_SHADER_GS_MESHLET_CTRL,
shader->ngg.spi_shader_gs_meshlet_ctrl); shader->ngg.spi_shader_gs_meshlet_ctrl);
} }
} }
@ -1794,39 +1794,39 @@ static void si_emit_shader_vs(struct si_context *sctx, unsigned index)
struct si_shader *shader = sctx->queued.named.vs; struct si_shader *shader = sctx->queued.named.vs;
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_028A40_VGT_GS_MODE, SI_TRACKED_VGT_GS_MODE, radeon_opt_set_context_reg(R_028A40_VGT_GS_MODE, AC_TRACKED_VGT_GS_MODE,
shader->vs.vgt_gs_mode); shader->vs.vgt_gs_mode);
radeon_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, SI_TRACKED_VGT_PRIMITIVEID_EN, radeon_opt_set_context_reg(R_028A84_VGT_PRIMITIVEID_EN, AC_TRACKED_VGT_PRIMITIVEID_EN,
shader->vs.vgt_primitiveid_en); shader->vs.vgt_primitiveid_en);
if (sctx->gfx_level <= GFX8) { if (sctx->gfx_level <= GFX8) {
radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, SI_TRACKED_VGT_REUSE_OFF, radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, AC_TRACKED_VGT_REUSE_OFF,
shader->vs.vgt_reuse_off); shader->vs.vgt_reuse_off);
} }
radeon_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, SI_TRACKED_SPI_VS_OUT_CONFIG, radeon_opt_set_context_reg(R_0286C4_SPI_VS_OUT_CONFIG, AC_TRACKED_SPI_VS_OUT_CONFIG,
shader->vs.spi_vs_out_config); shader->vs.spi_vs_out_config);
radeon_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT, radeon_opt_set_context_reg(R_02870C_SPI_SHADER_POS_FORMAT,
SI_TRACKED_SPI_SHADER_POS_FORMAT, AC_TRACKED_SPI_SHADER_POS_FORMAT,
shader->vs.spi_shader_pos_format); shader->vs.spi_shader_pos_format);
radeon_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, SI_TRACKED_PA_CL_VTE_CNTL, radeon_opt_set_context_reg(R_028818_PA_CL_VTE_CNTL, AC_TRACKED_PA_CL_VTE_CNTL,
shader->vs.pa_cl_vte_cntl); shader->vs.pa_cl_vte_cntl);
if (shader->selector->stage == MESA_SHADER_TESS_EVAL) if (shader->selector->stage == MESA_SHADER_TESS_EVAL)
radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, SI_TRACKED_VGT_TF_PARAM, radeon_opt_set_context_reg(R_028B6C_VGT_TF_PARAM, AC_TRACKED_VGT_TF_PARAM,
shader->vgt_tf_param); shader->vgt_tf_param);
if (shader->vgt_vertex_reuse_block_cntl) if (shader->vgt_vertex_reuse_block_cntl)
radeon_opt_set_context_reg(R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, radeon_opt_set_context_reg(R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, AC_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl); shader->vgt_vertex_reuse_block_cntl);
/* Required programming for tessellation. (legacy pipeline only) */ /* Required programming for tessellation. (legacy pipeline only) */
if (sctx->gfx_level >= GFX10 && shader->selector->stage == MESA_SHADER_TESS_EVAL) { if (sctx->gfx_level >= GFX10 && shader->selector->stage == MESA_SHADER_TESS_EVAL) {
radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL, radeon_opt_set_context_reg(R_028A44_VGT_GS_ONCHIP_CNTL,
SI_TRACKED_VGT_GS_ONCHIP_CNTL, AC_TRACKED_VGT_GS_ONCHIP_CNTL,
S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_ES_VERTS_PER_SUBGRP(250) |
S_028A44_GS_PRIMS_PER_SUBGRP(126) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
S_028A44_GS_INST_PRIMS_IN_SUBGRP(126)); S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
@ -1837,7 +1837,7 @@ static void si_emit_shader_vs(struct si_context *sctx, unsigned index)
/* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */ /* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
if (sctx->gfx_level >= GFX10) { if (sctx->gfx_level >= GFX10) {
radeon_begin_again(&sctx->gfx_cs); radeon_begin_again(&sctx->gfx_cs);
radeon_opt_set_uconfig_reg(R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC, radeon_opt_set_uconfig_reg(R_030980_GE_PC_ALLOC, AC_TRACKED_GE_PC_ALLOC,
shader->vs.ge_pc_alloc); shader->vs.ge_pc_alloc);
radeon_end(); radeon_end();
} }
@ -2020,13 +2020,13 @@ static void gfx6_emit_shader_ps(struct si_context *sctx, unsigned index)
struct si_shader *shader = sctx->queued.named.ps; struct si_shader *shader = sctx->queued.named.ps;
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg2(R_0286CC_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA, radeon_opt_set_context_reg2(R_0286CC_SPI_PS_INPUT_ENA, AC_TRACKED_SPI_PS_INPUT_ENA,
shader->ps.spi_ps_input_ena, shader->ps.spi_ps_input_ena,
shader->ps.spi_ps_input_addr); shader->ps.spi_ps_input_addr);
radeon_opt_set_context_reg2(R_028710_SPI_SHADER_Z_FORMAT, SI_TRACKED_SPI_SHADER_Z_FORMAT, radeon_opt_set_context_reg2(R_028710_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT,
shader->ps.spi_shader_z_format, shader->ps.spi_shader_z_format,
shader->ps.spi_shader_col_format); shader->ps.spi_shader_col_format);
radeon_opt_set_context_reg(R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK, radeon_opt_set_context_reg(R_02823C_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK,
shader->ps.cb_shader_mask); shader->ps.cb_shader_mask);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }
@ -2037,15 +2037,15 @@ static void gfx11_dgpu_emit_shader_ps(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_0286CC_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA, gfx11_opt_set_context_reg(R_0286CC_SPI_PS_INPUT_ENA, AC_TRACKED_SPI_PS_INPUT_ENA,
shader->ps.spi_ps_input_ena); shader->ps.spi_ps_input_ena);
gfx11_opt_set_context_reg(R_0286D0_SPI_PS_INPUT_ADDR, SI_TRACKED_SPI_PS_INPUT_ADDR, gfx11_opt_set_context_reg(R_0286D0_SPI_PS_INPUT_ADDR, AC_TRACKED_SPI_PS_INPUT_ADDR,
shader->ps.spi_ps_input_addr); shader->ps.spi_ps_input_addr);
gfx11_opt_set_context_reg(R_028710_SPI_SHADER_Z_FORMAT, SI_TRACKED_SPI_SHADER_Z_FORMAT, gfx11_opt_set_context_reg(R_028710_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT,
shader->ps.spi_shader_z_format); shader->ps.spi_shader_z_format);
gfx11_opt_set_context_reg(R_028714_SPI_SHADER_COL_FORMAT, SI_TRACKED_SPI_SHADER_COL_FORMAT, gfx11_opt_set_context_reg(R_028714_SPI_SHADER_COL_FORMAT, AC_TRACKED_SPI_SHADER_COL_FORMAT,
shader->ps.spi_shader_col_format); shader->ps.spi_shader_col_format);
gfx11_opt_set_context_reg(R_02823C_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK, gfx11_opt_set_context_reg(R_02823C_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK,
shader->ps.cb_shader_mask); shader->ps.cb_shader_mask);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
@ -2057,17 +2057,17 @@ static void gfx12_emit_shader_ps(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028650_SPI_SHADER_Z_FORMAT, SI_TRACKED_SPI_SHADER_Z_FORMAT, gfx12_opt_set_context_reg(R_028650_SPI_SHADER_Z_FORMAT, AC_TRACKED_SPI_SHADER_Z_FORMAT,
shader->ps.spi_shader_z_format); shader->ps.spi_shader_z_format);
gfx12_opt_set_context_reg(R_028654_SPI_SHADER_COL_FORMAT, SI_TRACKED_SPI_SHADER_COL_FORMAT, gfx12_opt_set_context_reg(R_028654_SPI_SHADER_COL_FORMAT, AC_TRACKED_SPI_SHADER_COL_FORMAT,
shader->ps.spi_shader_col_format); shader->ps.spi_shader_col_format);
gfx12_opt_set_context_reg(R_02865C_SPI_PS_INPUT_ENA, SI_TRACKED_SPI_PS_INPUT_ENA, gfx12_opt_set_context_reg(R_02865C_SPI_PS_INPUT_ENA, AC_TRACKED_SPI_PS_INPUT_ENA,
shader->ps.spi_ps_input_ena); shader->ps.spi_ps_input_ena);
gfx12_opt_set_context_reg(R_028660_SPI_PS_INPUT_ADDR, SI_TRACKED_SPI_PS_INPUT_ADDR, gfx12_opt_set_context_reg(R_028660_SPI_PS_INPUT_ADDR, AC_TRACKED_SPI_PS_INPUT_ADDR,
shader->ps.spi_ps_input_addr); shader->ps.spi_ps_input_addr);
gfx12_opt_set_context_reg(R_028854_CB_SHADER_MASK, SI_TRACKED_CB_SHADER_MASK, gfx12_opt_set_context_reg(R_028854_CB_SHADER_MASK, AC_TRACKED_CB_SHADER_MASK,
shader->ps.cb_shader_mask); shader->ps.cb_shader_mask);
gfx12_opt_set_context_reg(R_028BBC_PA_SC_HISZ_CONTROL, SI_TRACKED_PA_SC_HISZ_CONTROL, gfx12_opt_set_context_reg(R_028BBC_PA_SC_HISZ_CONTROL, AC_TRACKED_PA_SC_HISZ_CONTROL,
shader->ps.pa_sc_hisz_control); shader->ps.pa_sc_hisz_control);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
@ -4638,14 +4638,14 @@ static void si_emit_vgt_pipeline_state(struct si_context *sctx, unsigned index)
radeon_opt_set_context_reg(sctx->gfx_level >= GFX12 ? radeon_opt_set_context_reg(sctx->gfx_level >= GFX12 ?
R_028A98_VGT_SHADER_STAGES_EN : R_028A98_VGT_SHADER_STAGES_EN :
R_028B54_VGT_SHADER_STAGES_EN, R_028B54_VGT_SHADER_STAGES_EN,
SI_TRACKED_VGT_SHADER_STAGES_EN, sctx->vgt_shader_stages_en); AC_TRACKED_VGT_SHADER_STAGES_EN, sctx->vgt_shader_stages_en);
if (sctx->gfx_level == GFX10_3) { if (sctx->gfx_level == GFX10_3) {
/* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */ /* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */
bool has_legacy_tess_gs = G_028B54_HS_EN(sctx->vgt_shader_stages_en) && bool has_legacy_tess_gs = G_028B54_HS_EN(sctx->vgt_shader_stages_en) &&
G_028B54_GS_EN(sctx->vgt_shader_stages_en) && G_028B54_GS_EN(sctx->vgt_shader_stages_en) &&
!G_028B54_PRIMGEN_EN(sctx->vgt_shader_stages_en); /* !NGG */ !G_028B54_PRIMGEN_EN(sctx->vgt_shader_stages_en); /* !NGG */
radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, SI_TRACKED_VGT_REUSE_OFF, radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, AC_TRACKED_VGT_REUSE_OFF,
S_028AB4_REUSE_OFF(has_legacy_tess_gs)); S_028AB4_REUSE_OFF(has_legacy_tess_gs));
} }
radeon_end_update_context_roll(); radeon_end_update_context_roll();
@ -4659,7 +4659,7 @@ static void si_emit_vgt_pipeline_state(struct si_context *sctx, unsigned index)
} }
radeon_begin_again(cs); radeon_begin_again(cs);
radeon_opt_set_uconfig_reg(R_03096C_GE_CNTL, SI_TRACKED_GE_CNTL, ge_cntl); radeon_opt_set_uconfig_reg(R_03096C_GE_CNTL, AC_TRACKED_GE_CNTL, ge_cntl);
radeon_end(); radeon_end();
} }
} }
@ -4935,25 +4935,25 @@ static void gfx6_emit_tess_io_layout_state(struct si_context *sctx, unsigned ind
radeon_begin(cs); radeon_begin(cs);
if (sctx->screen->info.has_set_sh_pairs_packed) { if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS, gfx11_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2); AC_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2);
/* Set userdata SGPRs for merged LS-HS. */ /* Set userdata SGPRs for merged LS-HS. */
gfx11_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 + gfx11_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 +
GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT, AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT,
sctx->tcs_offchip_layout); sctx->tcs_offchip_layout);
gfx11_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 + gfx11_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 +
GFX9_SGPR_TCS_OFFCHIP_ADDR * 4, GFX9_SGPR_TCS_OFFCHIP_ADDR * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR, AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR,
sctx->tes_offchip_ring_va_sgpr); sctx->tes_offchip_ring_va_sgpr);
} else if (sctx->gfx_level >= GFX9) { } else if (sctx->gfx_level >= GFX9) {
radeon_opt_set_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS, radeon_opt_set_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2); AC_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2);
/* Set userdata SGPRs for merged LS-HS. */ /* Set userdata SGPRs for merged LS-HS. */
radeon_opt_set_sh_reg2(R_00B430_SPI_SHADER_USER_DATA_HS_0 + radeon_opt_set_sh_reg2(R_00B430_SPI_SHADER_USER_DATA_HS_0 +
GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT, AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT,
sctx->tcs_offchip_layout, sctx->tes_offchip_ring_va_sgpr); sctx->tcs_offchip_layout, sctx->tes_offchip_ring_va_sgpr);
} else { } else {
/* Due to a hw bug, RSRC2_LS must be written twice with another /* Due to a hw bug, RSRC2_LS must be written twice with another
@ -4967,7 +4967,7 @@ static void gfx6_emit_tess_io_layout_state(struct si_context *sctx, unsigned ind
/* Set userdata SGPRs for TCS. */ /* Set userdata SGPRs for TCS. */
radeon_opt_set_sh_reg3(R_00B430_SPI_SHADER_USER_DATA_HS_0 + radeon_opt_set_sh_reg3(R_00B430_SPI_SHADER_USER_DATA_HS_0 +
GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT, AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT,
sctx->tcs_offchip_layout, sctx->tes_offchip_ring_va_sgpr, sctx->tcs_offchip_layout, sctx->tes_offchip_ring_va_sgpr,
sctx->current_vs_state); sctx->current_vs_state);
} }
@ -4982,18 +4982,18 @@ static void gfx6_emit_tess_io_layout_state(struct si_context *sctx, unsigned ind
*/ */
if (sctx->screen->info.has_set_sh_pairs_packed) { if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, gfx11_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX, AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
sctx->tes_offchip_layout); sctx->tes_offchip_layout);
gfx11_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_ADDR * 4, gfx11_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_ADDR * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID, AC_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID,
sctx->tes_offchip_ring_va_sgpr); sctx->tes_offchip_ring_va_sgpr);
} else if (sctx->ngg || sctx->shader.gs.cso) { } else if (sctx->ngg || sctx->shader.gs.cso) {
radeon_opt_set_sh_reg2(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, radeon_opt_set_sh_reg2(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX, AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
sctx->tes_offchip_layout, sctx->tes_offchip_ring_va_sgpr); sctx->tes_offchip_layout, sctx->tes_offchip_ring_va_sgpr);
} else { } else {
radeon_opt_set_sh_reg2(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, radeon_opt_set_sh_reg2(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX, AC_TRACKED_SPI_SHADER_USER_DATA_VS__BASE_VERTEX,
sctx->tes_offchip_layout, sctx->tes_offchip_ring_va_sgpr); sctx->tes_offchip_layout, sctx->tes_offchip_ring_va_sgpr);
} }
radeon_end(); radeon_end();
@ -5001,10 +5001,10 @@ static void gfx6_emit_tess_io_layout_state(struct si_context *sctx, unsigned ind
radeon_begin_again(cs); radeon_begin_again(cs);
if (sctx->gfx_level >= GFX7) { if (sctx->gfx_level >= GFX7) {
radeon_opt_set_context_reg_idx(R_028B58_VGT_LS_HS_CONFIG, radeon_opt_set_context_reg_idx(R_028B58_VGT_LS_HS_CONFIG,
SI_TRACKED_VGT_LS_HS_CONFIG, 2, sctx->ls_hs_config); AC_TRACKED_VGT_LS_HS_CONFIG, 2, sctx->ls_hs_config);
} else { } else {
radeon_opt_set_context_reg(R_028B58_VGT_LS_HS_CONFIG, radeon_opt_set_context_reg(R_028B58_VGT_LS_HS_CONFIG,
SI_TRACKED_VGT_LS_HS_CONFIG, sctx->ls_hs_config); AC_TRACKED_VGT_LS_HS_CONFIG, sctx->ls_hs_config);
} }
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }
@ -5017,15 +5017,15 @@ static void gfx12_emit_tess_io_layout_state(struct si_context *sctx, unsigned in
return; return;
gfx12_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS, gfx12_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2); AC_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2);
/* Set userdata SGPRs for merged LS-HS. */ /* Set userdata SGPRs for merged LS-HS. */
gfx12_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 + gfx12_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 +
GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT, AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_LAYOUT,
sctx->tcs_offchip_layout); sctx->tcs_offchip_layout);
gfx12_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 + gfx12_opt_push_gfx_sh_reg(R_00B430_SPI_SHADER_USER_DATA_HS_0 +
GFX9_SGPR_TCS_OFFCHIP_ADDR * 4, GFX9_SGPR_TCS_OFFCHIP_ADDR * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR, AC_TRACKED_SPI_SHADER_USER_DATA_HS__TCS_OFFCHIP_ADDR,
sctx->tes_offchip_ring_va_sgpr); sctx->tes_offchip_ring_va_sgpr);
/* Set userdata SGPRs for TES. */ /* Set userdata SGPRs for TES. */
@ -5037,15 +5037,15 @@ static void gfx12_emit_tess_io_layout_state(struct si_context *sctx, unsigned in
* for tessellation and are unused in TES. * for tessellation and are unused in TES.
*/ */
gfx12_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, gfx12_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX, AC_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
sctx->tes_offchip_layout); sctx->tes_offchip_layout);
gfx12_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_ADDR * 4, gfx12_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_ADDR * 4,
SI_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID, AC_TRACKED_SPI_SHADER_USER_DATA_ES__DRAWID,
sctx->tes_offchip_ring_va_sgpr); sctx->tes_offchip_ring_va_sgpr);
radeon_begin(cs); radeon_begin(cs);
radeon_opt_set_context_reg_idx(R_028B58_VGT_LS_HS_CONFIG, radeon_opt_set_context_reg_idx(R_028B58_VGT_LS_HS_CONFIG,
SI_TRACKED_VGT_LS_HS_CONFIG, 2, sctx->ls_hs_config); AC_TRACKED_VGT_LS_HS_CONFIG, 2, sctx->ls_hs_config);
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} }
@ -5067,11 +5067,11 @@ static void si_emit_spi_map(struct si_context *sctx, unsigned index)
unsigned spi_ps_in_control = ps->ps.spi_ps_in_control; unsigned spi_ps_in_control = ps->ps.spi_ps_in_control;
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
gfx12_opt_push_gfx_sh_reg(R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gfx12_opt_push_gfx_sh_reg(R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS,
SI_TRACKED_SPI_SHADER_GS_OUT_CONFIG_PS, AC_TRACKED_SPI_SHADER_GS_OUT_CONFIG_PS,
vs->ngg.spi_vs_out_config | ps->ps.spi_gs_out_config_ps); vs->ngg.spi_vs_out_config | ps->ps.spi_gs_out_config_ps);
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_028640_SPI_PS_IN_CONTROL, SI_TRACKED_SPI_PS_IN_CONTROL, radeon_opt_set_context_reg(R_028640_SPI_PS_IN_CONTROL, AC_TRACKED_SPI_PS_IN_CONTROL,
spi_ps_in_control); spi_ps_in_control);
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} else { } else {
@ -5093,7 +5093,7 @@ static void si_emit_spi_map(struct si_context *sctx, unsigned index)
} }
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg(R_0286D8_SPI_PS_IN_CONTROL, SI_TRACKED_SPI_PS_IN_CONTROL, radeon_opt_set_context_reg(R_0286D8_SPI_PS_IN_CONTROL, AC_TRACKED_SPI_PS_IN_CONTROL,
spi_ps_in_control); spi_ps_in_control);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }

View file

@ -364,39 +364,39 @@ static void si_emit_guardband(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL, gfx12_opt_set_context_reg(R_028BE4_PA_SU_VTX_CNTL, AC_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl); pa_su_vtx_cntl);
gfx12_opt_set_context_reg4(R_02842C_PA_CL_GB_VERT_CLIP_ADJ, gfx12_opt_set_context_reg4(R_02842C_PA_CL_GB_VERT_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
fui(guardband_y), fui(discard_y), fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x)); fui(guardband_x), fui(discard_x));
gfx12_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, gfx12_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset); pa_su_hardware_screen_offset);
gfx12_end_context_regs(); gfx12_end_context_regs();
radeon_end(); /* don't track context rolls on GFX12 */ radeon_end(); /* don't track context rolls on GFX12 */
} else if (sctx->screen->info.has_set_context_pairs_packed) { } else if (sctx->screen->info.has_set_context_pairs_packed) {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
gfx11_begin_packed_context_regs(); gfx11_begin_packed_context_regs();
gfx11_opt_set_context_reg(R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL, gfx11_opt_set_context_reg(R_028BE4_PA_SU_VTX_CNTL, AC_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl); pa_su_vtx_cntl);
gfx11_opt_set_context_reg4(R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, gfx11_opt_set_context_reg4(R_028BE8_PA_CL_GB_VERT_CLIP_ADJ,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, AC_TRACKED_PA_CL_GB_VERT_CLIP_ADJ,
fui(guardband_y), fui(discard_y), fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x)); fui(guardband_x), fui(discard_x));
gfx11_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, gfx11_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset); pa_su_hardware_screen_offset);
gfx11_end_packed_context_regs(); gfx11_end_packed_context_regs();
radeon_end(); /* don't track context rolls on GFX11 */ radeon_end(); /* don't track context rolls on GFX11 */
} else { } else {
radeon_begin(&sctx->gfx_cs); radeon_begin(&sctx->gfx_cs);
radeon_opt_set_context_reg5(R_028BE4_PA_SU_VTX_CNTL, SI_TRACKED_PA_SU_VTX_CNTL, radeon_opt_set_context_reg5(R_028BE4_PA_SU_VTX_CNTL, AC_TRACKED_PA_SU_VTX_CNTL,
pa_su_vtx_cntl, pa_su_vtx_cntl,
fui(guardband_y), fui(discard_y), fui(guardband_y), fui(discard_y),
fui(guardband_x), fui(discard_x)); fui(guardband_x), fui(discard_x));
radeon_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, radeon_opt_set_context_reg(R_028234_PA_SU_HARDWARE_SCREEN_OFFSET,
SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET, AC_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET,
pa_su_hardware_screen_offset); pa_su_hardware_screen_offset);
radeon_end_update_context_roll(); radeon_end_update_context_roll();
} }
@ -720,7 +720,7 @@ static void si_emit_window_rectangles(struct si_context *sctx, unsigned index)
if (sctx->gfx_level >= GFX12) { if (sctx->gfx_level >= GFX12) {
radeon_begin(cs); radeon_begin(cs);
gfx12_begin_context_regs(); gfx12_begin_context_regs();
gfx12_opt_set_context_reg(R_02820C_PA_SC_CLIPRECT_RULE, SI_TRACKED_PA_SC_CLIPRECT_RULE, rule); gfx12_opt_set_context_reg(R_02820C_PA_SC_CLIPRECT_RULE, AC_TRACKED_PA_SC_CLIPRECT_RULE, rule);
if (num_rectangles) { if (num_rectangles) {
for (unsigned i = 0; i < num_rectangles; i++) { for (unsigned i = 0; i < num_rectangles; i++) {
@ -742,7 +742,7 @@ static void si_emit_window_rectangles(struct si_context *sctx, unsigned index)
radeon_end(); radeon_end();
} else { } else {
radeon_begin(cs); radeon_begin(cs);
radeon_opt_set_context_reg(R_02820C_PA_SC_CLIPRECT_RULE, SI_TRACKED_PA_SC_CLIPRECT_RULE, radeon_opt_set_context_reg(R_02820C_PA_SC_CLIPRECT_RULE, AC_TRACKED_PA_SC_CLIPRECT_RULE,
rule); rule);
if (num_rectangles) { if (num_rectangles) {
radeon_set_context_reg_seq(R_028210_PA_SC_CLIPRECT_0_TL, num_rectangles * 2); radeon_set_context_reg_seq(R_028210_PA_SC_CLIPRECT_0_TL, num_rectangles * 2);