radeonsi: merge context_reg_saved_mask and other_reg_saved_mask into a BITSET

There will be more than 64 context registers that we'll need to track,
so use BITSET for all of them.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26307>
This commit is contained in:
Marek Olšák 2023-11-26 09:23:41 -05:00 committed by Marge Bot
parent fb994f44d9
commit 17e01a9a9b
5 changed files with 182 additions and 176 deletions

View file

@ -68,88 +68,96 @@
radeon_emit(value); \
} while (0)
#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet, category) do { \
#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet) do { \
unsigned __value = (value); \
if (!((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x1) || \
sctx->tracked_regs.category##_reg_value[(reg_enum)] != __value) { \
if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
sctx->tracked_regs.reg_value[(reg_enum)] != __value) { \
radeon_set_reg(reg, idx, __value, prefix_name, packet); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_BIT(reg_enum); \
sctx->tracked_regs.category##_reg_value[(reg_enum)] = __value; \
BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
} \
} while (0)
/* Set consecutive registers if any value is different. */
#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet, category) do { \
#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet) do { \
unsigned __v1 = (v1), __v2 = (v2); \
if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x3) != 0x3 || \
sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2) { \
if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 1, 0x3) || \
sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2) { \
radeon_set_reg_seq(reg, 2, 0, prefix_name, packet, 0); \
radeon_emit(__v1); \
radeon_emit(__v2); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_RANGE(reg_enum, 2); \
sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 1); \
sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
} \
} while (0)
#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet, category) do { \
#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet) do { \
unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3); \
if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x7) != 0x7 || \
sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3) { \
if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 2, 0x7) || \
sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3) { \
radeon_set_reg_seq(reg, 3, 0, prefix_name, packet, 0); \
radeon_emit(__v1); \
radeon_emit(__v2); \
radeon_emit(__v3); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_RANGE(reg_enum, 3); \
sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 2); \
sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
} \
} while (0)
#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, packet, category) do { \
#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, packet) do { \
unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0xf) != 0xf || \
sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] != __v4) { \
if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 3, 0xf) || \
sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
radeon_set_reg_seq(reg, 4, 0, prefix_name, packet, 0); \
radeon_emit(__v1); \
radeon_emit(__v2); \
radeon_emit(__v3); \
radeon_emit(__v4); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_RANGE(reg_enum, 4); \
sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] = __v4; \
BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 3); \
sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
} \
} while (0)
#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, packet, category) do { \
#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, packet) do { \
unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4), __v5 = (v5); \
if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x1f) != 0x1f || \
sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] != __v4 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 4] != __v5) { \
if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 4, 0x1f) || \
sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4 || \
sctx->tracked_regs.reg_value[(reg_enum) + 4] != __v5) { \
radeon_set_reg_seq(reg, 5, 0, prefix_name, packet, 0); \
radeon_emit(__v1); \
radeon_emit(__v2); \
radeon_emit(__v3); \
radeon_emit(__v4); \
radeon_emit(__v5); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_RANGE(reg_enum, 5); \
sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] = __v4; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 4] = __v5; \
BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 4); \
sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
} \
} while (0)
@ -174,22 +182,22 @@
radeon_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_reg(_unused, reg, reg_enum, value) \
radeon_opt_set_reg(reg, reg_enum, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG, context)
radeon_opt_set_reg(reg, reg_enum, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_reg_idx(_unused, reg, reg_enum, idx, value) \
radeon_opt_set_reg(reg, reg_enum, idx, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG, context)
radeon_opt_set_reg(reg, reg_enum, idx, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_reg2(_unused, reg, reg_enum, v1, v2) \
radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_CONTEXT, PKT3_SET_CONTEXT_REG, context)
radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_reg3(_unused, reg, reg_enum, v1, v2, v3) \
radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_CONTEXT, PKT3_SET_CONTEXT_REG, context)
radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_reg4(_unused, reg, reg_enum, v1, v2, v3, v4) \
radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, PKT3_SET_CONTEXT_REG, context)
radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_reg5(_unused, reg, reg_enum, v1, v2, v3, v4, v5) \
radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, SI_CONTEXT, PKT3_SET_CONTEXT_REG, context)
radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_opt_set_context_regn(_unused, reg, values, saved_values, num) \
radeon_opt_set_regn(reg, values, saved_values, num, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
@ -202,17 +210,17 @@
radeon_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)
#define radeon_opt_set_sh_reg(_unused, reg, reg_enum, value) \
radeon_opt_set_reg(reg, reg_enum, 0, value, SI_SH, PKT3_SET_SH_REG, other)
radeon_opt_set_reg(reg, reg_enum, 0, value, SI_SH, PKT3_SET_SH_REG)
#define radeon_opt_set_sh_reg2(_unused, reg, reg_enum, v1, v2) \
radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_SH, PKT3_SET_SH_REG, other)
radeon_opt_set_reg2(reg, reg_enum, v1, v2, SI_SH, PKT3_SET_SH_REG)
#define radeon_opt_set_sh_reg3(_unused, reg, reg_enum, v1, v2, v3) \
radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_SH, PKT3_SET_SH_REG, other)
radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, SI_SH, PKT3_SET_SH_REG)
#define radeon_opt_set_sh_reg_idx(_unused, reg, reg_enum, idx, value) do { \
assert(sctx->gfx_level >= GFX10); \
radeon_opt_set_reg(reg, reg_enum, idx, value, SI_SH, PKT3_SET_SH_REG_INDEX, other); \
radeon_opt_set_reg(reg, reg_enum, idx, value, SI_SH, PKT3_SET_SH_REG_INDEX); \
} while (0)
#define radeon_emit_32bit_pointer(_unused, va) do { \
@ -236,7 +244,7 @@
radeon_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
#define radeon_opt_set_uconfig_reg(_unused, reg, reg_enum, value) \
radeon_opt_set_reg(reg, reg_enum, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, other)
radeon_opt_set_reg(reg, reg_enum, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
#define RESOLVE_PKT3_SET_UCONFIG_REG_INDEX \
(GFX_VERSION >= GFX10 || (GFX_VERSION == GFX9 && sctx->screen->info.me_fw_version >= 26) ? \
@ -246,7 +254,7 @@
radeon_set_reg(reg, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)
#define radeon_opt_set_uconfig_reg_idx(_unused, _unused2, reg, reg_enum, idx, value) \
radeon_opt_set_reg(reg, reg_enum, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX, other)
radeon_opt_set_reg(reg, reg_enum, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)
#define radeon_set_privileged_config_reg(reg, value) do { \
assert((reg) < CIK_UCONFIG_REG_OFFSET); \
@ -268,35 +276,37 @@
buffer[__i / 2].reg_value[__i % 2] = value; \
} while (0)
#define gfx11_opt_push_reg(reg, reg_enum, value, prefix_name, category, buffer, reg_count) do { \
#define gfx11_opt_push_reg(reg, reg_enum, value, prefix_name, buffer, reg_count) do { \
unsigned __value = value; \
if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0x1) != 0x1 || \
sctx->tracked_regs.category##_reg_value[reg_enum] != __value) { \
if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) || \
sctx->tracked_regs.reg_value[reg_enum] != __value) { \
gfx11_push_reg(reg, __value, prefix_name, buffer, reg_count); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_BIT(reg_enum); \
sctx->tracked_regs.category##_reg_value[reg_enum] = __value; \
BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
sctx->tracked_regs.reg_value[reg_enum] = __value; \
} \
} while (0)
#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, category, buffer, reg_count) do { \
#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, buffer, reg_count) do { \
unsigned __v1 = (v1); \
unsigned __v2 = (v2); \
unsigned __v3 = (v3); \
unsigned __v4 = (v4); \
if (((sctx->tracked_regs.category##_reg_saved_mask >> (reg_enum)) & 0xf) != 0xf || \
sctx->tracked_regs.category##_reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] != __v3 || \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] != __v4) { \
if (!BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 3, 0xf) || \
sctx->tracked_regs.reg_value[(reg_enum)] != __v1 || \
sctx->tracked_regs.reg_value[(reg_enum) + 1] != __v2 || \
sctx->tracked_regs.reg_value[(reg_enum) + 2] != __v3 || \
sctx->tracked_regs.reg_value[(reg_enum) + 3] != __v4) { \
gfx11_push_reg((reg), __v1, prefix_name, buffer, reg_count); \
gfx11_push_reg((reg) + 4, __v2, prefix_name, buffer, reg_count); \
gfx11_push_reg((reg) + 8, __v3, prefix_name, buffer, reg_count); \
gfx11_push_reg((reg) + 12, __v4, prefix_name, buffer, reg_count); \
sctx->tracked_regs.category##_reg_saved_mask |= BITFIELD64_RANGE((reg_enum), 4); \
sctx->tracked_regs.category##_reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 2] = __v3; \
sctx->tracked_regs.category##_reg_value[(reg_enum) + 3] = __v4; \
BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
(reg_enum), (reg_enum) + 3); \
sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
} \
} while (0)
@ -310,11 +320,11 @@
sctx->num_buffered_compute_sh_regs)
#define gfx11_opt_push_gfx_sh_reg(reg, reg_enum, value) \
gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, other, sctx->gfx11.buffered_gfx_sh_regs, \
gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
sctx->num_buffered_gfx_sh_regs)
#define gfx11_opt_push_compute_sh_reg(reg, reg_enum, value) \
gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, other, sctx->gfx11.buffered_compute_sh_regs, \
gfx11_opt_push_reg(reg, reg_enum, value, SI_SH, sctx->gfx11.buffered_compute_sh_regs, \
sctx->num_buffered_compute_sh_regs)
/* GFX11 packet building helpers for SET_CONTEXT_REG_PAIRS_PACKED.
@ -328,11 +338,11 @@
gfx11_push_reg(reg, value, SI_CONTEXT, __cs_context_regs, __cs_context_reg_count)
#define gfx11_opt_set_context_reg(reg, reg_enum, value) \
gfx11_opt_push_reg(reg, reg_enum, value, SI_CONTEXT, context, __cs_context_regs, \
gfx11_opt_push_reg(reg, reg_enum, value, SI_CONTEXT, __cs_context_regs, \
__cs_context_reg_count)
#define gfx11_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, context, __cs_context_regs, \
gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, SI_CONTEXT, __cs_context_regs, \
__cs_context_reg_count)
#define gfx11_end_packed_context_regs() do { \

View file

@ -999,8 +999,10 @@ static void si_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info
si_compute_resources_add_all_to_bo_list(sctx);
/* Skipping setting redundant registers on compute queues breaks compute. */
if (!sctx->has_graphics)
sctx->tracked_regs.other_reg_saved_mask = 0;
if (!sctx->has_graphics) {
   /* Forget every tracked SH/UCONFIG register value (clear its "saved" bit)
    * so that the radeon_opt_set_* helpers re-emit it unconditionally.
    * Setting the bits instead would mark the stale values as known and let
    * the helpers skip the writes, which is exactly what must not happen on
    * compute queues.
    */
   BITSET_CLEAR_RANGE(sctx->tracked_regs.reg_saved_mask,
                      SI_FIRST_TRACKED_OTHER_REG, SI_NUM_ALL_TRACKED_REGS - 1);
}
/* First emit registers. */
bool prefetch;

View file

@ -204,83 +204,83 @@ static void si_add_gds_to_buffer_list(struct si_context *sctx)
void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
{
STATIC_ASSERT(SI_NUM_TRACKED_CONTEXT_REGS <= sizeof(ctx->tracked_regs.context_reg_saved_mask) * 8);
STATIC_ASSERT(SI_NUM_ALL_TRACKED_REGS <= sizeof(ctx->tracked_regs.reg_saved_mask) * 8);
ctx->tracked_regs.context_reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_COUNT_CONTROL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_LINE_CNTL] = 0x1000;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_CNTL] = 0x1000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SU_VTX_CNTL] = 0x5;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_VTX_CNTL] = 0x5;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_POS_FORMAT] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_BARYC_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_Z_FORMAT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_BARYC_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ENA] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_INPUT_ADDR] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_DB_EQAA] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff;
ctx->tracked_regs.context_reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x90000;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_CL_VTE_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_LINE_STIPPLE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x2;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_SHADER_STAGES_EN] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_LS_HS_CONFIG] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_TF_PARAM] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x3;
ctx->tracked_regs.context_reg_value[SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_GE_NGG_SUBGRP_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x90000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VTE_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_CLIPRECT_RULE] = 0xffff;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_STIPPLE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_PS_IN_CONTROL] = 0x2;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_INSTANCE_CNT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_VERT_OUT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_SHADER_STAGES_EN] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_LS_HS_CONFIG] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x3;
ctx->tracked_regs.reg_value[SI_TRACKED_GE_MAX_OUTPUT_PER_SUBGROUP] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_GE_NGG_SUBGRP_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_REUSE_OFF] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_IA_MULTI_VGT_PARAM] = 0xff;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_ESGS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_REUSE_OFF] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_IA_MULTI_VGT_PARAM] = 0xff;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MAX_PRIMS_PER_SUBGROUP] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_ONCHIP_CNTL] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_MODE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x1e;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_ITEMSIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_MODE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x1e;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_OUT_PRIM_TYPE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_1] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_2] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GSVS_RING_OFFSET_3] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_1] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_2] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_GS_VERT_ITEMSIZE_3] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_VGT_PRIMITIVEID_EN] = 0;
ctx->tracked_regs.context_reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_RENDER_OVERRIDE2] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_VS_OUT_CONFIG] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_PRIMITIVEID_EN] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0;
/* Set all cleared context registers to saved. */
ctx->tracked_regs.context_reg_saved_mask = BITFIELD64_MASK(SI_NUM_TRACKED_CONTEXT_REGS);
BITSET_SET_RANGE(ctx->tracked_regs.reg_saved_mask, 0, SI_NUM_TRACKED_CONTEXT_REGS - 1);
}
void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper,
@ -512,17 +512,14 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
si_mark_atom_dirty(ctx, &ctx->atoms.s.vgt_pipeline_state);
si_mark_atom_dirty(ctx, &ctx->atoms.s.tess_io_layout);
if (has_clear_state) {
/* Set all register values to unknown. */
BITSET_ZERO(ctx->tracked_regs.reg_saved_mask);
if (has_clear_state)
si_set_tracked_regs_to_clear_state(ctx);
} else {
/* Set all register values to unknown. */
ctx->tracked_regs.context_reg_saved_mask = 0;
}
/* 0xffffffff is an impossible value to register SPI_PS_INPUT_CNTL_n */
/* 0xffffffff is an impossible value for SPI_PS_INPUT_CNTL_n registers */
memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
ctx->tracked_regs.other_reg_saved_mask = 0; /* unknown values */
}
/* Invalidate various draw states so that they are emitted before

View file

@ -9,6 +9,7 @@
#include "si_pm4.h"
#include "util/format/u_format.h"
#include "util/bitset.h"
#ifdef __cplusplus
extern "C" {
@ -237,9 +238,10 @@ struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS];
};
/* Context registers whose values are tracked by si_context. */
enum si_tracked_context_reg
/* Registers whose values are tracked by si_context. */
enum si_tracked_reg
{
/* CONTEXT registers. */
/* 2 consecutive registers */
SI_TRACKED_DB_RENDER_CONTROL,
SI_TRACKED_DB_COUNT_CONTROL,
@ -326,11 +328,10 @@ enum si_tracked_context_reg
SI_TRACKED_CB_DCC_CONTROL, /* GFX8-xx (TBD) */
SI_NUM_TRACKED_CONTEXT_REGS,
};
SI_FIRST_TRACKED_OTHER_REG = SI_NUM_TRACKED_CONTEXT_REGS,
/* Non-context registers whose values are tracked by si_context. */
enum si_tracked_other_reg {
SI_TRACKED_GE_PC_ALLOC, /* GFX10+ */
/* SH and UCONFIG registers. */
SI_TRACKED_GE_PC_ALLOC = SI_FIRST_TRACKED_OTHER_REG, /* GFX10+ */
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, /* GFX7+ */
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS, /* GFX10+ */
SI_TRACKED_VGT_GS_OUT_PRIM_TYPE_UCONFIG, /* GFX11+ */
@ -372,7 +373,7 @@ enum si_tracked_other_reg {
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, /* GFX11+ */
SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_HI, /* GFX11+ */
SI_NUM_TRACKED_OTHER_REGS,
SI_NUM_ALL_TRACKED_REGS,
};
/* For 3 draw constants: BaseVertex, DrawID, StartInstance */
@ -383,12 +384,9 @@ enum si_tracked_other_reg {
#define BASEVERTEX_DRAWID_STARTINSTANCE_MASK (BASEVERTEX_MASK | DRAWID_MASK | STARTINSTANCE_MASK)
struct si_tracked_regs {
uint64_t context_reg_saved_mask;
uint32_t context_reg_value[SI_NUM_TRACKED_CONTEXT_REGS];
BITSET_DECLARE(reg_saved_mask, SI_NUM_ALL_TRACKED_REGS);
uint32_t reg_value[SI_NUM_ALL_TRACKED_REGS];
uint32_t spi_ps_input_cntl[32];
uint32_t other_reg_saved_mask;
uint32_t other_reg_value[SI_NUM_TRACKED_OTHER_REGS];
};
/* Private read-write buffer slots. */

View file

@ -1041,7 +1041,7 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx,
if (GFX_VERSION == GFX9) {
/* Workaround for SpecviewPerf13 Catia hang on GFX9. */
if (prim != sctx->last_prim)
sctx->tracked_regs.other_reg_saved_mask &= ~BITFIELD64_BIT(SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG);
radeon_opt_set_uconfig_reg_idx(sctx, GFX_VERSION, R_030960_IA_MULTI_VGT_PARAM,
SI_TRACKED_IA_MULTI_VGT_PARAM_UCONFIG,
@ -1347,8 +1347,9 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
}
/* Invalidate tracked draw constants because DrawIndirect overwrites them. */
sctx->tracked_regs.other_reg_saved_mask &=
~(BASEVERTEX_DRAWID_STARTINSTANCE_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 2); /* StartInstance */
sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;
radeon_emit(PKT3(PKT3_SET_BASE, 2, 0));
@ -1460,8 +1461,9 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
*/
if (is_blit) {
/* Re-emit draw constants after we leave u_blitter. */
sctx->tracked_regs.other_reg_saved_mask &=
~(BASEVERTEX_DRAWID_STARTINSTANCE_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 2); /* StartInstance */
/* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. */
radeon_set_sh_reg_seq(sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4, sctx->num_vs_blit_sgprs);
@ -1510,8 +1512,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
}
if (num_draws > 1) {
sctx->tracked_regs.other_reg_saved_mask &=
~(BASEVERTEX_DRAWID_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
}
} else {
/* Only DrawID varies. */
@ -1529,8 +1531,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
}
if (num_draws > 1) {
sctx->tracked_regs.other_reg_saved_mask &=
~(DRAWID_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
}
}
} else {
@ -1550,8 +1551,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_DMA); /* NOT_EOP disabled */
}
if (num_draws > 1) {
sctx->tracked_regs.other_reg_saved_mask &=
~(BASEVERTEX_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
}
} else {
/* DrawID and BaseVertex are constant. */
@ -1594,8 +1594,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
}
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) {
sctx->tracked_regs.other_reg_saved_mask &=
~(BASEVERTEX_DRAWID_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg + 1); /* DrawID */
}
} else {
for (unsigned i = 0; i < num_draws; i++) {
@ -1607,8 +1607,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
}
if (num_draws > 1 && (IS_DRAW_VERTEX_STATE || !sctx->num_vs_blit_sgprs)) {
sctx->tracked_regs.other_reg_saved_mask &=
~(BASEVERTEX_MASK << tracked_base_vertex_reg);
BITSET_CLEAR(sctx->tracked_regs.reg_saved_mask, tracked_base_vertex_reg); /* BaseVertex */
}
}
}