amd,radv,radeonsi: add and use more ac_cmdbuf_XXX helpers

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37956>
This commit is contained in:
Samuel Pitoiset 2025-10-20 10:47:50 +02:00 committed by Marge Bot
parent a4a834859e
commit bc1080e27f
4 changed files with 64 additions and 58 deletions

View file

@ -81,11 +81,36 @@ struct ac_cmdbuf {
#define ac_cmdbuf_set_uconfig_reg(reg, value) __ac_cmdbuf_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
/*
* Begin a SET_UCONFIG_REG packet for `num` performance-counter registers
* starting at `reg`, by forwarding to __ac_cmdbuf_set_reg_seq with index 0.
* The register values themselves are not emitted here; the single-register
* variant, for instance, follows this macro with one ac_cmdbuf_emit(value).
*
* On GFX10 (the check below also covers newer generations), there is a bug
* in the ME implementation of its content addressable memory (CAM): it does
* not correctly take the GRBM_GFX_INDEX fields into account and can
* therefore skip register writes. Passing the __filter_cam_workaround bit
* forces the write. The bit is only set when `ip_type` is AMD_IP_GFX.
*/
#define ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, reg, num) \
do { \
const bool __filter_cam_workaround = (gfx_level) >= GFX10 && (ip_type) == AMD_IP_GFX; \
__ac_cmdbuf_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, __filter_cam_workaround); \
} while (0)
/*
* Write one performance-counter UCONFIG register: start a one-register
* sequence with ac_cmdbuf_set_uconfig_perfctr_reg_seq (which decides from
* `gfx_level` and `ip_type` whether the CAM workaround bit is needed), then
* emit `value` as the register contents.
*/
#define ac_cmdbuf_set_uconfig_perfctr_reg(gfx_level, ip_type, reg, value) \
do { \
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, reg, 1); \
ac_cmdbuf_emit(value); \
} while (0)
/* Packet building helpers for CONTEXT registers.
*
* Each macro forwards to the generic __ac_cmdbuf_set_reg* helper, passing
* SI_CONTEXT and the PKT3_SET_CONTEXT_REG opcode:
* - _seq: starts a packet for `num` registers beginning at `reg` (index 0,
*   trailing flag 0); presumably the caller emits the values afterwards.
* - plain: writes `value` to `reg` with index 0.
* - _idx: writes `value` to `reg` with the caller-supplied `idx`.
*/
#define ac_cmdbuf_set_context_reg_seq(reg, num) __ac_cmdbuf_set_reg_seq(reg, num, 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)
#define ac_cmdbuf_set_context_reg(reg, value) __ac_cmdbuf_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define ac_cmdbuf_set_context_reg_idx(reg, idx, value) __ac_cmdbuf_set_reg(reg, idx, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
/* Packet building helpers for SH registers.
*
* Both macros forward to the generic __ac_cmdbuf_set_reg* helper, passing
* SI_SH and the PKT3_SET_SH_REG opcode:
* - _seq: starts a packet for `num` registers beginning at `reg` (index 0,
*   trailing flag 0); presumably the caller emits the values afterwards.
* - plain: writes `value` to `reg` with index 0.
*/
#define ac_cmdbuf_set_sh_reg_seq(reg, num) __ac_cmdbuf_set_reg_seq(reg, num, 0, SI_SH, PKT3_SET_SH_REG, 0)
#define ac_cmdbuf_set_sh_reg(reg, value) __ac_cmdbuf_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)
#define ac_cmdbuf_event_write_predicate(event_type, predicate) \
do { \
unsigned __event_type = (event_type); \
@ -99,6 +124,17 @@ struct ac_cmdbuf {
/* Shorthand for ac_cmdbuf_event_write_predicate with the predicate argument fixed to false. */
#define ac_cmdbuf_event_write(event_type) ac_cmdbuf_event_write_predicate(event_type, false)
/*
 * Write `value` to a privileged config register using a COPY_DATA packet.
 *
 * `reg` must be below CIK_UCONFIG_REG_OFFSET (checked with assert). The packet
 * uses an immediate source (COPY_DATA_IMM) and the COPY_DATA_PERF destination,
 * and consists of six dwords: header, control word, value, an unused dword,
 * `reg >> 2`, and another unused dword.
 *
 * `reg` is captured in a local so the argument is evaluated exactly once even
 * when asserts are enabled, in the same way ac_cmdbuf_event_write_predicate
 * captures its event type.
 */
#define ac_cmdbuf_set_privileged_config_reg(reg, value) \
   do { \
      const unsigned __reg = (reg); \
      assert(__reg < CIK_UCONFIG_REG_OFFSET); \
      ac_cmdbuf_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
      ac_cmdbuf_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
      ac_cmdbuf_emit(value); \
      ac_cmdbuf_emit(0); /* unused */ \
      ac_cmdbuf_emit(__reg >> 2); \
      ac_cmdbuf_emit(0); /* unused */ \
   } while (0)
struct ac_preamble_state {
uint64_t border_color_va;

View file

@ -68,11 +68,11 @@ radeon_check_space(struct radeon_winsys *ws, struct ac_cmdbuf *cs, unsigned need
#define radeon_set_config_reg(reg, value) ac_cmdbuf_set_config_reg(reg, value)
/* Packet building helpers for CONTEXT registers. */
#define radeon_set_context_reg_seq(reg, num) __radeon_set_reg_seq(reg, num, 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)
#define radeon_set_context_reg_seq(reg, num) ac_cmdbuf_set_context_reg_seq(reg, num)
#define radeon_set_context_reg(reg, value) __radeon_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_set_context_reg(reg, value) ac_cmdbuf_set_context_reg(reg, value)
#define radeon_set_context_reg_idx(reg, idx, value) __radeon_set_reg(reg, idx, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
#define radeon_set_context_reg_idx(reg, idx, value) ac_cmdbuf_set_context_reg_idx(reg, idx, value)
#define radeon_opt_set_context_reg(reg, reg_enum, value) \
do { \
@ -154,9 +154,9 @@ radeon_check_space(struct radeon_winsys *ws, struct ac_cmdbuf *cs, unsigned need
} while (0)
/* Packet building helpers for SH registers. */
#define radeon_set_sh_reg_seq(reg, num) __radeon_set_reg_seq(reg, num, 0, SI_SH, PKT3_SET_SH_REG, 0)
#define radeon_set_sh_reg_seq(reg, num) ac_cmdbuf_set_sh_reg_seq(reg, num)
#define radeon_set_sh_reg(reg, value) __radeon_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)
#define radeon_set_sh_reg(reg, value) ac_cmdbuf_set_sh_reg(reg, value)
#define radeon_set_sh_reg_idx(info, reg, idx, value) \
do { \
@ -168,9 +168,9 @@ radeon_check_space(struct radeon_winsys *ws, struct ac_cmdbuf *cs, unsigned need
} while (0)
/* Packet building helpers for UCONFIG registers. */
#define radeon_set_uconfig_reg_seq(reg, num) __radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)
#define radeon_set_uconfig_reg_seq(reg, num) ac_cmdbuf_set_uconfig_reg_seq(reg, num)
#define radeon_set_uconfig_reg(reg, value) __radeon_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
#define radeon_set_uconfig_reg(reg, value) ac_cmdbuf_set_uconfig_reg(reg, value)
#define radeon_set_uconfig_reg_idx(info, reg, idx, value) \
do { \
@ -181,33 +181,13 @@ radeon_check_space(struct radeon_winsys *ws, struct ac_cmdbuf *cs, unsigned need
__radeon_set_reg(reg, idx, value, CIK_UCONFIG, __opcode); \
} while (0)
/*
* On GFX10, there is a bug with the ME implementation of its content addressable memory (CAM),
* that means that it can skip register writes due to not taking correctly into account the
* fields from the GRBM_GFX_INDEX. With this bit we can force the write.
*/
#define radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, reg, num) \
do { \
const bool __filter_cam_workaround = (gfx_level) >= GFX10 && (ring) == AMD_IP_GFX; \
__radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, __filter_cam_workaround); \
} while (0)
#define radeon_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, reg, num) \
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, reg, num)
#define radeon_set_uconfig_perfctr_reg(gfx_level, ring, reg, value) \
do { \
radeon_set_uconfig_perfctr_reg_seq(gfx_level, ring, reg, 1); \
radeon_emit(value); \
} while (0)
#define radeon_set_uconfig_perfctr_reg(gfx_level, ip_type, reg, value) \
ac_cmdbuf_set_uconfig_perfctr_reg(gfx_level, ip_type, reg, value)
#define radeon_set_privileged_config_reg(reg, value) \
do { \
assert((reg) < CIK_UCONFIG_REG_OFFSET); \
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
radeon_emit(value); \
radeon_emit(0); /* unused */ \
radeon_emit((reg) >> 2); \
radeon_emit(0); /* unused */ \
} while (0)
#define radeon_set_privileged_config_reg(reg, value) ac_cmdbuf_set_privileged_config_reg(reg, value)
#define radeon_event_write_predicate(event_type, predicate) ac_cmdbuf_event_write_predicate(event_type, predicate)

View file

@ -32,11 +32,8 @@
sctx->context_roll = true; \
} while (0)
#define radeon_emit_array(values, num) do { \
unsigned __n = (num); \
memcpy(__cs_buf + __cs_num, (values), __n * 4); \
__cs_num += __n; \
} while (0)
#define radeon_emit_array(values, num) \
ac_cmdbuf_emit_array(values, num)
/* Instead of writing into the command buffer, return the pointer to the command buffer and
* assume that the caller will fill the specified number of elements.
@ -203,10 +200,10 @@
/* Packet building helpers for CONTEXT registers. */
#define radeon_set_context_reg_seq(reg, num) \
radeon_set_reg_seq(reg, num, 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)
ac_cmdbuf_set_context_reg_seq(reg, num)
#define radeon_set_context_reg(reg, value) \
radeon_set_reg(reg, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
ac_cmdbuf_set_context_reg(reg, value)
#define radeon_opt_set_context_reg(reg, reg_enum, value) \
radeon_opt_set_reg(reg, reg_enum, 0, value, SI_CONTEXT, PKT3_SET_CONTEXT_REG)
@ -234,10 +231,10 @@
/* Packet building helpers for SH registers. */
#define radeon_set_sh_reg_seq(reg, num) \
radeon_set_reg_seq(reg, num, 0, SI_SH, PKT3_SET_SH_REG, 0)
ac_cmdbuf_set_sh_reg_seq(reg, num)
#define radeon_set_sh_reg(reg, value) \
radeon_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)
ac_cmdbuf_set_sh_reg(reg, value)
#define radeon_opt_set_sh_reg(reg, reg_enum, value) \
radeon_opt_set_reg(reg, reg_enum, 0, value, SI_SH, PKT3_SET_SH_REG)
@ -265,15 +262,13 @@
/* Packet building helpers for UCONFIG registers. */
#define radeon_set_uconfig_reg_seq(reg, num) \
radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)
ac_cmdbuf_set_uconfig_reg_seq(reg, num)
#define radeon_set_uconfig_perfctr_reg_seq(reg, num) \
radeon_set_reg_seq(reg, num, 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, \
sctx->gfx_level >= GFX10 && \
sctx->ws->cs_get_ip_type(__rcs) == AMD_IP_GFX)
#define radeon_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, reg, num) \
ac_cmdbuf_set_uconfig_perfctr_reg_seq(gfx_level, ip_type, reg, num)
#define radeon_set_uconfig_reg(reg, value) \
radeon_set_reg(reg, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
ac_cmdbuf_set_uconfig_reg(reg, value)
#define radeon_opt_set_uconfig_reg(reg, reg_enum, value) \
radeon_opt_set_reg(reg, reg_enum, 0, value, CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
@ -288,16 +283,8 @@
#define radeon_opt_set_uconfig_reg_idx(reg, reg_enum, idx, value) \
radeon_opt_set_reg(reg, reg_enum, idx, value, CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)
#define radeon_set_privileged_config_reg(reg, value) do { \
assert((reg) < CIK_UCONFIG_REG_OFFSET); \
radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
radeon_emit(value); \
radeon_emit(0); /* unused */ \
radeon_emit((reg) >> 2); \
radeon_emit(0); /* unused */ \
} while (0)
#define radeon_set_privileged_config_reg(reg, value) \
ac_cmdbuf_set_privileged_config_reg(reg, value)
/* GFX11 generic packet building helpers for buffered SH registers. Don't use these directly. */
#define gfx11_push_reg(reg, value, prefix_name, buffer, reg_count) do { \

View file

@ -506,6 +506,7 @@ static void si_emit_sqtt_userdata(struct si_context *sctx,
struct radeon_cmdbuf *cs, const void *data,
uint32_t num_dwords)
{
const enum amd_ip_type ip_type = sctx->ws->cs_get_ip_type(cs);
const uint32_t *dwords = (uint32_t *)data;
radeon_begin(cs);
@ -513,7 +514,9 @@ static void si_emit_sqtt_userdata(struct si_context *sctx,
while (num_dwords > 0) {
uint32_t count = MIN2(num_dwords, 2);
radeon_set_uconfig_perfctr_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
radeon_set_uconfig_perfctr_reg_seq(sctx->gfx_level, ip_type,
R_030D08_SQ_THREAD_TRACE_USERDATA_2,
count);
radeon_emit_array(dwords, count);
dwords += count;