radeonsi: add a new PM4 helper radeon_event_write

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31168>
Marek Olšák 2024-08-06 16:23:01 -04:00
parent 434eddd422
commit 1a1138817c
9 changed files with 74 additions and 118 deletions
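This commit adds a small PM4 helper to si_build_pm4.h and converts the open-coded EVENT_WRITE sequences across radeonsi to use it. Each call site previously emitted two dwords by hand (the PKT3_EVENT_WRITE header and the EVENT_TYPE | EVENT_INDEX payload); the helper emits both and derives EVENT_INDEX from the event type. A minimal before/after sketch of the pattern as it appears at the call sites in this diff (inside a radeon_begin()/radeon_end() bracket):

   /* before: emit the two EVENT_WRITE dwords by hand */
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));

   /* after: one helper call; EVENT_INDEX is chosen by the macro */
   radeon_event_write(V_028A90_VGT_FLUSH);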


@@ -503,6 +503,17 @@
} \
} while (0)
/* Other packet helpers. */
#define radeon_event_write(event_type) do { \
unsigned __event_type = (event_type); \
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
radeon_emit(EVENT_TYPE(__event_type) | \
EVENT_INDEX(__event_type == V_028A90_VS_PARTIAL_FLUSH || \
__event_type == V_028A90_PS_PARTIAL_FLUSH || \
__event_type == V_028A90_CS_PARTIAL_FLUSH ? 4 : \
__event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL ? 1 : 0)); \
} while (0)
/* This should be evaluated at compile time if all parameters are constants. */
static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
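The helper centralizes the EVENT_INDEX rule that call sites previously had to get right by hand: VS_PARTIAL_FLUSH, PS_PARTIAL_FLUSH and CS_PARTIAL_FLUSH take EVENT_INDEX(4), PIXEL_PIPE_STAT_CONTROL takes EVENT_INDEX(1), and every other event type takes EVENT_INDEX(0). Because the event type is a constant at essentially every call site in this series, the ternary chain folds away at compile time and the macro expands to the same two dwords as before. As an illustration (not part of the diff):

   /* radeon_event_write(V_028A90_PS_PARTIAL_FLUSH) emits the same two dwords as: */
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));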


@@ -1117,10 +1117,9 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
radeon_emit(dispatch_initiator);
}
if (unlikely(sctx->sqtt_enabled && sctx->gfx_level >= GFX9)) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
}
if (unlikely(sctx->sqtt_enabled && sctx->gfx_level >= GFX9))
radeon_event_write(V_028A90_THREAD_TRACE_MARKER);
radeon_end();
}


@@ -160,8 +160,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h
*/
if ((ctx->gfx_level == GFX11 || ctx->gfx_level == GFX11_5) && ctx->has_tessellation) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
radeon_event_write(V_028A90_SQ_NON_EVENT);
radeon_end();
}
@@ -796,10 +795,8 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
radeon_begin(cs);
if (flags & SI_CONTEXT_VGT_FLUSH) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
if (flags & SI_CONTEXT_VGT_FLUSH)
radeon_event_write(V_028A90_VGT_FLUSH);
if (flags & SI_CONTEXT_INV_ICACHE)
gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
@@ -836,19 +833,15 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
}
if (flags & (SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB)) {
if (ctx->gfx_level < GFX12 && flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
/* Flush CMASK/FMASK/DCC. Will wait for idle later. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
if (ctx->gfx_level < GFX12 && flags & SI_CONTEXT_FLUSH_AND_INV_CB)
radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META);
/* Gfx11 can't flush DB_META and should use a TS event instead. */
if (ctx->gfx_level < GFX12 && ctx->gfx_level != GFX11 &&
flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
/* Flush HTILE. Will wait for idle later. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
if (ctx->gfx_level < GFX12 && ctx->gfx_level != GFX11 &&
flags & SI_CONTEXT_FLUSH_AND_INV_DB)
radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META);
/* First flush CB/DB, then L1/L2. */
gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
@@ -869,21 +862,18 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
} else {
/* Wait for graphics shaders to go idle if requested. */
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_event_write(V_028A90_PS_PARTIAL_FLUSH);
/* Only count explicit shader flushes, not implicit ones. */
ctx->num_vs_flushes++;
ctx->num_ps_flushes++;
} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
ctx->num_vs_flushes++;
}
}
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && ctx->compute_is_busy) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);
ctx->num_cs_flushes++;
ctx->compute_is_busy = false;
}
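One of the removed lines just above also shows why hand-written EVENT_INDEX values were error-prone: the old CS_PARTIAL_FLUSH emit nested EVENT_INDEX(4) inside EVENT_TYPE() instead of OR-ing it in afterwards. Whether that nesting still encoded the intended dword depends on how EVENT_TYPE() masks its argument; with the helper the index is computed inside the macro, so it can no longer be misplaced at a call site. Illustrative contrast (the first spelling is the one used everywhere else in this diff):

   radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));  /* conventional spelling */
   radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));  /* the removed line's nesting */
   radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);                        /* new form */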
@@ -1025,12 +1015,10 @@ void gfx10_emit_cache_flush(struct si_context *ctx, struct radeon_cmdbuf *cs)
}
if (flags & SI_CONTEXT_START_PIPELINE_STATS && ctx->pipeline_stats_enabled != 1) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PIPELINESTAT_START);
ctx->pipeline_stats_enabled = 1;
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && ctx->pipeline_stats_enabled != 0) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PIPELINESTAT_STOP);
ctx->pipeline_stats_enabled = 0;
}
radeon_end();
@@ -1091,16 +1079,13 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
radeon_begin(cs);
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
/* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META)) {
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
radeon_event_write(V_028A90_FLUSH_AND_INV_CB_META);
/* Flush HTILE. SURFACE_SYNC will wait for idle. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
if (flags & (SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_FLUSH_AND_INV_DB_META))
radeon_event_write(V_028A90_FLUSH_AND_INV_DB_META);
/* Wait for shader engines to go idle.
* VS and PS waits are unnecessary if SURFACE_SYNC is going to wait
@@ -1108,36 +1093,28 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
*/
if (!flush_cb_db) {
if (flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
/* Only count explicit shader flushes, not implicit ones
* done by SURFACE_SYNC.
*/
radeon_event_write(V_028A90_PS_PARTIAL_FLUSH);
/* Only count explicit shader flushes, not implicit ones done by SURFACE_SYNC. */
sctx->num_vs_flushes++;
sctx->num_ps_flushes++;
} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
sctx->num_vs_flushes++;
}
}
if (flags & SI_CONTEXT_CS_PARTIAL_FLUSH && sctx->compute_is_busy) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_event_write(V_028A90_CS_PARTIAL_FLUSH);
sctx->num_cs_flushes++;
sctx->compute_is_busy = false;
}
/* VGT state synchronization. */
if (flags & SI_CONTEXT_VGT_FLUSH) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
if (flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
}
if (flags & SI_CONTEXT_VGT_FLUSH)
radeon_event_write(V_028A90_VGT_FLUSH);
if (flags & SI_CONTEXT_VGT_STREAMOUT_SYNC)
radeon_event_write(V_028A90_VGT_STREAMOUT_SYNC);
radeon_end();
@@ -1266,14 +1243,12 @@ void gfx6_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs)
if (flags & SI_CONTEXT_START_PIPELINE_STATS && sctx->pipeline_stats_enabled != 1) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PIPELINESTAT_START);
radeon_end();
sctx->pipeline_stats_enabled = 1;
} else if (flags & SI_CONTEXT_STOP_PIPELINE_STATS && sctx->pipeline_stats_enabled != 0) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PIPELINESTAT_STOP);
radeon_end();
sctx->pipeline_stats_enabled = 0;
}


@@ -114,8 +114,7 @@ static void si_pc_emit_start(struct si_context *sctx, struct si_resource *buffer
radeon_begin(cs);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET));
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PERFCOUNTER_START);
radeon_set_uconfig_reg(R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_START_COUNTING));
radeon_end();
@@ -132,13 +131,10 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer,
si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PERFCOUNTER_SAMPLE);
if (!sctx->screen->info.never_send_perfcounter_stop) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
}
if (!sctx->screen->info.never_send_perfcounter_stop)
radeon_event_write(V_028A90_PERFCOUNTER_STOP);
radeon_set_uconfig_reg(
R_036020_CP_PERFMON_CNTL,
@@ -158,8 +154,7 @@ void si_pc_emit_spm_start(struct radeon_cmdbuf *cs)
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
S_036020_SPM_PERFMON_STATE(V_036020_STRM_PERFMON_STATE_START_COUNTING));
/* Start windowed performance counters. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
radeon_event_write(V_028A90_PERFCOUNTER_START);
radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(1));
radeon_end();
@@ -171,10 +166,8 @@ void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_count
radeon_begin(cs);
/* Stop windowed performance counters. */
if (!never_send_perfcounter_stop) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
}
if (!never_send_perfcounter_stop)
radeon_event_write(V_028A90_PERFCOUNTER_STOP);
radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0));


@@ -980,8 +980,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, struct si_query_hw
radeon_begin(cs);
if (sctx->screen->use_ngg && query->flags & SI_QUERY_EMULATE_GS_COUNTERS) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
if (--sctx->num_pipeline_stat_emulated_queries == 0) {
si_set_internal_shader_buffer(sctx, SI_GS_QUERY_BUF, NULL);


@@ -62,8 +62,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, unsigned index)
sctx->last_cb_target_mask = cb_target_mask;
radeon_begin(cs);
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
radeon_event_write(V_028A90_BREAK_BATCH);
radeon_end();
}
@@ -3181,10 +3180,9 @@ static void gfx6_emit_framebuffer_state(struct si_context *sctx, unsigned index)
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
if (sctx->screen->dpbb_allowed &&
sctx->screen->pbb_context_states_per_bin > 1) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
sctx->screen->pbb_context_states_per_bin > 1)
radeon_event_write(V_028A90_BREAK_BATCH);
radeon_end();
si_update_display_dcc_dirty(sctx);
@@ -3331,10 +3329,9 @@ static void gfx11_dgpu_emit_framebuffer_state(struct si_context *sctx, unsigned
gfx11_end_packed_context_regs();
if (sctx->screen->dpbb_allowed &&
sctx->screen->pbb_context_states_per_bin > 1) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
sctx->screen->pbb_context_states_per_bin > 1)
radeon_event_write(V_028A90_BREAK_BATCH);
radeon_end();
si_update_display_dcc_dirty(sctx);
@@ -3468,10 +3465,9 @@ static void gfx12_emit_framebuffer_state(struct si_context *sctx, unsigned index
gfx12_end_context_regs();
if (sctx->screen->dpbb_allowed &&
sctx->screen->pbb_context_states_per_bin > 1) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
sctx->screen->pbb_context_states_per_bin > 1)
radeon_event_write(V_028A90_BREAK_BATCH);
radeon_end();
sctx->framebuffer.dirty_cbufs = 0;


@@ -1212,16 +1212,6 @@ void si_emit_buffered_compute_sh_regs(struct si_context *sctx)
#endif
#define EMIT_SQTT_END_DRAW \
do { \
if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt_enabled)) { \
radeon_begin(&sctx->gfx_cs); \
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); \
radeon_end(); \
} \
} while (0)
template <amd_gfx_level GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
si_is_draw_vertex_state IS_DRAW_VERTEX_STATE, si_has_sh_pairs_packed HAS_SH_PAIRS_PACKED> ALWAYS_INLINE
static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info,
@@ -1638,10 +1628,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit(0);
radeon_emit(V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
for (unsigned i = 0; i < 3; i++) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
}
for (unsigned i = 0; i < 3; i++)
radeon_event_write(V_028A90_SQ_NON_EVENT);
} else if (increment_draw_id) {
for (unsigned i = 0; i < num_draws; i++) {
if (i > 0) {
@@ -1675,9 +1663,10 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
}
}
}
radeon_end();
EMIT_SQTT_END_DRAW;
if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt_enabled))
radeon_event_write(V_028A90_THREAD_TRACE_MARKER);
radeon_end();
}
/* Return false if not bound. */


@@ -4093,12 +4093,10 @@ static void si_emit_vgt_flush(struct radeon_cmdbuf *cs)
radeon_begin(cs);
/* This is required before VGT_FLUSH. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
radeon_event_write(V_028A90_VGT_FLUSH);
radeon_end();
}
@@ -5009,11 +5007,8 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index)
radeon_begin(&sctx->gfx_cs);
/* Required before writing tessellation config registers. */
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
radeon_event_write(V_028A90_VS_PARTIAL_FLUSH);
radeon_event_write(V_028A90_VGT_FLUSH);
if (sctx->gfx_level >= GFX7) {
radeon_set_uconfig_reg_seq(R_030938_VGT_TF_RING_SIZE, 3);


@@ -259,8 +259,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx)
radeon_set_config_reg(reg_strmout_cntl, 0);
}
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
radeon_event_write(V_028A90_SO_VGTSTREAMOUT_FLUSH);
radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */