amd,radv,radeonsi: add ac_emit_cp_pfp_sync_me()

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37813>
This commit is contained in:
Samuel Pitoiset 2025-10-10 12:13:08 +02:00 committed by Marge Bot
parent 7ead034a06
commit 88f53906d8
6 changed files with 27 additions and 27 deletions

View file

@ -1060,3 +1060,12 @@ ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
ac_cmdbuf_emit(dst_va >> 32);
ac_cmdbuf_end();
}
/* Append a PFP_SYNC_ME PKT3 packet to the command buffer `cs`.
 *
 * The packet consists of two dwords: the PKT3 header followed by a single
 * zero dword.
 *
 * NOTE(review): presumably this makes the PFP wait until the ME has caught
 * up, as the packet name suggests -- confirm against the CP/PM4 packet
 * documentation.
 */
void
ac_emit_cp_pfp_sync_me(struct ac_cmdbuf *cs)
{
ac_cmdbuf_begin(cs);
/* Packet header. */
ac_cmdbuf_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
/* The only payload dword, always written as zero. */
ac_cmdbuf_emit(0);
ac_cmdbuf_end();
}

View file

@ -131,6 +131,9 @@ ac_emit_cp_copy_data(struct ac_cmdbuf *cs, uint32_t src_sel, uint32_t dst_sel,
uint64_t src_va, uint64_t dst_va,
enum ac_cp_copy_data_flags flags);
/* Emit a PFP_SYNC_ME packet into the command buffer `cs`. */
void
ac_emit_cp_pfp_sync_me(struct ac_cmdbuf *cs);
#ifdef __cplusplus
}
#endif

View file

@ -5074,10 +5074,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct rad
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_REG, va, reg >> 2,
(reg_count == 2 ? AC_CP_COPY_DATA_COUNT_SEL : 0));
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
ac_emit_cp_pfp_sync_me(cs->b);
}
}
@ -14656,10 +14653,7 @@ radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va
ac_emit_cp_copy_data(cs->b, COPY_DATA_SRC_MEM, COPY_DATA_DST_MEM, va, emulated_va, AC_CP_COPY_DATA_WR_CONFIRM);
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
ac_emit_cp_pfp_sync_me(cs->b);
pred_op = PREDICATION_OP_BOOL64;
@ -15092,10 +15086,9 @@ radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
/* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption
* (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+.
*/
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
ac_emit_cp_pfp_sync_me(cs->b);
radeon_begin(cs);
radeon_emit(PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
radeon_emit(draw_info->strmout_va);
radeon_emit(draw_info->strmout_va >> 32);

View file

@ -289,11 +289,11 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
}
}
radeon_begin(cs);
/* VGT state sync */
if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
radeon_begin(cs);
radeon_event_write(V_028A90_VGT_FLUSH);
radeon_end();
}
/* Ignore fields that only modify the behavior of other fields. */
@ -302,6 +302,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
* The cache flush is executed in the ME, but the PFP waits
* for completion.
*/
radeon_begin(cs);
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
radeon_emit(0); /* CP_COHER_CNTL */
radeon_emit(0xffffffff); /* CP_COHER_SIZE */
@ -310,16 +311,18 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
radeon_emit(0); /* CP_COHER_BASE_HI */
radeon_emit(0x0000000A); /* POLL_INTERVAL */
radeon_emit(gcr_cntl); /* GCR_CNTL */
radeon_end();
} else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
!is_mec) {
/* We need to ensure that PFP waits as well. */
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
ac_emit_cp_pfp_sync_me(cs->b);
*sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
}
radeon_begin(cs);
if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
if (!is_mec) {
radeon_event_write(V_028A90_PIPELINESTAT_START);
@ -485,10 +488,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
if ((cp_coher_cntl || (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2))) &&
!is_mec) {
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
ac_emit_cp_pfp_sync_me(cs->b);
*sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
}

View file

@ -603,11 +603,9 @@ radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(0); /* CP_COHER_BASE_HI */
radeon_emit(0x0000000A); /* POLL_INTERVAL */
radeon_emit(0); /* GCR_CNTL */
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
ac_emit_cp_pfp_sync_me(cs->b);
}
static void

View file

@ -101,8 +101,5 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
/* Emit a PFP_SYNC_ME packet into the radeonsi command buffer `cs`.
 *
 * NOTE(review): this listing looks like a diff view whose +/- markers were
 * stripped. The radeon_begin()..radeon_end() sequence is presumably the
 * removed open-coded implementation and the ac_emit_cp_pfp_sync_me() call
 * its replacement -- confirm against the actual file. Read literally, the
 * packet would be emitted twice.
 */
void si_cp_pfp_sync_me(struct radeon_cmdbuf *cs)
{
/* Open-coded emission: PKT3 header followed by one zero dword. */
radeon_begin(cs);
radeon_emit(PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(0);
radeon_end();
/* Shared helper emitting the same packet into cs->current. */
ac_emit_cp_pfp_sync_me(&cs->current);
}