tu: Fix BV/BR race in tu_clone_trace_range when waiting on barrier

Caused hangs in the following CTS test with CB enabled:
dEQP-VK.renderpasses.dynamic_rendering.primary_cmd_buff.random.seed1_geometry_tessellation_multiview

Fixes: 50cc9c723c ("tu/u_trace: Prevent cloning stale RB_DONE_TS results")

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41490>
This commit is contained in:
Danylo Piliaiev 2026-05-11 17:38:59 +02:00 committed by Marge Bot
parent abcb0f6cf5
commit 98e30a598c
2 changed files with 29 additions and 15 deletions

View file

@ -98,6 +98,26 @@ tu_cmd_buffer_status_gpu_write(struct tu_cmd_buffer *cmd_buffer,
tu_cs_emit(cs, (uint32_t)status);
}
static void
tu_wait_for_rb_done(struct tu_cs *cs, uint32_t onchip_addr, uint32_t seqno)
{
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 4);
tu_cs_emit(cs, CP_EVENT_WRITE7_0(
.event = RB_DONE_TS,
.write_src = EV_WRITE_USER_32B,
.write_dst = EV_DST_ONCHIP,
.write_enabled = true).value);
tu_cs_emit_qw(cs, onchip_addr);
tu_cs_emit(cs, seqno);
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | CP_WAIT_REG_MEM_0_POLL(POLL_ON_CHIP));
tu_cs_emit_qw(cs, onchip_addr);
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(seqno));
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(2));
}
template <chip CHIP>
static void
tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
@ -117,21 +137,14 @@ tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
static uint32_t seqno = 0;
uint32_t value = p_atomic_add_return(&seqno, 1);
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 4);
tu_cs_emit(cs, CP_EVENT_WRITE7_0(
.event = RB_DONE_TS,
.write_src = EV_WRITE_USER_32B,
.write_dst = EV_DST_ONCHIP,
.write_enabled = true).value);
tu_cs_emit_qw(cs, TU_ONCHIP_U_TRACE_BARRIER);
tu_cs_emit(cs, value);
/* BV can race BR so we cannot wait on the same addr in both since it can be overwritten. */
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(THREAD_MODE) | CP_COND_REG_EXEC_0_BR);
tu_wait_for_rb_done(cs, TU_ONCHIP_BR_U_TRACE_BARRIER, value);
tu_cond_exec_end(cs);
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | CP_WAIT_REG_MEM_0_POLL(POLL_ON_CHIP));
tu_cs_emit_qw(cs, TU_ONCHIP_U_TRACE_BARRIER);
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(value));
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(2));
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(THREAD_MODE) | CP_COND_REG_EXEC_0_BV);
tu_wait_for_rb_done(cs, TU_ONCHIP_BV_U_TRACE_BARRIER, value);
tu_cond_exec_end(cs);
}
tu_cs_emit_wfi(cs);

View file

@ -172,7 +172,8 @@ enum tu_onchip_addr {
TU_ONCHIP_BARRIER,
TU_ONCHIP_CB_RESLIST_OVERFLOW,
TU_ONCHIP_U_TRACE_BARRIER,
TU_ONCHIP_BR_U_TRACE_BARRIER,
TU_ONCHIP_BV_U_TRACE_BARRIER,
/* Registers 8-15 are defined by firmware to be split between BR and BV.
* Each has their own copy.