diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 096ad1db7ee..1905e41aac9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -98,6 +98,26 @@ tu_cmd_buffer_status_gpu_write(struct tu_cmd_buffer *cmd_buffer, tu_cs_emit(cs, (uint32_t)status); } +static void +tu_wait_for_rb_done(struct tu_cs *cs, uint32_t onchip_addr, uint32_t seqno) +{ + tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 4); + tu_cs_emit(cs, CP_EVENT_WRITE7_0( + .event = RB_DONE_TS, + .write_src = EV_WRITE_USER_32B, + .write_dst = EV_DST_ONCHIP, + .write_enabled = true).value); + tu_cs_emit_qw(cs, onchip_addr); + tu_cs_emit(cs, seqno); + + tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6); + tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | CP_WAIT_REG_MEM_0_POLL(POLL_ON_CHIP)); + tu_cs_emit_qw(cs, onchip_addr); + tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(seqno)); + tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u)); + tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(2)); +} + template static void tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs, @@ -117,21 +137,14 @@ tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs, static uint32_t seqno = 0; uint32_t value = p_atomic_add_return(&seqno, 1); - tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 4); - tu_cs_emit(cs, CP_EVENT_WRITE7_0( - .event = RB_DONE_TS, - .write_src = EV_WRITE_USER_32B, - .write_dst = EV_DST_ONCHIP, - .write_enabled = true).value); - tu_cs_emit_qw(cs, TU_ONCHIP_U_TRACE_BARRIER); - tu_cs_emit(cs, value); + /* BV can race BR so we cannot wait on the same addr in both since it can be overwritten. */ + tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(THREAD_MODE) | CP_COND_REG_EXEC_0_BR); + tu_wait_for_rb_done(cs, TU_ONCHIP_BR_U_TRACE_BARRIER, value); + tu_cond_exec_end(cs); - tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6); - tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | CP_WAIT_REG_MEM_0_POLL(POLL_ON_CHIP)); - tu_cs_emit_qw(cs, TU_ONCHIP_U_TRACE_BARRIER); - tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(value)); - tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u)); - tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(2)); + tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(THREAD_MODE) | CP_COND_REG_EXEC_0_BV); + tu_wait_for_rb_done(cs, TU_ONCHIP_BV_U_TRACE_BARRIER, value); + tu_cond_exec_end(cs); } tu_cs_emit_wfi(cs); diff --git a/src/freedreno/vulkan/tu_common.h b/src/freedreno/vulkan/tu_common.h index 18af24c00cf..afb180794a3 100644 --- a/src/freedreno/vulkan/tu_common.h +++ b/src/freedreno/vulkan/tu_common.h @@ -172,7 +172,8 @@ enum tu_onchip_addr { TU_ONCHIP_BARRIER, TU_ONCHIP_CB_RESLIST_OVERFLOW, - TU_ONCHIP_U_TRACE_BARRIER, + TU_ONCHIP_BR_U_TRACE_BARRIER, + TU_ONCHIP_BV_U_TRACE_BARRIER, /* Registers 8-15 are defined by firmware to be split between BR and BV. * Each has their own copy.