mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 13:48:06 +02:00
tu/u_trace: Prevent cloning stale RB_DONE_TS results
Otherwise, at best, we get results for the previous tile. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41102>
This commit is contained in:
parent
6ac25aac23
commit
50cc9c723c
2 changed files with 41 additions and 15 deletions
|
|
@ -95,6 +95,7 @@ tu_cmd_buffer_status_gpu_write(struct tu_cmd_buffer *cmd_buffer,
|
|||
tu_cs_emit(cs, (uint32_t)status);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct u_trace *dst,
|
||||
|
|
@ -103,17 +104,44 @@ tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
if (u_trace_iterator_equal(begin, end))
|
||||
return;
|
||||
|
||||
/* The only way to wait for a tracepoint's RB_DONE_TS completion on A7XX+ is to wait
|
||||
* on a value it has written; however, we neither know the value the tracepoint writes,
|
||||
* nor can we rely on the previous value being zero. So we have to issue our own
|
||||
* RB_DONE_TS with known value and wait for it.
|
||||
*/
|
||||
/* TODO: Maybe we can do this only when we copy from memory written by RB_DONE_TS? */
|
||||
if constexpr (CHIP >= A7XX) {
|
||||
static uint32_t seqno = 0;
|
||||
uint32_t value = p_atomic_add_return(&seqno, 1);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE7, 4);
|
||||
tu_cs_emit(cs, CP_EVENT_WRITE7_0(
|
||||
.event = RB_DONE_TS,
|
||||
.write_src = EV_WRITE_USER_32B,
|
||||
.write_dst = EV_DST_ONCHIP,
|
||||
.write_enabled = true).value);
|
||||
tu_cs_emit_qw(cs, TU_ONCHIP_U_TRACE_BARRIER);
|
||||
tu_cs_emit(cs, value);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | CP_WAIT_REG_MEM_0_POLL(POLL_ON_CHIP));
|
||||
tu_cs_emit_qw(cs, TU_ONCHIP_U_TRACE_BARRIER);
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(value));
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
|
||||
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(2));
|
||||
}
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
u_trace_clone_append(begin, end, dst, cs, tu_copy_buffer);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu_clone_trace(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct u_trace *dst, struct u_trace *src)
|
||||
{
|
||||
tu_clone_trace_range(cmd, cs, dst, u_trace_begin_iterator(src),
|
||||
u_trace_end_iterator(src));
|
||||
tu_clone_trace_range<CHIP>(cmd, cs, dst, u_trace_begin_iterator(src), u_trace_end_iterator(src));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -2683,8 +2711,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
else
|
||||
trace_end_binning_ib(&cmd->trace, cs);
|
||||
|
||||
tu_clone_trace_range(cmd, cs, &cmd->trace, cmd->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd->rp_trace));
|
||||
tu_clone_trace_range<CHIP>(cmd, cs, &cmd->trace, cmd->trace_renderpass_start, u_trace_end_iterator(&cmd->rp_trace));
|
||||
|
||||
/* switching from binning pass to GMEM pass will cause a switch from
|
||||
* PROGRAM_BINNING to PROGRAM, which invalidates const state (XS_CONST states)
|
||||
|
|
@ -3671,8 +3698,7 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_cs_emit_call(cs, &cmd->tile_store_cs);
|
||||
|
||||
tu_clone_trace_range(cmd, cs, &cmd->trace, cmd->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd->rp_trace));
|
||||
tu_clone_trace_range<CHIP>(cmd, cs, &cmd->trace, cmd->trace_renderpass_start, u_trace_end_iterator(&cmd->rp_trace));
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_set_render_mode<CHIP>(cs, {RM6_BIN_RENDER_END});
|
||||
|
|
@ -3975,9 +4001,8 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
|
|||
/* Outside of renderpasses we assume all draw states are disabled. */
|
||||
tu_disable_draw_states(cmd, &cmd->cs);
|
||||
|
||||
tu_clone_trace_range(cmd, &cmd->cs, &cmd->trace,
|
||||
cmd->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd->rp_trace));
|
||||
tu_clone_trace_range<CHIP>(cmd, &cmd->cs, &cmd->trace, cmd->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd->rp_trace));
|
||||
|
||||
tu_trace_end_render_pass<CHIP>(cmd, false);
|
||||
}
|
||||
|
|
@ -6132,8 +6157,7 @@ tu_append_pre_chain(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_render_pass_state_merge(&cmd->state.rp,
|
||||
&secondary->pre_chain.state);
|
||||
tu_clone_trace(cmd, &cmd->draw_cs,
|
||||
&cmd->rp_trace, &secondary->pre_chain.rp_trace);
|
||||
TU_CALLX(cmd->device, tu_clone_trace)(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->pre_chain.rp_trace);
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
&secondary->pre_chain.fdm_bin_patchpoints);
|
||||
|
||||
|
|
@ -6154,7 +6178,7 @@ tu_append_post_chain(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
|
||||
tu_cs_add_entries(&cmd->draw_epilogue_cs, &secondary->draw_epilogue_cs);
|
||||
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
TU_CALLX(cmd->device, tu_clone_trace)(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
cmd->state.rp = secondary->state.rp;
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
&secondary->fdm_bin_patchpoints);
|
||||
|
|
@ -6173,7 +6197,7 @@ tu_append_pre_post_chain(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
|
||||
tu_cs_add_entries(&cmd->draw_epilogue_cs, &secondary->draw_epilogue_cs);
|
||||
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
TU_CALLX(cmd->device, tu_clone_trace)(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
tu_render_pass_state_merge(&cmd->state.rp,
|
||||
&secondary->state.rp);
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
|
|
@ -6255,7 +6279,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
cmd->state.lrz.color_written_with_z_test |=
|
||||
secondary->state.lrz.color_written_with_z_test;
|
||||
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
TU_CALLX(cmd->device, tu_clone_trace)(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
tu_render_pass_state_merge(&cmd->state.rp, &secondary->state.rp);
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
&secondary->fdm_bin_patchpoints);
|
||||
|
|
@ -6320,7 +6344,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
assert(tu_cs_is_empty(&secondary->draw_cs));
|
||||
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
|
||||
tu_cs_add_entries(&cmd->cs, &secondary->cs);
|
||||
tu_clone_trace(cmd, &cmd->cs, &cmd->trace, &secondary->trace);
|
||||
TU_CALLX(cmd->device, tu_clone_trace)(cmd, &cmd->cs, &cmd->trace, &secondary->trace);
|
||||
break;
|
||||
|
||||
case SR_IN_PRE_CHAIN:
|
||||
|
|
|
|||
|
|
@ -172,6 +172,8 @@ enum tu_onchip_addr {
|
|||
TU_ONCHIP_BARRIER,
|
||||
TU_ONCHIP_CB_RESLIST_OVERFLOW,
|
||||
|
||||
TU_ONCHIP_U_TRACE_BARRIER,
|
||||
|
||||
/* Registers 8-15 are defined by firmware to be split between BR and BV.
|
||||
* Each has their own copy.
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue