mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 08:50:13 +01:00
tu: Give renderpass events a separate trace buffer
Before this, we tried hard to optimize for the sysmem case by emitting the events for the renderpass inline and only discarding them in the gmem case. However, this won't work if we need to emit the render_pass_start event after the binning IB, because we don't know whether the binning IB will be emitted until the RP end time. The old system also required a ton of confusing code to keep track of the start/end pointers when suspending and resuming renderpasses. All of that goes away if we use a separate u_trace for renderpass events and just copy it to the main trace buffer for sysmem.

With this, the previous method of using the space between trace_rp_start and trace_rp_drawcalls_start to keep track of the renderpass_start event and disable it when emitting a split dynamic renderpass at submit time doesn't work anymore. Just move trace_renderpass_start() to tu_cmd_render() time after the draw calls have been recorded, which is now safe to do.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35751>
This commit is contained in:
parent
7791b5286c
commit
630380349b
5 changed files with 54 additions and 139 deletions
|
|
@ -3535,7 +3535,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
{
|
||||
const struct blit_ops *ops = &r2d_ops<CHIP>;
|
||||
|
||||
trace_start_sysmem_resolve(&cmd->trace, cs, cmd, vk_dst_format);
|
||||
trace_start_sysmem_resolve(&cmd->rp_trace, cs, cmd, vk_dst_format);
|
||||
|
||||
enum pipe_format src_format = vk_format_to_pipe_format(vk_src_format);
|
||||
enum pipe_format dst_format = vk_format_to_pipe_format(vk_dst_format);
|
||||
|
|
@ -3571,7 +3571,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
|
||||
ops->teardown(cmd, cs);
|
||||
|
||||
trace_end_sysmem_resolve(&cmd->trace, cs);
|
||||
trace_end_sysmem_resolve(&cmd->rp_trace, cs);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -4024,7 +4024,7 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
|
|||
bool z_clear = false;
|
||||
bool s_clear = false;
|
||||
|
||||
trace_start_sysmem_clear_all(&cmd->trace, cs, cmd, mrt_count, rect_count);
|
||||
trace_start_sysmem_clear_all(&cmd->rp_trace, cs, cmd, mrt_count, rect_count);
|
||||
|
||||
for (uint32_t i = 0; i < attachment_count; i++) {
|
||||
uint32_t a;
|
||||
|
|
@ -4201,7 +4201,7 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
|
|||
if (cmd->state.fdm_enabled)
|
||||
tu_cs_set_writeable(cs, false);
|
||||
|
||||
trace_end_sysmem_clear_all(&cmd->trace, cs);
|
||||
trace_end_sysmem_clear_all(&cmd->rp_trace, cs);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -4290,7 +4290,7 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
const struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[attachment];
|
||||
|
||||
trace_start_gmem_clear(&cmd->trace, cs, cmd, att->format, att->samples);
|
||||
trace_start_gmem_clear(&cmd->rp_trace, cs, cmd, att->format, att->samples);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(att->samples)));
|
||||
|
|
@ -4326,7 +4326,7 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_flush_for_access(&cmd->state.renderpass_cache, TU_ACCESS_BLIT_WRITE_GMEM, TU_ACCESS_NONE);
|
||||
|
||||
trace_end_gmem_clear(&cmd->trace, cs);
|
||||
trace_end_gmem_clear(&cmd->rp_trace, cs);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -4575,13 +4575,13 @@ tu_clear_attachments_generic(struct tu_cmd_buffer *cmd,
|
|||
if (a != VK_ATTACHMENT_UNUSED) {
|
||||
const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a];
|
||||
const struct tu_image_view *iview = cmd->state.attachments[a];
|
||||
trace_start_generic_clear(&cmd->trace, cs, cmd, att->format,
|
||||
trace_start_generic_clear(&cmd->rp_trace, cs, cmd, att->format,
|
||||
iview->view.ubwc_enabled, att->samples);
|
||||
for (unsigned j = 0; j < rectCount; j++) {
|
||||
tu7_clear_attachment_generic_single_rect(
|
||||
cmd, cs, &resolve_group, att, &pAttachments[i], a, &pRects[j]);
|
||||
}
|
||||
trace_end_generic_clear(&cmd->trace, cs);
|
||||
trace_end_generic_clear(&cmd->rp_trace, cs);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4638,7 +4638,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
if (cmd->state.pass->attachments[a].samples > 1)
|
||||
ops = &r3d_ops<CHIP>;
|
||||
|
||||
trace_start_sysmem_clear(&cmd->trace, cs, cmd, vk_format, ops == &r3d_ops<CHIP>,
|
||||
trace_start_sysmem_clear(&cmd->rp_trace, cs, cmd, vk_format, ops == &r3d_ops<CHIP>,
|
||||
cmd->state.pass->attachments[a].samples);
|
||||
|
||||
ops->setup(cmd, cs, format, format, clear_mask, 0, true, iview->view.ubwc_enabled,
|
||||
|
|
@ -4662,7 +4662,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
|
||||
ops->teardown(cmd, cs);
|
||||
|
||||
trace_end_sysmem_clear(&cmd->trace, cs);
|
||||
trace_end_sysmem_clear(&cmd->rp_trace, cs);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -4745,7 +4745,7 @@ tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd,
|
|||
const VkClearValue *value = &cmd->state.clear_values[a];
|
||||
const struct tu_image_view *iview = cmd->state.attachments[a];
|
||||
|
||||
trace_start_generic_clear(&cmd->trace, cs, cmd, att->format,
|
||||
trace_start_generic_clear(&cmd->rp_trace, cs, cmd, att->format,
|
||||
iview->view.ubwc_enabled, att->samples);
|
||||
|
||||
enum pipe_format format = vk_format_to_pipe_format(att->format);
|
||||
|
|
@ -4773,7 +4773,7 @@ tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd,
|
|||
tu_flush_for_access(&cmd->state.renderpass_cache,
|
||||
TU_ACCESS_BLIT_WRITE_GMEM, TU_ACCESS_NONE);
|
||||
|
||||
trace_end_generic_clear(&cmd->trace, cs);
|
||||
trace_end_generic_clear(&cmd->rp_trace, cs);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -5040,7 +5040,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
if (!load_common && !load_stencil)
|
||||
return;
|
||||
|
||||
trace_start_gmem_load(&cmd->trace, cs, cmd, attachment->format, force_load);
|
||||
trace_start_gmem_load(&cmd->rp_trace, cs, cmd, attachment->format, force_load);
|
||||
|
||||
/* If attachment will be cleared by vkCmdClearAttachments - it is likely
|
||||
* that it would be partially cleared, and since it is done by 2d blit
|
||||
|
|
@ -5074,7 +5074,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
if (cond_exec)
|
||||
tu_end_load_store_cond_exec(cmd, cs, true);
|
||||
|
||||
trace_end_gmem_load(&cmd->trace, cs);
|
||||
trace_end_gmem_load(&cmd->rp_trace, cs);
|
||||
}
|
||||
TU_GENX(tu_load_gmem_attachment);
|
||||
|
||||
|
|
@ -5438,7 +5438,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
!resolve_d24s8_s8 &&
|
||||
(a == gmem_a || blit_can_resolve(dst->format));
|
||||
|
||||
trace_start_gmem_store(&cmd->trace, cs, cmd, dst->format, use_fast_path, unaligned);
|
||||
trace_start_gmem_store(&cmd->rp_trace, cs, cmd, dst->format, use_fast_path, unaligned);
|
||||
|
||||
/* Unconditional store should happen only if attachment was cleared,
|
||||
* which could have happened either by load_op or via vkCmdClearAttachments.
|
||||
|
|
@ -5459,7 +5459,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
tu_end_load_store_cond_exec(cmd, cs, false);
|
||||
}
|
||||
|
||||
trace_end_gmem_store(&cmd->trace, cs);
|
||||
trace_end_gmem_store(&cmd->rp_trace, cs);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -5538,6 +5538,6 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
tu_end_load_store_cond_exec(cmd, cs, false);
|
||||
}
|
||||
|
||||
trace_end_gmem_store(&cmd->trace, cs);
|
||||
trace_end_gmem_store(&cmd->rp_trace, cs);
|
||||
}
|
||||
TU_GENX(tu_store_gmem_attachment);
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ tu_cmd_buffer_status_gpu_write(struct tu_cmd_buffer *cmd_buffer,
|
|||
|
||||
static void
|
||||
tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct u_trace *dst,
|
||||
struct u_trace_iterator begin, struct u_trace_iterator end)
|
||||
{
|
||||
if (u_trace_iterator_equal(begin, end))
|
||||
|
|
@ -106,10 +107,10 @@ tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
static void
|
||||
tu_clone_trace(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
struct u_trace *trace)
|
||||
struct u_trace *dst, struct u_trace *src)
|
||||
{
|
||||
tu_clone_trace_range(cmd, cs, u_trace_begin_iterator(trace),
|
||||
u_trace_end_iterator(trace));
|
||||
tu_clone_trace_range(cmd, cs, dst, u_trace_begin_iterator(src),
|
||||
u_trace_end_iterator(src));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -2630,9 +2631,8 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_cs_emit_call(cs, &cmd->tile_store_cs);
|
||||
|
||||
tu_clone_trace_range(cmd, cs, cmd->trace_rp_drawcalls_start,
|
||||
cmd->trace_rp_drawcalls_end);
|
||||
|
||||
tu_clone_trace_range(cmd, cs, &cmd->trace, cmd->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd->rp_trace));
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
|
|
@ -2986,7 +2986,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
tu6_emit_tile_store_cs<CHIP>(cmd, &cmd->tile_store_cs);
|
||||
tu_cs_end(&cmd->tile_store_cs);
|
||||
|
||||
cmd->trace_rp_drawcalls_end = u_trace_end_iterator(&cmd->trace);
|
||||
tu_trace_start_render_pass(cmd);
|
||||
|
||||
tu6_tile_render_begin<CHIP>(cmd, &cmd->cs, autotune_result, fdm_offsets);
|
||||
|
||||
|
|
@ -3051,11 +3051,6 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
if (cmd->state.pass->has_fdm)
|
||||
cmd->state.dirty |= TU_CMD_DIRTY_FDM;
|
||||
|
||||
/* tu6_render_tile has cloned these tracepoints for each tile */
|
||||
if (!u_trace_iterator_equal(cmd->trace_rp_drawcalls_start, cmd->trace_rp_drawcalls_end))
|
||||
u_trace_disable_event_range(cmd->trace_rp_drawcalls_start,
|
||||
cmd->trace_rp_drawcalls_end);
|
||||
|
||||
/* Reset the gmem store CS entry lists so that the next render pass
|
||||
* does its own stores.
|
||||
*/
|
||||
|
|
@ -3067,7 +3062,7 @@ static void
|
|||
tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
|
||||
struct tu_renderpass_result *autotune_result)
|
||||
{
|
||||
cmd->trace_rp_drawcalls_end = u_trace_end_iterator(&cmd->trace);
|
||||
tu_trace_start_render_pass(cmd);
|
||||
|
||||
tu6_sysmem_render_begin<CHIP>(cmd, &cmd->cs, autotune_result);
|
||||
|
||||
|
|
@ -3079,6 +3074,10 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu6_sysmem_render_end<CHIP>(cmd, &cmd->cs, autotune_result);
|
||||
|
||||
tu_clone_trace_range(cmd, &cmd->cs, &cmd->trace,
|
||||
cmd->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd->rp_trace));
|
||||
|
||||
tu_trace_end_render_pass<CHIP>(cmd, false);
|
||||
}
|
||||
|
||||
|
|
@ -3129,6 +3128,12 @@ static void tu_reset_render_pass(struct tu_cmd_buffer *cmd_buffer)
|
|||
util_dynarray_clear(&cmd_buffer->fdm_bin_patchpoints);
|
||||
ralloc_free(cmd_buffer->patchpoints_ctx);
|
||||
cmd_buffer->patchpoints_ctx = NULL;
|
||||
|
||||
/* Discard RP trace contents */
|
||||
u_trace_disable_event_range(cmd_buffer->trace_renderpass_start,
|
||||
u_trace_end_iterator(&cmd_buffer->rp_trace));
|
||||
cmd_buffer->trace_renderpass_start =
|
||||
u_trace_end_iterator(&cmd_buffer->rp_trace);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
|
@ -3157,6 +3162,9 @@ tu_create_cmd_buffer(struct vk_command_pool *pool,
|
|||
cmd_buffer->device = device;
|
||||
|
||||
u_trace_init(&cmd_buffer->trace, &device->trace_context);
|
||||
u_trace_init(&cmd_buffer->rp_trace, &device->trace_context);
|
||||
cmd_buffer->trace_renderpass_start =
|
||||
u_trace_begin_iterator(&cmd_buffer->rp_trace);
|
||||
list_inithead(&cmd_buffer->renderpass_autotune_results);
|
||||
|
||||
if (TU_DEBUG_ENV(CHECK_CMD_BUFFER_STATUS)) {
|
||||
|
|
@ -3204,6 +3212,7 @@ tu_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
|
|||
}
|
||||
|
||||
u_trace_fini(&cmd_buffer->trace);
|
||||
u_trace_fini(&cmd_buffer->rp_trace);
|
||||
|
||||
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
|
||||
|
||||
|
|
@ -3360,7 +3369,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
|
|||
|
||||
if (u_trace_enabled(&cmd_buffer->device->trace_context)) {
|
||||
trace_start_secondary_cmd_buffer(
|
||||
&cmd_buffer->trace,
|
||||
pass_continue ? &cmd_buffer->rp_trace : &cmd_buffer->trace,
|
||||
pass_continue ? &cmd_buffer->draw_cs : &cmd_buffer->cs,
|
||||
cmd_buffer);
|
||||
}
|
||||
|
|
@ -4258,30 +4267,11 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
|||
tu_emit_cache_flush<CHIP>(cmd_buffer);
|
||||
}
|
||||
|
||||
/* If we called tu_trace_render_start as part of our suspended chain, and
|
||||
* are going to reconstruct the renderpass setup at cmdbuf submit time,
|
||||
* then disable the recorded tu_trace_render_start event now (we didn't
|
||||
* know at the point of recording it whether this RP's our chain would end
|
||||
* within this command buffer or not) in favor of the one created during
|
||||
* submission.
|
||||
*/
|
||||
if (cmd_buffer->state.suspending) {
|
||||
cmd_buffer->trace_rp_drawcalls_end =
|
||||
u_trace_end_iterator(&cmd_buffer->trace);
|
||||
|
||||
if (cmd_buffer->trace_rp_start.chunk != NULL &&
|
||||
(cmd_buffer->state.suspend_resume == SR_IN_CHAIN ||
|
||||
cmd_buffer->state.suspend_resume == SR_IN_CHAIN_AFTER_PRE_CHAIN)) {
|
||||
u_trace_disable_event_range(cmd_buffer->trace_rp_start,
|
||||
cmd_buffer->trace_rp_drawcalls_start);
|
||||
}
|
||||
}
|
||||
|
||||
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
|
||||
trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs, cmd_buffer);
|
||||
} else {
|
||||
trace_end_secondary_cmd_buffer(
|
||||
&cmd_buffer->trace,
|
||||
cmd_buffer->state.pass ? &cmd_buffer->rp_trace : &cmd_buffer->trace,
|
||||
cmd_buffer->state.pass ? &cmd_buffer->draw_cs : &cmd_buffer->cs);
|
||||
}
|
||||
|
||||
|
|
@ -5064,8 +5054,8 @@ tu_append_pre_chain(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_render_pass_state_merge(&cmd->state.rp,
|
||||
&secondary->pre_chain.state);
|
||||
tu_clone_trace_range(cmd, &cmd->draw_cs, secondary->pre_chain.trace_rp_drawcalls_start,
|
||||
secondary->pre_chain.trace_rp_drawcalls_end);
|
||||
tu_clone_trace(cmd, &cmd->draw_cs,
|
||||
&cmd->rp_trace, &secondary->pre_chain.rp_trace);
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
&secondary->pre_chain.fdm_bin_patchpoints);
|
||||
|
||||
|
|
@ -5086,8 +5076,7 @@ tu_append_post_chain(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
|
||||
tu_cs_add_entries(&cmd->draw_epilogue_cs, &secondary->draw_epilogue_cs);
|
||||
|
||||
tu_clone_trace_range(cmd, &cmd->draw_cs, secondary->trace_rp_drawcalls_start,
|
||||
secondary->trace_rp_drawcalls_end);
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
cmd->state.rp = secondary->state.rp;
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
&secondary->fdm_bin_patchpoints);
|
||||
|
|
@ -5106,8 +5095,7 @@ tu_append_pre_post_chain(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_add_entries(&cmd->draw_cs, &secondary->draw_cs);
|
||||
tu_cs_add_entries(&cmd->draw_epilogue_cs, &secondary->draw_epilogue_cs);
|
||||
|
||||
tu_clone_trace_range(cmd, &cmd->draw_cs, secondary->trace_rp_drawcalls_start,
|
||||
secondary->trace_rp_drawcalls_end);
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
tu_render_pass_state_merge(&cmd->state.rp,
|
||||
&secondary->state.rp);
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
|
|
@ -5124,10 +5112,7 @@ tu_save_pre_chain(struct tu_cmd_buffer *cmd)
|
|||
&cmd->draw_cs);
|
||||
tu_cs_add_entries(&cmd->pre_chain.draw_epilogue_cs,
|
||||
&cmd->draw_epilogue_cs);
|
||||
cmd->pre_chain.trace_rp_drawcalls_start =
|
||||
cmd->trace_rp_drawcalls_start;
|
||||
cmd->pre_chain.trace_rp_drawcalls_end =
|
||||
cmd->trace_rp_drawcalls_end;
|
||||
u_trace_move(&cmd->pre_chain.rp_trace, &cmd->rp_trace);
|
||||
cmd->pre_chain.state = cmd->state.rp;
|
||||
util_dynarray_append_dynarray(&cmd->pre_chain.fdm_bin_patchpoints,
|
||||
&cmd->fdm_bin_patchpoints);
|
||||
|
|
@ -5189,7 +5174,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
cmd->state.lrz.prev_direction =
|
||||
secondary->state.lrz.prev_direction;
|
||||
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &secondary->trace);
|
||||
tu_clone_trace(cmd, &cmd->draw_cs, &cmd->rp_trace, &secondary->rp_trace);
|
||||
tu_render_pass_state_merge(&cmd->state.rp, &secondary->state.rp);
|
||||
util_dynarray_append_dynarray(&cmd->fdm_bin_patchpoints,
|
||||
&secondary->fdm_bin_patchpoints);
|
||||
|
|
@ -5199,7 +5184,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
assert(tu_cs_is_empty(&secondary->draw_cs));
|
||||
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
|
||||
tu_cs_add_entries(&cmd->cs, &secondary->cs);
|
||||
tu_clone_trace(cmd, &cmd->cs, &secondary->trace);
|
||||
tu_clone_trace(cmd, &cmd->cs, &cmd->trace, &secondary->trace);
|
||||
break;
|
||||
|
||||
case SR_IN_PRE_CHAIN:
|
||||
|
|
@ -5208,7 +5193,6 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
*/
|
||||
if (cmd->state.suspend_resume == SR_NONE) {
|
||||
cmd->state.suspend_resume = SR_IN_PRE_CHAIN;
|
||||
cmd->trace_rp_drawcalls_start = u_trace_end_iterator(&cmd->trace);
|
||||
}
|
||||
|
||||
/* The secondary is just a continuous suspend/resume chain so we
|
||||
|
|
@ -5223,13 +5207,6 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
case SR_IN_CHAIN_AFTER_PRE_CHAIN:
|
||||
if (secondary->state.suspend_resume == SR_AFTER_PRE_CHAIN ||
|
||||
secondary->state.suspend_resume == SR_IN_CHAIN_AFTER_PRE_CHAIN) {
|
||||
/* In thse cases there is a `pre_chain` in the secondary which
|
||||
* ends that we need to append to the primary.
|
||||
*/
|
||||
|
||||
if (cmd->state.suspend_resume == SR_NONE)
|
||||
cmd->trace_rp_drawcalls_start = u_trace_end_iterator(&cmd->trace);
|
||||
|
||||
tu_append_pre_chain(cmd, secondary);
|
||||
|
||||
/* We're about to render, so we need to end the command stream
|
||||
|
|
@ -5246,7 +5223,6 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
* started in the primary, so we have to move the state to
|
||||
* `pre_chain`.
|
||||
*/
|
||||
cmd->trace_rp_drawcalls_end = u_trace_end_iterator(&cmd->trace);
|
||||
tu_save_pre_chain(cmd);
|
||||
cmd->state.suspend_resume = SR_AFTER_PRE_CHAIN;
|
||||
break;
|
||||
|
|
@ -5286,9 +5262,7 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
* pre-chain) that we need to copy into the current command
|
||||
* buffer.
|
||||
*/
|
||||
cmd->trace_rp_drawcalls_start = u_trace_end_iterator(&cmd->trace);
|
||||
tu_append_post_chain(cmd, secondary);
|
||||
cmd->trace_rp_drawcalls_end = u_trace_end_iterator(&cmd->trace);
|
||||
cmd->state.suspended_pass = secondary->state.suspended_pass;
|
||||
|
||||
switch (cmd->state.suspend_resume) {
|
||||
|
|
@ -5610,8 +5584,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
|||
|
||||
tu_choose_gmem_layout(cmd);
|
||||
|
||||
tu_trace_start_render_pass(cmd);
|
||||
|
||||
/* Note: because this is external, any flushes will happen before draw_cs
|
||||
* gets called. However deferred flushes could have to happen later as part
|
||||
* of the subpass.
|
||||
|
|
@ -5629,8 +5601,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
|||
|
||||
tu_lrz_begin_renderpass<CHIP>(cmd);
|
||||
|
||||
cmd->trace_rp_drawcalls_start = u_trace_end_iterator(&cmd->trace);
|
||||
|
||||
tu_emit_renderpass_begin(cmd);
|
||||
tu_emit_subpass_begin<CHIP>(cmd);
|
||||
|
||||
|
|
@ -5772,15 +5742,6 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
|
|||
cmd->state.suspended_pass.gmem_layout = cmd->state.gmem_layout;
|
||||
}
|
||||
|
||||
if (!resuming) {
|
||||
cmd->trace_rp_start = u_trace_end_iterator(&cmd->trace);
|
||||
tu_trace_start_render_pass(cmd);
|
||||
}
|
||||
|
||||
if (!resuming || cmd->state.suspend_resume == SR_NONE) {
|
||||
cmd->trace_rp_drawcalls_start = u_trace_end_iterator(&cmd->trace);
|
||||
}
|
||||
|
||||
if (!resuming) {
|
||||
tu_emit_renderpass_begin(cmd);
|
||||
tu_emit_subpass_begin<CHIP>(cmd);
|
||||
|
|
@ -8052,7 +8013,6 @@ tu_CmdEndRendering2EXT(VkCommandBuffer commandBuffer,
|
|||
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
||||
|
||||
if (cmd_buffer->state.suspend_resume == SR_IN_PRE_CHAIN) {
|
||||
cmd_buffer->trace_rp_drawcalls_end = u_trace_end_iterator(&cmd_buffer->trace);
|
||||
tu_save_pre_chain(cmd_buffer);
|
||||
cmd_buffer->pre_chain.fdm_offset = !!fdm_offsets;
|
||||
if (fdm_offsets) {
|
||||
|
|
|
|||
|
|
@ -572,10 +572,8 @@ struct tu_cmd_buffer
|
|||
|
||||
struct tu_device *device;
|
||||
|
||||
struct u_trace trace;
|
||||
struct u_trace_iterator trace_rp_start;
|
||||
struct u_trace_iterator trace_rp_drawcalls_start;
|
||||
struct u_trace_iterator trace_rp_drawcalls_end;
|
||||
struct u_trace_iterator trace_renderpass_start;
|
||||
struct u_trace trace, rp_trace;
|
||||
|
||||
struct list_head renderpass_autotune_results;
|
||||
struct tu_autotune_results_buffer* autotune_buffer;
|
||||
|
|
@ -632,7 +630,7 @@ struct tu_cmd_buffer
|
|||
bool fdm_offset;
|
||||
VkOffset2D fdm_offsets[MAX_VIEWS];
|
||||
|
||||
struct u_trace_iterator trace_rp_drawcalls_start, trace_rp_drawcalls_end;
|
||||
struct u_trace rp_trace;
|
||||
|
||||
struct tu_render_pass_state state;
|
||||
|
||||
|
|
|
|||
|
|
@ -2142,38 +2142,6 @@ tu_trace_get_data(struct u_trace_context *utctx,
|
|||
return (char *) tu_suballoc_bo_map(bo) + offset_B;
|
||||
}
|
||||
|
||||
/* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
|
||||
* that ignore tracepoints at the beginning/end that are part of a
|
||||
* suspend/resume chain.
|
||||
*/
|
||||
static struct u_trace_iterator
|
||||
tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
|
||||
{
|
||||
switch (cmdbuf->state.suspend_resume) {
|
||||
case SR_IN_PRE_CHAIN:
|
||||
return cmdbuf->trace_rp_drawcalls_end;
|
||||
case SR_AFTER_PRE_CHAIN:
|
||||
case SR_IN_CHAIN_AFTER_PRE_CHAIN:
|
||||
return cmdbuf->pre_chain.trace_rp_drawcalls_end;
|
||||
default:
|
||||
return u_trace_begin_iterator(&cmdbuf->trace);
|
||||
}
|
||||
}
|
||||
|
||||
static struct u_trace_iterator
|
||||
tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
|
||||
{
|
||||
switch (cmdbuf->state.suspend_resume) {
|
||||
case SR_IN_PRE_CHAIN:
|
||||
return cmdbuf->trace_rp_drawcalls_end;
|
||||
case SR_IN_CHAIN:
|
||||
case SR_IN_CHAIN_AFTER_PRE_CHAIN:
|
||||
return cmdbuf->trace_rp_drawcalls_start;
|
||||
default:
|
||||
return u_trace_end_iterator(&cmdbuf->trace);
|
||||
}
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_create_copy_timestamp_cs(struct tu_u_trace_submission_data *submission_data,
|
||||
struct tu_cmd_buffer **cmd_buffers,
|
||||
|
|
@ -2217,7 +2185,8 @@ tu_create_copy_timestamp_cs(struct tu_u_trace_submission_data *submission_data,
|
|||
continue;
|
||||
|
||||
u_trace_clone_append(
|
||||
tu_cmd_begin_iterator(cmdbuf), tu_cmd_end_iterator(cmdbuf),
|
||||
u_trace_begin_iterator(&cmdbuf->trace),
|
||||
u_trace_end_iterator(&cmdbuf->trace),
|
||||
&submission_data->timestamp_copy_data->trace, cs, tu_copy_buffer);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -144,15 +144,9 @@ tu_insert_dynamic_cmdbufs(struct tu_device *dev,
|
|||
|
||||
case SR_AFTER_PRE_CHAIN:
|
||||
case SR_IN_CHAIN_AFTER_PRE_CHAIN:
|
||||
cmd_buffer->trace_rp_drawcalls_start = u_trace_end_iterator(&cmd_buffer->trace);
|
||||
cmd_buffer->trace_renderpass_start = u_trace_end_iterator(&cmd_buffer->rp_trace);
|
||||
tu_append_pre_chain(cmd_buffer, old_cmds[i]);
|
||||
|
||||
if (!(old_cmds[i]->usage_flags &
|
||||
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
|
||||
u_trace_disable_event_range(old_cmds[i]->pre_chain.trace_rp_drawcalls_start,
|
||||
old_cmds[i]->pre_chain.trace_rp_drawcalls_end);
|
||||
}
|
||||
|
||||
const struct VkOffset2D *fdm_offsets =
|
||||
cmd_buffer->pre_chain.fdm_offset ?
|
||||
cmd_buffer->pre_chain.fdm_offsets : NULL;
|
||||
|
|
@ -200,12 +194,6 @@ tu_insert_dynamic_cmdbufs(struct tu_device *dev,
|
|||
|
||||
tu_append_pre_post_chain(cmd_buffer, old_cmds[i]);
|
||||
|
||||
if (old_cmds[i]->usage_flags &
|
||||
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
|
||||
u_trace_disable_event_range(old_cmds[i]->trace_rp_drawcalls_start,
|
||||
old_cmds[i]->trace_rp_drawcalls_end);
|
||||
}
|
||||
|
||||
/* When the command buffer is finally recorded, we need its state
|
||||
* to be the state of the command buffer before it. We need this
|
||||
* because we skip tu6_emit_hw().
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue