diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index d781344be45..a7a0df84fa3 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -1578,7 +1578,11 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, tu6_plane_format(dst_image->vk.format, tu6_plane_index(src_image->vk.format, info->srcSubresource.aspectMask)); - trace_start_blit(&cmd->trace, cs); + trace_start_blit(&cmd->trace, cs, + ops == &r3d_ops, + src_image->vk.format, + dst_image->vk.format, + layers); ops->setup(cmd, cs, src_format, dst_format, info->dstSubresource.aspectMask, blit_param, false, dst_image->layout[0].ubwc, @@ -1629,11 +1633,7 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, ops->teardown(cmd, cs); - trace_end_blit(&cmd->trace, cs, - ops == &r3d_ops, - src_image->vk.format, - dst_image->vk.format, - layers); + trace_end_blit(&cmd->trace, cs); } VKAPI_ATTR void VKAPI_CALL @@ -2262,7 +2262,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd, { const struct blit_ops *ops = &r2d_ops; - trace_start_sysmem_resolve(&cmd->trace, cs); + trace_start_sysmem_resolve(&cmd->trace, cs, vk_dst_format); enum pipe_format src_format = tu_vk_format_to_pipe_format(vk_src_format); enum pipe_format dst_format = tu_vk_format_to_pipe_format(vk_dst_format); @@ -2298,7 +2298,7 @@ resolve_sysmem(struct tu_cmd_buffer *cmd, ops->teardown(cmd, cs); - trace_end_sysmem_resolve(&cmd->trace, cs, vk_dst_format); + trace_end_sysmem_resolve(&cmd->trace, cs); } void @@ -2450,7 +2450,7 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, bool z_clear = false; bool s_clear = false; - trace_start_sysmem_clear_all(&cmd->trace, cs); + trace_start_sysmem_clear_all(&cmd->trace, cs, mrt_count, rect_count); for (uint32_t i = 0; i < attachment_count; i++) { uint32_t a; @@ -2597,8 +2597,7 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS); } - trace_end_sysmem_clear_all(&cmd->trace, - cs, mrt_count, rect_count); + trace_end_sysmem_clear_all(&cmd->trace, cs); } static void @@ -2719,7 +2718,7 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[attachment]; - trace_start_gmem_clear(&cmd->trace, cs); + trace_start_gmem_clear(&cmd->trace, cs, att->format, att->samples); tu_cs_emit_regs(cs, A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(att->samples))); @@ -2742,7 +2741,7 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, } } - trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples); + trace_end_gmem_clear(&cmd->trace, cs); } static void @@ -2871,7 +2870,8 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd, if (cmd->state.pass->attachments[a].samples > 1) ops = &r3d_ops; - trace_start_sysmem_clear(&cmd->trace, cs); + trace_start_sysmem_clear(&cmd->trace, cs, vk_format, ops == &r3d_ops, + cmd->state.pass->attachments[a].samples); ops->setup(cmd, cs, format, format, clear_mask, 0, true, iview->view.ubwc_enabled, cmd->state.pass->attachments[a].samples); @@ -2894,9 +2894,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd, ops->teardown(cmd, cs); - trace_end_sysmem_clear(&cmd->trace, cs, - vk_format, ops == &r3d_ops, - cmd->state.pass->attachments[a].samples); + trace_end_sysmem_clear(&cmd->trace, cs); } void @@ -3116,7 +3114,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, if (!load_common && !load_stencil) return; - trace_start_gmem_load(&cmd->trace, cs); + trace_start_gmem_load(&cmd->trace, cs, attachment->format, force_load); /* If attachment will be cleared by vkCmdClearAttachments - it is likely * that it would be partially cleared, and since it is done by 2d blit @@ -3138,7 +3136,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, if (cond_exec) tu_end_load_store_cond_exec(cmd, cs, true); - trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load); + trace_end_gmem_load(&cmd->trace, cs); } static void @@ -3343,16 +3341,6 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, if (!dst->store && !dst->store_stencil) return; - trace_start_gmem_store(&cmd->trace, cs); - - /* Unconditional store should happen only if attachment was cleared, - * which could have happened either by load_op or via vkCmdClearAttachments. - */ - bool cond_exec = cond_exec_allowed && src->cond_store_allowed; - if (cond_exec) { - tu_begin_load_store_cond_exec(cmd, cs, false); - } - bool unaligned = tu_attachment_store_unaligned(cmd, a); /* D32_SFLOAT_S8_UINT is quite special format: it has two planes, @@ -3373,9 +3361,21 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, bool store_common = dst->store && !resolve_d32s8_s8; bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8; + bool use_fast_path = !unaligned && !resolve_d24s8_s8 && + (a == gmem_a || blit_can_resolve(dst->format)); + + trace_start_gmem_store(&cmd->trace, cs, dst->format, use_fast_path, unaligned); + + /* Unconditional store should happen only if attachment was cleared, + * which could have happened either by load_op or via vkCmdClearAttachments. + */ + bool cond_exec = cond_exec_allowed && src->cond_store_allowed; + if (cond_exec) { + tu_begin_load_store_cond_exec(cmd, cs, false); + } + /* use fast path when render area is aligned, except for unsupported resolve cases */ - if (!unaligned && !resolve_d24s8_s8 && - (a == gmem_a || blit_can_resolve(dst->format))) { + if (use_fast_path) { if (store_common) tu_emit_blit(cmd, cs, iview, src, true, false); if (store_separate_stencil) @@ -3385,7 +3385,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, tu_end_load_store_cond_exec(cmd, cs, false); } - trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false); + trace_end_gmem_store(&cmd->trace, cs); return; } @@ -3440,5 +3440,5 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, tu_end_load_store_cond_exec(cmd, cs, false); } - trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned); + trace_end_gmem_store(&cmd->trace, cs); } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index e9562a0ab7f..f5f53cbb144 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -1518,7 +1518,6 @@ static void tu_cmd_render_tiles(struct tu_cmd_buffer *cmd, struct tu_renderpass_result *autotune_result) { - const struct tu_framebuffer *fb = cmd->state.framebuffer; const struct tu_tiling_config *tiling = cmd->state.tiling; /* Create gmem stores now (at EndRenderPass time)) because they needed to @@ -1569,7 +1568,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd, tu6_tile_render_end(cmd, &cmd->cs, autotune_result); - trace_end_render_pass(&cmd->trace, &cmd->cs, fb, tiling); + trace_end_render_pass(&cmd->trace, &cmd->cs); /* tu6_render_tile has cloned these tracepoints for each tile */ if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end)) @@ -1598,7 +1597,7 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd, tu6_sysmem_render_end(cmd, &cmd->cs, autotune_result); - trace_end_render_pass(&cmd->trace, &cmd->cs, cmd->state.framebuffer, cmd->state.tiling); + trace_end_render_pass(&cmd->trace, &cmd->cs); } void @@ -1808,7 +1807,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, /* setup initial configuration into command buffer */ if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - trace_start_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs); + trace_start_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs, cmd_buffer); switch (cmd_buffer->queue_family_index) { case TU_QUEUE_GENERAL: @@ -1822,7 +1821,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; trace_start_cmd_buffer(&cmd_buffer->trace, - pass_continue ? &cmd_buffer->draw_cs : &cmd_buffer->cs); + pass_continue ? &cmd_buffer->draw_cs : &cmd_buffer->cs, cmd_buffer); assert(pBeginInfo->pInheritanceInfo); @@ -2514,7 +2513,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer) tu_flush_all_pending(&cmd_buffer->state.renderpass_cache); tu_emit_cache_flush_renderpass(cmd_buffer, &cmd_buffer->draw_cs); - trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->draw_cs, cmd_buffer); + trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->draw_cs); } else { tu_flush_all_pending(&cmd_buffer->state.cache); cmd_buffer->state.cache.flush_bits |= @@ -2522,7 +2521,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer) TU_CMD_FLAG_CCU_FLUSH_DEPTH; tu_emit_cache_flush(cmd_buffer, &cmd_buffer->cs); - trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs, cmd_buffer); + trace_end_cmd_buffer(&cmd_buffer->trace, &cmd_buffer->cs); } tu_cs_end(&cmd_buffer->cs); @@ -4195,7 +4194,8 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, } tu_choose_gmem_layout(cmd); - trace_start_render_pass(&cmd->trace, &cmd->cs); + trace_start_render_pass(&cmd->trace, &cmd->cs, cmd->state.framebuffer, + cmd->state.tiling); /* Note: because this is external, any flushes will happen before draw_cs * gets called. However deferred flushes could have to happen later as part @@ -4332,9 +4332,9 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, cmd->state.suspended_pass.gmem_layout = cmd->state.gmem_layout; } - if (!resuming) { - trace_start_render_pass(&cmd->trace, &cmd->cs); - } + if (!resuming) + trace_start_render_pass(&cmd->trace, &cmd->cs, cmd->state.framebuffer, + cmd->state.tiling); if (!resuming || cmd->state.suspend_resume == SR_NONE) { cmd->trace_renderpass_start = u_trace_end_iterator(&cmd->trace); @@ -5618,7 +5618,9 @@ tu_dispatch(struct tu_cmd_buffer *cmd, A6XX_HLSQ_CS_KERNEL_GROUP_Y(1), A6XX_HLSQ_CS_KERNEL_GROUP_Z(1)); - trace_start_compute(&cmd->trace, cs); + trace_start_compute(&cmd->trace, cs, info->indirect != NULL, local_size[0], + local_size[1], local_size[2], info->blocks[0], + info->blocks[1], info->blocks[2]); if (info->indirect) { uint64_t iova = info->indirect->iova + info->indirect_offset; @@ -5638,10 +5640,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2])); } - trace_end_compute(&cmd->trace, cs, - info->indirect != NULL, - local_size[0], local_size[1], local_size[2], - info->blocks[0], info->blocks[1], info->blocks[2]); + trace_end_compute(&cmd->trace, cs); /* For the workaround above, because it's using the "wrong" context for * SP_FS_INSTRLEN we should emit another dummy event write to avoid a diff --git a/src/freedreno/vulkan/tu_tracepoints.py b/src/freedreno/vulkan/tu_tracepoints.py index d00cb3e5094..56691b25b60 100644 --- a/src/freedreno/vulkan/tu_tracepoints.py +++ b/src/freedreno/vulkan/tu_tracepoints.py @@ -50,14 +50,14 @@ def begin_end_tp(name, args=[], tp_struct=None, tp_print=None, if tp_default_enabled: tu_default_tps.append(name) Tracepoint('start_{0}'.format(name), - toggle_name=name, - tp_perfetto='tu_perfetto_start_{0}'.format(name)) - Tracepoint('end_{0}'.format(name), toggle_name=name, args=args, tp_struct=tp_struct, - tp_perfetto='tu_perfetto_end_{0}'.format(name), + tp_perfetto='tu_perfetto_start_{0}'.format(name), tp_print=tp_print) + Tracepoint('end_{0}'.format(name), + toggle_name=name, + tp_perfetto='tu_perfetto_end_{0}'.format(name)) begin_end_tp('cmd_buffer', args=[ArgStruct(type='const struct tu_cmd_buffer *', var='cmd')],