diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 81f4d21b3e9..54e2fecc590 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -2287,6 +2287,9 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd, { const struct blit_ops *ops = &r2d_ops; + trace_start_slow_clear_lrz(&cmd->trace, &cmd->cs, cmd, image->vk.format, + image->vk.extent.width, image->vk.extent.height); + /* It is assumed that LRZ cache is invalidated at this point for * the writes here to become visible to LRZ. * @@ -2319,6 +2322,8 @@ tu6_clear_lrz(struct tu_cmd_buffer *cmd, cmd->state.cache.flush_bits |= TU_CMD_FLAG_CCU_CLEAN_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE | TU_CMD_FLAG_WAIT_FOR_IDLE; + + trace_end_slow_clear_lrz(&cmd->trace, &cmd->cs); } TU_GENX(tu6_clear_lrz); @@ -2485,11 +2490,8 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, src_image->vk.format, info->srcSubresource.aspectMask); enum pipe_format dst_format = tu_aspects_to_plane( dst_image->vk.format, info->dstSubresource.aspectMask); - trace_start_blit(&cmd->trace, cs, cmd, - ops == &r3d_ops, - src_image->vk.format, - dst_image->vk.format, - layers); + trace_start_blit_image(&cmd->trace, cs, cmd, ops == &r3d_ops, + src_image->vk.format, dst_image->vk.format, layers); ops->setup(cmd, cs, src_format, dst_format, info->dstSubresource.aspectMask, blit_param, false, dst_image->layout[0].ubwc, @@ -2541,7 +2543,7 @@ tu6_blit_image(struct tu_cmd_buffer *cmd, ops->teardown(cmd, cs); - trace_end_blit(&cmd->trace, cs); + trace_end_blit_image(&cmd->trace, cs); } template @@ -2726,6 +2728,8 @@ tu_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(tu_image, dst_image, pCopyBufferToImageInfo->dstImage); VK_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); + trace_start_copy_buffer_to_image(&cmd->trace, &cmd->cs, cmd, dst_image->vk.format); + for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; ++i) tu_copy_buffer_to_image(cmd, 
src_buffer, dst_image, pCopyBufferToImageInfo->pRegions + i); @@ -2733,6 +2737,8 @@ tu_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, if (dst_image->lrz_layout.lrz_total_size) { tu_disable_lrz(cmd, &cmd->cs, dst_image); } + + trace_end_copy_buffer_to_image(&cmd->trace, &cmd->cs); } TU_GENX(tu_CmdCopyBufferToImage2); @@ -2929,6 +2935,8 @@ tu_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(tu_image, src_image, pCopyImageToBufferInfo->srcImage); VK_FROM_HANDLE(tu_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer); + trace_start_copy_image_to_buffer(&cmd->trace, &cmd->cs, cmd, src_image->vk.format); + bool unaligned_store = false; for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; ++i) tu_copy_image_to_buffer(cmd, src_image, dst_buffer, @@ -2936,6 +2944,8 @@ tu_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, &unaligned_store); after_buffer_unaligned_buffer_store(cmd, unaligned_store); + + trace_end_copy_image_to_buffer(&cmd->trace, &cmd->cs); } TU_GENX(tu_CmdCopyImageToBuffer2); @@ -3288,6 +3298,9 @@ tu_CmdCopyImage2(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(tu_image, src_image, pCopyImageInfo->srcImage); VK_FROM_HANDLE(tu_image, dst_image, pCopyImageInfo->dstImage); + trace_start_copy_image(&cmd->trace, &cmd->cs, cmd, src_image->vk.format, + dst_image->vk.format); + for (uint32_t i = 0; i < pCopyImageInfo->regionCount; ++i) { if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && dst_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { @@ -3307,6 +3320,8 @@ tu_CmdCopyImage2(VkCommandBuffer commandBuffer, if (dst_image->lrz_layout.lrz_total_size) { tu_disable_lrz(cmd, &cmd->cs, dst_image); } + + trace_end_copy_image(&cmd->trace, &cmd->cs); } TU_GENX(tu_CmdCopyImage2); @@ -3556,6 +3571,8 @@ tu_CmdCopyBuffer2(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferInfo->srcBuffer); VK_FROM_HANDLE(tu_buffer, dst_buffer, pCopyBufferInfo->dstBuffer); + trace_start_copy_buffer(&cmd->trace, 
&cmd->cs, cmd); + /* Choose the largest common block size for all copy regions * to prevent WaW hazards when potentially performing non-overlapping * unaligned stores through CCU. See handle_buffer_unaligned_store. @@ -3577,8 +3594,10 @@ tu_CmdCopyBuffer2(VkCommandBuffer commandBuffer, } bool unaligned_store = false; + uint64_t total_size = 0; /* sum of 64-bit region sizes; saturated below for the 32-bit tracepoint field */ for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) { const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i]; + total_size += region->size; copy_buffer(cmd, vk_buffer_address(&dst_buffer->vk, region->dstOffset), vk_buffer_address(&src_buffer->vk, region->srcOffset), @@ -3586,6 +3605,8 @@ tu_CmdCopyBuffer2(VkCommandBuffer commandBuffer, } after_buffer_unaligned_buffer_store(cmd, unaligned_store); + + trace_end_copy_buffer(&cmd->trace, &cmd->cs, total_size > UINT32_MAX ? UINT32_MAX : (uint32_t) total_size, unaligned_store); } TU_GENX(tu_CmdCopyBuffer2); @@ -3607,6 +3628,8 @@ tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, return; } + trace_start_update_buffer(&cmd->trace, &cmd->cs, cmd); + /* As in tu_CmdCopyBuffer2(), the largest viable block size is used. 
*/ uint64_t alignment_target = dataSize | vk_buffer_address(&buffer->vk, dstOffset); uint32_t block_size = 1; @@ -3621,6 +3644,8 @@ tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, tmp.iova, dataSize, block_size, &unaligned_store); after_buffer_unaligned_buffer_store(cmd, unaligned_store); + + trace_end_update_buffer(&cmd->trace, &cmd->cs, dataSize, unaligned_store); } TU_GENX(tu_CmdUpdateBuffer); @@ -3635,6 +3660,8 @@ tu_cmd_fill_buffer(VkCommandBuffer commandBuffer, const struct blit_ops *ops = &r2d_ops; struct tu_cs *cs = &cmd->cs; + trace_start_fill_buffer(&cmd->trace, &cmd->cs, cmd); + uint32_t blocks = fillSize / 4; bool unaligned_store = false; @@ -3663,6 +3690,8 @@ tu_cmd_fill_buffer(VkCommandBuffer commandBuffer, ops->teardown(cmd, cs); after_buffer_unaligned_buffer_store(cmd, unaligned_store); + + trace_end_fill_buffer(&cmd->trace, &cmd->cs, fillSize, unaligned_store); } void @@ -3706,6 +3735,9 @@ tu_CmdResolveImage2(VkCommandBuffer commandBuffer, const struct blit_ops *ops = &r2d_ops; struct tu_cs *cs = &cmd->cs; + trace_start_resolve_image(&cmd->trace, &cmd->cs, cmd, src_image->vk.format, + dst_image->vk.format); + enum pipe_format src_format = vk_format_to_pipe_format(src_image->vk.format); enum pipe_format dst_format = @@ -3736,6 +3768,8 @@ tu_CmdResolveImage2(VkCommandBuffer commandBuffer, } ops->teardown(cmd, cs); + + trace_end_resolve_image(&cmd->trace, &cmd->cs); } TU_GENX(tu_CmdResolveImage2); @@ -4094,6 +4128,8 @@ tu_CmdClearColorImage(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(tu_image, image, image_h); + trace_start_clear_color_image(&cmd->trace, &cmd->cs, cmd, image->vk.format); + bool use_generic_clear = use_generic_clear_for_image_clear(cmd, image); if (use_generic_clear) { /* Generic clear doesn't go through CCU (or other caches). 
*/ @@ -4117,6 +4153,8 @@ tu_CmdClearColorImage(VkCommandBuffer commandBuffer, cmd->state.cache.flush_bits |= TU_CMD_FLAG_BLIT_CACHE_CLEAN; tu_emit_cache_flush(cmd); } + + trace_end_clear_color_image(&cmd->trace, &cmd->cs); } TU_GENX(tu_CmdClearColorImage); @@ -4132,6 +4170,9 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(tu_image, image, image_h); + trace_start_clear_depth_stencil_image(&cmd->trace, &cmd->cs, cmd, + image->vk.format); + bool use_generic_clear = use_generic_clear_for_image_clear(cmd, image); if (use_generic_clear) { /* Generic clear doesn't go through CCU (or other caches). */ @@ -4174,6 +4215,8 @@ tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, } tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges); + + trace_end_clear_depth_stencil_image(&cmd->trace, &cmd->cs); } TU_GENX(tu_CmdClearDepthStencilImage); diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index bf9c597fca7..681e71debb5 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -11,6 +11,7 @@ #include "tu_cmd_buffer.h" #include "tu_cs.h" #include "tu_image.h" +#include "tu_tracepoints.h" /* See lrz.rst for how HW works. Here are only the implementation notes. 
* @@ -882,6 +883,9 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, if (!image->lrz_layout.lrz_total_size) return; + trace_start_disable_lrz(&cmd->trace, cs, cmd, image->vk.format, + image->vk.extent.width, image->vk.extent.height); /* trace into the same cs the commands below are emitted to */ + uint64_t lrz_iova = image->iova + image->lrz_layout.lrz_offset; /* Synchronize writes in BV with subsequent render passes against this @@ -927,6 +931,8 @@ tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, tu_cs_emit_qw(cs, TU_ONCHIP_CB_RESLIST_OVERFLOW); tu_cs_emit(cs, 0); /* value */ } + + trace_end_disable_lrz(&cmd->trace, cs); } TU_GENX(tu_disable_lrz); diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc index 218059bcff2..293f8b8a4b4 100644 --- a/src/freedreno/vulkan/tu_perfetto.cc +++ b/src/freedreno/vulkan/tu_perfetto.cc @@ -78,6 +78,17 @@ enum tu_stage_id { GMEM_STORE_STAGE_ID, SYSMEM_RESOLVE_STAGE_ID, CUSTOM_RESOLVE_STAGE_ID, + CLEAR_COLOR_IMAGE_STAGE_ID, + CLEAR_DEPTH_STENCIL_IMAGE_STAGE_ID, + COPY_BUFFER_TO_IMAGE_STAGE_ID, + COPY_IMAGE_TO_BUFFER_STAGE_ID, + COPY_IMAGE_STAGE_ID, + RESOLVE_IMAGE_STAGE_ID, + FILL_BUFFER_STAGE_ID, + COPY_BUFFER_STAGE_ID, + UPDATE_BUFFER_STAGE_ID, + SLOW_CLEAR_LRZ_STAGE_ID, + DISABLE_LRZ_STAGE_ID, // TODO add the rest from fd_stage_id }; @@ -114,6 +125,17 @@ static const struct { [GMEM_STORE_STAGE_ID] = { "GMEM Store", "Per tile GMEM to system memory store" }, [SYSMEM_RESOLVE_STAGE_ID] = { "SysMem Resolve", "System memory MSAA resolve" }, [CUSTOM_RESOLVE_STAGE_ID] = { "Custom Resolve", "Custom resolve via shader" }, + [CLEAR_COLOR_IMAGE_STAGE_ID] = { "Clear Color Image", "" }, + [CLEAR_DEPTH_STENCIL_IMAGE_STAGE_ID] = { "Clear Depth Stencil Image", "" }, + [COPY_BUFFER_TO_IMAGE_STAGE_ID] = { "Copy Buffer to Image", "" }, + [COPY_IMAGE_TO_BUFFER_STAGE_ID] = { "Copy Image to Buffer", "" }, + [COPY_IMAGE_STAGE_ID] = { "Copy Image", "" }, + [RESOLVE_IMAGE_STAGE_ID] = { "Resolve Image", "" }, + [FILL_BUFFER_STAGE_ID] = { "Fill 
Buffer", "" }, + [COPY_BUFFER_STAGE_ID] = { "Copy Buffer", "" }, + [UPDATE_BUFFER_STAGE_ID] = { "Update Buffer", "" }, + [SLOW_CLEAR_LRZ_STAGE_ID] = { "Slow Clear LRZ", "Perform slow clear of LRZ for this image, should be avoided" }, + [DISABLE_LRZ_STAGE_ID] = { "Disable LRZ", "Disable LRZ for this image, should be avoided" }, // TODO add the rest }; @@ -591,7 +613,6 @@ CREATE_EVENT_CALLBACK(concurrent_binning_ib, CONCURRENT_BINNING_STAGE_ID) CREATE_EVENT_CALLBACK(concurrent_binning_barrier, CONCURRENT_BINNING_BARRIER_STAGE_ID) CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID) CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID) -CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID) CREATE_EVENT_CALLBACK(draw, DRAW_STAGE_ID) CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID) CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID) @@ -603,6 +624,18 @@ CREATE_EVENT_CALLBACK(gmem_load, GMEM_LOAD_STAGE_ID) CREATE_EVENT_CALLBACK(gmem_store, GMEM_STORE_STAGE_ID) CREATE_EVENT_CALLBACK(sysmem_resolve, SYSMEM_RESOLVE_STAGE_ID) CREATE_EVENT_CALLBACK(custom_resolve, CUSTOM_RESOLVE_STAGE_ID) +CREATE_EVENT_CALLBACK(blit_image, BLIT_STAGE_ID) +CREATE_EVENT_CALLBACK(clear_color_image, CLEAR_COLOR_IMAGE_STAGE_ID) +CREATE_EVENT_CALLBACK(clear_depth_stencil_image, CLEAR_DEPTH_STENCIL_IMAGE_STAGE_ID) +CREATE_EVENT_CALLBACK(copy_buffer_to_image, COPY_BUFFER_TO_IMAGE_STAGE_ID) +CREATE_EVENT_CALLBACK(copy_image, COPY_IMAGE_STAGE_ID) +CREATE_EVENT_CALLBACK(copy_image_to_buffer, COPY_IMAGE_TO_BUFFER_STAGE_ID) +CREATE_EVENT_CALLBACK(fill_buffer, FILL_BUFFER_STAGE_ID) +CREATE_EVENT_CALLBACK(copy_buffer, COPY_BUFFER_STAGE_ID) +CREATE_EVENT_CALLBACK(update_buffer, UPDATE_BUFFER_STAGE_ID) +CREATE_EVENT_CALLBACK(resolve_image, RESOLVE_IMAGE_STAGE_ID) +CREATE_EVENT_CALLBACK(slow_clear_lrz, SLOW_CLEAR_LRZ_STAGE_ID) +CREATE_EVENT_CALLBACK(disable_lrz, DISABLE_LRZ_STAGE_ID) void tu_perfetto_start_cmd_buffer_annotation( diff --git a/src/freedreno/vulkan/tu_tracepoints.py 
b/src/freedreno/vulkan/tu_tracepoints.py index 97189f66466..8caab9068fa 100644 --- a/src/freedreno/vulkan/tu_tracepoints.py +++ b/src/freedreno/vulkan/tu_tracepoints.py @@ -54,8 +54,12 @@ command_buffer_struct = Arg(type='VkCommandBuffer', name='command_buffer_handle' def begin_end_tp(name, args=[], tp_struct=None, tp_print=None, end_args=[], end_tp_struct=None, end_tp_print=None, tp_default_enabled=True, marker_tp=True, - queue_tp=True): + queue_tp=True, toggle_name=None): global tu_default_tps + + if not toggle_name: + toggle_name = name + if tp_default_enabled: tu_default_tps.append(name) @@ -176,13 +180,67 @@ begin_end_tp('sysmem_resolve', begin_end_tp('custom_resolve') -begin_end_tp('blit', +begin_end_tp('blit_image', + toggle_name='clear_blit', # TODO: add source megapixels count and target megapixels count arguments args=[Arg(type='uint8_t', var='uses_3d_blit', c_format='%u'), Arg(type='enum VkFormat', var='src_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'), Arg(type='enum VkFormat', var='dst_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'), Arg(type='uint8_t', var='layers', c_format='%u')]) +begin_end_tp('clear_color_image', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='format', c_format='%s', to_prim_type='vk_format_description({})->short_name')]) + +begin_end_tp('clear_depth_stencil_image', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='format', c_format='%s', to_prim_type='vk_format_description({})->short_name')]) + +begin_end_tp('copy_buffer_to_image', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='format', c_format='%s', to_prim_type='vk_format_description({})->short_name')]) + +begin_end_tp('copy_image_to_buffer', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='format', c_format='%s', to_prim_type='vk_format_description({})->short_name')]) + +begin_end_tp('copy_image', + toggle_name='clear_blit', + 
args=[Arg(type='enum VkFormat', var='src_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'), + Arg(type='enum VkFormat', var='dst_format', c_format='%s', to_prim_type='vk_format_description({})->short_name')]) + +begin_end_tp('resolve_image', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='src_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'), + Arg(type='enum VkFormat', var='dst_format', c_format='%s', to_prim_type='vk_format_description({})->short_name')]) + +begin_end_tp('fill_buffer', + toggle_name='clear_blit', + end_args=[Arg(type='uint32_t', var='size', c_format='%u'), + Arg(type='bool', var='unaligned', c_format='%s', to_prim_type='({} ? "true" : "false")')]) + +begin_end_tp('copy_buffer', + toggle_name='clear_blit', + end_args=[Arg(type='uint32_t', var='size', c_format='%u'), + Arg(type='bool', var='unaligned', c_format='%s', to_prim_type='({} ? "true" : "false")')]) + +begin_end_tp('update_buffer', + toggle_name='clear_blit', + end_args=[Arg(type='uint32_t', var='size', c_format='%u'), + Arg(type='bool', var='unaligned', c_format='%s', to_prim_type='({} ? "true" : "false")')]) + +begin_end_tp('slow_clear_lrz', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='img_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'), + Arg(type='uint16_t', var='img_width', c_format='%u'), + Arg(type='uint16_t', var='img_height', c_format='%u')]) + +begin_end_tp('disable_lrz', + toggle_name='clear_blit', + args=[Arg(type='enum VkFormat', var='img_format', c_format='%s', to_prim_type='vk_format_description({})->short_name'), + Arg(type='uint16_t', var='img_width', c_format='%u'), + Arg(type='uint16_t', var='img_height', c_format='%u')]) + begin_end_tp('compute', args=[Arg(type='uint8_t', var='indirect', c_format='%u'), Arg(type='uint8_t', var='unaligned', c_format='%u'),