From bfe5fa330b39c7dcd55a5b24e07c419ccf6cf87a Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev
Date: Mon, 22 Jul 2024 11:27:06 +0200
Subject: [PATCH] tu/u_trace: dispatch indirect dims and LRZ status as indirect
 params

Signed-off-by: Danylo Piliaiev
Part-of:
---
Review notes (this section, between "---" and the diffstat, is commentary and
not part of the commit message):

 * Line structure of the patch was restored; leading whitespace of context
   lines is reconstructed and may need "git apply --ignore-whitespace".
 * Template argument lists that were missing from the text ("template"
   without parameters, calls without "<CHIP>") were restored; please confirm
   against the tree.
 * trace_end_compute_indirect() now passes a BO-relative offset derived from
   the same iova the CP_EXEC_CS_INDIRECT packet uses, matching how
   tu_trace_end_render_pass() builds its address. The previous value
   (indirect_offset alone) only points at the dispatch arguments if the
   buffer's iova equals its BO's iova.
 * Comments were added, so hunk line counts and the diffstat were updated;
   the "index" blob ids are kept from the original and no longer describe the
   post-image of tu_cmd_buffer.cc.

 src/freedreno/vulkan/tu_cmd_buffer.cc  | 70 ++++++++++++++++++++-------
 src/freedreno/vulkan/tu_perfetto.cc    |  1 +
 src/freedreno/vulkan/tu_tracepoints.py |  9 +++-
 3 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc
index 1531025be89..4fd5f174e2a 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@@ -21,6 +21,7 @@
 #include "tu_tracepoints.h"
 
 #include "common/freedreno_gpu_event.h"
+#include "common/freedreno_lrz.h"
 
 static void
 tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
@@ -1825,6 +1826,38 @@ tu_trace_start_render_pass(struct tu_cmd_buffer *cmd)
                            load_cpp, store_cpp, has_depth, ubwc);
 }
 
+/* Emit the end_render_pass tracepoint; no-op when u_trace is disabled.
+ *
+ * gmem is true for tiled rendering and false for sysmem. When an LRZ image
+ * view is bound, addr points at dir_track inside the image's lrz_fc_offset
+ * region (as a BO-relative offset), which feeds the indirect lrzStatus
+ * argument; otherwise addr is left zero-initialized.
+ */
+template <chip CHIP>
+static void
+tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem)
+{
+   if (!u_trace_enabled(&cmd->device->trace_context))
+      return;
+
+   uint32_t avg_per_sample_bandwidth =
+      cmd->state.rp.drawcall_bandwidth_per_sample_sum /
+      MAX2(cmd->state.rp.drawcall_count, 1);
+
+   struct u_trace_address addr = {};
+   if (cmd->state.lrz.image_view) {
+      struct tu_image *image = cmd->state.lrz.image_view->image;
+      addr.bo = image->bo;
+      addr.offset = (image->iova - image->bo->iova) + image->lrz_fc_offset +
+                    offsetof(fd_lrzfc_layout<CHIP>, dir_track);
+   }
+
+   trace_end_render_pass(&cmd->trace, &cmd->cs, gmem,
+                         cmd->state.rp.drawcall_count,
+                         avg_per_sample_bandwidth, cmd->state.lrz.valid,
+                         cmd->state.rp.lrz_disable_reason, addr);
+}
+
 static void
 tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd)
 {
@@ -2145,12 +2178,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
 
    tu6_tile_render_end(cmd, &cmd->cs, autotune_result);
 
-   trace_end_render_pass(&cmd->trace, &cmd->cs, true,
-      cmd->state.rp.drawcall_count,
-      cmd->state.rp.drawcall_bandwidth_per_sample_sum /
-         MAX2(cmd->state.rp.drawcall_count, 1),
-      cmd->state.lrz.valid,
-      cmd->state.rp.lrz_disable_reason);
+   tu_trace_end_render_pass<CHIP>(cmd, true);
 
    /* We have trashed the dynamically-emitted viewport, scissor, and FS params
     * via the patchpoints, so we need to re-emit them if they are reused for a
@@ -2187,12 +2215,7 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
 
    tu6_sysmem_render_end(cmd, &cmd->cs, autotune_result);
 
-   trace_end_render_pass(&cmd->trace, &cmd->cs, false,
-      cmd->state.rp.drawcall_count,
-      cmd->state.rp.drawcall_bandwidth_per_sample_sum /
-         MAX2(cmd->state.rp.drawcall_count, 1),
-      cmd->state.lrz.valid,
-      cmd->state.rp.lrz_disable_reason);
+   tu_trace_end_render_pass<CHIP>(cmd, false);
 }
 
 template <chip CHIP>
@@ -6304,13 +6327,11 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
                    HLSQ_CS_KERNEL_GROUP_Y(CHIP, 1),
                    HLSQ_CS_KERNEL_GROUP_Z(CHIP, 1));
 
-   trace_start_compute(&cmd->trace, cs, info->indirect != NULL, local_size[0],
-                       local_size[1], local_size[2], info->blocks[0],
-                       info->blocks[1], info->blocks[2]);
-
    if (info->indirect) {
       uint64_t iova = info->indirect->iova + info->indirect_offset;
 
+      trace_start_compute_indirect(&cmd->trace, cs);
+
       tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
       tu_cs_emit(cs, 0x00000000);
       tu_cs_emit_qw(cs, iova);
@@ -6318,15 +6339,28 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
                  A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
                  A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
                  A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
+
+      /* The trace address is BO-relative, so derive the offset from the same
+       * iova the dispatch packet uses rather than from indirect_offset alone.
+       */
+      trace_end_compute_indirect(&cmd->trace, cs,
+                                 (struct u_trace_address) {
+                                    .bo = info->indirect->bo,
+                                    .offset = iova - info->indirect->bo->iova,
+                                 });
    } else {
+      trace_start_compute(&cmd->trace, cs, info->indirect != NULL,
+                          local_size[0], local_size[1], local_size[2],
+                          info->blocks[0], info->blocks[1], info->blocks[2]);
+
       tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
       tu_cs_emit(cs, 0x00000000);
       tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
       tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
       tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
-   }
 
-   trace_end_compute(&cmd->trace, cs);
+      trace_end_compute(&cmd->trace, cs);
+   }
 
    /* For the workaround above, because it's using the "wrong" context for
     * SP_FS_INSTRLEN we should emit another dummy event write to avoid a
diff --git a/src/freedreno/vulkan/tu_perfetto.cc b/src/freedreno/vulkan/tu_perfetto.cc
index 85788b6dfdc..37b61c44306 100644
--- a/src/freedreno/vulkan/tu_perfetto.cc
+++ b/src/freedreno/vulkan/tu_perfetto.cc
@@ -499,6 +499,7 @@ CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
 CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
 CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
 CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
+CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
 CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
 CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
 CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
diff --git a/src/freedreno/vulkan/tu_tracepoints.py b/src/freedreno/vulkan/tu_tracepoints.py
index be4a39a8738..b99c84f865e 100644
--- a/src/freedreno/vulkan/tu_tracepoints.py
+++ b/src/freedreno/vulkan/tu_tracepoints.py
@@ -28,6 +28,8 @@ Header('vk_enum_to_str.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
 Header('vk_format.h')
 Header('tu_cmd_buffer.h', scope=HeaderScope.SOURCE)
 Header('tu_device.h', scope=HeaderScope.SOURCE)
+Header('common/freedreno_lrz.h')
+Header('vulkan/vulkan_core.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
 
 # we can't use tu_common.h because it includes ir3 headers which are not
 # compatible with C++
@@ -91,7 +93,8 @@ begin_end_tp('render_pass',
               Arg(type='uint32_t', var='drawCount', c_format='%u'),
               Arg(type='uint32_t', var='avgPerSampleBandwidth', c_format='%u'),
               Arg(type='bool', var='lrz', c_format='%s', to_prim_type='({} ? "true" : "false")'),
-              Arg(type='const char *', var='lrzDisableReason', c_format='%s'),])
+              Arg(type='const char *', var='lrzDisableReason', c_format='%s'),
+              Arg(type='uint32_t', var='lrzStatus', c_format='%s', to_prim_type='(fd_lrz_gpu_dir_to_str((enum fd_lrz_gpu_dir)({} & 0xff)))', is_indirect=True),])
 
 begin_end_tp('binning_ib')
 
@@ -139,6 +142,10 @@ begin_end_tp('compute',
           Arg(type='uint16_t', var='num_groups_y', c_format='%u'),
           Arg(type='uint16_t', var='num_groups_z', c_format='%u')])
 
+begin_end_tp('compute_indirect',
+             end_args=[ArgStruct(type='VkDispatchIndirectCommand', var='size',
+                                 is_indirect=True, c_format="%ux%ux%u",
+                                 fields=['x', 'y', 'z'])])
 
 # Annotations for Cmd(Begin|End)DebugUtilsLabelEXT
 for suffix in ["", "_rp"]: