mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-08 21:30:23 +01:00
tu/u_trace: dispatch indirect dims and LRZ status as indirect params
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30513>
This commit is contained in:
parent
762851e5d8
commit
bfe5fa330b
3 changed files with 51 additions and 19 deletions
|
|
@ -21,6 +21,7 @@
|
|||
#include "tu_tracepoints.h"
|
||||
|
||||
#include "common/freedreno_gpu_event.h"
|
||||
#include "common/freedreno_lrz.h"
|
||||
|
||||
static void
|
||||
tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
|
|
@ -1825,6 +1826,31 @@ tu_trace_start_render_pass(struct tu_cmd_buffer *cmd)
|
|||
load_cpp, store_cpp, has_depth, ubwc);
|
||||
}
|
||||
|
||||
/* Emit the end_render_pass tracepoint for the current render pass.
 *
 * cmd:  command buffer whose trace, command stream and render-pass/LRZ
 *       state are read.
 * gmem: forwarded unchanged to the tracepoint; the tiled path passes true
 *       and the sysmem path passes false.
 *
 * Does nothing when u_trace is not enabled for the device.
 */
template <chip CHIP>
|
||||
static void
|
||||
tu_trace_end_render_pass(struct tu_cmd_buffer *cmd, bool gmem)
|
||||
{
|
||||
/* Skip all of the bookkeeping below when tracing is disabled. */
if (!u_trace_enabled(&cmd->device->trace_context))
|
||||
return;
|
||||
|
||||
/* MAX2(..., 1) keeps the divisor non-zero when drawcall_count is 0. */
uint32_t avg_per_sample_bandwidth =
|
||||
cmd->state.rp.drawcall_bandwidth_per_sample_sum /
|
||||
MAX2(cmd->state.rp.drawcall_count, 1);
|
||||
|
||||
/* Stays zero-initialized when there is no LRZ image view. Otherwise it
 * names the dir_track member of fd_lrzfc_layout<CHIP> as a BO-relative
 * offset: the image's offset inside its BO, plus lrz_fc_offset, plus the
 * member offset.
 */
struct u_trace_address addr = {};
|
||||
if (cmd->state.lrz.image_view) {
|
||||
struct tu_image *image = cmd->state.lrz.image_view->image;
|
||||
addr.bo = image->bo;
|
||||
addr.offset = (image->iova - image->bo->iova) + image->lrz_fc_offset +
|
||||
offsetof(fd_lrzfc_layout<CHIP>, dir_track);
|
||||
}
|
||||
|
||||
/* addr is the last argument; NOTE(review): it presumably backs the
 * indirect lrzStatus tracepoint argument - confirm against the
 * render_pass tracepoint definition.
 */
trace_end_render_pass(&cmd->trace, &cmd->cs, gmem,
|
||||
cmd->state.rp.drawcall_count,
|
||||
avg_per_sample_bandwidth, cmd->state.lrz.valid,
|
||||
cmd->state.rp.lrz_disable_reason, addr);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
|
|
@ -2145,12 +2171,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu6_tile_render_end<CHIP>(cmd, &cmd->cs, autotune_result);
|
||||
|
||||
trace_end_render_pass(&cmd->trace, &cmd->cs, true,
|
||||
cmd->state.rp.drawcall_count,
|
||||
cmd->state.rp.drawcall_bandwidth_per_sample_sum /
|
||||
MAX2(cmd->state.rp.drawcall_count, 1),
|
||||
cmd->state.lrz.valid,
|
||||
cmd->state.rp.lrz_disable_reason);
|
||||
tu_trace_end_render_pass<CHIP>(cmd, true);
|
||||
|
||||
/* We have trashed the dynamically-emitted viewport, scissor, and FS params
|
||||
* via the patchpoints, so we need to re-emit them if they are reused for a
|
||||
|
|
@ -2187,12 +2208,7 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu6_sysmem_render_end<CHIP>(cmd, &cmd->cs, autotune_result);
|
||||
|
||||
trace_end_render_pass(&cmd->trace, &cmd->cs, false,
|
||||
cmd->state.rp.drawcall_count,
|
||||
cmd->state.rp.drawcall_bandwidth_per_sample_sum /
|
||||
MAX2(cmd->state.rp.drawcall_count, 1),
|
||||
cmd->state.lrz.valid,
|
||||
cmd->state.rp.lrz_disable_reason);
|
||||
tu_trace_end_render_pass<CHIP>(cmd, false);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -6304,13 +6320,11 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
HLSQ_CS_KERNEL_GROUP_Y(CHIP, 1),
|
||||
HLSQ_CS_KERNEL_GROUP_Z(CHIP, 1));
|
||||
|
||||
trace_start_compute(&cmd->trace, cs, info->indirect != NULL, local_size[0],
|
||||
local_size[1], local_size[2], info->blocks[0],
|
||||
info->blocks[1], info->blocks[2]);
|
||||
|
||||
if (info->indirect) {
|
||||
uint64_t iova = info->indirect->iova + info->indirect_offset;
|
||||
|
||||
trace_start_compute_indirect(&cmd->trace, cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS_INDIRECT, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit_qw(cs, iova);
|
||||
|
|
@ -6318,15 +6332,25 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEX(local_size[0] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEY(local_size[1] - 1) |
|
||||
A5XX_CP_EXEC_CS_INDIRECT_3_LOCALSIZEZ(local_size[2] - 1));
|
||||
|
||||
trace_end_compute_indirect(&cmd->trace, cs,
|
||||
(struct u_trace_address) {
|
||||
.bo = info->indirect->bo,
|
||||
.offset = info->indirect_offset,
|
||||
});
|
||||
} else {
|
||||
trace_start_compute(&cmd->trace, cs, info->indirect != NULL,
|
||||
local_size[0], local_size[1], local_size[2],
|
||||
info->blocks[0], info->blocks[1], info->blocks[2]);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_EXEC_CS, 4);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, CP_EXEC_CS_1_NGROUPS_X(info->blocks[0]));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_2_NGROUPS_Y(info->blocks[1]));
|
||||
tu_cs_emit(cs, CP_EXEC_CS_3_NGROUPS_Z(info->blocks[2]));
|
||||
}
|
||||
|
||||
trace_end_compute(&cmd->trace, cs);
|
||||
trace_end_compute(&cmd->trace, cs);
|
||||
}
|
||||
|
||||
/* For the workaround above, because it's using the "wrong" context for
|
||||
* SP_FS_INSTRLEN we should emit another dummy event write to avoid a
|
||||
|
|
|
|||
|
|
@ -499,6 +499,7 @@ CREATE_EVENT_CALLBACK(draw_ib_gmem, GMEM_STAGE_ID)
|
|||
CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,8 @@ Header('vk_enum_to_str.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
|
|||
Header('vk_format.h')
|
||||
Header('tu_cmd_buffer.h', scope=HeaderScope.SOURCE)
|
||||
Header('tu_device.h', scope=HeaderScope.SOURCE)
|
||||
Header('common/freedreno_lrz.h')
|
||||
Header('vulkan/vulkan_core.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
|
||||
|
||||
# we can't use tu_common.h because it includes ir3 headers which are not
|
||||
# compatible with C++
|
||||
|
|
@ -91,7 +93,8 @@ begin_end_tp('render_pass',
|
|||
Arg(type='uint32_t', var='drawCount', c_format='%u'),
|
||||
Arg(type='uint32_t', var='avgPerSampleBandwidth', c_format='%u'),
|
||||
Arg(type='bool', var='lrz', c_format='%s', to_prim_type='({} ? "true" : "false")'),
|
||||
Arg(type='const char *', var='lrzDisableReason', c_format='%s'),])
|
||||
Arg(type='const char *', var='lrzDisableReason', c_format='%s'),
|
||||
Arg(type='uint32_t', var='lrzStatus', c_format='%s', to_prim_type='(fd_lrz_gpu_dir_to_str((enum fd_lrz_gpu_dir)({} & 0xff)))', is_indirect=True),])
|
||||
|
||||
|
||||
begin_end_tp('binning_ib')
|
||||
|
|
@ -139,6 +142,10 @@ begin_end_tp('compute',
|
|||
Arg(type='uint16_t', var='num_groups_y', c_format='%u'),
|
||||
Arg(type='uint16_t', var='num_groups_z', c_format='%u')])
|
||||
|
||||
# Tracepoint for indirect compute dispatches. It has no start arguments; the
# end tracepoint takes one indirect VkDispatchIndirectCommand argument whose
# x, y and z fields are formatted as "%ux%ux%u".
begin_end_tp('compute_indirect',
|
||||
end_args=[ArgStruct(type='VkDispatchIndirectCommand', var='size',
|
||||
is_indirect=True, c_format="%ux%ux%u",
|
||||
fields=['x', 'y', 'z'])])
|
||||
|
||||
# Annotations for Cmd(Begin|End)DebugUtilsLabelEXT
|
||||
for suffix in ["", "_rp"]:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue