panvk/csf: add more utrace tracepoints

Add calls to all newly defined tracepoints, including meta, render,
dispatch/dispatch_indirect, barrier, and sync_wait.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32693>
This commit is contained in:
Chia-I Wu 2024-12-05 13:18:14 -08:00 committed by Marge Bot
parent 67da3d3a4c
commit c8cc9e781d
4 changed files with 65 additions and 1 deletion

View file

@ -585,6 +585,7 @@ panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cs_deps deps)
{
uint32_t wait_subqueue_mask = 0;
uint32_t utrace_subqueue_mask = 0;
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
/* no need to perform both types of waits on the same subqueue */
if (deps.src[i].wait_sb_mask)
@ -592,10 +593,16 @@ panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
assert(!(deps.dst[i].wait_subqueue_mask & BITFIELD_BIT(i)));
wait_subqueue_mask |= deps.dst[i].wait_subqueue_mask;
if (deps.src[i].wait_sb_mask || deps.dst[i].wait_subqueue_mask ||
!panvk_cache_flush_is_nop(&deps.src[i].cache_flush))
utrace_subqueue_mask |= BITFIELD_BIT(i);
}
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
u_foreach_bit(i, utrace_subqueue_mask)
trace_begin_barrier(&cmdbuf->utrace.uts[i], cmdbuf);
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
struct panvk_cs_state *cs_state = &cmdbuf->state.cs[i];
@ -643,6 +650,13 @@ panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
emit_barrier_insert_waits(b, cmdbuf, &deps, i, tmp_regs);
}
}
u_foreach_bit(i, utrace_subqueue_mask) {
trace_end_barrier(
&cmdbuf->utrace.uts[i], cmdbuf, deps.src[i].wait_sb_mask,
deps.dst[i].wait_subqueue_mask, deps.src[i].cache_flush.l2,
deps.src[i].cache_flush.lsc, deps.src[i].cache_flush.others);
}
}
VKAPI_ATTR void VKAPI_CALL

View file

@ -23,6 +23,7 @@
#include "panvk_macros.h"
#include "panvk_meta.h"
#include "panvk_physical_device.h"
#include "panvk_tracepoints.h"
#include "pan_desc.h"
#include "pan_encoder.h"
@ -351,11 +352,20 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
uint32_t groupCountY, uint32_t groupCountZ)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
const struct panvk_shader *shader = cmdbuf->state.compute.shader;
struct panvk_dispatch_info info = {
.wg_base = {baseGroupX, baseGroupY, baseGroupZ},
.direct.wg_count = {groupCountX, groupCountY, groupCountZ},
};
trace_begin_dispatch(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_COMPUTE], cmdbuf);
cmd_dispatch(cmdbuf, &info);
trace_end_dispatch(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_COMPUTE], cmdbuf,
baseGroupX, baseGroupY, baseGroupZ, groupCountX,
groupCountY, groupCountZ, shader->cs.local_size.x,
shader->cs.local_size.y, shader->cs.local_size.z);
}
VKAPI_ATTR void VKAPI_CALL
@ -368,5 +378,13 @@ panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
struct panvk_dispatch_info info = {
.indirect.buffer_dev_addr = buffer_gpu,
};
trace_begin_dispatch_indirect(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_COMPUTE],
cmdbuf);
cmd_dispatch(cmdbuf, &info);
trace_end_dispatch_indirect(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_COMPUTE],
cmdbuf,
(struct u_trace_address){.offset = buffer_gpu});
}

View file

@ -31,6 +31,7 @@
#include "panvk_priv_bo.h"
#include "panvk_query_pool.h"
#include "panvk_shader.h"
#include "panvk_tracepoints.h"
#include "pan_desc.h"
#include "pan_earlyzs.h"
@ -2829,6 +2830,9 @@ panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
/* If we're not resuming, the FBD should be NULL. */
assert(!state->render.fbds.gpu || resuming);
trace_begin_render(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_VERTEX_TILER], cmdbuf);
trace_begin_render(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_FRAGMENT], cmdbuf);
if (!resuming)
panvk_per_arch(cmd_preload_render_area_border)(cmdbuf, pRenderingInfo);
}
@ -3031,7 +3035,9 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
length_reg);
/* Wait for the tiling to be done before submitting the fragment job. */
trace_begin_sync_wait(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_FRAGMENT], cmdbuf);
wait_finish_tiling(cmdbuf);
trace_end_sync_wait(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_FRAGMENT], cmdbuf);
/* Disable the oom handler once the vertex/tiler work has finished.
* We need to disable the handler at this point as the vertex/tiler subqueue
@ -3403,4 +3409,11 @@ panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
/* If we're not suspending, we need to resolve attachments. */
if (!suspending)
panvk_per_arch(cmd_resolve_attachments)(cmdbuf);
trace_end_render(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_VERTEX_TILER], cmdbuf,
cmdbuf->state.gfx.render.flags,
&cmdbuf->state.gfx.render.fb.info);
trace_end_render(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_FRAGMENT], cmdbuf,
cmdbuf->state.gfx.render.flags,
&cmdbuf->state.gfx.render.fb.info);
}

View file

@ -6,6 +6,7 @@
#include "panvk_cmd_meta.h"
#include "panvk_entrypoints.h"
#include "panvk_tracepoints.h"
static bool
copy_to_image_use_gfx_pipeline(struct panvk_device *dev,
@ -45,6 +46,10 @@ panvk_per_arch(cmd_meta_compute_start)(
save_ctx->push_constants = cmdbuf->state.push_constants;
save_ctx->cs.shader = cmdbuf->state.compute.shader;
save_ctx->cs.desc = cmdbuf->state.compute.cs.desc;
#if PAN_ARCH >= 10
trace_begin_meta(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_COMPUTE], cmdbuf);
#endif
}
void
@ -55,6 +60,10 @@ panvk_per_arch(cmd_meta_compute_end)(
struct panvk_descriptor_set *push_set0 =
cmdbuf->state.compute.desc_state.push_sets[0];
#if PAN_ARCH >= 10
trace_end_meta(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_COMPUTE], cmdbuf);
#endif
cmdbuf->state.compute.desc_state.sets[0] = save_ctx->set0;
if (save_ctx->push_set0.desc_count) {
memcpy(push_set0->descs.host, save_ctx->push_set0.desc_storage,
@ -108,6 +117,11 @@ panvk_per_arch(cmd_meta_gfx_start)(
gfx_state_set_dirty(cmdbuf, OQ);
cmdbuf->state.gfx.vk_meta = true;
#if PAN_ARCH >= 10
trace_begin_meta(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_VERTEX_TILER], cmdbuf);
trace_begin_meta(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_FRAGMENT], cmdbuf);
#endif
}
void
@ -118,6 +132,11 @@ panvk_per_arch(cmd_meta_gfx_end)(
struct panvk_descriptor_set *push_set0 =
cmdbuf->state.gfx.desc_state.push_sets[0];
#if PAN_ARCH >= 10
trace_end_meta(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_VERTEX_TILER], cmdbuf);
trace_end_meta(&cmdbuf->utrace.uts[PANVK_SUBQUEUE_FRAGMENT], cmdbuf);
#endif
cmdbuf->state.gfx.desc_state.sets[0] = save_ctx->set0;
if (save_ctx->push_set0.desc_count) {
memcpy(push_set0->descs.host, save_ctx->push_set0.desc_storage,