hk: pass cmdbuf to perf_debug

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>
This commit is contained in:
Alyssa Rosenzweig 2025-01-15 13:31:49 -05:00 committed by Marge Bot
parent debdb26167
commit 2eb5040ca8
10 changed files with 54 additions and 75 deletions

View file

@ -248,11 +248,9 @@ hk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
hk_reset_cmd_buffer(&cmd->vk, 0);
perf_debug(dev, "Begin command buffer");
perf_debug(cmd, "Begin command buffer");
hk_cmd_buffer_begin_compute(cmd, pBeginInfo);
hk_cmd_buffer_begin_graphics(cmd, pBeginInfo);
@ -263,12 +261,11 @@ VKAPI_ATTR VkResult VKAPI_CALL
hk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
assert(cmd->current_cs.gfx == NULL && cmd->current_cs.pre_gfx == NULL &&
"must end rendering before ending the command buffer");
perf_debug(dev, "End command buffer");
perf_debug(cmd, "End command buffer");
hk_cmd_buffer_end_compute(cmd);
hk_cmd_buffer_end_compute_internal(cmd, &cmd->current_cs.post_gfx);
@ -298,7 +295,7 @@ hk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
if (HK_PERF(dev, NOBARRIER))
return;
perf_debug(dev, "Pipeline barrier");
perf_debug(cmd, "Pipeline barrier");
/* The big hammer. We end both compute and graphics batches. Ending compute
* here is necessary to properly handle graphics->compute dependencies.
@ -610,7 +607,7 @@ hk_reserve_scratch(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
/* Note: this uses the hardware stage, not the software stage */
hk_device_alloc_scratch(dev, s->b.info.stage, max_scratch_size);
perf_debug(dev, "Reserving %u (%u) bytes of scratch for stage %s",
perf_debug(cmd, "Reserving %u (%u) bytes of scratch for stage %s",
s->b.info.scratch_size, s->b.info.preamble_scratch_size,
_mesa_shader_stage_to_abbrev(s->b.info.stage));

View file

@ -474,12 +474,6 @@ VK_DEFINE_HANDLE_CASTS(hk_cmd_buffer, vk.base, VkCommandBuffer,
extern const struct vk_command_buffer_ops hk_cmd_buffer_ops;
static inline struct hk_device *
hk_cmd_buffer_device(struct hk_cmd_buffer *cmd)
{
return (struct hk_device *)cmd->vk.base.device;
}
static inline struct hk_cmd_pool *
hk_cmd_buffer_pool(struct hk_cmd_buffer *cmd)
{

View file

@ -44,7 +44,7 @@ clear_image(struct hk_cmd_buffer *cmd, struct hk_image *image,
/* TODO: Use fast clear */
bool compressed = ail_is_compressed(&image->planes[0].layout);
perf_debug(dev, "Image clear (%scompressed)", compressed ? "" : "un");
perf_debug(cmd, "Image clear (%scompressed)", compressed ? "" : "un");
for (uint32_t r = 0; r < range_count; r++) {
const uint32_t level_count =

View file

@ -86,7 +86,6 @@ hk_dispatch_with_usc(struct hk_device *dev, struct hk_cs *cs,
static void
dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid)
{
struct hk_device *dev = hk_cmd_buffer_device(cmd);
struct hk_shader *s = hk_only_variant(cmd->state.cs.shader);
struct hk_cs *cs = hk_cmd_buffer_get_cs(cmd, true /* compute */);
if (!cs)
@ -100,7 +99,7 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid)
cmd, VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT);
if (stat) {
perf_debug(dev, "CS invocation statistic");
perf_debug(cmd, "CS invocation statistic");
uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr;
libagx_increment_cs_invocations(cmd, agx_1d(1), AGX_BARRIER_ALL, grid,

View file

@ -355,7 +355,7 @@ hk_build_bg_eot(struct hk_cmd_buffer *cmd, const VkRenderingInfo *info,
bool no_store = (att_info->storeOp == VK_ATTACHMENT_STORE_OP_NONE);
bool no_store_wa = no_store && !load && !clear;
if (no_store_wa) {
perf_debug(dev, "STORE_OP_NONE workaround");
perf_debug(cmd, "STORE_OP_NONE workaround");
}
load |= no_store_wa;
@ -582,7 +582,6 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_rendering_state *render = &cmd->state.gfx.render;
struct hk_device *dev = hk_cmd_buffer_device(cmd);
memset(render, 0, sizeof(*render));
@ -659,7 +658,7 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
(render->view_mask &&
render->view_mask != BITFIELD64_MASK(render->cr.layers));
perf_debug(dev, "Rendering %ux%ux%u@%u %s%s", render->cr.width,
perf_debug(cmd, "Rendering %ux%ux%u@%u %s%s", render->cr.width,
render->cr.height, render->cr.layers,
render->tilebuffer.nr_samples,
render->view_mask ? " multiview" : "",
@ -827,7 +826,7 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
* we're not that clever yet.
*/
if (agx_tilebuffer_spills(&render->tilebuffer)) {
perf_debug(dev, "eMRT render pass");
perf_debug(cmd, "eMRT render pass");
for (unsigned i = 0; i < render->color_att_count; ++i) {
struct hk_image_view *view = render->color_att[i].iview;
@ -841,8 +840,7 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
struct ail_layout *layout = &image->planes[image_plane].layout;
if (ail_is_level_compressed(layout, view->vk.base_mip_level)) {
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Decompressing in-place");
perf_debug(cmd, "Decompressing in-place");
unsigned level = view->vk.base_mip_level;
unsigned layer = view->vk.base_array_layer;
@ -936,7 +934,6 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_rendering_state *render = &cmd->state.gfx.render;
struct hk_device *dev = hk_cmd_buffer_device(cmd);
/* The last control stream of the render pass is special since it gets its
* stores dropped. Swap it in.
@ -946,7 +943,7 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
cs->cr.eot.main = render->cr.eot.main;
}
perf_debug(dev, "End rendering");
perf_debug(cmd, "End rendering");
hk_cmd_buffer_end_graphics(cmd);
bool need_resolve = false;
@ -1009,7 +1006,7 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
memset(render, 0, sizeof(*render));
if (need_resolve) {
perf_debug(dev, "Resolving render pass, colour store op %u",
perf_debug(cmd, "Resolving render pass, colour store op %u",
vk_color_att[0].storeOp);
hk_meta_resolve_rendering(cmd, &vk_render);
@ -1023,7 +1020,7 @@ hk_geometry_state(struct hk_cmd_buffer *cmd)
/* We tie heap allocation to geometry state allocation, so allocate now. */
if (unlikely(!dev->heap)) {
perf_debug(dev, "Allocating heap");
perf_debug(cmd, "Allocating heap");
size_t size = 128 * 1024 * 1024;
dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
@ -1042,7 +1039,7 @@ hk_geometry_state(struct hk_cmd_buffer *cmd)
/* We need to free all allocations after each command buffer execution */
if (!cmd->uses_heap) {
perf_debug(dev, "Freeing heap");
perf_debug(cmd, "Freeing heap");
uint64_t addr = dev->rodata.geometry_state;
/* Zeroing the allocated index frees everything */
@ -1361,7 +1358,7 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct agx_draw draw,
struct hk_graphics_state *gfx = &cmd->state.gfx;
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
perf_debug(dev, "Unrolling primitive restart due to GS/XFB");
perf_debug(cmd, "Unrolling primitive restart due to GS/XFB");
/* The unroll kernel assumes an indirect draw. Synthesize one if needed */
draw = hk_draw_as_indexed_indirect(cmd, draw);
@ -1414,11 +1411,11 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
unsigned count_words = count->info.gs.count_words;
if (false /* TODO */)
perf_debug(dev, "Transform feedbck");
perf_debug(cmd, "Transform feedbck");
else if (count_words)
perf_debug(dev, "Geometry shader with counts");
perf_debug(cmd, "Geometry shader with counts");
else
perf_debug(dev, "Geometry shader without counts");
perf_debug(cmd, "Geometry shader without counts");
enum mesa_prim mode = hk_gs_in_prim(cmd);
@ -1512,14 +1509,14 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
uint64_t state = gfx->descriptors.root.draw.tess_params;
struct hk_tess_info info = gfx->tess.info;
perf_debug(dev, "Tessellation");
perf_debug(cmd, "Tessellation");
uint64_t tcs_stat = hk_pipeline_stat_addr(
cmd, VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT);
/* Setup grids */
if (agx_is_indirect(draw.b)) {
perf_debug(dev, "Indirect tessellation");
perf_debug(cmd, "Indirect tessellation");
struct libagx_tess_setup_indirect_args args = {
.p = state,
@ -1557,7 +1554,7 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
/* TCS invocation counter increments once per-patch */
if (tcs_stat) {
perf_debug(dev, "Direct TCS statistic");
perf_debug(cmd, "Direct TCS statistic");
libagx_increment_statistic(
cmd, agx_1d(1), AGX_BARRIER_ALL | AGX_PREGFX, tcs_stat, patches);
}
@ -2643,13 +2640,13 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
bool has_sample_mask = api_sample_mask != tib_sample_mask;
if (hw_vs->info.vs.cull_distance_array_size) {
perf_debug(dev, "Emulating cull distance (size %u, %s a frag shader)",
perf_debug(cmd, "Emulating cull distance (size %u, %s a frag shader)",
hw_vs->info.vs.cull_distance_array_size,
fs ? "with" : "without");
}
if (has_sample_mask) {
perf_debug(dev, "Emulating sample mask (%s a frag shader)",
perf_debug(cmd, "Emulating sample mask (%s a frag shader)",
fs ? "with" : "without");
}
@ -2711,7 +2708,7 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
dyn->cb.logic_op_enable) {
perf_debug(
dev, "Epilog with%s%s%s",
cmd, "Epilog with%s%s%s",
dyn->ms.alpha_to_one_enable ? " alpha-to-one" : "",
dyn->ms.alpha_to_coverage_enable ? " alpha-to-coverage" : "",
dyn->cb.logic_op_enable ? " logic-op" : "");
@ -3140,7 +3137,7 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
}
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Binding passthrough GS for%s\n", xfb_outputs ? " XFB" : "");
perf_debug(cmd, "Binding passthrough GS for%s\n", xfb_outputs ? " XFB" : "");
gs = hk_meta_shader(dev, hk_nir_passthrough_gs, key, key_size);
gs->is_passthrough = true;
@ -3185,7 +3182,7 @@ hk_flush_gfx_state(struct hk_cmd_buffer *cmd, uint32_t draw_id,
*/
bool succ = u_tristate_set(&cs->cr.dbias_is_int, dbias_is_int);
if (!succ) {
perf_debug(dev, "Splitting control stream due to depth bias");
perf_debug(cmd, "Splitting control stream due to depth bias");
hk_cmd_buffer_end_graphics(cmd);
cs = hk_cmd_buffer_get_cs(cmd, false /* compute */);
@ -3319,8 +3316,7 @@ hk_ia_update(struct hk_cmd_buffer *cmd, struct agx_draw draw,
uint64_t ia_vertices, uint64_t ia_prims, uint64_t vs_invocations,
uint64_t c_prims, uint64_t c_inv)
{
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Input assembly counters");
perf_debug(cmd, "Input assembly counters");
uint64_t draw_ptr;
if (agx_is_indirect(draw.b)) {
@ -3487,7 +3483,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
enum agx_primitive topology = cmd->state.gfx.topology;
if (needs_idx_robust) {
assert(!geom && !tess && !adj);
perf_debug(dev, "lowering robust index buffer");
perf_debug(cmd, "lowering robust index buffer");
cs->current = out;
@ -3704,8 +3700,7 @@ hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer,
VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
VK_FROM_HANDLE(hk_buffer, count_buffer, countBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Draw indirect count");
perf_debug(cmd, "Draw indirect count");
assert((stride % 4) == 0 && "aligned");
@ -3783,7 +3778,6 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
struct hk_graphics_state *gfx = &cmd->state.gfx;
gfx->xfb_enabled = begin;
@ -3826,7 +3820,7 @@ hk_begin_end_xfb(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
copies = 4;
if (copies > 0) {
perf_debug(dev, "XFB counter copy");
perf_debug(cmd, "XFB counter copy");
libagx_copy_xfb_counters(cmd, agx_1d(copies),
AGX_BARRIER_ALL | AGX_PREGFX,

View file

@ -1340,7 +1340,7 @@ hk_CmdBlitImage2(VkCommandBuffer commandBuffer,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Blit image");
perf_debug(cmd, "Blit image");
struct hk_meta_save save;
hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
@ -1354,7 +1354,7 @@ hk_CmdResolveImage2(VkCommandBuffer commandBuffer,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Resolve");
perf_debug(cmd, "Resolve");
struct hk_meta_save save;
hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);

View file

@ -90,9 +90,8 @@ hk_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(hk_event, event, _event);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Set event");
perf_debug(cmd, "Set event");
hk_cmd_buffer_end_compute(cmd);
hk_cmd_buffer_end_graphics(cmd);
hk_queue_write(cmd, event->bo->va->addr, VK_EVENT_SET, false);
@ -104,9 +103,8 @@ hk_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(hk_event, event, _event);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Reset event");
perf_debug(cmd, "Reset event");
hk_cmd_buffer_end_compute(cmd);
hk_cmd_buffer_end_graphics(cmd);
hk_queue_write(cmd, event->bo->va->addr, VK_EVENT_RESET, false);
@ -118,9 +116,7 @@ hk_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount,
const VkDependencyInfo *pDependencyInfos)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
perf_debug(dev, "Wait events");
perf_debug(cmd, "Wait events");
/* The big hammer. Need to check if this is actually needed.
*

View file

@ -38,15 +38,17 @@ struct hk_addr_range {
uint64_t range;
};
#define hk_cmd_buffer_device(cmd) ((struct hk_device *)(cmd)->vk.base.device)
#define perf_debug_dev(dev, fmt, ...) \
do { \
if (dev->debug & AGX_DBG_PERF) \
if ((dev)->debug & AGX_DBG_PERF) \
mesa_log(MESA_LOG_WARN, (MESA_LOG_TAG), (fmt), ##__VA_ARGS__); \
} while (0)
#define perf_debug(dev, fmt, ...) \
#define perf_debug(cmd, fmt, ...) \
do { \
if (dev->dev.debug & AGX_DBG_PERF) \
if (hk_cmd_buffer_device(cmd)->dev.debug & AGX_DBG_PERF) \
mesa_log(MESA_LOG_WARN, (MESA_LOG_TAG), (fmt), ##__VA_ARGS__); \
} while (0)

View file

@ -65,14 +65,12 @@ hk_reports_per_query(struct hk_query_pool *pool)
static void
hk_flush_if_timestamp(struct hk_cmd_buffer *cmd, struct hk_query_pool *pool)
{
struct hk_device *dev = hk_cmd_buffer_device(cmd);
/* There might not be a barrier between the timestamp write and the copy
* otherwise but we need one to give the CPU a chance to write the timestamp.
* This could maybe be optimized.
*/
if (pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP) {
perf_debug(dev, "Flushing for timestamp copy");
perf_debug(cmd, "Flushing for timestamp copy");
hk_cmd_buffer_end_graphics(cmd);
hk_cmd_buffer_end_compute(cmd);
}
@ -258,7 +256,7 @@ hk_dispatch_imm_writes(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
struct hk_device *dev = hk_cmd_buffer_device(cmd);
hk_cdm_cache_flush(dev, cs);
perf_debug(dev, "Queued writes");
perf_debug(cmd, "Queued writes");
uint64_t params =
hk_pool_upload(cmd, cs->imm_writes.data, cs->imm_writes.size, 16);
@ -301,7 +299,7 @@ hk_queue_write(struct hk_cmd_buffer *cmd, uint64_t address, uint32_t value,
struct hk_device *dev = hk_cmd_buffer_device(cmd);
hk_cdm_cache_flush(dev, cs);
perf_debug(dev, "Queued write");
perf_debug(cmd, "Queued write");
libagx_write_u32(cmd, agx_1d(1), AGX_BARRIER_ALL, address, value);
}
@ -376,11 +374,10 @@ hk_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
hk_flush_if_timestamp(cmd, pool);
perf_debug(dev, "Reset query pool");
perf_debug(cmd, "Reset query pool");
emit_zero_queries(cmd, pool, firstQuery, queryCount, false);
}
@ -413,7 +410,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
if (!after_gfx && cmd->current_cs.cs &&
cmd->current_cs.cs->timestamp.end.addr) {
perf_debug(dev, "Splitting for compute timestamp");
perf_debug(cmd, "Splitting for compute timestamp");
hk_cmd_buffer_end_compute(cmd);
}
@ -513,7 +510,7 @@ hk_cmd_begin_end_query(struct hk_cmd_buffer *cmd, struct hk_query_pool *pool,
/* We need to set available=1 after the graphics work finishes. */
if (end) {
perf_debug(dev, "Query ending, type %u", pool->vk.query_type);
perf_debug(cmd, "Query ending, type %u", pool->vk.query_type);
hk_queue_write(cmd, hk_query_available_addr(pool, query), 1, graphics);
}
}
@ -535,7 +532,6 @@ hk_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
hk_cmd_begin_end_query(cmd, pool, query, index, 0, true);
@ -555,7 +551,7 @@ hk_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
const uint32_t num_queries =
util_bitcount(cmd->state.gfx.render.view_mask);
if (num_queries > 1) {
perf_debug(dev, "Multiview query zeroing");
perf_debug(cmd, "Multiview query zeroing");
emit_zero_queries(cmd, pool, query + 1, num_queries - 1, true);
}
}
@ -671,7 +667,7 @@ hk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
struct hk_device *dev = hk_cmd_buffer_device(cmd);
hk_flush_if_timestamp(cmd, pool);
perf_debug(dev, "Query pool copy");
perf_debug(cmd, "Query pool copy");
struct libagx_copy_query_args info = {
.availability = hk_has_available(pool) ? pool->bo->va->addr : 0,

View file

@ -768,8 +768,9 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
command_count += list_length(&cmdbuf->control_streams);
}
perf_debug(dev, "Submitting %u control streams (%u command buffers)",
command_count, submit->command_buffer_count);
perf_debug_dev(&dev->dev,
"Submitting %u control streams (%u command buffers)",
command_count, submit->command_buffer_count);
if (command_count == 0)
return queue_submit_empty(dev, queue, submit);
@ -854,7 +855,7 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
if (cs->type == HK_CS_CDM) {
perf_debug(
dev,
cmdbuf,
"%u: Submitting CDM with %u API calls, %u dispatches, %u flushes",
i, cs->stats.calls, cs->stats.cmds, cs->stats.flushes);
@ -874,8 +875,8 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
cmd.cmd_buffer_size -= 8;
} else {
assert(cs->type == HK_CS_VDM);
perf_debug(dev, "%u: Submitting VDM with %u API draws, %u draws", i,
cs->stats.calls, cs->stats.cmds);
perf_debug(cmdbuf, "%u: Submitting VDM with %u API draws, %u draws",
i, cs->stats.calls, cs->stats.cmds);
assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles ||
cs->timestamp.end.handle);