anv: Add GPU breakpoint before/after specific compute dispatch call

Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13089
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35353>
This commit is contained in:
Sushma Venkatesh Reddy 2025-05-03 01:31:15 +00:00 committed by Marge Bot
parent 172e475705
commit 29fc96cb80
5 changed files with 46 additions and 21 deletions

View file

@ -916,6 +916,7 @@ VkResult anv_CreateDevice(
device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool, 4,
4);
p_atomic_set(&device->draw_call_count, 0);
p_atomic_set(&device->dispatch_call_count, 0);
/* Create a separate command pool for companion RCS command buffer. */
if (device->info->verx10 >= 125) {

View file

@ -374,10 +374,10 @@ void genX(batch_emit_breakpoint)(struct anv_batch *batch,
static inline void
genX(emit_breakpoint)(struct anv_batch *batch,
struct anv_device *device,
bool emit_before_draw)
bool emit_before_draw_or_dispatch)
{
if (INTEL_DEBUG(DEBUG_DRAW_BKP))
genX(batch_emit_breakpoint)(batch, device, emit_before_draw);
if (INTEL_DEBUG(DEBUG_DRAW_BKP) || INTEL_DEBUG(DEBUG_DISPATCH_BKP))
genX(batch_emit_breakpoint)(batch, device, emit_before_draw_or_dispatch);
}
void

View file

@ -2136,6 +2136,7 @@ struct anv_device {
nir_shader *fp64_nir;
uint32_t draw_call_count;
uint32_t dispatch_call_count;
struct anv_state breakpoint;
/** Precompute all dirty graphics bits

View file

@ -4707,27 +4707,42 @@ void genX(CmdPipelineBarrier2)(
void
genX(batch_emit_breakpoint)(struct anv_batch *batch,
struct anv_device *device,
bool emit_before_draw)
bool emit_before_draw_or_dispatch)
{
/* Update draw call count once */
uint32_t draw_count = emit_before_draw ?
p_atomic_inc_return(&device->draw_call_count) :
p_atomic_read(&device->draw_call_count);
uint32_t before_count = 0, after_count = 0;
uint32_t *counter = NULL;
if (((draw_count == intel_debug_bkp_before_draw_count &&
emit_before_draw) ||
(draw_count == intel_debug_bkp_after_draw_count &&
!emit_before_draw))) {
struct anv_address wait_addr =
anv_state_pool_state_address(&device->dynamic_state_pool,
device->breakpoint);
if (INTEL_DEBUG(DEBUG_DRAW_BKP)) {
counter = &device->draw_call_count;
before_count = intel_debug_bkp_before_draw_count;
after_count = intel_debug_bkp_after_draw_count;
} else if (INTEL_DEBUG(DEBUG_DISPATCH_BKP)) {
counter = &device->dispatch_call_count;
before_count = intel_debug_bkp_before_dispatch_count;
after_count = intel_debug_bkp_after_dispatch_count;
}
anv_batch_emit(batch, GENX(MI_SEMAPHORE_WAIT), sem) {
sem.WaitMode = PollingMode;
sem.CompareOperation = COMPARE_SAD_EQUAL_SDD;
sem.SemaphoreDataDword = 0x1;
sem.SemaphoreAddress = wait_addr;
};
if (counter) {
uint32_t count = emit_before_draw_or_dispatch ?
p_atomic_inc_return(counter) :
p_atomic_read(counter);
bool should_emit =
(emit_before_draw_or_dispatch && count == before_count) ||
(!emit_before_draw_or_dispatch && count == after_count);
if (should_emit) {
struct anv_address wait_addr =
anv_state_pool_state_address(&device->dynamic_state_pool,
device->breakpoint);
anv_batch_emit(batch, GENX(MI_SEMAPHORE_WAIT), sem) {
sem.WaitMode = PollingMode;
sem.CompareOperation = COMPARE_SAD_EQUAL_SDD;
sem.SemaphoreDataDword = 0x1;
sem.SemaphoreAddress = wait_addr;
}
}
}
}

View file

@ -698,11 +698,15 @@ void genX(CmdDispatchBase)(
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch,
ANV_NULL_ADDRESS /* no indirect data */,
groupCountX, groupCountY, groupCountZ,
false);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
trace_intel_end_compute(&cmd_buffer->trace,
groupCountX, groupCountY, groupCountZ,
prog_data->base.source_hash);
@ -861,9 +865,13 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch, indirect_addr, 0,
0, 0, is_unaligned_size_x);
genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
trace_intel_end_compute_indirect(&cmd_buffer->trace,
anv_address_utrace(indirect_addr),
prog_data->base.source_hash);