diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index aa01c8f6b92..f8b2d7552ab 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -916,6 +916,7 @@ VkResult anv_CreateDevice(
    device->breakpoint = anv_state_pool_alloc(&device->dynamic_state_pool,
                                              4, 4);
    p_atomic_set(&device->draw_call_count, 0);
+   p_atomic_set(&device->dispatch_call_count, 0);
 
    /* Create a separate command pool for companion RCS command buffer. */
    if (device->info->verx10 >= 125) {
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 3e8e69edc2a..93ebbf779b9 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -374,10 +374,10 @@ void genX(batch_emit_breakpoint)(struct anv_batch *batch,
 static inline void
 genX(emit_breakpoint)(struct anv_batch *batch,
                       struct anv_device *device,
-                      bool emit_before_draw)
+                      bool emit_before_draw_or_dispatch)
 {
-   if (INTEL_DEBUG(DEBUG_DRAW_BKP))
-      genX(batch_emit_breakpoint)(batch, device, emit_before_draw);
+   if (INTEL_DEBUG(DEBUG_DRAW_BKP) || INTEL_DEBUG(DEBUG_DISPATCH_BKP))
+      genX(batch_emit_breakpoint)(batch, device, emit_before_draw_or_dispatch);
 }
 
 void
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 0144b5d0ee6..2d4db72cad2 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -2136,6 +2136,7 @@ struct anv_device {
    nir_shader *fp64_nir;
 
    uint32_t draw_call_count;
+   uint32_t dispatch_call_count;
    struct anv_state breakpoint;
 
    /** Precompute all dirty graphics bits
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index e50e3429080..e653cf6d5ee 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -4707,27 +4707,42 @@ void genX(CmdPipelineBarrier2)(
 void
 genX(batch_emit_breakpoint)(struct anv_batch *batch,
                             struct anv_device *device,
-                            bool emit_before_draw)
+                            bool emit_before_draw_or_dispatch)
 {
-   /* Update draw call count once */
-   uint32_t draw_count = emit_before_draw ?
-                         p_atomic_inc_return(&device->draw_call_count) :
-                         p_atomic_read(&device->draw_call_count);
+   uint32_t before_count = 0, after_count = 0;
+   uint32_t *counter = NULL;
 
-   if (((draw_count == intel_debug_bkp_before_draw_count &&
-         emit_before_draw) ||
-        (draw_count == intel_debug_bkp_after_draw_count &&
-         !emit_before_draw))) {
-      struct anv_address wait_addr =
-         anv_state_pool_state_address(&device->dynamic_state_pool,
-                                      device->breakpoint);
+   if (INTEL_DEBUG(DEBUG_DRAW_BKP)) {
+      counter = &device->draw_call_count;
+      before_count = intel_debug_bkp_before_draw_count;
+      after_count = intel_debug_bkp_after_draw_count;
+   } else if (INTEL_DEBUG(DEBUG_DISPATCH_BKP)) {
+      counter = &device->dispatch_call_count;
+      before_count = intel_debug_bkp_before_dispatch_count;
+      after_count = intel_debug_bkp_after_dispatch_count;
+   }
 
-      anv_batch_emit(batch, GENX(MI_SEMAPHORE_WAIT), sem) {
-         sem.WaitMode = PollingMode;
-         sem.CompareOperation = COMPARE_SAD_EQUAL_SDD;
-         sem.SemaphoreDataDword = 0x1;
-         sem.SemaphoreAddress = wait_addr;
-      };
+   if (counter) {
+      uint32_t count = emit_before_draw_or_dispatch ?
+                       p_atomic_inc_return(counter) :
+                       p_atomic_read(counter);
+
+      bool should_emit =
+         (emit_before_draw_or_dispatch && count == before_count) ||
+         (!emit_before_draw_or_dispatch && count == after_count);
+
+      if (should_emit) {
+         struct anv_address wait_addr =
+            anv_state_pool_state_address(&device->dynamic_state_pool,
+                                         device->breakpoint);
+
+         anv_batch_emit(batch, GENX(MI_SEMAPHORE_WAIT), sem) {
+            sem.WaitMode = PollingMode;
+            sem.CompareOperation = COMPARE_SAD_EQUAL_SDD;
+            sem.SemaphoreDataDword = 0x1;
+            sem.SemaphoreAddress = wait_addr;
+         }
+      }
    }
 }
 
diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c
index 83479d8e245..833567e446b 100644
--- a/src/intel/vulkan/genX_cmd_compute.c
+++ b/src/intel/vulkan/genX_cmd_compute.c
@@ -698,11 +698,15 @@ void genX(CmdDispatchBase)(
    if (cmd_buffer->state.conditional_render_enabled)
       genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 
+   genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
+
    emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch,
                   ANV_NULL_ADDRESS /* no indirect data */,
                   groupCountX, groupCountY, groupCountZ,
                   false);
 
+   genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
+
    trace_intel_end_compute(&cmd_buffer->trace,
                            groupCountX, groupCountY, groupCountZ,
                            prog_data->base.source_hash);
@@ -861,9 +865,13 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
    if (cmd_buffer->state.conditional_render_enabled)
       genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 
+   genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, true);
+
    emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch, indirect_addr,
                   0, 0, 0, is_unaligned_size_x);
 
+   genX(emit_breakpoint)(&cmd_buffer->batch, cmd_buffer->device, false);
+
    trace_intel_end_compute_indirect(&cmd_buffer->trace,
                                     anv_address_utrace(indirect_addr),
                                     prog_data->base.source_hash);
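A minimal standalone sketch of the gating logic that the new genX(batch_emit_breakpoint) body implements, for illustration only: the counter is incremented solely on the "before" emission, the "after" emission re-reads the same value, and the semaphore wait would be emitted when the value matches the configured before/after threshold. The names and threshold values below (breakpoint_hits, before_count, after_count, dispatch_call_count as a plain static) are stand-ins invented for this sketch and do not exist in the driver.

/* Illustrative sketch, not driver code: models when the breakpoint
 * MI_SEMAPHORE_WAIT would be emitted around a dispatch, assuming the
 * thresholds play the role of intel_debug_bkp_before/after_dispatch_count. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t dispatch_call_count;     /* stand-in for device->dispatch_call_count */
static const uint32_t before_count = 3;  /* stand-in "before" threshold */
static const uint32_t after_count  = 0;  /* 0 never matches here, i.e. disabled */

/* Returns true when a semaphore wait would be emitted. */
static bool
breakpoint_hits(bool emit_before_dispatch)
{
   /* Bump the counter only on the "before" call; the "after" call reads it. */
   uint32_t count = emit_before_dispatch ? ++dispatch_call_count
                                         : dispatch_call_count;
   return (emit_before_dispatch && count == before_count) ||
          (!emit_before_dispatch && count == after_count);
}

int
main(void)
{
   for (int dispatch = 1; dispatch <= 5; dispatch++) {
      bool before = breakpoint_hits(true);   /* before the CS walker */
      bool after  = breakpoint_hits(false);  /* after the CS walker */
      printf("dispatch %d: before=%d after=%d\n", dispatch, before, after);
   }
   return 0;
}

With before_count = 3, only the third dispatch reports before=1, mirroring how the patch stalls the batch right before the selected dispatch.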