diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 2b3f13a3095..5518026d474 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -509,31 +509,17 @@ radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer) } void -radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, - enum rgp_barrier_reason reason) -{ - struct rgp_sqtt_marker_barrier_start marker = {}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - - if (likely(!cmd_buffer->device->thread_trace_bo)) - return; - - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START; - marker.cb_id = 0; - marker.dword02 = reason; - - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); -} - -void -radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) +radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer) { struct rgp_sqtt_marker_barrier_end marker = {}; struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace_bo)) + if (likely(!cmd_buffer->device->thread_trace_bo) || + !cmd_buffer->state.pending_sqtt_barrier_end) return; + cmd_buffer->state.pending_sqtt_barrier_end = false; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END; marker.cb_id = 0; @@ -546,6 +532,31 @@ radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.num_layout_transitions = 0; } +void +radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, + enum rgp_barrier_reason reason) +{ + struct rgp_sqtt_marker_barrier_start marker = {}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + + if (likely(!cmd_buffer->device->thread_trace_bo)) + return; + + radv_describe_barrier_end_delayed(cmd_buffer); + + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START; + marker.cb_id = 0; + marker.dword02 = reason; + + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); +} + +void +radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->state.pending_sqtt_barrier_end = true; +} + void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 80239563b3d..243810fb732 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5056,6 +5056,8 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys *ws = cmd_buffer->device->ws; struct radeon_cmdbuf *cs = cmd_buffer->cs; + radv_describe_draw(cmd_buffer); + if (info->indirect) { uint64_t va = radv_buffer_get_va(info->indirect->bo); uint64_t count_va = 0; @@ -5286,8 +5288,6 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, return; } - radv_describe_draw(cmd_buffer); - /* Use optimal packet order based on whether we need to sync the * pipeline. */ @@ -5523,6 +5523,8 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_userdata_info *loc; + radv_describe_dispatch(cmd_buffer, 8, 8, 8); + loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); @@ -5663,8 +5665,6 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline; - radv_describe_dispatch(cmd_buffer, 8, 8, 8); - if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 4f954d52168..4de20b0b465 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1369,6 +1369,7 @@ struct radv_cmd_state { uint32_t current_event_type; uint32_t num_events; uint32_t num_layout_transitions; + bool pending_sqtt_barrier_end; }; struct radv_cmd_pool { @@ -2551,6 +2552,7 @@ void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer); void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason); void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer); +void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer); void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, const struct radv_barrier_data *barrier); diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index cd6cf23241a..d840457bb93 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -1424,8 +1424,10 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) RADV_CMD_FLAG_START_PIPELINE_STATS | RADV_CMD_FLAG_STOP_PIPELINE_STATS); - if (!cmd_buffer->state.flush_bits) + if (!cmd_buffer->state.flush_bits) { + radv_describe_barrier_end_delayed(cmd_buffer); return; + } radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); @@ -1452,6 +1454,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) * should be finished at this point. */ cmd_buffer->pending_reset_query = false; + + radv_describe_barrier_end_delayed(cmd_buffer); } /* sets the CP predication state using a boolean stored at va */