mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 18:40:13 +01:00
intel/ds: rework RT tracepoints
That way we can identify each individual dispatch within each step.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Michael Cheng <michael.cheng@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33684>
This commit is contained in:
parent
31c5c386d1
commit
e4f31b8744
7 changed files with 70 additions and 111 deletions
|
|
@ -73,12 +73,10 @@ enum intel_ds_tracepoint_flags {
|
|||
*/
|
||||
INTEL_DS_TRACEPOINT_FLAG_END_CS = BITFIELD_BIT(1),
|
||||
/**
|
||||
* Whether this tracepoint's timestamp is recorded on the compute pipeline
|
||||
* or from top of pipe if there was no dispatch (useful for acceleration
|
||||
* structure builds where the runtime might choose to not emit anything for
|
||||
* a number of reasons).
|
||||
* Whether this tracepoint doesn't generate a timestamp but instead repeats
|
||||
* the last one.
|
||||
*/
|
||||
INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP = BITFIELD_BIT(2),
|
||||
INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST = BITFIELD_BIT(2),
|
||||
};
|
||||
|
||||
/* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ def define_tracepoints(args):
|
|||
|
||||
def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None,
|
||||
tp_default_enabled=True, end_pipelined=True,
|
||||
compute=False, maybe_compute=False,
|
||||
compute=False, repeat_last=False,
|
||||
need_cs_param=False):
|
||||
global intel_default_tps
|
||||
if tp_default_enabled:
|
||||
|
|
@ -69,8 +69,8 @@ def define_tracepoints(args):
|
|||
if end_pipelined:
|
||||
if compute:
|
||||
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_CS')
|
||||
elif maybe_compute:
|
||||
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP')
|
||||
elif repeat_last:
|
||||
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST')
|
||||
else:
|
||||
tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE')
|
||||
Tracepoint('intel_end_{0}'.format(name),
|
||||
|
|
@ -229,15 +229,14 @@ def define_tracepoints(args):
|
|||
tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),],
|
||||
need_cs_param=True)
|
||||
|
||||
rt_args = [Arg(type='uint32_t', var='cs_hash', c_format='%u')]
|
||||
begin_end_tp('as_build', tp_args=rt_args)
|
||||
begin_end_tp('as_build_leaves', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_morton_generate', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_morton_sort', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_lbvh_build_internal', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_ploc_build_internal', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_encode', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_copy', tp_args=rt_args, maybe_compute=True)
|
||||
begin_end_tp('as_build')
|
||||
begin_end_tp('as_build_leaves', repeat_last=True)
|
||||
begin_end_tp('as_morton_generate', repeat_last=True)
|
||||
begin_end_tp('as_morton_sort', repeat_last=True)
|
||||
begin_end_tp('as_lbvh_build_internal', repeat_last=True)
|
||||
begin_end_tp('as_ploc_build_internal', repeat_last=True)
|
||||
begin_end_tp('as_encode', repeat_last=True)
|
||||
begin_end_tp('as_copy', repeat_last=True)
|
||||
|
||||
begin_end_tp('rays',
|
||||
tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
|
||||
|
|
|
|||
|
|
@ -1010,6 +1010,7 @@ enum anv_timestamp_capture_type {
|
|||
ANV_TIMESTAMP_CAPTURE_AT_CS_STALL,
|
||||
ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER,
|
||||
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH,
|
||||
ANV_TIMESTAMP_REPEAT_LAST,
|
||||
};
|
||||
|
||||
struct anv_physical_device {
|
||||
|
|
@ -6532,10 +6533,13 @@ struct anv_utrace_submit {
|
|||
struct anv_state_stream general_state_stream;
|
||||
|
||||
/* Last fully read 64bit timestamp (used to rebuild the upper bits of 32bit
|
||||
* timestamps)
|
||||
* timestamps). This timestamp is not scaled to the CPU time domain.
|
||||
*/
|
||||
uint64_t last_full_timestamp;
|
||||
|
||||
/* Last timestamp, not scaled to the CPU time domain */
|
||||
uint64_t last_timestamp;
|
||||
|
||||
/* Memcpy state tracking (only used for timestamp copies on render engine) */
|
||||
struct anv_memcpy_state memcpy_state;
|
||||
|
||||
|
|
|
|||
|
|
@ -351,11 +351,10 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
|
|||
const bool is_end_compute =
|
||||
cs == NULL &&
|
||||
(flags & INTEL_DS_TRACEPOINT_FLAG_END_CS);
|
||||
const bool is_end_compute_or_noop =
|
||||
cs == NULL &&
|
||||
(flags & INTEL_DS_TRACEPOINT_FLAG_END_CS_OR_NOOP);
|
||||
enum anv_timestamp_capture_type capture_type;
|
||||
if (is_end_compute) {
|
||||
if (flags & INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST) {
|
||||
capture_type = ANV_TIMESTAMP_REPEAT_LAST;
|
||||
} else if (is_end_compute) {
|
||||
assert(device->info->verx10 < 125 ||
|
||||
!is_end_compute ||
|
||||
cmd_buffer->state.last_indirect_dispatch != NULL ||
|
||||
|
|
@ -366,15 +365,6 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
|
|||
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH :
|
||||
ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) :
|
||||
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE;
|
||||
} else if (is_end_compute_or_noop) {
|
||||
capture_type =
|
||||
device->info->verx10 >= 125 ?
|
||||
(cmd_buffer->state.last_indirect_dispatch != NULL ?
|
||||
ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH :
|
||||
(cmd_buffer->state.last_compute_walker != NULL ?
|
||||
ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER :
|
||||
ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE)) :
|
||||
ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
|
||||
} else {
|
||||
capture_type = (flags & INTEL_DS_TRACEPOINT_FLAG_END_CS) ?
|
||||
ANV_TIMESTAMP_CAPTURE_END_OF_PIPE :
|
||||
|
|
@ -418,6 +408,11 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
|
|||
assert(result == VK_SUCCESS);
|
||||
}
|
||||
|
||||
if (flags & INTEL_DS_TRACEPOINT_FLAG_REPEAST_LAST) {
|
||||
return intel_device_info_timebase_scale(device->info,
|
||||
submit->last_timestamp);
|
||||
}
|
||||
|
||||
assert(offset_B % sizeof(union anv_utrace_timestamp) == 0);
|
||||
union anv_utrace_timestamp *ts =
|
||||
(union anv_utrace_timestamp *)(bo->map + offset_B);
|
||||
|
|
@ -426,31 +421,32 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
|
|||
if (ts->timestamp == U_TRACE_NO_TIMESTAMP)
|
||||
return U_TRACE_NO_TIMESTAMP;
|
||||
|
||||
uint64_t timestamp;
|
||||
|
||||
/* Detect a 16/32 bytes timestamp write */
|
||||
if (ts->gfx20_postsync_data[1] != 0 ||
|
||||
ts->gfx20_postsync_data[2] != 0 ||
|
||||
ts->gfx20_postsync_data[3] != 0) {
|
||||
if (device->info->ver >= 20) {
|
||||
return intel_device_info_timebase_scale(device->info,
|
||||
ts->gfx20_postsync_data[3]);
|
||||
timestamp = ts->gfx20_postsync_data[3];
|
||||
} else {
|
||||
/* The timestamp written by COMPUTE_WALKER::PostSync is only 32 bits.
|
||||
* We need to rebuild the full 64bits using the previous timestamp.
|
||||
* We assume that utrace is reading the timestamp in order. Anyway
|
||||
* timestamps roll over on 32 bits within a few minutes, so in most cases
|
||||
* that should be correct.
|
||||
*/
|
||||
timestamp =
|
||||
(submit->last_full_timestamp & 0xffffffff00000000) |
|
||||
(uint64_t) ts->gfx125_postsync_data[3];
|
||||
}
|
||||
|
||||
/* The timestamp written by COMPUTE_WALKER::PostSync only as 32bits. We
|
||||
* need to rebuild the full 64bits using the previous timestamp. We
|
||||
* assume that utrace is reading the timestamp in order. Anyway
|
||||
* timestamp rollover on 32bits in a few minutes so in most cases that
|
||||
* should be correct.
|
||||
*/
|
||||
uint64_t timestamp =
|
||||
(submit->last_full_timestamp & 0xffffffff00000000) |
|
||||
(uint64_t) ts->gfx125_postsync_data[3];
|
||||
|
||||
return intel_device_info_timebase_scale(device->info, timestamp);
|
||||
} else {
|
||||
submit->last_full_timestamp = timestamp = ts->timestamp;
|
||||
}
|
||||
|
||||
submit->last_full_timestamp = ts->timestamp;
|
||||
submit->last_timestamp = timestamp;
|
||||
|
||||
return intel_device_info_timebase_scale(device->info, ts->timestamp);
|
||||
return intel_device_info_timebase_scale(device->info, timestamp);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -72,41 +72,29 @@ static void
|
|||
end_debug_marker(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
|
||||
struct anv_compute_pipeline *pipeline =
|
||||
anv_pipeline_to_compute(comp_state->base.pipeline);
|
||||
const struct brw_cs_prog_data *cs_prog_data =
|
||||
brw_cs_prog_data_const(pipeline->cs->prog_data);
|
||||
|
||||
cmd_buffer->state.rt.debug_marker_count--;
|
||||
switch (cmd_buffer->state.rt.debug_markers[cmd_buffer->state.rt.debug_marker_count]) {
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
|
||||
trace_intel_end_as_build(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_build(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
|
||||
trace_intel_end_as_build_leaves(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_build_leaves(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
|
||||
trace_intel_end_as_morton_generate(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_morton_generate(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
|
||||
trace_intel_end_as_morton_sort(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_morton_sort(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
|
||||
trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
|
||||
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
|
||||
break;
|
||||
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
|
||||
trace_intel_end_as_encode(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_encode(&cmd_buffer->trace);
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid build step");
|
||||
|
|
@ -725,12 +713,6 @@ genX(CmdCopyAccelerationStructureKHR)(
|
|||
return;
|
||||
}
|
||||
|
||||
ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline);
|
||||
struct anv_compute_pipeline *compute_pipeline =
|
||||
anv_pipeline_to_compute(anv_pipeline);
|
||||
const struct brw_cs_prog_data *cs_prog_data =
|
||||
brw_cs_prog_data_const(compute_pipeline->cs->prog_data);
|
||||
|
||||
struct anv_cmd_saved_state saved;
|
||||
anv_cmd_buffer_save_state(cmd_buffer,
|
||||
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
||||
|
|
@ -773,8 +755,7 @@ genX(CmdCopyAccelerationStructureKHR)(
|
|||
|
||||
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
||||
|
||||
trace_intel_end_as_copy(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_copy(&cmd_buffer->trace);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -799,12 +780,6 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
|
|||
return;
|
||||
}
|
||||
|
||||
ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline);
|
||||
struct anv_compute_pipeline *compute_pipeline =
|
||||
anv_pipeline_to_compute(anv_pipeline);
|
||||
const struct brw_cs_prog_data *cs_prog_data =
|
||||
brw_cs_prog_data_const(compute_pipeline->cs->prog_data);
|
||||
|
||||
struct anv_cmd_saved_state saved;
|
||||
anv_cmd_buffer_save_state(cmd_buffer,
|
||||
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
||||
|
|
@ -851,8 +826,7 @@ genX(CmdCopyAccelerationStructureToMemoryKHR)(
|
|||
|
||||
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
||||
|
||||
trace_intel_end_as_copy(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_copy(&cmd_buffer->trace);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -876,12 +850,6 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
|
|||
return;
|
||||
}
|
||||
|
||||
ANV_FROM_HANDLE(anv_pipeline, anv_pipeline, pipeline);
|
||||
struct anv_compute_pipeline *compute_pipeline =
|
||||
anv_pipeline_to_compute(anv_pipeline);
|
||||
const struct brw_cs_prog_data *cs_prog_data =
|
||||
brw_cs_prog_data_const(compute_pipeline->cs->prog_data);
|
||||
|
||||
struct anv_cmd_saved_state saved;
|
||||
anv_cmd_buffer_save_state(cmd_buffer,
|
||||
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
||||
|
|
@ -911,8 +879,7 @@ genX(CmdCopyMemoryToAccelerationStructureKHR)(
|
|||
vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
|
||||
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
||||
|
||||
trace_intel_end_as_copy(&cmd_buffer->trace,
|
||||
cs_prog_data->base.source_hash);
|
||||
trace_intel_end_as_copy(&cmd_buffer->trace);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -6269,6 +6269,10 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
|
|||
}
|
||||
#endif
|
||||
|
||||
case ANV_TIMESTAMP_REPEAT_LAST:
|
||||
/* Noop */
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("invalid");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -626,8 +626,7 @@ void genX(CmdDispatchBase)(
|
|||
prog_data->local_size[0] * prog_data->local_size[1] *
|
||||
prog_data->local_size[2]);
|
||||
|
||||
if (cmd_buffer->state.rt.debug_marker_count == 0)
|
||||
trace_intel_begin_compute(&cmd_buffer->trace);
|
||||
trace_intel_begin_compute(&cmd_buffer->trace);
|
||||
|
||||
cmd_buffer_flush_compute_state(cmd_buffer);
|
||||
|
||||
|
|
@ -639,11 +638,9 @@ void genX(CmdDispatchBase)(
|
|||
groupCountX, groupCountY, groupCountZ,
|
||||
false);
|
||||
|
||||
if (cmd_buffer->state.rt.debug_marker_count == 0) {
|
||||
trace_intel_end_compute(&cmd_buffer->trace,
|
||||
groupCountX, groupCountY, groupCountZ,
|
||||
prog_data->base.source_hash);
|
||||
}
|
||||
trace_intel_end_compute(&cmd_buffer->trace,
|
||||
groupCountX, groupCountY, groupCountZ,
|
||||
prog_data->base.source_hash);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -686,8 +683,7 @@ emit_unaligned_cs_walker(
|
|||
prog_data->local_size[0] * prog_data->local_size[1] *
|
||||
prog_data->local_size[2]);
|
||||
|
||||
if (cmd_buffer->state.rt.debug_marker_count == 0)
|
||||
trace_intel_begin_compute(&cmd_buffer->trace);
|
||||
trace_intel_begin_compute(&cmd_buffer->trace);
|
||||
|
||||
assert(!prog_data->uses_num_work_groups);
|
||||
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
|
||||
|
|
@ -700,11 +696,9 @@ emit_unaligned_cs_walker(
|
|||
dispatch, groupCountX, groupCountY, groupCountZ);
|
||||
#endif
|
||||
|
||||
if (cmd_buffer->state.rt.debug_marker_count == 0) {
|
||||
trace_intel_end_compute(&cmd_buffer->trace,
|
||||
groupCountX, groupCountY, groupCountZ,
|
||||
prog_data->base.source_hash);
|
||||
}
|
||||
trace_intel_end_compute(&cmd_buffer->trace,
|
||||
groupCountX, groupCountY, groupCountZ,
|
||||
prog_data->base.source_hash);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -795,8 +789,7 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
|
|||
"compute indirect",
|
||||
0);
|
||||
|
||||
if (cmd_buffer->state.rt.debug_marker_count == 0)
|
||||
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
|
||||
trace_intel_begin_compute_indirect(&cmd_buffer->trace);
|
||||
|
||||
cmd_buffer_flush_compute_state(cmd_buffer);
|
||||
|
||||
|
|
@ -806,11 +799,9 @@ genX(cmd_buffer_dispatch_indirect)(struct anv_cmd_buffer *cmd_buffer,
|
|||
emit_cs_walker(cmd_buffer, pipeline, prog_data, dispatch, indirect_addr, 0,
|
||||
0, 0, is_unaligned_size_x);
|
||||
|
||||
if (cmd_buffer->state.rt.debug_marker_count == 0) {
|
||||
trace_intel_end_compute_indirect(&cmd_buffer->trace,
|
||||
anv_address_utrace(indirect_addr),
|
||||
prog_data->base.source_hash);
|
||||
}
|
||||
trace_intel_end_compute_indirect(&cmd_buffer->trace,
|
||||
anv_address_utrace(indirect_addr),
|
||||
prog_data->base.source_hash);
|
||||
}
|
||||
|
||||
void genX(CmdDispatchIndirect)(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue