u_trace: remove timestamp reference in allocations

We want to reduce the buffer allocations for other types of data than
timestamps.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29944>
This commit is contained in:
Lionel Landwerlin 2024-07-26 21:03:40 +03:00 committed by Lionel Landwerlin
parent 4347ccbe57
commit cb27b9541b
14 changed files with 205 additions and 197 deletions

View file

@ -31,8 +31,7 @@ tu_clone_trace_range(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_cs_emit_wfi(cs);
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
u_trace_clone_append(begin, end, &cmd->trace, cs,
tu_copy_timestamp_buffer);
u_trace_clone_append(begin, end, &cmd->trace, cs, tu_copy_buffer);
}
static void

View file

@ -1706,19 +1706,19 @@ tu_device_get_u_trace(struct tu_device *device)
}
/*
 * Turnip's u_trace buffer-creation callback (it is the create callback handed
 * to u_trace_context_init() in tu_CreateDevice).
 *
 * Diff view: each changed line appears as the removed line immediately
 * followed by its replacement. The callback is renamed from
 * tu_trace_create_ts_buffer to tu_trace_create_buffer and its size argument
 * becomes a 64-bit byte count (size_B), which is handed to tu_bo_init_new().
 *
 * Returns the struct tu_bo pointer as the opaque buffer handle.
 * NOTE(review): the result of tu_bo_init_new() is ignored and `bo` has no
 * initializer, so what is returned on allocation failure depends on
 * tu_bo_init_new() -- verify.
 */
static void*
tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
{
struct tu_device *device =
container_of(utctx, struct tu_device, trace_context);
struct tu_bo *bo;
tu_bo_init_new(device, NULL, &bo, size, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
return bo;
}
static void
tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct tu_device *device =
container_of(utctx, struct tu_device, trace_context);
@ -1730,18 +1730,16 @@ tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
template <chip CHIP>
static void
tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
unsigned idx, uint32_t)
uint64_t offset_B, uint32_t)
{
struct tu_bo *bo = (struct tu_bo *) timestamps;
struct tu_cs *ts_cs = (struct tu_cs *) cs;
unsigned ts_offset = idx * sizeof(uint64_t);
if (CHIP == A6XX) {
tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
CP_EVENT_WRITE_0_TIMESTAMP);
tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
tu_cs_emit(ts_cs, 0x00000000);
} else {
tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
@ -1750,13 +1748,13 @@ tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
.write_dst = EV_DST_RAM,
.write_enabled = true)
.value);
tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
}
}
static uint64_t
tu_trace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
void *timestamps, uint64_t offset_B, void *flush_data)
{
struct tu_device *device =
container_of(utctx, struct tu_device, trace_context);
@ -1765,7 +1763,7 @@ tu_trace_read_ts(struct u_trace_context *utctx,
(struct tu_u_trace_submission_data *) flush_data;
/* Only need to stall on results for the first entry: */
if (idx == 0) {
if (offset_B == 0) {
tu_device_wait_u_trace(device, submission_data->syncobj);
}
@ -1773,13 +1771,13 @@ tu_trace_read_ts(struct u_trace_context *utctx,
return U_TRACE_NO_TIMESTAMP;
}
uint64_t *ts = (uint64_t *) bo->map;
uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);
/* Don't translate the no-timestamp marker: */
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
if (*ts == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
return tu_device_ticks_to_ns(device, ts[idx]);
return tu_device_ticks_to_ns(device, *ts);
}
static void
@ -1794,19 +1792,19 @@ tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
}
/*
 * Copy callback passed to u_trace_clone_append(): emits a CP_MEMCPY packet
 * into `cmdstream` (a struct tu_cs) that copies from the BO `ts_from` to the
 * BO `ts_to`.
 *
 * Diff view: removed lines are immediately followed by their replacements.
 * Before the change the offsets and the length were counted in uint64_t
 * entries and scaled here; after the change the caller passes byte offsets
 * (from_offset_B / to_offset_B) and a byte length (size_B) and no scaling is
 * done in this function.
 */
void
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
void *ts_from, uint64_t from_offset_B,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B)
{
struct tu_cs *cs = (struct tu_cs *) cmdstream;
struct tu_bo *bo_from = (struct tu_bo *) ts_from;
struct tu_bo *bo_to = (struct tu_bo *) ts_to;
tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
/* The first payload word is the length expressed in 32-bit words; a size_B
 * that is not a multiple of 4 is truncated by the integer division.
 */
tu_cs_emit(cs, size_B / sizeof(uint32_t));
/* Source then destination address: BO GPU address plus the byte offset. */
tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
}
/* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
@ -1872,7 +1870,7 @@ tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
tu_cmd_end_iterator(cmdbuf),
*trace_copy, *cs,
tu_copy_timestamp_buffer);
tu_copy_buffer);
tu_cs_emit_wfi(*cs);
@ -2522,8 +2520,9 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
device->submit_count = 0;
u_trace_context_init(&device->trace_context, device,
tu_trace_create_ts_buffer,
tu_trace_destroy_ts_buffer,
sizeof(uint64_t),
tu_trace_create_buffer,
tu_trace_destroy_buffer,
TU_CALLX(device, tu_trace_record_ts),
tu_trace_read_ts,
tu_trace_delete_flush_data);

View file

@ -509,10 +509,10 @@ void tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
const VkRenderingInfo *pRenderingInfo);
void
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count);
tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
void *ts_from, uint64_t from_offset_B,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B);
VkResult

View file

@ -34,7 +34,7 @@ extern "C" {
#endif
static void *
u_trace_pipe_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
u_trace_pipe_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
{
struct pipe_context *ctx = utctx->pctx;
@ -42,7 +42,7 @@ u_trace_pipe_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
.target = PIPE_BUFFER,
.format = PIPE_FORMAT_R8_UNORM,
.bind = PIPE_BIND_QUERY_BUFFER | PIPE_BIND_LINEAR,
.width0 = size,
.width0 = size_B,
.height0 = 1,
.depth0 = 1,
.array_size = 1,
@ -52,7 +52,7 @@ u_trace_pipe_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
}
static void
u_trace_pipe_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps)
u_trace_pipe_delete_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct pipe_resource *buffer = timestamps;
pipe_resource_reference(&buffer, NULL);
@ -61,13 +61,15 @@ u_trace_pipe_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps)
void
u_trace_pipe_context_init(struct u_trace_context *utctx,
struct pipe_context *pctx,
uint32_t timestamp_size_B,
u_trace_record_ts record_timestamp,
u_trace_read_ts read_timestamp,
u_trace_delete_flush_data delete_flush_data)
{
u_trace_context_init(utctx, pctx,
u_trace_pipe_create_ts_buffer,
u_trace_pipe_delete_ts_buffer,
timestamp_size_B,
u_trace_pipe_create_buffer,
u_trace_pipe_delete_buffer,
record_timestamp,
read_timestamp,
delete_flush_data);

View file

@ -38,6 +38,7 @@ struct pipe_framebuffer_state;
void
u_trace_pipe_context_init(struct u_trace_context *utctx,
struct pipe_context *pctx,
uint32_t timestamp_size_B,
u_trace_record_ts record_timestamp,
u_trace_read_ts read_timestamp,
u_trace_delete_flush_data delete_flush_data);

View file

@ -499,26 +499,25 @@ fd_get_device_reset_status(struct pipe_context *pctx)
/*
 * Freedreno's u_trace record-timestamp callback.
 *
 * Diff view: removed lines are immediately followed by their replacements.
 * The slot to write is now identified by a byte offset (offset_B) into the
 * buffer instead of an index of uint64_t entries.
 *
 * `cs` is the fd_ringbuffer to emit into and `timestamps` is the
 * pipe_resource backing the buffer.
 */
static void
fd_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
unsigned idx, uint32_t flags)
uint64_t offset_B, uint32_t flags)
{
struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
struct fd_ringbuffer *ring = cs;
struct pipe_resource *buffer = timestamps;
/* If the ring's write pointer has not moved since the last timestamp
 * command was recorded, no new timestamp is emitted; the slot is filled
 * from the CPU with the U_TRACE_NO_TIMESTAMP marker instead.
 */
if (ring->cur == batch->last_timestamp_cmd) {
uint64_t *ts = fd_bo_map(fd_resource(buffer)->bo);
ts[idx] = U_TRACE_NO_TIMESTAMP;
/* NOTE(review): offset_B is a byte offset, so this relies on fd_bo_map()
 * returning void * (byte-granular pointer arithmetic, a GNU extension)
 * -- confirm the return type.
 */
uint64_t *ts = fd_bo_map(fd_resource(buffer)->bo) + offset_B;
*ts = U_TRACE_NO_TIMESTAMP;
return;
}
unsigned ts_offset = idx * sizeof(uint64_t);
batch->ctx->record_timestamp(ring, fd_resource(buffer)->bo, ts_offset);
batch->ctx->record_timestamp(ring, fd_resource(buffer)->bo, offset_B);
/* Remember where the ring was after this timestamp for the check above. */
batch->last_timestamp_cmd = ring->cur;
}
static uint64_t
fd_trace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
void *timestamps, uint64_t offset_B, void *flush_data)
{
struct fd_context *ctx =
container_of(utctx, struct fd_context, trace_context);
@ -526,7 +525,7 @@ fd_trace_read_ts(struct u_trace_context *utctx,
struct fd_bo *ts_bo = fd_resource(buffer)->bo;
/* Only need to stall on results for the first entry: */
if (idx == 0) {
if (offset_B == 0) {
/* Avoid triggering deferred submits from flushing, since that
* changes the behavior of what we are trying to measure:
*/
@ -537,13 +536,13 @@ fd_trace_read_ts(struct u_trace_context *utctx,
return U_TRACE_NO_TIMESTAMP;
}
uint64_t *ts = fd_bo_map(ts_bo);
uint64_t *ts = fd_bo_map(ts_bo) + offset_B;
/* Don't translate the no-timestamp marker: */
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
if (*ts == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
return ctx->ts_to_ns(ts[idx]);
return ctx->ts_to_ns(*ts);
}
static void
@ -718,6 +717,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
fd_gpu_tracepoint_config_variable();
u_trace_pipe_context_init(&ctx->trace_context, pctx,
sizeof(uint64_t),
fd_trace_record_ts,
fd_trace_read_ts,
fd_trace_delete_flush_data);

View file

@ -61,29 +61,27 @@ union iris_utrace_timestamp {
};
/*
 * Iris' u_trace buffer-creation callback.
 *
 * Diff view: removed lines are immediately followed by their replacements.
 * Previously the requested size was converted from uint64_t-sized entries to
 * union iris_utrace_timestamp-sized entries here (iris_size); now the caller
 * passes the final size in bytes (size_B) and it is used as is.
 *
 * The BO is allocated, CPU-mapped and cleared to zero before being returned
 * as the opaque buffer handle.
 * NOTE(review): neither the iris_bo_alloc() result nor the iris_bo_map()
 * result is checked before the memset -- confirm these cannot fail here.
 */
static void *
iris_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
iris_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
{
struct iris_context *ice =
container_of(utctx, struct iris_context, ds.trace_context);
struct pipe_context *ctx = &ice->ctx;
struct iris_screen *screen = (struct iris_screen *)ctx->screen;
uint32_t iris_size =
(size / sizeof(uint64_t)) * sizeof(union iris_utrace_timestamp);
struct iris_bo *bo =
iris_bo_alloc(screen->bufmgr, "utrace timestamps",
iris_size, 16 /* alignment */,
size_B, 16 /* alignment */,
IRIS_MEMZONE_OTHER,
BO_ALLOC_COHERENT | BO_ALLOC_SMEM);
void *ptr = iris_bo_map(NULL, bo, MAP_READ | MAP_WRITE);
/* Presumably zeroed so that iris_utrace_read_ts() can tell a 16-byte
 * timestamp write apart by testing compute_walker[2]/[3] != 0 -- confirm.
 */
memset(ptr, 0, iris_size);
memset(ptr, 0, size_B);
return bo;
}
static void
iris_utrace_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps)
iris_utrace_delete_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct iris_bo *bo = timestamps;
iris_bo_unreference(bo);
@ -91,13 +89,12 @@ iris_utrace_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps)
static void
iris_utrace_record_ts(struct u_trace *trace, void *cs,
void *timestamps, unsigned idx,
void *timestamps, uint64_t offset_B,
uint32_t flags)
{
struct iris_batch *batch = container_of(trace, struct iris_batch, trace);
struct iris_context *ice = batch->ice;
struct iris_bo *bo = timestamps;
uint32_t ts_offset = idx * sizeof(union iris_utrace_timestamp);
iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE);
@ -107,22 +104,22 @@ iris_utrace_record_ts(struct u_trace *trace, void *cs,
if (is_end_compute) {
assert(ice->utrace.last_compute_walker != NULL);
batch->screen->vtbl.rewrite_compute_walker_pc(
batch, ice->utrace.last_compute_walker, bo, ts_offset);
batch, ice->utrace.last_compute_walker, bo, offset_B);
ice->utrace.last_compute_walker = NULL;
} else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) {
iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
PIPE_CONTROL_WRITE_TIMESTAMP,
bo, ts_offset, 0ull);
bo, offset_B, 0ull);
} else {
batch->screen->vtbl.store_register_mem64(batch, 0x2358,
bo, ts_offset,
bo, offset_B,
false);
}
}
static uint64_t
iris_utrace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
void *timestamps, uint64_t offset_B, void *flush_data)
{
struct iris_context *ice =
container_of(utctx, struct iris_context, ds.trace_context);
@ -130,17 +127,17 @@ iris_utrace_read_ts(struct u_trace_context *utctx,
struct iris_screen *screen = (struct iris_screen *)ctx->screen;
struct iris_bo *bo = timestamps;
if (idx == 0)
if (offset_B == 0)
iris_bo_wait_rendering(bo);
union iris_utrace_timestamp *ts = iris_bo_map(NULL, bo, MAP_READ);
union iris_utrace_timestamp *ts = iris_bo_map(NULL, bo, MAP_READ) + offset_B;
/* Don't translate the no-timestamp marker: */
if (ts[idx].timestamp == U_TRACE_NO_TIMESTAMP)
if (ts->timestamp == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
/* Detect a 16bytes timestamp write */
if (ts[idx].compute_walker[2] != 0 || ts[idx].compute_walker[3] != 0) {
if (ts->compute_walker[2] != 0 || ts->compute_walker[3] != 0) {
/* The timestamp written by COMPUTE_WALKER::PostSync only has 32 bits. We
* need to rebuild the full 64bits using the previous timestamp. We
* assume that utrace is reading the timestamp in order. Anyway
@ -149,14 +146,14 @@ iris_utrace_read_ts(struct u_trace_context *utctx,
*/
uint64_t timestamp =
(ice->utrace.last_full_timestamp & 0xffffffff00000000) |
(uint64_t) ts[idx].compute_walker[3];
(uint64_t) ts->compute_walker[3];
return intel_device_info_timebase_scale(screen->devinfo, timestamp);
}
ice->utrace.last_full_timestamp = ts[idx].timestamp;
ice->utrace.last_full_timestamp = ts->timestamp;
return intel_device_info_timebase_scale(screen->devinfo, ts[idx].timestamp);
return intel_device_info_timebase_scale(screen->devinfo, ts->timestamp);
}
static void
@ -190,8 +187,9 @@ void iris_utrace_init(struct iris_context *ice)
INTEL_DS_API_OPENGL);
u_trace_context_init(&ice->ds.trace_context, &ice->ctx,
iris_utrace_create_ts_buffer,
iris_utrace_delete_ts_buffer,
sizeof(union iris_utrace_timestamp),
iris_utrace_create_buffer,
iris_utrace_delete_buffer,
iris_utrace_record_ts,
iris_utrace_read_ts,
iris_utrace_delete_flush_data);

View file

@ -13,39 +13,37 @@
/*
 * radeonsi's u_trace record-timestamp callback.
 *
 * Diff view: removed lines are immediately followed by their replacements
 * (two of the pairs look identical here; presumably they differ only in
 * whitespace). The slot is now addressed by a byte offset (offset_B) rather
 * than by an index of uint64_t entries.
 */
static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps,
unsigned idx, uint32_t flags)
uint64_t offset_B, uint32_t flags)
{
struct si_context *ctx = container_of(trace, struct si_context, trace);
struct pipe_resource *buffer = timestamps;
struct si_resource *ts_bo = si_resource(buffer);
/* If the gfx command stream is still at the position recorded after the
 * previous timestamp (same buffer and same dword count), do not emit a new
 * timestamp; store the U_TRACE_NO_TIMESTAMP marker from the CPU instead.
 */
if (ctx->gfx_cs.current.buf == ctx->last_timestamp_cmd &&
if (ctx->gfx_cs.current.buf == ctx->last_timestamp_cmd &&
ctx->gfx_cs.current.cdw == ctx->last_timestamp_cmd_cdw) {
uint64_t *ts = si_buffer_map(ctx, ts_bo, PIPE_MAP_READ);
ts[idx] = U_TRACE_NO_TIMESTAMP;
/* NOTE(review): the buffer is mapped with PIPE_MAP_READ but written
 * through below -- confirm this mapping flag is intended. The byte offset
 * is added to the mapping pointer, which assumes si_buffer_map() returns
 * void * (GNU void-pointer arithmetic) -- confirm.
 */
uint64_t *ts = si_buffer_map(ctx, ts_bo, PIPE_MAP_READ) + offset_B;
*ts = U_TRACE_NO_TIMESTAMP;
return;
}
unsigned ts_offset = idx * sizeof(uint64_t);
si_emit_ts(ctx, ts_bo, ts_offset);
si_emit_ts(ctx, ts_bo, offset_B);
/* Record the command-stream position for the duplicate check above. */
ctx->last_timestamp_cmd = ctx->gfx_cs.current.buf;
ctx->last_timestamp_cmd_cdw = ctx->gfx_cs.current.cdw;
}
/*
 * radeonsi's u_trace read-timestamp callback: reads the 64-bit value stored
 * at byte offset offset_B of the buffer and converts it with the screen's
 * clock_crystal_freq.
 *
 * Diff view: removed lines are immediately followed by their replacements;
 * the index of uint64_t entries (idx) is replaced by a byte offset.
 *
 * Returns U_TRACE_NO_TIMESTAMP unchanged when the slot holds that marker.
 */
static uint64_t si_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
unsigned idx, void *flush_data)
static uint64_t si_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
uint64_t offset_B, void *flush_data)
{
struct si_context *ctx = container_of(utctx, struct si_context, ds.trace_context);
struct pipe_resource *buffer = timestamps;
uint64_t *ts = si_buffer_map(ctx, si_resource(buffer), PIPE_MAP_READ);
/* Byte offset added to the mapping pointer; assumes si_buffer_map() returns
 * void * -- TODO confirm.
 */
uint64_t *ts = si_buffer_map(ctx, si_resource(buffer), PIPE_MAP_READ) + offset_B;
/* Don't translate the no-timestamp marker: */
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
if (*ts == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
/* NOTE(review): presumably clock_crystal_freq is in kHz so the result is in
 * nanoseconds -- confirm. The 64-bit product 1000000 * ticks wraps for tick
 * values above roughly 1.8e13 -- confirm whether that is reachable.
 */
return (1000000 * ts[idx]) / ctx->screen->info.clock_crystal_freq;
return (1000000 * *ts) / ctx->screen->info.clock_crystal_freq;
}
static void si_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
@ -62,7 +60,8 @@ void si_utrace_init(struct si_context *sctx)
uint32_t gpu_id = _mesa_hash_string(buf);
si_ds_device_init(&sctx->ds, &sctx->screen->info, gpu_id, AMD_DS_API_OPENGL);
u_trace_pipe_context_init(&sctx->ds.trace_context, &sctx->b, si_utrace_record_ts,
u_trace_pipe_context_init(&sctx->ds.trace_context, &sctx->b,
sizeof(uint64_t), si_utrace_record_ts,
si_utrace_read_ts, si_utrace_delete_flush_data);
si_ds_device_init_queue(&sctx->ds, &sctx->ds_queue, "%s", "render");

View file

@ -6217,6 +6217,13 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
struct anv_cmd_buffer **cmd_buffers,
struct anv_utrace_submit **out_submit);
void
anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint64_t from_offset_B,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B);
static bool
anv_has_cooperative_matrix(const struct anv_physical_device *device)
{

View file

@ -95,54 +95,59 @@ anv_utrace_delete_submit(struct u_trace_context *utctx, void *submit_data)
vk_free(&device->vk.alloc, submit);
}
/*
 * Copy callback for u_trace_clone_append() that copies with
 * emit_so_memcpy().
 *
 * Diff view: removed lines are immediately followed by their replacements.
 * Changes visible here:
 *  - the function loses `static` and is renamed, so it can also be used from
 *    CmdExecuteCommands;
 *  - `cmdstream` is now the struct anv_memcpy_state itself rather than the
 *    enclosing struct anv_utrace_submit;
 *  - offsets and length arrive in bytes and are no longer multiplied by
 *    sizeof(union anv_utrace_timestamp).
 */
static void
anv_device_utrace_emit_gfx_copy_ts_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
void
anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint64_t from_offset_B,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B)
{
/* Both byte offsets must fall on a timestamp-entry boundary. */
assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_submit *submit = cmdstream;
struct anv_memcpy_state *memcpy_state = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * sizeof(union anv_utrace_timestamp) };
.bo = ts_from, .offset = from_offset_B };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * sizeof(union anv_utrace_timestamp) };
.bo = ts_to, .offset = to_offset_B };
anv_genX(device->info, emit_so_memcpy)(&submit->memcpy_state,
to_addr, from_addr,
count * sizeof(union anv_utrace_timestamp));
anv_genX(device->info, emit_so_memcpy)(memcpy_state,
to_addr, from_addr, size_B);
}
/*
 * Copy callback for u_trace_clone_append() that copies with a simple-shader
 * dispatch: it allocates push data holding a struct anv_memcpy_params and
 * dispatches the shader over it.
 *
 * Diff view: removed lines are immediately followed by their replacements.
 * `cmdstream` is now the struct anv_simple_shader itself rather than the
 * enclosing struct anv_utrace_submit, and offsets/length arrive in bytes
 * instead of timestamp-entry counts.
 */
static void
anv_device_utrace_emit_cs_copy_ts_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
anv_device_utrace_emit_cs_copy_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint64_t from_offset_B,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B)
{
/* Both byte offsets must fall on a timestamp-entry boundary. */
assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_submit *submit = cmdstream;
struct anv_simple_shader *simple_state = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * sizeof(union anv_utrace_timestamp) };
.bo = ts_from, .offset = from_offset_B };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * sizeof(union anv_utrace_timestamp) };
.bo = ts_to, .offset = to_offset_B };
struct anv_state push_data_state =
anv_genX(device->info, simple_shader_alloc_push)(
&submit->simple_state, sizeof(struct anv_memcpy_params));
simple_state, sizeof(struct anv_memcpy_params));
struct anv_memcpy_params *params = push_data_state.map;
/* The length is passed to the shader in dwords; a size_B that is not a
 * multiple of 4 is truncated by the integer division.
 */
*params = (struct anv_memcpy_params) {
.num_dwords = count * sizeof(union anv_utrace_timestamp) / 4,
.num_dwords = size_B / 4,
.src_addr = anv_address_physical(from_addr),
.dst_addr = anv_address_physical(to_addr),
};
/* The dispatch size is num_dwords / 4 rounded up; presumably each
 * invocation copies 4 dwords -- confirm against the shader.
 */
anv_genX(device->info, emit_simple_shader_dispatch)(
&submit->simple_state, DIV_ROUND_UP(params->num_dwords, 4),
simple_state, DIV_ROUND_UP(params->num_dwords, 4),
push_data_state);
}
@ -213,8 +218,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
&submit->ds.trace,
submit,
anv_device_utrace_emit_gfx_copy_ts_buffer);
&submit->memcpy_state,
anv_device_utrace_emit_gfx_copy_buffer);
}
}
anv_genX(device->info, emit_so_memcpy_fini)(&submit->memcpy_state);
@ -254,8 +259,8 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
u_trace_end_iterator(&cmd_buffers[i]->trace),
&submit->ds.trace,
submit,
anv_device_utrace_emit_cs_copy_ts_buffer);
&submit->simple_state,
anv_device_utrace_emit_cs_copy_buffer);
}
}
@ -296,18 +301,15 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
}
static void *
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
anv_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
uint32_t anv_ts_size_b = (size_b / sizeof(uint64_t)) *
sizeof(union anv_utrace_timestamp);
struct anv_bo *bo = NULL;
UNUSED VkResult result =
anv_bo_pool_alloc(&device->utrace_bo_pool,
align(anv_ts_size_b, 4096),
align(size_B, 4096),
&bo);
assert(result == VK_SUCCESS);
@ -322,7 +324,7 @@ anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
}
static void
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
anv_utrace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
@ -333,7 +335,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
void *timestamps, uint64_t offset_B,
uint32_t flags)
{
struct anv_device *device =
@ -344,9 +346,10 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
struct anv_bo *bo = timestamps;
assert(offset_B % sizeof(union anv_utrace_timestamp) == 0);
struct anv_address ts_address = (struct anv_address) {
.bo = bo,
.offset = idx * sizeof(union anv_utrace_timestamp)
.offset = offset_B,
};
/* Is this a end of compute trace point? */
@ -384,7 +387,8 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
void *timestamps, uint64_t offset_B,
void *flush_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
@ -393,7 +397,7 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
container_of(flush_data, struct anv_utrace_submit, ds);
/* Only need to stall on results for the first entry: */
if (idx == 0) {
if (offset_B == 0) {
MESA_TRACE_SCOPE("anv utrace wait timestamps");
UNUSED VkResult result =
vk_sync_wait(&device->vk,
@ -404,14 +408,16 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
assert(result == VK_SUCCESS);
}
union anv_utrace_timestamp *ts = (union anv_utrace_timestamp *)bo->map;
assert(offset_B % sizeof(union anv_utrace_timestamp) == 0);
union anv_utrace_timestamp *ts =
(union anv_utrace_timestamp *)(bo->map + offset_B);
/* Don't translate the no-timestamp marker: */
if (ts[idx].timestamp == U_TRACE_NO_TIMESTAMP)
if (ts->timestamp == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
/* Detect a 16bytes timestamp write */
if (ts[idx].compute_walker[2] != 0 || ts[idx].compute_walker[3] != 0) {
if (ts->compute_walker[2] != 0 || ts->compute_walker[3] != 0) {
/* The timestamp written by COMPUTE_WALKER::PostSync only has 32 bits. We
* need to rebuild the full 64bits using the previous timestamp. We
* assume that utrace is reading the timestamp in order. Anyway
@ -420,19 +426,21 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
*/
uint64_t timestamp =
(submit->last_full_timestamp & 0xffffffff00000000) |
(uint64_t) ts[idx].compute_walker[3];
(uint64_t) ts->compute_walker[3];
return intel_device_info_timebase_scale(device->info, timestamp);
}
submit->last_full_timestamp = ts[idx].timestamp;
submit->last_full_timestamp = ts->timestamp;
return intel_device_info_timebase_scale(device->info, ts[idx].timestamp);
return intel_device_info_timebase_scale(device->info, ts->timestamp);
}
void
anv_device_utrace_init(struct anv_device *device)
{
device->utrace_timestamp_size = sizeof(union anv_utrace_timestamp);
anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace",
ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_HOST_CACHED_COHERENT);
intel_ds_device_init(&device->ds, device->info, device->fd,
@ -440,8 +448,9 @@ anv_device_utrace_init(struct anv_device *device)
INTEL_DS_API_VULKAN);
u_trace_context_init(&device->ds.trace_context,
&device->ds,
anv_utrace_create_ts_buffer,
anv_utrace_destroy_ts_buffer,
device->utrace_timestamp_size,
anv_utrace_create_buffer,
anv_utrace_destroy_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
anv_utrace_delete_submit);
@ -453,8 +462,6 @@ anv_device_utrace_init(struct anv_device *device)
intel_engines_class_to_string(queue->family->engine_class),
queue->vk.index_in_family);
}
device->utrace_timestamp_size = sizeof(union anv_utrace_timestamp);
}
void

View file

@ -3166,25 +3166,6 @@ genX(EndCommandBuffer)(
return status;
}
/*
 * Diff view: this whole function is removed by the change (the hunk shrinks
 * from 25 to 6 lines); the u_trace_clone_append() call in CmdExecuteCommands
 * passes anv_device_utrace_emit_gfx_copy_buffer instead.
 *
 * The removed helper treated `cmdstream` as a struct anv_memcpy_state and
 * scaled entry-count offsets and length by device->utrace_timestamp_size
 * before calling emit_so_memcpy().
 */
static void
cmd_buffer_emit_copy_ts_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_memcpy_state *memcpy_state = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * device->utrace_timestamp_size };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * device->utrace_timestamp_size };
genX(emit_so_memcpy)(memcpy_state, to_addr, from_addr,
count * device->utrace_timestamp_size);
}
void
genX(CmdExecuteCommands)(
VkCommandBuffer commandBuffer,
@ -3421,7 +3402,7 @@ genX(CmdExecuteCommands)(
u_trace_end_iterator(&secondary->trace),
&container->trace,
&memcpy_state,
cmd_buffer_emit_copy_ts_buffer);
anv_device_utrace_emit_gfx_copy_buffer);
}
genX(emit_so_memcpy_fini)(&memcpy_state);

View file

@ -71,20 +71,20 @@ anv_utrace_delete_flush_data(struct u_trace_context *utctx,
/*
 * Copy callback that copies a range between two buffers with
 * emit_so_memcpy(), using the memcpy_state embedded in the
 * struct anv_utrace_flush_copy passed as `cmdstream`.
 *
 * Diff view: removed lines are immediately followed by their replacements.
 * Only the parameter types and units change: offsets and length are now
 * 64-bit byte values and are no longer multiplied by sizeof(uint64_t). The
 * function name is left unchanged in this hunk.
 */
static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from, uint32_t from_offset,
void *ts_to, uint32_t to_offset,
uint32_t count)
void *ts_from, uint64_t from_offset_B,
void *ts_to, uint64_t to_offset_B,
uint64_t size_B)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
struct anv_utrace_flush_copy *flush = cmdstream;
struct anv_address from_addr = (struct anv_address) {
.bo = ts_from, .offset = from_offset * sizeof(uint64_t) };
.bo = ts_from, .offset = from_offset_B };
struct anv_address to_addr = (struct anv_address) {
.bo = ts_to, .offset = to_offset * sizeof(uint64_t) };
.bo = ts_to, .offset = to_offset_B };
anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state,
to_addr, from_addr, count * sizeof(uint64_t));
to_addr, from_addr, size_B);
}
VkResult
@ -196,7 +196,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
}
static void *
anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
anv_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
@ -204,7 +204,7 @@ anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
struct anv_bo *bo = NULL;
UNUSED VkResult result =
anv_bo_pool_alloc(&device->utrace_bo_pool,
align(size_b, 4096),
align(size_B, 4096),
&bo);
assert(result == VK_SUCCESS);
@ -212,7 +212,7 @@ anv_utrace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size_b)
}
static void
anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
anv_utrace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
@ -223,7 +223,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
static void
anv_utrace_record_ts(struct u_trace *ut, void *cs,
void *timestamps, unsigned idx,
void *timestamps, uint64_t offset_B,
uint32_t flags)
{
struct anv_cmd_buffer *cmd_buffer =
@ -238,13 +238,14 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs,
device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
(struct anv_address) {
.bo = bo,
.offset = idx * sizeof(uint64_t) },
.offset = offset_B, },
capture_type);
}
static uint64_t
anv_utrace_read_ts(struct u_trace_context *utctx,
void *timestamps, unsigned idx, void *flush_data)
void *timestamps, uint64_t offset_B,
void *flush_data)
{
struct anv_device *device =
container_of(utctx, struct anv_device, ds.trace_context);
@ -252,7 +253,7 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
struct anv_utrace_flush_copy *flush = flush_data;
/* Only need to stall on results for the first entry: */
if (idx == 0) {
if (offset_B == 0) {
UNUSED VkResult result =
vk_sync_wait(&device->vk,
flush->sync,
@ -262,13 +263,13 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
assert(result == VK_SUCCESS);
}
uint64_t *ts = bo->map;
uint64_t *ts = bo->map + offset_B;
/* Don't translate the no-timestamp marker: */
if (ts[idx] == U_TRACE_NO_TIMESTAMP)
if (*ts == U_TRACE_NO_TIMESTAMP)
return U_TRACE_NO_TIMESTAMP;
return intel_device_info_timebase_scale(device->info, ts[idx]);
return intel_device_info_timebase_scale(device->info, *ts);
}
void
@ -280,8 +281,9 @@ anv_device_utrace_init(struct anv_device *device)
INTEL_DS_API_VULKAN);
u_trace_context_init(&device->ds.trace_context,
&device->ds,
anv_utrace_create_ts_buffer,
anv_utrace_destroy_ts_buffer,
sizeof(uint64_t),
anv_utrace_create_buffer,
anv_utrace_destroy_buffer,
anv_utrace_record_ts,
anv_utrace_read_ts,
anv_utrace_delete_flush_data);

View file

@ -284,7 +284,7 @@ free_chunk(void *ptr)
{
struct u_trace_chunk *chunk = ptr;
chunk->utctx->delete_timestamp_buffer(chunk->utctx, chunk->timestamps);
chunk->utctx->delete_buffer(chunk->utctx, chunk->timestamps);
/* Unref payloads attached to this chunk. */
struct u_trace_payload_buf **payload;
@ -347,7 +347,8 @@ get_chunk(struct u_trace *ut, size_t payload_size)
chunk->utctx = ut->utctx;
chunk->timestamps =
ut->utctx->create_timestamp_buffer(ut->utctx, TIMESTAMP_BUF_SIZE);
ut->utctx->create_buffer(ut->utctx,
chunk->utctx->timestamp_size_bytes * TIMESTAMP_BUF_SIZE);
chunk->last = true;
u_vector_init(&chunk->payloads, 4, sizeof(struct u_trace_payload_buf *));
if (payload_size > 0) {
@ -434,8 +435,9 @@ queue_init(struct u_trace_context *utctx)
void
u_trace_context_init(struct u_trace_context *utctx,
void *pctx,
u_trace_create_ts_buffer create_timestamp_buffer,
u_trace_delete_ts_buffer delete_timestamp_buffer,
uint32_t timestamp_size_bytes,
u_trace_create_buffer create_buffer,
u_trace_delete_buffer delete_buffer,
u_trace_record_ts record_timestamp,
u_trace_read_ts read_timestamp,
u_trace_delete_flush_data delete_flush_data)
@ -444,11 +446,12 @@ u_trace_context_init(struct u_trace_context *utctx,
utctx->enabled_traces = u_trace_state.enabled_traces;
utctx->pctx = pctx;
utctx->create_timestamp_buffer = create_timestamp_buffer;
utctx->delete_timestamp_buffer = delete_timestamp_buffer;
utctx->create_buffer = create_buffer;
utctx->delete_buffer = delete_buffer;
utctx->record_timestamp = record_timestamp;
utctx->read_timestamp = read_timestamp;
utctx->delete_flush_data = delete_flush_data;
utctx->timestamp_size_bytes = timestamp_size_bytes;
utctx->last_time_ns = 0;
utctx->first_time_ns = 0;
@ -591,7 +594,9 @@ process_chunk(void *job, void *gdata, int thread_index)
if (!evt->tp)
continue;
uint64_t ns = utctx->read_timestamp(utctx, chunk->timestamps, idx,
uint64_t ns = utctx->read_timestamp(utctx,
chunk->timestamps,
utctx->timestamp_size_bytes * idx,
chunk->flush_data);
int32_t delta;
@ -754,7 +759,7 @@ u_trace_clone_append(struct u_trace_iterator begin_it,
struct u_trace_iterator end_it,
struct u_trace *into,
void *cmdstream,
u_trace_copy_ts_buffer copy_ts_buffer)
u_trace_copy_buffer copy_buffer)
{
begin_it = sanitize_iterator(begin_it);
end_it = sanitize_iterator(end_it);
@ -770,9 +775,12 @@ u_trace_clone_append(struct u_trace_iterator begin_it,
if (from_chunk == end_it.chunk)
to_copy = MIN2(to_copy, end_it.event_idx - from_idx);
copy_ts_buffer(begin_it.ut->utctx, cmdstream, from_chunk->timestamps,
from_idx, to_chunk->timestamps, to_chunk->num_traces,
to_copy);
copy_buffer(begin_it.ut->utctx, cmdstream,
from_chunk->timestamps,
begin_it.ut->utctx->timestamp_size_bytes * from_idx,
to_chunk->timestamps,
begin_it.ut->utctx->timestamp_size_bytes * to_chunk->num_traces,
begin_it.ut->utctx->timestamp_size_bytes * to_copy);
memcpy(&to_chunk->traces[to_chunk->num_traces],
&from_chunk->traces[from_idx],
@ -853,7 +861,9 @@ u_trace_appendv(struct u_trace *ut,
}
/* record a timestamp for the trace: */
ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, tp->flags);
ut->utctx->record_timestamp(ut, cs, chunk->timestamps,
ut->utctx->timestamp_size_bytes * tp_idx,
tp->flags);
chunk->traces[tp_idx] = (struct u_trace_event) {
.tp = tp,

View file

@ -80,17 +80,17 @@ struct u_trace_printer;
#define U_TRACE_NO_TIMESTAMP ((uint64_t) 0)
/**
* Driver provided callback to create a timestamp buffer which will be
* read by u_trace_read_ts function.
* Driver provided callback to create a buffer which will be read by
* u_trace_read_ts function.
*/
typedef void *(*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
uint32_t timestamps_count);
typedef void *(*u_trace_create_buffer)(struct u_trace_context *utctx,
uint64_t size_B);
/**
* Driver provided callback to delete a timestamp buffer.
* Driver provided callback to delete a buffer.
*/
typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
void *timestamps);
typedef void (*u_trace_delete_buffer)(struct u_trace_context *utctx,
void *buffer);
/**
* Driver provided callback to emit commands into the specified command
@ -104,7 +104,7 @@ typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
typedef void (*u_trace_record_ts)(struct u_trace *ut,
void *cs,
void *timestamps,
unsigned idx,
uint64_t offset_B,
uint32_t flags);
/**
@ -127,7 +127,7 @@ typedef void (*u_trace_record_ts)(struct u_trace *ut,
*/
typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx,
void *timestamps,
unsigned idx,
uint64_t offset_B,
void *flush_data);
/**
@ -168,12 +168,14 @@ struct u_trace_context {
void *pctx;
u_trace_create_ts_buffer create_timestamp_buffer;
u_trace_delete_ts_buffer delete_timestamp_buffer;
u_trace_create_buffer create_buffer;
u_trace_delete_buffer delete_buffer;
u_trace_record_ts record_timestamp;
u_trace_read_ts read_timestamp;
u_trace_delete_flush_data delete_flush_data;
uint64_t timestamp_size_bytes;
FILE *out;
struct u_trace_printer *out_printer;
@ -225,8 +227,9 @@ struct u_trace {
void u_trace_context_init(struct u_trace_context *utctx,
void *pctx,
u_trace_create_ts_buffer create_timestamp_buffer,
u_trace_delete_ts_buffer delete_timestamp_buffer,
uint32_t timestamp_size_bytes,
u_trace_create_buffer create_buffer,
u_trace_delete_buffer delete_buffer,
u_trace_record_ts record_timestamp,
u_trace_read_ts read_timestamp,
u_trace_delete_flush_data delete_flush_data);
@ -261,13 +264,13 @@ struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut);
bool u_trace_iterator_equal(struct u_trace_iterator a,
struct u_trace_iterator b);
/*
 * Driver provided callback used by u_trace_clone_append() to copy a range
 * from one trace buffer (ts_from) to another (ts_to) through `cmdstream`.
 *
 * Diff view: the first typedef is the removed one, the second its
 * replacement. In the new typedef from_offset, to_offset and size_B are all
 * expressed in bytes: u_trace_clone_append() multiplies the event indices
 * and the event count by utctx->timestamp_size_bytes before calling it.
 */
typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from,
uint32_t from_offset,
void *ts_to,
uint32_t to_offset,
uint32_t count);
typedef void (*u_trace_copy_buffer)(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from,
uint64_t from_offset,
void *ts_to,
uint64_t to_offset,
uint64_t size_B);
/**
* Clones tracepoints range into target u_trace.
@ -284,7 +287,7 @@ void u_trace_clone_append(struct u_trace_iterator begin_it,
struct u_trace_iterator end_it,
struct u_trace *into,
void *cmdstream,
u_trace_copy_ts_buffer copy_ts_buffer);
u_trace_copy_buffer copy_buffer);
void u_trace_disable_event_range(struct u_trace_iterator begin_it,
struct u_trace_iterator end_it);