mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
panvk/csf: add u_trace to panvk_cmd_buffer
There is one u_trace per subqueue to record trace events. When tracing is enabled, trace_begin_cmdbuf and trace_end_cmdbuf will emit trace events to u_trace and emit timestamp writes to the command streams. The trace events are buffered in u_trace and are not flushed for processing yet.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32360>
This commit is contained in:
parent
06cc6e82cf
commit
39824d70b8
4 changed files with 131 additions and 1 deletions
|
|
@ -23,6 +23,7 @@
|
|||
#include "vk_command_buffer.h"
|
||||
|
||||
#include "util/list.h"
|
||||
#include "util/perf/u_trace.h"
|
||||
|
||||
#define MAX_VBS 16
|
||||
#define MAX_RTS 8
|
||||
|
|
@ -377,6 +378,10 @@ struct panvk_cmd_buffer {
|
|||
|
||||
uint32_t flush_id;
|
||||
|
||||
struct {
|
||||
struct u_trace uts[PANVK_SUBQUEUE_COUNT];
|
||||
} utrace;
|
||||
|
||||
struct {
|
||||
struct panvk_cmd_graphics_state gfx;
|
||||
struct panvk_cmd_compute_state compute;
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@
|
|||
#include "panvk_instance.h"
|
||||
#include "panvk_physical_device.h"
|
||||
#include "panvk_priv_bo.h"
|
||||
#include "panvk_tracepoints.h"
|
||||
#include "panvk_utrace.h"
|
||||
|
||||
#include "pan_desc.h"
|
||||
#include "pan_encoder.h"
|
||||
|
|
@ -178,6 +180,8 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
|
|||
}
|
||||
}
|
||||
|
||||
trace_end_cmdbuf(&cmdbuf->utrace.uts[subqueue], cmdbuf, cmdbuf->flags);
|
||||
|
||||
cs_finish(&cmdbuf->state.cs[subqueue].builder);
|
||||
}
|
||||
|
||||
|
|
@ -724,6 +728,7 @@ panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
|
|||
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
|
||||
struct panvk_cmd_pool *pool =
|
||||
container_of(vk_cmdbuf->pool, struct panvk_cmd_pool, vk);
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
|
||||
vk_command_buffer_reset(&cmdbuf->vk);
|
||||
|
||||
|
|
@ -733,6 +738,12 @@ panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
|
|||
list_splicetail(&cmdbuf->push_sets, &pool->push_sets);
|
||||
list_inithead(&cmdbuf->push_sets);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(cmdbuf->utrace.uts); i++) {
|
||||
struct u_trace *ut = &cmdbuf->utrace.uts[i];
|
||||
u_trace_fini(ut);
|
||||
u_trace_init(ut, &dev->utrace.utctx);
|
||||
}
|
||||
|
||||
memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
|
||||
init_cs_builders(cmdbuf);
|
||||
}
|
||||
|
|
@ -746,6 +757,9 @@ panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
|
|||
container_of(vk_cmdbuf->pool, struct panvk_cmd_pool, vk);
|
||||
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(cmdbuf->utrace.uts); i++)
|
||||
u_trace_fini(&cmdbuf->utrace.uts[i]);
|
||||
|
||||
panvk_pool_cleanup(&cmdbuf->cs_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->desc_pool);
|
||||
panvk_pool_cleanup(&cmdbuf->tls_pool);
|
||||
|
|
@ -814,6 +828,9 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
|
|||
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool,
|
||||
&tls_pool_props);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(cmdbuf->utrace.uts); i++)
|
||||
u_trace_init(&cmdbuf->utrace.uts[i], &device->utrace.utctx);
|
||||
|
||||
init_cs_builders(cmdbuf);
|
||||
*cmdbuf_out = &cmdbuf->vk;
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -843,6 +860,9 @@ panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
|
|||
|
||||
panvk_per_arch(cmd_inherit_render_state)(cmdbuf, pBeginInfo);
|
||||
|
||||
for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++)
|
||||
trace_begin_cmdbuf(&cmdbuf->utrace.uts[i], cmdbuf);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -901,6 +921,12 @@ panvk_per_arch(CmdExecuteCommands)(VkCommandBuffer commandBuffer,
|
|||
cs_move64_to(prim_b, addr, cs_root_chunk_gpu_addr(sec_b));
|
||||
cs_move32_to(prim_b, size, cs_root_chunk_size(sec_b));
|
||||
cs_call(prim_b, addr, size);
|
||||
|
||||
struct u_trace *prim_ut = &primary->utrace.uts[j];
|
||||
struct u_trace *sec_ut = &secondary->utrace.uts[j];
|
||||
u_trace_clone_append(u_trace_begin_iterator(sec_ut),
|
||||
u_trace_end_iterator(sec_ut), prim_ut, prim_b,
|
||||
panvk_per_arch(utrace_copy_buffer));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,14 +5,93 @@
|
|||
|
||||
#include "panvk_utrace.h"
|
||||
|
||||
#include "genxml/cs_builder.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_priv_bo.h"
|
||||
|
||||
static void
|
||||
cmd_write_timestamp(struct cs_builder *b, mali_ptr addr)
|
||||
{
|
||||
const struct cs_index addr_reg = cs_scratch_reg64(b, 0);
|
||||
/* abuse DEFERRED_SYNC */
|
||||
const struct cs_async_op async = cs_defer(
|
||||
SB_ALL_ITERS_MASK | SB_MASK(DEFERRED_FLUSH), SB_ID(DEFERRED_SYNC));
|
||||
|
||||
cs_move64_to(b, addr_reg, addr);
|
||||
cs_store_state(b, addr_reg, 0, MALI_CS_STATE_TIMESTAMP, async);
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_copy_data(struct cs_builder *b, mali_ptr dst_addr, mali_ptr src_addr,
|
||||
uint32_t size)
|
||||
{
|
||||
assert((dst_addr | src_addr | size) % sizeof(uint32_t) == 0);
|
||||
|
||||
/* wait for timestamp writes */
|
||||
cs_wait_slot(b, SB_ID(DEFERRED_SYNC), false);
|
||||
|
||||
/* Depending on where this is called from, we could potentially use SR
|
||||
* registers or copy with a compute job.
|
||||
*/
|
||||
const struct cs_index dst_addr_reg = cs_scratch_reg64(b, 0);
|
||||
const struct cs_index src_addr_reg = cs_scratch_reg64(b, 2);
|
||||
const uint32_t temp_count = CS_REG_SCRATCH_COUNT - 4;
|
||||
|
||||
while (size) {
|
||||
cs_move64_to(b, dst_addr_reg, dst_addr);
|
||||
cs_move64_to(b, src_addr_reg, src_addr);
|
||||
|
||||
const uint32_t max_offset = 1 << 16;
|
||||
uint32_t copy_count = MIN2(size, max_offset) / sizeof(uint32_t);
|
||||
uint32_t offset = 0;
|
||||
while (copy_count) {
|
||||
const uint32_t count = MIN2(copy_count, temp_count);
|
||||
const struct cs_index reg = cs_scratch_reg_tuple(b, 4, count);
|
||||
|
||||
cs_load_to(b, reg, src_addr_reg, BITFIELD_MASK(count), offset);
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
cs_store(b, reg, dst_addr_reg, BITFIELD_MASK(count), offset);
|
||||
|
||||
copy_count -= count;
|
||||
offset += count * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
dst_addr += offset;
|
||||
src_addr += offset;
|
||||
size -= offset;
|
||||
}
|
||||
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
}
|
||||
|
||||
static struct cs_builder *
|
||||
get_builder(struct panvk_cmd_buffer *cmdbuf, struct u_trace *ut)
|
||||
{
|
||||
const uint32_t subqueue = ut - cmdbuf->utrace.uts;
|
||||
assert(subqueue < PANVK_SUBQUEUE_COUNT);
|
||||
|
||||
return panvk_get_cs_builder(cmdbuf, subqueue);
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
|
||||
uint64_t offset_B, uint32_t flags)
|
||||
{
|
||||
struct cs_builder *b = get_builder(cs, ut);
|
||||
const struct panvk_priv_bo *bo = timestamps;
|
||||
const mali_ptr addr = bo->addr.dev + offset_B;
|
||||
|
||||
cmd_write_timestamp(b, addr);
|
||||
}
|
||||
|
||||
/*
 * Initialize the u_trace context embedded in `dev` (dev->utrace.utctx) via
 * u_trace_context_init(), registering the panvk buffer create/delete and
 * timestamp callbacks.
 *
 * NOTE(review): this span is a rendered diff hunk.  The argument line that
 * passes NULL ahead of panvk_utrace_read_ts looks like the removed
 * pre-commit version, and the two lines after it (which register
 * panvk_utrace_record_ts) look like its replacement -- confirm against the
 * actual file before relying on the exact argument list.
 */
void
|
||||
panvk_per_arch(utrace_context_init)(struct panvk_device *dev)
|
||||
{
|
||||
u_trace_context_init(&dev->utrace.utctx, NULL, sizeof(uint64_t), 0,
|
||||
panvk_utrace_create_buffer, panvk_utrace_delete_buffer,
|
||||
NULL, panvk_utrace_read_ts, NULL, NULL, NULL);
|
||||
panvk_utrace_record_ts, panvk_utrace_read_ts, NULL,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -20,3 +99,18 @@ panvk_per_arch(utrace_context_fini)(struct panvk_device *dev)
|
|||
{
|
||||
u_trace_context_fini(&dev->utrace.utctx);
|
||||
}
|
||||
|
||||
void
|
||||
panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
|
||||
void *cmdstream, void *ts_from,
|
||||
uint64_t from_offset, void *ts_to,
|
||||
uint64_t to_offset, uint64_t size_B)
|
||||
{
|
||||
struct cs_builder *b = cmdstream;
|
||||
const struct panvk_priv_bo *src_bo = ts_from;
|
||||
const struct panvk_priv_bo *dst_bo = ts_to;
|
||||
const mali_ptr src_addr = src_bo->addr.dev + from_offset;
|
||||
const mali_ptr dst_addr = dst_bo->addr.dev + to_offset;
|
||||
|
||||
cmd_copy_data(b, dst_addr, src_addr, size_B);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,11 @@ uint64_t panvk_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
|
|||
void panvk_per_arch(utrace_context_init)(struct panvk_device *dev);
|
||||
void panvk_per_arch(utrace_context_fini)(struct panvk_device *dev);
|
||||
|
||||
void panvk_per_arch(utrace_copy_buffer)(struct u_trace_context *utctx,
|
||||
void *cmdstream, void *ts_from,
|
||||
uint64_t from_offset, void *ts_to,
|
||||
uint64_t to_offset, uint64_t size_B);
|
||||
|
||||
#else /* PAN_ARCH >= 10 */
|
||||
|
||||
static inline void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue