diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
index ed1465b66e9..52c92a73e85 100644
--- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
+++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h
@@ -1,5 +1,6 @@
 /*
  * Copyright © 2024 Collabora Ltd.
+ * Copyright © 2025 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
@@ -21,6 +22,7 @@
 #include "panvk_queue.h"
 
 #include "vk_command_buffer.h"
+#include "vk_synchronization.h"
 
 #include "util/list.h"
 #include "util/perf/u_trace.h"
@@ -85,6 +87,12 @@ get_fbd_size(bool has_zs_ext, uint32_t rt_count)
    (TILER_OOM_CTX_FIELD_OFFSET(fbds) + \
     (PANVK_IR_##_pass##_PASS * sizeof(uint64_t)))
 
+struct panvk_cs_timestamp_query {
+   struct cs_single_link_list_node node;
+   uint64_t reports;
+   uint64_t avail;
+};
+
 struct panvk_cs_occlusion_query {
    struct cs_single_link_list_node node;
    uint64_t syncobj;
@@ -102,6 +110,9 @@ struct panvk_cs_subqueue_context {
       uint64_t tiler_heap;
       uint64_t geom_buf;
       struct cs_single_link_list oq_chain;
+      /* Timestamp queries that need to happen after the current render pass. */
+      struct cs_single_link_list ts_chain;
+      struct cs_single_link_list ts_done_chain;
    } render;
    struct {
      uint32_t counter;
@@ -385,6 +396,7 @@ struct panvk_cmd_buffer {
       struct panvk_push_constant_state push_constants;
       struct panvk_cs_state cs[PANVK_SUBQUEUE_COUNT];
       struct panvk_tls_state tls;
+      bool contains_timestamp_queries;
    } state;
 };
 
@@ -508,6 +520,45 @@ panvk_get_subqueue_stages(enum panvk_subqueue_id subqueue)
    }
 }
 
+static uint32_t
+vk_stage_to_subqueue_mask(VkPipelineStageFlagBits2 vk_stage)
+{
+   assert(util_bitcount64(vk_stage) == 1);
+
+   /* Handle special stages. */
+   if (vk_stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)
+      return BITFIELD_BIT(PANVK_SUBQUEUE_VERTEX_TILER) |
+             BITFIELD_BIT(PANVK_SUBQUEUE_COMPUTE);
+   if (vk_stage == VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)
+      return BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT) |
+             BITFIELD_BIT(PANVK_SUBQUEUE_COMPUTE);
+   if (vk_stage == VK_PIPELINE_STAGE_2_HOST_BIT)
+      /* We need to map host to something, so map it to compute to not
+       * interfere with drawing. */
+      return BITFIELD_BIT(PANVK_SUBQUEUE_COMPUTE);
+
+   /* Handle other compound stages by expanding. */
+   vk_stage = vk_expand_pipeline_stage_flags2(vk_stage);
+
+   VkPipelineStageFlags2 flags[PANVK_SUBQUEUE_COUNT];
+   for (uint32_t sq = 0; sq < PANVK_SUBQUEUE_COUNT; ++sq)
+      flags[sq] = panvk_get_subqueue_stages(sq);
+
+   uint32_t result = 0;
+
+   if (flags[PANVK_SUBQUEUE_VERTEX_TILER] & vk_stage)
+      result |= BITFIELD_BIT(PANVK_SUBQUEUE_VERTEX_TILER);
+
+   if (flags[PANVK_SUBQUEUE_FRAGMENT] & vk_stage)
+      result |= BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT);
+
+   if (flags[PANVK_SUBQUEUE_COMPUTE] & vk_stage)
+      result |= BITFIELD_BIT(PANVK_SUBQUEUE_COMPUTE);
+
+   /* All stages should map to at least one subqueue. */
+   assert(util_bitcount(result) > 0);
+   return result;
+}
+
 void panvk_per_arch(emit_barrier)(struct panvk_cmd_buffer *cmdbuf,
                                   struct panvk_cs_deps deps);
 
 #endif /* PANVK_CMD_BUFFER_H */
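The mapping implemented by vk_stage_to_subqueue_mask() above is easiest to see with a toy model. The sketch below uses made-up per-subqueue stage masks (the real ones come from panvk_get_subqueue_stages()) and only illustrates the reduction from expanded stage bits to a subqueue bitmask; none of the names are driver API:

#include <assert.h>
#include <stdint.h>

enum subqueue { SQ_VERTEX_TILER, SQ_FRAGMENT, SQ_COMPUTE, SQ_COUNT };

#define STAGE_VERTEX   (1u << 0)
#define STAGE_FRAGMENT (1u << 1)
#define STAGE_COMPUTE  (1u << 2)

/* Hypothetical per-subqueue stage masks; the driver derives the real ones
 * from panvk_get_subqueue_stages(). */
static const uint32_t subqueue_stages[SQ_COUNT] = {
   [SQ_VERTEX_TILER] = STAGE_VERTEX,
   [SQ_FRAGMENT] = STAGE_FRAGMENT,
   [SQ_COMPUTE] = STAGE_COMPUTE,
};

static uint32_t
stage_to_subqueue_mask(uint32_t expanded_stages)
{
   uint32_t result = 0;

   for (int sq = 0; sq < SQ_COUNT; ++sq) {
      if (subqueue_stages[sq] & expanded_stages)
         result |= 1u << sq;
   }

   /* Every stage must land on at least one subqueue. */
   assert(result != 0);
   return result;
}

int
main(void)
{
   /* An ALL_COMMANDS-style mask expands to every stage and hits all
    * three subqueues... */
   assert(stage_to_subqueue_mask(STAGE_VERTEX | STAGE_FRAGMENT |
                                 STAGE_COMPUTE) == 0x7);
   /* ...while a compute-only stage involves just the compute subqueue. */
   assert(stage_to_subqueue_mask(STAGE_COMPUTE) == 1u << SQ_COMPUTE);
   return 0;
}

Note that TOP_OF_PIPE, BOTTOM_OF_PIPE and HOST never reach this reduction in the driver; they are special-cased before the expansion.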
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
index e064e65b005..6a620f16dde 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_buffer.c
@@ -36,6 +36,7 @@
 #include "panvk_cmd_desc_state.h"
 #include "panvk_cmd_pool.h"
 #include "panvk_cmd_push_constant.h"
+#include "panvk_cmd_ts.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
 #include "panvk_instance.h"
@@ -187,11 +188,69 @@ finish_cs(struct panvk_cmd_buffer *cmdbuf, uint32_t subqueue)
    cs_finish(&cmdbuf->state.cs[subqueue].builder);
 }
 
+static void
+finish_queries(struct panvk_cmd_buffer *cmdbuf)
+{
+   enum panvk_subqueue_id signal_queue = PANVK_QUERY_TS_INFO_SUBQUEUE;
+
+   struct cs_builder *b = panvk_get_cs_builder(cmdbuf, signal_queue);
+   struct cs_index next = cs_scratch_reg64(b, 6);
+   struct cs_index syncobj = cs_scratch_reg64(b, 2);
+   struct cs_index signal_val = cs_scratch_reg32(b, 4);
+
+   cs_load64_to(
+      b, next, cs_subqueue_ctx_reg(b),
+      offsetof(struct panvk_cs_subqueue_context, render.ts_done_chain.head));
+
+   /* If there are queries to signal, wait for other subqueues before
+    * signalling the syncobjs. */
+   struct panvk_cs_deps deps = {0};
+   deps.dst[signal_queue].wait_subqueue_mask =
+      BITFIELD_MASK(PANVK_SUBQUEUE_COUNT) & ~BITFIELD_BIT(signal_queue);
+   deps.dst[signal_queue].conditional = true;
+   deps.dst[signal_queue].cond_value = next;
+   deps.dst[signal_queue].cond = MALI_CS_CONDITION_NEQUAL;
+
+   /* Wait for DEFERRED_SYNC in addition to LS so that we don't overtake the
+    * deferred SYNC_ADDs added after frag jobs. */
+   u_foreach_bit(i, deps.dst[signal_queue].wait_subqueue_mask)
+      deps.src[i].wait_sb_mask = SB_MASK(LS) | SB_MASK(DEFERRED_SYNC);
+
+   panvk_per_arch(emit_barrier)(cmdbuf, deps);
+
+   cs_single_link_list_for_each_from(b, next, struct panvk_cs_timestamp_query,
+                                     node) {
+      cs_load64_to(b, syncobj, next,
+                   offsetof(struct panvk_cs_timestamp_query, avail));
+
+      cs_move32_to(b, signal_val, 1);
+      cs_sync32_set(b, true, MALI_CS_SYNC_SCOPE_CSG, signal_val, syncobj,
+                    cs_defer(SB_IMM_MASK, SB_ID(DEFERRED_SYNC)));
+   }
+
+   cs_move64_to(b, next, 0);
+   cs_store64(
+      b, next, cs_subqueue_ctx_reg(b),
+      offsetof(struct panvk_cs_subqueue_context, render.ts_done_chain.head));
+   cs_flush_stores(b);
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
 {
    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
 
+   /* Finishing queries requires a barrier. We don't want to do that more
+    * often than necessary. At the end of a primary is usually enough.
+    * Additionally, simultaneous use secondaries also need to flush if they
+    * contain timestamp query writes to avoid adding the same node more than
+    * once into panvk_cs_subqueue_context::render.ts_chain. */
+   const bool sim_use_sec_with_ts =
+      cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
+      (cmdbuf->flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) &&
+      cmdbuf->state.contains_timestamp_queries;
+   if (cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ||
+       unlikely(sim_use_sec_with_ts))
+      finish_queries(cmdbuf);
+
    emit_tls(cmdbuf);
    flush_sync_points(cmdbuf);
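finish_queries() emits CS instructions, but the control flow it encodes is an ordinary list walk: signal the availability syncobj of every node on render.ts_done_chain, then clear the chain head. A host-side C equivalent, with the scoreboard and deferred-sync details omitted; the node type mirrors panvk_cs_timestamp_query, with plain pointers standing in for GPU addresses:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Mirrors panvk_cs_timestamp_query; pointers stand in for GPU addresses. */
struct ts_query_node {
   struct ts_query_node *next;
   uint64_t *reports; /* timestamp report slots (unused here) */
   uint32_t *avail;   /* availability syncobj */
};

/* Host-side equivalent of the loop finish_queries() emits as CS
 * instructions: signal every availability syncobj on the done chain,
 * then clear the chain head. */
static void
signal_done_chain(struct ts_query_node **done_head)
{
   for (struct ts_query_node *n = *done_head; n; n = n->next)
      *n->avail = 1; /* cs_sync32_set(..., signal_val = 1, ...) */

   *done_head = NULL; /* cs_store64(0, ..., ts_done_chain.head) */
}

int
main(void)
{
   uint32_t avail[2] = {0, 0};
   struct ts_query_node q1 = {NULL, NULL, &avail[1]};
   struct ts_query_node q0 = {&q1, NULL, &avail[0]};
   struct ts_query_node *head = &q0;

   signal_done_chain(&head);
   assert(avail[0] == 1 && avail[1] == 1 && head == NULL);
   return 0;
}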
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index 2f3943216c3..1a65da39fb0 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -22,6 +22,7 @@
 #include "panvk_cmd_draw.h"
 #include "panvk_cmd_fb_preload.h"
 #include "panvk_cmd_meta.h"
+#include "panvk_cmd_ts.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
 #include "panvk_image.h"
@@ -3287,6 +3288,60 @@ panvk_per_arch(cmd_flush_draws)(struct panvk_cmd_buffer *cmdbuf)
    }
 }
 
+static void
+handle_deferred_queries(struct panvk_cmd_buffer *cmdbuf)
+{
+   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
+
+   for (uint32_t sq = 0; sq < PANVK_SUBQUEUE_COUNT; ++sq) {
+      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, sq);
+      struct cs_index current = cs_scratch_reg64(b, 0);
+      struct cs_index reports = cs_scratch_reg64(b, 2);
+      struct cs_index next = cs_scratch_reg64(b, 4);
+      int offset = sizeof(uint64_t) * sq;
+
+      cs_load64_to(
+         b, current, cs_subqueue_ctx_reg(b),
+         offsetof(struct panvk_cs_subqueue_context, render.ts_chain.head));
+
+      cs_while(b, MALI_CS_CONDITION_NEQUAL, current) {
+         cs_load64_to(b, reports, current,
+                      offsetof(struct panvk_cs_timestamp_query, reports));
+
+         cs_if(b, MALI_CS_CONDITION_NEQUAL, reports)
+            cs_store_state(b, reports, offset, MALI_CS_STATE_TIMESTAMP,
+                           cs_defer(dev->csf.sb.all_iters_mask, SB_ID(LS)));
+
+         cs_load64_to(b, next, current,
+                      offsetof(struct panvk_cs_timestamp_query, node.next));
+
+         if (sq == PANVK_QUERY_TS_INFO_SUBQUEUE) {
+            /* WAR on panvk_cs_timestamp_query::next. */
+            cs_flush_loads(b);
+
+            struct cs_index tmp = cs_scratch_reg64(b, 6);
+            cs_move64_to(b, tmp, 0);
+            cs_store64(b, tmp, current,
+                       offsetof(struct panvk_cs_timestamp_query, node.next));
+
+            cs_single_link_list_add_tail(
+               b, cs_subqueue_ctx_reg(b),
+               offsetof(struct panvk_cs_subqueue_context, render.ts_done_chain),
+               current, offsetof(struct panvk_cs_timestamp_query, node),
+               cs_scratch_reg_tuple(b, 10, 4));
+         }
+
+         cs_add64(b, current, next, 0);
+      }
+
+      cs_move64_to(b, current, 0);
+      cs_store64(
+         b, current, cs_subqueue_ctx_reg(b),
+         offsetof(struct panvk_cs_subqueue_context, render.ts_chain.head));
+      cs_flush_stores(b);
+   }
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
 {
@@ -3322,6 +3377,8 @@ panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
       if (cmdbuf->state.gfx.render.fbds.gpu || inherits_render_ctx(cmdbuf)) {
          flush_tiling(cmdbuf);
          issue_fragment_jobs(cmdbuf);
+
+         handle_deferred_queries(cmdbuf);
       }
    } else if (!inherits_render_ctx(cmdbuf)) {
       /* If we're suspending the render pass and we didn't inherit the render
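handle_deferred_queries() makes every subqueue walk render.ts_chain once: each writes its deferred timestamp where requested, and the info subqueue additionally moves each node onto render.ts_done_chain so finish_queries() can signal it later. A host-side sketch of that hand-off (simplified stand-ins, not driver code; SQ_INFO plays the role of PANVK_QUERY_TS_INFO_SUBQUEUE):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define SQ_COUNT 3
#define SQ_INFO 2 /* stand-in for PANVK_QUERY_TS_INFO_SUBQUEUE */

struct ts_query_node {
   struct ts_query_node *next;
   uint64_t *reports; /* per-subqueue report slots, or NULL */
};

struct chain {
   struct ts_query_node *head, *tail;
};

static void
chain_add_tail(struct chain *c, struct ts_query_node *n)
{
   if (c->tail)
      c->tail->next = n;
   else
      c->head = n;
   c->tail = n;
}

/* One subqueue's pass over ts_chain: write the timestamp if requested,
 * and on the info subqueue move each node onto ts_done_chain. */
static void
flush_deferred(struct chain *ts, struct chain *done, uint64_t now, int sq)
{
   for (struct ts_query_node *cur = ts->head; cur;) {
      if (cur->reports)
         cur->reports[sq] = now; /* cs_store_state(..., TIMESTAMP, ...) */

      /* Read next *before* unlinking the node; the CS version flushes
       * loads here to avoid the same WAR hazard on node.next. */
      struct ts_query_node *next = cur->next;

      if (sq == SQ_INFO) {
         cur->next = NULL;
         chain_add_tail(done, cur);
      }
      cur = next;
   }
   ts->head = ts->tail = NULL;
}

int
main(void)
{
   uint64_t reports[SQ_COUNT] = {0};
   struct ts_query_node q = {NULL, reports};
   struct chain ts = {&q, &q}, done = {NULL, NULL};

   for (int sq = 0; sq < SQ_COUNT; ++sq) {
      struct chain pass = ts; /* each subqueue walks the same chain */
      flush_deferred(&pass, &done, 100 + sq, sq);
   }
   assert(reports[0] == 100 && reports[2] == 102 && done.head == &q);
   return 0;
}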
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_query.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_query.c
index b384a87ac27..24d8e31c27b 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_query.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_query.c
@@ -1,22 +1,29 @@
 /*
  * Copyright © 2024 Collabora Ltd.
+ * Copyright © 2025 Arm Ltd.
  * SPDX-License-Identifier: MIT
  */
 
 #include <stdint.h>
 
 #include "util/os_time.h"
 
+#include "cs_builder.h"
+
+#include "vk_enum_defines.h"
 #include "vk_log.h"
 #include "vk_synchronization.h"
 
 #include "genxml/gen_macros.h"
 
 #include "panvk_buffer.h"
+#include "panvk_cmd_alloc.h"
 #include "panvk_cmd_buffer.h"
 #include "panvk_cmd_meta.h"
+#include "panvk_cmd_ts.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
 #include "panvk_macros.h"
 #include "panvk_query_pool.h"
+#include "panvk_queue.h"
 
 /* At the API level, a query consists of a status and a result. Both are
  * uninitialized initially. There are these query operations:
@@ -308,6 +315,221 @@ panvk_copy_occlusion_query_results(struct panvk_cmd_buffer *cmd,
    }
 }
 
+static void
+panvk_cmd_reset_timestamp_queries(struct panvk_cmd_buffer *cmd,
+                                  struct panvk_query_pool *pool,
+                                  uint32_t first_query, uint32_t query_count)
+{
+   for (int sq = 0; sq < PANVK_SUBQUEUE_COUNT; ++sq) {
+      struct cs_builder *b = panvk_get_cs_builder(cmd, sq);
+
+      struct cs_index zeros = cs_scratch_reg_tuple(b, 0, 4);
+      struct cs_index zero64 = cs_scratch_reg64(b, 0);
+      struct cs_index addr = cs_scratch_reg64(b, 4);
+      struct cs_index counter = cs_scratch_reg32(b, 6);
+
+      int offset = sq * sizeof(struct panvk_query_report);
+
+      for (uint32_t i = 0; i < zeros.size; i += 2)
+         cs_move64_to(b, cs_scratch_reg64(b, i), 0);
+
+      cs_move32_to(b, counter, query_count);
+      cs_move64_to(b, addr, panvk_query_report_dev_addr(pool, first_query));
+
+      /* Wait for timestamp writes. */
+      cs_wait_slot(b, SB_ID(LS));
+
+      cs_while(b, MALI_CS_CONDITION_GREATER, counter) {
+         /* If the info subqueue is the last one, it can reset the info field in
+          * one store because of the memory layout of the query report values. */
+         STATIC_ASSERT(PANVK_QUERY_TS_INFO_SUBQUEUE ==
+                       PANVK_SUBQUEUE_COUNT - 1);
+         if (sq == PANVK_QUERY_TS_INFO_SUBQUEUE)
+            cs_store(b, zeros, addr, BITFIELD_MASK(zeros.size), offset);
+         else
+            cs_store64(b, zero64, addr, offset);
+
+         cs_add64(b, addr, addr, pool->query_stride);
+         cs_add32(b, counter, counter, -1);
+      }
+
+      cs_flush_stores(b);
+   }
+
+   /* Reset availability from the info subqueue because we also use that queue
+    * to signal the availability later. */
+   struct cs_builder *b =
+      panvk_get_cs_builder(cmd, PANVK_QUERY_TS_INFO_SUBQUEUE);
+   struct cs_index addr = cs_scratch_reg64(b, 16);
+   struct cs_index zero_regs = cs_scratch_reg_tuple(b, 0, 16);
+
+   cs_move64_to(b, addr, panvk_query_available_dev_addr(pool, first_query));
+   reset_queries_batch(b, addr, zero_regs, query_count);
+   cs_flush_stores(b);
+}
+
+static void
+panvk_cs_write_ts_info(struct panvk_cmd_buffer *cmd,
+                       VkPipelineStageFlags2 stage,
+                       struct panvk_query_pool *pool, uint32_t first_query)
+{
+   const uint32_t n_views =
+      MAX2(1, util_bitcount(cmd->state.gfx.render.view_mask));
+
+   /* Store the timestamp info needed during copy. */
+   struct cs_builder *b =
+      panvk_get_cs_builder(cmd, PANVK_QUERY_TS_INFO_SUBQUEUE);
+   struct cs_index addr = cs_scratch_reg64(b, 0);
+   struct cs_index info = cs_scratch_reg64(b, 2);
+   int offset = PANVK_SUBQUEUE_COUNT * sizeof(struct panvk_query_report);
+
+   uint64_t ts_info = panvk_timestamp_info_encode(
+      stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT ? PANVK_QUERY_TS_OP_MIN
+                                                   : PANVK_QUERY_TS_OP_MAX,
+      vk_stage_to_subqueue_mask(stage));
+
+   cs_move64_to(b, info, ts_info);
+   for (uint32_t query = first_query; query < first_query + n_views; ++query) {
+      cs_move64_to(b, addr, panvk_query_report_dev_addr(pool, query));
+      cs_store64(b, info, addr, offset);
+   }
+}
+
+static void
+panvk_add_finished_query(struct panvk_cmd_buffer *cmd,
+                         VkPipelineStageFlags2 stage,
+                         struct panvk_query_pool *pool, uint32_t query)
+{
+   struct cs_builder *b =
+      panvk_get_cs_builder(cmd, PANVK_QUERY_TS_INFO_SUBQUEUE);
+
+   struct pan_ptr new_ts_node = panvk_cmd_alloc_dev_mem(
+      cmd, desc, sizeof(struct panvk_cs_timestamp_query), 8);
+
+   *((struct panvk_cs_timestamp_query *)new_ts_node.cpu) =
+      (struct panvk_cs_timestamp_query){
+         .node = {.next = 0},
+         .reports = panvk_query_report_dev_addr(pool, query),
+         .avail = panvk_query_available_dev_addr(pool, query),
+      };
+
+   struct cs_index new_node_ptr = cs_scratch_reg64(b, 0);
+   cs_move64_to(b, new_node_ptr, new_ts_node.gpu);
+
+   cs_single_link_list_add_tail(
+      b, cs_subqueue_ctx_reg(b),
+      offsetof(struct panvk_cs_subqueue_context, render.ts_done_chain),
+      new_node_ptr, offsetof(struct panvk_cs_timestamp_query, node),
+      cs_scratch_reg_tuple(b, 10, 4));
+}
+
+static void
+panvk_cs_defer_timestamp(struct panvk_cmd_buffer *cmd,
+                         VkPipelineStageFlags2 stage,
+                         struct panvk_query_pool *pool, uint32_t query)
+{
+   /* Deferring top of pipe doesn't make sense. */
+   assert(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT != stage);
+
+   const uint32_t write_sq_mask = vk_stage_to_subqueue_mask(stage);
+   const uint32_t n_views =
+      MAX2(1, util_bitcount(cmd->state.gfx.render.view_mask));
+
+   /* Each subqueue in write_sq_mask must write a timestamp value.
+    * Additionally, the info subqueue needs to move the deferred timestamp
+    * into the list of timestamps to be signalled later, regardless of
+    * whether a timestamp is needed from that subqueue.
+    */
+   for (uint32_t sq = 0; sq < PANVK_SUBQUEUE_COUNT; ++sq) {
+      if (((write_sq_mask | BITFIELD_BIT(PANVK_QUERY_TS_INFO_SUBQUEUE)) &
+           BITFIELD_BIT(sq)) == 0)
+         continue;
+
+      bool write_report =
+         (sq != PANVK_QUERY_TS_INFO_SUBQUEUE) ||
+         (write_sq_mask & BITFIELD_BIT(PANVK_QUERY_TS_INFO_SUBQUEUE)) != 0;
+
+      struct cs_builder *b = panvk_get_cs_builder(cmd, sq);
+
+      for (uint32_t q = query; q < query + n_views; ++q) {
+         struct pan_ptr new_ts_node = panvk_cmd_alloc_dev_mem(
+            cmd, desc, sizeof(struct panvk_cs_timestamp_query), 8);
+
+         *((struct panvk_cs_timestamp_query *)new_ts_node.cpu) =
+            (struct panvk_cs_timestamp_query){
+               .node = {.next = 0},
+               .reports = write_report ? panvk_query_report_dev_addr(pool, q)
+                                       : 0,
+               .avail = panvk_query_available_dev_addr(pool, q),
+            };
+
+         struct cs_index new_node_ptr = cs_scratch_reg64(b, 0);
+         cs_move64_to(b, new_node_ptr, new_ts_node.gpu);
+
+         cs_single_link_list_add_tail(
+            b, cs_subqueue_ctx_reg(b),
+            offsetof(struct panvk_cs_subqueue_context, render.ts_chain),
+            new_node_ptr, offsetof(struct panvk_cs_timestamp_query, node),
+            cs_scratch_reg_tuple(b, 10, 4));
+      }
+   }
+}
+
+static void
+panvk_cs_write_timestamp(struct panvk_cmd_buffer *cmd,
+                         VkPipelineStageFlags2 stage,
+                         struct panvk_query_pool *pool, uint32_t query)
+{
+   struct panvk_device *dev = to_panvk_device(cmd->vk.base.device);
+
+   const uint32_t write_sq_mask = vk_stage_to_subqueue_mask(stage);
+   const uint32_t n_views =
+      MAX2(1, util_bitcount(cmd->state.gfx.render.view_mask));
+
+   for (uint32_t sq = 0; sq < PANVK_SUBQUEUE_COUNT; ++sq) {
+      if ((write_sq_mask & BITFIELD_BIT(sq)) == 0)
+         continue;
+
+      struct cs_builder *b = panvk_get_cs_builder(cmd, sq);
+      struct cs_index addr = cs_scratch_reg64(b, 0);
+      int offset = sq * sizeof(struct panvk_query_report);
+
+      for (uint32_t q = query; q < query + n_views; ++q) {
+         /* Wait for the previous timestamp so they increase monotonically. */
+         cs_wait_slot(b, SB_ID(LS));
+
+         cs_move64_to(b, addr, panvk_query_report_dev_addr(pool, q));
+         cs_store_state(b, addr, offset, MALI_CS_STATE_TIMESTAMP,
+                        cs_defer(dev->csf.sb.all_iters_mask, SB_ID(LS)));
+      }
+   }
+
+   /* Store the queries' syncobjs for signalling at the end of this cmdbuf. */
+   for (uint32_t q = query; q < query + n_views; ++q)
+      panvk_add_finished_query(cmd, stage, pool, q);
+}
+
+static void
+panvk_cmd_write_timestamp_query(struct panvk_cmd_buffer *cmd,
+                                VkPipelineStageFlags2 stage,
+                                struct panvk_query_pool *pool, uint32_t query)
+{
+   /* Store the actual timestamp values per subqueue. */
+   const uint32_t write_sq_mask = vk_stage_to_subqueue_mask(stage);
+
+   /* The timestamp has to be written after RUN_FRAGMENT if we are currently
+    * inside a render pass and the stage covers the fragment subqueue.
+    */
+   const bool in_rp = cmd->state.gfx.render.tiler || inherits_render_ctx(cmd);
+   const bool defer =
+      in_rp && (write_sq_mask & BITFIELD_BIT(PANVK_SUBQUEUE_FRAGMENT));
+
+   if (defer)
+      panvk_cs_defer_timestamp(cmd, stage, pool, query);
+   else
+      panvk_cs_write_timestamp(cmd, stage, pool, query);
+
+   panvk_cs_write_ts_info(cmd, stage, pool, query);
+
+   cmd->state.contains_timestamp_queries = true;
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdResetQueryPool)(VkCommandBuffer commandBuffer,
                                   VkQueryPool queryPool, uint32_t firstQuery,
@@ -324,6 +546,10 @@ panvk_per_arch(CmdResetQueryPool)(VkCommandBuffer commandBuffer,
       panvk_cmd_reset_occlusion_queries(cmd, pool, firstQuery, queryCount);
       break;
    }
+   case VK_QUERY_TYPE_TIMESTAMP: {
+      panvk_cmd_reset_timestamp_queries(cmd, pool, firstQuery, queryCount);
+      break;
+   }
    default:
      unreachable("Unsupported query type");
    }
@@ -377,10 +603,10 @@ panvk_per_arch(CmdWriteTimestamp2)(VkCommandBuffer commandBuffer,
                                    VkPipelineStageFlags2 stage,
                                    VkQueryPool queryPool, uint32_t query)
 {
-   UNUSED VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
-   UNUSED VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
+   VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);
 
-   panvk_stub();
+   panvk_cmd_write_timestamp_query(cmd, stage, pool, query);
 }
 
 VKAPI_ATTR void VKAPI_CALL
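panvk_cmd_ts.h, added below, packs the copy-time metadata into a single 64-bit word: the op in the low 32 bits and the subqueue mask in the high 32 bits. A self-contained round-trip check of that layout, using local stand-ins for the helpers:

#include <assert.h>
#include <stdint.h>

enum ts_op { TS_OP_MAX = 0, TS_OP_MIN = 1 };

/* Same layout as panvk_timestamp_info_encode(): mask high, op low. */
static uint64_t
encode(enum ts_op op, uint64_t sq_mask)
{
   return (sq_mask << 32) | op;
}

int
main(void)
{
   /* Say vertex-tiler (bit 0) and compute (bit 2) are involved. */
   uint64_t info = encode(TS_OP_MIN, 0x5);

   assert((enum ts_op)(uint32_t)info == TS_OP_MIN);   /* ..._get_op() */
   assert((uint32_t)(info >> 32) == 0x5);             /* ..._get_sq_mask() */
   return 0;
}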
diff --git a/src/panfrost/vulkan/panvk_cmd_ts.h b/src/panfrost/vulkan/panvk_cmd_ts.h
new file mode 100644
index 00000000000..3b29b6b117e
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_cmd_ts.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2025 Arm Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef PANVK_CMD_TS_H
+#define PANVK_CMD_TS_H
+
+#ifndef PAN_ARCH
+#error "PAN_ARCH must be defined"
+#endif
+
+#include <stdint.h>
+
+#if PAN_ARCH >= 10
+/* The timestamp info subqueue performs extra tasks like writing the info
+ * field and handling deferred timestamps. To minimize impact on drawing,
+ * choose the compute subqueue. */
+#define PANVK_QUERY_TS_INFO_SUBQUEUE (PANVK_SUBQUEUE_COMPUTE)
+
+enum panvk_query_ts_op {
+   PANVK_QUERY_TS_OP_MAX = 0,
+   PANVK_QUERY_TS_OP_MIN = 1,
+};
+
+static uint64_t
+panvk_timestamp_info_encode(enum panvk_query_ts_op op, uint64_t sq_mask)
+{
+   return (((uint64_t)sq_mask) << 32) | (op);
+}
+
+static enum panvk_query_ts_op
+panvk_timestamp_info_get_op(uint64_t encoded)
+{
+   return ((uint32_t)encoded);
+}
+
+static uint32_t
+panvk_timestamp_info_get_sq_mask(uint64_t encoded)
+{
+   return ((uint32_t)(encoded >> 32));
+}
+#endif
+#endif
diff --git a/src/panfrost/vulkan/panvk_vX_query_pool.c b/src/panfrost/vulkan/panvk_vX_query_pool.c
index 88b0a2d83ed..a73596d7d2f 100644
--- a/src/panfrost/vulkan/panvk_vX_query_pool.c
+++ b/src/panfrost/vulkan/panvk_vX_query_pool.c
@@ -1,8 +1,10 @@
 /*
  * Copyright © 2024 Collabora Ltd.
+ * Copyright © 2025 Arm Ltd.
  * SPDX-License-Identifier: MIT
 */
 
+#include "util/macros.h"
 #include "vk_log.h"
 
 #include "pan_props.h"
@@ -10,6 +12,12 @@
 #include "panvk_entrypoints.h"
 #include "panvk_query_pool.h"
 
+#include "panvk_cmd_ts.h"
+
+#if PAN_ARCH >= 10
+#include "panvk_queue.h"
+#endif
+
 #define PANVK_QUERY_TIMEOUT 2000000000ull
 
 VKAPI_ATTR VkResult VKAPI_CALL
@@ -41,6 +49,13 @@ panvk_per_arch(CreateQueryPool)(VkDevice _device,
 #endif
       break;
    }
+#if PAN_ARCH >= 10
+   case VK_QUERY_TYPE_TIMESTAMP: {
+      /* One value per subqueue + 1 value for metadata. */
+      reports_per_query = PANVK_SUBQUEUE_COUNT + 1;
+      break;
+   }
+#endif
    default:
      unreachable("Unsupported query type");
    }
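The result-copy path for timestamps is not part of this diff, but panvk_cs_write_ts_info() selects PANVK_QUERY_TS_OP_MIN for TOP_OF_PIPE (the earliest point any involved subqueue reached) and PANVK_QUERY_TS_OP_MAX for everything else (the latest point all involved subqueues reached). Assuming the copy path honours the encoded info word, the per-query reduction could look like the following sketch; combine_reports() and SUBQUEUE_COUNT are illustrative names, not driver API:

#include <assert.h>
#include <stdint.h>

#define SUBQUEUE_COUNT 3

enum ts_op { TS_OP_MAX = 0, TS_OP_MIN = 1 };

/* Reduce the per-subqueue reports of one query to a single value,
 * honouring the op and subqueue mask from the encoded info word. */
static uint64_t
combine_reports(const uint64_t reports[SUBQUEUE_COUNT], uint64_t info)
{
   enum ts_op op = (enum ts_op)(uint32_t)info;
   uint32_t sq_mask = (uint32_t)(info >> 32);
   uint64_t result = op == TS_OP_MIN ? UINT64_MAX : 0;

   for (int sq = 0; sq < SUBQUEUE_COUNT; ++sq) {
      if (!(sq_mask & (1u << sq)))
         continue;
      result = op == TS_OP_MIN ? (reports[sq] < result ? reports[sq] : result)
                               : (reports[sq] > result ? reports[sq] : result);
   }
   return result;
}

int
main(void)
{
   const uint64_t reports[SUBQUEUE_COUNT] = {40, 10, 30};
   uint64_t info = ((uint64_t)0x5 << 32) | TS_OP_MIN;

   /* MIN over subqueues 0 and 2 ignores the unmasked 10 -> 30. */
   assert(combine_reports(reports, info) == 30);
   return 0;
}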