mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 04:50:11 +01:00
panvk: Add timestamp copy
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Acked-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34932>
This commit is contained in:
parent
92c4dfe6ea
commit
369b3826fd
3 changed files with 217 additions and 0 deletions
|
|
@ -1,11 +1,97 @@
|
|||
/*
|
||||
* Copyright 2024 Collabora Ltd.
|
||||
* Copyright 2025 Arm Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#include "compiler/libcl/libcl.h"
|
||||
#include "compiler/libcl/libcl_vk.h"
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include "vulkan/panvk_cmd_ts.h"
|
||||
|
||||
#if (PAN_ARCH >= 10)
|
||||
static inline uint64_t
|
||||
compute_timestamp_query_result(global uint64_t *report_addr,
|
||||
uint32_t report_count, uint32_t op,
|
||||
uint32_t sq_mask, bool *available)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
|
||||
/* Compute min/max and keep track of which streams had non-zero results. */
|
||||
switch (op) {
|
||||
case PANVK_QUERY_TS_OP_MIN: {
|
||||
result = ULONG_MAX;
|
||||
for (uint32_t i = 0; i < report_count - 1; ++i) {
|
||||
uint64_t r = report_addr[i];
|
||||
if (r != 0 && (sq_mask & (1 << i))) {
|
||||
result = min(result, report_addr[i]);
|
||||
sq_mask &= ~(1 << i);
|
||||
}
|
||||
}
|
||||
if (result == ULONG_MAX)
|
||||
result = 0;
|
||||
break;
|
||||
}
|
||||
case PANVK_QUERY_TS_OP_MAX: {
|
||||
for (uint32_t i = 0; i < report_count - 1; ++i) {
|
||||
uint64_t r = report_addr[i];
|
||||
if (r != 0 && (sq_mask & (1 << i))) {
|
||||
result = max(result, report_addr[i]);
|
||||
sq_mask &= ~(1 << i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Invalid timestamp op");
|
||||
break;
|
||||
}
|
||||
|
||||
/* The result is available if all subqueues have written their value. */
|
||||
*available = sq_mask == 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Kernel writing the result of one timestamp query per invocation.
 *
 * Invocation i handles query (first_query + i) of the pool located at
 * pool_addr (queries are query_stride bytes apart) and writes to
 * dst_addr + i * dst_stride. Invocations with i >= query_count do nothing.
 * available_addr and query_type are not used by this kernel.
 */
KERNEL(1)
panlib_copy_ts_query_result(uint64_t pool_addr, global uint32_t *available_addr,
                            uint32_t query_stride, uint32_t first_query,
                            uint32_t query_count, uint64_t dst_addr,
                            uint64_t dst_stride, uint32_t query_type,
                            uint32_t flags, uint32_t report_count)
{
   const uint32_t invocation = cl_global_id.x;

   /* Surplus invocations have no query to process. */
   if (invocation >= query_count)
      return;

   const uint32_t query_idx = first_query + invocation;
   global uint64_t *reports =
      (global uint64_t *)(pool_addr + ((uint64_t)query_idx * query_stride));
   uintptr_t out = dst_addr + ((uint64_t)invocation * dst_stride);

   /* The last report is always metadata: it encodes the reduction op and
    * the mask of subqueues contributing a value. */
   const uint64_t metadata = reports[report_count - 1];
   const uint32_t ts_op = panvk_timestamp_info_get_op(metadata);
   const uint32_t expected_sq_mask = panvk_timestamp_info_get_sq_mask(metadata);

   uint64_t value = 0;
   bool is_available = false;

   /* If no subqueue should write a result, the query is uninitialized and
    * stays unavailable with a zero value. */
   if (expected_sq_mask != 0) {
      value = compute_timestamp_query_result(reports, report_count, ts_op,
                                             expected_sq_mask, &is_available);
   }

   /* The value goes to slot 0 when it is available or when partial results
    * were requested. */
   if (is_available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
      vk_write_query(out, 0, flags, value);

   /* The availability state goes to slot 1 when requested. */
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      vk_write_query(out, 1, flags, is_available);
}
|
||||
#endif
|
||||
|
||||
#if (PAN_ARCH >= 6 && PAN_ARCH < 10)
|
||||
static inline void
|
||||
write_occlusion_query_result(uintptr_t dst_addr, int32_t idx, uint32_t flags,
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
#include "panvk_cmd_alloc.h"
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#include "panvk_cmd_meta.h"
|
||||
#include "panvk_cmd_precomp.h"
|
||||
#include "panvk_cmd_ts.h"
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_entrypoints.h"
|
||||
|
|
@ -530,6 +531,88 @@ panvk_cmd_write_timestamp_query(struct panvk_cmd_buffer *cmd,
|
|||
cmd->state.contains_timestamp_queries = true;
|
||||
}
|
||||
|
||||
static void
|
||||
panvk_copy_timestamp_query_results(struct panvk_cmd_buffer *cmd,
|
||||
struct panvk_query_pool *pool,
|
||||
uint32_t first_query, uint32_t query_count,
|
||||
uint64_t dst_buffer_addr,
|
||||
VkDeviceSize stride,
|
||||
VkQueryResultFlags flags)
|
||||
{
|
||||
/*
|
||||
* Step 1:
|
||||
* The point of this is to have each subqueue "save" its own value
|
||||
* into a buffer, such that any following query operations like reset
|
||||
* don't have to worry about destroying the result before other
|
||||
* subqueues are done with it.
|
||||
*/
|
||||
|
||||
uint32_t query_stride = pool->query_stride;
|
||||
size_t buf_sz = query_count * query_stride;
|
||||
struct pan_ptr intermediate_buf =
|
||||
panvk_cmd_alloc_dev_mem(cmd, desc, buf_sz, 16);
|
||||
|
||||
for (uint32_t sq = 0; sq < PANVK_SUBQUEUE_COUNT; ++sq) {
|
||||
struct cs_builder *b = panvk_get_cs_builder(cmd, sq);
|
||||
uint32_t sq_offset = sq * sizeof(uint64_t);
|
||||
|
||||
struct cs_index src = cs_scratch_reg64(b, 0);
|
||||
struct cs_index dst = cs_scratch_reg64(b, 2);
|
||||
struct cs_index tmp = cs_scratch_reg64(b, 4);
|
||||
struct cs_index tmp2 = cs_scratch_reg64(b, 6);
|
||||
|
||||
/* Wait for STORE_STATEs to finish. */
|
||||
cs_wait_slot(b, SB_ID(LS));
|
||||
|
||||
cs_move64_to(b, src, panvk_query_report_dev_addr(pool, first_query));
|
||||
cs_move64_to(b, dst, intermediate_buf.gpu);
|
||||
|
||||
struct cs_index count = cs_scratch_reg32(b, 8);
|
||||
cs_move32_to(b, count, query_count);
|
||||
cs_while(b, MALI_CS_CONDITION_GREATER, count) {
|
||||
cs_load64_to(b, tmp, src, sq_offset);
|
||||
if (sq == PANVK_QUERY_TS_INFO_SUBQUEUE) {
|
||||
assert(PANVK_QUERY_TS_INFO_SUBQUEUE == PANVK_SUBQUEUE_COUNT - 1);
|
||||
cs_load64_to(b, tmp2, src, sq_offset + 8);
|
||||
}
|
||||
cs_store64(b, tmp, dst, sq_offset);
|
||||
if (sq == PANVK_QUERY_TS_INFO_SUBQUEUE)
|
||||
cs_store64(b, tmp2, dst, sq_offset + 8);
|
||||
|
||||
cs_add64(b, src, src, query_stride);
|
||||
cs_add64(b, dst, dst, query_stride);
|
||||
cs_add32(b, count, count, -1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Make sure C waits for all copies to be done. */
|
||||
struct panvk_cs_deps deps = {0};
|
||||
deps.dst[PANVK_SUBQUEUE_COMPUTE].wait_subqueue_mask =
|
||||
BITFIELD_MASK(PANVK_SUBQUEUE_COUNT) & ~BITFIELD_BIT(PANVK_SUBQUEUE_COMPUTE);
|
||||
u_foreach_bit(i, deps.dst[PANVK_SUBQUEUE_COMPUTE].wait_subqueue_mask)
|
||||
deps.src[i].wait_sb_mask = SB_MASK(LS);
|
||||
panvk_per_arch(emit_barrier)(cmd, deps);
|
||||
|
||||
/* Step 2: Copy from the intermediate into the application buffer. */
|
||||
|
||||
const struct panlib_copy_ts_query_result_args push = {
|
||||
.pool_addr = intermediate_buf.gpu,
|
||||
.available_addr = panvk_query_available_dev_addr(pool, first_query),
|
||||
.query_stride = pool->query_stride,
|
||||
/* The intermediate buffer starts at first_query. */
|
||||
.first_query = 0,
|
||||
.query_count = query_count,
|
||||
.report_count = pool->reports_per_query,
|
||||
.dst_addr = dst_buffer_addr,
|
||||
.dst_stride = stride,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmd);
|
||||
panlib_copy_ts_query_result_struct(&precomp_ctx, panlib_1d(query_count),
|
||||
PANLIB_BARRIER_NONE, push);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_per_arch(CmdResetQueryPool)(VkCommandBuffer commandBuffer,
|
||||
VkQueryPool queryPool, uint32_t firstQuery,
|
||||
|
|
@ -627,6 +710,13 @@ panvk_per_arch(CmdCopyQueryPoolResults)(
|
|||
dst_buffer_addr, stride, flags);
|
||||
break;
|
||||
}
|
||||
#if PAN_ARCH >= 10
|
||||
case VK_QUERY_TYPE_TIMESTAMP: {
|
||||
panvk_copy_timestamp_query_results(cmd, pool, firstQuery, queryCount,
|
||||
dst_buffer_addr, stride, flags);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -177,6 +177,39 @@ cpu_write_occlusion_query_result(void *dst, uint32_t idx,
|
|||
cpu_write_query_result(dst, idx, flags, result);
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
static void
|
||||
cpu_write_timestamp_query_result(void *dst, uint32_t idx,
|
||||
VkQueryResultFlags flags,
|
||||
const struct panvk_query_report *src,
|
||||
unsigned input_value_count)
|
||||
{
|
||||
enum panvk_query_ts_op op =
|
||||
panvk_timestamp_info_get_op(src[input_value_count - 1].value);
|
||||
uint32_t sq_mask =
|
||||
panvk_timestamp_info_get_sq_mask(src[input_value_count - 1].value);
|
||||
|
||||
uint64_t result = op == PANVK_QUERY_TS_OP_MIN ? UINT64_MAX : 0;
|
||||
|
||||
for (uint32_t idx = 0; idx < input_value_count - 1; ++idx) {
|
||||
if ((sq_mask & BITFIELD_BIT(idx)) == 0)
|
||||
continue;
|
||||
if (src[idx].value == 0)
|
||||
continue;
|
||||
|
||||
if (op == PANVK_QUERY_TS_OP_MIN)
|
||||
result = MIN2(result, src[idx].value);
|
||||
else
|
||||
result = MAX2(result, src[idx].value);
|
||||
}
|
||||
|
||||
if (op == PANVK_QUERY_TS_OP_MIN && result == UINT64_MAX)
|
||||
result = 0;
|
||||
|
||||
cpu_write_query_result(dst, idx, flags, result);
|
||||
}
|
||||
#endif
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
panvk_per_arch(GetQueryPoolResults)(VkDevice _device, VkQueryPool queryPool,
|
||||
uint32_t firstQuery, uint32_t queryCount,
|
||||
|
|
@ -218,6 +251,14 @@ panvk_per_arch(GetQueryPoolResults)(VkDevice _device, VkQueryPool queryPool,
|
|||
pool->reports_per_query);
|
||||
break;
|
||||
}
|
||||
#if PAN_ARCH >= 10
|
||||
case VK_QUERY_TYPE_TIMESTAMP: {
|
||||
if (write_results)
|
||||
cpu_write_timestamp_query_result(dst, 0, flags, src,
|
||||
pool->reports_per_query);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue