mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 17:08:20 +02:00
New flags allow for resetting query pools on create and configuring new resolve options on render pass resolves. Reviewed-by: Aitor Camacho <aitor@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41663>
429 lines
14 KiB
C
429 lines
14 KiB
C
/*
|
|
* Copyright 2024 Valve Corporation
|
|
* Copyright 2024 Alyssa Rosenzweig
|
|
* Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
|
|
* Copyright 2025 LunarG, Inc.
|
|
* Copyright 2025 Google LLC
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "kk_query_pool.h"
|
|
|
|
#include "kk_bo.h"
|
|
#include "kk_buffer.h"
|
|
#include "kk_cmd_buffer.h"
|
|
#include "kk_device.h"
|
|
#include "kk_encoder.h"
|
|
#include "kk_entrypoints.h"
|
|
#include "kk_physical_device.h"
|
|
#include "kk_query_table.h"
|
|
|
|
/* One 64-bit result slot. A query owns kk_reports_per_query() consecutive
 * slots. */
struct kk_query_report {
   uint64_t value;
};
|
|
|
|
static inline bool
|
|
kk_has_available(const struct kk_query_pool *pool)
|
|
{
|
|
return pool->vk.query_type != VK_QUERY_TYPE_TIMESTAMP;
|
|
}
|
|
|
|
uint16_t *
|
|
kk_pool_oq_index_ptr(const struct kk_query_pool *pool)
|
|
{
|
|
return (uint16_t *)((uint8_t *)pool->bo->cpu + pool->query_start);
|
|
}
|
|
|
|
static uint32_t
|
|
kk_reports_per_query(struct kk_query_pool *pool)
|
|
{
|
|
switch (pool->vk.query_type) {
|
|
case VK_QUERY_TYPE_OCCLUSION:
|
|
case VK_QUERY_TYPE_TIMESTAMP:
|
|
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
|
return 1;
|
|
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
|
return util_bitcount(pool->vk.pipeline_statistics);
|
|
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
|
// Primitives succeeded and primitives needed
|
|
return 2;
|
|
default:
|
|
UNREACHABLE("Unsupported query type");
|
|
}
|
|
}
|
|
|
|
static uint32_t *
|
|
kk_query_available_map(struct kk_query_pool *pool, uint32_t query)
|
|
{
|
|
assert(kk_has_available(pool));
|
|
assert(query < pool->vk.query_count);
|
|
return (uint32_t *)pool->bo->cpu + query;
|
|
}
|
|
|
|
static uint64_t
|
|
kk_query_offset(struct kk_query_pool *pool, uint32_t query)
|
|
{
|
|
assert(query < pool->vk.query_count);
|
|
return pool->query_start + query * pool->query_stride;
|
|
}
|
|
|
|
static uint64_t
|
|
kk_query_report_addr(struct kk_device *dev, struct kk_query_pool *pool,
|
|
uint32_t query)
|
|
{
|
|
if (pool->oq_queries) {
|
|
uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
|
|
return dev->occlusion_queries.bo->gpu +
|
|
(oq_index[query] * sizeof(uint64_t));
|
|
} else {
|
|
return pool->bo->gpu + kk_query_offset(pool, query);
|
|
}
|
|
}
|
|
|
|
static uint64_t
|
|
kk_query_available_addr(struct kk_query_pool *pool, uint32_t query)
|
|
{
|
|
assert(kk_has_available(pool));
|
|
assert(query < pool->vk.query_count);
|
|
return pool->bo->gpu + query * sizeof(uint32_t);
|
|
}
|
|
|
|
static struct kk_query_report *
|
|
kk_query_report_map(struct kk_device *dev, struct kk_query_pool *pool,
|
|
uint32_t query)
|
|
{
|
|
if (pool->oq_queries) {
|
|
uint64_t *queries = (uint64_t *)(dev->occlusion_queries.bo->cpu);
|
|
uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
|
|
|
|
return (struct kk_query_report *)&queries[oq_index[query]];
|
|
} else {
|
|
return (void *)((char *)pool->bo->cpu + kk_query_offset(pool, query));
|
|
}
|
|
}
|
|
|
|
static void
|
|
host_zero_queries(struct kk_device *dev, struct kk_query_pool *pool,
|
|
uint32_t first_index, uint32_t num_queries,
|
|
bool set_available)
|
|
{
|
|
for (uint32_t i = 0; i < num_queries; i++) {
|
|
struct kk_query_report *reports =
|
|
kk_query_report_map(dev, pool, first_index + i);
|
|
|
|
uint64_t value = 0;
|
|
if (kk_has_available(pool)) {
|
|
uint32_t *available = kk_query_available_map(pool, first_index + i);
|
|
*available = set_available;
|
|
} else {
|
|
value = set_available ? 0 : UINT64_MAX;
|
|
}
|
|
|
|
for (unsigned j = 0; j < kk_reports_per_query(pool); ++j) {
|
|
reports[j].value = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
kk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator,
|
|
VkQueryPool *pQueryPool)
|
|
{
|
|
VK_FROM_HANDLE(kk_device, dev, device);
|
|
struct kk_query_pool *pool;
|
|
VkResult result = VK_SUCCESS;
|
|
|
|
pool =
|
|
vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool));
|
|
if (!pool)
|
|
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
bool occlusion = pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION;
|
|
unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0;
|
|
|
|
/* We place the availability first and then data */
|
|
pool->query_start = 0;
|
|
if (kk_has_available(pool)) {
|
|
pool->query_start = align(pool->vk.query_count * sizeof(uint32_t),
|
|
sizeof(struct kk_query_report));
|
|
}
|
|
|
|
uint32_t reports_per_query = kk_reports_per_query(pool);
|
|
pool->query_stride = reports_per_query * sizeof(struct kk_query_report);
|
|
|
|
if (pool->vk.query_count > 0) {
|
|
uint32_t bo_size = pool->query_start;
|
|
|
|
/* For occlusion queries, we stick the query index remapping here */
|
|
if (occlusion_queries)
|
|
bo_size += sizeof(uint16_t) * pool->vk.query_count;
|
|
else
|
|
bo_size += pool->query_stride * pool->vk.query_count;
|
|
|
|
result = kk_alloc_bo(dev, &dev->vk.base, bo_size, 8, &pool->bo);
|
|
if (result != VK_SUCCESS) {
|
|
kk_DestroyQueryPool(device, kk_query_pool_to_handle(pool), pAllocator);
|
|
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
}
|
|
|
|
/* TODO_KOSMICKRISP Timestamps */
|
|
}
|
|
|
|
uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
|
|
|
|
for (unsigned i = 0; i < occlusion_queries; ++i) {
|
|
uint64_t zero = 0;
|
|
unsigned index;
|
|
|
|
VkResult result =
|
|
kk_query_table_add(dev, &dev->occlusion_queries, zero, &index);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
kk_DestroyQueryPool(device, kk_query_pool_to_handle(pool), pAllocator);
|
|
return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
}
|
|
|
|
/* We increment as we go so we can clean up properly if we run out */
|
|
assert(pool->oq_queries < occlusion_queries);
|
|
oq_index[pool->oq_queries++] = index;
|
|
}
|
|
|
|
if (pCreateInfo->flags & VK_QUERY_POOL_CREATE_RESET_BIT_KHR)
|
|
host_zero_queries(dev, pool, 0, pool->vk.query_count, false);
|
|
|
|
*pQueryPool = kk_query_pool_to_handle(pool);
|
|
|
|
return result;
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
|
|
const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
VK_FROM_HANDLE(kk_device, dev, device);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
|
|
if (!pool)
|
|
return;
|
|
|
|
uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
|
|
|
|
for (unsigned i = 0; i < pool->oq_queries; ++i) {
|
|
kk_query_table_remove(dev, &dev->occlusion_queries, oq_index[i]);
|
|
}
|
|
|
|
kk_destroy_bo(dev, pool->bo);
|
|
|
|
vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_ResetQueryPool(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery,
|
|
uint32_t queryCount)
|
|
{
|
|
VK_FROM_HANDLE(kk_device, dev, device);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
|
|
host_zero_queries(dev, pool, firstQuery, queryCount, false);
|
|
}
|
|
|
|
static void
|
|
emit_zero_queries(struct kk_cmd_buffer *cmd, struct kk_query_pool *pool,
|
|
uint32_t first_index, uint32_t num_queries,
|
|
bool set_available)
|
|
{
|
|
struct kk_device *dev = kk_cmd_buffer_device(cmd);
|
|
struct libkk_reset_query_args info = {
|
|
.availability = kk_has_available(pool) ? pool->bo->gpu : 0,
|
|
.results = pool->oq_queries ? dev->occlusion_queries.bo->gpu
|
|
: pool->bo->gpu + pool->query_start,
|
|
.oq_index = pool->oq_queries ? pool->bo->gpu + pool->query_start : 0,
|
|
|
|
.first_query = first_index,
|
|
.reports_per_query = kk_reports_per_query(pool),
|
|
.set_available = set_available,
|
|
};
|
|
struct mtl_size grid = {.x = num_queries, .y = 1u, .z = 1u};
|
|
libkk_reset_query_struct(cmd, grid, false, info);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
|
uint32_t firstQuery, uint32_t queryCount)
|
|
{
|
|
VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
/* Need to flush other availabilities just in case there is a reset after it
|
|
* was made available but the writes have not propagated yet. Need to avoid
|
|
* data races in the writes. This is save to do sice vkCmdResetQueryPool
|
|
* cannot be called when a render pass is active. */
|
|
upload_queue_writes(cmd);
|
|
emit_zero_queries(cmd, pool, firstQuery, queryCount, false);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
|
VkPipelineStageFlags2 stage, VkQueryPool queryPool,
|
|
uint32_t query)
|
|
{
|
|
/* TODO_KOSMICKRISP */
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
|
uint32_t query, VkQueryControlFlags flags)
|
|
{
|
|
VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
cmd->state.gfx.occlusion.mode = flags & VK_QUERY_CONTROL_PRECISE_BIT
|
|
? MTL_VISIBILITY_RESULT_MODE_COUNTING
|
|
: MTL_VISIBILITY_RESULT_MODE_BOOLEAN;
|
|
cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION;
|
|
uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
|
|
cmd->state.gfx.occlusion.index = oq_index[query];
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
|
uint32_t query)
|
|
{
|
|
VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
cmd->state.gfx.occlusion.mode = MTL_VISIBILITY_RESULT_MODE_DISABLED;
|
|
cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION;
|
|
|
|
/* Make the query available */
|
|
if (kk_has_available(pool)) {
|
|
uint64_t addr = kk_query_available_addr(pool, query);
|
|
kk_cmd_write(cmd, (struct libkk_imm_write){addr, true});
|
|
}
|
|
}
|
|
|
|
static bool
|
|
kk_query_is_available(struct kk_device *dev, struct kk_query_pool *pool,
|
|
uint32_t query)
|
|
{
|
|
if (kk_has_available(pool)) {
|
|
uint32_t *available = kk_query_available_map(pool, query);
|
|
return p_atomic_read(available) != 0;
|
|
} else {
|
|
const struct kk_query_report *report =
|
|
kk_query_report_map(dev, pool, query);
|
|
|
|
return report->value != UINT64_MAX;
|
|
}
|
|
}
|
|
|
|
#define KK_QUERY_TIMEOUT 2000000000ull
|
|
|
|
static VkResult
|
|
kk_query_wait_for_available(struct kk_device *dev, struct kk_query_pool *pool,
|
|
uint32_t query)
|
|
{
|
|
uint64_t abs_timeout_ns = os_time_get_absolute_timeout(KK_QUERY_TIMEOUT);
|
|
|
|
while (os_time_get_nano() < abs_timeout_ns) {
|
|
if (kk_query_is_available(dev, pool, query))
|
|
return VK_SUCCESS;
|
|
|
|
VkResult status = vk_device_check_status(&dev->vk);
|
|
if (status != VK_SUCCESS)
|
|
return status;
|
|
}
|
|
|
|
return vk_device_set_lost(&dev->vk, "query timeout");
|
|
}
|
|
|
|
static void
|
|
cpu_write_query_result(void *dst, uint32_t idx, VkQueryResultFlags flags,
|
|
uint64_t result)
|
|
{
|
|
if (flags & VK_QUERY_RESULT_64_BIT) {
|
|
uint64_t *dst64 = dst;
|
|
dst64[idx] = result;
|
|
} else {
|
|
uint32_t *dst32 = dst;
|
|
dst32[idx] = result;
|
|
}
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
kk_GetQueryPoolResults(VkDevice device, VkQueryPool queryPool,
|
|
uint32_t firstQuery, uint32_t queryCount,
|
|
size_t dataSize, void *pData, VkDeviceSize stride,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
VK_FROM_HANDLE(kk_device, dev, device);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
|
|
if (vk_device_is_lost(&dev->vk))
|
|
return VK_ERROR_DEVICE_LOST;
|
|
|
|
VkResult status = VK_SUCCESS;
|
|
for (uint32_t i = 0; i < queryCount; i++) {
|
|
const uint32_t query = firstQuery + i;
|
|
|
|
bool available = kk_query_is_available(dev, pool, query);
|
|
|
|
if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
|
|
status = kk_query_wait_for_available(dev, pool, query);
|
|
if (status != VK_SUCCESS)
|
|
return status;
|
|
|
|
available = true;
|
|
}
|
|
|
|
bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
|
|
|
|
const struct kk_query_report *src = kk_query_report_map(dev, pool, query);
|
|
assert(i * stride < dataSize);
|
|
void *dst = (char *)pData + i * stride;
|
|
|
|
uint32_t reports = kk_reports_per_query(pool);
|
|
if (write_results) {
|
|
for (uint32_t j = 0; j < reports; j++) {
|
|
cpu_write_query_result(dst, j, flags, src[j].value);
|
|
}
|
|
}
|
|
|
|
if (!write_results)
|
|
status = VK_NOT_READY;
|
|
|
|
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
|
cpu_write_query_result(dst, reports, flags, available);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
kk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
|
|
uint32_t firstQuery, uint32_t queryCount,
|
|
VkBuffer dstBuffer, VkDeviceSize dstOffset,
|
|
VkDeviceSize stride, VkQueryResultFlags flags)
|
|
{
|
|
VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
|
|
VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
|
|
VK_FROM_HANDLE(kk_buffer, dst_buf, dstBuffer);
|
|
struct kk_device *dev = kk_cmd_buffer_device(cmd);
|
|
|
|
struct kk_copy_query_pool_results_info info = {
|
|
.availability = kk_has_available(pool) ? pool->bo->gpu : 0,
|
|
.results = pool->oq_queries ? dev->occlusion_queries.bo->gpu
|
|
: pool->bo->gpu + pool->query_start,
|
|
.indices = pool->oq_queries ? pool->bo->gpu + pool->query_start : 0,
|
|
.dst_addr = dst_buf->vk.device_address + dstOffset,
|
|
.dst_stride = stride,
|
|
.first_query = firstQuery,
|
|
.flags = flags,
|
|
.reports_per_query = kk_reports_per_query(pool),
|
|
.query_count = queryCount,
|
|
};
|
|
|
|
util_dynarray_append(&cmd->encoder->copy_query_pool_result_infos, info);
|
|
/* If we are not mid encoder, just upload the writes */
|
|
enum kk_encoder_type last_used = cmd->encoder->main.last_used;
|
|
if (last_used == KK_ENC_NONE || last_used == KK_ENC_COMPUTE)
|
|
upload_queue_writes(cmd);
|
|
}
|