mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-21 17:38:08 +02:00
The Mesa log infrastructure is really useful as it allows us to get debug and error information in Android systems. Apart from that, it also allows us to forward diagnostics into the right logs and files. Therefore, instead of using stderr for all messages, use Mesa log and separate the messages into debugging and error messages. Signed-off-by: Maíra Canal <mcanal@igalia.com> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32304>
1816 lines
63 KiB
C
1816 lines
63 KiB
C
/*
|
|
* Copyright © 2020 Raspberry Pi Ltd
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "v3dv_private.h"
|
|
|
|
#include "util/timespec.h"
|
|
#include "compiler/nir/nir_builder.h"
|
|
|
|
static void
|
|
kperfmon_create(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query)
|
|
{
|
|
for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
|
|
assert(i * DRM_V3D_MAX_PERF_COUNTERS < pool->perfmon.ncounters);
|
|
|
|
struct drm_v3d_perfmon_create req = {
|
|
.ncounters = MIN2(pool->perfmon.ncounters -
|
|
i * DRM_V3D_MAX_PERF_COUNTERS,
|
|
DRM_V3D_MAX_PERF_COUNTERS),
|
|
};
|
|
memcpy(req.counters,
|
|
&pool->perfmon.counters[i * DRM_V3D_MAX_PERF_COUNTERS],
|
|
req.ncounters);
|
|
|
|
int ret = v3dv_ioctl(device->pdevice->render_fd,
|
|
DRM_IOCTL_V3D_PERFMON_CREATE,
|
|
&req);
|
|
if (ret)
|
|
mesa_loge("Failed to create perfmon for query %d: %s\n", query,
|
|
strerror(errno));
|
|
|
|
pool->queries[query].perf.kperfmon_ids[i] = req.id;
|
|
}
|
|
}
|
|
|
|
static void
|
|
kperfmon_destroy(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query)
|
|
{
|
|
/* Skip destroying if never created */
|
|
if (!pool->queries[query].perf.kperfmon_ids[0])
|
|
return;
|
|
|
|
for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
|
|
struct drm_v3d_perfmon_destroy req = {
|
|
.id = pool->queries[query].perf.kperfmon_ids[i]
|
|
};
|
|
|
|
int ret = v3dv_ioctl(device->pdevice->render_fd,
|
|
DRM_IOCTL_V3D_PERFMON_DESTROY,
|
|
&req);
|
|
|
|
if (ret) {
|
|
mesa_loge("Failed to destroy perfmon %u: %s\n",
|
|
req.id, strerror(errno));
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Creates a VkBuffer (and VkDeviceMemory) to access a BO.
|
|
*/
|
|
static VkResult
|
|
create_vk_storage_buffer(struct v3dv_device *device,
|
|
struct v3dv_bo *bo,
|
|
VkBuffer *vk_buf,
|
|
VkDeviceMemory *vk_mem)
|
|
{
|
|
VkDevice vk_device = v3dv_device_to_handle(device);
|
|
|
|
VkBufferCreateInfo buf_info = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
|
.size = bo->size,
|
|
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
|
};
|
|
VkResult result = v3dv_CreateBuffer(vk_device, &buf_info, NULL, vk_buf);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
struct v3dv_device_memory *mem =
|
|
vk_object_zalloc(&device->vk, NULL, sizeof(*mem),
|
|
VK_OBJECT_TYPE_DEVICE_MEMORY);
|
|
if (!mem)
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
mem->bo = bo;
|
|
mem->type = &device->pdevice->memory.memoryTypes[0];
|
|
|
|
*vk_mem = v3dv_device_memory_to_handle(mem);
|
|
VkBindBufferMemoryInfo bind_info = {
|
|
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
|
|
.buffer = *vk_buf,
|
|
.memory = *vk_mem,
|
|
.memoryOffset = 0,
|
|
};
|
|
v3dv_BindBufferMemory2(vk_device, 1, &bind_info);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
destroy_vk_storage_buffer(struct v3dv_device *device,
|
|
VkBuffer *vk_buf,
|
|
VkDeviceMemory *vk_mem)
|
|
{
|
|
if (*vk_mem) {
|
|
vk_object_free(&device->vk, NULL, v3dv_device_memory_from_handle(*vk_mem));
|
|
*vk_mem = VK_NULL_HANDLE;
|
|
}
|
|
|
|
v3dv_DestroyBuffer(v3dv_device_to_handle(device), *vk_buf, NULL);
|
|
*vk_buf = VK_NULL_HANDLE;
|
|
}
|
|
|
|
/**
|
|
* Allocates descriptor sets to access query pool BO (availability and
|
|
* occlusion query results) from Vulkan pipelines.
|
|
*/
|
|
static VkResult
|
|
create_pool_descriptors(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool)
|
|
{
|
|
assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION);
|
|
VkDevice vk_device = v3dv_device_to_handle(device);
|
|
|
|
VkDescriptorPoolSize pool_size = {
|
|
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = 1,
|
|
};
|
|
VkDescriptorPoolCreateInfo pool_info = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
|
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
|
.maxSets = 1,
|
|
.poolSizeCount = 1,
|
|
.pPoolSizes = &pool_size,
|
|
};
|
|
VkResult result =
|
|
v3dv_CreateDescriptorPool(vk_device, &pool_info, NULL,
|
|
&pool->meta.descriptor_pool);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
VkDescriptorSetAllocateInfo alloc_info = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
|
.descriptorPool = pool->meta.descriptor_pool,
|
|
.descriptorSetCount = 1,
|
|
.pSetLayouts = &device->queries.buf_descriptor_set_layout,
|
|
};
|
|
result = v3dv_AllocateDescriptorSets(vk_device, &alloc_info,
|
|
&pool->meta.descriptor_set);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
VkDescriptorBufferInfo desc_buf_info = {
|
|
.buffer = pool->meta.buf,
|
|
.offset = 0,
|
|
.range = VK_WHOLE_SIZE,
|
|
};
|
|
|
|
VkWriteDescriptorSet write = {
|
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
|
.dstSet = pool->meta.descriptor_set,
|
|
.dstBinding = 0,
|
|
.dstArrayElement = 0,
|
|
.descriptorCount = 1,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.pBufferInfo = &desc_buf_info,
|
|
};
|
|
v3dv_UpdateDescriptorSets(vk_device, 1, &write, 0, NULL);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
destroy_pool_descriptors(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool)
|
|
{
|
|
assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION);
|
|
|
|
v3dv_FreeDescriptorSets(v3dv_device_to_handle(device),
|
|
pool->meta.descriptor_pool,
|
|
1, &pool->meta.descriptor_set);
|
|
pool->meta.descriptor_set = VK_NULL_HANDLE;
|
|
|
|
v3dv_DestroyDescriptorPool(v3dv_device_to_handle(device),
|
|
pool->meta.descriptor_pool, NULL);
|
|
pool->meta.descriptor_pool = VK_NULL_HANDLE;
|
|
}
|
|
|
|
static VkResult
|
|
pool_create_meta_resources(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool)
|
|
{
|
|
VkResult result;
|
|
|
|
if (pool->query_type != VK_QUERY_TYPE_OCCLUSION)
|
|
return VK_SUCCESS;
|
|
|
|
result = create_vk_storage_buffer(device, pool->occlusion.bo,
|
|
&pool->meta.buf, &pool->meta.mem);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
result = create_pool_descriptors(device, pool);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
pool_destroy_meta_resources(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool)
|
|
{
|
|
if (pool->query_type != VK_QUERY_TYPE_OCCLUSION)
|
|
return;
|
|
|
|
destroy_pool_descriptors(device, pool);
|
|
destroy_vk_storage_buffer(device, &pool->meta.buf, &pool->meta.mem);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_CreateQueryPool(VkDevice _device,
|
|
const VkQueryPoolCreateInfo *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator,
|
|
VkQueryPool *pQueryPool)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
|
|
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
|
|
pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP ||
|
|
pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
|
|
assert(pCreateInfo->queryCount > 0);
|
|
|
|
struct v3dv_query_pool *pool =
|
|
vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
|
|
VK_OBJECT_TYPE_QUERY_POOL);
|
|
if (pool == NULL)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
pool->query_type = pCreateInfo->queryType;
|
|
pool->query_count = pCreateInfo->queryCount;
|
|
|
|
uint32_t query_idx = 0;
|
|
VkResult result;
|
|
|
|
const uint32_t pool_bytes = sizeof(struct v3dv_query) * pool->query_count;
|
|
pool->queries = vk_alloc2(&device->vk.alloc, pAllocator, pool_bytes, 8,
|
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
|
if (pool->queries == NULL) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
goto fail;
|
|
}
|
|
|
|
switch (pool->query_type) {
|
|
case VK_QUERY_TYPE_OCCLUSION: {
|
|
/* The hardware allows us to setup groups of 16 queries in consecutive
|
|
* 4-byte addresses, requiring only that each group of 16 queries is
|
|
* aligned to a 1024 byte boundary.
|
|
*/
|
|
const uint32_t query_groups = DIV_ROUND_UP(pool->query_count, 16);
|
|
uint32_t bo_size = query_groups * 1024;
|
|
/* After the counters we store avalability data, 1 byte/query */
|
|
pool->occlusion.avail_offset = bo_size;
|
|
bo_size += pool->query_count;
|
|
pool->occlusion.bo = v3dv_bo_alloc(device, bo_size, "query:o", true);
|
|
if (!pool->occlusion.bo) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
goto fail;
|
|
}
|
|
if (!v3dv_bo_map(device, pool->occlusion.bo, bo_size)) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
goto fail;
|
|
}
|
|
break;
|
|
}
|
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
|
const VkQueryPoolPerformanceCreateInfoKHR *pq_info =
|
|
vk_find_struct_const(pCreateInfo->pNext,
|
|
QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
|
|
|
|
assert(pq_info);
|
|
|
|
pool->perfmon.ncounters = pq_info->counterIndexCount;
|
|
for (uint32_t i = 0; i < pq_info->counterIndexCount; i++)
|
|
pool->perfmon.counters[i] = pq_info->pCounterIndices[i];
|
|
|
|
pool->perfmon.nperfmons = DIV_ROUND_UP(pool->perfmon.ncounters,
|
|
DRM_V3D_MAX_PERF_COUNTERS);
|
|
|
|
assert(pool->perfmon.nperfmons <= V3DV_MAX_PERFMONS);
|
|
break;
|
|
}
|
|
case VK_QUERY_TYPE_TIMESTAMP: {
|
|
/* 8 bytes per query used for the timestamp value. We have all
|
|
* timestamps tightly packed first in the buffer.
|
|
*/
|
|
const uint32_t bo_size = pool->query_count * 8;
|
|
pool->timestamp.bo = v3dv_bo_alloc(device, bo_size, "query:t", true);
|
|
if (!pool->timestamp.bo) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
goto fail;
|
|
}
|
|
if (!v3dv_bo_map(device, pool->timestamp.bo, bo_size)) {
|
|
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
|
goto fail;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
unreachable("Unsupported query type");
|
|
}
|
|
|
|
/* Initialize queries in the pool */
|
|
for (; query_idx < pool->query_count; query_idx++) {
|
|
pool->queries[query_idx].maybe_available = false;
|
|
switch (pool->query_type) {
|
|
case VK_QUERY_TYPE_OCCLUSION: {
|
|
const uint32_t query_group = query_idx / 16;
|
|
const uint32_t query_offset = query_group * 1024 + (query_idx % 16) * 4;
|
|
pool->queries[query_idx].occlusion.offset = query_offset;
|
|
break;
|
|
}
|
|
case VK_QUERY_TYPE_TIMESTAMP:
|
|
pool->queries[query_idx].timestamp.offset = query_idx * 8;
|
|
result = vk_sync_create(&device->vk,
|
|
&device->pdevice->drm_syncobj_type, 0, 0,
|
|
&pool->queries[query_idx].timestamp.sync);
|
|
if (result != VK_SUCCESS)
|
|
goto fail;
|
|
break;
|
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
|
result = vk_sync_create(&device->vk,
|
|
&device->pdevice->drm_syncobj_type, 0, 0,
|
|
&pool->queries[query_idx].perf.last_job_sync);
|
|
if (result != VK_SUCCESS)
|
|
goto fail;
|
|
|
|
kperfmon_create(device, pool, query_idx);
|
|
break;
|
|
}
|
|
default:
|
|
unreachable("Unsupported query type");
|
|
}
|
|
}
|
|
|
|
/* Create meta resources */
|
|
result = pool_create_meta_resources(device, pool);
|
|
if (result != VK_SUCCESS)
|
|
goto fail;
|
|
|
|
*pQueryPool = v3dv_query_pool_to_handle(pool);
|
|
|
|
return VK_SUCCESS;
|
|
|
|
fail:
|
|
if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
|
|
for (uint32_t j = 0; j < query_idx; j++)
|
|
vk_sync_destroy(&device->vk, pool->queries[j].timestamp.sync);
|
|
}
|
|
|
|
if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
|
for (uint32_t j = 0; j < query_idx; j++)
|
|
vk_sync_destroy(&device->vk, pool->queries[j].perf.last_job_sync);
|
|
}
|
|
|
|
if (pool->occlusion.bo)
|
|
v3dv_bo_free(device, pool->occlusion.bo);
|
|
if (pool->timestamp.bo)
|
|
v3dv_bo_free(device, pool->timestamp.bo);
|
|
if (pool->queries)
|
|
vk_free2(&device->vk.alloc, pAllocator, pool->queries);
|
|
pool_destroy_meta_resources(device, pool);
|
|
vk_object_free(&device->vk, pAllocator, pool);
|
|
|
|
return result;
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_DestroyQueryPool(VkDevice _device,
|
|
VkQueryPool queryPool,
|
|
const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
|
|
if (!pool)
|
|
return;
|
|
|
|
if (pool->occlusion.bo)
|
|
v3dv_bo_free(device, pool->occlusion.bo);
|
|
|
|
if (pool->timestamp.bo)
|
|
v3dv_bo_free(device, pool->timestamp.bo);
|
|
|
|
if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
|
|
for (uint32_t i = 0; i < pool->query_count; i++)
|
|
vk_sync_destroy(&device->vk, pool->queries[i].timestamp.sync);
|
|
}
|
|
|
|
if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
|
for (uint32_t i = 0; i < pool->query_count; i++) {
|
|
kperfmon_destroy(device, pool, i);
|
|
vk_sync_destroy(&device->vk, pool->queries[i].perf.last_job_sync);
|
|
}
|
|
}
|
|
|
|
if (pool->queries)
|
|
vk_free2(&device->vk.alloc, pAllocator, pool->queries);
|
|
|
|
pool_destroy_meta_resources(device, pool);
|
|
|
|
vk_object_free(&device->vk, pAllocator, pool);
|
|
}
|
|
|
|
static void
|
|
write_to_buffer(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
|
|
{
|
|
if (do_64bit) {
|
|
uint64_t *dst64 = (uint64_t *) dst;
|
|
dst64[idx] = value;
|
|
} else {
|
|
uint32_t *dst32 = (uint32_t *) dst;
|
|
dst32[idx] = (uint32_t) value;
|
|
}
|
|
}
|
|
|
|
static VkResult
|
|
query_wait_available(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
struct v3dv_query *q,
|
|
uint32_t query_idx)
|
|
{
|
|
/* For occlusion queries we prefer to poll the availability BO in a loop
|
|
* to waiting on the query results BO, because the latter would
|
|
* make us wait for any job running queries from the pool, even if those
|
|
* queries do not involve the one we want to wait on.
|
|
*/
|
|
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
|
|
uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
|
|
pool->occlusion.avail_offset + query_idx;
|
|
while (*q_addr == 0)
|
|
usleep(250);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
|
|
if (vk_sync_wait(&device->vk, q->timestamp.sync,
|
|
0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) {
|
|
return vk_device_set_lost(&device->vk, "Query job wait failed");
|
|
}
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
|
|
|
|
/* For performance queries we need to wait for the queue to signal that
|
|
* the query has been submitted for execution before anything else.
|
|
*/
|
|
VkResult result = VK_SUCCESS;
|
|
if (!q->maybe_available) {
|
|
struct timespec timeout;
|
|
timespec_get(&timeout, TIME_UTC);
|
|
timespec_add_msec(&timeout, &timeout, 2000);
|
|
|
|
mtx_lock(&device->query_mutex);
|
|
while (!q->maybe_available) {
|
|
if (vk_device_is_lost(&device->vk)) {
|
|
result = VK_ERROR_DEVICE_LOST;
|
|
break;
|
|
}
|
|
|
|
int ret = cnd_timedwait(&device->query_ended,
|
|
&device->query_mutex,
|
|
&timeout);
|
|
if (ret != thrd_success) {
|
|
mtx_unlock(&device->query_mutex);
|
|
result = vk_device_set_lost(&device->vk, "Query wait failed");
|
|
break;
|
|
}
|
|
}
|
|
mtx_unlock(&device->query_mutex);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
/* For performance queries, we also need to wait for the relevant syncobj
|
|
* to be signaled to ensure completion of the GPU work.
|
|
*/
|
|
if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR &&
|
|
vk_sync_wait(&device->vk, q->perf.last_job_sync,
|
|
0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX) != VK_SUCCESS) {
|
|
return vk_device_set_lost(&device->vk, "Query job wait failed");
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static VkResult
|
|
query_check_available(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
struct v3dv_query *q,
|
|
uint32_t query_idx)
|
|
{
|
|
/* For occlusion we check the availability BO */
|
|
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
|
|
const uint8_t *q_addr = ((uint8_t *) pool->occlusion.bo->map) +
|
|
pool->occlusion.avail_offset + query_idx;
|
|
return (*q_addr != 0) ? VK_SUCCESS : VK_NOT_READY;
|
|
}
|
|
|
|
/* For timestamp queries, we need to check if the relevant job
|
|
* has completed.
|
|
*/
|
|
if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
|
|
if (vk_sync_wait(&device->vk, q->timestamp.sync,
|
|
0, VK_SYNC_WAIT_COMPLETE, 0) != VK_SUCCESS) {
|
|
return VK_NOT_READY;
|
|
}
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/* For other queries we need to check if the queue has submitted the query
|
|
* for execution at all.
|
|
*/
|
|
assert(pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
|
|
if (!q->maybe_available)
|
|
return VK_NOT_READY;
|
|
|
|
/* For performance queries, we also need to check if the relevant GPU job
|
|
* has completed.
|
|
*/
|
|
if (pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR &&
|
|
vk_sync_wait(&device->vk, q->perf.last_job_sync,
|
|
0, VK_SYNC_WAIT_COMPLETE, 0) != VK_SUCCESS) {
|
|
return VK_NOT_READY;
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
query_is_available(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query,
|
|
bool do_wait,
|
|
bool *available)
|
|
{
|
|
struct v3dv_query *q = &pool->queries[query];
|
|
|
|
if (do_wait) {
|
|
VkResult result = query_wait_available(device, pool, q, query);
|
|
if (result != VK_SUCCESS) {
|
|
*available = false;
|
|
return result;
|
|
}
|
|
|
|
*available = true;
|
|
} else {
|
|
VkResult result = query_check_available(device, pool, q, query);
|
|
assert(result == VK_SUCCESS || result == VK_NOT_READY);
|
|
*available = (result == VK_SUCCESS);
|
|
}
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
write_occlusion_query_result(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query,
|
|
bool do_64bit,
|
|
void *data,
|
|
uint32_t slot)
|
|
{
|
|
assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
|
|
|
|
if (vk_device_is_lost(&device->vk))
|
|
return VK_ERROR_DEVICE_LOST;
|
|
|
|
struct v3dv_query *q = &pool->queries[query];
|
|
assert(pool->occlusion.bo && pool->occlusion.bo->map);
|
|
|
|
const uint8_t *query_addr =
|
|
((uint8_t *) pool->occlusion.bo->map) + q->occlusion.offset;
|
|
write_to_buffer(data, slot, do_64bit, (uint64_t) *((uint32_t *)query_addr));
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
write_timestamp_query_result(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query,
|
|
bool do_64bit,
|
|
void *data,
|
|
uint32_t slot)
|
|
{
|
|
assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
|
|
|
|
struct v3dv_query *q = &pool->queries[query];
|
|
|
|
const uint8_t *query_addr =
|
|
((uint8_t *) pool->timestamp.bo->map) + q->timestamp.offset;
|
|
|
|
write_to_buffer(data, slot, do_64bit, *((uint64_t *)query_addr));
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
write_performance_query_result(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query,
|
|
bool do_64bit,
|
|
void *data,
|
|
uint32_t slot)
|
|
{
|
|
assert(pool && pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
|
|
|
|
struct v3dv_query *q = &pool->queries[query];
|
|
uint64_t counter_values[V3D_MAX_PERFCNT];
|
|
|
|
for (uint32_t i = 0; i < pool->perfmon.nperfmons; i++) {
|
|
struct drm_v3d_perfmon_get_values req = {
|
|
.id = q->perf.kperfmon_ids[i],
|
|
.values_ptr = (uintptr_t)(&counter_values[i *
|
|
DRM_V3D_MAX_PERF_COUNTERS])
|
|
};
|
|
|
|
int ret = v3dv_ioctl(device->pdevice->render_fd,
|
|
DRM_IOCTL_V3D_PERFMON_GET_VALUES,
|
|
&req);
|
|
|
|
if (ret) {
|
|
mesa_loge("failed to get perfmon values: %s\n", strerror(errno));
|
|
return vk_error(device, VK_ERROR_DEVICE_LOST);
|
|
}
|
|
}
|
|
|
|
for (uint32_t i = 0; i < pool->perfmon.ncounters; i++)
|
|
write_to_buffer(data, slot + i, do_64bit, counter_values[i]);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
write_query_result(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query,
|
|
bool do_64bit,
|
|
void *data,
|
|
uint32_t slot)
|
|
{
|
|
switch (pool->query_type) {
|
|
case VK_QUERY_TYPE_OCCLUSION:
|
|
return write_occlusion_query_result(device, pool, query, do_64bit,
|
|
data, slot);
|
|
case VK_QUERY_TYPE_TIMESTAMP:
|
|
return write_timestamp_query_result(device, pool, query, do_64bit,
|
|
data, slot);
|
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
|
return write_performance_query_result(device, pool, query, do_64bit,
|
|
data, slot);
|
|
default:
|
|
unreachable("Unsupported query type");
|
|
}
|
|
}
|
|
|
|
static uint32_t
|
|
get_query_result_count(struct v3dv_query_pool *pool)
|
|
{
|
|
switch (pool->query_type) {
|
|
case VK_QUERY_TYPE_OCCLUSION:
|
|
case VK_QUERY_TYPE_TIMESTAMP:
|
|
return 1;
|
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
|
return pool->perfmon.ncounters;
|
|
default:
|
|
unreachable("Unsupported query type");
|
|
}
|
|
}
|
|
|
|
VkResult
|
|
v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t first,
|
|
uint32_t count,
|
|
void *data,
|
|
VkDeviceSize stride,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
assert(first < pool->query_count);
|
|
assert(first + count <= pool->query_count);
|
|
assert(data);
|
|
|
|
const bool do_64bit = flags & VK_QUERY_RESULT_64_BIT ||
|
|
pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR;
|
|
const bool do_wait = flags & VK_QUERY_RESULT_WAIT_BIT;
|
|
const bool do_partial = flags & VK_QUERY_RESULT_PARTIAL_BIT;
|
|
|
|
uint32_t result_count = get_query_result_count(pool);
|
|
|
|
VkResult result = VK_SUCCESS;
|
|
for (uint32_t i = first; i < first + count; i++) {
|
|
bool available = false;
|
|
VkResult query_result =
|
|
query_is_available(device, pool, i, do_wait, &available);
|
|
if (query_result == VK_ERROR_DEVICE_LOST)
|
|
result = VK_ERROR_DEVICE_LOST;
|
|
|
|
/**
|
|
* From the Vulkan 1.0 spec:
|
|
*
|
|
* "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
|
|
* both not set then no result values are written to pData for queries
|
|
* that are in the unavailable state at the time of the call, and
|
|
* vkGetQueryPoolResults returns VK_NOT_READY. However, availability
|
|
* state is still written to pData for those queries if
|
|
* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
|
|
*/
|
|
uint32_t slot = 0;
|
|
|
|
const bool write_result = available || do_partial;
|
|
if (write_result)
|
|
write_query_result(device, pool, i, do_64bit, data, slot);
|
|
slot += result_count;
|
|
|
|
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
|
write_to_buffer(data, slot++, do_64bit, available ? 1u : 0u);
|
|
|
|
if (!write_result && result != VK_ERROR_DEVICE_LOST)
|
|
result = VK_NOT_READY;
|
|
|
|
data += stride;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_GetQueryPoolResults(VkDevice _device,
|
|
VkQueryPool queryPool,
|
|
uint32_t firstQuery,
|
|
uint32_t queryCount,
|
|
size_t dataSize,
|
|
void *pData,
|
|
VkDeviceSize stride,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
|
|
if (vk_device_is_lost(&device->vk))
|
|
return VK_ERROR_DEVICE_LOST;
|
|
|
|
return v3dv_get_query_pool_results_cpu(device, pool, firstQuery, queryCount,
|
|
pData, stride, flags);
|
|
}
|
|
|
|
/* Emits a series of vkCmdDispatchBase calls to execute all the workgroups
|
|
* required to handle a number of queries considering per-dispatch limits.
|
|
*/
|
|
static void
|
|
cmd_buffer_emit_dispatch_queries(struct v3dv_cmd_buffer *cmd_buffer,
|
|
uint32_t query_count)
|
|
{
|
|
VkCommandBuffer vk_cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
|
|
|
uint32_t dispatched = 0;
|
|
const uint32_t max_batch_size = 65535;
|
|
while (dispatched < query_count) {
|
|
uint32_t batch_size = MIN2(query_count - dispatched, max_batch_size);
|
|
v3dv_CmdDispatchBase(vk_cmd_buffer, dispatched, 0, 0, batch_size, 1, 1);
|
|
dispatched += batch_size;
|
|
}
|
|
}
|
|
|
|
void
|
|
v3dv_cmd_buffer_emit_set_query_availability(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query, uint32_t count,
|
|
uint8_t availability)
|
|
{
|
|
assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION ||
|
|
pool->query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR);
|
|
|
|
struct v3dv_device *device = cmd_buffer->device;
|
|
VkCommandBuffer vk_cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
|
|
|
/* We are about to emit a compute job to set query availability and we need
|
|
* to ensure this executes after the graphics work using the queries has
|
|
* completed.
|
|
*/
|
|
VkMemoryBarrier2 barrier = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
|
|
.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
};
|
|
VkDependencyInfo barrier_info = {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.memoryBarrierCount = 1,
|
|
.pMemoryBarriers = &barrier,
|
|
};
|
|
v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &barrier_info);
|
|
|
|
/* Dispatch queries */
|
|
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
|
|
|
v3dv_CmdBindPipeline(vk_cmd_buffer,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
device->queries.avail_pipeline);
|
|
|
|
v3dv_CmdBindDescriptorSets(vk_cmd_buffer,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
device->queries.avail_pipeline_layout,
|
|
0, 1, &pool->meta.descriptor_set,
|
|
0, NULL);
|
|
|
|
struct {
|
|
uint32_t offset;
|
|
uint32_t query;
|
|
uint8_t availability;
|
|
} push_data = { pool->occlusion.avail_offset, query, availability };
|
|
v3dv_CmdPushConstants(vk_cmd_buffer,
|
|
device->queries.avail_pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(push_data), &push_data);
|
|
cmd_buffer_emit_dispatch_queries(cmd_buffer, count);
|
|
|
|
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
|
|
}
|
|
|
|
static void
|
|
cmd_buffer_emit_reset_occlusion_query_pool(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t query, uint32_t count)
|
|
{
|
|
struct v3dv_device *device = cmd_buffer->device;
|
|
VkCommandBuffer vk_cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
|
|
|
/* Ensure the GPU is done with the queries in the graphics queue before
|
|
* we reset in the compute queue.
|
|
*/
|
|
VkMemoryBarrier2 barrier = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
|
|
.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
};
|
|
VkDependencyInfo barrier_info = {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.memoryBarrierCount = 1,
|
|
.pMemoryBarriers = &barrier,
|
|
};
|
|
v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &barrier_info);
|
|
|
|
/* Emit compute reset */
|
|
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
|
|
|
v3dv_CmdBindPipeline(vk_cmd_buffer,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
device->queries.reset_occlusion_pipeline);
|
|
|
|
v3dv_CmdBindDescriptorSets(vk_cmd_buffer,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
device->queries.reset_occlusion_pipeline_layout,
|
|
0, 1, &pool->meta.descriptor_set,
|
|
0, NULL);
|
|
struct {
|
|
uint32_t offset;
|
|
uint32_t query;
|
|
} push_data = { pool->occlusion.avail_offset, query };
|
|
v3dv_CmdPushConstants(vk_cmd_buffer,
|
|
device->queries.reset_occlusion_pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(push_data), &push_data);
|
|
|
|
cmd_buffer_emit_dispatch_queries(cmd_buffer, count);
|
|
|
|
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
|
|
|
|
/* Ensure future work in the graphics queue using the queries doesn't start
|
|
* before the reset completed.
|
|
*/
|
|
barrier = (VkMemoryBarrier2) {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
|
|
.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT,
|
|
};
|
|
barrier_info = (VkDependencyInfo) {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.memoryBarrierCount = 1,
|
|
.pMemoryBarriers = &barrier,
|
|
};
|
|
v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &barrier_info);
|
|
}
|
|
|
|
static void
|
|
cmd_buffer_emit_reset_query_pool(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t first, uint32_t count)
|
|
{
|
|
assert(pool->query_type == VK_QUERY_TYPE_OCCLUSION);
|
|
cmd_buffer_emit_reset_occlusion_query_pool(cmd_buffer, pool, first, count);
|
|
}
|
|
|
|
static void
|
|
cmd_buffer_emit_reset_query_pool_cpu(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t first, uint32_t count)
|
|
{
|
|
assert(pool->query_type != VK_QUERY_TYPE_OCCLUSION);
|
|
|
|
struct v3dv_job *job =
|
|
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
|
V3DV_JOB_TYPE_CPU_RESET_QUERIES,
|
|
cmd_buffer, -1);
|
|
v3dv_return_if_oom(cmd_buffer, NULL);
|
|
job->cpu.query_reset.pool = pool;
|
|
job->cpu.query_reset.first = first;
|
|
job->cpu.query_reset.count = count;
|
|
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
|
VkQueryPool queryPool,
|
|
uint32_t firstQuery,
|
|
uint32_t queryCount)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
|
|
/* Resets can only happen outside a render pass instance so we should not
|
|
* be in the middle of job recording.
|
|
*/
|
|
assert(cmd_buffer->state.pass == NULL);
|
|
assert(cmd_buffer->state.job == NULL);
|
|
|
|
assert(firstQuery < pool->query_count);
|
|
assert(firstQuery + queryCount <= pool->query_count);
|
|
|
|
/* We can reset occlusion queries in the GPU, but for other query types
|
|
* we emit a CPU job that will call v3dv_reset_query_pool_cpu when executed
|
|
* in the queue.
|
|
*/
|
|
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
|
|
cmd_buffer_emit_reset_query_pool(cmd_buffer, pool, firstQuery, queryCount);
|
|
} else {
|
|
cmd_buffer_emit_reset_query_pool_cpu(cmd_buffer, pool,
|
|
firstQuery, queryCount);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Creates a descriptor pool so we can create a descriptors for the destination
|
|
* buffers of vkCmdCopyQueryResults for queries where this is implemented in
|
|
* the GPU.
|
|
*/
|
|
static VkResult
|
|
create_storage_buffer_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
|
|
{
|
|
/* If this is not the first pool we create one for this command buffer
|
|
* size it based on the size of the currently exhausted pool.
|
|
*/
|
|
uint32_t descriptor_count = 32;
|
|
if (cmd_buffer->meta.query.dspool != VK_NULL_HANDLE) {
|
|
struct v3dv_descriptor_pool *exhausted_pool =
|
|
v3dv_descriptor_pool_from_handle(cmd_buffer->meta.query.dspool);
|
|
descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
|
|
}
|
|
|
|
/* Create the descriptor pool */
|
|
cmd_buffer->meta.query.dspool = VK_NULL_HANDLE;
|
|
VkDescriptorPoolSize pool_size = {
|
|
.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
|
|
.descriptorCount = descriptor_count,
|
|
};
|
|
VkDescriptorPoolCreateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
|
.maxSets = descriptor_count,
|
|
.poolSizeCount = 1,
|
|
.pPoolSizes = &pool_size,
|
|
.flags = 0,
|
|
};
|
|
VkResult result =
|
|
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
|
|
&info,
|
|
&cmd_buffer->device->vk.alloc,
|
|
&cmd_buffer->meta.query.dspool);
|
|
|
|
if (result == VK_SUCCESS) {
|
|
assert(cmd_buffer->meta.query.dspool != VK_NULL_HANDLE);
|
|
const VkDescriptorPool vk_pool = cmd_buffer->meta.query.dspool;
|
|
|
|
v3dv_cmd_buffer_add_private_obj(
|
|
cmd_buffer, (uintptr_t) vk_pool,
|
|
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
|
|
|
|
struct v3dv_descriptor_pool *pool =
|
|
v3dv_descriptor_pool_from_handle(vk_pool);
|
|
pool->is_driver_internal = true;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static VkResult
|
|
allocate_storage_buffer_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
|
|
VkDescriptorSet *set)
|
|
{
|
|
/* Make sure we have a descriptor pool */
|
|
VkResult result;
|
|
if (cmd_buffer->meta.query.dspool == VK_NULL_HANDLE) {
|
|
result = create_storage_buffer_descriptor_pool(cmd_buffer);
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
}
|
|
assert(cmd_buffer->meta.query.dspool != VK_NULL_HANDLE);
|
|
|
|
/* Allocate descriptor set */
|
|
struct v3dv_device *device = cmd_buffer->device;
|
|
VkDevice vk_device = v3dv_device_to_handle(device);
|
|
VkDescriptorSetAllocateInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
|
.descriptorPool = cmd_buffer->meta.query.dspool,
|
|
.descriptorSetCount = 1,
|
|
.pSetLayouts = &device->queries.buf_descriptor_set_layout,
|
|
};
|
|
result = v3dv_AllocateDescriptorSets(vk_device, &info, set);
|
|
|
|
/* If we ran out of pool space, grow the pool and try again */
|
|
if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
|
|
result = create_storage_buffer_descriptor_pool(cmd_buffer);
|
|
if (result == VK_SUCCESS) {
|
|
info.descriptorPool = cmd_buffer->meta.query.dspool;
|
|
result = v3dv_AllocateDescriptorSets(vk_device, &info, set);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static uint32_t
|
|
copy_pipeline_index_from_flags(VkQueryResultFlags flags)
|
|
{
|
|
uint32_t index = 0;
|
|
if (flags & VK_QUERY_RESULT_64_BIT)
|
|
index |= 1;
|
|
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
|
index |= 2;
|
|
if (flags & VK_QUERY_RESULT_PARTIAL_BIT)
|
|
index |= 4;
|
|
assert(index < 8);
|
|
return index;
|
|
}
|
|
|
|
static nir_shader *
|
|
get_copy_query_results_cs(const nir_shader_compiler_options *compiler_options,
|
|
VkQueryResultFlags flags);
|
|
|
|
static void
|
|
cmd_buffer_emit_copy_query_pool_results(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t first, uint32_t count,
|
|
struct v3dv_buffer *buf,
|
|
uint32_t offset, uint32_t stride,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
struct v3dv_device *device = cmd_buffer->device;
|
|
VkDevice vk_device = v3dv_device_to_handle(device);
|
|
VkCommandBuffer vk_cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
|
|
|
/* Create the required copy pipeline if not yet created */
|
|
uint32_t pipeline_idx = copy_pipeline_index_from_flags(flags);
|
|
if (!device->queries.copy_pipeline[pipeline_idx]) {
|
|
const nir_shader_compiler_options *compiler_options =
|
|
v3dv_pipeline_get_nir_options(&device->devinfo);
|
|
nir_shader *copy_query_results_cs_nir =
|
|
get_copy_query_results_cs(compiler_options, flags);
|
|
VkResult result =
|
|
v3dv_create_compute_pipeline_from_nir(
|
|
device, copy_query_results_cs_nir,
|
|
device->queries.copy_pipeline_layout,
|
|
&device->queries.copy_pipeline[pipeline_idx]);
|
|
ralloc_free(copy_query_results_cs_nir);
|
|
if (result != VK_SUCCESS) {
|
|
mesa_loge("Failed to create copy query results pipeline\n");
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* FIXME: do we need this barrier? Since vkCmdEndQuery should've been called
|
|
* and that already waits maybe we don't (since this is serialized
|
|
* in the compute queue with EndQuery anyway).
|
|
*/
|
|
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
|
VkMemoryBarrier2 barrier = {
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
|
|
.srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
|
|
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
|
};
|
|
VkDependencyInfo barrier_info = {
|
|
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
|
.memoryBarrierCount = 1,
|
|
.pMemoryBarriers = &barrier,
|
|
};
|
|
v3dv_cmd_buffer_emit_pipeline_barrier(cmd_buffer, &barrier_info);
|
|
}
|
|
|
|
/* Allocate and setup descriptor set for output buffer */
|
|
VkDescriptorSet out_buf_descriptor_set;
|
|
VkResult result =
|
|
allocate_storage_buffer_descriptor_set(cmd_buffer,
|
|
&out_buf_descriptor_set);
|
|
if (result != VK_SUCCESS) {
|
|
mesa_loge("vkCmdCopyQueryPoolResults failed: "
|
|
"could not allocate descriptor.\n");
|
|
return;
|
|
}
|
|
|
|
VkDescriptorBufferInfo desc_buf_info = {
|
|
.buffer = v3dv_buffer_to_handle(buf),
|
|
.offset = 0,
|
|
.range = VK_WHOLE_SIZE,
|
|
};
|
|
VkWriteDescriptorSet write = {
|
|
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
|
.dstSet = out_buf_descriptor_set,
|
|
.dstBinding = 0,
|
|
.dstArrayElement = 0,
|
|
.descriptorCount = 1,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.pBufferInfo = &desc_buf_info,
|
|
};
|
|
v3dv_UpdateDescriptorSets(vk_device, 1, &write, 0, NULL);
|
|
|
|
/* Dispatch copy */
|
|
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
|
|
|
assert(device->queries.copy_pipeline[pipeline_idx]);
|
|
v3dv_CmdBindPipeline(vk_cmd_buffer,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
device->queries.copy_pipeline[pipeline_idx]);
|
|
|
|
VkDescriptorSet sets[2] = {
|
|
pool->meta.descriptor_set,
|
|
out_buf_descriptor_set,
|
|
};
|
|
v3dv_CmdBindDescriptorSets(vk_cmd_buffer,
|
|
VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
device->queries.copy_pipeline_layout,
|
|
0, 2, sets, 0, NULL);
|
|
|
|
struct {
|
|
uint32_t avail_offset, first, offset, stride, flags;
|
|
} push_data = { pool->occlusion.avail_offset, first, offset, stride, flags };
|
|
v3dv_CmdPushConstants(vk_cmd_buffer,
|
|
device->queries.copy_pipeline_layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT,
|
|
0, sizeof(push_data), &push_data);
|
|
|
|
cmd_buffer_emit_dispatch_queries(cmd_buffer, count);
|
|
|
|
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
|
|
}
|
|
|
|
static void
|
|
cmd_buffer_emit_copy_query_pool_results_cpu(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t first,
|
|
uint32_t count,
|
|
struct v3dv_buffer *dst,
|
|
uint32_t offset,
|
|
uint32_t stride,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
struct v3dv_job *job =
|
|
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
|
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
|
|
cmd_buffer, -1);
|
|
v3dv_return_if_oom(cmd_buffer, NULL);
|
|
|
|
job->cpu.query_copy_results.pool = pool;
|
|
job->cpu.query_copy_results.first = first;
|
|
job->cpu.query_copy_results.count = count;
|
|
job->cpu.query_copy_results.dst = dst;
|
|
job->cpu.query_copy_results.offset = offset;
|
|
job->cpu.query_copy_results.stride = stride;
|
|
job->cpu.query_copy_results.flags = flags;
|
|
|
|
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
|
VkQueryPool queryPool,
|
|
uint32_t firstQuery,
|
|
uint32_t queryCount,
|
|
VkBuffer dstBuffer,
|
|
VkDeviceSize dstOffset,
|
|
VkDeviceSize stride,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
V3DV_FROM_HANDLE(v3dv_buffer, dst, dstBuffer);
|
|
|
|
/* Copies can only happen outside a render pass instance so we should not
|
|
* be in the middle of job recording.
|
|
*/
|
|
assert(cmd_buffer->state.pass == NULL);
|
|
assert(cmd_buffer->state.job == NULL);
|
|
|
|
assert(firstQuery < pool->query_count);
|
|
assert(firstQuery + queryCount <= pool->query_count);
|
|
|
|
/* For occlusion queries we implement the copy in the GPU but for other
|
|
* queries we emit a CPU job that will call v3dv_get_query_pool_results_cpu
|
|
* when executed in the queue.
|
|
*/
|
|
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
|
|
cmd_buffer_emit_copy_query_pool_results(cmd_buffer, pool,
|
|
firstQuery, queryCount,
|
|
dst, (uint32_t) dstOffset,
|
|
(uint32_t) stride, flags);
|
|
} else {
|
|
cmd_buffer_emit_copy_query_pool_results_cpu(cmd_buffer, pool,
|
|
firstQuery, queryCount,
|
|
dst, (uint32_t)dstOffset,
|
|
(uint32_t) stride, flags);
|
|
}
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_CmdBeginQuery(VkCommandBuffer commandBuffer,
|
|
VkQueryPool queryPool,
|
|
uint32_t query,
|
|
VkQueryControlFlags flags)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
|
|
v3dv_cmd_buffer_begin_query(cmd_buffer, pool, query, flags);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_CmdEndQuery(VkCommandBuffer commandBuffer,
|
|
VkQueryPool queryPool,
|
|
uint32_t query)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
|
|
v3dv_cmd_buffer_end_query(cmd_buffer, pool, query);
|
|
}
|
|
|
|
void
|
|
v3dv_reset_query_pool_cpu(struct v3dv_device *device,
|
|
struct v3dv_query_pool *pool,
|
|
uint32_t first,
|
|
uint32_t count)
|
|
{
|
|
mtx_lock(&device->query_mutex);
|
|
|
|
if (pool->query_type == VK_QUERY_TYPE_TIMESTAMP) {
|
|
assert(first + count <= pool->query_count);
|
|
|
|
/* Reset timestamp */
|
|
uint8_t *base_addr;
|
|
base_addr = ((uint8_t *) pool->timestamp.bo->map) +
|
|
pool->queries[first].timestamp.offset;
|
|
memset(base_addr, 0, 8 * count);
|
|
|
|
for (uint32_t i = first; i < first + count; i++) {
|
|
if (vk_sync_reset(&device->vk, pool->queries[i].timestamp.sync) != VK_SUCCESS)
|
|
mesa_loge("Failed to reset sync");
|
|
}
|
|
|
|
mtx_unlock(&device->query_mutex);
|
|
return;
|
|
}
|
|
|
|
for (uint32_t i = first; i < first + count; i++) {
|
|
assert(i < pool->query_count);
|
|
struct v3dv_query *q = &pool->queries[i];
|
|
q->maybe_available = false;
|
|
switch (pool->query_type) {
|
|
case VK_QUERY_TYPE_OCCLUSION: {
|
|
/* Reset availability */
|
|
uint8_t *base_addr = ((uint8_t *) pool->occlusion.bo->map) +
|
|
pool->occlusion.avail_offset + first;
|
|
memset(base_addr, 0, count);
|
|
|
|
/* Reset occlusion counter */
|
|
const uint8_t *q_addr =
|
|
((uint8_t *) pool->occlusion.bo->map) + q->occlusion.offset;
|
|
uint32_t *counter = (uint32_t *) q_addr;
|
|
*counter = 0;
|
|
break;
|
|
}
|
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
|
|
kperfmon_destroy(device, pool, i);
|
|
kperfmon_create(device, pool, i);
|
|
if (vk_sync_reset(&device->vk, q->perf.last_job_sync) != VK_SUCCESS)
|
|
mesa_loge("Failed to reset sync");
|
|
break;
|
|
default:
|
|
unreachable("Unsupported query type");
|
|
}
|
|
}
|
|
|
|
mtx_unlock(&device->query_mutex);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_ResetQueryPool(VkDevice _device,
|
|
VkQueryPool queryPool,
|
|
uint32_t firstQuery,
|
|
uint32_t queryCount)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
V3DV_FROM_HANDLE(v3dv_query_pool, pool, queryPool);
|
|
|
|
v3dv_reset_query_pool_cpu(device, pool, firstQuery, queryCount);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
|
|
VkPhysicalDevice physicalDevice,
|
|
uint32_t queueFamilyIndex,
|
|
uint32_t *pCounterCount,
|
|
VkPerformanceCounterKHR *pCounters,
|
|
VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_physical_device, pDevice, physicalDevice);
|
|
|
|
return v3dv_X(pDevice, enumerate_performance_query_counters)(pDevice,
|
|
pCounterCount,
|
|
pCounters,
|
|
pCounterDescriptions);
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
|
|
VkPhysicalDevice physicalDevice,
|
|
const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo,
|
|
uint32_t *pNumPasses)
|
|
{
|
|
*pNumPasses = DIV_ROUND_UP(pPerformanceQueryCreateInfo->counterIndexCount,
|
|
DRM_V3D_MAX_PERF_COUNTERS);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
v3dv_AcquireProfilingLockKHR(
|
|
VkDevice _device,
|
|
const VkAcquireProfilingLockInfoKHR *pInfo)
|
|
{
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
v3dv_ReleaseProfilingLockKHR(VkDevice device)
|
|
{
|
|
}
|
|
|
|
static inline void
|
|
nir_set_query_availability(nir_builder *b,
|
|
nir_def *buf,
|
|
nir_def *offset,
|
|
nir_def *query_idx,
|
|
nir_def *avail)
|
|
{
|
|
offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
|
|
nir_store_ssbo(b, avail, buf, offset, .write_mask = 0x1, .align_mul = 1);
|
|
}
|
|
|
|
static inline nir_def *
|
|
nir_get_query_availability(nir_builder *b,
|
|
nir_def *buf,
|
|
nir_def *offset,
|
|
nir_def *query_idx)
|
|
{
|
|
offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
|
|
nir_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
|
|
return nir_i2i32(b, avail);
|
|
}
|
|
|
|
static nir_shader *
|
|
get_set_query_availability_cs(const nir_shader_compiler_options *options)
|
|
{
|
|
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
|
|
"set query availability cs");
|
|
|
|
nir_def *buf =
|
|
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
|
|
.desc_set = 0,
|
|
.binding = 0,
|
|
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
|
|
|
/* This assumes a local size of 1 and a horizontal-only dispatch. If we
|
|
* ever change any of these parameters we need to update how we compute the
|
|
* query index here.
|
|
*/
|
|
nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b), 0);
|
|
|
|
nir_def *offset =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
|
|
|
|
nir_def *query_idx =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
|
|
|
|
nir_def *avail =
|
|
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 8, .range = 1);
|
|
|
|
query_idx = nir_iadd(&b, query_idx, wg_id);
|
|
nir_set_query_availability(&b, buf, offset, query_idx, avail);
|
|
|
|
return b.shader;
|
|
}
|
|
|
|
static inline nir_def *
|
|
nir_get_occlusion_counter_offset(nir_builder *b, nir_def *query_idx)
|
|
{
|
|
nir_def *query_group = nir_udiv_imm(b, query_idx, 16);
|
|
nir_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
|
|
nir_def *offset =
|
|
nir_iadd(b, nir_imul_imm(b, query_group, 1024),
|
|
nir_imul_imm(b, query_group_offset, 4));
|
|
return offset;
|
|
}
|
|
|
|
static inline void
|
|
nir_reset_occlusion_counter(nir_builder *b,
|
|
nir_def *buf,
|
|
nir_def *query_idx)
|
|
{
|
|
nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
|
|
nir_def *zero = nir_imm_int(b, 0);
|
|
nir_store_ssbo(b, zero, buf, offset, .write_mask = 0x1, .align_mul = 4);
|
|
}
|
|
|
|
static inline nir_def *
|
|
nir_read_occlusion_counter(nir_builder *b,
|
|
nir_def *buf,
|
|
nir_def *query_idx)
|
|
{
|
|
nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
|
|
return nir_load_ssbo(b, 1, 32, buf, offset, .access = 0, .align_mul = 4);
|
|
}
|
|
|
|
static nir_shader *
|
|
get_reset_occlusion_query_cs(const nir_shader_compiler_options *options)
|
|
{
|
|
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
|
|
"reset occlusion query cs");
|
|
|
|
nir_def *buf =
|
|
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
|
|
.desc_set = 0,
|
|
.binding = 0,
|
|
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
|
|
|
/* This assumes a local size of 1 and a horizontal-only dispatch. If we
|
|
* ever change any of these parameters we need to update how we compute the
|
|
* query index here.
|
|
*/
|
|
nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b), 0);
|
|
|
|
nir_def *avail_offset =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
|
|
|
|
nir_def *base_query_idx =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
|
|
|
|
nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
|
|
|
|
nir_set_query_availability(&b, buf, avail_offset, query_idx,
|
|
nir_imm_intN_t(&b, 0, 8));
|
|
nir_reset_occlusion_counter(&b, buf, query_idx);
|
|
|
|
return b.shader;
|
|
}
|
|
|
|
static void
|
|
write_query_buffer(nir_builder *b,
|
|
nir_def *buf,
|
|
nir_def **offset,
|
|
nir_def *value,
|
|
bool flag_64bit)
|
|
{
|
|
if (flag_64bit) {
|
|
/* Create a 64-bit value using a vec2 with the .Y component set to 0
|
|
* so we can write a 64-bit value in a single store.
|
|
*/
|
|
nir_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
|
|
nir_store_ssbo(b, value64, buf, *offset, .write_mask = 0x3, .align_mul = 8);
|
|
*offset = nir_iadd_imm(b, *offset, 8);
|
|
} else {
|
|
nir_store_ssbo(b, value, buf, *offset, .write_mask = 0x1, .align_mul = 4);
|
|
*offset = nir_iadd_imm(b, *offset, 4);
|
|
}
|
|
}
|
|
|
|
static nir_shader *
|
|
get_copy_query_results_cs(const nir_shader_compiler_options *options,
|
|
VkQueryResultFlags flags)
|
|
{
|
|
bool flag_64bit = flags & VK_QUERY_RESULT_64_BIT;
|
|
bool flag_avail = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
|
|
bool flag_partial = flags & VK_QUERY_RESULT_PARTIAL_BIT;
|
|
|
|
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
|
|
"copy query results cs");
|
|
|
|
nir_def *buf =
|
|
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
|
|
.desc_set = 0,
|
|
.binding = 0,
|
|
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
|
|
|
nir_def *buf_out =
|
|
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
|
|
.desc_set = 1,
|
|
.binding = 0,
|
|
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
|
|
|
/* Read push constants */
|
|
nir_def *avail_offset =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
|
|
|
|
nir_def *base_query_idx =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
|
|
|
|
nir_def *base_offset_out =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 8, .range = 4);
|
|
|
|
nir_def *stride =
|
|
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 12, .range = 4);
|
|
|
|
/* This assumes a local size of 1 and a horizontal-only dispatch. If we
|
|
* ever change any of these parameters we need to update how we compute the
|
|
* query index here.
|
|
*/
|
|
nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b), 0);
|
|
nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
|
|
|
|
/* Read query availability if needed */
|
|
nir_def *avail = NULL;
|
|
if (flag_avail || !flag_partial)
|
|
avail = nir_get_query_availability(&b, buf, avail_offset, query_idx);
|
|
|
|
/* Write occusion query result... */
|
|
nir_def *offset =
|
|
nir_iadd(&b, base_offset_out, nir_imul(&b, wg_id, stride));
|
|
|
|
/* ...if partial is requested, we always write */
|
|
if(flag_partial) {
|
|
nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
|
|
write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
|
|
} else {
|
|
/*...otherwise, we only write if the query is available */
|
|
nir_if *if_stmt = nir_push_if(&b, nir_ine_imm(&b, avail, 0));
|
|
nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
|
|
write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
|
|
nir_pop_if(&b, if_stmt);
|
|
}
|
|
|
|
/* Write query availability */
|
|
if (flag_avail)
|
|
write_query_buffer(&b, buf_out, &offset, avail, flag_64bit);
|
|
|
|
return b.shader;
|
|
}
|
|
|
|
static bool
|
|
create_query_pipelines(struct v3dv_device *device)
|
|
{
|
|
VkResult result;
|
|
VkPipeline pipeline;
|
|
|
|
/* Set layout: single storage buffer */
|
|
if (!device->queries.buf_descriptor_set_layout) {
|
|
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
|
|
.binding = 0,
|
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
.descriptorCount = 1,
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
};
|
|
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
|
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
|
.bindingCount = 1,
|
|
.pBindings = &descriptor_set_layout_binding,
|
|
};
|
|
result =
|
|
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
|
|
&descriptor_set_layout_info,
|
|
&device->vk.alloc,
|
|
&device->queries.buf_descriptor_set_layout);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
}
|
|
|
|
/* Set availability pipeline.
|
|
*
|
|
* Pipeline layout:
|
|
* - 1 storage buffer for the BO with the query availability.
|
|
* - 2 push constants:
|
|
* 0B: offset of the availability info in the buffer (4 bytes)
|
|
* 4B: base query index (4 bytes).
|
|
* 8B: availability (1 byte).
|
|
*/
|
|
if (!device->queries.avail_pipeline_layout) {
|
|
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &device->queries.buf_descriptor_set_layout,
|
|
.pushConstantRangeCount = 1,
|
|
.pPushConstantRanges =
|
|
&(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 9 },
|
|
};
|
|
|
|
result =
|
|
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
|
|
&pipeline_layout_info,
|
|
&device->vk.alloc,
|
|
&device->queries.avail_pipeline_layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
}
|
|
|
|
const nir_shader_compiler_options *compiler_options =
|
|
v3dv_pipeline_get_nir_options(&device->devinfo);
|
|
|
|
if (!device->queries.avail_pipeline) {
|
|
nir_shader *set_query_availability_cs_nir =
|
|
get_set_query_availability_cs(compiler_options);
|
|
result = v3dv_create_compute_pipeline_from_nir(device,
|
|
set_query_availability_cs_nir,
|
|
device->queries.avail_pipeline_layout,
|
|
&pipeline);
|
|
ralloc_free(set_query_availability_cs_nir);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
|
|
device->queries.avail_pipeline = pipeline;
|
|
}
|
|
|
|
/* Reset occlusion query pipeline.
|
|
*
|
|
* Pipeline layout:
|
|
* - 1 storage buffer for the BO with the occlusion and availability data.
|
|
* - Push constants:
|
|
* 0B: offset of the availability info in the buffer (4B)
|
|
* 4B: base query index (4B)
|
|
*/
|
|
if (!device->queries.reset_occlusion_pipeline_layout) {
|
|
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &device->queries.buf_descriptor_set_layout,
|
|
.pushConstantRangeCount = 1,
|
|
.pPushConstantRanges =
|
|
&(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 8 },
|
|
};
|
|
|
|
result =
|
|
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
|
|
&pipeline_layout_info,
|
|
&device->vk.alloc,
|
|
&device->queries.reset_occlusion_pipeline_layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
}
|
|
|
|
if (!device->queries.reset_occlusion_pipeline) {
|
|
nir_shader *reset_occlusion_query_cs_nir =
|
|
get_reset_occlusion_query_cs(compiler_options);
|
|
result = v3dv_create_compute_pipeline_from_nir(
|
|
device,
|
|
reset_occlusion_query_cs_nir,
|
|
device->queries.reset_occlusion_pipeline_layout,
|
|
&pipeline);
|
|
ralloc_free(reset_occlusion_query_cs_nir);
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
|
|
device->queries.reset_occlusion_pipeline = pipeline;
|
|
}
|
|
|
|
/* Copy query results pipelines.
|
|
*
|
|
* Pipeline layout:
|
|
* - 1 storage buffer for the BO with the query availability and occlusion.
|
|
* - 1 storage buffer for the output.
|
|
* - Push constants:
|
|
* 0B: offset of the availability info in the buffer (4B)
|
|
* 4B: base query index (4B)
|
|
* 8B: offset into output buffer (4B)
|
|
* 12B: stride (4B)
|
|
*
|
|
* We create multiple specialized pipelines depending on the copy flags
|
|
* to remove conditionals from the copy shader and get more optimized
|
|
* pipelines.
|
|
*/
|
|
if (!device->queries.copy_pipeline_layout) {
|
|
VkDescriptorSetLayout set_layouts[2] = {
|
|
device->queries.buf_descriptor_set_layout,
|
|
device->queries.buf_descriptor_set_layout
|
|
};
|
|
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
|
.setLayoutCount = 2,
|
|
.pSetLayouts = set_layouts,
|
|
.pushConstantRangeCount = 1,
|
|
.pPushConstantRanges =
|
|
&(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 16 },
|
|
};
|
|
|
|
result =
|
|
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
|
|
&pipeline_layout_info,
|
|
&device->vk.alloc,
|
|
&device->queries.copy_pipeline_layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return false;
|
|
}
|
|
|
|
/* Actual copy pipelines are created lazily on demand since there can be up
|
|
* to 8 depending on the flags used, however it is likely that applications
|
|
* will use the same flags every time and only one pipeline is required.
|
|
*/
|
|
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
destroy_query_pipelines(struct v3dv_device *device)
|
|
{
|
|
VkDevice _device = v3dv_device_to_handle(device);
|
|
|
|
/* Availability pipeline */
|
|
v3dv_DestroyPipeline(_device, device->queries.avail_pipeline,
|
|
&device->vk.alloc);
|
|
device->queries.avail_pipeline = VK_NULL_HANDLE;
|
|
v3dv_DestroyPipelineLayout(_device, device->queries.avail_pipeline_layout,
|
|
&device->vk.alloc);
|
|
device->queries.avail_pipeline_layout = VK_NULL_HANDLE;
|
|
|
|
/* Reset occlusion pipeline */
|
|
v3dv_DestroyPipeline(_device, device->queries.reset_occlusion_pipeline,
|
|
&device->vk.alloc);
|
|
device->queries.reset_occlusion_pipeline = VK_NULL_HANDLE;
|
|
v3dv_DestroyPipelineLayout(_device,
|
|
device->queries.reset_occlusion_pipeline_layout,
|
|
&device->vk.alloc);
|
|
device->queries.reset_occlusion_pipeline_layout = VK_NULL_HANDLE;
|
|
|
|
/* Copy pipelines */
|
|
for (int i = 0; i < 8; i++) {
|
|
v3dv_DestroyPipeline(_device, device->queries.copy_pipeline[i],
|
|
&device->vk.alloc);
|
|
device->queries.copy_pipeline[i] = VK_NULL_HANDLE;
|
|
}
|
|
v3dv_DestroyPipelineLayout(_device, device->queries.copy_pipeline_layout,
|
|
&device->vk.alloc);
|
|
device->queries.copy_pipeline_layout = VK_NULL_HANDLE;
|
|
|
|
v3dv_DestroyDescriptorSetLayout(_device,
|
|
device->queries.buf_descriptor_set_layout,
|
|
&device->vk.alloc);
|
|
device->queries.buf_descriptor_set_layout = VK_NULL_HANDLE;
|
|
}
|
|
|
|
/**
|
|
* Allocates device resources for implementing certain types of queries.
|
|
*/
|
|
VkResult
|
|
v3dv_query_allocate_resources(struct v3dv_device *device)
|
|
{
|
|
if (!create_query_pipelines(device))
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
v3dv_query_free_resources(struct v3dv_device *device)
|
|
{
|
|
destroy_query_pipelines(device);
|
|
}
|