venus: add event feedback

- add perf option VN_PERF_NO_EVENT_FEEDBACK
- intercept to record feedback cmds for:
  - vkCmdSetEvent
  - vkCmdResetEvent
- add feedback code paths for:
  - vkGetEventStatus
  - vkSetEvent
  - vkResetEvent

Test: dEQP-VK.synchronization.basic.event.*
Test: dEQP-VK.api.command_buffers.record_simul*

Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org>
Reviewed-by: Ryan Neph <ryanneph@google.com>
Reviewed-by: Chad Versace <chadversary@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16731>
This commit is contained in:
Yiwei Zhang 2022-05-24 16:31:20 +00:00 committed by Marge Bot
parent f2563788a1
commit 942ec179f3
9 changed files with 181 additions and 9 deletions

View file

@ -1141,6 +1141,9 @@ vn_CmdSetEvent(VkCommandBuffer commandBuffer,
VkPipelineStageFlags stageMask)
{
VN_CMD_ENQUEUE(vkCmdSetEvent, commandBuffer, event, stageMask);
vn_feedback_event_cmd_record(commandBuffer, event, stageMask,
VK_EVENT_SET);
}
void
@ -1149,6 +1152,9 @@ vn_CmdResetEvent(VkCommandBuffer commandBuffer,
VkPipelineStageFlags stageMask)
{
VN_CMD_ENQUEUE(vkCmdResetEvent, commandBuffer, event, stageMask);
vn_feedback_event_cmd_record(commandBuffer, event, stageMask,
VK_EVENT_RESET);
}
void

View file

@ -34,6 +34,7 @@ static const struct debug_control vn_perf_options[] = {
{ "no_async_set_alloc", VN_PERF_NO_ASYNC_SET_ALLOC },
{ "no_async_buffer_create", VN_PERF_NO_ASYNC_BUFFER_CREATE },
{ "no_async_queue_submit", VN_PERF_NO_ASYNC_QUEUE_SUBMIT },
{ "no_event_feedback", VN_PERF_NO_EVENT_FEEDBACK },
{ NULL, 0 },
};

View file

@ -146,6 +146,7 @@ enum vn_perf {
VN_PERF_NO_ASYNC_SET_ALLOC = 1ull << 0,
VN_PERF_NO_ASYNC_BUFFER_CREATE = 1ull << 1,
VN_PERF_NO_ASYNC_QUEUE_SUBMIT = 1ull << 2,
VN_PERF_NO_EVENT_FEEDBACK = 1ull << 3,
};
typedef uint64_t vn_object_id;

View file

@ -300,6 +300,31 @@ vn_device_fix_create_info(const struct vn_device *dev,
return local_info;
}
/* Set up the per-device feedback slot pool, unless event feedback has been
 * disabled via the VN_PERF_NO_EVENT_FEEDBACK perf option.
 */
static inline VkResult
vn_device_feedback_pool_init(struct vn_device *dev)
{
   /* Slots are suballocated at 8 bytes each, so a 4096-byte initial pool
    * holds a total of 512 fences, semaphores and events, which well covers
    * the common scenarios.  The pool can still grow beyond that on demand.
    */
   static const uint32_t pool_size = 4096;

   if (VN_PERF(NO_EVENT_FEEDBACK))
      return VK_SUCCESS;

   return vn_feedback_pool_init(dev, &dev->feedback_pool, pool_size,
                                &dev->base.base.alloc);
}
/* Tear down the device feedback pool; no-op when event feedback is disabled
 * (the pool was never initialized in that case).
 */
static inline void
vn_device_feedback_pool_fini(struct vn_device *dev)
{
   if (!VN_PERF(NO_EVENT_FEEDBACK))
      vn_feedback_pool_fini(&dev->feedback_pool);
}
static VkResult
vn_device_init(struct vn_device *dev,
struct vn_physical_device *physical_dev,
@ -346,12 +371,19 @@ vn_device_init(struct vn_device *dev,
if (result != VK_SUCCESS)
goto out_memory_pool_fini;
result = vn_device_init_queues(dev, create_info);
result = vn_device_feedback_pool_init(dev);
if (result != VK_SUCCESS)
goto out_buffer_cache_fini;
result = vn_device_init_queues(dev, create_info);
if (result != VK_SUCCESS)
goto out_feedback_pool_fini;
return VK_SUCCESS;
out_feedback_pool_fini:
vn_device_feedback_pool_fini(dev);
out_buffer_cache_fini:
vn_buffer_cache_fini(dev);
@ -423,6 +455,8 @@ vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator)
for (uint32_t i = 0; i < dev->queue_count; i++)
vn_queue_fini(&dev->queues[i]);
vn_device_feedback_pool_fini(dev);
vn_buffer_cache_fini(dev);
for (uint32_t i = 0; i < ARRAY_SIZE(dev->memory_pools); i++)

View file

@ -15,6 +15,7 @@
#include "vn_buffer.h"
#include "vn_device_memory.h"
#include "vn_feedback.h"
struct vn_device {
struct vn_device_base base;
@ -31,6 +32,8 @@ struct vn_device {
struct vn_buffer_cache buffer_cache;
struct vn_feedback_pool feedback_pool;
struct vn_queue *queues;
uint32_t queue_count;
};

View file

@ -7,6 +7,7 @@
#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_queue.h"
/* coherent buffer with bound and mapped memory */
struct vn_feedback_buffer {
@ -263,3 +264,61 @@ vn_feedback_pool_free(struct vn_feedback_pool *pool,
list_add(&slot->head, &pool->free_slots);
simple_mtx_unlock(&pool->mutex);
}
void
vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags stage_mask,
                             VkResult status)
{
   /* Feedback interception for vkCmdSetEvent and vkCmdResetEvent.
    *
    * The feedback cmds are injected after the intercepted event cmd to avoid
    * introducing an unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT
    * and VK_PIPELINE_STAGE_TRANSFER_BIT when the app was not already waiting
    * on them.  The resulting delay in the feedback signal is acceptable for
    * the nature of VkEvent, and the lifecycle of the feedback cmds is guarded
    * by the intercepted command buffer.
    */
   struct vn_event *event = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = event->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkBuffer slot_buf = slot->buffer;
   const VkDeviceSize slot_offset = slot->offset;

   /* Order the feedback write after any prior host/transfer writes that may
    * touch the slot.
    */
   const VkBufferMemoryBarrier pre_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask =
         VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot_buf,
      .offset = slot_offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle,
                         stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                            VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &pre_barrier, 0, NULL);

   /* Write the event status value into the feedback slot. */
   vn_CmdFillBuffer(cmd_handle, slot_buf, slot_offset, 4, status);

   /* Make the written status available to host reads (and later writes). */
   const VkBufferMemoryBarrier post_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot_buf,
      .offset = slot_offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &post_barrier, 0, NULL);
}

View file

@ -101,4 +101,10 @@ vn_feedback_set_counter(struct vn_feedback_slot *slot, uint64_t counter)
*slot->counter = counter;
}
/* Record feedback cmds into cmd_handle that write `status` into the feedback
 * slot of ev_handle; no-op when the event has no feedback slot.  Intended to
 * be called right after the intercepted vkCmdSetEvent/vkCmdResetEvent.
 */
void
vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags stage_mask,
                             VkResult status);
#endif /* VN_FEEDBACK_H */

View file

@ -1030,6 +1030,33 @@ vn_GetSemaphoreFdKHR(VkDevice device,
/* event commands */
/* Allocate and initialize a feedback slot for the event, unless event
 * feedback has been disabled via VN_PERF_NO_EVENT_FEEDBACK.
 */
static VkResult
vn_event_feedback_init(struct vn_device *dev, struct vn_event *ev)
{
   if (VN_PERF(NO_EVENT_FEEDBACK))
      return VK_SUCCESS;

   struct vn_feedback_slot *slot =
      vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_EVENT);
   if (!slot)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* a newly created event object starts out in the unsignaled state */
   vn_feedback_set_status(slot, VK_EVENT_RESET);

   ev->feedback_slot = slot;

   return VK_SUCCESS;
}
/* Return the event's feedback slot, if any, to the device feedback pool. */
static inline void
vn_event_feedback_fini(struct vn_device *dev, struct vn_event *ev)
{
   if (!ev->feedback_slot)
      return;

   vn_feedback_pool_free(&dev->feedback_pool, ev->feedback_slot);
}
VkResult
vn_CreateEvent(VkDevice device,
const VkEventCreateInfo *pCreateInfo,
@ -1047,6 +1074,13 @@ vn_CreateEvent(VkDevice device,
vn_object_base_init(&ev->base, VK_OBJECT_TYPE_EVENT, &dev->base);
/* feedback is only needed to speed up host operations */
if (!(pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT)) {
VkResult result = vn_event_feedback_init(dev, ev);
if (result != VK_SUCCESS)
return vn_error(dev->instance, result);
}
VkEvent ev_handle = vn_event_to_handle(ev);
vn_async_vkCreateEvent(dev->instance, device, pCreateInfo, NULL,
&ev_handle);
@ -1071,6 +1105,8 @@ vn_DestroyEvent(VkDevice device,
vn_async_vkDestroyEvent(dev->instance, device, event, NULL);
vn_event_feedback_fini(dev, ev);
vn_object_base_fini(&ev->base);
vk_free(alloc, ev);
}
@ -1079,11 +1115,13 @@ VkResult
vn_GetEventStatus(VkDevice device, VkEvent event)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_event *ev = vn_event_from_handle(event);
   VkResult result;

   /* TODO When the renderer supports it (requires a new vk extension), there
    * should be a coherent memory backing the event.
    */
   /* NOTE(review): the scraped diff merged the removed pre-commit line
    * (a second `VkResult result = vn_call_...` declaration) with the added
    * lines; the duplicate declaration is dropped here to restore the
    * post-commit body.
    */
   if (ev->feedback_slot)
      result = vn_feedback_get_status(ev->feedback_slot);
   else
      result = vn_call_vkGetEventStatus(dev->instance, device, event);

   return vn_result(dev->instance, result);
}
@ -1092,18 +1130,34 @@ VkResult
vn_SetEvent(VkDevice device, VkEvent event)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_event *ev = vn_event_from_handle(event);

   /* NOTE(review): the scraped diff left the removed pre-commit lines
    * (a blocking vn_call_vkSetEvent plus its `return vn_result(...)`)
    * merged into the body; they are dropped here to restore the
    * post-commit control flow.
    */
   if (ev->feedback_slot) {
      /* update the feedback slot from the host and signal the renderer-side
       * event asynchronously
       */
      vn_feedback_set_status(ev->feedback_slot, VK_EVENT_SET);
      vn_async_vkSetEvent(dev->instance, device, event);
   } else {
      VkResult result = vn_call_vkSetEvent(dev->instance, device, event);
      if (result != VK_SUCCESS)
         return vn_error(dev->instance, result);
   }

   return VK_SUCCESS;
}
VkResult
vn_ResetEvent(VkDevice device, VkEvent event)
{
   struct vn_device *dev = vn_device_from_handle(device);
   struct vn_event *ev = vn_event_from_handle(event);

   /* NOTE(review): the scraped diff left the removed pre-commit lines
    * (a blocking vn_call_vkResetEvent plus its `return vn_result(...)`)
    * merged into the body; they are dropped here to restore the
    * post-commit control flow.
    */
   if (ev->feedback_slot) {
      /* reset the feedback slot from the host and reset the renderer-side
       * event asynchronously
       */
      vn_feedback_reset_status(ev->feedback_slot);
      vn_async_vkResetEvent(dev->instance, device, event);
   } else {
      VkResult result = vn_call_vkResetEvent(dev->instance, device, event);
      if (result != VK_SUCCESS)
         return vn_error(dev->instance, result);
   }

   return VK_SUCCESS;
}

View file

@ -13,6 +13,8 @@
#include "vn_common.h"
#include "vn_feedback.h"
struct vn_queue {
struct vn_object_base base;
@ -72,6 +74,12 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vn_semaphore,
struct vn_event {
   struct vn_object_base base;

   /* non-NULL only when both conditions hold:
    * - the event was created without VK_EVENT_CREATE_DEVICE_ONLY_BIT
    * - the VN_PERF_NO_EVENT_FEEDBACK perf option is not set
    */
   struct vn_feedback_slot *feedback_slot;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(vn_event,
base.base,