venus: add timeline semaphore feedback cmds

Unlike fence feedback, the commands that update timeline semaphore
feedback slots can't be fully pre-recorded, because signaling a timeline
semaphore takes a counter value as input. To avoid fully recording
commands during vkQueueSubmit, pre-record commands that write a counter
value from a feedback "src" slot to the feedback "dst" slot. Then, at
vkQueueSubmit, parse the signal semaphores, write the signal counter
value into the feedback src slot, and append the command that writes
from that feedback src slot offset to the command buffer associated with
the signal semaphore.

Signed-off-by: Juston Li <justonli@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20500>
This commit is contained in:
Juston Li 2023-02-01 10:58:15 -08:00 committed by Marge Bot
parent 56c6e4862a
commit 067cda659a
3 changed files with 74 additions and 36 deletions

View file

@ -59,7 +59,12 @@ vn_feedback_buffer_create(struct vn_device *dev,
const VkBufferCreateInfo buf_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = size,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
/* Feedback for fences and timeline semaphores will write to this buffer
* as a DST when signalling. Timeline semaphore feedback will also read
* from this buffer as a SRC to retrieve the counter value to signal.
*/
.usage =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode =
exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
/* below favors the current venus protocol */
@ -402,11 +407,18 @@ vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
}
static VkResult
vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
struct vn_feedback_slot *slot)
vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
struct vn_feedback_slot *dst_slot,
struct vn_feedback_slot *src_slot)
{
STATIC_ASSERT(sizeof(*slot->status) == 4);
STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
STATIC_ASSERT(sizeof(*src_slot->counter) == 8);
/* slot size is 8 bytes for timeline semaphore and 4 bytes for fence.
* src slot is non-null for timeline semaphore.
*/
VkDeviceSize buf_size = src_slot ? 8 : 4;
static const VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
@ -421,11 +433,12 @@ vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
static const VkMemoryBarrier mem_barrier_before = {
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = NULL,
/* make pending writes available to stay close to fence signal op */
/* make pending writes available to stay close to signal op */
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
/* no need to make all memory visible for feedback update */
.dstAccessMask = 0,
};
const VkBufferMemoryBarrier buf_barrier_before = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = NULL,
@ -434,15 +447,40 @@ vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = slot->buffer,
.offset = slot->offset,
.size = 4,
.buffer = dst_slot->buffer,
.offset = dst_slot->offset,
.size = buf_size,
};
/* host writes to the src slot should implicitly be made visible by the
* vkQueueSubmit call */
vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
&mem_barrier_before, 1, &buf_barrier_before, 0,
NULL);
vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, VK_SUCCESS);
/* If passed a src_slot, timeline semaphore feedback records a
* cmd to copy the counter value from the src slot to the dst slot.
* If src_slot is NULL, then fence feedback records a cmd to fill
* the dst slot with VK_SUCCESS.
*/
if (src_slot) {
assert(src_slot->type == VN_FEEDBACK_TYPE_TIMELINE_SEMAPHORE);
assert(dst_slot->type == VN_FEEDBACK_TYPE_TIMELINE_SEMAPHORE);
const VkBufferCopy buffer_copy = {
.srcOffset = src_slot->offset,
.dstOffset = dst_slot->offset,
.size = buf_size,
};
vn_CmdCopyBuffer(cmd_handle, src_slot->buffer, dst_slot->buffer, 1,
&buffer_copy);
} else {
assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);
vn_CmdFillBuffer(cmd_handle, dst_slot->buffer, dst_slot->offset,
buf_size, VK_SUCCESS);
}
const VkBufferMemoryBarrier buf_barrier_after = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
@ -451,9 +489,9 @@ vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
.dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = slot->buffer,
.offset = slot->offset,
.size = 4,
.buffer = dst_slot->buffer,
.offset = dst_slot->offset,
.size = buf_size,
};
vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
@ -463,10 +501,11 @@ vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
}
VkResult
vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
struct vn_feedback_slot *slot,
VkCommandBuffer *out_cmd_handle)
vn_feedback_cmd_alloc(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
struct vn_feedback_slot *slot,
struct vn_feedback_slot *src_slot,
VkCommandBuffer *out_cmd_handle)
{
const VkCommandBufferAllocateInfo info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@ -483,7 +522,7 @@ vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
if (result != VK_SUCCESS)
goto out_unlock;
result = vn_feedback_fence_cmd_record(cmd_handle, slot);
result = vn_feedback_cmd_record(cmd_handle, slot, src_slot);
if (result != VK_SUCCESS) {
vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
goto out_unlock;
@ -498,9 +537,9 @@ out_unlock:
}
void
vn_feedback_fence_cmd_free(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
VkCommandBuffer cmd_handle)
vn_feedback_cmd_free(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
VkCommandBuffer cmd_handle)
{
simple_mtx_lock(&pool->mutex);
vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);

View file

@ -113,17 +113,16 @@ vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
VkPipelineStageFlags2 src_stage_mask,
VkResult status,
bool sync2);
VkResult
vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
struct vn_feedback_slot *slot,
VkCommandBuffer *out_cmd_handle);
vn_feedback_cmd_alloc(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
struct vn_feedback_slot *dst_slot,
struct vn_feedback_slot *src_slot,
VkCommandBuffer *out_cmd_handle);
void
vn_feedback_fence_cmd_free(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
VkCommandBuffer cmd_handle);
vn_feedback_cmd_free(VkDevice dev_handle,
struct vn_feedback_cmd_pool *pool,
VkCommandBuffer cmd_handle);
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev);

View file

@ -658,12 +658,12 @@ vn_fence_feedback_init(struct vn_device *dev,
}
for (uint32_t i = 0; i < dev->queue_family_count; i++) {
result = vn_feedback_fence_cmd_alloc(dev_handle, &dev->cmd_pools[i],
slot, &cmd_handles[i]);
result = vn_feedback_cmd_alloc(dev_handle, &dev->cmd_pools[i], slot,
NULL, &cmd_handles[i]);
if (result != VK_SUCCESS) {
for (uint32_t j = 0; j < i; j++) {
vn_feedback_fence_cmd_free(dev_handle, &dev->cmd_pools[j],
cmd_handles[j]);
vn_feedback_cmd_free(dev_handle, &dev->cmd_pools[j],
cmd_handles[j]);
}
break;
}
@ -692,8 +692,8 @@ vn_fence_feedback_fini(struct vn_device *dev,
return;
for (uint32_t i = 0; i < dev->queue_family_count; i++) {
vn_feedback_fence_cmd_free(dev_handle, &dev->cmd_pools[i],
fence->feedback.commands[i]);
vn_feedback_cmd_free(dev_handle, &dev->cmd_pools[i],
fence->feedback.commands[i]);
}
vn_feedback_pool_free(&dev->feedback_pool, fence->feedback.slot);