turnip: Copy command buffers to deferred submit request

To make sure that the indices into the global bo table stored in
drm_msm_gem_submit_cmd are still valid at actual submit time.

v1. Move the entry_count calculation into the submit request creation
function.

Fixes: #4877
Fixes: 3f229e34 ("turnip: Implement VK_KHR_timeline_semaphore.")

Signed-off-by: Hyunjun Ko <zzoon@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11260>
This commit is contained in:
Hyunjun Ko 2021-06-08 06:49:45 +00:00 committed by Marge Bot
parent cebbdf5de3
commit 639579d116

View file

@ -83,6 +83,9 @@ struct tu_queue_submit
{
struct list_head link;
VkCommandBuffer *cmd_buffers;
uint32_t cmd_buffer_count;
struct tu_syncobj **wait_semaphores;
uint32_t wait_semaphore_count;
struct tu_syncobj **signal_semaphores;
@ -106,6 +109,7 @@ struct tu_queue_submit
bool last_submit;
uint32_t entry_count;
uint32_t counter_pass_index;
};
static int
@ -837,10 +841,10 @@ tu_queue_submit_add_timeline_signal_locked(struct tu_queue_submit* submit,
static VkResult
tu_queue_submit_create_locked(struct tu_queue *queue,
const VkSubmitInfo *submit_info,
const uint32_t entry_count,
const uint32_t nr_in_syncobjs,
const uint32_t nr_out_syncobjs,
const bool last_submit,
const VkPerformanceQuerySubmitInfoKHR *perf_info,
struct tu_queue_submit **submit)
{
VkResult result;
@ -862,6 +866,19 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
struct tu_queue_submit *new_submit = vk_zalloc(&queue->device->vk.alloc,
sizeof(*new_submit), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
new_submit->cmd_buffer_count = submit_info->commandBufferCount;
new_submit->cmd_buffers = vk_zalloc(&queue->device->vk.alloc,
new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (new_submit->cmd_buffers == NULL) {
result = vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY)
goto fail_cmd_buffers;
}
memcpy(new_submit->cmd_buffers, submit_info->pCommandBuffers,
new_submit->cmd_buffer_count * sizeof(*new_submit->cmd_buffers));
new_submit->wait_semaphores = vk_zalloc(&queue->device->vk.alloc,
submit_info->waitSemaphoreCount * sizeof(*new_submit->wait_semaphores),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@ -904,6 +921,16 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
}
}
uint32_t entry_count = 0;
for (uint32_t j = 0; j < new_submit->cmd_buffer_count; ++j) {
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, new_submit->cmd_buffers[j]);
if (perf_info)
entry_count++;
entry_count += cmdbuf->cs.entry_count;
}
new_submit->cmds = vk_zalloc(&queue->device->vk.alloc,
entry_count * sizeof(*new_submit->cmds), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@ -937,6 +964,8 @@ tu_queue_submit_create_locked(struct tu_queue *queue,
new_submit->nr_in_syncobjs = nr_in_syncobjs;
new_submit->nr_out_syncobjs = nr_out_syncobjs;
new_submit->last_submit = last_submit;
new_submit->counter_pass_index = perf_info ? perf_info->counterPassIndex : ~0;
list_inithead(&new_submit->link);
*submit = new_submit;
@ -954,6 +983,8 @@ fail_wait_timelines:
fail_signal_semaphores:
vk_free(&queue->device->vk.alloc, new_submit->wait_semaphores);
fail_wait_semaphores:
vk_free(&queue->device->vk.alloc, new_submit->cmd_buffers);
fail_cmd_buffers:
return result;
}
@ -971,9 +1002,49 @@ tu_queue_submit_free(struct tu_queue *queue, struct tu_queue_submit *submit)
vk_free(&queue->device->vk.alloc, submit->cmds);
vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
vk_free(&queue->device->vk.alloc, submit);
}
static void
tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
struct tu_queue_submit *submit)
{
struct drm_msm_gem_submit_cmd *cmds = submit->cmds;
uint32_t entry_idx = 0;
for (uint32_t j = 0; j < submit->cmd_buffer_count; ++j) {
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->cmd_buffers[j]);
struct tu_cs *cs = &cmdbuf->cs;
struct tu_device *dev = queue->device;
if (submit->counter_pass_index != ~0) {
struct tu_cs_entry *perf_cs_entry =
&dev->perfcntrs_pass_cs_entries[submit->counter_pass_index];
cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
cmds[entry_idx].submit_idx =
dev->bo_idx[perf_cs_entry->bo->gem_handle];
cmds[entry_idx].submit_offset = perf_cs_entry->offset;
cmds[entry_idx].size = perf_cs_entry->size;
cmds[entry_idx].pad = 0;
cmds[entry_idx].nr_relocs = 0;
cmds[entry_idx++].relocs = 0;
}
for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
cmds[entry_idx].submit_idx =
dev->bo_idx[cs->entries[i].bo->gem_handle];
cmds[entry_idx].submit_offset = cs->entries[i].offset;
cmds[entry_idx].size = cs->entries[i].size;
cmds[entry_idx].pad = 0;
cmds[entry_idx].nr_relocs = 0;
cmds[entry_idx].relocs = 0;
}
}
}
static VkResult
tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
{
@ -990,6 +1061,12 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
mtx_lock(&queue->device->bo_mutex);
/* drm_msm_gem_submit_cmd requires the index of a bo, which can change at
 * any time while bo_mutex is not held. So we build the submit cmds here,
 * at the point where the submit actually happens.
 */
tu_queue_build_msm_gem_submit_cmds(queue, submit);
struct drm_msm_gem_submit req = {
.flags = flags,
.queueid = queue->msm_queue_id,
@ -1186,22 +1263,12 @@ tu_QueueSubmit(VkQueue _queue,
if (last_submit && fence)
out_syncobjs_size += 1;
uint32_t entry_count = 0;
for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]);
if (perf_info)
entry_count++;
entry_count += cmdbuf->cs.entry_count;
}
pthread_mutex_lock(&queue->device->submit_mutex);
struct tu_queue_submit *submit_req = NULL;
VkResult ret = tu_queue_submit_create_locked(queue, submit,
entry_count, submit->waitSemaphoreCount, out_syncobjs_size,
last_submit, &submit_req);
submit->waitSemaphoreCount, out_syncobjs_size,
last_submit, perf_info, &submit_req);
if (ret != VK_SUCCESS) {
pthread_mutex_unlock(&queue->device->submit_mutex);
@ -1246,38 +1313,6 @@ tu_QueueSubmit(VkQueue _queue,
};
}
struct drm_msm_gem_submit_cmd *cmds = submit_req->cmds;
uint32_t entry_idx = 0;
for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]);
struct tu_cs *cs = &cmdbuf->cs;
if (perf_info) {
struct tu_cs_entry *perf_cs_entry =
&cmdbuf->device->perfcntrs_pass_cs_entries[perf_info->counterPassIndex];
cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
cmds[entry_idx].submit_idx =
queue->device->bo_idx[perf_cs_entry->bo->gem_handle];
cmds[entry_idx].submit_offset = perf_cs_entry->offset;
cmds[entry_idx].size = perf_cs_entry->size;
cmds[entry_idx].pad = 0;
cmds[entry_idx].nr_relocs = 0;
cmds[entry_idx++].relocs = 0;
}
for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF;
cmds[entry_idx].submit_idx =
queue->device->bo_idx[cs->entries[i].bo->gem_handle];
cmds[entry_idx].submit_offset = cs->entries[i].offset;
cmds[entry_idx].size = cs->entries[i].size;
cmds[entry_idx].pad = 0;
cmds[entry_idx].nr_relocs = 0;
cmds[entry_idx].relocs = 0;
}
}
/* Queue the current submit */
list_addtail(&submit_req->link, &queue->queued_submits);
ret = tu_device_submit_deferred_locked(queue->device);