turnip: Keep a host copy of push descriptor sets.

Otherwise, the back-copy on same-layout push descriptor updates would read
from write-combined (WC) memory, which is absurdly slow.  Improves performance
of vkoverhead's descriptor_template_12ubo_push from 760k/sec to 2876k/sec.
Improves submit-disabled gfxbench gl_driver2 performance on zink from 79.6
fps to 103.6 fps.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18561>
This commit is contained in:
Emma Anholt 2022-09-14 12:29:02 -07:00 committed by Marge Bot
parent e313098427
commit 03294e1dd1
2 changed files with 47 additions and 24 deletions

View file

@ -1645,6 +1645,8 @@ tu_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
if (cmd_buffer->descriptors[i].push_set.layout)
vk_descriptor_set_layout_unref(&cmd_buffer->device->vk,
&cmd_buffer->descriptors[i].push_set.layout->vk);
vk_free(&cmd_buffer->device->vk.alloc,
cmd_buffer->descriptors[i].push_set.mapped_ptr);
}
vk_command_buffer_finish(&cmd_buffer->vk);
@ -2156,6 +2158,31 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
}
}
static enum VkResult
tu_push_descriptor_set_update_layout(struct tu_device *device,
struct tu_descriptor_set *set,
struct tu_descriptor_set_layout *layout)
{
if (set->layout == layout)
return VK_SUCCESS;
if (set->layout)
vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk);
vk_descriptor_set_layout_ref(&layout->vk);
set->layout = layout;
if (set->host_size < layout->size) {
void *new_buf =
vk_realloc(&device->vk.alloc, set->mapped_ptr, layout->size, 8,
VK_QUERY_SCOPE_COMMAND_BUFFER_KHR);
if (!new_buf)
return VK_ERROR_OUT_OF_HOST_MEMORY;
set->mapped_ptr = new_buf;
set->host_size = layout->size;
}
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
tu_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@ -2179,23 +2206,18 @@ tu_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer,
return;
}
/* preserve previous content if the layout is the same: */
if (set->layout == layout)
memcpy(set_mem.map, set->mapped_ptr, layout->size);
if (set->layout != layout) {
if (set->layout)
vk_descriptor_set_layout_unref(&cmd->device->vk, &set->layout->vk);
vk_descriptor_set_layout_ref(&layout->vk);
set->layout = layout;
result = tu_push_descriptor_set_update_layout(cmd->device, set, layout);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd->vk, result);
return;
}
set->mapped_ptr = set_mem.map;
set->va = set_mem.iova;
tu_update_descriptor_sets(cmd->device, tu_descriptor_set_to_handle(set),
descriptorWriteCount, pDescriptorWrites, 0, NULL);
memcpy(set_mem.map, set->mapped_ptr, layout->size);
set->va = set_mem.iova;
tu_CmdBindDescriptorSets(commandBuffer, pipelineBindPoint, _layout, _set,
1, (VkDescriptorSet[]) { tu_descriptor_set_to_handle(set) },
0, NULL);
@ -2224,22 +2246,17 @@ tu_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
return;
}
/* preserve previous content if the layout is the same: */
if (set->layout == layout)
memcpy(set_mem.map, set->mapped_ptr, layout->size);
if (set->layout != layout) {
if (set->layout)
vk_descriptor_set_layout_unref(&cmd->device->vk, &set->layout->vk);
vk_descriptor_set_layout_ref(&layout->vk);
set->layout = layout;
result = tu_push_descriptor_set_update_layout(cmd->device, set, layout);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd->vk, result);
return;
}
set->mapped_ptr = set_mem.map;
set->va = set_mem.iova;
tu_update_descriptor_set_with_template(cmd->device, set, descriptorUpdateTemplate, pData);
memcpy(set_mem.map, set->mapped_ptr, layout->size);
set->va = set_mem.iova;
tu_CmdBindDescriptorSets(commandBuffer, templ->bind_point, _layout, _set,
1, (VkDescriptorSet[]) { tu_descriptor_set_to_handle(set) },
0, NULL);

View file

@ -114,8 +114,14 @@ struct tu_descriptor_set
uint32_t size;
uint64_t va;
/* Pointer to the GPU memory for the set for non-push descriptors, or pointer
* to a host memory copy for push descriptors.
*/
uint32_t *mapped_ptr;
/* Size of the host memory allocation for push descriptors */
uint32_t host_size;
uint32_t *dynamic_descriptors;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, base, VkDescriptorSet,