mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 06:20:09 +01:00
nvk: Reuse the same cond render temp in a cmd_buf
Within a single command buffer, we know that our operations will happen sequentially, so we don't need to allocate a unique address per vkCmdBeginConditionalRenderingEXT; we can reuse the same address instead. Improves perf on the Sascha Willems conditionalrender demo with all rendering disabled by about 2% (595 fps -> 607 fps). Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37187>
This commit is contained in:
parent
64b4e52755
commit
966a1b5380
3 changed files with 13 additions and 48 deletions
|
|
@ -122,7 +122,6 @@ nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
|
|||
cmd->push_mem_limit = NULL;
|
||||
cmd->push = (struct nv_push) {0};
|
||||
cmd->cond_render_mem = NULL;
|
||||
cmd->cond_render_offset = 0;
|
||||
|
||||
util_dynarray_clear(&cmd->pushes);
|
||||
|
||||
|
|
@ -161,7 +160,7 @@ const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
|
|||
/* If we ever fail to allocate a push, we use this */
|
||||
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];
|
||||
|
||||
static VkResult
|
||||
VkResult
|
||||
nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd, bool force_gart,
|
||||
struct nvk_cmd_mem **mem_out)
|
||||
{
|
||||
|
|
@ -300,43 +299,6 @@ nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* Reserve a 64-byte scratch slot for conditional rendering and store its
 * address in *addr.
 *
 * Fast path: when cmd->cond_render_mem is already set and 64 more bytes fit
 * below NVK_CMD_MEM_SIZE, the slot is carved out at cmd->cond_render_offset
 * and that offset is advanced by 64.
 *
 * Slow path: a fresh nvk_cmd_mem is obtained through
 * nvk_cmd_buffer_alloc_mem(cmd, false, &mem) and the slot is placed at the
 * start of it.
 *
 * Returns VK_SUCCESS, or whatever error nvk_cmd_buffer_alloc_mem() reported
 * (in which case *addr is not written).
 */
VkResult
|
||||
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
|
||||
uint64_t *addr)
|
||||
{
|
||||
uint32_t offset = cmd->cond_render_offset;
|
||||
/* Fixed size of every slot handed out by this function. */
uint32_t size = 64;
|
||||
|
||||
assert(offset <= NVK_CMD_MEM_SIZE);
|
||||
/* Written as "size <= LIMIT - offset" rather than "offset + size <= LIMIT";
 * the assert above guarantees the subtraction cannot wrap.
 */
if (cmd->cond_render_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
|
||||
*addr = cmd->cond_render_mem->mem->va->addr + offset;
|
||||
|
||||
cmd->cond_render_offset = offset + size;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
struct nvk_cmd_mem *mem;
|
||||
VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &mem);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
/* The new slot sits at the very start of the freshly allocated memory. */
*addr = mem->mem->va->addr;
|
||||
|
||||
/* NOTE(review): the comment below talks about the "upload BO" although this
 * function manages cond_render_mem -- presumably wording carried over from
 * the upload allocator; confirm.
 */
/* Pick whichever of the current upload BO and the new BO will have more
|
||||
* room left to be the BO for the next upload. If our upload size is
|
||||
* bigger than the old offset, we're better off burning the whole new
|
||||
* upload BO on this one allocation and continuing on the current upload
|
||||
* BO.
|
||||
*/
|
||||
/* Switch to the new memory (with 64 bytes already consumed) when there was
 * no current memory, or when the current one has used more than `size`
 * bytes; otherwise keep allocating from the current one.
 */
if (cmd->cond_render_mem == NULL || size < cmd->cond_render_offset) {
|
||||
cmd->cond_render_mem = mem;
|
||||
cmd->cond_render_offset = size;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
|
||||
uint32_t size, uint32_t alignment,
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ struct nvk_cmd_buffer {
|
|||
uint32_t upload_offset;
|
||||
|
||||
struct nvk_cmd_mem *cond_render_mem;
|
||||
uint32_t cond_render_offset;
|
||||
|
||||
struct nvk_cmd_mem *push_mem;
|
||||
uint32_t *push_mem_limit;
|
||||
|
|
@ -353,6 +352,10 @@ nvk_cmd_buffer_last_subchannel(const struct nvk_cmd_buffer *cmd)
|
|||
}
|
||||
}
|
||||
|
||||
VkResult nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd,
|
||||
bool force_gart,
|
||||
struct nvk_cmd_mem **mem_out);
|
||||
|
||||
VkResult nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
|
||||
uint32_t size, uint32_t alignment,
|
||||
uint64_t *addr, void **ptr);
|
||||
|
|
@ -361,9 +364,6 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
|
|||
const void *data, uint32_t size,
|
||||
uint32_t alignment, uint64_t *addr);
|
||||
|
||||
VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
|
||||
uint64_t *addr);
|
||||
|
||||
VkResult nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
|
||||
uint32_t size, uint32_t alignment,
|
||||
uint64_t *addr, void **ptr);
|
||||
|
|
|
|||
|
|
@ -4857,12 +4857,15 @@ nvk_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer,
|
|||
* The hardware compares a pair of 64-bit values, so we need to copy the
|
||||
* input value into one operand and zero into the other operand.
|
||||
*/
|
||||
uint64_t tmp_addr;
|
||||
VkResult result = nvk_cmd_buffer_cond_render_alloc(cmd, &tmp_addr);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
if (cmd->cond_render_mem == NULL) {
|
||||
VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false,
|
||||
&cmd->cond_render_mem);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
const uint64_t tmp_addr = cmd->cond_render_mem->mem->va->addr;
|
||||
|
||||
/* Frustratingly, the u64s are not packed together */
|
||||
const uint64_t operand_a_addr = tmp_addr + 0;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue