nvk: Reuse the same cond render temp in a cmd_buf

Within a single command buffer, we know that our operations will happen
sequentially so we don't need to allocate a unique address per
vkCmdBeginConditionalRenderingEXT - we can re-use the same address
instead.

Improves perf on the Sascha Willems conditionalrender demo with all
rendering disabled by about 2% (595 fps -> 607 fps)

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37187>
This commit is contained in:
Mel Henning 2025-09-04 14:58:33 -04:00 committed by Marge Bot
parent 64b4e52755
commit 966a1b5380
3 changed files with 13 additions and 48 deletions

View file

@ -122,7 +122,6 @@ nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
cmd->push_mem_limit = NULL;
cmd->push = (struct nv_push) {0};
cmd->cond_render_mem = NULL;
cmd->cond_render_offset = 0;
util_dynarray_clear(&cmd->pushes);
@ -161,7 +160,7 @@ const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
/* If we ever fail to allocate a push, we use this */
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];
static VkResult
VkResult
nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd, bool force_gart,
struct nvk_cmd_mem **mem_out)
{
@ -300,43 +299,6 @@ nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
return VK_SUCCESS;
}
/* Reserve one 64-byte conditional-rendering slot for this command buffer.
 *
 * Slots are handed out sequentially from cmd->cond_render_mem, tracked by
 * cmd->cond_render_offset.  When there is no current chunk, or the current
 * chunk cannot hold another slot, a new chunk is requested from
 * nvk_cmd_buffer_alloc_mem() and the slot is placed at its start.
 *
 * On success, *addr receives the slot's address (taken from mem->va->addr
 * plus the slot offset) and VK_SUCCESS is returned.  On allocation failure
 * the allocator's result is returned and *addr is not written.
 */
VkResult
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                 uint64_t *addr)
{
   const uint32_t slot_size = 64;
   const uint32_t cur_offset = cmd->cond_render_offset;

   assert(cur_offset <= NVK_CMD_MEM_SIZE);

   if (cmd->cond_render_mem == NULL ||
       slot_size > NVK_CMD_MEM_SIZE - cur_offset) {
      /* No chunk yet, or the current one is full: grab a new chunk. */
      struct nvk_cmd_mem *fresh;
      VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &fresh);
      if (unlikely(result != VK_SUCCESS))
         return result;

      *addr = fresh->mem->va->addr;

      /* Keep serving future slots from whichever chunk has more room left.
       * The new chunk wins unless the slot is at least as large as the
       * offset already consumed in the old chunk, in which case the new
       * chunk is spent on this one slot and the old chunk stays current.
       */
      if (cmd->cond_render_mem == NULL ||
          slot_size < cmd->cond_render_offset) {
         cmd->cond_render_mem = fresh;
         cmd->cond_render_offset = slot_size;
      }

      return VK_SUCCESS;
   }

   /* Common case: carve the slot out of the current chunk. */
   *addr = cmd->cond_render_mem->mem->va->addr + cur_offset;
   cmd->cond_render_offset = cur_offset + slot_size;

   return VK_SUCCESS;
}
VkResult
nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
uint32_t size, uint32_t alignment,

View file

@ -223,7 +223,6 @@ struct nvk_cmd_buffer {
uint32_t upload_offset;
struct nvk_cmd_mem *cond_render_mem;
uint32_t cond_render_offset;
struct nvk_cmd_mem *push_mem;
uint32_t *push_mem_limit;
@ -353,6 +352,10 @@ nvk_cmd_buffer_last_subchannel(const struct nvk_cmd_buffer *cmd)
}
}
/* Allocates a struct nvk_cmd_mem for cmd and returns it through mem_out.
 * Declared here so that code outside the defining file, such as
 * nvk_CmdBeginConditionalRenderingEXT, can call it directly.
 * NOTE(review): the meaning of force_gart is defined by the implementation,
 * which is not visible here; confirm before relying on it.
 */
VkResult nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd,
bool force_gart,
struct nvk_cmd_mem **mem_out);
VkResult nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
uint32_t size, uint32_t alignment,
uint64_t *addr, void **ptr);
@ -361,9 +364,6 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
const void *data, uint32_t size,
uint32_t alignment, uint64_t *addr);
/* Reserves a 64-byte conditional-rendering slot in cmd and writes its
 * address to addr.  Returns VK_SUCCESS, or the error from the underlying
 * memory allocation, in which case addr is not written.
 */
VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
uint64_t *addr);
VkResult nvk_cmd_buffer_alloc_qmd(struct nvk_cmd_buffer *cmd,
uint32_t size, uint32_t alignment,
uint64_t *addr, void **ptr);

View file

@ -4857,12 +4857,15 @@ nvk_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer,
* The hardware compares a pair of 64-bit values, so we need to copy the
* input value into one operand and zero into the other operand.
*/
uint64_t tmp_addr;
VkResult result = nvk_cmd_buffer_cond_render_alloc(cmd, &tmp_addr);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd->vk, result);
return;
if (cmd->cond_render_mem == NULL) {
VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false,
&cmd->cond_render_mem);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd->vk, result);
return;
}
}
const uint64_t tmp_addr = cmd->cond_render_mem->mem->va->addr;
/* Frustratingly, the u64s are not packed together */
const uint64_t operand_a_addr = tmp_addr + 0;