turnip: Allow load/store skipping in vkCmdClearAttachments().

To make that possible, the clear has to be emitted as a 3D draw (so that
it goes through the binner's visibility calculations). Hopefully the
increased overhead for apps with non-skippable rendering is balanced by
the tile load/store skipping this enables in others.

The real motivation is to get draw-time state out of tile load setup.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16826>
This commit is contained in:
Emma Anholt 2022-06-01 15:01:44 -07:00 committed by Marge Bot
parent b8619ef343
commit a92fad45e9
4 changed files with 34 additions and 41 deletions

View file

@ -2312,8 +2312,6 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
}
}
cmd->state.attachment_cmd_clear[a] = true;
}
/* We may not know the multisample count if there are no attachments, so
@ -2587,8 +2585,6 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
if (a == VK_ATTACHMENT_UNUSED)
continue;
cmd->state.attachment_cmd_clear[a] = true;
tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
&attachments[j].clearValue);
}
@ -2627,6 +2623,29 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
return;
}
/* If we could skip tile load/stores based on any draws intersecting them at
* binning time, then emit the clear as a 3D draw so that it contributes to
* that visibility.
*/
const struct tu_subpass *subpass = cmd->state.subpass;
for (uint32_t i = 0; i < attachmentCount; i++) {
uint32_t a;
if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
uint32_t c = pAttachments[i].colorAttachment;
a = subpass->color_attachments[c].attachment;
} else {
a = subpass->depth_stencil_attachment.attachment;
}
if (a != VK_ATTACHMENT_UNUSED) {
const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a];
if (att->cond_load_allowed || att->cond_store_allowed) {
tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
return;
}
}
}
/* Otherwise, emit 2D blits for gmem rendering. */
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
tu_cond_exec_end(cs);
@ -2905,10 +2924,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
* To simplify conditions treat partially cleared separate DS as fully
* cleared and don't emit cond_exec.
*/
bool cond_exec = cond_exec_allowed &&
!attachment->clear_mask &&
!cmd->state.attachment_cmd_clear[a] &&
!attachment->will_be_resolved;
bool cond_exec = cond_exec_allowed && attachment->cond_load_allowed;
if (cond_exec)
tu_begin_load_store_cond_exec(cmd, cs, true);
@ -3035,11 +3051,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
if (!dst->store && !dst->store_stencil)
return;
bool was_cleared = src->clear_mask || cmd->state.attachment_cmd_clear[a];
/* Unconditional store should happen only if attachment was cleared,
* which could have happened either by load_op or via vkCmdClearAttachments.
*/
bool cond_exec = cond_exec_allowed && !was_cleared;
bool cond_exec = cond_exec_allowed && src->cond_store_allowed;
if (cond_exec) {
tu_begin_load_store_cond_exec(cmd, cs, false);
}

View file

@ -1540,8 +1540,6 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
tu_cs_finish(&cmd_buffer->sub_cs);
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
u_trace_fini(&cmd_buffer->trace);
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
@ -1571,9 +1569,6 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
tu_cs_reset(&cmd_buffer->draw_epilogue_cs);
tu_cs_reset(&cmd_buffer->sub_cs);
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
cmd_buffer->state.attachment_cmd_clear = NULL;
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
@ -1743,14 +1738,6 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
cmd_buffer->state.subpass =
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
/* vkCmdClearAttachments is allowed in a secondary cmdbuf and we have to
* track it as in primary cmdbuf.
*/
cmd_buffer->state.attachment_cmd_clear =
vk_zalloc(&cmd_buffer->pool->vk.alloc,
cmd_buffer->state.pass->attachment_count *
sizeof(cmd_buffer->state.attachment_cmd_clear[0]),
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
} else {
/* When executing in the middle of another command buffer, the CCU
* state is unknown.
@ -3277,10 +3264,6 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
cmd->state.draw_cs_writes_to_cond_pred |=
secondary->state.draw_cs_writes_to_cond_pred;
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; i++) {
cmd->state.attachment_cmd_clear[i] |=
secondary->state.attachment_cmd_clear[i];
}
} else {
assert(tu_cs_is_empty(&secondary->draw_cs));
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
@ -3477,16 +3460,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
return;
}
cmd->state.attachment_cmd_clear =
vk_zalloc(&cmd->pool->vk.alloc, pass->attachment_count *
sizeof(cmd->state.attachment_cmd_clear[0]), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!cmd->state.attachment_cmd_clear) {
cmd->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
return;
}
cmd->state.draw_cs_writes_to_cond_pred = false;
for (unsigned i = 0; i < pass->attachment_count; i++) {
@ -4827,8 +4800,6 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
tu_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier, true);
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
cmd_buffer->state.attachment_cmd_clear = NULL;
cmd_buffer->state.pass = NULL;
cmd_buffer->state.subpass = NULL;

View file

@ -863,6 +863,11 @@ tu_CreateRenderPass2(VkDevice _device,
att->clear_mask = 0;
att->load = false;
}
att->cond_load_allowed =
(att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
att->cond_store_allowed =
(att->store || att->store_stencil) && !att->clear_mask;
}
/* From the VK_KHR_multiview spec:

View file

@ -1201,8 +1201,6 @@ struct tu_cmd_state
VkRect2D render_area;
const struct tu_image_view **attachments;
/* Tracks whether attachment was cleared by vkCmdClearAttachments */
bool *attachment_cmd_clear;
/* Track whether conditional predicate for COND_REG_EXEC is changed in draw_cs */
bool draw_cs_writes_to_cond_pred;
@ -1897,6 +1895,10 @@ struct tu_render_pass_attachment
/* for D32S8 separate stencil: */
bool load_stencil;
bool store_stencil;
bool cond_load_allowed;
bool cond_store_allowed;
int32_t gmem_offset_stencil;
};