From 165c26b9e12745f39ba4c03b97b924a53ab308ef Mon Sep 17 00:00:00 2001
From: Boris Brezillon
Date: Mon, 6 Sep 2021 16:06:49 +0200
Subject: [PATCH] panvk: Preload FB attachments when required

There are at least three situations where we need to preload FBs:

1. The attachment is flagged VK_ATTACHMENT_LOAD_OP_LOAD and has not been
   accessed in previous subpasses
2. The batch is implicitly split (e.g. too many jobs queued to the batch,
   wait/set events queued, ...)
3. The attachment has been written by a previous subpass

With those changes, we can get rid of panvk_emit_fb() and call
pan_emit_fbd() directly (fb_info is initialized when starting a subpass
and updated when an implicit split happens).

Signed-off-by: Boris Brezillon
Reviewed-by: Tomeu Vizoso
Part-of:
---
Reviewer notes (ignored by git-am):
- panvk_cmd_fb_info_set_subpass() now takes the stencil clear value from
  clears[...].stencil; it used to be read from clears[...].depth, a typo
  inherited from the removed emit_fb(). This assumes struct
  panvk_clear_value exposes a 'stencil' member -- confirm against
  panvk_private.h.
- NOTE(review): subpass->zs_attachment.preload is set in panvk_pass.c but
  nothing in this patch reads it; only the batch-split helper sets
  zs.preload.{z,s}. Worth confirming that is intended.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy; apply with --ignore-whitespace if context does not match exactly.

 src/panfrost/vulkan/panvk_cmd_buffer.c    | 95 ++++++++++++++++++++++-
 src/panfrost/vulkan/panvk_pass.c          | 25 ++++--
 src/panfrost/vulkan/panvk_private.h       | 17 +++-
 src/panfrost/vulkan/panvk_vX_cmd_buffer.c | 31 ++++++--
 src/panfrost/vulkan/panvk_vX_cs.c         | 62 ---------------
 src/panfrost/vulkan/panvk_vX_cs.h         | 10 ---
 6 files changed, 152 insertions(+), 88 deletions(-)

diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.c b/src/panfrost/vulkan/panvk_cmd_buffer.c
index ae13cd4d394..af0fcf3b316 100644
--- a/src/panfrost/vulkan/panvk_cmd_buffer.c
+++ b/src/panfrost/vulkan/panvk_cmd_buffer.c
@@ -341,6 +341,75 @@ panvk_cmd_prepare_clear_values(struct panvk_cmd_buffer *cmdbuf,
    }
 }
 
+void
+panvk_cmd_fb_info_set_subpass(struct panvk_cmd_buffer *cmdbuf)
+{
+   const struct panvk_subpass *subpass = cmdbuf->state.subpass;
+   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
+   const struct panvk_framebuffer *fb = cmdbuf->state.framebuffer;
+   const struct panvk_clear_value *clears = cmdbuf->state.clear;
+   struct panvk_image_view *view;
+
+   fbinfo->nr_samples = 1;
+   fbinfo->rt_count = subpass->color_count;
+   memset(&fbinfo->bifrost.pre_post.dcds, 0, sizeof(fbinfo->bifrost.pre_post.dcds));
+
+   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
+      int idx = subpass->color_attachments[cb].idx;
+      view = idx != VK_ATTACHMENT_UNUSED ?
+             fb->attachments[idx].iview : NULL;
+      if (!view)
+         continue;
+      fbinfo->rts[cb].view = &view->pview;
+      fbinfo->rts[cb].clear = subpass->color_attachments[cb].clear;
+      fbinfo->rts[cb].preload = subpass->color_attachments[cb].preload;
+      fbinfo->rts[cb].crc_valid = &cmdbuf->state.fb.crc_valid[cb];
+
+      memcpy(fbinfo->rts[cb].clear_value, clears[idx].color,
+             sizeof(fbinfo->rts[cb].clear_value));
+      fbinfo->nr_samples =
+         MAX2(fbinfo->nr_samples, view->pview.image->layout.nr_samples);
+   }
+
+   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
+      view = fb->attachments[subpass->zs_attachment.idx].iview;
+      const struct util_format_description *fdesc =
+         util_format_description(view->pview.format);
+
+      fbinfo->nr_samples =
+         MAX2(fbinfo->nr_samples, view->pview.image->layout.nr_samples);
+
+      if (util_format_has_depth(fdesc)) {
+         fbinfo->zs.clear.z = subpass->zs_attachment.clear;
+         fbinfo->zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
+         fbinfo->zs.view.zs = &view->pview;
+      }
+
+      if (util_format_has_stencil(fdesc)) {
+         fbinfo->zs.clear.s = subpass->zs_attachment.clear;
+         fbinfo->zs.clear_value.stencil = clears[subpass->zs_attachment.idx].stencil;
+         if (!fbinfo->zs.view.zs)
+            fbinfo->zs.view.s = &view->pview;
+      }
+   }
+}
+
+void
+panvk_cmd_fb_info_init(struct panvk_cmd_buffer *cmdbuf)
+{
+   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
+   const struct panvk_framebuffer *fb = cmdbuf->state.framebuffer;
+
+   memset(cmdbuf->state.fb.crc_valid, 0, sizeof(cmdbuf->state.fb.crc_valid));
+
+   *fbinfo = (struct pan_fb_info) {
+      .width = fb->width,
+      .height = fb->height,
+      .extent.maxx = fb->width - 1,
+      .extent.maxy = fb->height - 1,
+   };
+}
+
 void
 panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                           const VkRenderPassBeginInfo *pRenderPassBegin,
@@ -363,9 +432,10 @@ panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                                    sizeof(*cmdbuf->state.clear) *
                                    pRenderPassBegin->clearValueCount, 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   assert(pRenderPassBegin->clearValueCount == pass->attachment_count);
    panvk_cmd_prepare_clear_values(cmdbuf, pRenderPassBegin->pClearValues);
    memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute));
+   panvk_cmd_fb_info_init(cmdbuf);
+   panvk_cmd_fb_info_set_subpass(cmdbuf);
 }
 
 void
@@ -381,6 +451,29 @@ panvk_CmdBeginRenderPass(VkCommandBuffer cmd,
    return panvk_CmdBeginRenderPass2(cmd, info, &subpass_info);
 }
 
+void
+panvk_cmd_preload_fb_after_batch_split(struct panvk_cmd_buffer *cmdbuf)
+{
+   for (unsigned i = 0; i < cmdbuf->state.fb.info.rt_count; i++) {
+      if (cmdbuf->state.fb.info.rts[i].view) {
+         cmdbuf->state.fb.info.rts[i].clear = false;
+         cmdbuf->state.fb.info.rts[i].preload = true;
+      }
+   }
+
+   if (cmdbuf->state.fb.info.zs.view.zs) {
+      cmdbuf->state.fb.info.zs.clear.z = false;
+      cmdbuf->state.fb.info.zs.preload.z = true;
+   }
+
+   if (cmdbuf->state.fb.info.zs.view.s ||
+       (cmdbuf->state.fb.info.zs.view.zs &&
+        util_format_is_depth_and_stencil(cmdbuf->state.fb.info.zs.view.zs->format))) {
+      cmdbuf->state.fb.info.zs.clear.s = false;
+      cmdbuf->state.fb.info.zs.preload.s = true;
+   }
+}
+
 void
 panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf)
 {
diff --git a/src/panfrost/vulkan/panvk_pass.c b/src/panfrost/vulkan/panvk_pass.c
index 82ed878bdbc..04d187af72b 100644
--- a/src/panfrost/vulkan/panvk_pass.c
+++ b/src/panfrost/vulkan/panvk_pass.c
@@ -79,7 +79,7 @@ panvk_CreateRenderPass2(VkDevice _device,
       att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
       att->store_op = pCreateInfo->pAttachments[i].storeOp;
       att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
-      att->clear_subpass = ~0;
+      att->first_used_in_subpass = ~0;
    }
 
    uint32_t subpass_attachment_count = 0;
@@ -144,9 +144,14 @@ panvk_CreateRenderPass2(VkDevice _device,
 
             if (idx != VK_ATTACHMENT_UNUSED) {
                pass->attachments[idx].view_mask |= subpass->view_mask;
-               if (pass->attachments[idx].clear_subpass == ~0) {
-                  pass->attachments[idx].clear_subpass = i;
-                  subpass->color_attachments[j].clear = true;
+               if (pass->attachments[idx].first_used_in_subpass == ~0) {
+                  pass->attachments[idx].first_used_in_subpass = i;
+                  if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+                     subpass->color_attachments[j].clear = true;
+                  else if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
+                     subpass->color_attachments[j].preload = true;
+               } else {
+                  subpass->color_attachments[j].preload = true;
                }
             }
          }
@@ -176,9 +181,15 @@ panvk_CreateRenderPass2(VkDevice _device,
          if (idx != VK_ATTACHMENT_UNUSED) {
             subpass->zs_attachment.layout = desc->pDepthStencilAttachment->layout;
             pass->attachments[idx].view_mask |= subpass->view_mask;
-            if (pass->attachments[idx].clear_subpass == ~0) {
-               pass->attachments[idx].clear_subpass = i;
-               subpass->zs_attachment.clear = true;
+
+            if (pass->attachments[idx].first_used_in_subpass == ~0) {
+               pass->attachments[idx].first_used_in_subpass = i;
+               if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
+                  subpass->zs_attachment.clear = true;
+               else if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
+                  subpass->zs_attachment.preload = true;
+            } else {
+               subpass->zs_attachment.preload = true;
             }
          }
       }
diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h
index bf38e102d9d..964cfb60e81 100644
--- a/src/panfrost/vulkan/panvk_private.h
+++ b/src/panfrost/vulkan/panvk_private.h
@@ -585,6 +585,11 @@ struct panvk_cmd_state {
       } s_front, s_back;
    } zs;
 
+   struct {
+      struct pan_fb_info info;
+      bool crc_valid[MAX_RTS];
+   } fb;
+
    const struct panvk_render_pass *pass;
    const struct panvk_subpass *subpass;
    const struct panvk_framebuffer *framebuffer;
@@ -649,6 +654,15 @@ struct panvk_cmd_buffer {
 void
 panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf);
 
+void
+panvk_cmd_fb_info_set_subpass(struct panvk_cmd_buffer *cmdbuf);
+
+void
+panvk_cmd_fb_info_init(struct panvk_cmd_buffer *cmdbuf);
+
+void
+panvk_cmd_preload_fb_after_batch_split(struct panvk_cmd_buffer *cmdbuf);
+
 void
 panvk_pack_color(struct panvk_clear_value *out,
                  const VkClearColorValue *in,
@@ -918,6 +932,7 @@ struct panvk_subpass_attachment {
    uint32_t idx;
    VkImageLayout layout;
    bool clear;
+   bool preload;
 };
 
 struct panvk_subpass {
@@ -943,7 +958,7 @@ struct panvk_render_pass_attachment {
    VkImageLayout initial_layout;
    VkImageLayout final_layout;
    unsigned view_mask;
-   unsigned clear_subpass;
+   unsigned first_used_in_subpass;
 };
 
 struct panvk_render_pass {
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
index aa2f2ecbe8d..ed070a38646 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
@@ -147,6 +147,18 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
 
    list_addtail(&cmdbuf->state.batch->node, &cmdbuf->batches);
 
+   if (batch->scoreboard.first_tiler) {
+      struct panfrost_ptr preload_jobs[2];
+      unsigned num_preload_jobs =
+         GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
+                              &cmdbuf->state.fb.info,
+                              PAN_ARCH >= 6 ? batch->tls.gpu : batch->fb.desc.gpu,
+                              PAN_ARCH >= 6 ? batch->tiler.descs.gpu : 0,
+                              preload_jobs);
+      for (unsigned i = 0; i < num_preload_jobs; i++)
+         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
+   }
+
    struct pan_tls_info tlsinfo = { 0 };
 
    if (cmdbuf->state.pipeline) {
@@ -195,13 +207,8 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
 #endif
 
       cmdbuf->state.batch->fb.desc.gpu |=
-         panvk_per_arch(emit_fb)(cmdbuf->device,
-                                 cmdbuf->state.batch,
-                                 cmdbuf->state.subpass,
-                                 cmdbuf->state.framebuffer,
-                                 cmdbuf->state.clear,
-                                 &tlsinfo, &cmdbuf->state.batch->tiler.ctx,
-                                 fbd);
+         GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &tlsinfo,
+                            &cmdbuf->state.batch->tiler.ctx, fbd);
 
 #if PAN_ARCH <= 5
       panvk_copy_fb_desc(cmdbuf, tmp_fbd);
@@ -226,6 +233,7 @@ panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
    panvk_per_arch(cmd_close_batch)(cmdbuf);
 
    cmdbuf->state.subpass++;
+   panvk_cmd_fb_info_set_subpass(cmdbuf);
    panvk_cmd_open_batch(cmdbuf);
    memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute));
 }
@@ -265,6 +273,11 @@ panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
 
    /* Tag the pointer */
    batch->fb.desc.gpu |= tags;
+
+#if PAN_ARCH >= 6
+   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
+          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
+#endif
 }
 
 void
@@ -685,6 +698,7 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
     */
    if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
       panvk_per_arch(cmd_close_batch)(cmdbuf);
+      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
       panvk_cmd_open_batch(cmdbuf);
       batch = cmdbuf->state.batch;
    }
@@ -802,6 +816,7 @@ panvk_per_arch(CmdPipelineBarrier)(VkCommandBuffer commandBuffer,
     */
    if (cmdbuf->state.batch) {
       panvk_per_arch(cmd_close_batch)(cmdbuf);
+      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
       panvk_cmd_open_batch(cmdbuf);
    }
 }
@@ -833,6 +848,7 @@ panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_event_op,
                            op);
       panvk_per_arch(cmd_close_batch)(cmdbuf);
+      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
       panvk_cmd_open_batch(cmdbuf);
    }
 }
@@ -859,6 +875,7 @@ panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
       if (cmdbuf->state.batch->fragment_job ||
           cmdbuf->state.batch->scoreboard.first_job) {
          panvk_per_arch(cmd_close_batch)(cmdbuf);
+         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
          panvk_cmd_open_batch(cmdbuf);
       }
       util_dynarray_append(&cmdbuf->state.batch->event_ops,
diff --git a/src/panfrost/vulkan/panvk_vX_cs.c b/src/panfrost/vulkan/panvk_vX_cs.c
index b5dfe8ec768..e3ae39ee3c4 100644
--- a/src/panfrost/vulkan/panvk_vX_cs.c
+++ b/src/panfrost/vulkan/panvk_vX_cs.c
@@ -858,65 +858,3 @@ panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
    }
 #endif
 }
-
-unsigned
-panvk_per_arch(emit_fb)(const struct panvk_device *dev,
-                        const struct panvk_batch *batch,
-                        const struct panvk_subpass *subpass,
-                        const struct panvk_framebuffer *fb,
-                        const struct panvk_clear_value *clears,
-                        const struct pan_tls_info *tlsinfo,
-                        const struct pan_tiler_context *tilerctx,
-                        void *desc)
-{
-   const struct panfrost_device *pdev = &dev->physical_device->pdev;
-   struct panvk_image_view *view;
-   bool crc_valid[8] = { false };
-   struct pan_fb_info fbinfo = {
-      .width = fb->width,
-      .height = fb->height,
-      .extent.maxx = fb->width - 1,
-      .extent.maxy = fb->height - 1,
-      .nr_samples = 1,
-   };
-
-   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
-      int idx = subpass->color_attachments[cb].idx;
-      view = idx != VK_ATTACHMENT_UNUSED ?
-             fb->attachments[idx].iview : NULL;
-      if (!view)
-         continue;
-      fbinfo.rts[cb].view = &view->pview;
-      fbinfo.rts[cb].clear = subpass->color_attachments[idx].clear;
-      fbinfo.rts[cb].crc_valid = &crc_valid[cb];
-
-      memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
-             sizeof(fbinfo.rts[cb].clear_value));
-      fbinfo.nr_samples =
-         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
-   }
-
-   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
-      view = fb->attachments[subpass->zs_attachment.idx].iview;
-      const struct util_format_description *fdesc =
-         util_format_description(view->pview.format);
-
-      fbinfo.nr_samples =
-         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
-
-      if (util_format_has_depth(fdesc)) {
-         fbinfo.zs.clear.z = subpass->zs_attachment.clear;
-         fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
-         fbinfo.zs.view.zs = &view->pview;
-      }
-
-      if (util_format_has_depth(fdesc)) {
-         fbinfo.zs.clear.s = subpass->zs_attachment.clear;
-         fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].depth;
-         if (!fbinfo.zs.view.zs)
-            fbinfo.zs.view.s = &view->pview;
-      }
-   }
-
-   return GENX(pan_emit_fbd)(pdev, &fbinfo, tlsinfo, tilerctx, desc);
-}
diff --git a/src/panfrost/vulkan/panvk_vX_cs.h b/src/panfrost/vulkan/panvk_vX_cs.h
index b8933ce79b2..3158c767645 100644
--- a/src/panfrost/vulkan/panvk_vX_cs.h
+++ b/src/panfrost/vulkan/panvk_vX_cs.h
@@ -125,13 +125,3 @@ void
 panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
                                    unsigned width, unsigned height,
                                    const struct panfrost_ptr *descs);
-
-unsigned
-panvk_per_arch(emit_fb)(const struct panvk_device *dev,
-                        const struct panvk_batch *batch,
-                        const struct panvk_subpass *subpass,
-                        const struct panvk_framebuffer *fb,
-                        const struct panvk_clear_value *clears,
-                        const struct pan_tls_info *tlsinfo,
-                        const struct pan_tiler_context *tilerctx,
-                        void *desc);