panvk: Preload FB attachments when required

There are at least three situations where we need to preload FBs:

1. The attachment is flagged VK_ATTACHMENT_LOAD_OP_LOAD and has not been
   accessed in previous subpasses

2. The batch is implicitly split (e.g. too many jobs queued to the
   batch, wait/set events queued, ...)

3. The attachment has been written by a previous subpass

With those changes, we can get rid of panvk_emit_fb() and call
pan_emit_fbd() directly (fb_info is initialized when starting a subpass
and updated when an implicit split happens).

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12095>
This commit is contained in:
Boris Brezillon 2021-09-06 16:06:49 +02:00
parent 3166f4ebea
commit 165c26b9e1
6 changed files with 152 additions and 88 deletions

View file

@ -341,6 +341,75 @@ panvk_cmd_prepare_clear_values(struct panvk_cmd_buffer *cmdbuf,
}
}
void
panvk_cmd_fb_info_set_subpass(struct panvk_cmd_buffer *cmdbuf)
{
const struct panvk_subpass *subpass = cmdbuf->state.subpass;
struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
const struct panvk_framebuffer *fb = cmdbuf->state.framebuffer;
const struct panvk_clear_value *clears = cmdbuf->state.clear;
struct panvk_image_view *view;
fbinfo->nr_samples = 1;
fbinfo->rt_count = subpass->color_count;
memset(&fbinfo->bifrost.pre_post.dcds, 0, sizeof(fbinfo->bifrost.pre_post.dcds));
for (unsigned cb = 0; cb < subpass->color_count; cb++) {
int idx = subpass->color_attachments[cb].idx;
view = idx != VK_ATTACHMENT_UNUSED ?
fb->attachments[idx].iview : NULL;
if (!view)
continue;
fbinfo->rts[cb].view = &view->pview;
fbinfo->rts[cb].clear = subpass->color_attachments[cb].clear;
fbinfo->rts[cb].preload = subpass->color_attachments[cb].preload;
fbinfo->rts[cb].crc_valid = &cmdbuf->state.fb.crc_valid[cb];
memcpy(fbinfo->rts[cb].clear_value, clears[idx].color,
sizeof(fbinfo->rts[cb].clear_value));
fbinfo->nr_samples =
MAX2(fbinfo->nr_samples, view->pview.image->layout.nr_samples);
}
if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
view = fb->attachments[subpass->zs_attachment.idx].iview;
const struct util_format_description *fdesc =
util_format_description(view->pview.format);
fbinfo->nr_samples =
MAX2(fbinfo->nr_samples, view->pview.image->layout.nr_samples);
if (util_format_has_depth(fdesc)) {
fbinfo->zs.clear.z = subpass->zs_attachment.clear;
fbinfo->zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
fbinfo->zs.view.zs = &view->pview;
}
if (util_format_has_stencil(fdesc)) {
fbinfo->zs.clear.s = subpass->zs_attachment.clear;
fbinfo->zs.clear_value.stencil = clears[subpass->zs_attachment.idx].depth;
if (!fbinfo->zs.view.zs)
fbinfo->zs.view.s = &view->pview;
}
}
}
void
panvk_cmd_fb_info_init(struct panvk_cmd_buffer *cmdbuf)
{
struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
const struct panvk_framebuffer *fb = cmdbuf->state.framebuffer;
memset(cmdbuf->state.fb.crc_valid, 0, sizeof(cmdbuf->state.fb.crc_valid));
*fbinfo = (struct pan_fb_info) {
.width = fb->width,
.height = fb->height,
.extent.maxx = fb->width - 1,
.extent.maxy = fb->height - 1,
};
}
void
panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo *pRenderPassBegin,
@ -363,9 +432,10 @@ panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
sizeof(*cmdbuf->state.clear) *
pRenderPassBegin->clearValueCount, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
assert(pRenderPassBegin->clearValueCount == pass->attachment_count);
panvk_cmd_prepare_clear_values(cmdbuf, pRenderPassBegin->pClearValues);
memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute));
panvk_cmd_fb_info_init(cmdbuf);
panvk_cmd_fb_info_set_subpass(cmdbuf);
}
void
@ -381,6 +451,29 @@ panvk_CmdBeginRenderPass(VkCommandBuffer cmd,
return panvk_CmdBeginRenderPass2(cmd, info, &subpass_info);
}
void
panvk_cmd_preload_fb_after_batch_split(struct panvk_cmd_buffer *cmdbuf)
{
for (unsigned i = 0; i < cmdbuf->state.fb.info.rt_count; i++) {
if (cmdbuf->state.fb.info.rts[i].view) {
cmdbuf->state.fb.info.rts[i].clear = false;
cmdbuf->state.fb.info.rts[i].preload = true;
}
}
if (cmdbuf->state.fb.info.zs.view.zs) {
cmdbuf->state.fb.info.zs.clear.z = false;
cmdbuf->state.fb.info.zs.preload.z = true;
}
if (cmdbuf->state.fb.info.zs.view.s ||
(cmdbuf->state.fb.info.zs.view.zs &&
util_format_is_depth_and_stencil(cmdbuf->state.fb.info.zs.view.zs->format))) {
cmdbuf->state.fb.info.zs.clear.s = false;
cmdbuf->state.fb.info.zs.preload.s = true;
}
}
void
panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf)
{

View file

@ -79,7 +79,7 @@ panvk_CreateRenderPass2(VkDevice _device,
att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
att->store_op = pCreateInfo->pAttachments[i].storeOp;
att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
att->clear_subpass = ~0;
att->first_used_in_subpass = ~0;
}
uint32_t subpass_attachment_count = 0;
@ -144,9 +144,14 @@ panvk_CreateRenderPass2(VkDevice _device,
if (idx != VK_ATTACHMENT_UNUSED) {
pass->attachments[idx].view_mask |= subpass->view_mask;
if (pass->attachments[idx].clear_subpass == ~0) {
pass->attachments[idx].clear_subpass = i;
subpass->color_attachments[j].clear = true;
if (pass->attachments[idx].first_used_in_subpass == ~0) {
pass->attachments[idx].first_used_in_subpass = i;
if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
subpass->color_attachments[j].clear = true;
else if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
subpass->color_attachments[j].preload = true;
} else {
subpass->color_attachments[j].preload = true;
}
}
}
@ -176,9 +181,15 @@ panvk_CreateRenderPass2(VkDevice _device,
if (idx != VK_ATTACHMENT_UNUSED) {
subpass->zs_attachment.layout = desc->pDepthStencilAttachment->layout;
pass->attachments[idx].view_mask |= subpass->view_mask;
if (pass->attachments[idx].clear_subpass == ~0) {
pass->attachments[idx].clear_subpass = i;
subpass->zs_attachment.clear = true;
if (pass->attachments[idx].first_used_in_subpass == ~0) {
pass->attachments[idx].first_used_in_subpass = i;
if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
subpass->zs_attachment.clear = true;
else if (pass->attachments[idx].load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
subpass->zs_attachment.preload = true;
} else {
subpass->zs_attachment.preload = true;
}
}
}

View file

@ -585,6 +585,11 @@ struct panvk_cmd_state {
} s_front, s_back;
} zs;
struct {
struct pan_fb_info info;
bool crc_valid[MAX_RTS];
} fb;
const struct panvk_render_pass *pass;
const struct panvk_subpass *subpass;
const struct panvk_framebuffer *framebuffer;
@ -649,6 +654,15 @@ struct panvk_cmd_buffer {
void
panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf);
/* Fill cmdbuf->state.fb.info from the current subpass: per-slot color views,
 * clear/preload flags and clear colors, the depth/stencil view and clear
 * values, and the maximum sample count of the bound attachments.
 */
void
panvk_cmd_fb_info_set_subpass(struct panvk_cmd_buffer *cmdbuf);
/* Zero the CRC-valid flags and reset cmdbuf->state.fb.info to a descriptor
 * that only carries the framebuffer width/height and full extent.
 */
void
panvk_cmd_fb_info_init(struct panvk_cmd_buffer *cmdbuf);
/* For every bound render target and depth/stencil aspect in
 * cmdbuf->state.fb.info, turn the clear flag off and the preload flag on.
 */
void
panvk_cmd_preload_fb_after_batch_split(struct panvk_cmd_buffer *cmdbuf);
void
panvk_pack_color(struct panvk_clear_value *out,
const VkClearColorValue *in,
@ -918,6 +932,7 @@ struct panvk_subpass_attachment {
uint32_t idx;
VkImageLayout layout;
bool clear;
bool preload;
};
struct panvk_subpass {
@ -943,7 +958,7 @@ struct panvk_render_pass_attachment {
VkImageLayout initial_layout;
VkImageLayout final_layout;
unsigned view_mask;
unsigned clear_subpass;
unsigned first_used_in_subpass;
};
struct panvk_render_pass {

View file

@ -147,6 +147,18 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
list_addtail(&cmdbuf->state.batch->node, &cmdbuf->batches);
if (batch->scoreboard.first_tiler) {
struct panfrost_ptr preload_jobs[2];
unsigned num_preload_jobs =
GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
&cmdbuf->state.fb.info,
PAN_ARCH >= 6 ? batch->tls.gpu : batch->fb.desc.gpu,
PAN_ARCH >= 6 ? batch->tiler.descs.gpu : 0,
preload_jobs);
for (unsigned i = 0; i < num_preload_jobs; i++)
util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
}
struct pan_tls_info tlsinfo = { 0 };
if (cmdbuf->state.pipeline) {
@ -195,13 +207,8 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
#endif
cmdbuf->state.batch->fb.desc.gpu |=
panvk_per_arch(emit_fb)(cmdbuf->device,
cmdbuf->state.batch,
cmdbuf->state.subpass,
cmdbuf->state.framebuffer,
cmdbuf->state.clear,
&tlsinfo, &cmdbuf->state.batch->tiler.ctx,
fbd);
GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &tlsinfo,
&cmdbuf->state.batch->tiler.ctx, fbd);
#if PAN_ARCH <= 5
panvk_copy_fb_desc(cmdbuf, tmp_fbd);
@ -226,6 +233,7 @@ panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
panvk_per_arch(cmd_close_batch)(cmdbuf);
cmdbuf->state.subpass++;
panvk_cmd_fb_info_set_subpass(cmdbuf);
panvk_cmd_open_batch(cmdbuf);
memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute));
}
@ -265,6 +273,11 @@ panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
/* Tag the pointer */
batch->fb.desc.gpu |= tags;
#if PAN_ARCH >= 6
memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
#endif
}
void
@ -685,6 +698,7 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
*/
if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_cmd_preload_fb_after_batch_split(cmdbuf);
panvk_cmd_open_batch(cmdbuf);
batch = cmdbuf->state.batch;
}
@ -802,6 +816,7 @@ panvk_per_arch(CmdPipelineBarrier)(VkCommandBuffer commandBuffer,
*/
if (cmdbuf->state.batch) {
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_cmd_preload_fb_after_batch_split(cmdbuf);
panvk_cmd_open_batch(cmdbuf);
}
}
@ -833,6 +848,7 @@ panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
struct panvk_event_op,
op);
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_cmd_preload_fb_after_batch_split(cmdbuf);
panvk_cmd_open_batch(cmdbuf);
}
}
@ -859,6 +875,7 @@ panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
if (cmdbuf->state.batch->fragment_job ||
cmdbuf->state.batch->scoreboard.first_job) {
panvk_per_arch(cmd_close_batch)(cmdbuf);
panvk_cmd_preload_fb_after_batch_split(cmdbuf);
panvk_cmd_open_batch(cmdbuf);
}
util_dynarray_append(&cmdbuf->state.batch->event_ops,

View file

@ -858,65 +858,3 @@ panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
}
#endif
}
/**
 * Build a pan_fb_info describing the attachments of @subpass and emit the
 * matching framebuffer descriptor into @desc.
 *
 * @dev       device owning the panfrost device passed to the emitter
 * @batch     batch being closed (not read by this function)
 * @subpass   subpass whose color and depth/stencil attachments are described
 * @fb        framebuffer providing the image views, width and height
 * @clears    clear values, indexed by render-pass attachment index
 * @tlsinfo   thread-local-storage info forwarded to pan_emit_fbd()
 * @tilerctx  tiler context forwarded to pan_emit_fbd()
 * @desc      destination of the framebuffer descriptor
 *
 * Returns whatever GENX(pan_emit_fbd)() returns.
 */
unsigned
panvk_per_arch(emit_fb)(const struct panvk_device *dev,
                        const struct panvk_batch *batch,
                        const struct panvk_subpass *subpass,
                        const struct panvk_framebuffer *fb,
                        const struct panvk_clear_value *clears,
                        const struct pan_tls_info *tlsinfo,
                        const struct pan_tiler_context *tilerctx,
                        void *desc)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   struct panvk_image_view *view;
   /* CRC state is local to this call, so it always starts out invalid. */
   bool crc_valid[8] = { false };
   struct pan_fb_info fbinfo = {
      .width = fb->width,
      .height = fb->height,
      .extent.maxx = fb->width - 1,
      .extent.maxy = fb->height - 1,
      .nr_samples = 1,
   };

   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
      int idx = subpass->color_attachments[cb].idx;

      view = idx != VK_ATTACHMENT_UNUSED ?
             fb->attachments[idx].iview : NULL;
      if (!view)
         continue;

      fbinfo.rts[cb].view = &view->pview;
      /* color_attachments[] is indexed by subpass slot (cb); only the
       * framebuffer attachments and clear values use the render-pass
       * attachment index (idx). Indexing with idx here could read past
       * color_count.
       */
      fbinfo.rts[cb].clear = subpass->color_attachments[cb].clear;
      fbinfo.rts[cb].crc_valid = &crc_valid[cb];

      memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
             sizeof(fbinfo.rts[cb].clear_value));
      fbinfo.nr_samples =
         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
   }

   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
      view = fb->attachments[subpass->zs_attachment.idx].iview;
      const struct util_format_description *fdesc =
         util_format_description(view->pview.format);

      fbinfo.nr_samples =
         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);

      if (util_format_has_depth(fdesc)) {
         fbinfo.zs.clear.z = subpass->zs_attachment.clear;
         fbinfo.zs.clear_value.depth =
            clears[subpass->zs_attachment.idx].depth;
         fbinfo.zs.view.zs = &view->pview;
      }

      /* Stencil setup must be gated on the stencil aspect: testing for depth
       * here skipped stencil-only formats and touched stencil state on
       * depth-only ones.
       */
      if (util_format_has_stencil(fdesc)) {
         fbinfo.zs.clear.s = subpass->zs_attachment.clear;
         /* Assumes struct panvk_clear_value has a `stencil` member next to
          * `depth` — TODO confirm against its declaration.
          */
         fbinfo.zs.clear_value.stencil =
            clears[subpass->zs_attachment.idx].stencil;
         /* A separate stencil view is only set when no depth view is bound. */
         if (!fbinfo.zs.view.zs)
            fbinfo.zs.view.s = &view->pview;
      }
   }

   return GENX(pan_emit_fbd)(pdev, &fbinfo, tlsinfo, tilerctx, desc);
}

View file

@ -125,13 +125,3 @@ void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
unsigned width, unsigned height,
const struct panfrost_ptr *descs);
/* Describe the attachments of `subpass` (views from `fb`, clear values from
 * `clears`) in a pan_fb_info and emit the framebuffer descriptor into `desc`.
 * Returns the value produced by GENX(pan_emit_fbd)().
 */
unsigned
panvk_per_arch(emit_fb)(const struct panvk_device *dev,
const struct panvk_batch *batch,
const struct panvk_subpass *subpass,
const struct panvk_framebuffer *fb,
const struct panvk_clear_value *clears,
const struct pan_tls_info *tlsinfo,
const struct pan_tiler_context *tilerctx,
void *desc);