mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 10:10:09 +01:00
tu: Add support for "unresolve" ops
These ops replicate a single-sampled source attachment into a multisampled destination attachment before the start of a subpass. This is the new hardware feature for VK_EXT_multisample_render_to_single_sampled, and the actual implementation of the extension emulates everything on top of these ops.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37919>
This commit is contained in:
parent
7542d5068c
commit
9c5012b03c
5 changed files with 138 additions and 12 deletions
|
|
@ -1253,10 +1253,11 @@ r3d_src_stencil(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
static void
|
||||
r3d_src_gmem_load(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
uint32_t layer)
|
||||
r3d_src_load(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
uint32_t layer,
|
||||
bool override_swap)
|
||||
{
|
||||
uint32_t desc[A6XX_TEX_CONST_DWORDS];
|
||||
|
||||
|
|
@ -1281,8 +1282,9 @@ r3d_src_gmem_load(struct tu_cmd_buffer *cmd,
|
|||
* GMEM, so we need to fixup the swizzle and swap.
|
||||
*/
|
||||
desc[0] &= ~(A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
|
||||
A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK |
|
||||
A6XX_TEX_CONST_0_SWAP__MASK);
|
||||
A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK);
|
||||
if (override_swap)
|
||||
desc[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK;
|
||||
desc[0] |= A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
|
||||
A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
|
||||
A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
|
||||
|
|
@ -1294,6 +1296,24 @@ r3d_src_gmem_load(struct tu_cmd_buffer *cmd,
|
|||
VK_FILTER_NEAREST);
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_src_gmem_load(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
uint32_t layer)
|
||||
{
|
||||
r3d_src_load(cmd, cs, iview, layer, true);
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_src_sysmem_load(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
uint32_t layer)
|
||||
{
|
||||
r3d_src_load(cmd, cs, iview, layer, false);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
r3d_src_gmem(struct tu_cmd_buffer *cmd,
|
||||
|
|
@ -3576,6 +3596,11 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
{
|
||||
const struct blit_ops *ops = &r2d_ops<CHIP>;
|
||||
|
||||
/* A2D does not support "unresolve". */
|
||||
if (dst->image->layout[0].nr_samples > 1) {
|
||||
ops = &r3d_ops<CHIP>;
|
||||
}
|
||||
|
||||
trace_start_sysmem_resolve(&cmd->rp_trace, cs, cmd, vk_dst_format);
|
||||
|
||||
enum pipe_format src_format = vk_format_to_pipe_format(vk_src_format);
|
||||
|
|
@ -3595,7 +3620,11 @@ resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
ops->src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST);
|
||||
}
|
||||
} else {
|
||||
ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format);
|
||||
if (ops == &r3d_ops<CHIP>) {
|
||||
r3d_src_sysmem_load(cmd, cs, src, i);
|
||||
} else {
|
||||
ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format);
|
||||
}
|
||||
}
|
||||
|
||||
if (dst_separate_ds) {
|
||||
|
|
@ -5081,12 +5110,13 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
struct tu_resolve_group *resolve_group,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a,
|
||||
bool cond_exec_allowed,
|
||||
bool force_load)
|
||||
{
|
||||
const struct tu_image_view *iview = cmd->state.attachments[a];
|
||||
const struct tu_render_pass_attachment *attachment =
|
||||
&cmd->state.pass->attachments[a];
|
||||
&cmd->state.pass->attachments[gmem_a];
|
||||
|
||||
bool load_common = attachment->load || force_load;
|
||||
bool load_stencil =
|
||||
|
|
@ -5110,7 +5140,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
tu_begin_load_store_cond_exec(cmd, cs, true);
|
||||
|
||||
if (TU_DEBUG(3D_LOAD) ||
|
||||
cmd->state.pass->has_fdm) {
|
||||
cmd->state.pass->has_fdm ||
|
||||
/* Replicating unresolve seems to not work and the blob never uses it.
|
||||
*/
|
||||
(a != gmem_a)) {
|
||||
if (load_common || load_stencil)
|
||||
tu_disable_draw_states(cmd, cs);
|
||||
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
struct tu_resolve_group *resolve_group,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a,
|
||||
bool cond_exec_allowed,
|
||||
bool force_load);
|
||||
|
||||
|
|
|
|||
|
|
@ -1521,6 +1521,62 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_sysmem_unresolve(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t layer_mask,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *src = cmd->state.attachments[a];
|
||||
const struct tu_image_view *dst = cmd->state.attachments[gmem_a];
|
||||
|
||||
tu_resolve_sysmem<CHIP>(cmd, cs, src, dst, layer_mask, fb->layers, &cmd->state.render_area);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_sysmem_unresolves(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_subpass *subpass)
|
||||
{
|
||||
if (subpass->unresolve_count) {
|
||||
/* Similar to above, we need to explicitly flush afterwards to keep this
|
||||
* in sync with draw commands. However we also don't currently insert
|
||||
* dependencies when a resolve is followed by an unresolve so we also
|
||||
* need to manually flush for that case.
|
||||
*/
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
|
||||
|
||||
/* Wait for the flushes to land before using the 2D engine */
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
bool unresolve_ds = false;
|
||||
for (unsigned i = 0; i < subpass->unresolve_count; i++) {
|
||||
uint32_t a = subpass->unresolve_attachments[i].attachment;
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
if (vk_format_is_depth_or_stencil(cmd->state.pass->attachments[a].format))
|
||||
unresolve_ds = true;
|
||||
|
||||
uint32_t gmem_a = tu_subpass_get_attachment_to_unresolve(subpass, i);
|
||||
|
||||
tu6_emit_sysmem_unresolve<CHIP>(cmd, cs, subpass->multiview_mask, a, gmem_a);
|
||||
}
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_COLOR);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_COLOR);
|
||||
if (unresolve_ds) {
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_DEPTH);
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_INVALIDATE_DEPTH);
|
||||
}
|
||||
tu_cs_emit_wfi(cs);
|
||||
}
|
||||
}
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu6_emit_gmem_resolves(struct tu_cmd_buffer *cmd,
|
||||
|
|
@ -1552,7 +1608,7 @@ tu6_emit_gmem_resolves(struct tu_cmd_buffer *cmd,
|
|||
"TODO: missing GMEM->GMEM resolve path\n");
|
||||
if (CHIP >= A7XX)
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE);
|
||||
tu_load_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, false, true);
|
||||
tu_load_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, a, false, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -5607,11 +5663,27 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r
|
|||
tu6_emit_blit_scissor(cmd, cs, true, false);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu_load_gmem_attachment<CHIP>(cmd, cs, resolve_group, i,
|
||||
tu_load_gmem_attachment<CHIP>(cmd, cs, resolve_group, i, i,
|
||||
cond_load_allowed, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Emit unresolves that replicate single-sampled attachments into
|
||||
* multisampled GMEM attachments.
|
||||
*/
|
||||
for (uint32_t i = 0; i < cmd->state.subpass->unresolve_count; ++i) {
|
||||
uint32_t a = cmd->state.subpass->unresolve_attachments[i].attachment;
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
uint32_t gmem_a =
|
||||
tu_subpass_get_attachment_to_unresolve(cmd->state.subpass, i);
|
||||
|
||||
tu_load_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, gmem_a,
|
||||
cond_load_allowed, true);
|
||||
}
|
||||
|
||||
if (!cmd->device->physical_device->info->a7xx.has_generic_clear) {
|
||||
/* Emit gmem clears that are first used in this subpass. */
|
||||
emitted_scissor = false;
|
||||
|
|
@ -5640,18 +5712,23 @@ template <chip CHIP>
|
|||
static void
|
||||
tu_emit_subpass_begin_sysmem(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
if (cmd->device->physical_device->info->a7xx.has_generic_clear)
|
||||
if (cmd->device->physical_device->info->a7xx.has_generic_clear &&
|
||||
!cmd->state.subpass->unresolve_count)
|
||||
return;
|
||||
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses;
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
|
||||
tu6_emit_sysmem_unresolves<CHIP>(cmd, cs, cmd->state.subpass);
|
||||
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {
|
||||
struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx)
|
||||
tu_clear_sysmem_attachment<CHIP>(cmd, cs, i);
|
||||
}
|
||||
|
||||
tu_cond_exec_end(cs); /* sysmem */
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1491,3 +1491,13 @@ tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t
|
|||
|
||||
return subpass->color_attachments[index].attachment;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
tu_subpass_get_attachment_to_unresolve(const struct tu_subpass *subpass, uint32_t index)
|
||||
{
|
||||
if (index == subpass->color_count &&
|
||||
index == (subpass->unresolve_count - 1))
|
||||
return subpass->depth_stencil_attachment.attachment;
|
||||
|
||||
return subpass->color_attachments[index].attachment;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ struct tu_subpass
|
|||
uint32_t input_count;
|
||||
uint32_t color_count;
|
||||
uint32_t resolve_count;
|
||||
uint32_t unresolve_count;
|
||||
bool resolve_depth_stencil;
|
||||
|
||||
bool legacy_dithering_enabled;
|
||||
|
|
@ -64,6 +65,7 @@ struct tu_subpass
|
|||
struct tu_subpass_attachment *input_attachments;
|
||||
struct tu_subpass_attachment *color_attachments;
|
||||
struct tu_subpass_attachment *resolve_attachments;
|
||||
struct tu_subpass_attachment *unresolve_attachments;
|
||||
struct tu_subpass_attachment depth_stencil_attachment;
|
||||
|
||||
uint32_t fsr_attachment;
|
||||
|
|
@ -157,4 +159,7 @@ void tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
|
|||
uint32_t
|
||||
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);
|
||||
|
||||
uint32_t
|
||||
tu_subpass_get_attachment_to_unresolve(const struct tu_subpass *subpass, uint32_t index);
|
||||
|
||||
#endif /* TU_PASS_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue