tu/a7xx: Use BLIT_EVENT_STORE_AND_CLEAR when appropriate

BLIT_EVENT_STORE_AND_CLEAR presumably swallows the BLIT_EVENT_CLEAR
at the start of the next bin. This should be faster than emitting
separate store and clear events.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30270>
This commit is contained in:
Danylo Piliaiev 2024-08-13 15:59:30 +02:00 committed by Marge Bot
parent 80a50269a2
commit a15466187c
3 changed files with 60 additions and 15 deletions

View file

@@ -3957,11 +3957,46 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_image_view *iview,
const struct tu_render_pass_attachment *attachment,
const VkClearValue *clear_value,
enum a6xx_blit_event_type blit_event_type,
bool separate_stencil)
{
assert(blit_event_type != BLIT_EVENT_CLEAR);
event_blit_setup(cs, attachment, blit_event_type, 0x0);
uint32_t clear_mask = 0;
/* BLIT_EVENT_STORE_AND_CLEAR would presumably swallow the
* BLIT_EVENT_CLEAR at the start of a renderpass, and be more efficient.
*/
if (blit_event_type == BLIT_EVENT_STORE && clear_value &&
attachment->clear_mask &&
use_generic_clear_for_image_clear(cmd, iview->image)) {
blit_event_type = BLIT_EVENT_STORE_AND_CLEAR;
enum pipe_format format = vk_format_to_pipe_format(attachment->format);
VkImageAspectFlags aspect_mask = attachment->clear_mask;
if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
if (separate_stencil)
aspect_mask = VK_IMAGE_ASPECT_STENCIL_BIT;
else
aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
if (separate_stencil)
format = PIPE_FORMAT_S8_UINT;
else
format = PIPE_FORMAT_Z32_FLOAT;
}
clear_mask = aspect_write_mask_generic_clear(format, aspect_mask);
uint32_t clear_vals[4] = {};
pack_blit_event_clear_value(clear_value, format, clear_vals);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
tu_cs_emit_array(cs, clear_vals, 4);
}
event_blit_setup(cs, attachment, blit_event_type, clear_mask);
for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
event_blit_dst_view blt_view = blt_view_from_tu_view(iview, i);
@@ -4194,10 +4229,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
load_3d_blit<CHIP>(cmd, cs, iview, attachment, true);
} else {
if (load_common)
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, false);
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, false);
if (load_stencil)
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, true);
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, true);
}
if (cond_exec)
@@ -4473,6 +4508,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
const struct tu_image_view *iview = cmd->state.attachments[a];
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a];
bool resolve = a != gmem_a;
if (resolve)
clear_value = NULL;
if (!dst->store && !dst->store_stencil)
return;
@@ -4513,9 +4552,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
/* use fast path when render area is aligned, except for unsupported resolve cases */
if (use_fast_path) {
if (store_common)
tu_emit_blit<CHIP>(cmd, cs, iview, src, BLIT_EVENT_STORE, false);
tu_emit_blit<CHIP>(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, false);
if (store_separate_stencil)
tu_emit_blit<CHIP>(cmd, cs, iview, src, BLIT_EVENT_STORE, true);
tu_emit_blit<CHIP>(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, true);
if (cond_exec) {
tu_end_load_store_cond_exec(cmd, cs, false);

View file

@@ -1184,16 +1184,9 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu6_emit_blit_scissor(cmd, cs, true);
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem) {
const bool cond_exec_allowed = cmd->state.tiling->binning_possible &&
cmd->state.pass->has_cond_load_store;
tu_store_gmem_attachment<CHIP>(cmd, cs, a, a,
fb->layers, subpass->multiview_mask,
cond_exec_allowed);
}
}
/* Resolve should happen before store in case BLIT_EVENT_STORE_AND_CLEAR is
* used for a store.
*/
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->resolve_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
@@ -1205,6 +1198,16 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem) {
const bool cond_exec_allowed = cmd->state.tiling->binning_possible &&
cmd->state.pass->has_cond_load_store;
tu_store_gmem_attachment<CHIP>(cmd, cs, a, a,
fb->layers, subpass->multiview_mask,
cond_exec_allowed);
}
}
if (pass->has_fdm)
tu_cs_set_writeable(cs, false);
}
@@ -4012,6 +4015,7 @@ tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
cmd->state.subpass = suspended->state.suspended_pass.subpass;
cmd->state.framebuffer = suspended->state.suspended_pass.framebuffer;
cmd->state.attachments = suspended->state.suspended_pass.attachments;
cmd->state.clear_values = suspended->state.suspended_pass.clear_values;
cmd->state.render_area = suspended->state.suspended_pass.render_area;
cmd->state.gmem_layout = suspended->state.suspended_pass.gmem_layout;
cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
@@ -4636,6 +4640,7 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
cmd->state.suspended_pass.framebuffer = cmd->state.framebuffer;
cmd->state.suspended_pass.render_area = cmd->state.render_area;
cmd->state.suspended_pass.attachments = cmd->state.attachments;
cmd->state.suspended_pass.clear_values = cmd->state.clear_values;
cmd->state.suspended_pass.gmem_layout = cmd->state.gmem_layout;
}

View file

@@ -495,6 +495,7 @@ struct tu_cmd_state
enum tu_gmem_layout gmem_layout;
const struct tu_image_view **attachments;
VkClearValue *clear_values;
struct tu_lrz_state lrz;
} suspended_pass;