tu/lrz: Disable LRZ writes when draw doesn't write to all attachments

Not writing to all color attachments is the same as blending from the
LRZ viewpoint.

There is one case, though, where we can avoid disabling LRZ writes:
when a renderpass starts with depth-only draw calls, or consists entirely
of them, but also has color attachments.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38034>
This commit is contained in:
Danylo Piliaiev 2025-10-23 15:54:22 +02:00 committed by Marge Bot
parent 7ee9835475
commit d1fe9a152e
6 changed files with 65 additions and 25 deletions

View file

@ -4692,8 +4692,9 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
tu_pipeline_update_rp_state(&cmd->state);
if (pipeline->lrz_blend.valid) {
if (cmd->state.blend_reads_dest != pipeline->lrz_blend.reads_dest) {
cmd->state.blend_reads_dest = pipeline->lrz_blend.reads_dest;
if (cmd->state.lrz_blend_status !=
pipeline->lrz_blend.lrz_blend_status) {
cmd->state.lrz_blend_status = pipeline->lrz_blend.lrz_blend_status;
cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
}
}

View file

@ -551,7 +551,7 @@ struct tu_cmd_state
bool tessfactor_addr_set;
bool predication_active;
bool msaa_disable;
bool blend_reads_dest;
tu_lrz_blend_status lrz_blend_status;
bool disable_fs;
bool stencil_front_write;
bool stencil_back_write;

View file

@ -237,6 +237,8 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd,
cmd->state.lrz.valid = true;
cmd->state.lrz.valid_at_start = true;
cmd->state.lrz.disable_write_for_rp = false;
cmd->state.lrz.color_written_with_z_test = false;
cmd->state.lrz.has_lrz_write_with_skipped_color_writes = false;
cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN;
/* Be optimistic and unconditionally enable fast-clear in
* secondary cmdbufs and when reusing previous LRZ state.
@ -272,6 +274,8 @@ tu_lrz_init_secondary(struct tu_cmd_buffer *cmd,
cmd->state.lrz.valid = true;
cmd->state.lrz.valid_at_start = true;
cmd->state.lrz.disable_write_for_rp = false;
cmd->state.lrz.color_written_with_z_test = false;
cmd->state.lrz.has_lrz_write_with_skipped_color_writes = false;
cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN;
cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking;
@ -737,7 +741,8 @@ tu_lrz_flush_valid_during_renderpass(struct tu_cmd_buffer *cmd,
/* Even if state is valid, we cannot be sure that secondary
* command buffer has the same sticky disable_write_for_rp.
*/
if (cmd->state.lrz.valid && !cmd->state.lrz.disable_write_for_rp)
if (cmd->state.lrz.valid && !cmd->state.lrz.disable_write_for_rp &&
!cmd->state.lrz.has_lrz_write_with_skipped_color_writes)
return;
tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_VIEW_INFO(
@ -783,7 +788,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
}
/* See comment in tu_pipeline about disabling LRZ write for blending. */
bool reads_dest = cmd->state.blend_reads_dest;
enum tu_lrz_blend_status blend_status = cmd->state.lrz_blend_status;
gras_lrz_cntl.enable = true;
gras_lrz_cntl.lrz_write =
@ -983,10 +988,28 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
* enable LRZ write. But this would cause early-z/lrz to discard
* fragments from draw A which should be visible due to draw B.
*/
if (reads_dest && z_write_enable && cmd->device->instance->conservative_lrz) {
if (blend_status == TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE &&
z_write_enable && cmd->device->instance->conservative_lrz) {
tu_lrz_disable_write_for_rp(cmd, "Depth write + blending");
}
/* This is a special case because we want to avoid disabling LRZ when a
* renderpass starts with depth-only draw calls, or consists entirely
* of them, but also has color attachments.
*/
if (blend_status == TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED &&
z_write_enable && cmd->device->instance->conservative_lrz) {
if (cmd->state.lrz.color_written_with_z_test) {
tu_lrz_disable_write_for_rp(cmd, "Depth write + no color writes");
}
cmd->state.lrz.has_lrz_write_with_skipped_color_writes = true;
}
if (z_test_enable &&
blend_status != TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED) {
cmd->state.lrz.color_written_with_z_test = true;
}
/* If the stencil test behavior depends on the result of the depth test, we
* have to skip LRZ for the rest of the RP for basically the same reason as
* the blending case above (LRZ testing enabled on previous draws may result

View file

@ -47,6 +47,10 @@ struct tu_lrz_state
/* Continue using old LRZ state (LOAD_OP_LOAD of depth) */
bool reuse_previous_state : 1;
bool gpu_dir_set : 1;
bool color_written_with_z_test : 1;
bool has_lrz_write_with_skipped_color_writes : 1;
enum tu_lrz_direction prev_direction;
};

View file

@ -2113,7 +2113,8 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder,
if (library->state &
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) {
pipeline->output = library->base.output;
pipeline->lrz_blend.reads_dest |= library->base.lrz_blend.reads_dest;
pipeline->lrz_blend.lrz_blend_status =
library->base.lrz_blend.lrz_blend_status;
pipeline->lrz_blend.valid |= library->base.lrz_blend.valid;
}
@ -3028,28 +3029,34 @@ tu_emit_disable_fs(struct tu_disable_fs *disable_fs,
}
/* Classify the blend state for LRZ: whether it reads or only partially
 * writes the color attachments, skips all color writes, or is safe for LRZ.
 */
static bool
static tu_lrz_blend_status
tu6_calc_blend_lrz(const struct vk_color_blend_state *cb,
const struct vk_render_pass_state *rp)
{
if (cb->logic_op_enable && tu_logic_op_reads_dst((VkLogicOp)cb->logic_op))
return true;
return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE;
bool has_enabled_attachments = false;
uint32_t written_color_attachments = 0;
uint32_t total_color_attachments = 0;
for (unsigned i = 0; i < cb->attachment_count; i++) {
if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
continue;
total_color_attachments++;
const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
if ((cb->color_write_enables & (1u << i)) && att->write_mask != 0) {
has_enabled_attachments = true;
break;
written_color_attachments++;
}
}
/* There is no partial write if there are no writes at all. */
if (!has_enabled_attachments)
return false;
if (total_color_attachments == 0)
return TU_LRZ_BLEND_SAFE_FOR_LRZ;
if (written_color_attachments == 0)
return TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED;
if (written_color_attachments < cb->attachment_count)
return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE;
for (unsigned i = 0; i < cb->attachment_count; i++) {
if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
@ -3057,16 +3064,16 @@ tu6_calc_blend_lrz(const struct vk_color_blend_state *cb,
const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
if (att->blend_enable)
return true;
return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE;
if (!(cb->color_write_enables & (1u << i)))
return true;
return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE;
unsigned mask =
MASK(vk_format_get_nr_components(rp->color_attachment_formats[i]));
if ((att->write_mask & mask) != mask)
return true;
return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE;
}
return false;
return TU_LRZ_BLEND_SAFE_FOR_LRZ;
}
static const enum mesa_vk_dynamic_graphics_state tu_blend_lrz_state[] = {
@ -3083,7 +3090,7 @@ tu_emit_blend_lrz(struct tu_lrz_blend *lrz,
const struct vk_color_blend_state *cb,
const struct vk_render_pass_state *rp)
{
lrz->reads_dest = tu6_calc_blend_lrz(cb, rp);
lrz->lrz_blend_status = tu6_calc_blend_lrz(cb, rp);
lrz->valid = true;
}
@ -4021,10 +4028,10 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
cmd->vk.dynamic_graphics_state.ms.sample_mask);
if (!cmd->state.pipeline_blend_lrz &&
(EMIT_STATE(blend_lrz) || (cmd->state.dirty & TU_CMD_DIRTY_SUBPASS))) {
bool blend_reads_dest = tu6_calc_blend_lrz(&cmd->vk.dynamic_graphics_state.cb,
&cmd->state.vk_rp);
if (blend_reads_dest != cmd->state.blend_reads_dest) {
cmd->state.blend_reads_dest = blend_reads_dest;
tu_lrz_blend_status blend_status = tu6_calc_blend_lrz(
&cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp);
if (blend_status != cmd->state.lrz_blend_status) {
cmd->state.lrz_blend_status = blend_status;
cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
}
}

View file

@ -38,10 +38,15 @@ enum tu_dynamic_state
struct cache_entry;
/* How the color blend state of a draw interacts with LRZ, as computed by
 * tu6_calc_blend_lrz() and consumed by tu6_calculate_lrz_state().
 */
enum tu_lrz_blend_status {
/* Either there are no color attachments with a defined format, or none of
 * them is blended, write-disabled or only partially written.
 */
TU_LRZ_BLEND_SAFE_FOR_LRZ,
/* The logic op reads the destination, or some attachment has blending
 * enabled, has its color write disabled, or has a write mask that does not
 * cover all of its components. Combined with depth writes (and
 * conservative_lrz) this disables LRZ writes for the renderpass.
 */
TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE,
/* Color attachments exist but the draw writes to none of them. Combined
 * with depth writes (and conservative_lrz), LRZ writes are disabled for the
 * renderpass only if an earlier draw already wrote color with the depth
 * test enabled.
 */
TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED,
};
struct tu_lrz_blend
{
bool valid;
bool reads_dest;
enum tu_lrz_blend_status lrz_blend_status;
};
struct tu_bandwidth