From d1fe9a152e6edb66eb154a1b3771add9918322f2 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Thu, 23 Oct 2025 15:54:22 +0200 Subject: [PATCH] tu/lrz: Disable LRZ writes when draw doesn't write to all attachments Not writing to color attachments is the same as blending from LRZ viewpoint. Though there is one case when we can avoid disabling LRZ writes, when a renderpass starts with depth-only draw calls, or consists entirely of them, but also has color attachments. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.cc | 5 ++-- src/freedreno/vulkan/tu_cmd_buffer.h | 2 +- src/freedreno/vulkan/tu_lrz.cc | 29 ++++++++++++++++-- src/freedreno/vulkan/tu_lrz.h | 4 +++ src/freedreno/vulkan/tu_pipeline.cc | 43 ++++++++++++++++----------- src/freedreno/vulkan/tu_pipeline.h | 7 ++++- 6 files changed, 65 insertions(+), 25 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index edc34a898f5..d457c474bd9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -4692,8 +4692,9 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, tu_pipeline_update_rp_state(&cmd->state); if (pipeline->lrz_blend.valid) { - if (cmd->state.blend_reads_dest != pipeline->lrz_blend.reads_dest) { - cmd->state.blend_reads_dest = pipeline->lrz_blend.reads_dest; + if (cmd->state.lrz_blend_status != + pipeline->lrz_blend.lrz_blend_status) { + cmd->state.lrz_blend_status = pipeline->lrz_blend.lrz_blend_status; cmd->state.dirty |= TU_CMD_DIRTY_LRZ; } } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index d8426cfd962..f8aec180129 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -551,7 +551,7 @@ struct tu_cmd_state bool tessfactor_addr_set; bool predication_active; bool msaa_disable; - bool blend_reads_dest; + tu_lrz_blend_status lrz_blend_status; bool disable_fs; bool stencil_front_write; bool 
stencil_back_write; diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 5f1d9c6cac7..3feed08fd72 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -237,6 +237,8 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd, cmd->state.lrz.valid = true; cmd->state.lrz.valid_at_start = true; cmd->state.lrz.disable_write_for_rp = false; + cmd->state.lrz.color_written_with_z_test = false; + cmd->state.lrz.has_lrz_write_with_skipped_color_writes = false; cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN; /* Be optimistic and unconditionally enable fast-clear in * secondary cmdbufs and when reusing previous LRZ state. @@ -272,6 +274,8 @@ tu_lrz_init_secondary(struct tu_cmd_buffer *cmd, cmd->state.lrz.valid = true; cmd->state.lrz.valid_at_start = true; cmd->state.lrz.disable_write_for_rp = false; + cmd->state.lrz.color_written_with_z_test = false; + cmd->state.lrz.has_lrz_write_with_skipped_color_writes = false; cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN; cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking; @@ -737,7 +741,8 @@ tu_lrz_flush_valid_during_renderpass(struct tu_cmd_buffer *cmd, /* Even if state is valid, we cannot be sure that secondary * command buffer has the same sticky disable_write_for_rp. */ - if (cmd->state.lrz.valid && !cmd->state.lrz.disable_write_for_rp) + if (cmd->state.lrz.valid && !cmd->state.lrz.disable_write_for_rp && + !cmd->state.lrz.has_lrz_write_with_skipped_color_writes) return; tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_VIEW_INFO( @@ -783,7 +788,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, } /* See comment in tu_pipeline about disabling LRZ write for blending. */ - bool reads_dest = cmd->state.blend_reads_dest; + enum tu_lrz_blend_status blend_status = cmd->state.lrz_blend_status; gras_lrz_cntl.enable = true; gras_lrz_cntl.lrz_write = @@ -983,10 +988,28 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, * enable LRZ write. 
But this would cause early-z/lrz to discard * fragments from draw A which should be visible due to draw B. */ - if (reads_dest && z_write_enable && cmd->device->instance->conservative_lrz) { + if (blend_status == TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE && + z_write_enable && cmd->device->instance->conservative_lrz) { tu_lrz_disable_write_for_rp(cmd, "Depth write + blending"); } + /* This is a special case because we want to avoid disabling LRZ when a + * renderpass starts with depth-only draw calls, or consists entirely + * of them, but also has color attachments. + */ + if (blend_status == TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED && + z_write_enable && cmd->device->instance->conservative_lrz) { + if (cmd->state.lrz.color_written_with_z_test) { + tu_lrz_disable_write_for_rp(cmd, "Depth write + no color writes"); + } + cmd->state.lrz.has_lrz_write_with_skipped_color_writes = true; + } + + if (z_test_enable && + blend_status != TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED) { + cmd->state.lrz.color_written_with_z_test = true; + } + /* If the stencil test behavior depends on the result of the depth test, we * have to skip LRZ for the rest of the RP for basically the same reason as * the blending case above (LRZ testing enabled on previous draws may result diff --git a/src/freedreno/vulkan/tu_lrz.h b/src/freedreno/vulkan/tu_lrz.h index c8550903aa8..a74808fd944 100644 --- a/src/freedreno/vulkan/tu_lrz.h +++ b/src/freedreno/vulkan/tu_lrz.h @@ -47,6 +47,10 @@ struct tu_lrz_state /* Continue using old LRZ state (LOAD_OP_LOAD of depth) */ bool reuse_previous_state : 1; bool gpu_dir_set : 1; + + bool color_written_with_z_test : 1; + bool has_lrz_write_with_skipped_color_writes : 1; + enum tu_lrz_direction prev_direction; }; diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 322a43f8780..6b0e93b4a8b 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -2113,7 +2113,8 @@ 
tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { pipeline->output = library->base.output; - pipeline->lrz_blend.reads_dest |= library->base.lrz_blend.reads_dest; + pipeline->lrz_blend.lrz_blend_status = + library->base.lrz_blend.lrz_blend_status; pipeline->lrz_blend.valid |= library->base.lrz_blend.valid; } @@ -3028,28 +3029,34 @@ tu_emit_disable_fs(struct tu_disable_fs *disable_fs, } /* Return true if the blend state reads the color attachments. */ -static bool +static tu_lrz_blend_status tu6_calc_blend_lrz(const struct vk_color_blend_state *cb, const struct vk_render_pass_state *rp) { if (cb->logic_op_enable && tu_logic_op_reads_dst((VkLogicOp)cb->logic_op)) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; - bool has_enabled_attachments = false; + uint32_t written_color_attachments = 0; + uint32_t total_color_attachments = 0; for (unsigned i = 0; i < cb->attachment_count; i++) { if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) continue; + total_color_attachments++; const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; if ((cb->color_write_enables & (1u << i)) && att->write_mask != 0) { - has_enabled_attachments = true; - break; + written_color_attachments++; } } - /* There is no partial write if there is no writes at all. 
*/ - if (!has_enabled_attachments) - return false; + if (total_color_attachments == 0) + return TU_LRZ_BLEND_SAFE_FOR_LRZ; + + if (written_color_attachments == 0) + return TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED; + + if (written_color_attachments < cb->attachment_count) + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; for (unsigned i = 0; i < cb->attachment_count; i++) { if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) @@ -3057,16 +3064,16 @@ tu6_calc_blend_lrz(const struct vk_color_blend_state *cb, const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; if (att->blend_enable) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; if (!(cb->color_write_enables & (1u << i))) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; unsigned mask = MASK(vk_format_get_nr_components(rp->color_attachment_formats[i])); if ((att->write_mask & mask) != mask) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; } - return false; + return TU_LRZ_BLEND_SAFE_FOR_LRZ; } static const enum mesa_vk_dynamic_graphics_state tu_blend_lrz_state[] = { @@ -3083,7 +3090,7 @@ tu_emit_blend_lrz(struct tu_lrz_blend *lrz, const struct vk_color_blend_state *cb, const struct vk_render_pass_state *rp) { - lrz->reads_dest = tu6_calc_blend_lrz(cb, rp); + lrz->lrz_blend_status = tu6_calc_blend_lrz(cb, rp); lrz->valid = true; } @@ -4021,10 +4028,10 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) cmd->vk.dynamic_graphics_state.ms.sample_mask); if (!cmd->state.pipeline_blend_lrz && (EMIT_STATE(blend_lrz) || (cmd->state.dirty & TU_CMD_DIRTY_SUBPASS))) { - bool blend_reads_dest = tu6_calc_blend_lrz(&cmd->vk.dynamic_graphics_state.cb, - &cmd->state.vk_rp); - if (blend_reads_dest != cmd->state.blend_reads_dest) { - cmd->state.blend_reads_dest = blend_reads_dest; + tu_lrz_blend_status blend_status = tu6_calc_blend_lrz( + &cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp); + if (blend_status != cmd->state.lrz_blend_status) { + 
cmd->state.lrz_blend_status = blend_status; cmd->state.dirty |= TU_CMD_DIRTY_LRZ; } } diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 08627aa55ea..300a7b458ed 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -38,10 +38,15 @@ enum tu_dynamic_state struct cache_entry; +enum tu_lrz_blend_status { + TU_LRZ_BLEND_SAFE_FOR_LRZ, + TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE, + TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED, +}; struct tu_lrz_blend { bool valid; - bool reads_dest; + enum tu_lrz_blend_status lrz_blend_status; }; struct tu_bandwidth