diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index edc34a898f5..d457c474bd9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -4692,8 +4692,9 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, tu_pipeline_update_rp_state(&cmd->state); if (pipeline->lrz_blend.valid) { - if (cmd->state.blend_reads_dest != pipeline->lrz_blend.reads_dest) { - cmd->state.blend_reads_dest = pipeline->lrz_blend.reads_dest; + if (cmd->state.lrz_blend_status != + pipeline->lrz_blend.lrz_blend_status) { + cmd->state.lrz_blend_status = pipeline->lrz_blend.lrz_blend_status; cmd->state.dirty |= TU_CMD_DIRTY_LRZ; } } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index d8426cfd962..f8aec180129 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -551,7 +551,7 @@ struct tu_cmd_state bool tessfactor_addr_set; bool predication_active; bool msaa_disable; - bool blend_reads_dest; + tu_lrz_blend_status lrz_blend_status; bool disable_fs; bool stencil_front_write; bool stencil_back_write; diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 5f1d9c6cac7..3feed08fd72 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -237,6 +237,8 @@ tu_lrz_init_state(struct tu_cmd_buffer *cmd, cmd->state.lrz.valid = true; cmd->state.lrz.valid_at_start = true; cmd->state.lrz.disable_write_for_rp = false; + cmd->state.lrz.color_written_with_z_test = false; + cmd->state.lrz.has_lrz_write_with_skipped_color_writes = false; cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN; /* Be optimistic and unconditionally enable fast-clear in * secondary cmdbufs and when reusing previous LRZ state. 
@@ -272,6 +274,8 @@ tu_lrz_init_secondary(struct tu_cmd_buffer *cmd, cmd->state.lrz.valid = true; cmd->state.lrz.valid_at_start = true; cmd->state.lrz.disable_write_for_rp = false; + cmd->state.lrz.color_written_with_z_test = false; + cmd->state.lrz.has_lrz_write_with_skipped_color_writes = false; cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN; cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking; @@ -737,7 +741,8 @@ tu_lrz_flush_valid_during_renderpass(struct tu_cmd_buffer *cmd, /* Even if state is valid, we cannot be sure that secondary * command buffer has the same sticky disable_write_for_rp. */ - if (cmd->state.lrz.valid && !cmd->state.lrz.disable_write_for_rp) + if (cmd->state.lrz.valid && !cmd->state.lrz.disable_write_for_rp && + !cmd->state.lrz.has_lrz_write_with_skipped_color_writes) return; tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_VIEW_INFO( @@ -783,7 +788,7 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, } /* See comment in tu_pipeline about disabling LRZ write for blending. */ - bool reads_dest = cmd->state.blend_reads_dest; + enum tu_lrz_blend_status blend_status = cmd->state.lrz_blend_status; gras_lrz_cntl.enable = true; gras_lrz_cntl.lrz_write = @@ -983,10 +988,28 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, * enable LRZ write. But this would cause early-z/lrz to discard * fragments from draw A which should be visible due to draw B. */ - if (reads_dest && z_write_enable && cmd->device->instance->conservative_lrz) { + if (blend_status == TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE && + z_write_enable && cmd->device->instance->conservative_lrz) { tu_lrz_disable_write_for_rp(cmd, "Depth write + blending"); } + /* This is a special case because we want to avoid disabling LRZ when a + * renderpass starts with depth-only draw calls, or consists entirely + * of them, but also has color attachments. 
+ */ + if (blend_status == TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED && + z_write_enable && cmd->device->instance->conservative_lrz) { + if (cmd->state.lrz.color_written_with_z_test) { + tu_lrz_disable_write_for_rp(cmd, "Depth write + no color writes"); + } + cmd->state.lrz.has_lrz_write_with_skipped_color_writes = true; + } + + if (z_test_enable && + blend_status != TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED) { + cmd->state.lrz.color_written_with_z_test = true; + } + /* If the stencil test behavior depends on the result of the depth test, we * have to skip LRZ for the rest of the RP for basically the same reason as * the blending case above (LRZ testing enabled on previous draws may result diff --git a/src/freedreno/vulkan/tu_lrz.h b/src/freedreno/vulkan/tu_lrz.h index c8550903aa8..a74808fd944 100644 --- a/src/freedreno/vulkan/tu_lrz.h +++ b/src/freedreno/vulkan/tu_lrz.h @@ -47,6 +47,10 @@ struct tu_lrz_state /* Continue using old LRZ state (LOAD_OP_LOAD of depth) */ bool reuse_previous_state : 1; bool gpu_dir_set : 1; + + bool color_written_with_z_test : 1; + bool has_lrz_write_with_skipped_color_writes : 1; + enum tu_lrz_direction prev_direction; }; diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index 322a43f8780..6b0e93b4a8b 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -2113,7 +2113,8 @@ tu_pipeline_builder_parse_libraries(struct tu_pipeline_builder *builder, if (library->state & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) { pipeline->output = library->base.output; - pipeline->lrz_blend.reads_dest |= library->base.lrz_blend.reads_dest; + pipeline->lrz_blend.lrz_blend_status = + library->base.lrz_blend.lrz_blend_status; pipeline->lrz_blend.valid |= library->base.lrz_blend.valid; } @@ -3028,28 +3029,34 @@ tu_emit_disable_fs(struct tu_disable_fs *disable_fs, } /* Return true if the blend state reads the color attachments. 
*/ -static bool +static tu_lrz_blend_status tu6_calc_blend_lrz(const struct vk_color_blend_state *cb, const struct vk_render_pass_state *rp) { if (cb->logic_op_enable && tu_logic_op_reads_dst((VkLogicOp)cb->logic_op)) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; - bool has_enabled_attachments = false; + uint32_t written_color_attachments = 0; + uint32_t total_color_attachments = 0; for (unsigned i = 0; i < cb->attachment_count; i++) { if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) continue; + total_color_attachments++; const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; if ((cb->color_write_enables & (1u << i)) && att->write_mask != 0) { - has_enabled_attachments = true; - break; + written_color_attachments++; } } - /* There is no partial write if there is no writes at all. */ - if (!has_enabled_attachments) - return false; + if (total_color_attachments == 0) + return TU_LRZ_BLEND_SAFE_FOR_LRZ; + + if (written_color_attachments == 0) + return TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED; + + if (written_color_attachments < total_color_attachments) /* compare only against attachments with a defined format; UNDEFINED slots are skipped by the counting loop */ + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; for (unsigned i = 0; i < cb->attachment_count; i++) { if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) @@ -3057,16 +3064,16 @@ tu6_calc_blend_lrz(const struct vk_color_blend_state *cb, const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; if (att->blend_enable) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; if (!(cb->color_write_enables & (1u << i))) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; unsigned mask = MASK(vk_format_get_nr_components(rp->color_attachment_formats[i])); if ((att->write_mask & mask) != mask) - return true; + return TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE; } - return false; + return TU_LRZ_BLEND_SAFE_FOR_LRZ; } static const enum mesa_vk_dynamic_graphics_state tu_blend_lrz_state[] = { @@ -3083,7 +3090,7 @@ tu_emit_blend_lrz(struct
tu_lrz_blend *lrz, const struct vk_color_blend_state *cb, const struct vk_render_pass_state *rp) { - lrz->reads_dest = tu6_calc_blend_lrz(cb, rp); + lrz->lrz_blend_status = tu6_calc_blend_lrz(cb, rp); lrz->valid = true; } @@ -4021,10 +4028,10 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) cmd->vk.dynamic_graphics_state.ms.sample_mask); if (!cmd->state.pipeline_blend_lrz && (EMIT_STATE(blend_lrz) || (cmd->state.dirty & TU_CMD_DIRTY_SUBPASS))) { - bool blend_reads_dest = tu6_calc_blend_lrz(&cmd->vk.dynamic_graphics_state.cb, - &cmd->state.vk_rp); - if (blend_reads_dest != cmd->state.blend_reads_dest) { - cmd->state.blend_reads_dest = blend_reads_dest; + tu_lrz_blend_status blend_status = tu6_calc_blend_lrz( + &cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp); + if (blend_status != cmd->state.lrz_blend_status) { + cmd->state.lrz_blend_status = blend_status; cmd->state.dirty |= TU_CMD_DIRTY_LRZ; } } diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 08627aa55ea..300a7b458ed 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -38,10 +38,15 @@ enum tu_dynamic_state struct cache_entry; +enum tu_lrz_blend_status { /* classification of the color blend state as computed by tu6_calc_blend_lrz() */ + TU_LRZ_BLEND_SAFE_FOR_LRZ, /* no color attachment with a defined format, or none of the attachments blends, has its write disabled, or masks out components */ + TU_LRZ_BLEND_READS_DEST_OR_PARTIAL_WRITE, /* a logic op reads the destination, blending is enabled, or a color write is disabled or component-masked */ + TU_LRZ_BLEND_ALL_COLOR_WRITES_SKIPPED, /* color attachments with a defined format exist but none of them is written */ +}; struct tu_lrz_blend { bool valid; - bool reads_dest; + enum tu_lrz_blend_status lrz_blend_status; }; struct tu_bandwidth