From 13e6dfb45f025dce2104bc9ba307f639f261c892 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Mon, 12 May 2025 16:05:34 +0200 Subject: [PATCH] tu: Use EARLY_Z_LATE_Z even when LRZ is disabled As previously documented, this mode either uses LRZ or early-z (when LRZ is invalid). Though it has some limitations, it's not compatible with: - Lack of D/S attachment - Stencil writes on stencil or depth test failure - Per-sample shading Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.cc | 53 +++++++++++++++++++++++---- src/freedreno/vulkan/tu_cmd_buffer.h | 1 + src/freedreno/vulkan/tu_lrz.cc | 47 ++++-------------------- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index bf59158861a..0a81e860422 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -6117,6 +6117,31 @@ tu_emit_consts(struct tu_cmd_buffer *cmd, bool compute) return tu_cs_end_draw_state(&cmd->sub_cs, &cs); } +/* Returns true if stencil may be written when depth test fails. + * This could be either from stencil written on depth test fail itself, + * or stencil written on the stencil test failure where subsequent depth + * test may also fail. + */ +static bool +tu6_stencil_written_on_depth_fail( + const struct vk_stencil_test_face_state *face) +{ + switch (face->op.compare) { + case VK_COMPARE_OP_ALWAYS: + /* The stencil op always passes, no need to worry about failOp. */ + return face->op.depth_fail != VK_STENCIL_OP_KEEP; + case VK_COMPARE_OP_NEVER: + /* The stencil op always fails, so failOp will always be used. */ + return face->op.fail != VK_STENCIL_OP_KEEP; + default: + /* If the stencil test fails, depth may fail as well, so we can write + * stencil when the depth fails if failOp is not VK_STENCIL_OP_KEEP. 
+ */ + return face->op.fail != VK_STENCIL_OP_KEEP || + face->op.depth_fail != VK_STENCIL_OP_KEEP; + } +} + /* Various frontends (ANGLE, zink at least) will enable stencil testing with * what works out to be no-op writes. Simplify what they give us into flags * that LRZ can use. @@ -6131,6 +6156,7 @@ tu6_update_simplified_stencil_state(struct tu_cmd_buffer *cmd) if (!stencil_test_enable) { cmd->state.stencil_front_write = false; cmd->state.stencil_back_write = false; + cmd->state.stencil_written_on_depth_fail = false; return; } @@ -6158,6 +6184,11 @@ tu6_update_simplified_stencil_state(struct tu_cmd_buffer *cmd) stencil_front_op_writes && stencil_front_writemask; cmd->state.stencil_back_write = stencil_back_op_writes && stencil_back_writemask; + cmd->state.stencil_written_on_depth_fail = + (cmd->state.stencil_front_write && + tu6_stencil_written_on_depth_fail(&ds->stencil.front)) || + (cmd->state.stencil_back_write && + tu6_stencil_written_on_depth_fail(&ds->stencil.back)); } static bool @@ -6204,6 +6235,10 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs) const struct tu_render_pass *pass = cmd->state.pass; const struct tu_subpass *subpass = cmd->state.subpass; + VkFormat depth_format = VK_FORMAT_UNDEFINED; + if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) + depth_format = pass->attachments[subpass->depth_stencil_attachment.attachment].format; + if ((fs->variant->has_kill || (cmd->state.pipeline_feedback_loops & VK_IMAGE_ASPECT_DEPTH_BIT) || (cmd->vk.dynamic_graphics_state.feedback_loops & @@ -6214,16 +6249,12 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs) fs->variant->writes_smask || tu_fs_reads_dynamic_ds_input_attachment(cmd, fs)) && (depth_write || stencil_write)) { - zmode = (cmd->state.lrz.valid && cmd->state.lrz.enabled) - ? 
A6XX_EARLY_Z_LATE_Z - : A6XX_LATE_Z; + zmode = A6XX_EARLY_Z_LATE_Z; } bool ds_test_enable = depth_test_enable || stencil_test_enable; bool force_late_z = - (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED && - pass->attachments[subpass->depth_stencil_attachment.attachment].format - == VK_FORMAT_S8_UINT) || + (depth_format == VK_FORMAT_S8_UINT) || fs->fs.lrz.force_late_z || cmd->state.lrz.force_late_z || /* alpha-to-coverage can behave like a discard. */ @@ -6232,11 +6263,17 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs) /* If there is explicit depth direction in FS writing gl_FragDepth * may be compatible with LRZ test. */ - if (!force_late_z && cmd->state.lrz.enabled && fs->variant->writes_pos && - zmode != A6XX_LATE_Z) { + if (cmd->state.lrz.enabled && fs->variant->writes_pos && + zmode == A6XX_EARLY_Z) { zmode = A6XX_EARLY_Z_LATE_Z; } + if (zmode == A6XX_EARLY_Z_LATE_Z && + (cmd->state.stencil_written_on_depth_fail || fs->fs.per_samp || + !vk_format_has_depth(depth_format) || !ds_test_enable)) { + zmode = A6XX_LATE_Z; + } + if ((force_late_z && !fs->variant->fs.early_fragment_tests) || !ds_test_enable) zmode = A6XX_LATE_Z; diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index 032a5e84e25..cde022f9d37 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -521,6 +521,7 @@ struct tu_cmd_state bool disable_fs; bool stencil_front_write; bool stencil_back_write; + bool stencil_written_on_depth_fail; bool pipeline_sysmem_single_prim_mode; bool pipeline_has_tess; bool pipeline_disable_gmem; diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 5017407dd6a..9e73ed2eb04 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -721,30 +721,6 @@ tu_lrz_flush_valid_during_renderpass(struct tu_cmd_buffer *cmd, } TU_GENX(tu_lrz_flush_valid_during_renderpass); -/* Returns true if stencil may 
be written when depth test fails. - * This could be either from stencil written on depth test fail itself, - * or stencil written on the stencil test failure where subsequent depth - * test may also fail. - */ -static bool -tu6_stencil_written_on_depth_fail(struct vk_stencil_test_face_state *face) -{ - switch (face->op.compare) { - case VK_COMPARE_OP_ALWAYS: - /* The stencil op always passes, no need to worry about failOp. */ - return face->op.depth_fail != VK_STENCIL_OP_KEEP; - case VK_COMPARE_OP_NEVER: - /* The stencil op always fails, so failOp will always be used. */ - return face->op.fail != VK_STENCIL_OP_KEEP; - default: - /* If the stencil test fails, depth may fail as well, so we can write - * stencil when the depth fails if failOp is not VK_STENCIL_OP_KEEP. - */ - return face->op.fail != VK_STENCIL_OP_KEEP || - face->op.depth_fail != VK_STENCIL_OP_KEEP; - } -} - template <chip CHIP> static struct A6XX_GRAS_LRZ_CNTL tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, @@ -935,21 +911,6 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, cmd->state.lrz.prev_direction = lrz_direction; if (cmd->vk.dynamic_graphics_state.ds.stencil.test_enable) { - /* Because the LRZ test runs first, failing the LRZ test may result in - * skipping the stencil test and subsequent stencil write. This is ok if - * stencil is only written when the depth test passes, because then the - * LRZ test will also pass, but if it may be written when the depth or - * stencil test fails then we need to disable the LRZ test for the draw. 
- */ - bool writes_stencil_on_ds_fail = - cmd->state.stencil_front_write && - tu6_stencil_written_on_depth_fail( - &cmd->vk.dynamic_graphics_state.ds.stencil.front); - writes_stencil_on_ds_fail |= - cmd->state.stencil_back_write && - tu6_stencil_written_on_depth_fail( - &cmd->vk.dynamic_graphics_state.ds.stencil.back); - bool frag_may_be_killed_by_stencil = !(cmd->vk.dynamic_graphics_state.ds.stencil.front.op.compare == VK_COMPARE_OP_ALWAYS && @@ -965,7 +926,13 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, cmd->state.lrz.disable_write_for_rp = true; } - if (writes_stencil_on_ds_fail) + /* Because the LRZ test runs first, failing the LRZ test may result in + * skipping the stencil test and subsequent stencil write. This is ok if + * stencil is only written when the depth test passes, because then the + * LRZ test will also pass, but if it may be written when the depth or + * stencil test fails then we need to disable the LRZ test for the draw. + */ + if (cmd->state.stencil_written_on_depth_fail) temporary_disable_lrz = true; }