From 847ad80e03e3cf90d005c8393829588005716917 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Mon, 24 Mar 2025 18:50:03 +0100 Subject: [PATCH] tu/lrz: Consider FS depth layout when gl_FragDepth is written Specifying the depth write direction in the shader may help us. E.g. if the depth test is GREATER and the FS specifies FRAG_DEPTH_LAYOUT_LESS, LRZ won't kill any fragment that shouldn't be killed: the FS can only reduce the depth value, which can only make a fragment fail the GREATER depth test, never newly pass it. We just have to enable the late Z test. The same concept exists in D3D11 and is seen e.g. in the game "Stray". Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 4 +++ src/freedreno/ir3/ir3_shader.h | 1 + src/freedreno/vulkan/tu_cmd_buffer.cc | 10 +++++++ src/freedreno/vulkan/tu_lrz.cc | 40 ++++++++++++++++++++++++++- src/freedreno/vulkan/tu_lrz.h | 1 + src/freedreno/vulkan/tu_shader.cc | 4 +-- 6 files changed, 57 insertions(+), 3 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index f5b38eb91be..72720f77ac0 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -5921,6 +5921,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, ctx->s->info.fs.post_depth_coverage) so->post_depth_coverage = true; + if (ctx->so->type == MESA_SHADER_FRAGMENT) { + so->fs.depth_layout = ctx->s->info.fs.depth_layout; + } + ctx->so->per_samp = ctx->s->info.fs.uses_sample_shading; if (ctx->has_relative_load_const_ir3) { diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 991643c04d5..2b77b505a93 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -929,6 +929,7 @@ struct ir3_shader_variant { bool color_is_dual_source : 1; bool uses_fbfetch_output : 1; bool fbfetch_coherent : 1; + enum gl_frag_depth_layout depth_layout; } fs; struct { unsigned req_local_mem; diff --git 
a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index e6e8a5baf4e..fc3bc6709db 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -6222,8 +6222,18 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs) pass->attachments[subpass->depth_stencil_attachment.attachment].format == VK_FORMAT_S8_UINT) || fs->fs.lrz.force_late_z || + cmd->state.lrz.force_late_z || /* alpha-to-coverage can behave like a discard. */ cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable; + + /* If there is explicit depth direction in FS writing gl_FragDepth + * may be compatible with LRZ test. + */ + if (!force_late_z && cmd->state.lrz.enabled && fs->variant->writes_pos && + zmode != A6XX_LATE_Z) { + zmode = A6XX_EARLY_LRZ_LATE_Z; + } + if ((force_late_z && !fs->variant->fs.early_fragment_tests) || !ds_test_enable) zmode = A6XX_LATE_Z; diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 64697979052..61646ed079f 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -759,6 +759,9 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = { 0 }; + if (fs->variant->writes_pos && !fs->variant->fs.early_fragment_tests) + cmd->state.lrz.force_late_z = true; + if (!cmd->state.lrz.valid) { return gras_lrz_cntl; } @@ -802,7 +805,42 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, * fragment tests. We have to skip LRZ testing and updating, but as long as * the depth direction stayed the same we can continue with LRZ testing later. */ - if (fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_LRZ) { + bool disable_lrz_due_to_fs = fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_LRZ; + + /* Specifying depth write direction in shader may help us. E.g. 
+ * If depth test is GREATER and FS specifies FRAG_DEPTH_LAYOUT_LESS + * it means that LRZ won't kill any fragment that shouldn't be killed, + * in other words, FS can only reduce the depth value which could + * make fragment to NOT pass with GREATER depth test. We just have to + * enable late Z test. + */ + if (!disable_lrz_due_to_fs && fs->variant->writes_pos && + !fs->variant->fs.early_fragment_tests) { + if (fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE || + fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_ANY) { + disable_lrz_due_to_fs = true; + } else { + if (fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_GREATER) { + disable_lrz_due_to_fs = + depth_compare_op != VK_COMPARE_OP_LESS && + depth_compare_op != VK_COMPARE_OP_LESS_OR_EQUAL; + } else if (fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_LESS) { + disable_lrz_due_to_fs = + depth_compare_op != VK_COMPARE_OP_GREATER && + depth_compare_op != VK_COMPARE_OP_GREATER_OR_EQUAL; + } + /* FRAG_DEPTH_LAYOUT_UNCHANGED is always OK.*/ + } + + cmd->state.lrz.force_late_z = disable_lrz_due_to_fs; + } else if (fs->variant->writes_pos && !fs->variant->fs.early_fragment_tests) { + disable_lrz_due_to_fs = true; + cmd->state.lrz.force_late_z = true; + } else { + cmd->state.lrz.force_late_z = false; + } + + if (disable_lrz_due_to_fs) { if (cmd->state.lrz.prev_direction != TU_LRZ_UNKNOWN || !cmd->state.lrz.gpu_dir_tracking) { perf_debug(cmd->device, "Skipping LRZ due to FS"); temporary_disable_lrz = true; diff --git a/src/freedreno/vulkan/tu_lrz.h b/src/freedreno/vulkan/tu_lrz.h index 1ff80e0874d..941799ebd8f 100644 --- a/src/freedreno/vulkan/tu_lrz.h +++ b/src/freedreno/vulkan/tu_lrz.h @@ -43,6 +43,7 @@ struct tu_lrz_state bool enabled : 1; bool fast_clear : 1; bool gpu_dir_tracking : 1; + bool force_late_z : 1; /* Continue using old LRZ state (LOAD_OP_LOAD of depth) */ bool reuse_previous_state : 1; bool gpu_dir_set : 1; diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc 
index 110be1edca6..75f9a592fe9 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -2737,7 +2737,7 @@ tu_shader_create(struct tu_device *dev, shader->fs.has_fdm = key->fragment_density_map; if (fs->has_kill) shader->fs.lrz.status |= TU_LRZ_FORCE_DISABLE_WRITE; - if (fs->no_earlyz || (fs->writes_pos && !fs->fs.early_fragment_tests)) + if (fs->no_earlyz) shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ; /* FDM isn't compatible with LRZ, because the LRZ image uses the original * resolution and we would need to use the low resolution. @@ -2747,7 +2747,7 @@ tu_shader_create(struct tu_device *dev, if (key->fragment_density_map) shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ; if (!fs->fs.early_fragment_tests && - (fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) { + (fs->no_earlyz || fs->writes_stencilref || fs->writes_smask)) { shader->fs.lrz.force_late_z = true; } break;