tu/lrz: Consider FS depth layout when gl_FragDepth is written

Specifying the depth write direction in the shader may help us. E.g.
if the depth test is GREATER and the FS specifies FRAG_DEPTH_LAYOUT_LESS,
LRZ won't kill any fragment that shouldn't be killed. In other words,
the FS can only reduce the depth value, which could only make the
fragment NOT pass the GREATER depth test. We just have to enable the
late Z test.

The same concept exists in D3D11, and it is seen e.g. in the game "Stray".

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34423>
This commit is contained in:
Danylo Piliaiev 2025-03-24 18:50:03 +01:00 committed by Marge Bot
parent d05b92d720
commit 847ad80e03
6 changed files with 57 additions and 3 deletions

View file

@ -5921,6 +5921,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ctx->s->info.fs.post_depth_coverage)
so->post_depth_coverage = true;
if (ctx->so->type == MESA_SHADER_FRAGMENT) {
so->fs.depth_layout = ctx->s->info.fs.depth_layout;
}
ctx->so->per_samp = ctx->s->info.fs.uses_sample_shading;
if (ctx->has_relative_load_const_ir3) {

View file

@ -929,6 +929,7 @@ struct ir3_shader_variant {
bool color_is_dual_source : 1;
bool uses_fbfetch_output : 1;
bool fbfetch_coherent : 1;
enum gl_frag_depth_layout depth_layout;
} fs;
struct {
unsigned req_local_mem;

View file

@ -6222,8 +6222,18 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
pass->attachments[subpass->depth_stencil_attachment.attachment].format
== VK_FORMAT_S8_UINT) ||
fs->fs.lrz.force_late_z ||
cmd->state.lrz.force_late_z ||
/* alpha-to-coverage can behave like a discard. */
cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable;
/* If the FS declares an explicit depth direction, writing gl_FragDepth
* may still be compatible with the LRZ test.
*/
if (!force_late_z && cmd->state.lrz.enabled && fs->variant->writes_pos &&
zmode != A6XX_LATE_Z) {
zmode = A6XX_EARLY_LRZ_LATE_Z;
}
if ((force_late_z && !fs->variant->fs.early_fragment_tests) ||
!ds_test_enable)
zmode = A6XX_LATE_Z;

View file

@ -759,6 +759,9 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = { 0 };
if (fs->variant->writes_pos && !fs->variant->fs.early_fragment_tests)
cmd->state.lrz.force_late_z = true;
if (!cmd->state.lrz.valid) {
return gras_lrz_cntl;
}
@ -802,7 +805,42 @@ tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd,
* fragment tests. We have to skip LRZ testing and updating, but as long as
* the depth direction stayed the same we can continue with LRZ testing later.
*/
if (fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_LRZ) {
bool disable_lrz_due_to_fs = fs->fs.lrz.status & TU_LRZ_FORCE_DISABLE_LRZ;
/* Specifying the depth write direction in the shader may help us. E.g.
* if the depth test is GREATER and the FS specifies FRAG_DEPTH_LAYOUT_LESS,
* LRZ won't kill any fragment that shouldn't be killed. In other words,
* the FS can only reduce the depth value, which could only make the
* fragment NOT pass the GREATER depth test. We just have to enable the
* late Z test.
*/
if (!disable_lrz_due_to_fs && fs->variant->writes_pos &&
!fs->variant->fs.early_fragment_tests) {
if (fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE ||
fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_ANY) {
disable_lrz_due_to_fs = true;
} else {
if (fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_GREATER) {
disable_lrz_due_to_fs =
depth_compare_op != VK_COMPARE_OP_LESS &&
depth_compare_op != VK_COMPARE_OP_LESS_OR_EQUAL;
} else if (fs->variant->fs.depth_layout == FRAG_DEPTH_LAYOUT_LESS) {
disable_lrz_due_to_fs =
depth_compare_op != VK_COMPARE_OP_GREATER &&
depth_compare_op != VK_COMPARE_OP_GREATER_OR_EQUAL;
}
/* FRAG_DEPTH_LAYOUT_UNCHANGED is always OK.*/
}
cmd->state.lrz.force_late_z = disable_lrz_due_to_fs;
} else if (fs->variant->writes_pos && !fs->variant->fs.early_fragment_tests) {
disable_lrz_due_to_fs = true;
cmd->state.lrz.force_late_z = true;
} else {
cmd->state.lrz.force_late_z = false;
}
if (disable_lrz_due_to_fs) {
if (cmd->state.lrz.prev_direction != TU_LRZ_UNKNOWN || !cmd->state.lrz.gpu_dir_tracking) {
perf_debug(cmd->device, "Skipping LRZ due to FS");
temporary_disable_lrz = true;

View file

@ -43,6 +43,7 @@ struct tu_lrz_state
bool enabled : 1;
bool fast_clear : 1;
bool gpu_dir_tracking : 1;
bool force_late_z : 1;
/* Continue using old LRZ state (LOAD_OP_LOAD of depth) */
bool reuse_previous_state : 1;
bool gpu_dir_set : 1;

View file

@ -2737,7 +2737,7 @@ tu_shader_create(struct tu_device *dev,
shader->fs.has_fdm = key->fragment_density_map;
if (fs->has_kill)
shader->fs.lrz.status |= TU_LRZ_FORCE_DISABLE_WRITE;
if (fs->no_earlyz || (fs->writes_pos && !fs->fs.early_fragment_tests))
if (fs->no_earlyz)
shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
/* FDM isn't compatible with LRZ, because the LRZ image uses the original
* resolution and we would need to use the low resolution.
@ -2747,7 +2747,7 @@ tu_shader_create(struct tu_device *dev,
if (key->fragment_density_map)
shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
if (!fs->fs.early_fragment_tests &&
(fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) {
(fs->no_earlyz || fs->writes_stencilref || fs->writes_smask)) {
shader->fs.lrz.force_late_z = true;
}
break;