From d88c1837853482a47cba262e3300a70b71eb91ff Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Wed, 27 May 2026 08:05:22 +0200 Subject: [PATCH] tu: Disable FS in some cases even when FS explicitly writes D/S For example, the FS may write gl_SampleMask while color writes are masked out and there is no depth attachment. Note that the proprietary driver still considers more state when disabling the FS, such as the depth test being disabled, and thus disables the FS in cases where we do not. However, I think that is too much of a stretch unless we find some real workload needing it. This change also allows disabling an FS that has discard. This requires being careful around occlusion queries, since when one is enabled, we cannot disable an FS that can discard. Found via gpu-ratemeter bench: vk.pix.noaa.output.color+z+samplemask.colormask=0 Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3_compiler_nir.c | 6 ++--- src/freedreno/ir3/ir3_shader.h | 4 +-- src/freedreno/vulkan/tu_cmd_buffer.cc | 2 +- src/freedreno/vulkan/tu_cmd_buffer.h | 3 ++- src/freedreno/vulkan/tu_pipeline.cc | 37 +++++++++++++++++++-------- src/freedreno/vulkan/tu_query_pool.cc | 4 +-- 6 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 4be8449146d..df0b4e48a42 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -6225,9 +6225,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, if (so->type == MESA_SHADER_FRAGMENT) { so->empty = is_empty(ir) && so->outputs_count == 0 && so->num_sampler_prefetch == 0; - so->writes_only_color = !ctx->s->info.writes_memory && !so->has_kill && - !so->writes_pos && !so->writes_smask && - !so->writes_stencilref; + so->has_no_side_effects = !ctx->s->info.writes_memory; + so->has_no_ds_effects = !so->has_kill && !so->writes_pos && + !so->writes_smask && !so->writes_stencilref; } if (mesa_shader_stage_is_compute(so->type)) { diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index da72367643d..27d4ea2e119 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -879,8 +879,8 @@ struct ir3_shader_variant { bool post_depth_coverage; bool empty; - /* Doesn't have side-effects, no kill, no D/S write, etc. */ - bool writes_only_color; + bool has_no_side_effects; + bool has_no_ds_effects; /* Are we using split or merged register file? */ bool mergedregs; diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 57159e38532..8d5c29fcb60 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -5508,7 +5508,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, if (pipeline->disable_fs.valid) { if (cmd->state.disable_fs != pipeline->disable_fs.disable_fs) { cmd->state.disable_fs = pipeline->disable_fs.disable_fs; - cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS; + cmd->state.dirty |= TU_CMD_DIRTY_RAST | TU_CMD_DIRTY_LRZ; } } cmd->state.pipeline_disable_fs = pipeline->disable_fs.valid; diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index e173dfcf63a..17bebebd135 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -82,9 +82,10 @@ enum tu_cmd_dirty_bits TU_CMD_DIRTY_DISABLE_FS = BIT(16), TU_CMD_DIRTY_TCS = BIT(17), TU_CMD_DIRTY_VS = BIT(18), + TU_CMD_DIRTY_RAST = BIT(19), /* all draw states were disabled and need to be re-enabled: */ - TU_CMD_DIRTY_DRAW_STATE = BIT(19) + TU_CMD_DIRTY_DRAW_STATE = BIT(20) }; /* There are only three cache domains we have to care about: the CCU, or diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index a52a3eeeeac..754344364fe 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -3147,31 +3147,43 @@ static const enum mesa_vk_dynamic_graphics_state tu_disable_fs_state[] = { MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE, }; +static bool +tu_fs_disable_safe_for_occlusion_query(const struct tu_shader *fs) +{ + return !fs || !(fs->variant->has_kill || fs->variant->writes_smask); +} + static bool tu_calc_disable_fs(const struct vk_color_blend_state *cb, const struct vk_render_pass_state *rp, bool alpha_to_coverage_enable, - const struct tu_shader *fs) + const struct tu_shader *fs, + bool occlusion_query_may_be_running) { if (alpha_to_coverage_enable) return false; - if (fs && !fs->variant->writes_only_color) + if (fs && !fs->variant->has_no_side_effects) + return false; + if (occlusion_query_may_be_running && !tu_fs_disable_safe_for_occlusion_query(fs)) return false; - bool has_enabled_attachments = false; + bool has_enabled_color_attachments = false; for (unsigned i = 0; i < cb->attachment_count; i++) { if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) continue; const struct vk_color_blend_attachment_state *att = &cb->attachments[i]; if ((cb->color_write_enables & (1u << i)) && att->write_mask != 0) { - has_enabled_attachments = true; + has_enabled_color_attachments = true; break; } } + bool has_enabled_ds_attachment = + rp->attachments & (MESA_VK_RP_ATTACHMENT_DEPTH_BIT | MESA_VK_RP_ATTACHMENT_STENCIL_BIT); + return !fs || fs->variant->empty || - (fs->variant->writes_only_color && !has_enabled_attachments); + (!has_enabled_color_attachments && (!has_enabled_ds_attachment || fs->variant->has_no_ds_effects)); } static void @@ -3182,7 +3194,7 @@ tu_emit_disable_fs(struct tu_disable_fs *disable_fs, const struct tu_shader *fs) { disable_fs->disable_fs = - tu_calc_disable_fs(cb, rp, alpha_to_coverage_enable, fs); + tu_calc_disable_fs(cb, rp, alpha_to_coverage_enable, fs, false); disable_fs->valid = true; } @@ -3985,7 +3997,8 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder, builder->graphics_state.rp); if (EMIT_STATE( disable_fs, - attachments_valid && pipeline_contains_all_shader_state(pipeline))) + attachments_valid && pipeline_contains_all_shader_state(pipeline) && + tu_fs_disable_safe_for_occlusion_query(pipeline->shaders[MESA_SHADER_FRAGMENT]))) tu_emit_disable_fs(&pipeline->disable_fs, cb, builder->graphics_state.rp, builder->graphics_state.ms->alpha_to_coverage_enable, @@ -4245,15 +4258,17 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) if (!cmd->state.pipeline_disable_fs && (EMIT_STATE(disable_fs) || - (cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_FS)))) { + (cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_FS | + TU_CMD_DIRTY_DISABLE_FS)))) { bool disable_fs = tu_calc_disable_fs( &cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp, cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable, - cmd->state.shaders[MESA_SHADER_FRAGMENT]); + cmd->state.shaders[MESA_SHADER_FRAGMENT], + cmd->state.occlusion_query_may_be_running); if (disable_fs != cmd->state.disable_fs) { cmd->state.disable_fs = disable_fs; - cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS; + cmd->state.dirty |= TU_CMD_DIRTY_RAST | TU_CMD_DIRTY_LRZ; } } @@ -4272,7 +4287,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd) DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST, cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_PER_VIEW_VIEWPORT | - TU_CMD_DIRTY_DISABLE_FS), + TU_CMD_DIRTY_RAST), &cmd->vk.dynamic_graphics_state.rs, &cmd->vk.dynamic_graphics_state.vp, cmd->state.vk_mv.view_mask != 0, diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc index 554e5c72143..b9219a2790f 100644 --- a/src/freedreno/vulkan/tu_query_pool.cc +++ b/src/freedreno/vulkan/tu_query_pool.cc @@ -1090,7 +1090,7 @@ emit_begin_occlusion_query(struct tu_cmd_buffer *cmdbuf, */ struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs; cmdbuf->state.occlusion_query_may_be_running = true; - cmdbuf->state.dirty |= TU_CMD_DIRTY_LRZ; + cmdbuf->state.dirty |= TU_CMD_DIRTY_DISABLE_FS | TU_CMD_DIRTY_LRZ; uint64_t begin_iova = occlusion_query_iova(pool, query, begin); @@ -1589,7 +1589,7 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_qw(epilogue_cs, 0x1); cmdbuf->state.occlusion_query_may_be_running = false; - cmdbuf->state.dirty |= TU_CMD_DIRTY_LRZ; + cmdbuf->state.dirty |= TU_CMD_DIRTY_DISABLE_FS | TU_CMD_DIRTY_LRZ; } /* PRIMITIVE_CTRS is used for two distinct queries: