tu: Disable FS in some cases even when FS explicitly writes D/S

For example, the FS may write gl_SampleMask while color writes are
masked out and there is no depth/stencil attachment. In that case the
write cannot affect any attachment, so the FS can be disabled.
Note that the proprietary driver still considers more state when
disabling the FS, such as the depth test being disabled, and thus
disables the FS in cases where we do not. However, I think that is
too much of a stretch unless we find some real workload needing it.

This change also allows disabling an FS that has discard.

This requires being careful around occlusion queries: while one may be
running, we cannot disable an FS that can discard or that writes the
sample mask.

Found via gpu-ratemeter bench: vk.pix.noaa.output.color+z+samplemask.colormask=0

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41857>
This commit is contained in:
Danylo Piliaiev 2026-05-27 08:05:22 +02:00
parent c4a1d9583c
commit d88c183785
6 changed files with 36 additions and 20 deletions

View file

@ -6225,9 +6225,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
if (so->type == MESA_SHADER_FRAGMENT) {
so->empty = is_empty(ir) && so->outputs_count == 0 &&
so->num_sampler_prefetch == 0;
so->writes_only_color = !ctx->s->info.writes_memory && !so->has_kill &&
!so->writes_pos && !so->writes_smask &&
!so->writes_stencilref;
so->has_no_side_effects = !ctx->s->info.writes_memory;
so->has_no_ds_effects = !so->has_kill && !so->writes_pos &&
!so->writes_smask && !so->writes_stencilref;
}
if (mesa_shader_stage_is_compute(so->type)) {

View file

@ -879,8 +879,8 @@ struct ir3_shader_variant {
bool post_depth_coverage;
bool empty;
/* Doesn't have side-effects, no kill, no D/S write, etc. */
bool writes_only_color;
bool has_no_side_effects;
bool has_no_ds_effects;
/* Are we using split or merged register file? */
bool mergedregs;

View file

@ -5508,7 +5508,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
if (pipeline->disable_fs.valid) {
if (cmd->state.disable_fs != pipeline->disable_fs.disable_fs) {
cmd->state.disable_fs = pipeline->disable_fs.disable_fs;
cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS;
cmd->state.dirty |= TU_CMD_DIRTY_RAST | TU_CMD_DIRTY_LRZ;
}
}
cmd->state.pipeline_disable_fs = pipeline->disable_fs.valid;

View file

@ -82,9 +82,10 @@ enum tu_cmd_dirty_bits
TU_CMD_DIRTY_DISABLE_FS = BIT(16),
TU_CMD_DIRTY_TCS = BIT(17),
TU_CMD_DIRTY_VS = BIT(18),
TU_CMD_DIRTY_RAST = BIT(19),
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = BIT(19)
TU_CMD_DIRTY_DRAW_STATE = BIT(20)
};
/* There are only three cache domains we have to care about: the CCU, or

View file

@ -3147,31 +3147,43 @@ static const enum mesa_vk_dynamic_graphics_state tu_disable_fs_state[] = {
MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE,
};
/* Whether the FS may be disabled while an occlusion query may be running.
 *
 * Returns true when there is no fragment shader at all, or when its
 * variant neither has a kill (discard) nor writes the sample mask.
 */
static bool
tu_fs_disable_safe_for_occlusion_query(const struct tu_shader *fs)
{
   /* No fragment shader bound: nothing to keep enabled. */
   if (!fs)
      return true;

   return !fs->variant->has_kill && !fs->variant->writes_smask;
}
static bool
tu_calc_disable_fs(const struct vk_color_blend_state *cb,
const struct vk_render_pass_state *rp,
bool alpha_to_coverage_enable,
const struct tu_shader *fs)
const struct tu_shader *fs,
bool occlusion_query_may_be_running)
{
if (alpha_to_coverage_enable)
return false;
if (fs && !fs->variant->writes_only_color)
if (fs && !fs->variant->has_no_side_effects)
return false;
if (occlusion_query_may_be_running && !tu_fs_disable_safe_for_occlusion_query(fs))
return false;
bool has_enabled_attachments = false;
bool has_enabled_color_attachments = false;
for (unsigned i = 0; i < cb->attachment_count; i++) {
if (rp->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
continue;
const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
if ((cb->color_write_enables & (1u << i)) && att->write_mask != 0) {
has_enabled_attachments = true;
has_enabled_color_attachments = true;
break;
}
}
bool has_enabled_ds_attachment =
rp->attachments & (MESA_VK_RP_ATTACHMENT_DEPTH_BIT | MESA_VK_RP_ATTACHMENT_STENCIL_BIT);
return !fs || fs->variant->empty ||
(fs->variant->writes_only_color && !has_enabled_attachments);
(!has_enabled_color_attachments && (!has_enabled_ds_attachment || fs->variant->has_no_ds_effects));
}
static void
@ -3182,7 +3194,7 @@ tu_emit_disable_fs(struct tu_disable_fs *disable_fs,
const struct tu_shader *fs)
{
disable_fs->disable_fs =
tu_calc_disable_fs(cb, rp, alpha_to_coverage_enable, fs);
tu_calc_disable_fs(cb, rp, alpha_to_coverage_enable, fs, false);
disable_fs->valid = true;
}
@ -3985,7 +3997,8 @@ tu_pipeline_builder_emit_state(struct tu_pipeline_builder *builder,
builder->graphics_state.rp);
if (EMIT_STATE(
disable_fs,
attachments_valid && pipeline_contains_all_shader_state(pipeline)))
attachments_valid && pipeline_contains_all_shader_state(pipeline) &&
tu_fs_disable_safe_for_occlusion_query(pipeline->shaders[MESA_SHADER_FRAGMENT])))
tu_emit_disable_fs(&pipeline->disable_fs, cb,
builder->graphics_state.rp,
builder->graphics_state.ms->alpha_to_coverage_enable,
@ -4245,15 +4258,17 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
if (!cmd->state.pipeline_disable_fs &&
(EMIT_STATE(disable_fs) ||
(cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_FS)))) {
(cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS | TU_CMD_DIRTY_FS |
TU_CMD_DIRTY_DISABLE_FS)))) {
bool disable_fs = tu_calc_disable_fs(
&cmd->vk.dynamic_graphics_state.cb, &cmd->state.vk_rp,
cmd->vk.dynamic_graphics_state.ms.alpha_to_coverage_enable,
cmd->state.shaders[MESA_SHADER_FRAGMENT]);
cmd->state.shaders[MESA_SHADER_FRAGMENT],
cmd->state.occlusion_query_may_be_running);
if (disable_fs != cmd->state.disable_fs) {
cmd->state.disable_fs = disable_fs;
cmd->state.dirty |= TU_CMD_DIRTY_DISABLE_FS;
cmd->state.dirty |= TU_CMD_DIRTY_RAST | TU_CMD_DIRTY_LRZ;
}
}
@ -4272,7 +4287,7 @@ tu_emit_draw_state(struct tu_cmd_buffer *cmd)
DRAW_STATE_COND(rast, TU_DYNAMIC_STATE_RAST,
cmd->state.dirty & (TU_CMD_DIRTY_SUBPASS |
TU_CMD_DIRTY_PER_VIEW_VIEWPORT |
TU_CMD_DIRTY_DISABLE_FS),
TU_CMD_DIRTY_RAST),
&cmd->vk.dynamic_graphics_state.rs,
&cmd->vk.dynamic_graphics_state.vp,
cmd->state.vk_mv.view_mask != 0,

View file

@ -1090,7 +1090,7 @@ emit_begin_occlusion_query(struct tu_cmd_buffer *cmdbuf,
*/
struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
cmdbuf->state.occlusion_query_may_be_running = true;
cmdbuf->state.dirty |= TU_CMD_DIRTY_LRZ;
cmdbuf->state.dirty |= TU_CMD_DIRTY_DISABLE_FS | TU_CMD_DIRTY_LRZ;
uint64_t begin_iova = occlusion_query_iova(pool, query, begin);
@ -1589,7 +1589,7 @@ emit_end_occlusion_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_qw(epilogue_cs, 0x1);
cmdbuf->state.occlusion_query_may_be_running = false;
cmdbuf->state.dirty |= TU_CMD_DIRTY_LRZ;
cmdbuf->state.dirty |= TU_CMD_DIRTY_DISABLE_FS | TU_CMD_DIRTY_LRZ;
}
/* PRIMITIVE_CTRS is used for two distinct queries: