jay: Drop render target stores with unconditional discards

Surprisingly, this actually appears to come up.  Two Baldur's Gate 3
shaders optimized away to have unconditional "demote" in a shader with
no other side-effects, meaning no writes occur and we can eliminate the
entire program.  One of the shaders still did a fair amount of math to
produce color values that were never used.

We introduce a pass to detect store_render_target_intel intrinsics
where discard == true and eliminate them.  We then DCE and see if we
eliminated the entire program other than "demote" or "terminate" and
drop those too.  We then add back a Null RT store if needed.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41872>
This commit is contained in:
Kenneth Graunke 2026-05-29 23:45:43 -07:00 committed by Marge Bot
parent cbd572e8d7
commit 79fda8977e
2 changed files with 79 additions and 9 deletions

View file

@ -844,19 +844,13 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
srcs[len++] = jay_extract(packed, i);
}
/* Optimize out unconditional discards (probably should do this in NIR) */
if (nir_src_is_const(intr->src[5]) && nir_src_as_bool(intr->src[5])) {
for (unsigned i = 0; i < len; i++)
srcs[i] = jay_UNDEF_u32(b);
}
jay_inst *send =
jay_SEND(b, .sfid = GEN_SFID_RENDER_CACHE, .check_tdr = true,
.msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len,
.type = JAY_TYPE_U32, .eot = last, .split = split);
/* Handle the disable predicate. It is logically inverted. */
if (!nir_src_is_const(intr->src[5]) || nir_src_as_bool(intr->src[5])) {
if (!nir_src_is_zero(intr->src[5])) {
jay_add_predicate(b, send, jay_negate(nj_src(intr->src[5])));
}
}

View file

@ -13,6 +13,9 @@
#include "nir_intrinsics.h"
#include "shader_enums.h"
#define JAY_NIR_SNAPSHOT(name) BRW_NIR_SNAPSHOT(name)
#define JAY_NIR_PASS(pass, ...) BRW_NIR_PASS(pass, ##__VA_ARGS__)
/*
* Jay-to-NIR relies on a careful indexing of defs: every 32-bit word has
* its own index. Vectors/64-bit use contiguous indices. We therefore run a
@ -231,8 +234,79 @@ lower_fragment_outputs(nir_function_impl *impl,
ctx.stencil ?: undef, ctx.sample_mask ?: undef);
}
#define JAY_NIR_SNAPSHOT(name) BRW_NIR_SNAPSHOT(name)
#define JAY_NIR_PASS(pass, ...) BRW_NIR_PASS(pass, ##__VA_ARGS__)
/**
 * Remove a lone "demote" or "terminate" from an otherwise empty shader.
 *
 * This only fires when the entrypoint consists of a single block holding
 * exactly one instruction and that instruction is a demote/terminate
 * intrinsic.  Such a program already writes no outputs and simply ends,
 * so the discard is redundant; it is removed and the shader info is
 * updated to say discard is no longer used.
 */
static void
delete_solo_discard(nir_shader *nir)
{
   nir_function_impl *entry = nir_shader_get_entrypoint(nir);
   nir_block *only_block = nir_start_block(entry);

   /* The program must be a single block... */
   if (only_block != nir_impl_last_block(entry))
      return;

   /* ...containing exactly one instruction... */
   if (!exec_list_is_singular(&only_block->instr_list))
      return;

   /* ...which has to be an intrinsic. */
   nir_instr *sole = nir_block_first_instr(only_block);
   if (sole->type != nir_instr_type_intrinsic)
      return;

   switch (nir_instr_as_intrinsic(sole)->intrinsic) {
   case nir_intrinsic_demote:
   case nir_intrinsic_terminate:
      nir_instr_remove(sole);
      nir->info.fs.uses_discard = false;
      break;
   default:
      break;
   }
}
/**
 * Drop render target stores with unconditional discards.
 *
 * Scans the last block of the entrypoint (in reverse) for
 * store_render_target_intel intrinsics.  A store whose discard source
 * (src[5]) resolves to a non-zero constant can never land, so it is
 * removed.  If anything was removed, DCE is run to clean up the now-unused
 * value computations and delete_solo_discard() gets a chance to drop a
 * leftover lone demote/terminate.
 *
 * If no render target store remains afterwards, a Null RT store is appended
 * at the end of the impl so the thread still has a store to end on.
 *
 * Returns true if the shader was modified in any way, including the case
 * where only the Null RT store was inserted.
 */
static bool
opt_unconditional_discards(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_block *block = nir_impl_last_block(impl);

   bool removed_store = false;
   bool any_remaining_rt_writes = false;

   /* The _safe variant is required since we remove while iterating. */
   nir_foreach_instr_reverse_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_store_render_target_intel)
         continue;

      /* Look through copies/vector ops to find the real discard value. */
      nir_scalar discard = nir_scalar_resolved(intr->src[5].ssa, 0);

      if (nir_scalar_is_const(discard) && nir_scalar_as_uint(discard) != 0) {
         /* Drop store with unconditional discard */
         nir_instr_remove(instr);
         removed_store = true;
      } else {
         /* This RT store might actually happen */
         any_remaining_rt_writes = true;
      }
   }

   if (removed_store) {
      /* Clean up the math that only fed the removed stores, then see
       * whether all that is left is a single demote/terminate.
       */
      nir_opt_dce_impl(impl);
      delete_solo_discard(nir);
   }

   bool progress = removed_store;

   /* If we eliminated all RT stores, add a Null RT store to end the thread. */
   if (!any_remaining_rt_writes) {
      nir_builder b = nir_builder_at(nir_after_impl(impl));
      nir_def *undef = nir_undef(&b, 1, 32);
      insert_rt_store(&b, -1, NULL, NULL, undef, undef, undef);

      /* Inserting instructions is a modification in its own right, so it
       * must be reported even when no store was removed above; otherwise
       * nir_progress() would treat the impl as untouched.
       */
      progress = true;
   }

   return nir_progress(progress, impl, nir_metadata_control_flow);
}
unsigned
jay_process_nir(const struct intel_device_info *devinfo,
@ -361,6 +435,8 @@ jay_process_nir(const struct intel_device_info *devinfo,
*/
brw_nir_optimize(pt);
NIR_PASS(_, nir, opt_unconditional_discards);
// TODO
// JAY_NIR_PASS(brw_nir_move_interpolation_to_top);