mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-14 00:38:20 +02:00
jay: Drop render target stores with unconditional discards
Surprisingly, this actually appears to come up. Two Baldur's Gate 3 shaders were optimized down to an unconditional "demote" in a shader with no other side effects, meaning no writes occur and we can eliminate the entire program. One of the shaders still did a fair amount of math to produce color values that were never used.

We introduce a pass to detect store_render_target_intel intrinsics where discard == true and eliminate them. We then run DCE and check whether we eliminated the entire program other than "demote" or "terminate"; if so, we drop those too. Finally, we add back a Null RT store if needed.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41872>
This commit is contained in:
parent
cbd572e8d7
commit
79fda8977e
2 changed files with 79 additions and 9 deletions
|
|
@ -844,19 +844,13 @@ jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
|
|||
srcs[len++] = jay_extract(packed, i);
|
||||
}
|
||||
|
||||
/* Optimize out unconditional discards (probably should do this in NIR) */
|
||||
if (nir_src_is_const(intr->src[5]) && nir_src_as_bool(intr->src[5])) {
|
||||
for (unsigned i = 0; i < len; i++)
|
||||
srcs[i] = jay_UNDEF_u32(b);
|
||||
}
|
||||
|
||||
jay_inst *send =
|
||||
jay_SEND(b, .sfid = GEN_SFID_RENDER_CACHE, .check_tdr = true,
|
||||
.msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len,
|
||||
.type = JAY_TYPE_U32, .eot = last, .split = split);
|
||||
|
||||
/* Handle the disable predicate. It is logically inverted. */
|
||||
if (!nir_src_is_const(intr->src[5]) || nir_src_as_bool(intr->src[5])) {
|
||||
if (!nir_src_is_zero(intr->src[5])) {
|
||||
jay_add_predicate(b, send, jay_negate(nj_src(intr->src[5])));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@
|
|||
#include "nir_intrinsics.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
#define JAY_NIR_SNAPSHOT(name) BRW_NIR_SNAPSHOT(name)
|
||||
#define JAY_NIR_PASS(pass, ...) BRW_NIR_PASS(pass, ##__VA_ARGS__)
|
||||
|
||||
/*
|
||||
* Jay-to-NIR relies on a careful indexing of defs: every 32-bit word has
|
||||
* its own index. Vectors/64-bit use contiguous indices. We therefore run a
|
||||
|
|
@ -231,8 +234,79 @@ lower_fragment_outputs(nir_function_impl *impl,
|
|||
ctx.stencil ?: undef, ctx.sample_mask ?: undef);
|
||||
}
|
||||
|
||||
#define JAY_NIR_SNAPSHOT(name) BRW_NIR_SNAPSHOT(name)
|
||||
#define JAY_NIR_PASS(pass, ...) BRW_NIR_PASS(pass, ##__VA_ARGS__)
|
||||
/**
|
||||
* If we've optimized the entire program down to only "demote"
|
||||
* or "terminate" with no other instructions, delete it entirely.
|
||||
*
|
||||
* We're already not writing outputs and ending the program.
|
||||
*/
|
||||
static void
|
||||
delete_solo_discard(nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_block *start_block = nir_start_block(impl);
|
||||
nir_instr *instr = nir_block_first_instr(start_block);
|
||||
|
||||
if (start_block != nir_impl_last_block(impl) ||
|
||||
!exec_list_is_singular(&start_block->instr_list) ||
|
||||
instr->type != nir_instr_type_intrinsic)
|
||||
return;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_demote ||
|
||||
intrin->intrinsic == nir_intrinsic_terminate) {
|
||||
nir_instr_remove(instr);
|
||||
nir->info.fs.uses_discard = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop render target stores with unconditional discards.
|
||||
*/
|
||||
static bool
|
||||
opt_unconditional_discards(nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
nir_block *block = nir_impl_last_block(impl);
|
||||
|
||||
bool progress = false;
|
||||
bool any_remaining_rt_writes = false;
|
||||
|
||||
nir_foreach_instr_reverse_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_store_render_target_intel)
|
||||
continue;
|
||||
|
||||
nir_scalar discard = nir_scalar_resolved(intr->src[5].ssa, 0);
|
||||
if (nir_scalar_is_const(discard) && nir_scalar_as_uint(discard) != 0) {
|
||||
/* Drop store with unconditional discard */
|
||||
nir_instr_remove(instr);
|
||||
progress = true;
|
||||
} else {
|
||||
/* This RT store might actually happen */
|
||||
any_remaining_rt_writes = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
nir_opt_dce_impl(impl);
|
||||
delete_solo_discard(nir);
|
||||
}
|
||||
|
||||
/* If we eliminated all RT stores, add a Null RT store to end the thread. */
|
||||
if (!any_remaining_rt_writes) {
|
||||
nir_builder b = nir_builder_at(nir_after_impl(impl));
|
||||
nir_def *undef = nir_undef(&b, 1, 32);
|
||||
insert_rt_store(&b, -1, NULL, NULL, undef, undef, undef);
|
||||
}
|
||||
|
||||
return nir_progress(progress, impl, nir_metadata_control_flow);
|
||||
}
|
||||
|
||||
unsigned
|
||||
jay_process_nir(const struct intel_device_info *devinfo,
|
||||
|
|
@ -361,6 +435,8 @@ jay_process_nir(const struct intel_device_info *devinfo,
|
|||
*/
|
||||
brw_nir_optimize(pt);
|
||||
|
||||
NIR_PASS(_, nir, opt_unconditional_discards);
|
||||
|
||||
// TODO
|
||||
// JAY_NIR_PASS(brw_nir_move_interpolation_to_top);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue