From fa96dfb2d7fa88cb161dcd7fedd4c14e81752737 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 27 Dec 2022 15:09:24 -0500 Subject: [PATCH] agx: Lower discard to zs_emit when zs_emit used It is invalid to use both sample_mask and zs_emit in the same shader. We'll need to do something similar for sample mask writes. Fixes Dolphin ubershaders. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 6 ++++ src/asahi/compiler/agx_nir_lower_zs_emit.c | 40 ++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index ab85e4b0280..9716bd128a8 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -463,6 +463,11 @@ agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr) */ agx_sample_mask(b, agx_immediate(1)); b->shader->did_sample_mask = true; + + assert(!(b->shader->nir->info.outputs_written & + (BITFIELD64_BIT(FRAG_RESULT_DEPTH) | + BITFIELD64_BIT(FRAG_RESULT_STENCIL))) && + "incompatible"); } /* Compact the registers according to the mask */ @@ -504,6 +509,7 @@ agx_emit_store_zs(agx_builder *b, nir_intrinsic_instr *instr) * maybe rename this flag to something more general. 
*/ b->shader->out->writes_sample_mask = true; + assert(!b->shader->did_sample_mask && "incompatible"); return agx_zs_emit(b, agx_src_index(&instr->src[0]), zs, base); }
diff --git a/src/asahi/compiler/agx_nir_lower_zs_emit.c b/src/asahi/compiler/agx_nir_lower_zs_emit.c
index d87a67f01a6..c52f383e25d 100644
--- a/src/asahi/compiler/agx_nir_lower_zs_emit.c
+++ b/src/asahi/compiler/agx_nir_lower_zs_emit.c
@@ -62,11 +62,47 @@ lower(nir_function_impl *impl, nir_block *block)
    return progress;
 }
 
+static bool
+lower_discard_to_z(nir_builder *b, nir_instr *instr, UNUSED void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_discard &&
+       intr->intrinsic != nir_intrinsic_discard_if)
+      return false; /* only discard and discard_if are rewritten */
+
+   b->cursor = nir_before_instr(instr);
+
+   if (intr->intrinsic == nir_intrinsic_discard_if)
+      nir_push_if(b, intr->src[0].ssa); /* emit the store only if src[0] holds */
+
+   bool stencil_written =
+      b->shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
+
+   nir_store_zs_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16),
+                    nir_imm_float(b, NAN), /* NaN depth replaces the discard */
+                    stencil_written ? nir_imm_intN_t(b, 0, 16)
+                                    : nir_ssa_undef(b, 1, 16) /* stencil */,
+                    .base = BASE_Z | (stencil_written ? BASE_S : 0));
+
+   if (intr->intrinsic == nir_intrinsic_discard_if)
+      nir_pop_if(b, NULL); /* close the if; no else branch is needed */
+
+   nir_instr_remove(instr);
+   return true; /* the shader was modified: report progress */
+}
+
 bool
 agx_nir_lower_zs_emit(nir_shader *s)
 {
    bool any_progress = false;
 
+   if (!(s->info.outputs_written & (BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
+                                    BITFIELD64_BIT(FRAG_RESULT_DEPTH))))
+      return false;
+
    nir_foreach_function(function, s) {
       if (!function->impl)
          continue;
@@ -87,5 +123,9 @@ agx_nir_lower_zs_emit(nir_shader *s)
       any_progress |= progress;
    }
 
+   any_progress |= nir_shader_instructions_pass(
+      s, lower_discard_to_z, nir_metadata_none, /* discard_if adds an if */
+      NULL);
+   s->info.fs.uses_discard = false;
    return any_progress;
 }