agx: Fix discards

Switch our frontends from generating sample_mask_agx to generating discard_agx,
and switch the backend from legalizing sample_mask_agx to lowering discard_agx
to sample_mask_agx. This is a much easier problem, and it is solved here in a
way that is simple (and inefficient) but obviously correct.

This should fix corruption in Darwinia.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23832>
Alyssa Rosenzweig 2023-06-14 12:34:34 -04:00 committed by Marge Bot
parent baf67144bd
commit b5fccfa197
5 changed files with 63 additions and 91 deletions
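For orientation, a minimal sketch of the lowering chain this commit sets up, using the NIR builder helpers that appear in the hunks below; the surrounding pass boilerplate (the builder b and the discard intrinsic intr) is assumed, so this is illustrative rather than literal driver code:

   /* Frontend: discard / discard_if become one intrinsic carrying the mask
    * of samples to kill. */
   nir_ssa_def *all_samples = nir_imm_intN_t(b, 0xFF, 16);
   nir_ssa_def *killed = all_samples;
   if (intr->intrinsic == nir_intrinsic_discard_if)
      killed = nir_bcsel(b, intr->src[0].ssa, all_samples,
                         nir_imm_intN_t(b, 0, 16));
   nir_discard_agx(b, killed);

   /* Backend, pessimistic path: run the depth/stencil test for the killed
    * samples with LIVE = 0, so they can never survive. Shaders that also
    * write depth/stencil instead get zs_emit with depth = NaN, since
    * sample_mask and zs_emit must not mix. */
   nir_sample_mask_agx(b, killed, nir_imm_intN_t(b, 0, 16));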

View file

@@ -6,6 +6,7 @@
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "agx_compiler.h"
+#include "nir_builder_opcodes.h"
 
 #define ALL_SAMPLES 0xFF
 #define BASE_Z 1
@@ -86,15 +87,13 @@ lower_discard(nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_ssa_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
    nir_ssa_def *no_samples = nir_imm_intN_t(b, 0, 16);
+   nir_ssa_def *killed_samples = all_samples;
 
    if (intr->intrinsic == nir_intrinsic_discard_if)
-      no_samples = nir_bcsel(b, intr->src[0].ssa, no_samples, all_samples);
+      killed_samples = nir_bcsel(b, intr->src[0].ssa, all_samples, no_samples);
 
-   /* This will get lowered later to zs_emit if needed */
-   nir_sample_mask_agx(b, all_samples, no_samples);
-   b->shader->info.fs.uses_discard = false;
-   b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   /* This will get lowered later as needed */
+   nir_discard_agx(b, killed_samples);
 
    nir_instr_remove(instr);
    return true;
 }

View file

@@ -5,6 +5,7 @@
 #include "compiler/nir/nir_builder.h"
 #include "agx_compiler.h"
+#include "nir_intrinsics.h"
 
 /*
  * sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
@@ -51,7 +52,9 @@
  * 4. If zs_emit is used anywhere in the shader, sample_mask must not be used.
  *    Instead, zs_emit with depth = NaN can be emitted.
  *
- * This pass legalizes some sample_mask instructions to satisfy these rules.
+ * This pass lowers discard_agx to sample_mask instructions satisfying these
+ * rules. Other passes should not generate sample_mask instructions, as there
+ * are too many footguns.
  */
 
 #define ALL_SAMPLES (0xFF)
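As a reading aid, a tiny host-side model of the TARGET/LIVE semantics described above; it mirrors the target & ~live computation that the removed zs_emit path used, and it deliberately ignores the actual depth/stencil test result:

   #include <stdint.h>

   /* Samples outside TARGET are untouched; samples in TARGET survive only
    * if they are also set in LIVE (and pass the Z/S test, not modeled). */
   static uint8_t
   apply_sample_mask(uint8_t coverage, uint8_t target, uint8_t live)
   {
      uint8_t killed = target & ~live;
      return coverage & ~killed;
   }

Under this model, the pessimistic lowering of discard_agx(k) below behaves like apply_sample_mask(coverage, k, 0), killing exactly the samples in k.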
@@ -91,15 +94,11 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
       return true;
    }
 
-   if (intr->intrinsic != nir_intrinsic_sample_mask_agx)
+   if (intr->intrinsic != nir_intrinsic_discard_agx)
       return false;
 
-   nir_ssa_def *target = intr->src[0].ssa;
-   nir_ssa_def *live = intr->src[1].ssa;
-   nir_ssa_def *discard = nir_iand(b, target, nir_inot(b, live));
-
    /* Write a NaN depth value for discarded samples */
-   nir_store_zs_agx(b, discard, nir_imm_float(b, NAN),
+   nir_store_zs_agx(b, intr->src[0].ssa, nir_imm_float(b, NAN),
                     stencil_written ? nir_imm_intN_t(b, 0, 16)
                                     : nir_ssa_undef(b, 1, 16) /* stencil */,
                     .base = BASE_Z | (stencil_written ? BASE_S : 0));
@@ -108,11 +107,27 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
    return true;
 }
 
+static bool
+lower_discard_to_sample_mask_0(nir_builder *b, nir_instr *instr,
+                               UNUSED void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_discard_agx)
+      return false;
+
+   b->cursor = nir_before_instr(instr);
+   nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
+   nir_instr_remove(instr);
+   return true;
+}
+
 bool
 agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
 {
-   if (!(shader->info.outputs_written &
-         (BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))))
+   if (!shader->info.fs.uses_discard)
       return false;
 
    /* sample_mask can't be used with zs_emit, so lower sample_mask to zs_emit */
@@ -131,40 +146,19 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
       return true;
    }
 
-   /* nir_lower_io_to_temporaries ensures that stores are in the last block */
+   /* Pessimistic lowering: force late depth/stencil test. TODO: Optimize. */
+   nir_shader_instructions_pass(
+      shader, lower_discard_to_sample_mask_0,
+      nir_metadata_block_index | nir_metadata_dominance, NULL);
+
+   /* nir_lower_io_to_temporaries ensures that stores are in the last block. */
    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
    nir_block *block = nir_impl_last_block(impl);
    nir_builder b;
    nir_builder_init(&b, impl);
 
-   /* Check which samples get a value written in the last block */
-   uint8_t samples_set = 0;
-
-   nir_foreach_instr(instr, block) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-      if (intr->intrinsic != nir_intrinsic_sample_mask_agx)
-         continue;
-
-      if (!nir_src_is_const(intr->src[0]))
-         continue;
-
-      samples_set |= nir_src_as_uint(intr->src[0]);
-   }
-
-   /* If all samples are set, we're good to go */
-   if ((samples_set & BITFIELD_MASK(nr_samples)) == BITFIELD_MASK(nr_samples))
-      return false;
-
-   /* Otherwise, at least one sample is not set in the last block and hence may
-    * not be set at all. Insert an instruction in the last block to ensure it
-    * will be live.
-    */
    b.cursor = nir_after_block(block);
 
    nir_foreach_instr(instr, block) {
       if (instr->type != nir_instr_type_intrinsic)
          continue;
@@ -177,6 +171,7 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
       break;
    }
 
+   /* Run depth/stencil tests for all remaining samples */
    nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
                        nir_imm_intN_t(&b, ALL_SAMPLES, 16));
 
    return true;
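Concretely, for a shader with a single conditional discard, the pass now emits (a sketch, with k standing for the killed-sample mask):

   sample_mask_agx(k, 0x00);      /* kill exactly the discarded samples */
   /* ... rest of the shader ... */
   sample_mask_agx(0xFF, 0xFF);   /* last block: test all remaining samples */

Every sample ends up covered by some sample_mask, which is what the rules above require; the cost is that the depth/stencil test for surviving samples always runs late, hence the TODO.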

View file

@@ -70,8 +70,8 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
       nir_iadd_imm(b, nir_ishl(b, nir_imm_intN_t(b, 1, 16), bits), -1);
 
    /* Discard samples that aren't covered */
-   nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
-   shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   nir_discard_agx(b, nir_inot(b, mask));
+   shader->info.fs.uses_discard = true;
 }
 
 /*
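As an aside, a host-side model of the coverage mask built above, assuming bits holds the number of samples covered by the alpha value (that computation sits before this hunk and is not shown):

   #include <stdint.h>

   /* Mirrors nir_iadd_imm(nir_ishl(1, bits), -1) and the nir_inot above. */
   static uint16_t
   alpha_to_coverage_kill_mask(unsigned bits)
   {
      uint16_t covered = (uint16_t)((1u << bits) - 1u);
      return (uint16_t)~covered; /* samples to pass to discard_agx */
   }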

View file

@@ -31,7 +31,7 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
    case nir_intrinsic_load_local_pixel_agx:
    case nir_intrinsic_store_local_pixel_agx:
    case nir_intrinsic_store_zs_agx:
-   case nir_intrinsic_sample_mask_agx: {
+   case nir_intrinsic_discard_agx: {
       /* Fragment I/O inside the loop should only affect one sample. */
       unsigned mask_index =
          (intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;
@@ -110,26 +110,17 @@ lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    b->cursor = nir_before_instr(instr);
 
-   nir_ssa_def *mask;
-   if (intr->intrinsic == nir_intrinsic_sample_mask_agx) {
-      /* For alpha-to-coverage */
-      assert(nir_src_as_uint(intr->src[0]) == ALL_SAMPLES && "not wrapped");
-      mask = intr->src[1].ssa;
-   } else if (intr->intrinsic == nir_intrinsic_store_output) {
-      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
-      if (sem.location != FRAG_RESULT_SAMPLE_MASK)
-         return false;
-
-      /* Sample mask writes are ignored unless multisampling is used. */
-      if (state->nr_samples == 1) {
-         nir_instr_remove(instr);
-         return true;
-      }
-
-      mask = nir_u2u16(b, intr->src[0].ssa);
-   } else {
+   if (intr->intrinsic != nir_intrinsic_store_output)
       return false;
-   }
+
+   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+   if (sem.location != FRAG_RESULT_SAMPLE_MASK)
+      return false;
+
+   /* Sample mask writes are ignored unless multisampling is used. */
+   if (state->nr_samples == 1) {
+      nir_instr_remove(instr);
+      return true;
+   }
 
    /* The Vulkan spec says:
@@ -139,13 +130,9 @@ lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
     *    shader invocation are ignored.
     *
     * That will be satisfied by outputting gl_SampleMask for the whole pixel
-    * and then lowering sample shading after (splitting up sample_mask
-    * targets).
+    * and then lowering sample shading after (splitting up discard targets).
     */
-   if (state->api_sample_mask)
-      mask = nir_iand(b, mask, nir_load_api_sample_mask_agx(b));
-
-   nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
+   nir_discard_agx(b, nir_inot(b, nir_u2u16(b, intr->src[0].ssa)));
    nir_instr_remove(instr);
    return true;
 }
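For a concrete reading of this lowering: assuming 4x multisampling and a shader writing gl_SampleMask = 0b0101, the store becomes discard_agx(~0b0101), killing samples 1 and 3 (the extra high bits of the 16-bit mask are harmless for a 4-sample target). The old iand with the API sample mask is gone because insert_sample_mask_write below now emits its own discard for the API mask.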
@@ -179,20 +166,14 @@ lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
 static void
 insert_sample_mask_write(nir_shader *s)
 {
-   /* nir_lower_io_to_temporaries ensures that stores are in the last block */
-   nir_function_impl *impl = nir_shader_get_entrypoint(s);
    nir_builder b;
+   nir_function_impl *impl = nir_shader_get_entrypoint(s);
    nir_builder_init(&b, impl);
+   b.cursor = nir_before_block(nir_start_block(impl));
 
-   /* Load the desired API sample mask */
-   nir_ssa_def *api_sample_mask = nir_load_api_sample_mask_agx(&b);
-
-   /* Kill samples that are not covered by the mask using the AGX instruction */
-   nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
-   nir_sample_mask_agx(&b, all_samples, api_sample_mask);
-   s->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+   /* Kill samples that are NOT covered by the mask */
+   nir_discard_agx(&b, nir_inot(&b, nir_load_api_sample_mask_agx(&b)));
+   s->info.fs.uses_discard = true;
 }
 
 /*
@@ -207,20 +188,17 @@ agx_nir_lower_monolithic_msaa(nir_shader *shader, struct agx_msaa_state *state)
    assert(state->nr_samples == 1 || state->nr_samples == 2 ||
           state->nr_samples == 4);
 
-   /* Lower gl_SampleMask writes */
-   if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
+   /* Sample mask writes need to be lowered. This includes an API sample mask
+    * lowering.
+    */
    nir_shader_instructions_pass(
       shader, lower_sample_mask_write,
       nir_metadata_block_index | nir_metadata_dominance, state);
-   } else if ((state->nr_samples > 1) && state->api_sample_mask) {
-      /* If there's no sample mask write, we need to add one of our own for the
-       * API-level sample masking to work.
-       */
-      insert_sample_mask_write(shader);
-   }
+
+   /* Lower API sample masks */
+   if ((state->nr_samples > 1) && state->api_sample_mask)
+      insert_sample_mask_write(shader);
 
    /* Additionally, sample_mask_in needs to account for the API-level mask */
    nir_shader_instructions_pass(
       shader, lower_sample_mask_read,

View file

@@ -102,9 +102,9 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
  * The load_sample_id intrinsics themselves are lowered later, with different
  * lowerings for monolithic vs epilogs.
  *
- * Note that fragment I/O (like store_local_pixel_agx and sample_mask_agx) does
- * not get lowered here, because that lowering is different for monolithic vs
- * FS epilogs even though there's no dependency on sample count.
+ * Note that fragment I/O (like store_local_pixel_agx and discard_agx) does not
+ * get lowered here, because that lowering is different for monolithic vs FS
+ * epilogs even though there's no dependency on sample count.
  */
 bool
 agx_nir_lower_sample_intrinsics(nir_shader *shader)