pan: Use nir_intrinsic_blend_pan for blend shaders

The one non-trivial change here is that we're now using BLEND with a constant descriptor instead of ST_TILE for MSAA blend shaders. However, this shouldn't make any practical difference.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39244>
2026-05-05 07:28:11 +02:00 · 2026-01-09 10:23:29 -05:00 · 2026-01-09 10:23:29 -05:00 · f53751159a
commit f53751159a
parent 7d25c5c1ea
6 changed files with 88 additions and 102 deletions
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -100,25 +100,16 @@ GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
      .gpu_id = cache->gpu_id,
      .gpu_variant = cache->gpu_variant,
      .is_blend = true,
-      .blend.nr_samples = key.nr_samples,
      .pushable_ubos = BITFIELD_BIT(PAN_UBO_SYSVALS),
   };

-   enum pipe_format rt_formats[8] = {0};
-   rt_formats[rt] = key.format;
-
-#if PAN_ARCH >= 6
-   inputs.blend.bifrost_blend_desc =
-      GENX(pan_blend_get_internal_desc)(key.format, key.rt, 0, false);
-#endif
-
   struct pan_shader_info info;
   pan_preprocess_nir(nir, inputs.gpu_id);
   pan_postprocess_nir(nir, inputs.gpu_id);

-#if PAN_ARCH >= 6
-   NIR_PASS(_, nir, GENX(pan_inline_rt_conversion), rt_formats);
-#else
+#if PAN_ARCH < 6
+   enum pipe_format rt_formats[8] = {0};
+   rt_formats[rt] = key.format;
   NIR_PASS(_, nir, pan_nir_lower_framebuffer, rt_formats,
            pan_raw_format_mask_midgard(rt_formats), MAX2(key.nr_samples, 1),
            (cache->gpu_id >> 16) < 0x700);
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@@ -897,18 +897,7 @@ bi_pixel_indices(bi_builder *b, unsigned rt, unsigned sample)

   uint32_t indices_u32 = 0;
   memcpy(&indices_u32, &pix, sizeof(indices_u32));
-   bi_index indices = bi_imm_u32(indices_u32);
-
-   /* Implicit sample_id assignment only happens in blend shaders,
-    * and we don't expect an explicit sample to be passed in that
-    * case, hence the assert(sample == 0). */
-
-   if (b->shader->inputs->blend.nr_samples > 1) {
-      assert(sample == 0);
-      indices = bi_iadd_u32(b, indices, bi_load_sample_id(b), false);
-   }
-
-   return indices;
+   return bi_imm_u32(indices_u32);
 }

 /* Source color is passed through r0-r3, or r4-r7 for the second source when
@@ -937,38 +926,20 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, bi_index rgba2,
   unsigned size_2 = nir_alu_type_get_type_size(T2);
   unsigned sr_count = (size <= 16) ? 2 : 4;
   unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4;
-   const struct pan_compile_inputs *inputs = b->shader->inputs;
-   uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
   enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);

   /* Workaround for NIR-to-TGSI */
   if (b->shader->nir->info.fs.untyped_color_outputs)
      regfmt = BI_REGISTER_FORMAT_AUTO;

-   if (inputs->is_blend && inputs->blend.nr_samples > 1) {
-      /* Conversion descriptor comes from the compile inputs, pixel
-       * indices derived at run time based on sample ID */
-      bi_st_tile(b, rgba, bi_pixel_indices(b, rt, 0), bi_coverage(b),
-                 bi_imm_u32(blend_desc >> 32), regfmt, BI_VECSIZE_V4);
-   } else if (b->shader->inputs->is_blend) {
-      uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc;
+   /* Blend descriptor comes from the FAU RAM. By convention, the
+    * return address on Bifrost is stored in r48 and will be used
+    * by the blend shader to jump back to the fragment shader */

-      /* Blend descriptor comes from the compile inputs */
-      /* Put the result in r0 */
-
-      bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
-                  bi_imm_u32(blend_desc), bi_imm_u32(blend_desc >> 32),
-                  bi_null(), regfmt, sr_count, 0);
-   } else {
-      /* Blend descriptor comes from the FAU RAM. By convention, the
-       * return address on Bifrost is stored in r48 and will be used
-       * by the blend shader to jump back to the fragment shader */
-
-      bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
-                  bi_fau(BIR_FAU_BLEND_0 + rt, false),
-                  bi_fau(BIR_FAU_BLEND_0 + rt, true), rgba2, regfmt, sr_count,
-                  sr_count_2);
-   }
+   bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
+               bi_fau(BIR_FAU_BLEND_0 + rt, false),
+               bi_fau(BIR_FAU_BLEND_0 + rt, true), rgba2, regfmt, sr_count,
+               sr_count_2);

   assert(rt < 8);
   b->shader->info.bifrost->blend[rt].type = T;
@@ -1039,6 +1010,9 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
   unsigned loc = nir_intrinsic_io_semantics(instr).location;
   bi_index src0 = bi_src_index(&instr->src[0]);

+   /* Blend shaders should use nir_intrinsic_blend_pan */
+   assert(!b->shader->inputs->is_blend);
+
   /* By ISA convention, the coverage mask is stored in R60. The store
    * itself will be handled by a subsequent ATEST instruction */
   if (loc == FRAG_RESULT_SAMPLE_MASK) {
@@ -1120,19 +1094,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)

      bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), color2, T2, rt);
   }
-
-   if (b->shader->inputs->is_blend) {
-      /* Jump back to the fragment shader, return address is stored
-       * in r48 (see above). On Valhall, only jump if the address is
-       * nonzero. The check is free there and it implements the "jump
-       * to 0 terminates the blend shader" that's automatic on
-       * Bifrost.
-       */
-      if (b->shader->arch >= 8)
-         bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE);
-      else
-         bi_jump(b, bi_preload(b, 48));
-   }
 }

 static unsigned
--- a/src/panfrost/compiler/pan_compiler.h
+++ b/src/panfrost/compiler/pan_compiler.h
@@ -121,10 +121,6 @@ struct pan_compile_inputs {
   uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt,
                             unsigned force_size, bool dithered);
   bool is_blend, is_blit;
-   struct {
-      unsigned nr_samples;
-      uint64_t bifrost_blend_desc;
-   } blend;
   bool no_idvs;
   uint32_t view_mask;

--- a/src/panfrost/lib/pan_blend.c
+++ b/src/panfrost/lib/pan_blend.c
@@ -791,6 +791,66 @@ get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
   }
 }

+#if PAN_ARCH >= 6
+static bool
+lower_rt_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   const struct pan_blend_state *state = data;
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_output: {
+      nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+      assert(io.location >= FRAG_RESULT_DATA0);
+      unsigned rt = io.location - FRAG_RESULT_DATA0;
+      enum pipe_format format = state->rts[rt].format;
+      unsigned nr_samples = state->rts[rt].nr_samples;
+
+      nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+      unsigned size = nir_alu_type_get_type_size(dest_type);
+      uint64_t blend_desc =
+         GENX(pan_blend_get_internal_desc)(format, rt, size, false);
+
+      b->cursor = nir_after_instr(&intr->instr);
+
+      nir_def *lowered = nir_load_converted_output_pan(
+         b, intr->def.num_components, intr->def.bit_size,
+         nir_imm_int(b, rt),
+         nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0),
+         nir_imm_int(b, blend_desc >> 32),
+         .dest_type = dest_type,
+         .io_semantics = io);
+
+      nir_def_replace(&intr->def, lowered);
+      return true;
+   }
+
+   case nir_intrinsic_store_output: {
+      nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+      assert(io.location >= FRAG_RESULT_DATA0);
+      unsigned rt = io.location - FRAG_RESULT_DATA0;
+      enum pipe_format format = state->rts[rt].format;
+
+      nir_alu_type src_type = nir_intrinsic_src_type(intr);
+      unsigned size = nir_alu_type_get_type_size(src_type);
+      uint64_t blend_desc =
+         GENX(pan_blend_get_internal_desc)(format, rt, size, false);
+
+      b->cursor = nir_instr_remove(&intr->instr);
+
+      assert(nir_intrinsic_component(intr) == 0);
+      nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
+                    nir_imm_int64(b, blend_desc),
+                    nir_pad_vec4(b, intr->src[0].ssa),
+                    .io_semantics = io,
+                    .src_type = src_type);
+      return true;
+   }
+
+   default:
+      return false;
+   }
+}
+#endif

 nir_shader *
 GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
@@ -891,6 +951,20 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,

   NIR_PASS(_, b.shader, nir_lower_blend, &options);

+#if PAN_ARCH >= 6
+   /* On bifrost+ we use the NIR blend/load intrinsics directly */
+   NIR_PASS(_, b.shader, nir_shader_intrinsics_pass,
+            lower_rt_intrin, nir_metadata_control_flow, (void *)state);
+
+   /* And we put a blend_return_pan at the end.
+    *
+    * We have to do this here because nir_lower_blend assumes it can stick
+    * stuff at the end of the shader, after the blend_return_pan.
+    */
+   b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(b.shader)));
+   nir_blend_return_pan(&b);
+#endif
+
   return b.shader;
 }

@@ -949,30 +1023,6 @@ GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
   return res.opaque[0] | ((uint64_t)res.opaque[1] << 32);
 }

-static bool
-inline_rt_conversion(nir_builder *b, nir_intrinsic_instr *intr, void *data)
-{
-   if (intr->intrinsic != nir_intrinsic_load_rt_conversion_pan)
-      return false;
-
-   enum pipe_format *formats = data;
-   unsigned rt = nir_intrinsic_base(intr);
-   unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
-   uint64_t conversion =
-      GENX(pan_blend_get_internal_desc)(formats[rt], rt, size, false);
-
-   b->cursor = nir_after_instr(&intr->instr);
-   nir_def_rewrite_uses(&intr->def, nir_imm_int(b, conversion >> 32));
-   return true;
-}
-
-bool
-GENX(pan_inline_rt_conversion)(nir_shader *s, enum pipe_format *formats)
-{
-   return nir_shader_intrinsics_pass(s, inline_rt_conversion,
-                                     nir_metadata_control_flow, formats);
-}
-
 #if PAN_ARCH < 9
 enum mali_register_file_format
 GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format)
--- a/src/panfrost/lib/pan_blend.h
+++ b/src/panfrost/lib/pan_blend.h
@@ -141,8 +141,6 @@ nir_shader *GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
 uint64_t GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
                                           unsigned force_size, bool dithered);

-bool GENX(pan_inline_rt_conversion)(nir_shader *s, enum pipe_format *formats);
-
 #if PAN_ARCH < 9
 enum mali_register_file_format
   GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format);
--- a/src/panfrost/vulkan/panvk_vX_blend.c
+++ b/src/panfrost/vulkan/panvk_vX_blend.c
@@ -90,21 +90,11 @@ get_blend_shader(struct panvk_device *dev,
      .gpu_id = pdev->kmod.dev->props.gpu_id,
      .gpu_variant = pdev->kmod.dev->props.gpu_variant,
      .is_blend = true,
-      .blend = {
-         .nr_samples = key.info.nr_samples,
-         .bifrost_blend_desc =
-            GENX(pan_blend_get_internal_desc)(key.info.format, key.info.rt, 0,
-                                              false),
-      },
   };

   pan_preprocess_nir(nir, inputs.gpu_id);
   pan_postprocess_nir(nir, inputs.gpu_id);

-   enum pipe_format rt_formats[8] = {0};
-   rt_formats[rt] = key.info.format;
-   NIR_PASS(_, nir, GENX(pan_inline_rt_conversion), rt_formats);
-
   VkResult result =
      panvk_per_arch(create_internal_shader)(dev, nir, &inputs, &shader);