pan/blend: Use the blend builder helpers instead of nir_lower_blend()

This is a little more manual (though it's actually less code), but it gives us a lot more control and makes the whole flow nicer.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39367>
2026-05-06 11:38:05 +02:00 · 2026-01-17 00:01:45 -05:00 · 2026-01-17 00:01:45 -05:00 · 669ddc5241
commit 669ddc5241
parent 2313bec66e
4 changed files with 118 additions and 155 deletions
--- a/src/panfrost/ci/panfrost-g52-fails.txt
+++ b/src/panfrost/ci/panfrost-g52-fails.txt
@ -71,7 +71,6 @@ spec@egl_khr_surfaceless_context@viewport,Fail
 spec@egl_mesa_configless_context@basic,Fail
 spec@ext_base_instance@arb_base_instance-drawarrays_gles3,Fail
 spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
 spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
 spec@ext_framebuffer_object@fbo-blending-formats,Fail
 spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
 spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
--- a/src/panfrost/ci/panfrost-g57-fails.txt
+++ b/src/panfrost/ci/panfrost-g57-fails.txt
@ -62,7 +62,6 @@ spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,F
 spec@egl_khr_surfaceless_context@viewport,Fail
 spec@egl_mesa_configless_context@basic,Fail
 spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
 spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
 spec@ext_framebuffer_object@fbo-blending-formats,Fail
 spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
 spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
@ -318,7 +317,6 @@ afbcp-spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_
 afbcp-spec@egl_khr_surfaceless_context@viewport,Fail
 afbcp-spec@egl_mesa_configless_context@basic,Fail
 afbcp-spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
 afbcp-spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
 afbcp-spec@ext_framebuffer_object@fbo-blending-formats,Fail
 afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
 afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
--- a/src/panfrost/ci/panfrost-g610-fails.txt
+++ b/src/panfrost/ci/panfrost-g610-fails.txt
@ -143,7 +143,6 @@ spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 8,Fail
 spec@ext_framebuffer_multisample@sample-alpha-to-coverage 16 color,Fail
 spec@ext_framebuffer_multisample@sample-alpha-to-coverage 6 color,Fail
 spec@ext_framebuffer_multisample@sample-alpha-to-coverage 8 color,Fail
 spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
 spec@ext_framebuffer_object@fbo-blending-formats,Fail
 spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
 spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
--- a/src/panfrost/lib/pan_blend.c
+++ b/src/panfrost/lib/pan_blend.c
@ -791,70 +791,6 @@ get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
   }
 }
 #if PAN_ARCH >= 6
 /*
  * Per-intrinsic callback that rewrites render-target output loads and
  * stores into Panfrost tile-load and blend intrinsics.
  *
  * b:    builder used to emit the replacement instructions.
  * intr: intrinsic being visited.
  * data: blend state (const struct pan_blend_state *) supplying the format
  *       and sample count of each render target.
  *
  * Returns true when the intrinsic was a load_output or store_output and has
  * been replaced, false when it was left untouched.
  */
 static bool
 lower_rt_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
   const struct pan_blend_state *state = data;
   switch (intr->intrinsic) {
   case nir_intrinsic_load_output: {
      /* The render target index is the output location relative to
       * FRAG_RESULT_DATA0; lower locations are not expected here.
       */
      nir_io_semantics io = nir_intrinsic_io_semantics(intr);
      assert(io.location >= FRAG_RESULT_DATA0);
      unsigned rt = io.location - FRAG_RESULT_DATA0;
      enum pipe_format format = state->rts[rt].format;
      unsigned nr_samples = state->rts[rt].nr_samples;
      nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
      unsigned size = nir_alu_type_get_type_size(dest_type);
      /* Descriptor for this format and render target, sized after the
       * load's destination type.
       * NOTE(review): the meaning of the trailing `false` argument is not
       * visible from here -- confirm against pan_blend_get_internal_desc.
       */
      uint64_t blend_desc =
         GENX(pan_blend_get_internal_desc)(format, rt, size, false);
      /* Emit the replacement right after the original load. */
      b->cursor = nir_after_instr(&intr->instr);
      /* Only read the sample ID for multisampled targets; otherwise use
       * sample 0.
       */
      nir_def *sample_id =
         nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0);
      /* The tile load only takes the upper 32 bits of the descriptor. */
      nir_def *lowered = nir_load_tile_pan(
         b, intr->def.num_components, intr->def.bit_size,
         pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample_id),
         pan_nir_tile_default_coverage(b),
         nir_imm_int(b, blend_desc >> 32),
         .dest_type = dest_type,
         .io_semantics = io);
      /* Swap the original load's result for the tile load. */
      nir_def_replace(&intr->def, lowered);
      return true;
   }
   case nir_intrinsic_store_output: {
      /* Same render target lookup as for loads, but the descriptor is sized
       * after the type of the stored value.
       */
      nir_io_semantics io = nir_intrinsic_io_semantics(intr);
      assert(io.location >= FRAG_RESULT_DATA0);
      unsigned rt = io.location - FRAG_RESULT_DATA0;
      enum pipe_format format = state->rts[rt].format;
      nir_alu_type src_type = nir_intrinsic_src_type(intr);
      unsigned size = nir_alu_type_get_type_size(src_type);
      uint64_t blend_desc =
         GENX(pan_blend_get_internal_desc)(format, rt, size, false);
      /* Remove the store and emit its replacement where it used to be. */
      b->cursor = nir_instr_remove(&intr->instr);
      /* Only stores starting at component 0 are supported. */
      assert(nir_intrinsic_component(intr) == 0);
      /* Unlike the tile load, the blend takes the full 64-bit descriptor.
       * The stored value is padded to a vec4 first.
       */
      nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
                    nir_imm_int64(b, blend_desc),
                    nir_pad_vec4(b, intr->src[0].ssa),
                    .io_semantics = io,
                    .src_type = src_type);
      return true;
   }
   default:
      /* Any other intrinsic is left as is. */
      return false;
   }
 }
 #endif
 nir_shader *
 GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
                              nir_alu_type src0_type, nir_alu_type src1_type,
@ -865,111 +801,142 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
   get_equation_str(rt_state, equation_str, sizeof(equation_str));
-   nir_builder b = nir_builder_init_simple_shader(
+   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
      "pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
      util_format_name(rt_state->format), rt_state->nr_samples,
      state->logicop_enable ? "logicop" : "equation",
      state->logicop_enable ? logicop_str(state->logicop_func) : equation_str);
   nir_builder *b = &builder;
   const enum pipe_format format = rt_state->format;
   const struct util_format_description *format_desc =
-      util_format_description(rt_state->format);
+      util_format_description(format);
   nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc);
-   /* Bifrost/Valhall support 16-bit and 32-bit register formats for
+   /* Choose a type which is not going to lead to precision loss while
-    * LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making
+    * blending.  If we're not dual-source blending, src1_type will be
-    * the fragment output 8-bit and inserting extra conversions in the
+    * nir_type_invalid which has a size of zero.
    * compiler, promote the output to 16-bit. The larger size is still
    * compatible with correct conversion semantics.
    */
-   if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8)
+   nir_alu_type dest_type = pan_unpacked_type_for_format(format_desc);
-      nir_type = nir_alu_type_get_base_type(nir_type) | 16;
+   if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(dest_type) == 8)
      dest_type = nir_alu_type_get_base_type(dest_type) | 16;
-   nir_lower_blend_options options = {
+   const unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
-      .logicop_enable = state->logicop_enable,
+   const nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
      .logicop_func = state->logicop_func,
   };
-   options.rt[rt].format = rt_state->format;
+   /* Midgard doesn't always provide types at all but it's always float32 */
-   options.rt[rt].colormask = rt_state->equation.color_mask;
+   src0_type = src0_type ?: nir_type_float32;
   src1_type = src1_type ?: nir_type_float32;
-   if (!rt_state->equation.blend_enable) {
+   nir_def *src0 = nir_load_blend_input_pan(b,
-      static const nir_lower_blend_channel replace = {
+      4, nir_alu_type_get_type_size(src0_type),
-         .func = PIPE_BLEND_ADD,
+      .io_semantics.location = FRAG_RESULT_DATA0 + rt,
-         .src_factor = PIPE_BLENDFACTOR_ONE,
+      .io_semantics.dual_source_blend_index = 0,
-         .dst_factor = PIPE_BLENDFACTOR_ZERO,
+      .io_semantics.num_slots = 1,
-      };
+      .dest_type = src0_type);
-      options.rt[rt].rgb = replace;
+   nir_def *src1 = nir_load_blend_input_pan(b,
-      options.rt[rt].alpha = replace;
+      4, nir_alu_type_get_type_size(src1_type),
-   } else {
+      .io_semantics.location = FRAG_RESULT_DATA0 + rt,
-      options.rt[rt].rgb.func = rt_state->equation.rgb_func;
+      .io_semantics.dual_source_blend_index = 1,
-      options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
+      .io_semantics.num_slots = 1,
-      options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
+      .dest_type = src1_type);
-      options.rt[rt].alpha.func = rt_state->equation.alpha_func;
+
-      options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
+   /* Make sure everyone is the same type.  We assume the destination type
-      options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
+    * here because TGSI sometimes gives us bogus types.  When they're not
    * bogus, shader types are required to match the format anyway.
    *
    * On Midgard, the blend shader is responsible for format conversion.
    * As the OpenGL spec requires integer conversions to saturate, we must
    * saturate ourselves here. On Bifrost and later, the conversion
    * hardware handles this automatically.
    */
   bool should_saturate = PAN_ARCH <= 5 && dest_base_type != nir_type_float;
   src0 = nir_convert_with_rounding(b, src0, dest_base_type, dest_type,
                                    nir_rounding_mode_undef, should_saturate);
   src1 = nir_convert_with_rounding(b, src1, dest_base_type, dest_type,
                                    nir_rounding_mode_undef, should_saturate);
   if (state->alpha_to_one && dest_base_type == nir_type_float) {
      nir_def *one = nir_imm_floatN_t(b, 1.0, dest_bit_size);
      src0 = nir_vector_insert_imm(b, src0, one, 3);
      src1 = nir_vector_insert_imm(b, src1, one, 3);
   }
   nir_def *zero = nir_imm_int(&b, 0);
   for (unsigned i = 0; i < 2; ++i) {
      nir_alu_type src_type =
         (i == 1 ? src1_type : src0_type) ?: nir_type_float32;
      /* HACK: workaround buggy TGSI shaders (u_blitter) */
      src_type = nir_alu_type_get_base_type(nir_type) |
                 nir_alu_type_get_type_size(src_type);
      nir_def *src = nir_load_blend_input_pan(
         &b, 4, nir_alu_type_get_type_size(src_type),
         .io_semantics.location = FRAG_RESULT_DATA0 + rt,
         .io_semantics.dual_source_blend_index = i,
         .io_semantics.num_slots = 1, .dest_type = src_type);
      if (state->alpha_to_one && src_type == nir_type_float32) {
         /* force alpha to 1 */
         src = nir_vector_insert_imm(&b, src,
                                     nir_imm_floatN_t(&b, 1.0, src->bit_size),
                                     3);
      }
      /* On Midgard, the blend shader is responsible for format conversion.
       * As the OpenGL spec requires integer conversions to saturate, we must
       * saturate ourselves here. On Bifrost and later, the conversion
       * hardware handles this automatically.
       */
      nir_alu_type T = nir_alu_type_get_base_type(nir_type);
      bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
      src = nir_convert_with_rounding(&b, src, T, nir_type,
                                      nir_rounding_mode_undef, should_saturate);
      nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
                       .src_type = nir_type,
                       .io_semantics.location = FRAG_RESULT_DATA0 + rt,
                       .io_semantics.num_slots = 1,
                       .io_semantics.dual_source_blend_index = i);
   }
   b.shader->info.io_lowered = true;
   NIR_PASS(_, b.shader, nir_lower_blend, &options);
 #if PAN_ARCH >= 6
-   /* On bifrost+ we use the NIR blend/load intrinsics directly */
+   const uint64_t opaque_blend_desc =
-   NIR_PASS(_, b.shader, nir_shader_intrinsics_pass,
+      GENX(pan_blend_get_internal_desc)(format, rt, dest_bit_size, false);
-            lower_rt_intrin, nir_metadata_control_flow, (void *)state);
+#else
-
+   const uint64_t opaque_blend_desc = 0;
   /* And we put a blend_return_pan at the end.
    *
    * We have to do this here because nir_lower_blend assumes it can stick
    * stuff at the end of the shader, after the blend_return_pan.
    */
   b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(b.shader)));
   nir_blend_return_pan(&b);
 #endif
-   return b.shader;
+   nir_def *dest;
   if (PAN_ARCH >= 6) {
      nir_def *sample_id =
         rt_state->nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0);
      dest = nir_load_tile_pan(b,
         4, dest_bit_size,
         pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample_id),
         pan_nir_tile_default_coverage(b),
         nir_imm_int(b, opaque_blend_desc >> 32),
         .dest_type = dest_type,
         .io_semantics.location = FRAG_RESULT_DATA0 + rt,
         .io_semantics.num_slots = 1);
   } else {
      dest = nir_load_output(b,
         4, dest_bit_size,
         nir_imm_int(b, 0),
         .dest_type = dest_type,
         .io_semantics.location = FRAG_RESULT_DATA0 + rt,
         .io_semantics.num_slots = 1);
   }
   nir_def *color = src0;
   if (state->logicop_enable) {
      color = nir_color_logicop(b, src0, dest, state->logicop_func, format);
   } else if (rt_state->equation.blend_enable) {
      const nir_lower_blend_rt nir_rt = {
         .format = format,
         .rgb.func = rt_state->equation.rgb_func,
         .rgb.src_factor = rt_state->equation.rgb_src_factor,
         .rgb.dst_factor = rt_state->equation.rgb_dst_factor,
         .alpha.func = rt_state->equation.alpha_func,
         .alpha.src_factor = rt_state->equation.alpha_src_factor,
         .alpha.dst_factor = rt_state->equation.alpha_dst_factor,
         .colormask = rt_state->equation.color_mask,
      };
      color = nir_color_blend(b, src0, src1, dest, &nir_rt, false);
   }
   color = nir_color_mask(b, color, dest, rt_state->equation.color_mask);
   /* Throw away any channels we don't need */
   color = nir_color_mask(b, color, nir_undef(b, 4, dest_bit_size),
                          util_format_colormask(format_desc));
   /* Only write the destination if it changed */
   if (color != dest) {
      if (PAN_ARCH >= 6) {
         nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
                       nir_imm_int64(b, opaque_blend_desc),
                       color,
                       .src_type = dest_type,
                       .io_semantics.location = FRAG_RESULT_DATA0 + rt,
                       .io_semantics.num_slots = 1);
      } else {
         nir_store_output(b, color, nir_imm_int(b, 0),
                          .src_type = dest_type,
                          .io_semantics.location = FRAG_RESULT_DATA0 + rt,
                          .io_semantics.num_slots = 1);
      }
   }
   if (PAN_ARCH >= 6)
      nir_blend_return_pan(b);
   b->shader->info.io_lowered = true;
   return builder.shader;
 }
 #if PAN_ARCH >= 6