pan/bi: Lower FS input loads in NIR

Co-authored-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40391>
This commit is contained in:
Faith Ekstrand 2025-11-28 13:54:38 -05:00 committed by Marge Bot
parent d2f430bea9
commit 8541dca8ed
4 changed files with 127 additions and 148 deletions

View file

@@ -580,151 +580,6 @@ bi_emit_load_attr(bi_builder *b, nir_intrinsic_instr *instr)
bi_copy_component(b, instr, dest);
}
/* Lower an FS input load (load_input / load_interpolated_input) to the
 * Bifrost/Valhall varying-load instructions. Three hardware paths exist:
 *
 *  - LD_VAR_BUF[_IMM]: used only when the driver opts in via
 *    inputs->valhall.use_ld_var_buf (and malloc'd IDVS is enabled).
 *  - LD_VAR[_IMM]: interpolated ("smooth") varyings through Attribute
 *    Descriptors.
 *  - LD_VAR_FLAT[_IMM]: flat-shaded varyings.
 *
 * The hardware load always writes starting at component 0, so loads with a
 * nonzero starting component go through a temporary and are shuffled into
 * place by bi_copy_component() at the end.
 */
static void
bi_emit_load_fs_input(bi_builder *b, nir_intrinsic_instr *instr)
{
   enum bi_sample sample = BI_SAMPLE_CENTER;
   enum bi_update update = BI_UPDATE_STORE;
   enum bi_register_format regfmt = BI_REGISTER_FORMAT_AUTO;
   enum bi_source_format source_format;
   bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input;
   bi_index src0 = bi_null();

   /* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
    * through a compiler input value, falling back to LD_VAR[_IMM] +
    * Attribute Descriptors otherwise. */
   bool use_ld_var_buf =
      b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;

   unsigned component = nir_intrinsic_component(instr);

   /* vecsize encodes (component count - 1); the load starts at component 0,
    * so it must also cover the leading components we skip over. */
   enum bi_vecsize vecsize = (instr->num_components + component - 1);
   bi_index dest =
      (component == 0) ? bi_def_index(&instr->def) : bi_temp(b->shader);

   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
   const nir_alu_type type = nir_intrinsic_dest_type(instr);
   const nir_alu_type base_type = nir_alu_type_get_base_type(type);

   /* Fix: nir_alu_type_get_type_size() returns a bit size as a plain
    * unsigned, not a nir_alu_type, so don't store it in the enum type. */
   const unsigned sz = nir_alu_type_get_type_size(type);
   assert(sz == instr->def.bit_size);
   assert(sz == 16 || sz == 32);
   assert(base_type == nir_type_int || base_type == nir_type_uint ||
          base_type == nir_type_float);

   const struct pan_varying_slot *slot = NULL;
   unsigned src_sz = sz;
   if (use_ld_var_buf) {
      pan_varying_layout_require_layout(b->shader->varying_layout);
      slot = pan_varying_layout_find_slot(b->shader->varying_layout,
                                          sem.location);
      assert(slot);

      /* The in-memory varying may be stored at a different bit size than
       * the destination. */
      src_sz = nir_alu_type_get_type_size(slot->alu_type);
      assert(src_sz == 16 || src_sz == 32);
   }

   if (smooth) {
      nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
      assert(parent);

      sample = bi_interp_for_intrinsic(parent->intrinsic);
      src0 = bi_varying_src0_for_barycentric(b, parent);

      /* Smooth ints don't exist */
      assert(base_type == nir_type_float);

      regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
      source_format =
         (src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32;
   } else {
      if (use_ld_var_buf) {
         /* integer regfmt are not supported by LD_VAR_BUF, but using float
          * src_types for integers is okay if the source_format is flat and
          * uses the same bit size.  The conversion is a no-op. */
         regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
         source_format = (src_sz == 16) ? BI_SOURCE_FORMAT_FLAT16
                                        : BI_SOURCE_FORMAT_FLAT32;

         /* conversion MUST be a noop for int varyings to work correctly */
         assert(base_type == nir_type_float || src_sz == sz);
      } else {
         /* Flat loading with i16/u16 is not encodable */
         assert(base_type == nir_type_float || sz == 32);
         regfmt = bi_reg_fmt_for_nir(type);
      }

      /* Valhall can't have bi_null() here, although the source is
       * logically unused for flat varyings
       */
      if (b->shader->arch >= 9)
         src0 = bi_preload(b, 61);

      /* Gather info as we go */
      b->shader->info.bifrost->uses_flat_shading = true;
   }

   nir_src *offset_src = nir_get_io_offset_src(instr);
   unsigned imm_index = 0;
   bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
   unsigned base = nir_intrinsic_base(instr);

   if (use_ld_var_buf) {
      assert(slot);
      if (immediate) {
         assert(nir_src_is_const(*offset_src) && "assumes immediate offset");

         /* Immediate index given in bytes; each slot is 16 bytes. */
         unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16);
         bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
                              update, vecsize, offset);
      } else {
         bi_index idx = bi_src_index(offset_src);

         /* Index needs to be in bytes, but NIR gives the index
          * in slots. For now assume 16 bytes per element.
          */
         bi_index idx_bytes = bi_lshift_or_i32(b, idx, bi_zero(), bi_imm_u8(4));
         if (slot->offset != 0)
            idx_bytes = bi_iadd_u32(b, idx_bytes, bi_imm_u32(slot->offset),
                                    false);
         bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample,
                          source_format, update, vecsize);
      }
   } else {
      /* On Valhall, ensure the table and index are valid for usage with
       * immediate form when IDVS isn't used */
      if (b->shader->arch >= 9)
         immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) &&
                      pan_res_handle_get_index(base) < 256;

      if (immediate) {
         bi_instr *I;
         if (smooth) {
            I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize,
                                 pan_res_handle_get_index(imm_index));
         } else {
            I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt,
                                      vecsize,
                                      pan_res_handle_get_index(imm_index));
         }

         /* Valhall usually uses LD_VAR_BUF. If this is disabled, use a simple
          * Midgard-style ABI. */
         if (b->shader->arch >= 9)
            I->table = va_res_fold_table_idx(pan_res_handle_get_table(base));
      } else {
         bi_index idx = bi_src_index(offset_src);
         if (base != 0)
            idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false);

         if (smooth)
            bi_ld_var_to(b, dest, src0, idx, regfmt, sample, update, vecsize);
         else
            bi_ld_var_flat_to(b, dest, idx, BI_FUNCTION_NONE, regfmt, vecsize);
      }
   }

   bi_copy_component(b, instr, dest);
}
static void
bi_emit_load_var(bi_builder *b, nir_intrinsic_instr *intr)
{
@@ -2140,9 +1995,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_input:
assert(!b->shader->inputs->is_blend);
if (stage == MESA_SHADER_FRAGMENT)
bi_emit_load_fs_input(b, instr);
else if (stage == MESA_SHADER_VERTEX)
if (stage == MESA_SHADER_VERTEX)
bi_emit_load_attr(b, instr);
else
UNREACHABLE("Unsupported shader stage");
@@ -7247,6 +7100,10 @@ bifrost_compile_shader_nir(nir_shader *nir,
inputs->trust_varying_flat_highp_types, false);
info->varyings.noperspective =
pan_nir_collect_noperspective_varyings_fs(nir);
if (!inputs->is_blend)
NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id,
inputs->varying_layout, inputs->valhall.use_ld_var_buf);
}
if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) {

View file

@@ -16,6 +16,7 @@ libpanfrost_compiler_files = files(
'pan_nir_lower_sample_position.c',
'pan_nir_lower_store_component.c',
'pan_nir_lower_texel_buffer_index.c',
'pan_nir_lower_varyings_io.c',
'pan_nir_lower_vertex_id.c',
'pan_nir_lower_xfb.c',
'pan_nir_resize_varying_io.c',

View file

@@ -57,6 +57,10 @@ bool pan_nir_lower_frag_coord_zw(nir_shader *shader);
bool pan_nir_lower_noperspective_vs(nir_shader *shader);
bool pan_nir_lower_noperspective_fs(nir_shader *shader);
bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
const struct pan_varying_layout *varying_layout,
bool valhall_use_ld_var_buf);
bool pan_nir_lower_helper_invocation(nir_shader *shader);
bool pan_nir_lower_sample_pos(nir_shader *shader);
bool pan_nir_lower_xfb(nir_shader *nir);

View file

@@ -0,0 +1,117 @@
/*
* Copyright (C) 2025 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
#include "pan_nir.h"
#include "nir_builder.h"
#include "panfrost/model/pan_model.h"
/* Per-shader state threaded through the intrinsics pass callback. */
struct lower_fs_inputs_ctx {
/* GPU architecture major version, derived via pan_arch(gpu_id). */
unsigned arch;
/* Varying slot layout; consulted only on the LD_VAR_BUF lowering path. */
const struct pan_varying_layout *varying_layout;
/* Valhall-only: lower to load_var_buf_*_pan instead of load_var_*_pan. */
bool valhall_use_ld_var_buf;
};
/* Callback for nir_shader_intrinsics_pass: rewrite one FS input load
 * (load_input / load_interpolated_input) into the Panfrost-specific
 * load_var_* / load_var_buf_* intrinsics.  Returns true on rewrite. */
static bool
lower_fs_input_load(struct nir_builder *b,
                    nir_intrinsic_instr *load, void *cb_data)
{
   const struct lower_fs_inputs_ctx *ctx = cb_data;
   const bool interpolated =
      load->intrinsic == nir_intrinsic_load_interpolated_input;

   if (!interpolated && load->intrinsic != nir_intrinsic_load_input)
      return false;

   const nir_io_semantics sem = nir_intrinsic_io_semantics(load);
   const nir_alu_type dest_type = nir_intrinsic_dest_type(load);

   /* Indirect array varyings are not yet supported (num_slots > 1) */
   assert(sem.num_slots == 1);
   assert(nir_src_as_uint(*nir_get_io_offset_src(load)) == 0);

   nir_intrinsic_instr *bary = NULL;
   if (interpolated) {
      /* Cannot interpolate ints */
      assert(nir_alu_type_get_base_type(dest_type) == nir_type_float);
      bary = nir_src_as_intrinsic(load->src[0]);
   }

   b->cursor = nir_before_instr(&load->instr);

   const unsigned first_comp = nir_intrinsic_component(load);
   /* The lowered load starts at component 0, so it must span up to and
    * including the last component the NIR load asks for. */
   const unsigned total_comps = load->num_components + first_comp;
   const unsigned bit_size = load->def.bit_size;
   nir_def *lowered;

   if (ctx->valhall_use_ld_var_buf) {
      assert(ctx->arch >= 9);
      pan_varying_layout_require_layout(ctx->varying_layout);

      const struct pan_varying_slot *slot =
         pan_varying_layout_find_slot(ctx->varying_layout, sem.location);
      assert(slot);

      /* The in-memory type comes from the varying layout, not the load. */
      const nir_alu_type src_type = slot->alu_type;
      nir_def *offset_B = nir_imm_int(b, slot->offset);

      if (interpolated) {
         lowered = nir_load_var_buf_pan(b, total_comps, bit_size,
                                        offset_B, &bary->def,
                                        .src_type = src_type,
                                        .io_semantics = sem);
      } else {
         lowered = nir_load_var_buf_flat_pan(b, total_comps, bit_size,
                                             offset_B,
                                             .src_type = src_type,
                                             .io_semantics = sem);
      }
   } else {
      nir_def *idx = nir_imm_int(b, nir_intrinsic_base(load));

      if (interpolated) {
         lowered = nir_load_var_pan(b, total_comps, bit_size,
                                    idx, &bary->def,
                                    .dest_type = dest_type,
                                    .io_semantics = sem);
      } else {
         lowered = nir_load_var_flat_pan(b, total_comps, bit_size, idx,
                                         .dest_type = dest_type,
                                         .io_semantics = sem);
      }
   }

   /* Extract the requested window when the load starts past component 0. */
   if (first_comp > 0) {
      unsigned swiz[NIR_MAX_VEC_COMPONENTS] = {0};
      for (unsigned c = 0; c < load->num_components; c++)
         swiz[c] = first_comp + c;
      lowered = nir_swizzle(b, lowered, swiz, load->num_components);
   }

   nir_def_replace(&load->def, lowered);
   return true;
}
/* Run the FS input lowering pass over the whole shader.  Returns true when
 * at least one load was rewritten. */
bool
pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
                        const struct pan_varying_layout *varying_layout,
                        bool valhall_use_ld_var_buf)
{
   struct lower_fs_inputs_ctx ctx;

   ctx.arch = pan_arch(gpu_id);
   ctx.varying_layout = varying_layout;
   ctx.valhall_use_ld_var_buf = valhall_use_ld_var_buf;

   return nir_shader_intrinsics_pass(shader, lower_fs_input_load,
                                     nir_metadata_control_flow, &ctx);
}