pan/compiler: Mostly remove auto32 varting store

Using explicit types makes the code more easier to reason about, there is only one edge-case where we still need varying stores but it should be removed soon. Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Acked-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38681>
2026-06-14 04:58:20 +02:00 · 2026-02-20 17:48:20 +01:00 · 2026-02-20 17:48:20 +01:00 · 3b8a87cbe7
commit 3b8a87cbe7
parent 9f8beb3bd5
2 changed files with 89 additions and 68 deletions
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@ -590,6 +590,12 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
   bool smooth = instr->intrinsic == nir_intrinsic_load_interpolated_input;
   bi_index src0 = bi_null();

+   /* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
+    * through a compiler input value, falling back to LD_VAR[_IMM] +
+    * Attribute Descriptors otherwise. */
+   bool use_ld_var_buf =
+      b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
+
   unsigned component = nir_intrinsic_component(instr);
   enum bi_vecsize vecsize = (instr->num_components + component - 1);
   bi_index dest =
@ -597,18 +603,24 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)

   nir_io_semantics sem = nir_intrinsic_io_semantics(instr);

+   const nir_alu_type type = nir_intrinsic_dest_type(instr);
+   const nir_alu_type base_type = nir_alu_type_get_base_type(type);
+   const nir_alu_type sz = nir_alu_type_get_type_size(type);
+   assert(sz == instr->def.bit_size);
+   assert(sz == 16 || sz == 32);
+   assert(base_type == nir_type_int || base_type == nir_type_uint || base_type == nir_type_float);
+
   const struct pan_varying_slot *slot = NULL;
-   if (b->shader->varying_layout) {
+   unsigned src_sz = sz;
+   if (use_ld_var_buf) {
+      pan_varying_layout_require_layout(b->shader->varying_layout);
      slot = pan_varying_layout_find_slot(b->shader->varying_layout,
                                          sem.location);
+      assert(slot);
+      src_sz = nir_alu_type_get_type_size(slot->alu_type);
+      assert(src_sz == 16 || src_sz == 32);
   }

-   unsigned sz = instr->def.bit_size;
-   assert(sz == 16 || sz == 32);
-   /* mediump varyings are always written as 32-bits in the VS, but may be read
-    * to 16 in the FS. */
-   unsigned src_sz = sem.medium_precision ? 32 : sz;
-
   if (smooth) {
      nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]);
      assert(parent);
@ -616,17 +628,26 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
      sample = bi_interp_for_intrinsic(parent->intrinsic);
      src0 = bi_varying_src0_for_barycentric(b, parent);

+      /* Smooth ints don't exist */
+      assert(base_type == nir_type_float);
      regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
      source_format =
         (src_sz == 16) ? BI_SOURCE_FORMAT_F16 : BI_SOURCE_FORMAT_F32;
   } else {
-      /* u16 regfmt is not supported by LD_VAR_BUF, but using f16 for integers
-       * is okay because we use a f16 attribute descriptor for all 16-bit
-       * varyings regardless of whether they are floats or ints. The
-       * conversion is a no-op. */
-      regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO;
-      source_format = (src_sz == 16) ?
-         BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
+      if (use_ld_var_buf) {
+         /* integer regfmt are not supported by LD_VAR_BUF, but using float src_types for integers
+          * is okay if the source_format is flat and uses the same bit size.
+          * The conversion is a no-op. */
+         regfmt = (sz == 16) ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_F32;
+         source_format = (src_sz == 16) ?
+            BI_SOURCE_FORMAT_FLAT16 : BI_SOURCE_FORMAT_FLAT32;
+         /* conversion MUST be a noop for int varyings to work correctly */
+         assert(base_type == nir_type_float || src_sz == sz);
+      } else {
+         /* Flat loading with i16/u16 is not encodable */
+         assert(base_type == nir_type_float || sz == 32);
+         regfmt = bi_reg_fmt_for_nir(type);
+      }

      /* Valhall can't have bi_null() here, although the source is
       * logically unused for flat varyings
@ -643,13 +664,8 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
   bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
   unsigned base = nir_intrinsic_base(instr);

-   /* Only use LD_VAR_BUF[_IMM] if explicitly told by the driver
-    * through a compiler input value, falling back to LD_VAR[_IMM] +
-    * Attribute Descriptors otherwise. */
-   bool use_ld_var_buf =
-      b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
-
   if (use_ld_var_buf) {
+      assert(slot);
      if (immediate) {
         assert(nir_src_is_const(*offset_src) && "assumes immediate offset");
         unsigned offset = slot->offset + (nir_src_as_uint(*offset_src) * 16);
@ -1151,11 +1167,20 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)

      bi_store(b, nr * src_bit_sz, data, a[0], a[1], seg, offset);
   } else {
-      /* 16-bit varyings are always written and loaded as F16, regardless of
-       * whether they are float or int */
      assert(T_size == 32 || T_size == 16);
-      enum bi_register_format regfmt =
-         T_size == 16 ? BI_REGISTER_FORMAT_F16 : BI_REGISTER_FORMAT_AUTO;
+
+      enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
+
+      /* Since v9 we cannot have separate attribute descriptors for VS-FS,
+       * There might be a mismatch on Gallium where the VS thinks it is storing
+       * an int, but the data is actually a float, and that's what FS expects.
+       * So, just for v9 onwards, just until we haven't fixed gallium, use auto32.
+       * We are still getting around the midgard quirk since we do this only
+       * from v9.
+       * TODO: fix all bugs with gallium and remove this patch
+       */
+      if (b->shader->arch >= 9 && T_size == 32)
+         regfmt = BI_REGISTER_FORMAT_AUTO;

      if (immediate) {
         bi_index address = bi_lea_attr_imm(b, bi_vertex_id(b),
--- a/src/panfrost/compiler/midgard/midgard_compile.c
+++ b/src/panfrost/compiler/midgard/midgard_compile.c
@ -1322,39 +1322,26 @@ emit_varying_read(compiler_context *ctx, unsigned dest, unsigned offset,
   ins.load_store.arg_reg = REGISTER_LDST_ZERO;
   ins.load_store.index_format = midgard_index_address_u32;

-   /* For flat shading, for GPUs supporting auto32, we always use .u32 and
-    * require 32-bit mode. For smooth shading, we use the appropriate
-    * floating-point type.
-    *
-    * This could be optimized, but it makes it easy to check correctness.
-    */
-   if (ctx->quirks & MIDGARD_NO_AUTO32) {
-      switch (type) {
-      case nir_type_uint32:
-      case nir_type_bool32:
-         ins.op = midgard_op_ld_vary_32u;
-         break;
-      case nir_type_int32:
-         ins.op = midgard_op_ld_vary_32i;
-         break;
-      case nir_type_float32:
-         ins.op = midgard_op_ld_vary_32;
-         break;
-      case nir_type_float16:
-         ins.op = midgard_op_ld_vary_16;
-         break;
-      default:
-         UNREACHABLE("Attempted to load unknown type");
-         break;
-      }
-   } else if (flat) {
-      assert(nir_alu_type_get_type_size(type) == 32);
-      ins.op = midgard_op_ld_vary_32u;
-   } else {
+   if (!flat) {
      assert(nir_alu_type_get_base_type(type) == nir_type_float);
-
-      ins.op = (nir_alu_type_get_type_size(type) == 32) ? midgard_op_ld_vary_32
-                                                        : midgard_op_ld_vary_16;
+   }
+   switch (type) {
+   case nir_type_uint32:
+   case nir_type_bool32:
+      ins.op = midgard_op_ld_vary_32u;
+      break;
+   case nir_type_int32:
+      ins.op = midgard_op_ld_vary_32i;
+      break;
+   case nir_type_float32:
+      ins.op = midgard_op_ld_vary_32;
+      break;
+   case nir_type_float16:
+      ins.op = midgard_op_ld_vary_16;
+      break;
+   default:
+      UNREACHABLE("Attempted to load unknown type");
+      break;
   }

   emit_mir_instruction(ctx, &ins);
@ -1868,18 +1855,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)

         unsigned dst_component = nir_intrinsic_component(instr);
         unsigned nr_comp = nir_src_num_components(instr->src[0]);
-
-         /* ABI: Format controlled by the attribute descriptor.
-          * This simplifies flat shading, although it prevents
-          * certain (unimplemented) 16-bit optimizations.
-          *
-          * In particular, it lets the driver handle internal
-          * TGSI shaders that set flat in the VS but smooth in
-          * the FS. This matches our handling on Bifrost.
-          */
-         bool auto32 = true;
-         assert(nir_alu_type_get_type_size(nir_intrinsic_src_type(instr)) ==
-                32);
+         bool auto32 = false;

         /* ABI: varyings in the secondary attribute table */
         bool secondary_table = true;
@ -1910,6 +1886,26 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
               src_component++;
         }

+         nir_alu_type type = nir_intrinsic_src_type(instr);
+         switch (type) {
+         case nir_type_uint32:
+         case nir_type_bool32:
+            st.op = midgard_op_st_vary_32u;
+            break;
+         case nir_type_int32:
+            st.op = midgard_op_st_vary_32i;
+            break;
+         case nir_type_float32:
+            st.op = midgard_op_st_vary_32;
+            break;
+         case nir_type_float16:
+            st.op = midgard_op_st_vary_16;
+            break;
+         default:
+            UNREACHABLE("Attempted to store unknown type");
+            break;
+         }
+
         emit_mir_instruction(ctx, &st);
      } else {
         UNREACHABLE("Unknown store");