panvk,panfrost: Always emit ld_var_buf when possible

Previously the driver decided when the backend should use
LD_VAR_BUF[_IMM] instructions based on the total number of varyings
read, falling back to LD_VAR[_IMM] + descriptors when the varying index
could overflow the immediate index in the instructions.  That means that
even adding a single varying read could overflow the index and make
everything fall back to LD_VAR.

With this patch the backend decides when to use LD_VAR_BUF for each
varying load, reporting that decision to the driver.  This helps with
index overflows because only the instructions that actually overflow the
immediate index use the LD_VAR fallback, leaving all other instructions
on the fast path.

Signed-off-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40515>
This commit is contained in:
Lorenzo Rossi 2026-03-18 17:38:54 +01:00 committed by Marge Bot
parent e4e7968806
commit caf5a2640b
6 changed files with 33 additions and 34 deletions

View file

@ -208,8 +208,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
}
if (dev->arch >= 9) {
inputs.valhall.use_ld_var_buf = inputs.varying_layout &&
inputs.varying_layout->generic_size_B <= pan_ld_var_buf_off_size(dev->arch);
/* Always enable this for GL, it avoids crashes when using unbound
* resources. */
inputs.robust_descriptors = true;

View file

@ -7064,7 +7064,7 @@ bifrost_compile_shader_nir(nir_shader *nir,
if (!inputs->is_blend)
NIR_PASS(_, nir, pan_nir_lower_fs_inputs, inputs->gpu_id,
inputs->varying_layout, inputs->valhall.use_ld_var_buf);
inputs->varying_layout, info);
}
if (nir->info.stage == MESA_SHADER_VERTEX && info->vs.idvs) {

View file

@ -132,13 +132,6 @@ struct pan_compile_inputs {
/* In multiples of 32bit. */
uint32_t offset;
} fau_consts;
union {
struct {
/* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. */
bool use_ld_var_buf;
} valhall;
};
};
enum pan_varying_section {
@ -335,6 +328,13 @@ struct bifrost_shader_info {
* values for flat varyings.
*/
bool uses_flat_shading;
/* Whether there are any LD_VAR[_IMM] instructions that require varying
* descriptors. LD_VAR_BUF[_IMM] do not count as they only need buffer
* descriptors to work correctly. For more details check out
* docs/drivers/panfrost/varyings.rst
*/
bool uses_ld_var;
};
struct midgard_shader_info {

View file

@ -63,7 +63,7 @@ bool pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
const struct pan_varying_layout *varying_layout,
bool valhall_use_ld_var_buf);
struct pan_shader_info *info);
bool pan_nir_lower_helper_invocation(nir_shader *shader);
bool pan_nir_lower_sample_pos(nir_shader *shader);

View file

@ -169,7 +169,7 @@ pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
struct lower_fs_inputs_ctx {
unsigned arch;
const struct pan_varying_layout *varying_layout;
bool valhall_use_ld_var_buf;
struct pan_shader_info *info;
};
static bool
@ -208,15 +208,17 @@ lower_fs_input_load(struct nir_builder *b,
const unsigned component = nir_intrinsic_component(load);
const unsigned load_comps = load->num_components + component;
nir_def *res;
if (ctx->valhall_use_ld_var_buf) {
assert(ctx->arch >= 9);
const struct pan_varying_slot *slot = NULL;
bool use_ld_var_buf = false;
if (ctx->varying_layout && ctx->arch >= 9) {
pan_varying_layout_require_layout(ctx->varying_layout);
const struct pan_varying_slot *slot =
pan_varying_layout_find_slot(ctx->varying_layout,
sem.location);
slot = pan_varying_layout_find_slot(ctx->varying_layout, sem.location);
assert(slot);
use_ld_var_buf = slot->offset < pan_ld_var_buf_off_size(ctx->arch);
}
nir_def *res;
if (use_ld_var_buf) {
const nir_alu_type src_type = slot->alu_type;
nir_def *offset_B = nir_imm_int(b, slot->offset);
@ -232,6 +234,7 @@ lower_fs_input_load(struct nir_builder *b,
.io_semantics = sem);
}
} else {
ctx->info->bifrost.uses_ld_var = true;
const uint32_t base = nir_intrinsic_base(load);
nir_def *idx = nir_imm_int(b, base);
@ -262,12 +265,12 @@ lower_fs_input_load(struct nir_builder *b,
bool
pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
const struct pan_varying_layout *varying_layout,
bool valhall_use_ld_var_buf)
struct pan_shader_info *info)
{
const struct lower_fs_inputs_ctx ctx = {
.arch = pan_arch(gpu_id),
.varying_layout = varying_layout,
.valhall_use_ld_var_buf = valhall_use_ld_var_buf,
.info = info,
};
return nir_shader_intrinsics_pass(shader, lower_fs_input_load,
nir_metadata_control_flow,

View file

@ -1405,19 +1405,6 @@ panvk_compile_shader(struct panvk_device *dev,
/* VS (if known) decides the memory layout */
inputs.varying_layout = vs_varying_layout;
#if PAN_ARCH >= 9
/* LD_VAR_BUF[_IMM] has a fixed-size offset, limiting its use when we
* can fit all of the generic varyings in the offset field.
* TODO: We could still use LD_VAR_BUF for just the fields that don't
* overflow.
*/
inputs.valhall.use_ld_var_buf =
vs_varying_layout &&
vs_varying_layout->generic_size_B <= pan_ld_var_buf_off_size(PAN_ARCH);
variant->desc_info.fs_varying_attr_desc_count =
inputs.valhall.use_ld_var_buf ? 0 : nir->num_inputs;
#endif
panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,
info->robustness, state, &variant->desc_info);
@ -1438,6 +1425,17 @@ panvk_compile_shader(struct panvk_device *dev,
panvk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
return result;
}
#if PAN_ARCH >= 9
/* LD_VAR_BUF[_IMM] has a fixed-size offset; when shaders overflow
 * it, they fall back to LD_VAR[_IMM] and require descriptors.
* TODO: We could only emit descriptors that overflow the offset,
* saving a bit of space.
*/
variant->desc_info.fs_varying_attr_desc_count =
variant->info.bifrost.uses_ld_var ? nir->num_inputs : 0;
#endif
break;
}