diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c
index d9de7e191e9..f0ad06fee4c 100644
--- a/src/gallium/drivers/panfrost/pan_shader.c
+++ b/src/gallium/drivers/panfrost/pan_shader.c
@@ -132,6 +132,10 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
       .gpu_id = panfrost_device_gpu_id(dev),
    };
 
+   if (dev->arch >= 9)
+      /* Use LD_VAR_BUF for varying lookups. */
+      inputs.valhall.use_ld_var_buf = true;
+
    /* Lower this early so the backends don't have to worry about it */
    if (s->info.stage == MESA_SHADER_FRAGMENT) {
       inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
diff --git a/src/panfrost/ci/panfrost-g610-fails.txt b/src/panfrost/ci/panfrost-g610-fails.txt
index f3ef3c2f4a1..0fbf80fc4f8 100644
--- a/src/panfrost/ci/panfrost-g610-fails.txt
+++ b/src/panfrost/ci/panfrost-g610-fails.txt
@@ -273,19 +273,10 @@ dEQP-VK.api.device_init.create_device_global_priority_query_khr.basic,Fail
 dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.63,Fail
 dEQP-VK.renderpass2.dedicated_allocation.attachment_allocation.input_output.63,Fail
 
-dEQP-VK.glsl.limits.near_max.fragment_input.components_123,Fail
-dEQP-VK.glsl.limits.near_max.fragment_input.components_124,Fail
-
-dEQP-VK.pipeline.monolithic.max_varyings.test_vertex_io_between_vertex_fragment,Fail
-
-dEQP-VK.pipeline.pipeline_library.max_varyings.test_vertex_io_between_vertex_fragment,Fail
-
 dEQP-VK.renderpass.suballocation.attachment_allocation.input_output.63,Fail
 dEQP-VK.renderpass.multiple_subpasses_multiple_command_buffers.test,Fail
 dEQP-VK.renderpass2.suballocation.attachment_allocation.input_output.63,Fail
 
-dEQP-VK.pipeline.fast_linked_library.max_varyings.test_vertex_io_between_vertex_fragment,Fail
-
 dEQP-VK.glsl.loops.special.do_while_dynamic_iterations.dowhile_trap_vertex,Crash
 
 dEQP-VK.rasterization.rasterization_order_attachment_access.depth.samples_1.multi_draw_barriers,Crash
diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c
index 6d55771b7c0..cee274b1918 100644
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -589,45 +589,30 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
       b->shader->info.bifrost->uses_flat_shading = true;
    }
 
-   enum bi_source_format source_format =
-      smooth ? BI_SOURCE_FORMAT_F32 : BI_SOURCE_FORMAT_FLAT32;
-
    nir_src *offset = nir_get_io_offset_src(instr);
    unsigned imm_index = 0;
    bool immediate = bi_is_imm_var_desc_handle(b, instr, &imm_index);
    unsigned base = nir_intrinsic_base(instr);
 
-   /* On Valhall, ensure the table and index are valid for usage with immediate
-    * form when IDVS isn't used */
-   if (b->shader->arch >= 9 && !b->shader->malloc_idvs)
-      immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) &&
-                   pan_res_handle_get_index(base) < 256;
+   /* LD_VAR_BUF[_IMM] takes an 8-bit offset, limiting its use to 64 or fewer
+    * varying components, assuming F32.
+    * Therefore, only use LD_VAR_BUF[_IMM] if explicitly told to by the driver
+    * through a compiler input value, falling back to LD_VAR[_IMM] +
+    * Attribute Descriptors otherwise. */
+   bool use_ld_var_buf =
+      b->shader->malloc_idvs && b->shader->inputs->valhall.use_ld_var_buf;
 
-   if (b->shader->malloc_idvs && immediate) {
-      /* Immediate index given in bytes. */
-      bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
-                           update, vecsize,
-                           bi_varying_offset(b->shader, instr));
-   } else if (immediate) {
-      bi_instr *I;
+   if (use_ld_var_buf) {
+      enum bi_source_format source_format =
+         smooth ? BI_SOURCE_FORMAT_F32 : BI_SOURCE_FORMAT_FLAT32;
 
-      if (smooth) {
-         I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize,
-                              pan_res_handle_get_index(imm_index));
+      if (immediate) {
+         /* Immediate index given in bytes. */
+         bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
+                              update, vecsize,
+                              bi_varying_offset(b->shader, instr));
       } else {
-         I = bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize,
-                                   pan_res_handle_get_index(imm_index));
-      }
-
-      /* Valhall usually uses machine-allocated IDVS. If this is disabled,
-       * use a simple Midgard-style ABI.
-       */
-      if (b->shader->arch >= 9)
-         I->table = va_res_fold_table_idx(pan_res_handle_get_table(base));
-   } else {
-      bi_index idx = bi_src_index(offset);
-
-      if (b->shader->malloc_idvs) {
+         bi_index idx = bi_src_index(offset);
          /* Index needs to be in bytes, but NIR gives the index
           * in slots. For now assume 16 bytes per element.
           */
@@ -639,7 +624,33 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr)
 
          bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, sample,
                           source_format, update, vecsize);
+      }
+   } else {
+      /* On Valhall, ensure the table and index are valid for usage with
+       * immediate form when IDVS isn't used */
+      if (b->shader->arch >= 9)
+         immediate &= va_is_valid_const_table(pan_res_handle_get_table(base)) &&
+                      pan_res_handle_get_index(base) < 256;
+
+      if (immediate) {
+         bi_instr *I;
+
+         if (smooth) {
+            I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize,
+                                 pan_res_handle_get_index(imm_index));
+         } else {
+            I =
+               bi_ld_var_flat_imm_to(b, dest, BI_FUNCTION_NONE, regfmt, vecsize,
+                                     pan_res_handle_get_index(imm_index));
+         }
+
+         /* Valhall usually uses LD_VAR_BUF. If this is disabled, use a simple
+          * Midgard-style ABI. */
+         if (b->shader->arch >= 9)
+            I->table = va_res_fold_table_idx(pan_res_handle_get_table(base));
       } else {
+         bi_index idx = bi_src_index(offset);
+
          if (base != 0)
             idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false);
 
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h
index 6ec4a2807c5..32ce7515797 100644
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -121,6 +121,10 @@ struct panfrost_compile_inputs {
       struct {
          uint32_t rt_conv[8];
       } bifrost;
+      struct {
+         /* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. */
+         bool use_ld_var_buf;
+      } valhall;
    };
 };
 
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
index 18acab6e13b..8df4f0b2f2c 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c
@@ -165,6 +165,73 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf)
    return VK_SUCCESS;
 }
 
+/* Number of varying slots for the current draw: the larger of the VS output
+ * and FS input counts, or 0 when get_fs() yields no fragment shader. */
+static uint32_t
+get_varying_slots(const struct panvk_cmd_buffer *cmdbuf)
+{
+   const struct panvk_shader *frag = get_fs(cmdbuf);
+   if (!frag)
+      return 0;
+
+   const struct panvk_shader *vert = cmdbuf->state.gfx.vs.shader;
+   unsigned vs_out = vert->info.varyings.output_count;
+   unsigned fs_in = frag->info.varyings.input_count;
+
+   return MAX2(vs_out, fs_in);
+}
+
+/* Pack an ATTRIBUTE descriptor for every generic (>= VARYING_SLOT_VAR0)
+ * fragment-shader input varying. Each descriptor lands in descs at the index
+ * the varying occupies in fs->info.varyings.input; others are skipped. */
+static void
+emit_varying_descs(const struct panvk_cmd_buffer *cmdbuf,
+                   struct mali_attribute_packed *descs)
+{
+   const struct panvk_shader *fs = get_fs(cmdbuf);
+   const uint32_t slot_count = get_varying_slots(cmdbuf);
+   /* Assumes 16 byte slots. We could do better. */
+   const uint32_t stride = slot_count * 16;
+
+   /* NOTE(review): the bound is MAX2(VS outputs, FS inputs) yet only the FS
+    * input array is indexed -- confirm entries past input_count are benign. */
+   for (uint32_t slot = 0; slot < slot_count; slot++) {
+      const struct pan_shader_varying *var = &fs->info.varyings.input[slot];
+      enum pipe_format fmt = var->format;
+
+      /* Only generic varyings (VAR0 and up) get a descriptor. */
+      if (var->location < VARYING_SLOT_VAR0)
+         continue;
+
+      /* We currently always write out F32 in the vertex shaders, so widen
+       * F16 formats to the F32 format with the same component count. */
+      if (fmt == PIPE_FORMAT_R16_FLOAT)
+         fmt = PIPE_FORMAT_R32_FLOAT;
+      else if (fmt == PIPE_FORMAT_R16G16_FLOAT)
+         fmt = PIPE_FORMAT_R32G32_FLOAT;
+      else if (fmt == PIPE_FORMAT_R16G16B16_FLOAT)
+         fmt = PIPE_FORMAT_R32G32B32_FLOAT;
+      else if (fmt == PIPE_FORMAT_R16G16B16A16_FLOAT)
+         fmt = PIPE_FORMAT_R32G32B32A32_FLOAT;
+
+      const uint32_t generic_idx = var->location - VARYING_SLOT_VAR0;
+
+      pan_pack(&descs[slot], ATTRIBUTE, cfg) {
+         cfg.attribute_type = MALI_ATTRIBUTE_TYPE_VERTEX_PACKET;
+         cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
+         cfg.format = GENX(panfrost_format_from_pipe_format)(fmt)->hw;
+         /* NOTE(review): 61 is a hard-coded table index -- confirm meaning. */
+         cfg.table = 61;
+         cfg.buffer_index = 0;
+         cfg.offset_enable = false;
+         /* Offset is 1024 plus 16 per generic location index. */
+         cfg.offset = 1024 + (generic_idx * 16);
+         cfg.attribute_stride = stride;
+         cfg.packet_stride = stride + 16;
+      }
+   }
+}
+
 static VkResult
 prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
 {
@@ -172,7 +239,7 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
    const struct panvk_descriptor_state *desc_state =
       &cmdbuf->state.gfx.desc_state;
-   uint32_t desc_count = fs->desc_info.dyn_bufs.count + 1;
+   uint32_t desc_count = fs->desc_info.dyn_bufs.count + MAX_VARYING + 1;
    struct panfrost_ptr driver_set = panvk_cmd_alloc_dev_mem(
       cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
    struct panvk_opaque_desc *descs = driver_set.cpu;
@@ -180,13 +247,15 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
    if (desc_count && !driver_set.gpu)
       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 
-   /* Dummy sampler always comes first. */
-   pan_cast_and_pack(&descs[0], SAMPLER, cfg) {
+   emit_varying_descs(cmdbuf, (struct mali_attribute_packed *)(&descs[0]));
+
+   /* Dummy sampler always comes right after the varyings. */
+   pan_cast_and_pack(&descs[MAX_VARYING], SAMPLER, cfg) {
       cfg.clamp_integer_array_indices = false;
    }
 
-   panvk_per_arch(cmd_fill_dyn_bufs)(desc_state, fs,
-                                     (struct mali_buffer_packed *)(&descs[1]));
+   panvk_per_arch(cmd_fill_dyn_bufs)(
+      desc_state, fs, (struct mali_buffer_packed *)(&descs[1 + MAX_VARYING]));
 
    fs_desc_state->driver_set.dev_addr = driver_set.gpu;
    fs_desc_state->driver_set.size = desc_count * PANVK_DESCRIPTOR_SIZE;
@@ -1650,16 +1719,8 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    if (result != VK_SUCCESS)
       return result;
 
-   uint32_t varying_size = 0;
-
-   if (fs) {
-      unsigned vs_vars = vs->info.varyings.output_count;
-      unsigned fs_vars = fs->info.varyings.input_count;
-      unsigned var_slots = MAX2(vs_vars, fs_vars);
-
-      /* Assumes 16 byte slots. We could do better. */
-      varying_size = var_slots * 16;
-   }
+   /* Assumes 16 byte slots. We could do better. */
+   uint32_t varying_size = get_varying_slots(cmdbuf) * 16;
 
    cs_update_vt_ctx(b) {
       /* We don't use the resource dep system yet. */
diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
index 1f155864fc7..aa733635f0f 100644
--- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
+++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c
@@ -1025,8 +1025,22 @@ create_copy_table(nir_shader *nir, struct lower_desc_ctx *ctx)
    for (uint32_t i = 0; i < PANVK_BIFROST_DESC_TABLE_COUNT; i++)
       copy_count += desc_info->others[i].count;
 #else
-   /* Dummy sampler comes after the vertex attributes. */
-   uint32_t dummy_sampler_idx = nir->info.stage == MESA_SHADER_VERTEX ? 16 : 0;
+   uint32_t dummy_sampler_idx;
+   switch (nir->info.stage) {
+   case MESA_SHADER_VERTEX:
+      /* Dummy sampler comes after the vertex attributes. */
+      dummy_sampler_idx = 16;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      /* Dummy sampler comes after the varyings. */
+      dummy_sampler_idx = MAX_VARYING;
+      break;
+   case MESA_SHADER_COMPUTE:
+      dummy_sampler_idx = 0;
+      break;
+   default:
+      unreachable("unexpected stage");
+   }
    desc_info->dummy_sampler_handle = pan_res_handle(0, dummy_sampler_idx);
 
    copy_count = desc_info->dyn_bufs.count + desc_info->dyn_bufs.count;
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index 0007f799f06..ee5b96c918b 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -1041,6 +1041,10 @@ panvk_compile_shader(struct panvk_device *dev,
       .gpu_id = phys_dev->kmod.props.gpu_prod_id,
       .no_ubo_to_push = true,
       .view_mask = (state && state->rp) ? state->rp->view_mask : 0,
+#if PAN_ARCH >= 9
+      /* LD_VAR_BUF does not support maxVertexOutputComponents (128) */
+      .valhall.use_ld_var_buf = false,
+#endif
    };
 
    if (info->stage == MESA_SHADER_FRAGMENT && state != NULL &&