diff --git a/.pick_status.json b/.pick_status.json
index e0e06d050bf..6068a2465b3 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -2424,7 +2424,7 @@
         "description": "aco: fix broken VGPRs reservation for 64-bit attributes in VS prologs",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "8e6bff4caac2693d01e51092fa86bb93914ef2a5",
         "notes": null
diff --git a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp
index 59eba0178c6..24eaf1c9eee 100644
--- a/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_vs_prolog.cpp
@@ -339,10 +339,18 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index,
 }
 
 bool
-is_last_attribute_large(const struct aco_vs_prolog_info* pinfo)
+is_last_attribute_large(const struct aco_vs_prolog_info* pinfo, unsigned slots = 2)
 {
+   /* If the vertex shader consumes more than two channels of a large attribute,
+    * the attribute counts as two slots in num_attributes, even though
+    * misaligned_mask marks only the lower slot. Otherwise, it counts as
+    * a single slot.
+    */
+   if (pinfo->num_attributes < slots)
+      return false;
+
    const struct ac_vtx_format_info* vtx_info_table = ac_get_vtx_format_info_table(GFX8, true);
-   unsigned last_attribute = pinfo->num_attributes - 1;
+   unsigned last_attribute = pinfo->num_attributes - slots;
 
    if ((pinfo->misaligned_mask & (1u << last_attribute))) {
       const struct ac_vtx_format_info* vtx_info = &vtx_info_table[pinfo->formats[last_attribute]];
@@ -406,12 +414,14 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
    bool needs_tmp_vgpr1 =
       has_nontrivial_divisors && (program->gfx_level <= GFX8 || program->gfx_level >= GFX11);
 
-   int vgpr_offset = pinfo->misaligned_mask & (1u << (pinfo->num_attributes - 1)) ? 0 : -4;
    const bool is_last_attr_large = is_last_attribute_large(pinfo);
+   const bool is_last_attr_large_mismatch = is_last_attribute_large(pinfo, 1);
+   int vgpr_offset =
+      pinfo->misaligned_mask & (1u << (pinfo->num_attributes - 1)) || is_last_attr_large ? 0 : -4;
 
    unsigned num_vgprs = args->num_vgprs_used;
    PhysReg attributes_start =
-      get_next_vgpr(pinfo->num_attributes * 4 + (is_last_attr_large ? 4 : 0), &num_vgprs);
+      get_next_vgpr(pinfo->num_attributes * 4 + (is_last_attr_large_mismatch ? 4 : 0), &num_vgprs);
    PhysReg vertex_index, instance_index, start_instance_vgpr, nontrivial_tmp_vgpr0,
       nontrivial_tmp_vgpr1;
    if (needs_vertex_index)
@@ -645,7 +655,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
     * them and they might be overwritten. This isn't the most optimal solution
     * but 64-bit vertex attributes are rarely used.
     */
-   if (is_last_attr_large)
+   if (is_last_attr_large_mismatch)
       wait_for_vmem_loads(bld);
 
    bld.sop1(aco_opcode::s_setpc_b64, continue_pc);