diff --git a/.pick_status.json b/.pick_status.json
index 45c5b49616d..9e56eaaae24 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -574,7 +574,7 @@
         "description": "aco: Make private_segment_buffer/scratch_offset per-resume",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7c8c3ecddb6..7ae302a78cf 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7391,7 +7391,9 @@ Temp
 get_scratch_resource(isel_context* ctx)
 {
    Builder bld(ctx->program, ctx->block);
-   Temp scratch_addr = ctx->program->private_segment_buffer;
+   Temp scratch_addr;
+   if (!ctx->program->private_segment_buffers.empty())
+      scratch_addr = ctx->program->private_segment_buffers.back();
    if (!scratch_addr.bytes()) {
       Temp addr_lo =
          bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
@@ -7449,7 +7451,7 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
    } else {
       info.resource = get_scratch_resource(ctx);
       info.offset = Operand(as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)));
-      info.soffset = ctx->program->scratch_offset;
+      info.soffset = ctx->program->scratch_offsets.back();
       emit_load(ctx, bld, info, scratch_mubuf_load_params);
    }
 }
@@ -7505,7 +7507,7 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
       offset = as_vgpr(ctx, offset);
       for (unsigned i = 0; i < write_count; i++) {
          aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
-         Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset,
+         Instruction* mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offsets.back(),
                                         write_datas[i], offsets[i], true);
          mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
          unsigned access = ACCESS_TYPE_STORE | ACCESS_IS_SWIZZLED_AMD |
@@ -10929,9 +10931,9 @@ add_startpgm(struct isel_context* ctx)
           * handling spilling.
           */
          if (ctx->args->ring_offsets.used)
-            ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
+            ctx->program->private_segment_buffers.push_back(get_arg(ctx, ctx->args->ring_offsets));
 
-         ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
+         ctx->program->scratch_offsets.push_back(get_arg(ctx, ctx->args->scratch_offset));
       } else if (ctx->program->gfx_level <= GFX10_3 && ctx->program->stage != raytracing_cs) {
          /* Manually initialize scratch. For RT stages scratch initialization is done in the prolog.
           */
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 0a18d7445b3..eeceef0e5c9 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -2130,8 +2130,9 @@ public:
    std::vector<ac_shader_debug_info> debug_info;
 
    std::vector<uint8_t> constant_data;
-   Temp private_segment_buffer;
-   Temp scratch_offset;
+   /* Private segment buffers and scratch offsets. One entry per start/resume block */
+   aco::small_vec<Temp, 2> private_segment_buffers;
+   aco::small_vec<Temp, 2> scratch_offsets;
 
    uint16_t num_waves = 0;
    uint16_t min_waves = 0;
diff --git a/src/amd/compiler/aco_reindex_ssa.cpp b/src/amd/compiler/aco_reindex_ssa.cpp
index 7c30e5b5365..f06da735039 100644
--- a/src/amd/compiler/aco_reindex_ssa.cpp
+++ b/src/amd/compiler/aco_reindex_ssa.cpp
@@ -69,10 +69,14 @@ reindex_program(idx_ctx& ctx, Program* program)
    }
 
    /* update program members */
-   program->private_segment_buffer = Temp(ctx.renames[program->private_segment_buffer.id()],
-                                          program->private_segment_buffer.regClass());
-   program->scratch_offset =
-      Temp(ctx.renames[program->scratch_offset.id()], program->scratch_offset.regClass());
+   for (auto& private_segment_buffer : program->private_segment_buffers) {
+      private_segment_buffer =
+         Temp(ctx.renames[private_segment_buffer.id()], private_segment_buffer.regClass());
+   }
+   for (auto& scratch_offset : program->scratch_offsets) {
+      scratch_offset =
+         Temp(ctx.renames[scratch_offset.id()], scratch_offset.regClass());
+   }
    program->temp_rc = ctx.temp_rc;
 }
 
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index ae7ae16e329..4cdb0cdeb14 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -88,13 +88,16 @@ struct spill_ctx {
    unsigned vgpr_spill_slots;
    Temp scratch_rsrc;
 
+   unsigned resume_idx;
+
    spill_ctx(const RegisterDemand target_pressure_, Program* program_)
        : target_pressure(target_pressure_), program(program_), memory(),
          renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
          spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
          spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
          processed(program->blocks.size(), false), ssa_infos(program->peekAllocationId()),
-         remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0)
+         remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0),
+         resume_idx(0)
    {}
 
    void add_affinity(uint32_t first, uint32_t second)
@@ -1088,7 +1091,10 @@ spill_block(spill_ctx& ctx, unsigned block_idx)
 Temp
 load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
 {
-   Temp private_segment_buffer = ctx.program->private_segment_buffer;
+   Temp private_segment_buffer;
+   if (!ctx.program->private_segment_buffers.empty())
+      private_segment_buffer = ctx.program->private_segment_buffers[ctx.resume_idx];
+
    if (!private_segment_buffer.bytes()) {
       Temp addr_lo =
          bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
@@ -1109,7 +1115,7 @@ load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
 
       Temp carry = bld.tmp(s1);
       addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
-                         ctx.program->scratch_offset);
+                         ctx.program->scratch_offsets[ctx.resume_idx]);
       addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
                          Operand::c32(0), bld.scc(carry));
 
@@ -1218,7 +1224,9 @@ spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& inst
    uint32_t spill_id = spill->operands[1].constantValue();
    uint32_t spill_slot = slots[spill_id];
 
-   Temp scratch_offset = ctx.program->scratch_offset;
+   Temp scratch_offset;
+   if (!ctx.program->scratch_offsets.empty())
+      scratch_offset = ctx.program->scratch_offsets[ctx.resume_idx];
    unsigned offset;
    setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
 
@@ -1264,7 +1272,9 @@ reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& ins
    uint32_t spill_id = reload->operands[0].constantValue();
    uint32_t spill_slot = slots[spill_id];
 
-   Temp scratch_offset = ctx.program->scratch_offset;
+   Temp scratch_offset;
+   if (!ctx.program->scratch_offsets.empty())
+      scratch_offset = ctx.program->scratch_offsets[ctx.resume_idx];
    unsigned offset;
    setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
 
@@ -1488,6 +1498,8 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
           * we cannot reuse the current scratch_rsrc temp because its definition is unreachable */
          if (block.linear_preds.empty())
             ctx.scratch_rsrc = Temp();
+         if (block.kind & block_kind_resume)
+            ++ctx.resume_idx;
       }
 
       std::vector<aco_ptr<Instruction>>::iterator it;