diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 666f2826ba6..0cbc6b6016c 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -8864,7 +8864,12 @@ fs_visitor::allocate_registers(bool allow_spilling)
    if (last_scratch > 0) {
       ASSERTED unsigned max_scratch_size = 2 * 1024 * 1024;
 
-      prog_data->total_scratch = brw_get_scratch_size(last_scratch);
+      /* Take the max of any previously compiled variant of the shader. In the
+       * case of bindless shaders with return parts, this will also take the
+       * max of all parts.
+       */
+      prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch),
+                                      prog_data->total_scratch);
 
       if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) {
          if (devinfo->platform == INTEL_PLATFORM_HSW) {
@@ -9701,6 +9706,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
 
    prog_data->base.stage = MESA_SHADER_FRAGMENT;
    prog_data->base.ray_queries = nir->info.ray_queries;
+   prog_data->base.total_scratch = 0;
 
    const struct intel_device_info *devinfo = compiler->devinfo;
    const unsigned max_subgroup_size = compiler->devinfo->ver >= 6 ? 32 : 16;
@@ -10074,6 +10080,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
    prog_data->base.stage = MESA_SHADER_COMPUTE;
    prog_data->base.total_shared = nir->info.shared_size;
    prog_data->base.ray_queries = nir->info.ray_queries;
+   prog_data->base.total_scratch = 0;
 
    if (!nir->info.workgroup_size_variable) {
       prog_data->local_size[0] = nir->info.workgroup_size[0];
@@ -10333,6 +10340,8 @@ brw_compile_bs(const struct brw_compiler *compiler,
 
    prog_data->base.stage = shader->info.stage;
    prog_data->base.ray_queries = shader->info.ray_queries;
+   prog_data->base.total_scratch = 0;
+
    prog_data->max_stack_size = 0;
 
    fs_generator g(compiler, params->log_data, mem_ctx, &prog_data->base,
diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp
index 521f89a2f77..f39b2a706ed 100644
--- a/src/intel/compiler/brw_mesh.cpp
+++ b/src/intel/compiler/brw_mesh.cpp
@@ -167,6 +167,7 @@ brw_compile_task(const struct brw_compiler *compiler,
 
    prog_data->base.base.stage = MESA_SHADER_TASK;
    prog_data->base.base.total_shared = nir->info.shared_size;
+   prog_data->base.base.total_scratch = 0;
 
    prog_data->base.local_size[0] = nir->info.workgroup_size[0];
    prog_data->base.local_size[1] = nir->info.workgroup_size[1];
@@ -531,6 +532,7 @@ brw_compile_mesh(const struct brw_compiler *compiler,
 
    prog_data->base.base.stage = MESA_SHADER_MESH;
    prog_data->base.base.total_shared = nir->info.shared_size;
+   prog_data->base.base.total_scratch = 0;
 
    prog_data->base.local_size[0] = nir->info.workgroup_size[0];
    prog_data->base.local_size[1] = nir->info.workgroup_size[1];
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 6afe9c72836..dc201a66a3e 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -122,7 +122,7 @@ extern const char *const conditional_modifier[16];
 extern const char *const pred_ctrl_align16[16];
 
 /* Per-thread scratch space is a power-of-two multiple of 1KB. */
-static inline int
+static inline unsigned
 brw_get_scratch_size(int size)
 {
    return MAX2(1024, util_next_power_of_two(size));
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 6f4f18abac4..7ee58fecad4 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -2543,6 +2543,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
 
    prog_data->base.base.stage = MESA_SHADER_VERTEX;
    prog_data->base.base.ray_queries = nir->info.ray_queries;
+   prog_data->base.base.total_scratch = 0;
 
    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
    brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index 2d3c651dc51..65c5694cc85 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -597,6 +597,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
 
    prog_data->base.base.stage = MESA_SHADER_GEOMETRY;
    prog_data->base.base.ray_queries = nir->info.ray_queries;
+   prog_data->base.base.total_scratch = 0;
 
    /* The GLSL linker will have already matched up GS inputs and the outputs
     * of prior stages.  The driver does extend VS outputs in some cases, but
diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp
index 0ae713fef28..c4c2ec113dc 100644
--- a/src/intel/compiler/brw_vec4_tcs.cpp
+++ b/src/intel/compiler/brw_vec4_tcs.cpp
@@ -367,6 +367,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
 
    vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
    prog_data->base.base.ray_queries = nir->info.ray_queries;
+   prog_data->base.base.total_scratch = 0;
 
    nir->info.outputs_written = key->outputs_written;
    nir->info.patch_outputs_written = key->patch_outputs_written;