diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index c66b3cd8ff6..ccd2f039340 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -10193,40 +10193,6 @@ brw_compile_cs(const struct brw_compiler *compiler,
    return ret;
 }
 
-static unsigned
-brw_cs_simd_size_for_group_size(const struct intel_device_info *devinfo,
-                                const struct brw_cs_prog_data *cs_prog_data,
-                                unsigned group_size)
-{
-   const unsigned mask = cs_prog_data->prog_mask;
-   assert(mask != 0);
-
-   static const unsigned simd8 = 1 << 0;
-   static const unsigned simd16 = 1 << 1;
-   static const unsigned simd32 = 1 << 2;
-
-   if (INTEL_DEBUG(DEBUG_DO32) && (mask & simd32))
-      return 32;
-
-   const uint32_t max_threads = devinfo->max_cs_workgroup_threads;
-
-   if ((mask & simd8) && group_size <= 8 * max_threads) {
-      /* Prefer SIMD16 if can do without spilling. Matches logic in
-       * brw_simd_selection.cpp.
-       */
-      if ((mask & simd16) && (~cs_prog_data->prog_spilled & simd16))
-         return 16;
-      return 8;
-   }
-
-   if ((mask & simd16) && group_size <= 16 * max_threads)
-      return 16;
-
-   assert(mask & simd32);
-   assert(group_size <= 32 * max_threads);
-   return 32;
-}
-
 struct brw_cs_dispatch_info
 brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
                          const struct brw_cs_prog_data *prog_data,
@@ -10238,9 +10204,13 @@ brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
       override_local_size ? override_local_size :
                             prog_data->local_size;
 
+   const int simd =
+      override_local_size ? brw_simd_select_for_workgroup_size(devinfo, prog_data, sizes) :
+                            brw_simd_select(prog_data);
+   assert(simd >= 0 && simd < 3);
+
    info.group_size = sizes[0] * sizes[1] * sizes[2];
-   info.simd_size =
-      brw_cs_simd_size_for_group_size(devinfo, prog_data, info.group_size);
+   info.simd_size = 8u << simd;
    info.threads = DIV_ROUND_UP(info.group_size, info.simd_size);
 
    const uint32_t remainder = info.group_size & (info.simd_size - 1);
diff --git a/src/intel/compiler/brw_private.h b/src/intel/compiler/brw_private.h
index d166a29e0d5..c4334ce3ff7 100644
--- a/src/intel/compiler/brw_private.h
+++ b/src/intel/compiler/brw_private.h
@@ -47,6 +47,10 @@ void brw_simd_mark_compiled(unsigned simd,
 
 int brw_simd_select(const struct brw_cs_prog_data *prog_data);
 
+int brw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo,
+                                       const struct brw_cs_prog_data *prog_data,
+                                       const unsigned *sizes);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/src/intel/compiler/brw_simd_selection.c b/src/intel/compiler/brw_simd_selection.c
index 551e882e1a8..2ecf568ad57 100644
--- a/src/intel/compiler/brw_simd_selection.c
+++ b/src/intel/compiler/brw_simd_selection.c
@@ -161,3 +161,42 @@ brw_simd_select(const struct brw_cs_prog_data *prog_data)
    else
       return -1;
 }
+
+int
+brw_simd_select_for_workgroup_size(const struct intel_device_info *devinfo,
+                                   const struct brw_cs_prog_data *prog_data,
+                                   const unsigned *sizes)
+{
+   assert(sizes);
+
+   if (prog_data->local_size[0] == sizes[0] &&
+       prog_data->local_size[1] == sizes[1] &&
+       prog_data->local_size[2] == sizes[2])
+      return brw_simd_select(prog_data);
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   struct brw_cs_prog_data cloned = *prog_data;
+   for (unsigned i = 0; i < 3; i++)
+      cloned.local_size[i] = sizes[i];
+
+   cloned.prog_mask = 0;
+   cloned.prog_spilled = 0;
+
+   const char *error[3] = {0};
+
+   for (unsigned simd = 0; simd < 3; simd++) {
+      /* We are not recompiling, so use original results of prog_mask and
+       * prog_spilled as they will already contain all possible compilations.
+       */
+      if (brw_simd_should_compile(mem_ctx, simd, devinfo, &cloned,
+                                  0 /* required_dispatch_width */, &error[simd]) &&
+          test_bit(prog_data->prog_mask, simd)) {
+         brw_simd_mark_compiled(simd, &cloned, test_bit(prog_data->prog_spilled, simd));
+      }
+   }
+
+   ralloc_free(mem_ctx);
+
+   return brw_simd_select(&cloned);
+}
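
Note (illustration, not part of the patch): a minimal sketch of how a caller
might use the new helper when the workgroup size is only known at dispatch
time. The `sizes`, `devinfo`, and `prog_data` variables below are hypothetical
placeholders; only brw_simd_select_for_workgroup_size() itself comes from this
patch, and the `8u << simd` mapping mirrors the brw_fs.cpp hunk above.

   /* Illustrative sketch: pick a SIMD width for a 64x1x1 dispatch whose
    * size was not known when the shader was compiled.
    */
   const unsigned sizes[3] = { 64, 1, 1 };
   const int simd = brw_simd_select_for_workgroup_size(devinfo, prog_data, sizes);
   assert(simd >= 0);                      /* at least one SIMD variant was compiled */
   const unsigned simd_size = 8u << simd;  /* 0 -> SIMD8, 1 -> SIMD16, 2 -> SIMD32 */
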
diff --git a/src/intel/compiler/test_simd_selection.cpp b/src/intel/compiler/test_simd_selection.cpp
index f1be0bf185f..7344c57aca4 100644
--- a/src/intel/compiler/test_simd_selection.cpp
+++ b/src/intel/compiler/test_simd_selection.cpp
@@ -145,6 +145,15 @@ TEST_F(SIMDSelectionCS, WorkgroupSizeVariable)
    brw_simd_mark_compiled(SIMD32, prog_data, not_spilled);
 
    ASSERT_EQ(prog_data->prog_mask, 1u << SIMD8 | 1u << SIMD16 | 1u << SIMD32);
+
+   const unsigned wg_8_1_1[] = { 8, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD16);
+
+   const unsigned wg_16_1_1[] = { 16, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD16);
+
+   const unsigned wg_32_1_1[] = { 32, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_32_1_1), SIMD16);
 }
 
 TEST_F(SIMDSelectionCS, WorkgroupSizeVariableSpilled)
@@ -161,6 +170,86 @@ TEST_F(SIMDSelectionCS, WorkgroupSizeVariableSpilled)
    brw_simd_mark_compiled(SIMD32, prog_data, spilled);
 
    ASSERT_EQ(prog_data->prog_mask, 1u << SIMD8 | 1u << SIMD16 | 1u << SIMD32);
+
+   const unsigned wg_8_1_1[] = { 8, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD8);
+
+   const unsigned wg_16_1_1[] = { 16, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD8);
+
+   const unsigned wg_32_1_1[] = { 32, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_32_1_1), SIMD8);
+}
+
+TEST_F(SIMDSelectionCS, WorkgroupSizeVariableNoSIMD8)
+{
+   prog_data->local_size[0] = 0;
+   prog_data->local_size[1] = 0;
+   prog_data->local_size[2] = 0;
+
+   ASSERT_TRUE(should_compile(SIMD8));
+   ASSERT_TRUE(should_compile(SIMD16));
+   brw_simd_mark_compiled(SIMD16, prog_data, not_spilled);
+   ASSERT_TRUE(should_compile(SIMD32));
+   brw_simd_mark_compiled(SIMD32, prog_data, not_spilled);
+
+   ASSERT_EQ(prog_data->prog_mask, 1u << SIMD16 | 1u << SIMD32);
+
+   const unsigned wg_8_1_1[] = { 8, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD16);
+
+   const unsigned wg_16_1_1[] = { 16, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD16);
+
+   const unsigned wg_32_1_1[] = { 32, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_32_1_1), SIMD16);
+}
+
+TEST_F(SIMDSelectionCS, WorkgroupSizeVariableNoSIMD16)
+{
+   prog_data->local_size[0] = 0;
+   prog_data->local_size[1] = 0;
+   prog_data->local_size[2] = 0;
+
+   ASSERT_TRUE(should_compile(SIMD8));
+   brw_simd_mark_compiled(SIMD8, prog_data, not_spilled);
+   ASSERT_TRUE(should_compile(SIMD16));
+   ASSERT_TRUE(should_compile(SIMD32));
+   brw_simd_mark_compiled(SIMD32, prog_data, not_spilled);
+
+   ASSERT_EQ(prog_data->prog_mask, 1u << SIMD8 | 1u << SIMD32);
+
+   const unsigned wg_8_1_1[] = { 8, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD8);
+
+   const unsigned wg_16_1_1[] = { 16, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD8);
+
+   const unsigned wg_32_1_1[] = { 32, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_32_1_1), SIMD8);
+}
+
+TEST_F(SIMDSelectionCS, WorkgroupSizeVariableNoSIMD8NoSIMD16)
+{
+   prog_data->local_size[0] = 0;
+   prog_data->local_size[1] = 0;
+   prog_data->local_size[2] = 0;
+
+   ASSERT_TRUE(should_compile(SIMD8));
+   ASSERT_TRUE(should_compile(SIMD16));
+   ASSERT_TRUE(should_compile(SIMD32));
+   brw_simd_mark_compiled(SIMD32, prog_data, not_spilled);
+
+   ASSERT_EQ(prog_data->prog_mask, 1u << SIMD32);
+
+   const unsigned wg_8_1_1[] = { 8, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_8_1_1), SIMD32);
+
+   const unsigned wg_16_1_1[] = { 16, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_16_1_1), SIMD32);
+
+   const unsigned wg_32_1_1[] = { 32, 1, 1 };
+   ASSERT_EQ(brw_simd_select_for_workgroup_size(devinfo, prog_data, wg_32_1_1), SIMD32);
 }
 
 TEST_F(SIMDSelectionCS, SpillAtSIMD8)
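
Note (illustration, not part of the patch): as a quick sanity check on the
arithmetic in the brw_cs_get_dispatch_info() hunk above, a hypothetical
40x1x1 workgroup dispatched at SIMD16 works out as follows; the numbers are
assumptions chosen only to exercise the formulas shown in that hunk.

   /* Hypothetical 40x1x1 workgroup with SIMD16 selected (simd == 1). */
   const unsigned group_size = 40 * 1 * 1;                               /* 40 invocations  */
   const unsigned simd_size  = 8u << 1;                                  /* 16 per thread   */
   const unsigned threads    = (group_size + simd_size - 1) / simd_size; /* DIV_ROUND_UP: 3 */
   const unsigned remainder  = group_size & (simd_size - 1);             /* 40 & 15 = 8     */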