intel/fs/gfx12: Don't consider multipolygon PS to have packed dispatch.

This fixes a number of regressions and hangs in multipolygon fragment shaders containing FIND_LIVE_CHANNEL sequences, which would otherwise lead to accessing a dead channel. Note that the failures don't seem to be reproducible in simulation.

Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26585>
2026-05-05 00:58:05 +02:00 · 2023-12-07 19:38:02 -08:00 · 2023-12-07 19:38:02 -08:00 · 5e0760a993
commit 5e0760a993
parent 8f92baa5d3
3 changed files with 14 additions and 9 deletions
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -2190,7 +2190,7 @@ brw_cs_get_dispatch_info(const struct intel_device_info *devinfo,
 */
 static inline bool
 brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
-                              gl_shader_stage stage,
+                              gl_shader_stage stage, unsigned max_polygons,
                              const struct brw_stage_prog_data *prog_data)
 {
   /* The code below makes assumptions about the hardware's thread dispatch
@@ -2214,7 +2214,8 @@ brw_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
         (const struct brw_wm_prog_data *)prog_data;
      return devinfo->verx10 < 125 &&
             !wm_prog_data->persample_dispatch &&
-             wm_prog_data->uses_vmask;
+             wm_prog_data->uses_vmask &&
+             max_polygons < 2;
   }
   case MESA_SHADER_COMPUTE:
      /* Compute shaders will be spawned with either a fully enabled dispatch
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3293,7 +3293,8 @@ fs_visitor::eliminate_find_live_channel()
   bool progress = false;
   unsigned depth = 0;

-   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, max_polygons,
+                                      stage_prog_data)) {
      /* The optimization below assumes that channel zero is live on thread
       * dispatch, which may not be the case if the fixed function dispatches
       * threads sparsely.
@@ -5625,7 +5626,8 @@ fs_visitor::lower_find_live_channel()
      return false;

   bool packed_dispatch =
-      brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data);
+      brw_stage_has_packed_dispatch(devinfo, stage, max_polygons,
+                                    stage_prog_data);
   bool vmask =
      stage == MESA_SHADER_FRAGMENT &&
      brw_wm_prog_data(stage_prog_data)->uses_vmask;
@@ -8232,13 +8234,15 @@ brw_compile_bs(const struct brw_compiler *compiler,
 static UNUSED void
 brw_fs_test_dispatch_packing(const fs_builder &bld)
 {
-   const gl_shader_stage stage = bld.shader->stage;
+   const fs_visitor *shader = static_cast<const fs_visitor *>(bld.shader);
+   const gl_shader_stage stage = shader->stage;
   const bool uses_vmask =
      stage == MESA_SHADER_FRAGMENT &&
-      brw_wm_prog_data(bld.shader->stage_prog_data)->uses_vmask;
+      brw_wm_prog_data(shader->stage_prog_data)->uses_vmask;

-   if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage,
-                                     bld.shader->stage_prog_data)) {
+   if (brw_stage_has_packed_dispatch(shader->devinfo, stage,
+                                     shader->max_polygons,
+                                     shader->stage_prog_data)) {
      const fs_builder ubld = bld.exec_all().group(1, 0);
      const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0);
      const fs_reg mask = uses_vmask ? brw_vmask_reg() : brw_dmask_reg();
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1208,7 +1208,7 @@ vec4_visitor::eliminate_find_live_channel()
   bool progress = false;
   unsigned depth = 0;

-   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, 0, stage_prog_data)) {
      /* The optimization below assumes that channel zero is live on thread
       * dispatch, which may not be the case if the fixed function dispatches
       * threads sparsely.