intel/compiler: Attempt to build dual-SIMD8 variant of fragment shaders on gfx12+ platforms.

Similar to other FS dispatch modes, attempt to build a dual-SIMD8 program if the regular SIMD8 program didn't spill and doubling the amount of space for varyings doesn't cause us to go over the thread payload limit.

Dual-SIMD8 builds in combination with coarse pixel shading are currently not handled.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26585>
2025-12-24 15:20:10 +01:00 · 2022-06-22 16:32:57 -07:00 · 2022-06-22 16:32:57 -07:00 · 1f2c44dc21
commit 1f2c44dc21
parent 261d07f398
1 changed files with 38 additions and 3 deletions
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@ -7604,8 +7604,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
   brw_nir_populate_wm_prog_data(nir, compiler->devinfo, key, prog_data,
                                 params->mue_map);

-   std::unique_ptr<fs_visitor> v8, v16, v32;
-   cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
+   std::unique_ptr<fs_visitor> v8, v16, v32, vmulti;
+   cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL,
+      *multi_cfg = NULL;
   float throughput = 0;
   bool has_spilled = false;

@ -7713,6 +7714,31 @@ brw_compile_fs(const struct brw_compiler *compiler,
      }
   }

+   if (devinfo->ver >= 12 && !has_spilled &&
+       v8 && v8->max_dispatch_width >= 16 &&
+       params->max_polygons >= 2 && !key->coarse_pixel &&
+       2 * prog_data->num_varying_inputs <= MAX_VARYING &&
+       INTEL_SIMD(FS, 2X8)) {
+      /* Try a dual-SIMD8 compile */
+      vmulti = std::make_unique<fs_visitor>(compiler, &params->base, key,
+                                            prog_data, nir, 16, 2,
+                                            params->base.stats != NULL,
+                                            debug_enabled);
+      if (v8)
+         vmulti->import_uniforms(v8.get());
+      if (!vmulti->run_fs(allow_spilling, params->use_rep_send)) {
+         brw_shader_perf_log(compiler, params->base.log_data,
+                             "Dual-SIMD8 shader failed to compile: %s\n",
+                             vmulti->fail_msg);
+      } else {
+         multi_cfg = vmulti->cfg;
+         prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs;
+         prog_data->reg_blocks_8 = brw_register_blocks(vmulti->grf_used);
+         has_spilled = vmulti->spilled_any_registers;
+         allow_spilling = false;
+      }
+   }
+
   /* When the caller requests a repclear shader, they want SIMD16-only */
   if (params->use_rep_send)
      simd8_cfg = NULL;
@ -7761,7 +7787,16 @@ brw_compile_fs(const struct brw_compiler *compiler,
   struct brw_compile_stats *stats = params->base.stats;
   uint32_t max_dispatch_width = 0;

-   if (simd8_cfg) {
+   if (multi_cfg) {
+      prog_data->dispatch_multi = vmulti->dispatch_width;
+      prog_data->max_polygons = vmulti->max_polygons;
+      g.generate_code(multi_cfg, vmulti->dispatch_width, vmulti->shader_stats,
+                      vmulti->performance_analysis.require(),
+                      stats, vmulti->max_polygons);
+      stats = stats ? stats + 1 : NULL;
+      max_dispatch_width = vmulti->dispatch_width;
+
+   } else if (simd8_cfg) {
      prog_data->dispatch_8 = true;
      g.generate_code(simd8_cfg, 8, v8->shader_stats,
                      v8->performance_analysis.require(), stats, 1);