From 9d53e275791edacfedab49d1822523f33b64707d Mon Sep 17 00:00:00 2001
From: Caio Oliveira <caio.oliveira@intel.com>
Date: Fri, 14 Feb 2025 09:08:01 -0800
Subject: [PATCH] intel/brw: Remove brw_shader::import_uniforms()

The brw_shader::uniforms now is derived from the nir_shader.  The
only exception is compute shaders for older Gfx versions, so we
move the adjust logic for that.

The benefit here is untangling the code for compilation variants,
that before needed to keep track of the first that compiled to,
in most cases, copy an integer.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33541>
---
 src/intel/compiler/brw_compile_cs.cpp   | 40 +++++++++++++++++++++++--
 src/intel/compiler/brw_compile_fs.cpp   | 13 ++------
 src/intel/compiler/brw_compile_mesh.cpp | 10 -------
 src/intel/compiler/brw_compiler.h       | 12 --------
 src/intel/compiler/brw_from_nir.cpp     | 26 ----------------
 src/intel/compiler/brw_shader.cpp       | 11 +------
 src/intel/compiler/brw_shader.h         |  2 --
 7 files changed, 40 insertions(+), 74 deletions(-)

diff --git a/src/intel/compiler/brw_compile_cs.cpp b/src/intel/compiler/brw_compile_cs.cpp
index 03f7e7c30b8..34c59a2da4d 100644
--- a/src/intel/compiler/brw_compile_cs.cpp
+++ b/src/intel/compiler/brw_compile_cs.cpp
@@ -120,6 +120,41 @@ brw_nir_uses_sampler(nir_shader *shader)
                                        NULL);
 }
 
+static inline uint32_t *
+brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
+                               unsigned nr_new_params)
+{
+   unsigned old_nr_params = prog_data->nr_params;
+   prog_data->nr_params += nr_new_params;
+   prog_data->param = reralloc(ralloc_parent(prog_data->param),
+                               prog_data->param, uint32_t,
+                               prog_data->nr_params);
+   return prog_data->param + old_nr_params;
+}
+
+static void
+brw_adjust_uniforms(brw_shader &s)
+{
+   if (s.devinfo->verx10 >= 125)
+      return;
+
+   assert(mesa_shader_stage_is_compute(s.stage));
+
+   if (brw_get_subgroup_id_param_index(s.devinfo, s.prog_data) == -1) {
+      /* Add uniforms for builtins after regular NIR uniforms. */
+      assert(s.uniforms == s.prog_data->nr_params);
+
+      /* Subgroup ID must be the last uniform on the list.  This will make
+       * easier later to split between cross thread and per thread
+       * uniforms.
+       */
+      uint32_t *param = brw_stage_prog_data_add_params(s.prog_data, 1);
+      *param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
+   }
+
+   s.uniforms = s.prog_data->nr_params;
+}
+
 const unsigned *
 brw_compile_cs(const struct brw_compiler *compiler,
                struct brw_compile_cs_params *params)
@@ -188,15 +223,14 @@ brw_compile_cs(const struct brw_compiler *compiler,
          .debug_enabled           = debug_enabled,
       };
       v[simd] = std::make_unique<brw_shader>(&shader_params);
+      brw_adjust_uniforms(*v[simd]);
 
       const bool allow_spilling = simd == 0 ||
          (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
          nir->info.workgroup_size_variable;
 
       if (devinfo->ver < 30 || nir->info.workgroup_size_variable) {
-         const int first = brw_simd_first_compiled(simd_state);
-         if (first >= 0)
-            v[simd]->import_uniforms(v[first].get());
+         ASSERTED const int first = brw_simd_first_compiled(simd_state);
          assert(allow_spilling == (first < 0 || nir->info.workgroup_size_variable));
       }
 
diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index f393a72b5af..b9d8c1daa50 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -1713,8 +1713,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
          shader_params.num_polygons   = 1;
 
          v32 = std::make_unique<brw_shader>(&shader_params);
-         if (vbase)
-            v32->import_uniforms(vbase);
 
          if (!run_fs(*v32, false, false)) {
             brw_shader_perf_log(compiler, params->base.log_data,
@@ -1763,8 +1761,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
          shader_params.num_polygons   = 1;
 
          v16 = std::make_unique<brw_shader>(&shader_params);
-         if (v8)
-            v16->import_uniforms(v8.get());
+
          if (!run_fs(*v16, allow_spilling, params->use_rep_send)) {
             brw_shader_perf_log(compiler, params->base.log_data,
                                 "SIMD16 shader failed to compile: %s\n",
@@ -1798,10 +1795,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
          shader_params.num_polygons   = 1;
 
          v32 = std::make_unique<brw_shader>(&shader_params);
-         if (v8)
-            v32->import_uniforms(v8.get());
-         else if (v16)
-            v32->import_uniforms(v16.get());
 
          if (!run_fs(*v32, allow_spilling, false)) {
             brw_shader_perf_log(compiler, params->base.log_data,
@@ -1842,7 +1835,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
             shader_params.num_polygons   = 4;
             vmulti = std::make_unique<brw_shader>(&shader_params);
 
-            vmulti->import_uniforms(vbase);
             if (!run_fs(*vmulti, false, params->use_rep_send)) {
                brw_shader_perf_log(compiler, params->base.log_data,
                                    "Quad-SIMD8 shader failed to compile: %s\n",
@@ -1863,7 +1855,6 @@ brw_compile_fs(const struct brw_compiler *compiler,
             shader_params.num_polygons   = 2;
 
             vmulti = std::make_unique<brw_shader>(&shader_params);
-            vmulti->import_uniforms(vbase);
             if (!run_fs(*vmulti, false, params->use_rep_send)) {
                brw_shader_perf_log(compiler, params->base.log_data,
                                    "Dual-SIMD16 shader failed to compile: %s\n",
@@ -1883,7 +1874,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
             shader_params.num_polygons   = 2;
 
             vmulti = std::make_unique<brw_shader>(&shader_params);
-            vmulti->import_uniforms(vbase);
+
             if (!run_fs(*vmulti, allow_spilling, params->use_rep_send)) {
                brw_shader_perf_log(compiler, params->base.log_data,
                                    "Dual-SIMD8 shader failed to compile: %s\n",
diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp
index 67f2ec47ca2..344ec8e9eba 100644
--- a/src/intel/compiler/brw_compile_mesh.cpp
+++ b/src/intel/compiler/brw_compile_mesh.cpp
@@ -419,11 +419,6 @@ brw_compile_task(const struct brw_compiler *compiler,
       };
       v[simd] = std::make_unique<brw_shader>(&shader_params);
 
-      if (prog_data->base.prog_mask) {
-         unsigned first = ffs(prog_data->base.prog_mask) - 1;
-         v[simd]->import_uniforms(v[first].get());
-      }
-
       const bool allow_spilling = simd == 0 ||
          (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1));
       if (run_task_mesh(*v[simd], allow_spilling)) {
@@ -1279,11 +1274,6 @@ brw_compile_mesh(const struct brw_compiler *compiler,
       };
       v[simd] = std::make_unique<brw_shader>(&shader_params);
 
-      if (prog_data->base.prog_mask) {
-         unsigned first = ffs(prog_data->base.prog_mask) - 1;
-         v[simd]->import_uniforms(v[first].get());
-      }
-
       const bool allow_spilling = simd == 0 ||
          (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1));
       if (run_task_mesh(*v[simd], allow_spilling)) {
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 1e26e9f4bde..c8fc5788c3e 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -664,18 +664,6 @@ struct brw_stage_prog_data {
  */
 unsigned ptl_register_blocks(unsigned grf_used);
 
-static inline uint32_t *
-brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
-                               unsigned nr_new_params)
-{
-   unsigned old_nr_params = prog_data->nr_params;
-   prog_data->nr_params += nr_new_params;
-   prog_data->param = reralloc(ralloc_parent(prog_data->param),
-                               prog_data->param, uint32_t,
-                               prog_data->nr_params);
-   return prog_data->param + old_nr_params;
-}
-
 void
 brw_stage_prog_data_add_printf(struct brw_stage_prog_data *prog_data,
                                void *mem_ctx,
diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp
index 4b8dd0d9f77..b1d07ee0876 100644
--- a/src/intel/compiler/brw_from_nir.cpp
+++ b/src/intel/compiler/brw_from_nir.cpp
@@ -142,31 +142,6 @@ brw_from_nir_setup_outputs(nir_to_brw_state &ntb)
    }
 }
 
-static void
-brw_from_nir_setup_uniforms(brw_shader &s)
-{
-   const intel_device_info *devinfo = s.devinfo;
-
-   /* Only the first compile gets to set up uniforms. */
-   if (s.uniforms)
-      return;
-
-   s.uniforms = s.nir->num_uniforms / 4;
-
-   if (mesa_shader_stage_is_compute(s.stage) && devinfo->verx10 < 125) {
-      /* Add uniforms for builtins after regular NIR uniforms. */
-      assert(s.uniforms == s.prog_data->nr_params);
-
-      /* Subgroup ID must be the last uniform on the list.  This will make
-       * easier later to split between cross thread and per thread
-       * uniforms.
-       */
-      uint32_t *param = brw_stage_prog_data_add_params(s.prog_data, 1);
-      *param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
-      s.uniforms++;
-   }
-}
-
 static brw_reg
 emit_work_group_id_setup(nir_to_brw_state &ntb)
 {
@@ -8066,7 +8041,6 @@ brw_from_nir(brw_shader *s)
     * be converted to reads/writes of these arrays
     */
    brw_from_nir_setup_outputs(ntb);
-   brw_from_nir_setup_uniforms(ntb.s);
    brw_from_nir_emit_system_values(ntb);
    ntb.s.last_scratch = ALIGN(ntb.nir->scratch_size, 4) * ntb.s.dispatch_width;
 
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index ad398d68ff3..5bbb4b10bd1 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -430,7 +430,7 @@ brw_shader::brw_shader(const brw_shader_params *params)
    this->source_depth_to_render_target = false;
    this->first_non_payload_grf = 0;
 
-   this->uniforms = 0;
+   this->uniforms = this->nir->num_uniforms / 4;
    this->last_scratch = 0;
 
    memset(&this->shader_stats, 0, sizeof(this->shader_stats));
@@ -517,15 +517,6 @@ brw_shader::limit_dispatch_width(unsigned n, const char *msg)
    }
 }
 
-/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
- * This brings in those uniform definitions
- */
-void
-brw_shader::import_uniforms(brw_shader *v)
-{
-   this->uniforms = v->uniforms;
-}
-
 enum intel_barycentric_mode
 brw_barycentric_mode(const struct brw_wm_prog_key *key,
                      nir_intrinsic_instr *intr)
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 9e33146aaa3..44aa044c12a 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -85,8 +85,6 @@ public:
    brw_shader(const brw_shader_params *params);
    ~brw_shader();
 
-   void import_uniforms(brw_shader *v);
-
    void assign_curb_setup();
    void convert_attr_sources_to_hw_regs(brw_inst *inst);
    void calculate_payload_ranges(bool allow_spilling,