ir3,tu: Refactor push consts info plumbing

In preparation for a new way to pass push consts into a shader, introduced in a7xx.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25086>
2026-05-05 13:58:04 +02:00 · 2023-09-05 11:26:16 +02:00 · 2023-09-05 11:26:16 +02:00 · 823b3bfeea
commit 823b3bfeea
parent d5d7631060
15 changed files with 89 additions and 84 deletions
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@ -72,7 +72,8 @@ is_shared_consts(struct ir3_compiler *compiler,
                 struct ir3_const_state *const_state,
                 struct ir3_register *reg)
 {
-   if (const_state->shared_consts_enable && reg->flags & IR3_REG_CONST) {
+   if (const_state->push_consts_type == IR3_PUSH_CONSTS_SHARED &&
+       reg->flags & IR3_REG_CONST) {
      uint32_t min_const_reg = regid(compiler->shared_consts_base_offset, 0);
      uint32_t max_const_reg =
         regid(compiler->shared_consts_base_offset +
@ -136,9 +137,9 @@ ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
   const struct ir3_compiler *compiler = v->compiler;

   /* If the user forced a particular wavesize respect that. */
-   if (v->real_wavesize == IR3_SINGLE_ONLY)
+   if (v->shader_options.real_wavesize == IR3_SINGLE_ONLY)
      return false;
-   if (v->real_wavesize == IR3_DOUBLE_ONLY)
+   if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY)
      return true;

   /* We can't support more than compiler->branchstack_size diverging threads
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@ -2087,7 +2087,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
          */
         ctx->so->constlen =
            MAX2(ctx->so->constlen,
-                 ctx->so->num_reserved_user_consts +
+                 ctx->so->shader_options.num_reserved_user_consts +
                 const_state->ubo_state.size / 16);
      }
      break;
--- a/src/freedreno/ir3/ir3_disk_cache.c
+++ b/src/freedreno/ir3/ir3_disk_cache.c
@ -90,10 +90,10 @@ ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler,
   _mesa_sha1_update(&ctx, blob.data, blob.size);
   blob_finish(&blob);

-   _mesa_sha1_update(&ctx, &shader->api_wavesize,
-                     sizeof(shader->api_wavesize));
-   _mesa_sha1_update(&ctx, &shader->real_wavesize,
-                     sizeof(shader->real_wavesize));
+   _mesa_sha1_update(&ctx, &shader->options.api_wavesize,
+                     sizeof(shader->options.api_wavesize));
+   _mesa_sha1_update(&ctx, &shader->options.real_wavesize,
+                     sizeof(shader->options.real_wavesize));

   /* Note that on some gens stream-out is lowered in ir3 to stg.  For later
    * gens we maybe don't need to include stream-out in the cache key.
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@ -531,7 +531,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
       * the "real" subgroup size.
       */
      unsigned subgroup_size = 0, max_subgroup_size = 0;
-      switch (shader->api_wavesize) {
+      switch (shader->options.api_wavesize) {
      case IR3_SINGLE_ONLY:
         subgroup_size = max_subgroup_size = compiler->threadsize_base;
         break;
@ -981,7 +981,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
   const_state->num_ubos = nir->info.num_ubos;

   assert((const_state->ubo_state.size % 16) == 0);
-   unsigned constoff = v->num_reserved_user_consts +
+   unsigned constoff = v->shader_options.num_reserved_user_consts +
      const_state->ubo_state.size / 16 +
      const_state->preamble_size;
   unsigned ptrsz = ir3_pointer_size(compiler);
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@ -450,7 +450,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
      uint32_t range_size = state->range[i].end - state->range[i].start;

      assert(offset <= max_upload);
-      state->range[i].offset = offset + v->num_reserved_user_consts * 16;
+      state->range[i].offset = offset + v->shader_options.num_reserved_user_consts * 16;
      assert(offset <= max_upload);
      offset += range_size;
   }
--- a/src/freedreno/ir3/ir3_nir_opt_preamble.c
+++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c
@ -303,7 +303,7 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)

   /* First, lower load/store_preamble. */  
   const struct ir3_const_state *const_state = ir3_const_state(v);
-   unsigned preamble_base = v->num_reserved_user_consts * 4 +
+   unsigned preamble_base = v->shader_options.num_reserved_user_consts * 4 +
      const_state->ubo_state.size / 4;
   unsigned preamble_size = const_state->preamble_size * 4;

--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@ -2577,7 +2577,7 @@ ir3_ra(struct ir3_shader_variant *v)
    * because on some gens the register file is not big enough to hold a
    * double-size wave with all 48 registers in use.
    */
-   if (v->real_wavesize == IR3_DOUBLE_ONLY) {
+   if (v->shader_options.real_wavesize == IR3_DOUBLE_ONLY) {
      limit_pressure.full =
         MAX2(limit_pressure.full, ctx->compiler->reg_size_vec4 / 2 * 16);
   }
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@ -298,13 +298,11 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,

   v->num_ssbos = info->num_ssbos;
   v->num_ibos = info->num_ssbos + info->num_images;
-   v->num_reserved_user_consts = shader->num_reserved_user_consts;
-   v->api_wavesize = shader->api_wavesize;
-   v->real_wavesize = shader->real_wavesize;
+   v->shader_options = shader->options;

   if (!v->binning_pass) {
      v->const_state = rzalloc_size(v, sizeof(*v->const_state));
-      v->const_state->shared_consts_enable = shader->shared_consts_enable;
+      v->const_state->push_consts_type = shader->options.push_consts_type;
   }

   return v;
@ -589,7 +587,7 @@ ir3_trim_constlen(const struct ir3_shader_variant **variants,
      if (variants[i]) {
         constlens[i] = variants[i]->constlen;
         shared_consts_enable =
-            ir3_const_state(variants[i])->shared_consts_enable;
+            ir3_const_state(variants[i])->push_consts_type == IR3_PUSH_CONSTS_SHARED;
      }
   }

@ -641,10 +639,7 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
   if (stream_output)
      memcpy(&shader->stream_output, stream_output,
             sizeof(shader->stream_output));
-   shader->num_reserved_user_consts = options->reserved_user_consts;
-   shader->api_wavesize = options->api_wavesize;
-   shader->real_wavesize = options->real_wavesize;
-   shader->shared_consts_enable = options->shared_consts_enable;
+   shader->options = *options;
   shader->nir = nir;

   ir3_disk_cache_init_shader_key(compiler, shader);
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@ -147,6 +147,12 @@ struct ir3_ubo_analysis_state {
   uint32_t size;
 };

+enum ir3_push_consts_type {
+   IR3_PUSH_CONSTS_NONE,
+   IR3_PUSH_CONSTS_PER_STAGE,
+   IR3_PUSH_CONSTS_SHARED,
+};
+
 /**
 * Describes the layout of shader consts in the const register file.
 *
@ -213,7 +219,7 @@ struct ir3_const_state {

   /* State of ubo access lowered to push consts: */
   struct ir3_ubo_analysis_state ubo_state;
-   bool shared_consts_enable;
+   enum ir3_push_consts_type push_consts_type;
 };

 /**
@ -489,6 +495,20 @@ struct ir3_disasm_info {
 /* Represents half register in regid */
 #define HALF_REG_ID 0x100

+struct ir3_shader_options {
+   unsigned num_reserved_user_consts;
+   /* What API-visible wavesizes are allowed. Even if only double wavesize is
+    * allowed, we may still use the smaller wavesize "under the hood" and the
+    * application simply sees the upper half as always disabled.
+    */
+   enum ir3_wavesize_option api_wavesize;
+   /* What wavesizes we're allowed to actually use. If the API wavesize is
+    * single-only, then this must be single-only too.
+    */
+   enum ir3_wavesize_option real_wavesize;
+   enum ir3_push_consts_type push_consts_type;
+};
+
 /**
 * Shader variant which contains the actual hw shader instructions,
 * and necessary info for shader state setup.
@ -554,6 +574,8 @@ struct ir3_shader_variant {

   struct ir3_info info;

+   struct ir3_shader_options shader_options;
+
   uint32_t constant_data_size;

   /* Levels of nesting of flow control:
@ -751,8 +773,6 @@ struct ir3_shader_variant {
   /* The total number of SSBOs and images, i.e. the number of hardware IBOs. */
   unsigned num_ibos;

-   unsigned num_reserved_user_consts;
-
   union {
      struct {
         enum tess_primitive_mode primitive_mode;
@ -790,8 +810,6 @@ struct ir3_shader_variant {
      } cs;
   };

-   enum ir3_wavesize_option api_wavesize, real_wavesize;
-
   /* For when we don't have a shader, variant's copy of streamout state */
   struct ir3_stream_output_info stream_output;
 };
@ -849,18 +867,7 @@ struct ir3_shader {

   struct ir3_compiler *compiler;

-   unsigned num_reserved_user_consts;
-
-   /* What API-visible wavesizes are allowed. Even if only double wavesize is
-    * allowed, we may still use the smaller wavesize "under the hood" and the
-    * application simply sees the upper half as always disabled.
-    */
-   enum ir3_wavesize_option api_wavesize;
-
-   /* What wavesizes we're allowed to actually use. If the API wavesize is
-    * single-only, then this must be single-only too.
-    */
-   enum ir3_wavesize_option real_wavesize;
+   struct ir3_shader_options options;

   bool nir_finalized;
   struct nir_shader *nir;
@ -893,8 +900,6 @@ struct ir3_shader {
    * recompiles for GL NOS that doesn't actually apply to the shader.
    */
   struct ir3_shader_key key_mask;
-
-   bool shared_consts_enable;
 };

 /**
@ -914,7 +919,8 @@ static inline unsigned
 _ir3_max_const(const struct ir3_shader_variant *v, bool safe_constlen)
 {
   const struct ir3_compiler *compiler = v->compiler;
-   bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable;
+   bool shared_consts_enable =
+      ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED;

   /* Shared consts size for CS and FS matches with what's acutally used,
    * but the size of shared consts for geomtry stages doesn't.
@ -969,13 +975,6 @@ ir3_shader_get_variant(struct ir3_shader *shader,
                       const struct ir3_shader_key *key, bool binning_pass,
                       bool keep_ir, bool *created);

-
-struct ir3_shader_options {
-   unsigned reserved_user_consts;
-   enum ir3_wavesize_option api_wavesize, real_wavesize;
-   bool shared_consts_enable;
-};
-
 struct ir3_shader *
 ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,
                    const struct ir3_shader_options *options,
--- a/src/freedreno/vulkan/tu_clear_blit.cc
+++ b/src/freedreno/vulkan/tu_clear_blit.cc
@ -741,7 +741,7 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
   ir3_finalize_nir(dev->compiler, nir);

   const struct ir3_shader_options options = {
-      .reserved_user_consts = align(consts, 4),
+      .num_reserved_user_consts = align(consts, 4),
      .api_wavesize = IR3_SINGLE_OR_DOUBLE,
      .real_wavesize = IR3_SINGLE_OR_DOUBLE,
   };
--- a/src/freedreno/vulkan/tu_cmd_buffer.cc
+++ b/src/freedreno/vulkan/tu_cmd_buffer.cc
@ -4274,9 +4274,10 @@ tu6_emit_user_consts(struct tu_cs *cs,
                     struct tu_descriptor_state *descriptors,
                     uint32_t *push_constants)
 {
-   if (const_state->push_consts.dwords > 0) {
+   if (const_state->push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) {
      unsigned num_units = const_state->push_consts.dwords;
      unsigned offset = const_state->push_consts.lo;
+      assert(num_units > 0);

      /* DST_OFF and NUM_UNIT requires vec4 units */
      tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + num_units);
@ -4355,7 +4356,7 @@ tu6_const_size(struct tu_cmd_buffer *cmd,
 {
   uint32_t dwords = 0;

-   if (shared_consts->dwords > 0) {
+   if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) {
      dwords += shared_consts->dwords + 4;
   }

@ -4376,7 +4377,7 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
 {
   uint32_t dwords = 0;
   const struct tu_push_constant_range *shared_consts =
-      compute ? &cmd->state.shaders[MESA_SHADER_COMPUTE]->shared_consts :
+      compute ? &cmd->state.shaders[MESA_SHADER_COMPUTE]->const_state.push_consts :
      &cmd->state.program.shared_consts;

   dwords = tu6_const_size(cmd, shared_consts, compute);
@ -4387,14 +4388,8 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
   struct tu_cs cs;
   tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);

-   if (shared_consts->dwords > 0) {
+   if (shared_consts->type == IR3_PUSH_CONSTS_SHARED) {
      tu6_emit_shared_consts(&cs, shared_consts, cmd->push_constants, compute);
-
-      for (uint32_t i = 0; i < ARRAY_SIZE(cmd->state.program.link); i++) {
-         const struct tu_program_descriptor_linkage *link =
-            &cmd->state.program.link[i];
-         assert(!link->tu_const_state.push_consts.dwords);
-      }
   }

   if (compute) {
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@ -308,6 +308,20 @@ tu_blend_state_is_dual_src(const struct vk_color_blend_state *cb)
   return false;
 }

+enum ir3_push_consts_type
+tu_push_consts_type(const struct tu_pipeline_layout *layout,
+                    const struct ir3_compiler *compiler)
+{
+   if (!layout->push_constant_size)
+      return IR3_PUSH_CONSTS_NONE;
+
+   if (tu6_shared_constants_enable(layout, compiler)) {
+      return IR3_PUSH_CONSTS_SHARED;
+   } else {
+      return IR3_PUSH_CONSTS_PER_STAGE;
+   }
+}
+
 template <chip CHIP>
 struct xs_config {
   uint16_t reg_sp_xs_config;
@ -2321,9 +2335,10 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
                              &pipeline->shaders[i]->const_state,
                              variants[i]);

-      if (pipeline->shaders[i]->shared_consts.dwords != 0) {
+      if (pipeline->shaders[i]->const_state.push_consts.type ==
+          IR3_PUSH_CONSTS_SHARED) {
         pipeline->program.shared_consts =
-            pipeline->shaders[i]->shared_consts;
+            pipeline->shaders[i]->const_state.push_consts;
      }
   }

--- a/src/freedreno/vulkan/tu_pipeline.h
+++ b/src/freedreno/vulkan/tu_pipeline.h
@ -70,6 +70,10 @@ tu6_shared_constants_enable(const struct tu_pipeline_layout *layout,
          layout->push_constant_size <= (compiler->shared_consts_size * 16);
 }

+enum ir3_push_consts_type
+tu_push_consts_type(const struct tu_pipeline_layout *layout,
+                    const struct ir3_compiler *compiler);
+
 struct tu_program_descriptor_linkage
 {
   struct ir3_const_state const_state;
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@ -682,8 +682,7 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
   }

   if (min >= max) {
-      tu_shader->const_state.push_consts.lo = 0;
-      tu_shader->const_state.push_consts.dwords = 0;
+      tu_shader->const_state.push_consts = (struct tu_push_constant_range) {};
      return;
   }

@ -706,7 +705,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
            const struct tu_pipeline_layout *layout,
            unsigned *reserved_consts_vec4_out)
 {
-   if (!tu6_shared_constants_enable(layout, dev->compiler))
+   if (tu_shader->const_state.push_consts.type == IR3_PUSH_CONSTS_PER_STAGE)
      gather_push_constants(shader, tu_shader);

   struct tu_const_state *const_state = &tu_shader->const_state;
@ -1227,7 +1226,8 @@ tu6_emit_cs_config(struct tu_cs *cs,
                   const struct tu_pvtmem_config *pvtmem,
                   uint64_t binary_iova)
 {
-   bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable;
+   bool shared_consts_enable =
+      ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED;
   tu6_emit_shared_consts_enable<CHIP>(cs, shared_consts_enable);

   tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
@ -2084,7 +2084,6 @@ tu_shader_serialize(struct vk_pipeline_cache_object *object,
      container_of(object, struct tu_shader, base);

   blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state));
-   blob_write_bytes(blob, &shader->shared_consts, sizeof(shader->shared_consts));
   blob_write_uint32(blob, shader->view_mask);
   blob_write_uint8(blob, shader->active_desc_sets);

@ -2126,7 +2125,6 @@ tu_shader_deserialize(struct vk_pipeline_cache *cache,
      return NULL;

   blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state));
-   blob_copy_bytes(blob, &shader->shared_consts, sizeof(shader->shared_consts));
   shader->view_mask = blob_read_uint32(blob);
   shader->active_desc_sets = blob_read_uint8(blob);

@ -2270,6 +2268,12 @@ tu_shader_create(struct tu_device *dev,
         nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, &so_info);

+   shader->const_state.push_consts = (struct tu_push_constant_range) {
+      .lo = 0,
+      .dwords = layout->push_constant_size / 4,
+      .type = tu_push_consts_type(layout, dev->compiler),
+   };
+
   unsigned reserved_consts_vec4 = 0;
   NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);

@ -2277,20 +2281,11 @@ tu_shader_create(struct tu_device *dev,

   ir3_finalize_nir(dev->compiler, nir);

-   bool shared_consts_enable = tu6_shared_constants_enable(layout, dev->compiler);
-   if (shared_consts_enable) {
-      assert(!shader->const_state.push_consts.dwords);
-      shader->shared_consts = (struct tu_push_constant_range) {
-         .lo = 0,
-         .dwords = layout->push_constant_size / 4,
-      };
-   }
-
   const struct ir3_shader_options options = {
-      .reserved_user_consts = reserved_consts_vec4,
+      .num_reserved_user_consts = reserved_consts_vec4,
      .api_wavesize = key->api_wavesize,
      .real_wavesize = key->real_wavesize,
-      .shared_consts_enable = shared_consts_enable,
+      .push_consts_type = shader->const_state.push_consts.type,
   };

   struct ir3_shader *ir3_shader =
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@ -30,10 +30,16 @@ struct tu_inline_ubo
   unsigned size_vec4;
 };

+/* The meaning of the range depends on "type". If it's
+ * IR3_PUSH_CONSTS_PER_STAGE, then it's the range used by this shader. If
+ * it's IR3_PUSH_CONSTS_SHARED then it's the overall range as provided by
+ * the pipeline layout and must match between shaders where it's non-zero.
+ */
 struct tu_push_constant_range
 {
   uint32_t lo;
   uint32_t dwords;
+   enum ir3_push_consts_type type;
 };

 struct tu_const_state
@ -63,11 +69,6 @@ struct tu_shader
   uint32_t view_mask;
   uint8_t active_desc_sets;

-   /* This is the range of shared consts used by all shaders. It must be the
-    * same between shaders.
-    */
-   struct tu_push_constant_range shared_consts;
-
   union {
      struct {
         unsigned patch_type;