ac/nir: create lowered inverse_ballot

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Fixes: b49eab68a8 ("ac/nir: use s_sendmsg(HS_TESSFACTOR) to optimize writing tess factors for gfx11") Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Marek Olšák <maraeo@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35489>
2026-05-06 02:58:05 +02:00 · 2025-06-12 10:07:34 +01:00 · 2025-06-12 10:07:34 +01:00 · ac2e36b377
commit ac2e36b377
parent 29787911e7
1 changed files with 5 additions and 3 deletions
--- a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c
+++ b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c
@ -111,6 +111,7 @@
 typedef struct {
   /* Which hardware generation we're dealing with */
   enum amd_gfx_level gfx_level;
+   unsigned wave_size;
   nir_tcs_info tcs_info;
   ac_nir_tess_io_info io_info;

@ -942,7 +943,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,

   nir_if *thread0 = nir_push_if(&top_b,
                                 nir_iand(&top_b, nir_ieq_imm(&top_b, nir_load_subgroup_id(&top_b), 0),
-                                          nir_inverse_ballot(&top_b, 1, nir_imm_ivec4(&top_b, 0x1, 0, 0, 0))));
+                                          nir_inverse_ballot(&top_b, 1, nir_imm_intN_t(&top_b, 0x1, st->wave_size))));
   {
      /* 0x3 is the initial bitmask (tf0 | tf1). Each subgroup will do atomic iand on it for the vote. */
      nir_store_shared(&top_b, nir_imm_int(&top_b, 0x3), nir_imm_int(&top_b, 0),
@ -1069,7 +1070,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
      const unsigned tcs_vertices_out = b->shader->info.tess.tcs_vertices_out;
      assert(tcs_vertices_out <= 32);
      nir_def *is_first_active_lane =
-         nir_inverse_ballot(b, 1, nir_imm_ivec4(b, BITFIELD_MASK(tcs_vertices_out), 0, 0, 0));
+         nir_inverse_ballot(b, 1, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size));

      /* Only the first active invocation in each subgroup performs the AND reduction through LDS. */
      nir_if *if_first_active_lane = nir_push_if(b, is_first_active_lane);
@ -1093,7 +1094,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,

   /* Read the result from LDS. Only 1 lane should load it to prevent LDS bank conflicts. */
   nir_def *lds_result;
-   nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_ivec4(b, 0x1, 0, 0, 0)));
+   nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0x1, st->wave_size)));
   if_lane0->control = nir_selection_control_divergent_always_taken;
   {
      lds_result = nir_load_shared(b, 1, 32, nir_imm_int(b, 0), .align_mul = 4);
@ -1601,6 +1602,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info,

   lower_tess_io_state state = {
      .gfx_level = gfx_level,
+      .wave_size = wave_size,
      .tcs_info = *info,
      .io_info = *io_info,
      .tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0,