ac/nir: create lowered inverse_ballot

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: b49eab68a8 ("ac/nir: use s_sendmsg(HS_TESSFACTOR) to optimize writing tess factors for gfx11")
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35489>
This commit is contained in:
Rhys Perry 2025-06-12 10:07:34 +01:00 committed by Marge Bot
parent 29787911e7
commit ac2e36b377

View file

@@ -111,6 +111,7 @@
typedef struct {
/* Which hardware generation we're dealing with */
enum amd_gfx_level gfx_level;
+unsigned wave_size;
nir_tcs_info tcs_info;
ac_nir_tess_io_info io_info;
@@ -942,7 +943,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
nir_if *thread0 = nir_push_if(&top_b,
nir_iand(&top_b, nir_ieq_imm(&top_b, nir_load_subgroup_id(&top_b), 0),
-nir_inverse_ballot(&top_b, 1, nir_imm_ivec4(&top_b, 0x1, 0, 0, 0))));
+nir_inverse_ballot(&top_b, 1, nir_imm_intN_t(&top_b, 0x1, st->wave_size))));
{
/* 0x3 is the initial bitmask (tf0 | tf1). Each subgroup will do atomic iand on it for the vote. */
nir_store_shared(&top_b, nir_imm_int(&top_b, 0x3), nir_imm_int(&top_b, 0),
@@ -1069,7 +1070,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
const unsigned tcs_vertices_out = b->shader->info.tess.tcs_vertices_out;
assert(tcs_vertices_out <= 32);
nir_def *is_first_active_lane =
-nir_inverse_ballot(b, 1, nir_imm_ivec4(b, BITFIELD_MASK(tcs_vertices_out), 0, 0, 0));
+nir_inverse_ballot(b, 1, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size));
/* Only the first active invocation in each subgroup performs the AND reduction through LDS. */
nir_if *if_first_active_lane = nir_push_if(b, is_first_active_lane);
@@ -1093,7 +1094,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
/* Read the result from LDS. Only 1 lane should load it to prevent LDS bank conflicts. */
nir_def *lds_result;
-nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_ivec4(b, 0x1, 0, 0, 0)));
+nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0x1, st->wave_size)));
if_lane0->control = nir_selection_control_divergent_always_taken;
{
lds_result = nir_load_shared(b, 1, 32, nir_imm_int(b, 0), .align_mul = 4);
@@ -1601,6 +1602,7 @@ ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info,
lower_tess_io_state state = {
.gfx_level = gfx_level,
+.wave_size = wave_size,
.tcs_info = *info,
.io_info = *io_info,
.tcs_out_patch_fits_subgroup = wave_size % shader->info.tess.tcs_vertices_out == 0,