nir: make inverse_ballot 1bit only

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37178>
This commit is contained in:
Georg Lehmann 2025-09-03 13:34:07 +02:00 committed by Marge Bot
parent 6c0017be38
commit ef8c364d3d
7 changed files with 21 additions and 21 deletions

View file

@@ -824,12 +824,12 @@ ac_nir_repack_invocations_in_workgroup(nir_builder *b, nir_def **input_bool,
nir_def *dont_care = nir_undef(b, 1, num_lds_dwords * 32);
nir_def *packed_counts = NULL;
nir_if *if_use_lds = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, ballot, wave_size)));
nir_if *if_use_lds = nir_push_if(b, nir_inverse_ballot(b, nir_imm_intN_t(b, ballot, wave_size)));
{
nir_def *store_val = surviving_invocations_in_current_wave[0];
if (num_repacks == 2) {
nir_def *lane_id_0 = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 1, wave_size));
nir_def *lane_id_0 = nir_inverse_ballot(b, nir_imm_intN_t(b, 1, wave_size));
nir_def *off = nir_bcsel(b, lane_id_0, nir_imm_int(b, 0), nir_imm_int(b, num_lds_dwords * 4));
lds_addr_base = nir_iadd_nuw(b, lds_addr_base, off);
store_val = nir_bcsel(b, lane_id_0, store_val, surviving_invocations_in_current_wave[1]);

View file

@@ -943,7 +943,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
nir_if *thread0 = nir_push_if(&top_b,
nir_iand(&top_b, nir_ieq_imm(&top_b, nir_load_subgroup_id(&top_b), 0),
nir_inverse_ballot(&top_b, 1, nir_imm_intN_t(&top_b, 0x1, st->wave_size))));
nir_inverse_ballot(&top_b, nir_imm_intN_t(&top_b, 0x1, st->wave_size))));
{
/* 0x3 is the initial bitmask (tf0 | tf1). Each subgroup will do atomic iand on it for the vote. */
nir_store_shared(&top_b, nir_imm_int(&top_b, 0x3), nir_imm_int(&top_b, 0),
@@ -1070,7 +1070,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
const unsigned tcs_vertices_out = b->shader->info.tess.tcs_vertices_out;
assert(tcs_vertices_out <= 32);
nir_def *is_first_active_lane =
nir_inverse_ballot(b, 1, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size));
nir_inverse_ballot(b, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size));
/* Only the first active invocation in each subgroup performs the AND reduction through LDS. */
nir_if *if_first_active_lane = nir_push_if(b, is_first_active_lane);
@@ -1094,7 +1094,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
/* Read the result from LDS. Only 1 lane should load it to prevent LDS bank conflicts. */
nir_def *lds_result;
nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0x1, st->wave_size)));
nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, nir_imm_intN_t(b, 0x1, st->wave_size)));
if_lane0->control = nir_selection_control_divergent_always_taken;
{
lds_result = nir_load_shared(b, 1, 32, nir_imm_int(b, 0), .align_mul = 4);

View file

@@ -451,7 +451,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
if (src->bit_size == 32) {
if (params->wave_size == 64) {
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, UINT32_MAX, 64));
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, UINT32_MAX, 64));
for (int i = 0; i < num_comps; i++) {
nir_def *comp = components[i];
nir_def *half_swap = nir_rotate(b, comp, nir_imm_int(b, 32), .cluster_size = 64);
@@ -463,7 +463,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
memcpy(components, tmp, sizeof(components));
}
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
for (int i = 0; i < num_comps; i++) {
unsigned swap16 = 0x1f | (0x10 << 10);
nir_def *half_swap = nir_masked_swizzle_amd(b, components[i], .swizzle_mask = swap16, .fetch_inactive = 1);
@@ -485,7 +485,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
nir_def *high_sel = nir_imm_int(b, src->bit_size == 8 ? 0x01050004 : 0x01000504);
if (params->wave_size == 64) {
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, UINT32_MAX, 64));
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, UINT32_MAX, 64));
nir_def *first_perm = nir_bcsel(b, low_lanes, low_sel, high_sel);
nir_def *second_perm = nir_ior_imm(b, first_perm, 0x02020202);
for (int i = 0; i < num_comps; i++) {
@@ -499,7 +499,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
memcpy(components, tmp, sizeof(components));
}
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
nir_def *first_perm = nir_bcsel(b, low_lanes, low_sel, high_sel);
nir_def *second_perm = nir_ior_imm(b, first_perm, 0x02020202);
for (int i = 0; i < num_comps; i++) {
@@ -526,7 +526,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
if (src->bit_size == 32) {
for (unsigned keep32 = 0; keep32 < ((params->wave_size == 64) ? 2 : 1); keep32++) {
nir_def *ballot = nir_imm_intN_t(b, keep32 ? UINT32_MAX : 0xffff0000ffffull, params->wave_size);
nir_def *keep = nir_inverse_ballot(b, 1, ballot);
nir_def *keep = nir_inverse_ballot(b, ballot);
num_comps /= 2;
for (unsigned i = 0; i < num_comps; i++) {
components[i] = nir_bcsel(b, keep, components[i * 2], components[i * 2 + 1]);
@@ -545,7 +545,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
for (unsigned keep32 = 0; keep32 < ((params->wave_size == 64) ? 2 : 1); keep32++) {
nir_def *ballot = nir_imm_intN_t(b, keep32 ? UINT32_MAX : 0xffff0000ffffull, params->wave_size);
nir_def *keep = nir_inverse_ballot(b, 1, ballot);
nir_def *keep = nir_inverse_ballot(b, ballot);
nir_def *perm = nir_bcsel(b, keep, low_sel, high_sel);
num_comps /= 2;
for (unsigned i = 0; i < num_comps; i++) {
@@ -569,8 +569,8 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
mask |= BITFIELD64_MASK(x_mask) << i;
}
nir_def *even = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, mask, params->wave_size));
nir_def *odd = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, mask << x_mask, params->wave_size));
nir_def *even = nir_inverse_ballot(b, nir_imm_intN_t(b, mask, params->wave_size));
nir_def *odd = nir_inverse_ballot(b, nir_imm_intN_t(b, mask << x_mask, params->wave_size));
for (unsigned i = 0; i < num_comps; i += 2 * x_mask) {
for (unsigned j = 0; j < x_mask; j++) {
@@ -594,7 +594,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
if (params->gfx_level >= GFX12) {
if (params->wave_size == 64) {
nir_def *cond = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xf0f0f0f00f0f0f0f, params->wave_size));
nir_def *cond = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xf0f0f0f00f0f0f0f, params->wave_size));
for (unsigned i = 0; i < num_comps; i++) {
nir_def *comp = components[i];
nir_def *compx = nir_rotate(b, comp, nir_imm_int(b, 32));
@@ -603,7 +603,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
}
}
nir_def *cond = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xff0000ffff0000ff, params->wave_size));
nir_def *cond = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xff0000ffff0000ff, params->wave_size));
for (unsigned i = 0; i < num_comps; i++) {
nir_def *comp = components[i];
nir_def *compx = nir_masked_swizzle_amd(b, comp, .swizzle_mask = 0x1f | (0x18 << 10), .fetch_inactive = 1);

View file

@@ -529,7 +529,7 @@ opt_fotid_bool(nir_builder *b, nir_alu_instr *instr, const radv_nir_opt_tid_func
}
nir_def *ballot = nir_vec(b, ballot_comp, options->hw_ballot_num_comp);
nir_def *res = nir_inverse_ballot(b, 1, ballot);
nir_def *res = nir_inverse_ballot(b, ballot);
res->parent_instr->pass_flags = 1;
nir_def_replace(&instr->def, res);

View file

@@ -546,7 +546,7 @@ intrinsic("read_getlast_ir3", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=S
intrinsic("elect", dest_comp=1, flags=SUBGROUP_FLAGS)
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
barrier("begin_invocation_interlock")
barrier("end_invocation_interlock")

View file

@@ -545,7 +545,7 @@ lower_boolean_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
nir_def *mask = nir_ishl(b, nir_imm_intN_t(b, 1, ballot->bit_size), index);
return nir_ine_imm(b, nir_iand(b, ballot, mask), 0);
} else {
return nir_inverse_ballot(b, 1, ballot);
return nir_inverse_ballot(b, ballot);
}
}
@@ -689,7 +689,7 @@ lower_boolean_reduce(nir_builder *b, nir_intrinsic_instr *intrin,
val = nir_inot(b, val);
}
return nir_inverse_ballot(b, 1, val);
return nir_inverse_ballot(b, val);
}
static nir_def *
@@ -1138,7 +1138,7 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
nir_load_subgroup_invocation(b));
} else if (intrin->src[0].ssa->num_components != options->ballot_components ||
intrin->src[0].ssa->bit_size != options->ballot_bit_size) {
return nir_inverse_ballot(b, 1, ballot_type_to_uint(b, intrin->src[0].ssa, options));
return nir_inverse_ballot(b, ballot_type_to_uint(b, intrin->src[0].ssa, options));
}
break;

View file

@@ -103,7 +103,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
}
case SpvOpGroupNonUniformInverseBallot: {
nir_def *dest = nir_inverse_ballot(&b->nb, 1, vtn_get_nir_ssa(b, w[4]));
nir_def *dest = nir_inverse_ballot(&b->nb, vtn_get_nir_ssa(b, w[4]));
vtn_push_nir_ssa(b, w[2], dest);
break;
}