Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-29 18:50:10 +01:00)
nir: make inverse_ballot 1bit only
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37178>
This commit is contained in:
parent 6c0017be38
commit ef8c364d3d
7 changed files with 21 additions and 21 deletions
|
|
@@ -824,12 +824,12 @@ ac_nir_repack_invocations_in_workgroup(nir_builder *b, nir_def **input_bool,
|
|||
nir_def *dont_care = nir_undef(b, 1, num_lds_dwords * 32);
|
||||
nir_def *packed_counts = NULL;
|
||||
|
||||
nir_if *if_use_lds = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, ballot, wave_size)));
|
||||
nir_if *if_use_lds = nir_push_if(b, nir_inverse_ballot(b, nir_imm_intN_t(b, ballot, wave_size)));
|
||||
{
|
||||
nir_def *store_val = surviving_invocations_in_current_wave[0];
|
||||
|
||||
if (num_repacks == 2) {
|
||||
nir_def *lane_id_0 = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 1, wave_size));
|
||||
nir_def *lane_id_0 = nir_inverse_ballot(b, nir_imm_intN_t(b, 1, wave_size));
|
||||
nir_def *off = nir_bcsel(b, lane_id_0, nir_imm_int(b, 0), nir_imm_int(b, num_lds_dwords * 4));
|
||||
lds_addr_base = nir_iadd_nuw(b, lds_addr_base, off);
|
||||
store_val = nir_bcsel(b, lane_id_0, store_val, surviving_invocations_in_current_wave[1]);
|
||||
|
|
|
|||
|
|
@@ -943,7 +943,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
|
|||
|
||||
nir_if *thread0 = nir_push_if(&top_b,
|
||||
nir_iand(&top_b, nir_ieq_imm(&top_b, nir_load_subgroup_id(&top_b), 0),
|
||||
nir_inverse_ballot(&top_b, 1, nir_imm_intN_t(&top_b, 0x1, st->wave_size))));
|
||||
nir_inverse_ballot(&top_b, nir_imm_intN_t(&top_b, 0x1, st->wave_size))));
|
||||
{
|
||||
/* 0x3 is the initial bitmask (tf0 | tf1). Each subgroup will do atomic iand on it for the vote. */
|
||||
nir_store_shared(&top_b, nir_imm_int(&top_b, 0x3), nir_imm_int(&top_b, 0),
|
||||
|
|
@@ -1070,7 +1070,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
|
|||
const unsigned tcs_vertices_out = b->shader->info.tess.tcs_vertices_out;
|
||||
assert(tcs_vertices_out <= 32);
|
||||
nir_def *is_first_active_lane =
|
||||
nir_inverse_ballot(b, 1, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size));
|
||||
nir_inverse_ballot(b, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size));
|
||||
|
||||
/* Only the first active invocation in each subgroup performs the AND reduction through LDS. */
|
||||
nir_if *if_first_active_lane = nir_push_if(b, is_first_active_lane);
|
||||
|
|
@@ -1094,7 +1094,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st,
|
|||
|
||||
/* Read the result from LDS. Only 1 lane should load it to prevent LDS bank conflicts. */
|
||||
nir_def *lds_result;
|
||||
nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0x1, st->wave_size)));
|
||||
nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, nir_imm_intN_t(b, 0x1, st->wave_size)));
|
||||
if_lane0->control = nir_selection_control_divergent_always_taken;
|
||||
{
|
||||
lds_result = nir_load_shared(b, 1, 32, nir_imm_int(b, 0), .align_mul = 4);
|
||||
|
|
|
|||
|
|
@@ -451,7 +451,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
|
||||
if (src->bit_size == 32) {
|
||||
if (params->wave_size == 64) {
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, UINT32_MAX, 64));
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, UINT32_MAX, 64));
|
||||
for (int i = 0; i < num_comps; i++) {
|
||||
nir_def *comp = components[i];
|
||||
nir_def *half_swap = nir_rotate(b, comp, nir_imm_int(b, 32), .cluster_size = 64);
|
||||
|
|
@@ -463,7 +463,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
memcpy(components, tmp, sizeof(components));
|
||||
}
|
||||
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
|
||||
for (int i = 0; i < num_comps; i++) {
|
||||
unsigned swap16 = 0x1f | (0x10 << 10);
|
||||
nir_def *half_swap = nir_masked_swizzle_amd(b, components[i], .swizzle_mask = swap16, .fetch_inactive = 1);
|
||||
|
|
@@ -485,7 +485,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
nir_def *high_sel = nir_imm_int(b, src->bit_size == 8 ? 0x01050004 : 0x01000504);
|
||||
|
||||
if (params->wave_size == 64) {
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, UINT32_MAX, 64));
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, UINT32_MAX, 64));
|
||||
nir_def *first_perm = nir_bcsel(b, low_lanes, low_sel, high_sel);
|
||||
nir_def *second_perm = nir_ior_imm(b, first_perm, 0x02020202);
|
||||
for (int i = 0; i < num_comps; i++) {
|
||||
|
|
@@ -499,7 +499,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
memcpy(components, tmp, sizeof(components));
|
||||
}
|
||||
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
|
||||
nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size));
|
||||
nir_def *first_perm = nir_bcsel(b, low_lanes, low_sel, high_sel);
|
||||
nir_def *second_perm = nir_ior_imm(b, first_perm, 0x02020202);
|
||||
for (int i = 0; i < num_comps; i++) {
|
||||
|
|
@@ -526,7 +526,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
if (src->bit_size == 32) {
|
||||
for (unsigned keep32 = 0; keep32 < ((params->wave_size == 64) ? 2 : 1); keep32++) {
|
||||
nir_def *ballot = nir_imm_intN_t(b, keep32 ? UINT32_MAX : 0xffff0000ffffull, params->wave_size);
|
||||
nir_def *keep = nir_inverse_ballot(b, 1, ballot);
|
||||
nir_def *keep = nir_inverse_ballot(b, ballot);
|
||||
num_comps /= 2;
|
||||
for (unsigned i = 0; i < num_comps; i++) {
|
||||
components[i] = nir_bcsel(b, keep, components[i * 2], components[i * 2 + 1]);
|
||||
|
|
@@ -545,7 +545,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
|
||||
for (unsigned keep32 = 0; keep32 < ((params->wave_size == 64) ? 2 : 1); keep32++) {
|
||||
nir_def *ballot = nir_imm_intN_t(b, keep32 ? UINT32_MAX : 0xffff0000ffffull, params->wave_size);
|
||||
nir_def *keep = nir_inverse_ballot(b, 1, ballot);
|
||||
nir_def *keep = nir_inverse_ballot(b, ballot);
|
||||
nir_def *perm = nir_bcsel(b, keep, low_sel, high_sel);
|
||||
num_comps /= 2;
|
||||
for (unsigned i = 0; i < num_comps; i++) {
|
||||
|
|
@@ -569,8 +569,8 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
mask |= BITFIELD64_MASK(x_mask) << i;
|
||||
}
|
||||
|
||||
nir_def *even = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, mask, params->wave_size));
|
||||
nir_def *odd = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, mask << x_mask, params->wave_size));
|
||||
nir_def *even = nir_inverse_ballot(b, nir_imm_intN_t(b, mask, params->wave_size));
|
||||
nir_def *odd = nir_inverse_ballot(b, nir_imm_intN_t(b, mask << x_mask, params->wave_size));
|
||||
|
||||
for (unsigned i = 0; i < num_comps; i += 2 * x_mask) {
|
||||
for (unsigned j = 0; j < x_mask; j++) {
|
||||
|
|
@@ -594,7 +594,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
|
||||
if (params->gfx_level >= GFX12) {
|
||||
if (params->wave_size == 64) {
|
||||
nir_def *cond = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xf0f0f0f00f0f0f0f, params->wave_size));
|
||||
nir_def *cond = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xf0f0f0f00f0f0f0f, params->wave_size));
|
||||
for (unsigned i = 0; i < num_comps; i++) {
|
||||
nir_def *comp = components[i];
|
||||
nir_def *compx = nir_rotate(b, comp, nir_imm_int(b, 32));
|
||||
|
|
@@ -603,7 +603,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_
|
|||
}
|
||||
}
|
||||
|
||||
nir_def *cond = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xff0000ffff0000ff, params->wave_size));
|
||||
nir_def *cond = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xff0000ffff0000ff, params->wave_size));
|
||||
for (unsigned i = 0; i < num_comps; i++) {
|
||||
nir_def *comp = components[i];
|
||||
nir_def *compx = nir_masked_swizzle_amd(b, comp, .swizzle_mask = 0x1f | (0x18 << 10), .fetch_inactive = 1);
|
||||
|
|
|
|||
|
|
@@ -529,7 +529,7 @@ opt_fotid_bool(nir_builder *b, nir_alu_instr *instr, const radv_nir_opt_tid_func
|
|||
}
|
||||
|
||||
nir_def *ballot = nir_vec(b, ballot_comp, options->hw_ballot_num_comp);
|
||||
nir_def *res = nir_inverse_ballot(b, 1, ballot);
|
||||
nir_def *res = nir_inverse_ballot(b, ballot);
|
||||
res->parent_instr->pass_flags = 1;
|
||||
|
||||
nir_def_replace(&instr->def, res);
|
||||
|
|
|
|||
|
|
@@ -546,7 +546,7 @@ intrinsic("read_getlast_ir3", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=S
|
|||
intrinsic("elect", dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS)
|
||||
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
barrier("begin_invocation_interlock")
|
||||
barrier("end_invocation_interlock")
|
||||
|
|
|
|||
|
|
@@ -545,7 +545,7 @@ lower_boolean_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
nir_def *mask = nir_ishl(b, nir_imm_intN_t(b, 1, ballot->bit_size), index);
|
||||
return nir_ine_imm(b, nir_iand(b, ballot, mask), 0);
|
||||
} else {
|
||||
return nir_inverse_ballot(b, 1, ballot);
|
||||
return nir_inverse_ballot(b, ballot);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -689,7 +689,7 @@ lower_boolean_reduce(nir_builder *b, nir_intrinsic_instr *intrin,
|
|||
val = nir_inot(b, val);
|
||||
}
|
||||
|
||||
return nir_inverse_ballot(b, 1, val);
|
||||
return nir_inverse_ballot(b, val);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@@ -1138,7 +1138,7 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
|
|||
nir_load_subgroup_invocation(b));
|
||||
} else if (intrin->src[0].ssa->num_components != options->ballot_components ||
|
||||
intrin->src[0].ssa->bit_size != options->ballot_bit_size) {
|
||||
return nir_inverse_ballot(b, 1, ballot_type_to_uint(b, intrin->src[0].ssa, options));
|
||||
return nir_inverse_ballot(b, ballot_type_to_uint(b, intrin->src[0].ssa, options));
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@@ -103,7 +103,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
|
|||
}
|
||||
|
||||
case SpvOpGroupNonUniformInverseBallot: {
|
||||
nir_def *dest = nir_inverse_ballot(&b->nb, 1, vtn_get_nir_ssa(b, w[4]));
|
||||
nir_def *dest = nir_inverse_ballot(&b->nb, vtn_get_nir_ssa(b, w[4]));
|
||||
vtn_push_nir_ssa(b, w[2], dest);
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue