diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index d533f1d72bb..33c2515726c 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -824,12 +824,12 @@ ac_nir_repack_invocations_in_workgroup(nir_builder *b, nir_def **input_bool, nir_def *dont_care = nir_undef(b, 1, num_lds_dwords * 32); nir_def *packed_counts = NULL; - nir_if *if_use_lds = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, ballot, wave_size))); + nir_if *if_use_lds = nir_push_if(b, nir_inverse_ballot(b, nir_imm_intN_t(b, ballot, wave_size))); { nir_def *store_val = surviving_invocations_in_current_wave[0]; if (num_repacks == 2) { - nir_def *lane_id_0 = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 1, wave_size)); + nir_def *lane_id_0 = nir_inverse_ballot(b, nir_imm_intN_t(b, 1, wave_size)); nir_def *off = nir_bcsel(b, lane_id_0, nir_imm_int(b, 0), nir_imm_int(b, num_lds_dwords * 4)); lds_addr_base = nir_iadd_nuw(b, lds_addr_base, off); store_val = nir_bcsel(b, lane_id_0, store_val, surviving_invocations_in_current_wave[1]); diff --git a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c index 5918cb50c46..240ae322224 100644 --- a/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c +++ b/src/amd/common/nir/ac_nir_lower_tess_io_to_mem.c @@ -943,7 +943,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st, nir_if *thread0 = nir_push_if(&top_b, nir_iand(&top_b, nir_ieq_imm(&top_b, nir_load_subgroup_id(&top_b), 0), - nir_inverse_ballot(&top_b, 1, nir_imm_intN_t(&top_b, 0x1, st->wave_size)))); + nir_inverse_ballot(&top_b, nir_imm_intN_t(&top_b, 0x1, st->wave_size)))); { /* 0x3 is the initial bitmask (tf0 | tf1). Each subgroup will do atomic iand on it for the vote. */ nir_store_shared(&top_b, nir_imm_int(&top_b, 0x3), nir_imm_int(&top_b, 0), @@ -1070,7 +1070,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st, const unsigned tcs_vertices_out = b->shader->info.tess.tcs_vertices_out; assert(tcs_vertices_out <= 32); nir_def *is_first_active_lane = - nir_inverse_ballot(b, 1, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size)); + nir_inverse_ballot(b, nir_imm_intN_t(b, BITFIELD_MASK(tcs_vertices_out), st->wave_size)); /* Only the first active invocation in each subgroup performs the AND reduction through LDS. */ nir_if *if_first_active_lane = nir_push_if(b, is_first_active_lane); @@ -1094,7 +1094,7 @@ hs_tess_level_group_vote(nir_builder *b, lower_tess_io_state *st, /* Read the result from LDS. Only 1 lane should load it to prevent LDS bank conflicts. */ nir_def *lds_result; - nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0x1, st->wave_size))); + nir_if *if_lane0 = nir_push_if(b, nir_inverse_ballot(b, nir_imm_intN_t(b, 0x1, st->wave_size))); if_lane0->control = nir_selection_control_divergent_always_taken; { lds_result = nir_load_shared(b, 1, 32, nir_imm_int(b, 0), .align_mul = 4); diff --git a/src/amd/vulkan/nir/radv_nir_lower_cooperative_matrix.c b/src/amd/vulkan/nir/radv_nir_lower_cooperative_matrix.c index 8625ea35333..5cd097287c7 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_cooperative_matrix.c +++ b/src/amd/vulkan/nir/radv_nir_lower_cooperative_matrix.c @@ -451,7 +451,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ if (src->bit_size == 32) { if (params->wave_size == 64) { - nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, UINT32_MAX, 64)); + nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, UINT32_MAX, 64)); for (int i = 0; i < num_comps; i++) { nir_def *comp = components[i]; nir_def *half_swap = nir_rotate(b, comp, nir_imm_int(b, 32), .cluster_size = 64); @@ -463,7 +463,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ memcpy(components, tmp, sizeof(components)); } - nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size)); + nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size)); for (int i = 0; i < num_comps; i++) { unsigned swap16 = 0x1f | (0x10 << 10); nir_def *half_swap = nir_masked_swizzle_amd(b, components[i], .swizzle_mask = swap16, .fetch_inactive = 1); @@ -485,7 +485,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ nir_def *high_sel = nir_imm_int(b, src->bit_size == 8 ? 0x01050004 : 0x01000504); if (params->wave_size == 64) { - nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, UINT32_MAX, 64)); + nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, UINT32_MAX, 64)); nir_def *first_perm = nir_bcsel(b, low_lanes, low_sel, high_sel); nir_def *second_perm = nir_ior_imm(b, first_perm, 0x02020202); for (int i = 0; i < num_comps; i++) { @@ -499,7 +499,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ memcpy(components, tmp, sizeof(components)); } - nir_def *low_lanes = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size)); + nir_def *low_lanes = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xffff0000ffffull, params->wave_size)); nir_def *first_perm = nir_bcsel(b, low_lanes, low_sel, high_sel); nir_def *second_perm = nir_ior_imm(b, first_perm, 0x02020202); for (int i = 0; i < num_comps; i++) { @@ -526,7 +526,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ if (src->bit_size == 32) { for (unsigned keep32 = 0; keep32 < ((params->wave_size == 64) ? 2 : 1); keep32++) { nir_def *ballot = nir_imm_intN_t(b, keep32 ? UINT32_MAX : 0xffff0000ffffull, params->wave_size); - nir_def *keep = nir_inverse_ballot(b, 1, ballot); + nir_def *keep = nir_inverse_ballot(b, ballot); num_comps /= 2; for (unsigned i = 0; i < num_comps; i++) { components[i] = nir_bcsel(b, keep, components[i * 2], components[i * 2 + 1]); @@ -545,7 +545,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ for (unsigned keep32 = 0; keep32 < ((params->wave_size == 64) ? 2 : 1); keep32++) { nir_def *ballot = nir_imm_intN_t(b, keep32 ? UINT32_MAX : 0xffff0000ffffull, params->wave_size); - nir_def *keep = nir_inverse_ballot(b, 1, ballot); + nir_def *keep = nir_inverse_ballot(b, ballot); nir_def *perm = nir_bcsel(b, keep, low_sel, high_sel); num_comps /= 2; for (unsigned i = 0; i < num_comps; i++) { @@ -569,8 +569,8 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ mask |= BITFIELD64_MASK(x_mask) << i; } - nir_def *even = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, mask, params->wave_size)); - nir_def *odd = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, mask << x_mask, params->wave_size)); + nir_def *even = nir_inverse_ballot(b, nir_imm_intN_t(b, mask, params->wave_size)); + nir_def *odd = nir_inverse_ballot(b, nir_imm_intN_t(b, mask << x_mask, params->wave_size)); for (unsigned i = 0; i < num_comps; i += 2 * x_mask) { for (unsigned j = 0; j < x_mask; j++) { @@ -594,7 +594,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ if (params->gfx_level >= GFX12) { if (params->wave_size == 64) { - nir_def *cond = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xf0f0f0f00f0f0f0f, params->wave_size)); + nir_def *cond = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xf0f0f0f00f0f0f0f, params->wave_size)); for (unsigned i = 0; i < num_comps; i++) { nir_def *comp = components[i]; nir_def *compx = nir_rotate(b, comp, nir_imm_int(b, 32)); @@ -603,7 +603,7 @@ convert_use(nir_builder *b, nir_def *src, enum glsl_cmat_use src_use, enum glsl_ } } - nir_def *cond = nir_inverse_ballot(b, 1, nir_imm_intN_t(b, 0xff0000ffff0000ff, params->wave_size)); + nir_def *cond = nir_inverse_ballot(b, nir_imm_intN_t(b, 0xff0000ffff0000ff, params->wave_size)); for (unsigned i = 0; i < num_comps; i++) { nir_def *comp = components[i]; nir_def *compx = nir_masked_swizzle_amd(b, comp, .swizzle_mask = 0x1f | (0x18 << 10), .fetch_inactive = 1); diff --git a/src/amd/vulkan/nir/radv_nir_opt_tid_function.c b/src/amd/vulkan/nir/radv_nir_opt_tid_function.c index 7e1417e4b90..68587f36613 100644 --- a/src/amd/vulkan/nir/radv_nir_opt_tid_function.c +++ b/src/amd/vulkan/nir/radv_nir_opt_tid_function.c @@ -529,7 +529,7 @@ opt_fotid_bool(nir_builder *b, nir_alu_instr *instr, const radv_nir_opt_tid_func } nir_def *ballot = nir_vec(b, ballot_comp, options->hw_ballot_num_comp); - nir_def *res = nir_inverse_ballot(b, 1, ballot); + nir_def *res = nir_inverse_ballot(b, ballot); res->parent_instr->pass_flags = 1; nir_def_replace(&instr->def, res); diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 583b14fb203..70f58dc83f5 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -546,7 +546,7 @@ intrinsic("read_getlast_ir3", src_comp=[0], dest_comp=0, bit_sizes=src0, flags=S intrinsic("elect", dest_comp=1, flags=SUBGROUP_FLAGS) intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS) intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=SUBGROUP_FLAGS) -intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, bit_sizes=[1], flags=[CAN_ELIMINATE, CAN_REORDER]) barrier("begin_invocation_interlock") barrier("end_invocation_interlock") diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c index 90af472c4d1..f92e86305ee 100644 --- a/src/compiler/nir/nir_lower_subgroups.c +++ b/src/compiler/nir/nir_lower_subgroups.c @@ -545,7 +545,7 @@ lower_boolean_shuffle(nir_builder *b, nir_intrinsic_instr *intrin, nir_def *mask = nir_ishl(b, nir_imm_intN_t(b, 1, ballot->bit_size), index); return nir_ine_imm(b, nir_iand(b, ballot, mask), 0); } else { - return nir_inverse_ballot(b, 1, ballot); + return nir_inverse_ballot(b, ballot); } } @@ -689,7 +689,7 @@ lower_boolean_reduce(nir_builder *b, nir_intrinsic_instr *intrin, val = nir_inot(b, val); } - return nir_inverse_ballot(b, 1, val); + return nir_inverse_ballot(b, val); } static nir_def * @@ -1138,7 +1138,7 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options) nir_load_subgroup_invocation(b)); } else if (intrin->src[0].ssa->num_components != options->ballot_components || intrin->src[0].ssa->bit_size != options->ballot_bit_size) { - return nir_inverse_ballot(b, 1, ballot_type_to_uint(b, intrin->src[0].ssa, options)); + return nir_inverse_ballot(b, ballot_type_to_uint(b, intrin->src[0].ssa, options)); } break; diff --git a/src/compiler/spirv/vtn_subgroup.c b/src/compiler/spirv/vtn_subgroup.c index e90df1c714e..a1e27721934 100644 --- a/src/compiler/spirv/vtn_subgroup.c +++ b/src/compiler/spirv/vtn_subgroup.c @@ -103,7 +103,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode, } case SpvOpGroupNonUniformInverseBallot: { - nir_def *dest = nir_inverse_ballot(&b->nb, 1, vtn_get_nir_ssa(b, w[4])); + nir_def *dest = nir_inverse_ballot(&b->nb, vtn_get_nir_ssa(b, w[4])); vtn_push_nir_ssa(b, w[2], dest); break; }