nir: Split lower_vote_eq into int/float versions

Recent NVIDIA hardware has a native instruction for
nir_intrinsic_vote_ieq but not for nir_intrinsic_vote_feq. So, split
the lower_vote_eq boolean into lower_vote_feq and lower_vote_ieq so we
can control the lowering separately for each instruction.

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35778>
This commit is contained in:
Mel Henning 2025-06-26 16:29:56 -04:00 committed by Marge Bot
parent 00fe8e45a0
commit 10acb44c64
7 changed files with 17 additions and 7 deletions

View file

@ -588,7 +588,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.lower_relative_shuffle = 1,
.lower_rotate_to_shuffle = use_llvm,
.lower_shuffle_to_32bit = 1,
.lower_vote_eq = 1,
.lower_vote_feq = 1,
.lower_vote_ieq = 1,
.lower_vote_bool_eq = 1,
.lower_quad_broadcast_dynamic = 1,
.lower_quad_broadcast_dynamic_to_const = gfx7minus,

View file

@ -5356,7 +5356,8 @@ typedef struct nir_lower_subgroups_options {
uint8_t ballot_components;
bool lower_to_scalar : 1;
bool lower_vote_trivial : 1;
bool lower_vote_eq : 1;
bool lower_vote_feq : 1;
bool lower_vote_ieq : 1;
bool lower_vote_bool_eq : 1;
bool lower_first_invocation_to_ballot : 1;
bool lower_read_first_invocation : 1;

View file

@ -1035,7 +1035,11 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
if (options->lower_vote_bool_eq)
return lower_vote_eq(b, intrin);
} else {
if (options->lower_vote_eq)
if (intrin->intrinsic == nir_intrinsic_vote_feq &&
options->lower_vote_feq)
return lower_vote_eq(b, intrin);
if (intrin->intrinsic == nir_intrinsic_vote_ieq &&
options->lower_vote_ieq)
return lower_vote_eq(b, intrin);
}

View file

@ -888,7 +888,8 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
.ballot_bit_size = 32,
.ballot_components = max_subgroup_size / 32,
.lower_to_scalar = true,
.lower_vote_eq = true,
.lower_vote_feq = true,
.lower_vote_ieq = true,
.lower_vote_bool_eq = true,
.lower_subgroup_masks = true,
.lower_read_invocation_to_cond = true,

View file

@ -1646,7 +1646,8 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
.lower_relative_shuffle = true,
.lower_rotate_to_shuffle = !nir->info.use_aco_amd,
.lower_shuffle_to_32bit = true,
.lower_vote_eq = true,
.lower_vote_feq = true,
.lower_vote_ieq = true,
.lower_vote_bool_eq = true,
.lower_quad_broadcast_dynamic = true,
.lower_quad_broadcast_dynamic_to_const = sel->screen->info.gfx_level <= GFX7,

View file

@ -948,7 +948,8 @@ nak_postprocess_nir(nir_shader *nir,
.ballot_bit_size = 32,
.ballot_components = 1,
.lower_to_scalar = true,
.lower_vote_eq = true,
.lower_vote_feq = true,
.lower_vote_ieq = true,
.lower_first_invocation_to_ballot = true,
.lower_read_first_invocation = true,
.lower_elect = true,

View file

@ -5929,7 +5929,8 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
.ballot_bit_size = 32,
.ballot_components = 1,
.lower_to_scalar = true,
.lower_vote_eq = true,
.lower_vote_feq = true,
.lower_vote_ieq = true,
.lower_vote_bool_eq = true,
.lower_first_invocation_to_ballot = true,
.lower_read_first_invocation = true,