mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 05:48:07 +02:00
nir/spirv: Add inverse_ballot intrinsic
This is actually a no-op on AMD, so we really don't want to lower it to something more complicated. There may be a more efficient way to do this on Intel too. In addition, in the future we'll want to use this for lowering boolean reduce operations, where the inverse ballot will operate on the backend's "natural" ballot type as indicated by options->ballot_bit_size, instead of uvec4 as produced by SPIR-V.

In total, there are now three possible lowerings we may have to perform:

- inverse_ballot with source type of uvec4 from SPIR-V to inverse_ballot with natural source type, when the backend supports inverse_ballot natively.
- inverse_ballot with source type of uvec4 from SPIR-V to arithmetic, when the backend doesn't support inverse_ballot.
- inverse_ballot with natural source type from reduce operation to arithmetic, when the backend doesn't support inverse_ballot.

Previously we just did the second lowering unconditionally in vtn, but it's just a combination of the first and third. We add support here for the first and third lowerings in nir_lower_subgroups, instead of simply moving the second lowering, to avoid unnecessary churn.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25123>
This commit is contained in:
parent
0ef87f148d
commit
4282386311
14 changed files with 25 additions and 16 deletions
|
|
@ -638,6 +638,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
|
|||
.lower_quad_broadcast_dynamic_to_const = gfx7minus,
|
||||
.lower_shuffle_to_swizzle_amd = 1,
|
||||
.lower_ballot_bit_count_to_mbcnt_amd = 1,
|
||||
.lower_inverse_ballot = 1,
|
||||
});
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
|
||||
|
|
|
|||
|
|
@ -5369,6 +5369,7 @@ typedef struct nir_lower_subgroups_options {
|
|||
bool lower_read_invocation_to_cond : 1;
|
||||
bool lower_rotate_to_shuffle : 1;
|
||||
bool lower_ballot_bit_count_to_mbcnt_amd : 1;
|
||||
bool lower_inverse_ballot : 1;
|
||||
} nir_lower_subgroups_options;
|
||||
|
||||
bool nir_lower_subgroups(nir_shader *shader,
|
||||
|
|
|
|||
|
|
@ -478,6 +478,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
/* Intrinsics which are always divergent */
|
||||
case nir_intrinsic_inverse_ballot:
|
||||
case nir_intrinsic_load_color0:
|
||||
case nir_intrinsic_load_color1:
|
||||
case nir_intrinsic_load_param:
|
||||
|
|
|
|||
|
|
@ -446,9 +446,11 @@ intrinsic("read_invocation_cond_ir3", src_comp=[0, 1], dest_comp=0, flags=[CAN_E
|
|||
#
|
||||
# OpGroupNonUniformElect
|
||||
# OpSubgroupFirstInvocationKHR
|
||||
# OpGroupNonUniformInverseBallot
|
||||
intrinsic("elect", dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
intrinsic("first_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
intrinsic("last_invocation", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE])
|
||||
intrinsic("inverse_ballot", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE])
|
||||
|
||||
barrier("begin_invocation_interlock")
|
||||
barrier("end_invocation_interlock")
|
||||
|
|
|
|||
|
|
@ -683,6 +683,16 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
|
|||
intrin->def.bit_size);
|
||||
}
|
||||
|
||||
case nir_intrinsic_inverse_ballot:
|
||||
if (options->lower_inverse_ballot) {
|
||||
return nir_ballot_bitfield_extract(b, 1, intrin->src[0].ssa,
|
||||
nir_load_subgroup_invocation(b));
|
||||
} else if (intrin->src[0].ssa->num_components != options->ballot_components ||
|
||||
intrin->src[0].ssa->bit_size != options->ballot_bit_size) {
|
||||
return nir_inverse_ballot(b, 1, ballot_type_to_uint(b, intrin->src[0].ssa, options));
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_ballot_bitfield_extract:
|
||||
case nir_intrinsic_ballot_bit_count_reduce:
|
||||
case nir_intrinsic_ballot_find_lsb:
|
||||
|
|
|
|||
|
|
@ -103,22 +103,8 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
|
|||
}
|
||||
|
||||
case SpvOpGroupNonUniformInverseBallot: {
|
||||
/* This one is just a BallotBitfieldExtract with subgroup invocation.
|
||||
* We could add a NIR intrinsic but it's easier to just lower it on the
|
||||
* spot.
|
||||
*/
|
||||
nir_intrinsic_instr *intrin =
|
||||
nir_intrinsic_instr_create(b->nb.shader,
|
||||
nir_intrinsic_ballot_bitfield_extract);
|
||||
|
||||
intrin->src[0] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[4]));
|
||||
intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));
|
||||
|
||||
nir_def_init_for_type(&intrin->instr, &intrin->def,
|
||||
dest_type->type);
|
||||
nir_builder_instr_insert(&b->nb, &intrin->instr);
|
||||
|
||||
vtn_push_nir_ssa(b, w[2], &intrin->def);
|
||||
nir_def *dest = nir_inverse_ballot(&b->nb, 1, vtn_get_nir_ssa(b, w[4]));
|
||||
vtn_push_nir_ssa(b, w[2], dest);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -564,6 +564,7 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
|
|||
.lower_read_invocation_to_cond = true,
|
||||
.lower_shuffle = true,
|
||||
.lower_relative_shuffle = true,
|
||||
.lower_inverse_ballot = true,
|
||||
};
|
||||
|
||||
if (!((s->info.stage == MESA_SHADER_COMPUTE) ||
|
||||
|
|
|
|||
|
|
@ -2996,6 +2996,7 @@ lp_build_opt_nir(struct nir_shader *nir)
|
|||
.lower_to_scalar = true,
|
||||
.lower_subgroup_masks = true,
|
||||
.lower_relative_shuffle = true,
|
||||
.lower_inverse_ballot = true,
|
||||
};
|
||||
NIR_PASS(progress, nir, nir_lower_subgroups, &subgroups_options);
|
||||
} while (progress);
|
||||
|
|
|
|||
|
|
@ -254,6 +254,7 @@ const nir_lower_subgroups_options si_nir_subgroups_options = {
|
|||
.lower_subgroup_masks = true,
|
||||
.lower_vote_trivial = false,
|
||||
.lower_vote_eq = true,
|
||||
.lower_inverse_ballot = true,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -5423,6 +5423,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
|
|||
subgroup_options.subgroup_size = 1;
|
||||
subgroup_options.lower_vote_trivial = true;
|
||||
}
|
||||
subgroup_options.lower_inverse_ballot = true;
|
||||
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -401,6 +401,7 @@ lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_
|
|||
subgroup_opts.lower_quad = true;
|
||||
subgroup_opts.ballot_components = 1;
|
||||
subgroup_opts.ballot_bit_size = 32;
|
||||
subgroup_opts.lower_inverse_ballot = true;
|
||||
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
|
|
|
|||
|
|
@ -1000,6 +1000,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
|
|||
.lower_relative_shuffle = true,
|
||||
.lower_quad_broadcast_dynamic = true,
|
||||
.lower_elect = true,
|
||||
.lower_inverse_ballot = true,
|
||||
};
|
||||
OPT(nir_lower_subgroups, &subgroups_options);
|
||||
|
||||
|
|
|
|||
|
|
@ -971,6 +971,7 @@ dxil_spirv_nir_passes(nir_shader *nir,
|
|||
.lower_subgroup_masks = true,
|
||||
.lower_to_scalar = true,
|
||||
.lower_relative_shuffle = true,
|
||||
.lower_inverse_ballot = true,
|
||||
};
|
||||
if (nir->info.stage != MESA_SHADER_FRAGMENT &&
|
||||
nir->info.stage != MESA_SHADER_COMPUTE)
|
||||
|
|
|
|||
|
|
@ -3242,6 +3242,7 @@ Converter::run()
|
|||
subgroup_options.ballot_bit_size = 32;
|
||||
subgroup_options.ballot_components = 1;
|
||||
subgroup_options.lower_elect = true;
|
||||
subgroup_options.lower_inverse_ballot = true;
|
||||
|
||||
unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) |
|
||||
(nir->options->lower_flrp32 ? 32 : 0) |
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue