mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-10 01:18:18 +02:00
nir,nak: Add match_any_nv
NVIDIA hardware has an instruction allowing you to retrieve the mask of active threads matching the same source value as the current invocation. This is going to be used by shared memory lowering for mesh / task stages on NVK. Signed-off-by: Mary Guillemard <mary@mary.zone> Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Tested-by: Thomas H.P. Andersen <phomes@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27196>
This commit is contained in:
parent
d88c183785
commit
b95dbc64bf
3 changed files with 23 additions and 0 deletions
|
|
@ -1063,6 +1063,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_agx:
|
||||
case nir_intrinsic_load_shared_lock_nv:
|
||||
case nir_intrinsic_store_shared_unlock_nv:
|
||||
case nir_intrinsic_match_any_nv:
|
||||
case nir_intrinsic_bvh_stack_rtn_amd:
|
||||
case nir_intrinsic_cmat_load_shared_nv:
|
||||
case nir_intrinsic_cmat_mov_transpose_nv:
|
||||
|
|
|
|||
|
|
@ -2948,6 +2948,8 @@ intrinsic("ipa_nv", dest_comp=1, src_comp=[1, 1], bit_sizes=[32],
|
|||
# FLAGS indicate if we load vertex_id == 2
|
||||
intrinsic("ldtram_nv", dest_comp=2, bit_sizes=[32],
|
||||
indices=[BASE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
# Gives the mask of active threads matching the same source value
|
||||
intrinsic("match_any_nv", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS)
|
||||
|
||||
# NVIDIA-specific Image intrinsics
|
||||
# only used for kepler address calculations.
|
||||
|
|
|
|||
|
|
@ -3901,6 +3901,26 @@ impl<'a> ShaderFromNir<'a> {
|
|||
}
|
||||
self.set_dst(&intrin.def, dst.into());
|
||||
}
|
||||
nir_intrinsic_match_any_nv => {
|
||||
let src = self.get_src(&srcs[0]);
|
||||
let src_bits = srcs[0].bit_size() * srcs[0].num_components();
|
||||
assert!(
|
||||
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
|
||||
);
|
||||
let dst = b.alloc_ssa(RegFile::GPR);
|
||||
b.push_op(OpMatch {
|
||||
op: MatchOp::Any,
|
||||
mask: dst.into(),
|
||||
pred: Dst::None,
|
||||
src,
|
||||
u64: match src_bits {
|
||||
32 => false,
|
||||
64 => true,
|
||||
_ => panic!("Unsupported vote_ieq bit size"),
|
||||
},
|
||||
});
|
||||
self.set_dst(&intrin.def, dst.into());
|
||||
}
|
||||
nir_intrinsic_is_sparse_texels_resident => {
|
||||
let src = self.get_src(&srcs[0]);
|
||||
let dst = b.isetp(IntCmpType::I32, IntCmpOp::Ne, src, 0.into());
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue