From b95dbc64bf9fc2df2db8640414775a83b6ffd02a Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Wed, 4 Mar 2026 16:41:00 +0100 Subject: [PATCH] nir,nak: Add match_any_nv NVIDIA hardware has an instruction allowing you to retrieve the mask of active threads matching the same source value as the current invocation. This is going to be used by shared memory lowering for mesh / task stages on NVK. Signed-off-by: Mary Guillemard Reviewed-by: Mel Henning Tested-by: Thomas H.P. Andersen Part-of: --- src/compiler/nir/nir_divergence_analysis.c | 1 + src/compiler/nir/nir_intrinsics.py | 2 ++ src/nouveau/compiler/nak/from_nir.rs | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 45297e664c0..7762a186394 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -1063,6 +1063,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_agx: case nir_intrinsic_load_shared_lock_nv: case nir_intrinsic_store_shared_unlock_nv: + case nir_intrinsic_match_any_nv: case nir_intrinsic_bvh_stack_rtn_amd: case nir_intrinsic_cmat_load_shared_nv: case nir_intrinsic_cmat_mov_transpose_nv: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 5cec60122e5..4d2d95d5d77 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2948,6 +2948,8 @@ intrinsic("ipa_nv", dest_comp=1, src_comp=[1, 1], bit_sizes=[32], # FLAGS indicate if we load vertex_id == 2 intrinsic("ldtram_nv", dest_comp=2, bit_sizes=[32], indices=[BASE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) +# Gives the mask of active threads matching the same source value +intrinsic("match_any_nv", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS) # NVIDIA-specific Image intrinsics # only used for kepler address calculations. 
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 4e55ba53060..4ea82502a94 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -3901,6 +3901,26 @@ impl<'a> ShaderFromNir<'a> { } self.set_dst(&intrin.def, dst.into()); } + nir_intrinsic_match_any_nv => { + let src = self.get_src(&srcs[0]); + let src_bits = srcs[0].bit_size() * srcs[0].num_components(); + assert!( + intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64 + ); + let dst = b.alloc_ssa(RegFile::GPR); + b.push_op(OpMatch { + op: MatchOp::Any, + mask: dst.into(), + pred: Dst::None, + src, + u64: match src_bits { + 32 => false, + 64 => true, + _ => panic!("Unsupported vote_ieq bit size"), + }, + }); + self.set_dst(&intrin.def, dst.into()); + } nir_intrinsic_is_sparse_texels_resident => { let src = self.get_src(&srcs[0]); let dst = b.isetp(IntCmpType::I32, IntCmpOp::Ne, src, 0.into());