diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 45297e664c0..7762a186394 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -1063,6 +1063,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_agx: case nir_intrinsic_load_shared_lock_nv: case nir_intrinsic_store_shared_unlock_nv: + case nir_intrinsic_match_any_nv: case nir_intrinsic_bvh_stack_rtn_amd: case nir_intrinsic_cmat_load_shared_nv: case nir_intrinsic_cmat_mov_transpose_nv: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 5cec60122e5..4d2d95d5d77 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2948,6 +2948,8 @@ intrinsic("ipa_nv", dest_comp=1, src_comp=[1, 1], bit_sizes=[32], # FLAGS indicate if we load vertex_id == 2 intrinsic("ldtram_nv", dest_comp=2, bit_sizes=[32], indices=[BASE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) +# Gives the mask of active threads matching the same source value +intrinsic("match_any_nv", src_comp=[0], dest_comp=1, flags=SUBGROUP_FLAGS) # NVIDIA-specific Image intrinsics # only used for kepler address calculations. diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 4e55ba53060..4ea82502a94 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -3901,6 +3901,26 @@ impl<'a> ShaderFromNir<'a> { } self.set_dst(&intrin.def, dst.into()); } + nir_intrinsic_match_any_nv => { + let src = self.get_src(&srcs[0]); + let src_bits = srcs[0].bit_size() * srcs[0].num_components(); + assert!( + intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64 + ); + let dst = b.alloc_ssa(RegFile::GPR); + b.push_op(OpMatch { + op: MatchOp::Any, + mask: dst.into(), + pred: Dst::None, + src, + u64: match src_bits { + 32 => false, + 64 => true, + _ => panic!("Unsupported vote_ieq bit size"), + }, + }); + self.set_dst(&intrin.def, dst.into()); + } nir_intrinsic_is_sparse_texels_resident => { let src = self.get_src(&srcs[0]); let dst = b.isetp(IntCmpType::I32, IntCmpOp::Ne, src, 0.into());