diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 37dd14f5f12..1857f55bfa0 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -960,6 +960,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_load_shared_lock_nv: case nir_intrinsic_store_shared_unlock_nv: case nir_intrinsic_bvh_stack_rtn_amd: + case nir_intrinsic_cmat_load_shared_nv: is_divergent = true; break; diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 75e7ef653f0..583b14fb203 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -360,6 +360,8 @@ index("bool", "explicit_coord") # The index of the format string used by a printf. (u_printf_info element of the shader) index("unsigned", "fmt_idx") +# For NV cooperative matrix loads: number of matrices in the load (1, 2, or 4) +index("unsigned", "num_matrices") # Register class for load/store_preamble index("nir_preamble_class", "preamble_class") @@ -2630,6 +2632,8 @@ intrinsic("ssa_bar_nv", src_comp=[1]) intrinsic("cmat_muladd_nv", src_comp=[-1, -1, -1], dest_comp=0, bit_sizes=src2, indices=[FLAGS], flags=[CAN_ELIMINATE]) +intrinsic("cmat_load_shared_nv", src_comp=[1], dest_comp=0, indices=[NUM_MATRICES, MATRIX_LAYOUT, BASE]) + # NVIDIA-specific system values system_value("warps_per_sm_nv", 1, bit_sizes=[32]) system_value("sm_count_nv", 1, bit_sizes=[32]) diff --git a/src/compiler/rust/meson.build b/src/compiler/rust/meson.build index 0274ac62668..9337408a104 100644 --- a/src/compiler/rust/meson.build +++ b/src/compiler/rust/meson.build @@ -26,6 +26,7 @@ _compiler_binding_types = [ 'gl_varying_slot', 'gl_vert_attrib', 'glsl_type', + 'glsl_matrix_layout', 'nir_.*', 'mesa_scope', 'mesa_prim', diff --git a/src/compiler/rust/nir.rs b/src/compiler/rust/nir.rs index 854b126eba0..b519e6b2abe 100644 --- a/src/compiler/rust/nir.rs +++ b/src/compiler/rust/nir.rs 
@@ -385,6 +385,14 @@ impl nir_intrinsic_instr {
     pub fn saturate(&self) -> bool {
         self.get_const_index(NIR_INTRINSIC_SATURATE) != 0
     }
+
+    pub fn matrix_layout(&self) -> glsl_matrix_layout { // Returns the MATRIX_LAYOUT const index, cast to glsl_matrix_layout.
+        self.get_const_index(NIR_INTRINSIC_MATRIX_LAYOUT) as glsl_matrix_layout
+    }
+
+    pub fn num_matrices(&self) -> u8 { // Returns the NUM_MATRICES const index; presumably 1, 2 or 4 -- TODO confirm.
+        self.get_const_index(NIR_INTRINSIC_NUM_MATRICES) as u8 // `as` truncates silently if the index exceeds u8::MAX.
+    }
 }
 
 impl nir_intrinsic_info {
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index b4c2b4f34f0..6409d34376e 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -3745,6 +3745,31 @@ impl<'a> ShaderFromNir<'a> {
                 let dst = b.isetp(IntCmpType::I32, IntCmpOp::Ne, src, 0.into());
                 self.set_dst(&intrin.def, dst.into());
             }
+            nir_intrinsic_cmat_load_shared_nv => { // Translate the intrinsic into a single OpLdsm.
+                let dst_bit_size = usize::from(intrin.def.bit_size());
+                let layout: glsl_matrix_layout = intrin.matrix_layout();
+                let mat_count = intrin.num_matrices();
+                let dst_num_components =
+                    usize::from(intrin.def.num_components());
+                let comps = // Total destination bits, rounded up and counted in 32-bit units.
+                    (dst_bit_size * dst_num_components).div_ceil(32) as u8;
+                let mat_size = if layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR { // Column-major picks MT8N8; any other layout picks M8N8.
+                    LdsmSize::MT8N8 // NOTE(review): presumably the transposed 8x8 form -- confirm against LdsmSize.
+                } else {
+                    LdsmSize::M8N8
+                };
+                let dst = b.alloc_ssa_vec(RegFile::GPR, comps);
+                let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); // NOTE(review): 24 looks like the immediate-offset bit width -- confirm.
+                let offset = offset + intrin.base(); // Add the intrinsic's BASE index to the returned offset.
+                b.push_op(OpLdsm {
+                    dst: dst.clone().into(), // Cloned because `dst` is used again by set_dst below.
+                    mat_size,
+                    mat_count,
+                    addr,
+                    offset,
+                });
+                self.set_dst(&intrin.def, dst);
+            }
             nir_intrinsic_cmat_muladd_nv => {
                 let flags: nak_nir_cmat_mul_add_flags =
                     unsafe { std::mem::transmute(intrin.flags()) };