nir: add nir_intrinsic_cmat_load_shared_nv

This maps to NAK's OpLdsm

Reviewed-by: Mary Guillemard <mary@mary.zone>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36363>
This commit is contained in:
Dave Airlie 2025-03-07 12:09:53 +10:00 committed by Karol Herbst
parent 016159096f
commit c38170452d
5 changed files with 39 additions and 0 deletions

View file

@ -960,6 +960,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_shared_lock_nv:
case nir_intrinsic_store_shared_unlock_nv:
case nir_intrinsic_bvh_stack_rtn_amd:
case nir_intrinsic_cmat_load_shared_nv:
is_divergent = true;
break;

View file

@ -360,6 +360,8 @@ index("bool", "explicit_coord")
# The index of the format string used by a printf. (u_printf_info element of the shader)
index("unsigned", "fmt_idx")
# For NV cooperative matrix loads: the number of matrices loaded (1, 2, or 4)
index("unsigned", "num_matrices")
# Register class for load/store_preamble
index("nir_preamble_class", "preamble_class")
@ -2630,6 +2632,8 @@ intrinsic("ssa_bar_nv", src_comp=[1])
intrinsic("cmat_muladd_nv", src_comp=[-1, -1, -1], dest_comp=0, bit_sizes=src2,
indices=[FLAGS], flags=[CAN_ELIMINATE])
# Cooperative-matrix load from shared memory; maps to NAK's OpLdsm.
# src[0] is the address and BASE is a constant added to its offset.
# NUM_MATRICES is the number of matrices in the load and MATRIX_LAYOUT is the
# glsl_matrix_layout of the loaded matrices.
intrinsic("cmat_load_shared_nv", src_comp=[1], dest_comp=0, indices=[NUM_MATRICES, MATRIX_LAYOUT, BASE])
# NVIDIA-specific system values
system_value("warps_per_sm_nv", 1, bit_sizes=[32])
system_value("sm_count_nv", 1, bit_sizes=[32])

View file

@ -26,6 +26,7 @@ _compiler_binding_types = [
'gl_varying_slot',
'gl_vert_attrib',
'glsl_type',
'glsl_matrix_layout',
'nir_.*',
'mesa_scope',
'mesa_prim',

View file

@ -385,6 +385,14 @@ impl nir_intrinsic_instr {
/// Reports whether this intrinsic's `SATURATE` constant index is non-zero.
pub fn saturate(&self) -> bool {
    let raw = self.get_const_index(NIR_INTRINSIC_SATURATE);
    raw != 0
}
/// Returns this intrinsic's `MATRIX_LAYOUT` constant index, cast to
/// `glsl_matrix_layout`.
pub fn matrix_layout(&self) -> glsl_matrix_layout {
    let raw = self.get_const_index(NIR_INTRINSIC_MATRIX_LAYOUT);
    raw as glsl_matrix_layout
}
pub fn num_matrices(&self) -> u8 {
self.get_const_index(NIR_INTRINSIC_NUM_MATRICES) as u8
}
}
impl nir_intrinsic_info {

View file

@ -3745,6 +3745,31 @@ impl<'a> ShaderFromNir<'a> {
let dst = b.isetp(IntCmpType::I32, IntCmpOp::Ne, src, 0.into());
self.set_dst(&intrin.def, dst.into());
}
nir_intrinsic_cmat_load_shared_nv => {
    // Cooperative-matrix load from shared memory, emitted as an OpLdsm.
    // The matrix count and layout come from the intrinsic's constant
    // indices.
    let num_mats = intrin.num_matrices();
    let size = if intrin.matrix_layout() != GLSL_MATRIX_LAYOUT_COLUMN_MAJOR
    {
        LdsmSize::M8N8
    } else {
        // Column-major loads select the MT8N8 variant.
        LdsmSize::MT8N8
    };

    // Total destination size in bits, rounded up to whole 32-bit
    // components of the GPR vector that receives the result.
    let total_bits = usize::from(intrin.def.bit_size())
        * usize::from(intrin.def.num_components());
    let num_gprs = total_bits.div_ceil(32) as u8;
    let dst = b.alloc_ssa_vec(RegFile::GPR, num_gprs);

    // Split the address source into an address and an offset, then add
    // the intrinsic's constant base to the offset.
    // NOTE(review): the literal 24 is presumably the number of immediate
    // offset bits available; confirm against get_io_addr_offset.
    let (addr, imm) = self.get_io_addr_offset(&srcs[0], 24);
    let offset = imm + intrin.base();

    b.push_op(OpLdsm {
        dst: dst.clone().into(),
        mat_size: size,
        mat_count: num_mats,
        addr,
        offset,
    });
    self.set_dst(&intrin.def, dst);
}
nir_intrinsic_cmat_muladd_nv => {
let flags: nak_nir_cmat_mul_add_flags =
unsafe { std::mem::transmute(intrin.flags()) };