From 90438bae51bc3a29bf1dfdb63e84a498104fc790 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Tue, 24 Dec 2024 14:09:56 +0100 Subject: [PATCH] nir: Add NVIDIA-specific muladd intrinsics Signed-off-by: Mary Guillemard Reviewed-by: Faith Ekstrand Part-of: --- src/compiler/nir/nir_divergence_analysis.c | 1 + src/compiler/nir/nir_intrinsics.py | 5 +++++ src/nouveau/compiler/meson.build | 2 ++ src/nouveau/compiler/nak_private.h | 25 ++++++++++++++++++++++ 4 files changed, 33 insertions(+) diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index d92da173892..f0e2a5742f1 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -940,6 +940,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_sustga_nv: case nir_intrinsic_ipa_nv: case nir_intrinsic_ldtram_nv: + case nir_intrinsic_cmat_muladd_nv: case nir_intrinsic_printf: case nir_intrinsic_load_gs_header_ir3: case nir_intrinsic_load_tcs_header_ir3: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 360b498fca1..dab3d857aec 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2593,6 +2593,11 @@ intrinsic("bar_sync_nv", src_comp=[1, 1]) # Stall until the given SSA value is available intrinsic("ssa_bar_nv", src_comp=[1]) +# NVIDIA-specific muladd intrinsics. 
+# src[] = { a, b, c } +intrinsic("cmat_muladd_nv", src_comp=[-1, -1, -1], dest_comp=0, bit_sizes=src2, + indices=[FLAGS], flags=[CAN_ELIMINATE]) + # NVIDIA-specific system values system_value("warps_per_sm_nv", 1, bit_sizes=[32]) system_value("sm_count_nv", 1, bit_sizes=[32]) diff --git a/src/nouveau/compiler/meson.build b/src/nouveau/compiler/meson.build index 76b15edc073..15646dd33ca 100644 --- a/src/nouveau/compiler/meson.build +++ b/src/nouveau/compiler/meson.build @@ -65,6 +65,8 @@ _nak_bindings_rs = rust.bindgen( '--allowlist-function', 'drm.*', '--allowlist-function', 'nak_.*', '--allowlist-function', 'nouveau_ws_.*', + # provided through compiler::bindings::* + '--blocklist-type', 'glsl_.*', '--no-prepend-enum-name', '--with-derive-default', ], diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index 969f97b2bc8..45b4fa27d8b 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -290,6 +290,31 @@ struct nak_nir_ipa_flags { uint32_t pad:26; }; +enum nak_cmat_type { + NAK_CMAT_TYPE_M8N8K16_INT, + NAK_CMAT_TYPE_M16N8K32_INT, + + NAK_CMAT_TYPE_M16N8K8_FLOAT, + NAK_CMAT_TYPE_M16N8K16_FLOAT, + + /* Software-emulated cmat layouts + * + * These aren't supported as a single native *MMA invocation on any hardware, + * so in order to support them we execute multiple *MMA instructions with a + * register layout defined by us. + */ + NAK_CMAT_TYPE_M16N16K32_INT_SW, + NAK_CMAT_TYPE_M16N16K16_FLOAT_SW, +}; + +struct nak_nir_cmat_mul_add_flags { + enum nak_cmat_type cmat_type:3; + enum glsl_base_type a_type:5; + enum glsl_base_type b_type:5; + bool sat:1; + uint32_t pad:18; +}; + bool nak_nir_lower_fs_inputs(nir_shader *nir, const struct nak_compiler *nak, const struct nak_fs_key *fs_key);