nouveau/mme: Add unit tests for sharing between compute and 3D scratch registers

Co-developed-by: Mary Guillemard <mary@mary.zone> Reviewed-by: Karol Herbst <kherbst@redhat.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Tested-by: Mary Guillemard <mary@mary.zone> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37671>
2026-05-05 13:58:04 +02:00 · 2025-09-19 01:55:25 +03:00 · 2025-09-19 01:55:25 +03:00 · 146a64524d
commit 146a64524d
parent 0bfe27553d
5 changed files with 168 additions and 0 deletions
--- a/src/nouveau/headers/class_parser.py
+++ b/src/nouveau/headers/class_parser.py
@ -27,6 +27,7 @@ METHOD_ARRAY_SIZES = {
    'SET_COLOR_COMPRESSION'                                 : 8,
    'SET_COLOR_CLEAR_VALUE'                                 : 4,
    'SET_CT_WRITE'                                          : 8,
+    # For compute, this is only 8:
    'SET_MME_SHADOW_SCRATCH'                                : 256,
    'SET_MULTI_VIEW_RENDER_TARGET_ARRAY_INDEX_OFFSET'       : 4,
    'SET_PIPELINE_*'                                        : 6,
--- a/src/nouveau/mme/tests/mme_runner.cpp
+++ b/src/nouveau/mme/tests/mme_runner.cpp
@ -12,6 +12,7 @@
 #include "mme_tu104_sim.h"

 #include "nv_push_clc597.h"
+#include "nv_push_cl90c0.h"

 #include "nouveau_bo.h"
 #include "nouveau_context.h"
@ -142,6 +143,12 @@ mme_hw_runner::reset_push()
      .class_id = dev->info.cls_eng3d,
      .engine_id = 0,
   });
+
+   P_MTHD(p, NV90C0, SET_OBJECT);
+   P_NV90C0_SET_OBJECT(p, {
+      .class_id = dev->info.cls_compute,
+      .engine_id = 0,
+   });
 }

 void
--- a/src/nouveau/mme/tests/mme_runner.h
+++ b/src/nouveau/mme/tests/mme_runner.h
@ -13,6 +13,7 @@ struct nouveau_ws_device;

 #include "nv_push.h"
 #include "nv_push_cl9097.h"
+#include "nv_push_clc7c0.h"

 #define DATA_BO_SIZE 4096
 #define DATA_DWORDS 1024
@ -133,3 +134,33 @@ mme_store(mme_builder *b, struct mme_value64 addr, mme_value v,
   if (free_reg && v.type == MME_VALUE_TYPE_REG)
      mme_free_reg(b, v);
 }
+
+/*
+ * Compute-class counterpart of the store helpers in this header, for an
+ * address known at macro build time.
+ *
+ * Selects NVC7C0_SET_REPORT_SEMAPHORE_PAYLOAD_LOWER and then emits, in this
+ * order: v, an immediate 0, the low 32 bits of addr, the high 32 bits of
+ * addr, and an immediate 0x8.  The emit order is significant and must not be
+ * changed.  NOTE(review): the five values presumably land on the consecutive
+ * PAYLOAD_LOWER/PAYLOAD_UPPER/ADDRESS_LOWER/ADDRESS_UPPER/EXECUTE methods --
+ * confirm against the class header.
+ *
+ * If free_reg is true and v lives in a register, that register is released
+ * once the emits are done.
+ */
+inline void
+mme_store_compute_imm_addr(mme_builder *b, uint64_t addr, mme_value v,
+                           bool free_reg = false)
+{
+   const mme_value addr_lo = mme_imm(low32(addr));
+   const mme_value addr_hi = mme_imm(high32(addr));
+
+   mme_mthd(b, NVC7C0_SET_REPORT_SEMAPHORE_PAYLOAD_LOWER);
+   mme_emit(b, v);
+   mme_emit(b, mme_imm(0));
+   mme_emit(b, addr_lo);
+   mme_emit(b, addr_hi);
+   mme_emit(b, mme_imm(0x8));
+
+   /* Only register-backed values have anything to give back */
+   const bool release_value = free_reg && v.type == MME_VALUE_TYPE_REG;
+   if (release_value)
+      mme_free_reg(b, v);
+}
+
+/*
+ * Compute-class store of v to a 64-bit address held in MME values.
+ *
+ * Selects NVC7C0_SET_REPORT_SEMAPHORE_PAYLOAD_LOWER and then emits, in this
+ * order: v, an immediate 0, addr.lo, addr.hi, and an immediate 0x8.  The
+ * emit order is significant and must not be changed.
+ *
+ * addr is left untouched, so callers can keep advancing it between stores.
+ * If free_reg is true and v lives in a register, that register is released
+ * once the emits are done.
+ */
+inline void
+mme_store_compute(mme_builder *b, struct mme_value64 addr, mme_value v,
+                  bool free_reg = false)
+{
+   const mme_value dst_lo = addr.lo;
+   const mme_value dst_hi = addr.hi;
+
+   mme_mthd(b, NVC7C0_SET_REPORT_SEMAPHORE_PAYLOAD_LOWER);
+   mme_emit(b, v);
+   mme_emit(b, mme_imm(0));
+   mme_emit(b, dst_lo);
+   mme_emit(b, dst_hi);
+   mme_emit(b, mme_imm(0x8));
+
+   /* Only register-backed values have anything to give back */
+   const bool release_value = free_reg && v.type == MME_VALUE_TYPE_REG;
+   if (release_value)
+      mme_free_reg(b, v);
+}
--- a/src/nouveau/mme/tests/mme_tu104_sim_hw_test.cpp
+++ b/src/nouveau/mme/tests/mme_tu104_sim_hw_test.cpp
@ -1695,3 +1695,124 @@ TEST_F(mme_tu104_sim_test, scratch_limit)
         ASSERT_EQ(data[j], i + j);
   }
 }
+
+/*
+ * Smoke test for launching a macro through the compute class: the macro
+ * stores a known constant to data_addr with mme_store_compute_imm_addr()
+ * and the host checks that it shows up in data[0].
+ */
+TEST_F(mme_tu104_sim_test, sanity_compute)
+{
+   const uint32_t canary = 0xc0ffee01;
+
+   /* Build a macro whose only job is the compute-side store */
+   mme_builder builder;
+   mme_builder_init(&builder, devinfo);
+   mme_store_compute_imm_addr(&builder, data_addr, mme_imm(canary));
+   auto program = mme_builder_finish_vec(&builder);
+
+   reset_push();
+   push_macro(0, program);
+
+   /* Invoke macro 0 via the NVC7C0 (compute) CALL_MME_MACRO method */
+   P_1INC(p, NVC7C0, CALL_MME_MACRO(0));
+   P_NVC7C0_CALL_MME_MACRO(p, 0, 0);
+   submit_push();
+
+   ASSERT_EQ(data[0], canary);
+}
+
+/*
+ * Exercises the MME shadow scratch as seen through the compute class.
+ *
+ * The macro takes two parameters, `start` and `count`.  It first writes
+ * `count` consecutive NVC7C0_SET_MME_SHADOW_SCRATCH entries beginning at
+ * `start`, each with its own index as the value, then reads the same entries
+ * back with mme_state_arr() and stores them to consecutive dwords at
+ * data_addr.  The host runs the macro one chunk at a time and checks that
+ * data[] holds the indices that were written.
+ */
+TEST_F(mme_tu104_sim_test, scratch_limit_compute)
+{
+   static const uint32_t chunk_size = 4;
+   /* Number of scratch entries covered through the compute class */
+   static const uint32_t compute_scratch_count = 8;
+
+   mme_builder b;
+   mme_builder_init(&b, devinfo);
+
+   mme_value start = mme_load(&b);
+   mme_value count = mme_load(&b);
+
+   /* Pass 1: scratch[i] = i for i in [start, start + count) */
+   mme_value i = mme_mov(&b, start);
+   mme_loop(&b, count) {
+      mme_mthd_arr(&b, NVC7C0_SET_MME_SHADOW_SCRATCH(0), i);
+      mme_emit(&b, i);
+      mme_add_to(&b, i, i, mme_imm(1));
+   }
+
+   /* Pass 2: copy the same scratch range out to the data buffer */
+   mme_value j = mme_mov(&b, start);
+   struct mme_value64 addr = mme_mov64(&b, mme_imm64(data_addr));
+
+   mme_loop(&b, count) {
+      mme_value x = mme_state_arr(&b, NVC7C0_SET_MME_SHADOW_SCRATCH(0), j);
+      mme_store_compute(&b, addr, x);
+      mme_add_to(&b, j, j, mme_imm(1));
+      mme_add64_to(&b, addr, addr, mme_imm64(4));
+   }
+
+   auto macro = mme_builder_finish_vec(&b);
+
+   for (uint32_t i = 0; i < compute_scratch_count; i += chunk_size) {
+      reset_push();
+
+      push_macro(0, macro);
+
+      /*
+       * Call the macro index that was handed to push_macro() above.  This
+       * used to call index 1, which is never uploaded by this test.
+       */
+      P_1INC(p, NVC7C0, CALL_MME_MACRO(0));
+      P_INLINE_DATA(p, i);
+      P_INLINE_DATA(p, chunk_size);
+
+      submit_push();
+
+      for (uint32_t j = 0; j < chunk_size; j++)
+         ASSERT_EQ(data[j], i + j);
+   }
+}
+
+/*
+ * Checks what the compute class reads from scratch entries that were
+ * written through the 3D class.
+ *
+ * One macro is used for both halves; its third parameter selects which half
+ * runs:
+ *   0 - write `count` NVC597_SET_MME_SHADOW_SCRATCH entries starting at
+ *       `start`, each holding its own index;
+ *   1 - read the same indices through NVC7C0_SET_MME_SHADOW_SCRATCH and
+ *       store them to consecutive dwords at data_addr.
+ *
+ * The host calls the macro once through NVC597 with selector 0 and once
+ * through NVC7C0 with selector 1, then expects data[] to contain the indices
+ * written by the 3D half.
+ */
+TEST_F(mme_tu104_sim_test, scratch_share_3d_to_compute)
+{
+   static const uint32_t chunk_size = 4;
+
+   mme_builder b;
+   mme_builder_init(&b, devinfo);
+
+   /* Macro parameters, in the order the host pushes them */
+   mme_value first = mme_load(&b);
+   mme_value n = mme_load(&b);
+   mme_value selector = mme_load(&b);
+
+   /* Selector 0: fill scratch through the 3D class */
+   mme_if(&b, ieq, selector, mme_zero()) {
+      mme_value idx = mme_mov(&b, first);
+      mme_loop(&b, n) {
+         mme_mthd_arr(&b, NVC597_SET_MME_SHADOW_SCRATCH(0), idx);
+         mme_emit(&b, idx);
+         mme_add_to(&b, idx, idx, mme_imm(1));
+      }
+   }
+
+   /* Selector 1: read scratch through the compute class and dump it */
+   mme_if(&b, ieq, selector, mme_imm(1)) {
+      mme_value idx = mme_mov(&b, first);
+      struct mme_value64 dst = mme_mov64(&b, mme_imm64(data_addr));
+
+      mme_loop(&b, n) {
+         mme_value x = mme_state_arr(&b, NVC7C0_SET_MME_SHADOW_SCRATCH(0), idx);
+         mme_store_compute(&b, dst, x);
+         mme_add_to(&b, idx, idx, mme_imm(1));
+         mme_add64_to(&b, dst, dst, mme_imm64(4));
+      }
+   }
+
+   auto program = mme_builder_finish_vec(&b);
+
+   for (uint32_t i = 0; i < 8; i += chunk_size) {
+      reset_push();
+
+      push_macro(0, program);
+
+      /* Writer half, launched via the 3D class */
+      P_1INC(p, NVC597, CALL_MME_MACRO(0));
+      P_INLINE_DATA(p, i);
+      P_INLINE_DATA(p, chunk_size);
+      P_INLINE_DATA(p, 0);
+
+      /* Reader half, launched via the compute class */
+      P_1INC(p, NVC7C0, CALL_MME_MACRO(0));
+      P_INLINE_DATA(p, i);
+      P_INLINE_DATA(p, chunk_size);
+      P_INLINE_DATA(p, 1);
+
+      submit_push();
+
+      for (uint32_t j = 0; j < chunk_size; j++)
+         ASSERT_EQ(data[j], i + j);
+   }
+}
--- a/src/nouveau/vulkan/nvk_mme.h
+++ b/src/nouveau/vulkan/nvk_mme.h
@ -43,14 +43,22 @@ enum nvk_mme {
   NVK_MME_COUNT,
 };

+/*
+ * For the compute MME, as tested in scratch_limit_compute in the unit tests,
+ * we only have 8 registers. Using more than 8 leads to an MMU fault.
+ * Moreover, as tested in scratch_share_3d_to_compute, scratch space is
+ * shared between compute and 3D.
+ */
 enum nvk_mme_scratch {
   /* These are reserved for communicating with FALCON */
   NVK_MME_SCRATCH_FALCON_0 = 0,
   NVK_MME_SCRATCH_FALCON_1 = 1,
   NVK_MME_SCRATCH_FALCON_2 = 2,

+   /* These need to stay at the top since they get accessed by the compute MME */
   NVK_MME_SCRATCH_CS_INVOCATIONS_HI,
   NVK_MME_SCRATCH_CS_INVOCATIONS_LO,
+
   NVK_MME_SCRATCH_DRAW_BEGIN,
   NVK_MME_SCRATCH_DRAW_COUNT,
   NVK_MME_SCRATCH_DRAW_PAD_DW,