nouveau/mme: Add unit tests for sharing between compute and 3D scratch registers

Co-developed-by: Mary Guillemard <mary@mary.zone>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Tested-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37671>
This commit is contained in:
Mohamed Ahmed 2025-09-19 01:55:25 +03:00 committed by Marge Bot
parent 0bfe27553d
commit 146a64524d
5 changed files with 168 additions and 0 deletions

View file

@ -27,6 +27,7 @@ METHOD_ARRAY_SIZES = {
'SET_COLOR_COMPRESSION' : 8,
'SET_COLOR_CLEAR_VALUE' : 4,
'SET_CT_WRITE' : 8,
# For compute, this is only 8:
'SET_MME_SHADOW_SCRATCH' : 256,
'SET_MULTI_VIEW_RENDER_TARGET_ARRAY_INDEX_OFFSET' : 4,
'SET_PIPELINE_*' : 6,

View file

@ -12,6 +12,7 @@
#include "mme_tu104_sim.h"
#include "nv_push_clc597.h"
#include "nv_push_cl90c0.h"
#include "nouveau_bo.h"
#include "nouveau_context.h"
@ -142,6 +143,12 @@ mme_hw_runner::reset_push()
.class_id = dev->info.cls_eng3d,
.engine_id = 0,
});
P_MTHD(p, NV90C0, SET_OBJECT);
P_NV90C0_SET_OBJECT(p, {
.class_id = dev->info.cls_compute,
.engine_id = 0,
});
}
void

View file

@ -13,6 +13,7 @@ struct nouveau_ws_device;
#include "nv_push.h"
#include "nv_push_cl9097.h"
#include "nv_push_clc7c0.h"
#define DATA_BO_SIZE 4096
#define DATA_DWORDS 1024
@ -133,3 +134,33 @@ mme_store(mme_builder *b, struct mme_value64 addr, mme_value v,
if (free_reg && v.type == MME_VALUE_TYPE_REG)
mme_free_reg(b, v);
}
/* Emits MME builder code that programs the compute-class (NVC7C0) report
 * semaphore with payload v and the immediate 64-bit address addr.
 *
 * b        - builder receiving the generated instructions
 * addr     - target address, split into its low and high 32-bit halves
 * v        - value emitted as the lower payload word
 * free_reg - when true and v is register-backed, v is released afterwards
 */
inline void
mme_store_compute_imm_addr(mme_builder *b, uint64_t addr, mme_value v,
                           bool free_reg = false)
{
   mme_mthd(b, NVC7C0_SET_REPORT_SEMAPHORE_PAYLOAD_LOWER);

   /* Data words in emission order, starting at PAYLOAD_LOWER.  The words
    * after the first presumably land on the methods that follow it
    * (payload upper, address lower, address upper, then the execute word).
    * NOTE(review): confirm the meaning of 0x8 against the class header.
    */
   const mme_value words[] = {
      v,
      mme_imm(0),
      mme_imm(low32(addr)),
      mme_imm(high32(addr)),
      mme_imm(0x8),
   };
   for (const mme_value &word : words)
      mme_emit(b, word);

   /* Only register-backed values own something that can be freed. */
   const bool release_v = free_reg && v.type == MME_VALUE_TYPE_REG;
   if (release_v)
      mme_free_reg(b, v);
}
/* Emits MME builder code that programs the compute-class (NVC7C0) report
 * semaphore with payload v and an address held in a 64-bit MME value.
 *
 * b        - builder receiving the generated instructions
 * addr     - target address; addr.lo and addr.hi are emitted in that order
 * v        - value emitted as the lower payload word
 * free_reg - when true and v is register-backed, v is released afterwards
 */
inline void
mme_store_compute(mme_builder *b, struct mme_value64 addr, mme_value v,
                  bool free_reg = false)
{
   mme_mthd(b, NVC7C0_SET_REPORT_SEMAPHORE_PAYLOAD_LOWER);

   /* Data words in emission order, starting at PAYLOAD_LOWER.  The words
    * after the first presumably land on the methods that follow it
    * (payload upper, address lower, address upper, then the execute word).
    * NOTE(review): confirm the meaning of 0x8 against the class header.
    */
   const mme_value words[] = {
      v,
      mme_imm(0),
      addr.lo,
      addr.hi,
      mme_imm(0x8),
   };
   for (const mme_value &word : words)
      mme_emit(b, word);

   /* Only register-backed values own something that can be freed. */
   const bool release_v = free_reg && v.type == MME_VALUE_TYPE_REG;
   if (release_v)
      mme_free_reg(b, v);
}

View file

@ -1695,3 +1695,124 @@ TEST_F(mme_tu104_sim_test, scratch_limit)
ASSERT_EQ(data[j], i + j);
}
}
/* Smoke test for invoking a macro through the compute class: the macro
 * stores a known canary value to data_addr via the compute store helper
 * and the test checks that the first data dword holds it afterwards.
 */
TEST_F(mme_tu104_sim_test, sanity_compute)
{
   const uint32_t canary = 0xc0ffee01;

   /* Build a macro consisting of a single compute-side store. */
   mme_builder builder;
   mme_builder_init(&builder, devinfo);
   mme_store_compute_imm_addr(&builder, data_addr, mme_imm(canary));
   auto program = mme_builder_finish_vec(&builder);

   /* Upload it to slot 0 and call that slot through the NVC7C0 class. */
   reset_push();
   push_macro(0, program);

   P_1INC(p, NVC7C0, CALL_MME_MACRO(0));
   P_NVC7C0_CALL_MME_MACRO(p, 0, 0);

   submit_push();

   ASSERT_EQ(data[0], canary);
}
/* Checks the compute-class MME shadow scratch registers.
 *
 * The macro takes two parameters, a start index and a count.  It first
 * writes each index value into the scratch register with that index, then
 * reads the same registers back and stores them to consecutive dwords
 * starting at data_addr.  The host side walks the scratch range in chunks
 * and verifies that every register returned its own index.
 */
TEST_F(mme_tu104_sim_test, scratch_limit_compute)
{
   static const uint32_t chunk_size = 4;
   /* Number of scratch registers exercised through the compute class. */
   static const uint32_t compute_scratch_count = 8;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value start = mme_load(&b);
   mme_value count = mme_load(&b);

   /* Phase 1: scratch[idx] = idx for idx in [start, start + count). */
   mme_value wr_idx = mme_mov(&b, start);
   mme_loop(&b, count) {
      mme_mthd_arr(&b, NVC7C0_SET_MME_SHADOW_SCRATCH(0), wr_idx);
      mme_emit(&b, wr_idx);
      mme_add_to(&b, wr_idx, wr_idx, mme_imm(1));
   }

   /* Phase 2: read the registers back and store them, 4 bytes apart. */
   mme_value rd_idx = mme_mov(&b, start);
   struct mme_value64 addr = mme_mov64(&b, mme_imm64(data_addr));

   mme_loop(&b, count) {
      mme_value x = mme_state_arr(&b, NVC7C0_SET_MME_SHADOW_SCRATCH(0), rd_idx);
      mme_store_compute(&b, addr, x);
      mme_add_to(&b, rd_idx, rd_idx, mme_imm(1));
      mme_add64_to(&b, addr, addr, mme_imm64(4));
   }

   auto macro = mme_builder_finish_vec(&b);

   for (uint32_t i = 0; i < compute_scratch_count; i += chunk_size) {
      reset_push();

      push_macro(0, macro);

      /* Call the slot the macro was just uploaded to (slot 0). */
      P_1INC(p, NVC7C0, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, i);
      P_INLINE_DATA(p, chunk_size);

      submit_push();

      for (uint32_t j = 0; j < chunk_size; j++)
         ASSERT_EQ(data[j], i + j);
   }
}
/* Checks that values written to the MME shadow scratch registers through
 * the 3D class (NVC597) can be read back through the compute class
 * (NVC7C0).
 *
 * One macro serves both calls and branches on its third parameter:
 *   0 - write each index into the scratch register with that index (3D)
 *   1 - read the scratch registers and store them from data_addr on
 *       (compute)
 */
TEST_F(mme_tu104_sim_test, scratch_share_3d_to_compute)
{
   static const uint32_t chunk_size = 4;

   mme_builder builder;
   mme_builder_init(&builder, devinfo);

   /* Macro parameters, in the order the host pushes them. */
   mme_value first = mme_load(&builder);
   mme_value num = mme_load(&builder);
   mme_value selector = mme_load(&builder);

   /* Writer half: scratch[idx] = idx through the 3D method. */
   mme_if(&builder, ieq, selector, mme_zero()) {
      mme_value wr_idx = mme_mov(&builder, first);
      mme_loop(&builder, num) {
         mme_mthd_arr(&builder, NVC597_SET_MME_SHADOW_SCRATCH(0), wr_idx);
         mme_emit(&builder, wr_idx);
         mme_add_to(&builder, wr_idx, wr_idx, mme_imm(1));
      }
   }

   /* Reader half: fetch through the compute method, store 4 bytes apart. */
   mme_if(&builder, ieq, selector, mme_imm(1)) {
      mme_value rd_idx = mme_mov(&builder, first);
      struct mme_value64 dst = mme_mov64(&builder, mme_imm64(data_addr));
      mme_loop(&builder, num) {
         mme_value x =
            mme_state_arr(&builder, NVC7C0_SET_MME_SHADOW_SCRATCH(0), rd_idx);
         mme_store_compute(&builder, dst, x);
         mme_add_to(&builder, rd_idx, rd_idx, mme_imm(1));
         mme_add64_to(&builder, dst, dst, mme_imm64(4));
      }
   }

   auto program = mme_builder_finish_vec(&builder);

   for (uint32_t i = 0; i < 8; i += chunk_size) {
      reset_push();

      push_macro(0, program);

      /* First call goes through the 3D class and takes the writer path. */
      P_1INC(p, NVC597, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, i);
      P_INLINE_DATA(p, chunk_size);
      P_INLINE_DATA(p, 0);

      /* Second call goes through the compute class and reads back. */
      P_1INC(p, NVC7C0, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, i);
      P_INLINE_DATA(p, chunk_size);
      P_INLINE_DATA(p, 1);

      submit_push();

      for (uint32_t j = 0; j < chunk_size; j++)
         ASSERT_EQ(data[j], i + j);
   }
}

View file

@ -43,14 +43,22 @@ enum nvk_mme {
NVK_MME_COUNT,
};
/*
* For the compute MME, as tested in scratch_limit_compute in the unit tests,
* we only have 8 scratch registers. Using more than 8 leads to an MMU fault.
* Moreover, as tested in scratch_share_3d_to_compute, scratch space is
* shared between compute and 3D.
*/
enum nvk_mme_scratch {
/* These are reserved for communicating with FALCON */
NVK_MME_SCRATCH_FALCON_0 = 0,
NVK_MME_SCRATCH_FALCON_1 = 1,
NVK_MME_SCRATCH_FALCON_2 = 2,
/* These need to stay at the top since they get accessed by the compute MME */
NVK_MME_SCRATCH_CS_INVOCATIONS_HI,
NVK_MME_SCRATCH_CS_INVOCATIONS_LO,
NVK_MME_SCRATCH_DRAW_BEGIN,
NVK_MME_SCRATCH_DRAW_COUNT,
NVK_MME_SCRATCH_DRAW_PAD_DW,