nvk: Wire up ROOT_TABLE

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/12576
Reviewed-by: Mary Guillemard <mary@mary.zone>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40639>
This commit is contained in:
Mel Henning 2026-01-02 18:26:15 -05:00 committed by Marge Bot
parent bff2d8dd9b
commit 30b3de6ec4
9 changed files with 188 additions and 31 deletions

View file

@ -97,6 +97,7 @@ struct nak_constant_offset_info {
uint32_t printf_buffer_offset;
};
const extern struct nak_constant_offset_info nak_const_offsets_base;
const extern struct nak_constant_offset_info nak_const_offsets_turing_graphics;
#define NAK_PRINTF_BUFFER_SIZE 0x40000

View file

@ -346,7 +346,11 @@ enum nak_fs_out {
static inline const struct nak_constant_offset_info*
nak_const_offsets(const struct nak_compiler* nak, bool is_graphics)
{
return &nak_const_offsets_base;
if (nak->sm >= 75 && is_graphics) {
return &nak_const_offsets_turing_graphics;
} else {
return &nak_const_offsets_base;
}
}
bool nak_nir_rematerialize_load_const(nir_shader *nir);

View file

@ -16,6 +16,7 @@
#include "util/u_dynarray.h"
#include "vk_command_buffer.h"
#include "clc597.h"
#include <stdio.h>
@ -80,6 +81,17 @@ struct nvk_root_descriptor_table {
static_assert(nvk_root_descriptor_offset(push) % 8 == 0,
"Push constants should be aligned properly");
#define nvk_hw_root_table_index(member)\
(nvk_root_descriptor_offset(member) / NVK_HW_ROOT_TABLE_SIZE)
#define nvk_hw_root_table_offset(member)\
(nvk_root_descriptor_offset(member) % NVK_HW_ROOT_TABLE_SIZE)
static inline bool nvk_use_hw_root_table(const struct nv_device_info *info,
bool is_gfx)
{
return is_gfx && info->cls_eng3d >= TURING_A;
}
enum ENUM_PACKED nvk_descriptor_set_type {
NVK_DESCRIPTOR_SET_TYPE_NONE,
NVK_DESCRIPTOR_SET_TYPE_SET,

View file

@ -521,6 +521,16 @@ nvk_push_draw_state_init(struct nvk_queue *queue, struct nv_push *p)
.binding_group4_enable = 0x3,
});
}
for (int i = 0; i < 8; i++) {
P_1INC(p, NVC597, SET_ROOT_TABLE_SELECTOR);
P_NVC597_SET_ROOT_TABLE_SELECTOR(p, {
.root_table = i,
.offset = 0,
});
for (uint32_t dw = 0; dw < 64; dw++)
P_INLINE_DATA(p, 0);
}
}
if (pdev->info.cls_eng3d >= TURING_A) {
@ -702,16 +712,45 @@ nvk_cmd_flush_gfx_root_desc(struct nvk_cmd_buffer *cmd,
struct nvk_descriptor_state *desc,
size_t offset, size_t size)
{
const struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
const uint32_t start_dw = offset / 4;
const uint32_t end_dw = DIV_ROUND_UP(offset + size, 4);
const uint32_t len_dw = end_dw - start_dw;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw);
P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET);
P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4);
const uint32_t *root_dw = (uint32_t *)desc->root;
P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw);
if (nvk_use_hw_root_table(&pdev->info, true)) {
const uint32_t TABLE_SIZE_DW = NVK_HW_ROOT_TABLE_SIZE / sizeof(uint32_t);
const uint32_t start_table = start_dw / TABLE_SIZE_DW;
const uint32_t end_table = DIV_ROUND_UP(end_dw, TABLE_SIZE_DW);
for (uint32_t table = start_table; table < end_table; table++) {
const uint32_t start_dw_table =
(table == start_table)
? (start_dw - table * TABLE_SIZE_DW)
: 0;
const uint32_t end_dw_table =
(table == end_table - 1)
? (end_dw - table * TABLE_SIZE_DW)
: TABLE_SIZE_DW;
const uint32_t len_dw_table = end_dw_table - start_dw_table;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw_table);
P_1INC(p, NVC597, SET_ROOT_TABLE_SELECTOR);
P_NVC597_SET_ROOT_TABLE_SELECTOR(p, {
.root_table = table,
.offset = start_dw_table * 4,
});
P_INLINE_ARRAY(p, &root_dw[start_dw_table + table * TABLE_SIZE_DW], len_dw_table);
}
} else {
const uint32_t len_dw = end_dw - start_dw;
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw);
P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET);
P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4);
P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw);
}
}
void
@ -3254,6 +3293,14 @@ nvk_mme_anti_alias_samples(uint32_t samples)
return nvk_mme_val_mask(samples_log2 << 4, 0x00f0);
}
static void
emit_anti_alias_mask(struct mme_builder *b, struct mme_value mask)
{
if (nvk_use_hw_root_table(b->devinfo, true))
mme_mthd(b, NVC597_LOAD_ROOT_TABLE);
mme_emit(b, mask);
}
void
nvk_mme_set_anti_alias(struct mme_builder *b)
{
@ -3316,9 +3363,20 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
*/
STATIC_ASSERT(sizeof(struct nak_sample_mask) == 2);
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.sample_masks)));
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
if (nvk_use_hw_root_table(b->devinfo, true)) {
uint32_t root_table_selector;
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector, {
.root_table = nvk_hw_root_table_index(draw.sample_masks),
.offset = nvk_hw_root_table_offset(draw.sample_masks),
});
mme_mthd(b, NVC597_SET_ROOT_TABLE_SELECTOR);
mme_emit(b, mme_imm(root_table_selector));
} else {
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.sample_masks)));
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
}
/* Annoyingly, we have to pack these in pairs */
@ -3331,7 +3389,7 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
for (uint32_t i = 0; i < NVK_MAX_SAMPLES; i += 2) {
uint32_t mask0 = 1 << i;
uint32_t mask1 = 1 << (i + 1);
mme_emit(b, mme_imm(mask0 | (mask1 << 16)));
emit_anti_alias_mask(b, mme_imm(mask0 | (mask1 << 16)));
}
}
@ -3339,14 +3397,14 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
mme_if(b, ieq, passes_log2, mme_zero()) {
/* It's a single pass so we can use 0xffff */
for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++)
mme_emit(b, mme_imm(~0));
emit_anti_alias_mask(b, mme_imm(~0));
}
mme_if(b, ieq, passes_log2, mme_imm(1)) {
for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) {
struct mme_value mask =
nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_2PASS_0, i);
mme_emit(b, mask);
emit_anti_alias_mask(b, mask);
mme_free_reg(b, mask);
}
}
@ -3355,7 +3413,7 @@ nvk_mme_set_anti_alias(struct mme_builder *b)
for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) {
struct mme_value mask =
nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_4PASS_0, i);
mme_emit(b, mask);
emit_anti_alias_mask(b, mask);
mme_free_reg(b, mask);
}
}
@ -3390,11 +3448,24 @@ nvk_mme_set_anti_alias_test_check(
assert(results[1].mthd == NV9097_SET_HYBRID_ANTI_ALIAS_CONTROL);
assert(results[1].data == expected[2]);
assert(results[2].mthd == NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
assert(results[2].data == nvk_root_descriptor_offset(draw.sample_masks));
if (nvk_use_hw_root_table(devinfo, true)) {
uint32_t root_table_selector;
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector, {
.root_table = nvk_hw_root_table_index(draw.sample_masks),
.offset = nvk_hw_root_table_offset(draw.sample_masks),
});
assert(results[2].mthd == NVC597_SET_ROOT_TABLE_SELECTOR);
assert(results[2].data == root_table_selector);
} else {
assert(results[2].mthd == NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
assert(results[2].data == nvk_root_descriptor_offset(draw.sample_masks));
}
for (int i = 0; i < 4; i++) {
assert(results[3 + i].mthd == NV9097_LOAD_CONSTANT_BUFFER(i));
if (nvk_use_hw_root_table(devinfo, true))
assert(results[3 + i].mthd == NVC597_LOAD_ROOT_TABLE);
else
assert(results[3 + i].mthd == NV9097_LOAD_CONSTANT_BUFFER(i));
assert(results[3 + i].data == expected[3 + i]);
}
@ -4434,10 +4505,23 @@ nvk_mme_set_cb0_mthd(struct mme_builder *b,
mme_mthd(b, mthd);
mme_emit(b, val);
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
mme_emit(b, mme_imm(cb0_offset));
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
mme_emit(b, val);
if (nvk_use_hw_root_table(b->devinfo, true)) {
uint32_t root_table_selector;
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector,{
.root_table = cb0_offset / NVK_HW_ROOT_TABLE_SIZE,
.offset = cb0_offset % NVK_HW_ROOT_TABLE_SIZE,
});
mme_mthd(b, NVC597_SET_ROOT_TABLE_SELECTOR);
mme_emit(b, mme_imm(root_table_selector));
mme_mthd(b, NVC597_LOAD_ROOT_TABLE);
mme_emit(b, val);
} else {
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET);
mme_emit(b, mme_imm(cb0_offset));
mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0));
mme_emit(b, val);
}
}
mme_free_reg(b, old);
} else {

View file

@ -508,9 +508,24 @@ build_push_write_push_const(nir_builder *b, struct nvk_nir_push *p,
assert(pc_range->size % 4 == 0);
const uint32_t dw_count = pc_range->size / 4;
nvk_nir_P_1INC(b, p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET, 1 + dw_count);
nvk_nir_push_dw(b, p, nir_imm_int(b,
nvk_root_descriptor_offset(push) + pc_range->offset));
assert(!(pc_range->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT));
if (nvk_use_hw_root_table(&pdev->info, true)) {
const uint32_t table = nvk_hw_root_table_index(push);
static_assert(nvk_hw_root_table_offset(push) == 0,
"Push constants are aligned");
uint32_t root_table_selector;
V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector,{
.root_table = table,
.offset = pc_range->offset,
});
nvk_nir_P_1INC(b, p, NVC597, SET_ROOT_TABLE_SELECTOR, 1 + dw_count);
nvk_nir_push_dw(b, p, nir_imm_int(b, root_table_selector));
} else {
nvk_nir_P_1INC(b, p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET, 1 + dw_count);
nvk_nir_push_dw(b, p, nir_imm_int(b,
nvk_root_descriptor_offset(push) + pc_range->offset));
}
}
static void

View file

@ -568,12 +568,22 @@ _load_root_table(nir_builder *b,
const struct lower_descriptors_ctx *ctx)
{
unsigned align_mul = bit_size / 8;
uint32_t base, cbuf;
if (nvk_use_hw_root_table(ctx->dev_info,
b->shader->info.stage != MESA_SHADER_COMPUTE)) {
cbuf = NVK_HW_ROOT_TABLE_FIRST_CB +
root_table_offset / NVK_HW_ROOT_TABLE_SIZE;
base = root_table_offset % NVK_HW_ROOT_TABLE_SIZE;
} else {
cbuf = 0; /* Root table */
base = root_table_offset;
}
return nir_ldc_nv(b, num_components, bit_size,
nir_imm_int(b, 0), /* Root table */
nir_imm_int(b, cbuf),
nir_imm_int(b, 0),
.align_mul = align_mul,
.align_offset = 0,
.base = root_table_offset);
.base = base);
}
#define load_root_table(b, nc, bs, member, ctx) \
@ -583,18 +593,32 @@ static nir_def *
_load_root_table_array(nir_builder *b,
unsigned num_components, unsigned bit_size,
uint32_t root_table_offset, uint32_t stride,
nir_def *index,
uint32_t array_size, nir_def *index,
const struct lower_descriptors_ctx *ctx)
{
uint32_t base, cbuf;
if (nvk_use_hw_root_table(ctx->dev_info,
b->shader->info.stage != MESA_SHADER_COMPUTE)) {
assert(root_table_offset % NVK_HW_ROOT_TABLE_SIZE + array_size <=
NVK_HW_ROOT_TABLE_SIZE);
cbuf = NVK_HW_ROOT_TABLE_FIRST_CB +
root_table_offset / NVK_HW_ROOT_TABLE_SIZE;
base = root_table_offset % NVK_HW_ROOT_TABLE_SIZE;
} else {
cbuf = 0; /* Root table */
base = root_table_offset;
}
return nir_ldc_nv(b, num_components, bit_size,
nir_imm_int(b, 0), /* Root table */
nir_imm_int(b, cbuf),
nir_imul_imm(b, index, stride),
.base = root_table_offset);
.base = base);
}
#define load_root_table_array(b, nc, bs, member, index, ctx) \
_load_root_table_array(b, nc, bs, nvk_root_descriptor_offset(member), \
sizeof(((struct nvk_root_descriptor_table){}).member[0]), \
sizeof(((struct nvk_root_descriptor_table){}).member), \
index, ctx)
static bool

View file

@ -42,6 +42,10 @@
#define NVK_MAX_IMAGE_PLANES 3
#define NVK_MAX_SAMPLER_PLANES 2
#define NVK_HW_ROOT_TABLE_SIZE 256
#define NVK_HW_ROOT_TABLE_FIRST_CB 24
#define NVK_HW_ROOT_TABLE_COUNT 8
/* Device Generated Commands */
#define NVK_DGC_ALIGN 0x100

View file

@ -331,7 +331,7 @@ nvk_queue_init_context_state(struct nvk_queue *queue)
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
VkResult result;
uint32_t push_data[4096];
uint32_t push_data[4096 + 1024];
struct nv_push push;
nv_push_init(&push, push_data, ARRAY_SIZE(push_data),
nvk_queue_subchannels_from_engines(queue->engines));

View file

@ -42,6 +42,19 @@ const struct nak_constant_offset_info nak_const_offsets_base = {
.printf_buffer_offset = nvk_root_descriptor_offset(printf_buffer_addr),
};
const struct nak_constant_offset_info nak_const_offsets_turing_graphics = {
.sample_info_cb = NVK_HW_ROOT_TABLE_FIRST_CB +
nvk_hw_root_table_index(draw.sample_locations),
.sample_locations_offset = nvk_hw_root_table_offset(draw.sample_locations),
.sample_masks_offset = nvk_hw_root_table_offset(draw.sample_masks),
.printf_cb = NVK_HW_ROOT_TABLE_FIRST_CB +
nvk_hw_root_table_index(printf_buffer_addr),
.printf_buffer_offset = nvk_hw_root_table_offset(printf_buffer_addr),
};
static_assert(nvk_hw_root_table_index(draw.sample_locations) ==
nvk_hw_root_table_index(draw.sample_masks),
"Sample info is in same root table");
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{