From 30b3de6ec443b39ca8bd52d38d1a57b12fed4a83 Mon Sep 17 00:00:00 2001 From: Mel Henning Date: Fri, 2 Jan 2026 18:26:15 -0500 Subject: [PATCH] nvk: Wire up ROOT_TABLE Closes: https://gitlab.freedesktop.org/mesa/mesa/-/work_items/12576 Reviewed-by: Mary Guillemard Part-of: --- src/nouveau/compiler/nak.h | 1 + src/nouveau/compiler/nak_private.h | 6 +- src/nouveau/vulkan/nvk_cmd_buffer.h | 12 ++ src/nouveau/vulkan/nvk_cmd_draw.c | 126 +++++++++++++++--- src/nouveau/vulkan/nvk_cmd_indirect.c | 21 ++- .../vulkan/nvk_nir_lower_descriptors.c | 34 ++++- src/nouveau/vulkan/nvk_private.h | 4 + src/nouveau/vulkan/nvk_queue.c | 2 +- src/nouveau/vulkan/nvk_shader.c | 13 ++ 9 files changed, 188 insertions(+), 31 deletions(-) diff --git a/src/nouveau/compiler/nak.h b/src/nouveau/compiler/nak.h index c5286a2a060..6e952b00e98 100644 --- a/src/nouveau/compiler/nak.h +++ b/src/nouveau/compiler/nak.h @@ -97,6 +97,7 @@ struct nak_constant_offset_info { uint32_t printf_buffer_offset; }; const extern struct nak_constant_offset_info nak_const_offsets_base; +const extern struct nak_constant_offset_info nak_const_offsets_turing_graphics; #define NAK_PRINTF_BUFFER_SIZE 0x40000 diff --git a/src/nouveau/compiler/nak_private.h b/src/nouveau/compiler/nak_private.h index 45d3ecebecd..d6b2f7e976d 100644 --- a/src/nouveau/compiler/nak_private.h +++ b/src/nouveau/compiler/nak_private.h @@ -346,7 +346,11 @@ enum nak_fs_out { static inline const struct nak_constant_offset_info* nak_const_offsets(const struct nak_compiler* nak, bool is_graphics) { - return &nak_const_offsets_base; + if (nak->sm >= 75 && is_graphics) { + return &nak_const_offsets_turing_graphics; + } else { + return &nak_const_offsets_base; + } } bool nak_nir_rematerialize_load_const(nir_shader *nir); diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h index 8e8e3bc0857..4bf5321682a 100644 --- a/src/nouveau/vulkan/nvk_cmd_buffer.h +++ b/src/nouveau/vulkan/nvk_cmd_buffer.h @@ -16,6 +16,7 @@ #include 
"util/u_dynarray.h" #include "vk_command_buffer.h" +#include "clc597.h" #include @@ -80,6 +81,17 @@ struct nvk_root_descriptor_table { static_assert(nvk_root_descriptor_offset(push) % 8 == 0, "Push constants should be aligned properly"); +#define nvk_hw_root_table_index(member)\ + (nvk_root_descriptor_offset(member) / NVK_HW_ROOT_TABLE_SIZE) +#define nvk_hw_root_table_offset(member)\ + (nvk_root_descriptor_offset(member) % NVK_HW_ROOT_TABLE_SIZE) + +static inline bool nvk_use_hw_root_table(const struct nv_device_info *info, + bool is_gfx) +{ + return is_gfx && info->cls_eng3d >= TURING_A; +} + enum ENUM_PACKED nvk_descriptor_set_type { NVK_DESCRIPTOR_SET_TYPE_NONE, NVK_DESCRIPTOR_SET_TYPE_SET, diff --git a/src/nouveau/vulkan/nvk_cmd_draw.c b/src/nouveau/vulkan/nvk_cmd_draw.c index 10bed1275ee..76b57c3f006 100644 --- a/src/nouveau/vulkan/nvk_cmd_draw.c +++ b/src/nouveau/vulkan/nvk_cmd_draw.c @@ -521,6 +521,16 @@ nvk_push_draw_state_init(struct nvk_queue *queue, struct nv_push *p) .binding_group4_enable = 0x3, }); } + + for (int i = 0; i < NVK_HW_ROOT_TABLE_COUNT; i++) { /* zero-fill every HW root table */ + P_1INC(p, NVC597, SET_ROOT_TABLE_SELECTOR); + P_NVC597_SET_ROOT_TABLE_SELECTOR(p, { + .root_table = i, + .offset = 0, + }); + for (uint32_t dw = 0; dw < NVK_HW_ROOT_TABLE_SIZE / sizeof(uint32_t); dw++) + P_INLINE_DATA(p, 0); + } } if (pdev->info.cls_eng3d >= TURING_A) { @@ -702,16 +712,45 @@ nvk_cmd_flush_gfx_root_desc(struct nvk_cmd_buffer *cmd, struct nvk_descriptor_state *desc, size_t offset, size_t size) { + const struct nvk_device *dev = nvk_cmd_buffer_device(cmd); + const struct nvk_physical_device *pdev = nvk_device_physical(dev); + const uint32_t start_dw = offset / 4; const uint32_t end_dw = DIV_ROUND_UP(offset + size, 4); - const uint32_t len_dw = end_dw - start_dw; - - struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw); - P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET); - P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4); - const uint32_t *root_dw = (uint32_t *)desc->root; - P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw); + + if
(nvk_use_hw_root_table(&pdev->info, true)) { + const uint32_t TABLE_SIZE_DW = NVK_HW_ROOT_TABLE_SIZE / sizeof(uint32_t); + const uint32_t start_table = start_dw / TABLE_SIZE_DW; + const uint32_t end_table = DIV_ROUND_UP(end_dw, TABLE_SIZE_DW); + for (uint32_t table = start_table; table < end_table; table++) { + const uint32_t start_dw_table = + (table == start_table) + ? (start_dw - table * TABLE_SIZE_DW) + : 0; + const uint32_t end_dw_table = + (table == end_table - 1) + ? (end_dw - table * TABLE_SIZE_DW) + : TABLE_SIZE_DW; + const uint32_t len_dw_table = end_dw_table - start_dw_table; + + struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw_table); + P_1INC(p, NVC597, SET_ROOT_TABLE_SELECTOR); + P_NVC597_SET_ROOT_TABLE_SELECTOR(p, { + .root_table = table, + .offset = start_dw_table * 4, + }); + P_INLINE_ARRAY(p, &root_dw[start_dw_table + table * TABLE_SIZE_DW], len_dw_table); + } + } else { + const uint32_t len_dw = end_dw - start_dw; + + struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw); + P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET); + P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4); + + P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw); + } } void @@ -3254,6 +3293,14 @@ nvk_mme_anti_alias_samples(uint32_t samples) return nvk_mme_val_mask(samples_log2 << 4, 0x00f0); } +static void +emit_anti_alias_mask(struct mme_builder *b, struct mme_value mask) +{ + if (nvk_use_hw_root_table(b->devinfo, true)) + mme_mthd(b, NVC597_LOAD_ROOT_TABLE); + mme_emit(b, mask); +} + void nvk_mme_set_anti_alias(struct mme_builder *b) { @@ -3316,9 +3363,20 @@ nvk_mme_set_anti_alias(struct mme_builder *b) */ STATIC_ASSERT(sizeof(struct nak_sample_mask) == 2); - mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET); - mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.sample_masks))); - mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0)); + if (nvk_use_hw_root_table(b->devinfo, true)) { + uint32_t root_table_selector; + V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector, { + 
.root_table = nvk_hw_root_table_index(draw.sample_masks), + .offset = nvk_hw_root_table_offset(draw.sample_masks), + }); + + mme_mthd(b, NVC597_SET_ROOT_TABLE_SELECTOR); + mme_emit(b, mme_imm(root_table_selector)); + } else { + mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET); + mme_emit(b, mme_imm(nvk_root_descriptor_offset(draw.sample_masks))); + mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0)); + } /* Annoyingly, we have to pack these in pairs */ @@ -3331,7 +3389,7 @@ nvk_mme_set_anti_alias(struct mme_builder *b) for (uint32_t i = 0; i < NVK_MAX_SAMPLES; i += 2) { uint32_t mask0 = 1 << i; uint32_t mask1 = 1 << (i + 1); - mme_emit(b, mme_imm(mask0 | (mask1 << 16))); + emit_anti_alias_mask(b, mme_imm(mask0 | (mask1 << 16))); } } @@ -3339,14 +3397,14 @@ nvk_mme_set_anti_alias(struct mme_builder *b) mme_if(b, ieq, passes_log2, mme_zero()) { /* It's a single pass so we can use 0xffff */ for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) - mme_emit(b, mme_imm(~0)); + emit_anti_alias_mask(b, mme_imm(~0)); } mme_if(b, ieq, passes_log2, mme_imm(1)) { for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) { struct mme_value mask = nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_2PASS_0, i); - mme_emit(b, mask); + emit_anti_alias_mask(b, mask); mme_free_reg(b, mask); } } @@ -3355,7 +3413,7 @@ nvk_mme_set_anti_alias(struct mme_builder *b) for (uint32_t i = 0; i < NVK_MAX_SAMPLES / 2; i++) { struct mme_value mask = nvk_mme_load_scratch_arr(b, SAMPLE_MASKS_4PASS_0, i); - mme_emit(b, mask); + emit_anti_alias_mask(b, mask); mme_free_reg(b, mask); } } @@ -3390,11 +3448,24 @@ nvk_mme_set_anti_alias_test_check( assert(results[1].mthd == NV9097_SET_HYBRID_ANTI_ALIAS_CONTROL); assert(results[1].data == expected[2]); - assert(results[2].mthd == NV9097_LOAD_CONSTANT_BUFFER_OFFSET); - assert(results[2].data == nvk_root_descriptor_offset(draw.sample_masks)); + if (nvk_use_hw_root_table(devinfo, true)) { + uint32_t root_table_selector; + V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector, { + 
.root_table = nvk_hw_root_table_index(draw.sample_masks), + .offset = nvk_hw_root_table_offset(draw.sample_masks), + }); + assert(results[2].mthd == NVC597_SET_ROOT_TABLE_SELECTOR); + assert(results[2].data == root_table_selector); + } else { + assert(results[2].mthd == NV9097_LOAD_CONSTANT_BUFFER_OFFSET); + assert(results[2].data == nvk_root_descriptor_offset(draw.sample_masks)); + } for (int i = 0; i < 4; i++) { - assert(results[3 + i].mthd == NV9097_LOAD_CONSTANT_BUFFER(i)); + if (nvk_use_hw_root_table(devinfo, true)) + assert(results[3 + i].mthd == NVC597_LOAD_ROOT_TABLE); + else + assert(results[3 + i].mthd == NV9097_LOAD_CONSTANT_BUFFER(i)); assert(results[3 + i].data == expected[3 + i]); } @@ -4434,10 +4505,23 @@ nvk_mme_set_cb0_mthd(struct mme_builder *b, mme_mthd(b, mthd); mme_emit(b, val); - mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET); - mme_emit(b, mme_imm(cb0_offset)); - mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0)); - mme_emit(b, val); + if (nvk_use_hw_root_table(b->devinfo, true)) { + uint32_t root_table_selector; + V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector,{ + .root_table = cb0_offset / NVK_HW_ROOT_TABLE_SIZE, + .offset = cb0_offset % NVK_HW_ROOT_TABLE_SIZE, + }); + + mme_mthd(b, NVC597_SET_ROOT_TABLE_SELECTOR); + mme_emit(b, mme_imm(root_table_selector)); + mme_mthd(b, NVC597_LOAD_ROOT_TABLE); + mme_emit(b, val); + } else { + mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER_OFFSET); + mme_emit(b, mme_imm(cb0_offset)); + mme_mthd(b, NV9097_LOAD_CONSTANT_BUFFER(0)); + mme_emit(b, val); + } } mme_free_reg(b, old); } else { diff --git a/src/nouveau/vulkan/nvk_cmd_indirect.c b/src/nouveau/vulkan/nvk_cmd_indirect.c index 505ff6ab3da..5b0be386fa7 100644 --- a/src/nouveau/vulkan/nvk_cmd_indirect.c +++ b/src/nouveau/vulkan/nvk_cmd_indirect.c @@ -508,9 +508,24 @@ build_push_write_push_const(nir_builder *b, struct nvk_nir_push *p, assert(pc_range->size % 4 == 0); const uint32_t dw_count = pc_range->size / 4; - nvk_nir_P_1INC(b, p, NV9097, 
LOAD_CONSTANT_BUFFER_OFFSET, 1 + dw_count); - nvk_nir_push_dw(b, p, nir_imm_int(b, - nvk_root_descriptor_offset(push) + pc_range->offset)); + assert(!(pc_range->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)); + if (nvk_use_hw_root_table(&pdev->info, true)) { + const uint32_t table = nvk_hw_root_table_index(push); + static_assert(nvk_hw_root_table_offset(push) == 0, + "Push constants are aligned"); + + uint32_t root_table_selector; + V_NVC597_SET_ROOT_TABLE_SELECTOR(root_table_selector,{ + .root_table = table, + .offset = pc_range->offset, + }); + nvk_nir_P_1INC(b, p, NVC597, SET_ROOT_TABLE_SELECTOR, 1 + dw_count); + nvk_nir_push_dw(b, p, nir_imm_int(b, root_table_selector)); + } else { + nvk_nir_P_1INC(b, p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET, 1 + dw_count); + nvk_nir_push_dw(b, p, nir_imm_int(b, + nvk_root_descriptor_offset(push) + pc_range->offset)); + } } static void diff --git a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c index 6c266892c12..46490181f0f 100644 --- a/src/nouveau/vulkan/nvk_nir_lower_descriptors.c +++ b/src/nouveau/vulkan/nvk_nir_lower_descriptors.c @@ -568,12 +568,22 @@ _load_root_table(nir_builder *b, const struct lower_descriptors_ctx *ctx) { unsigned align_mul = bit_size / 8; + uint32_t base, cbuf; + if (nvk_use_hw_root_table(ctx->dev_info, + b->shader->info.stage != MESA_SHADER_COMPUTE)) { + cbuf = NVK_HW_ROOT_TABLE_FIRST_CB + + root_table_offset / NVK_HW_ROOT_TABLE_SIZE; + base = root_table_offset % NVK_HW_ROOT_TABLE_SIZE; + } else { + cbuf = 0; /* Root table */ + base = root_table_offset; + } return nir_ldc_nv(b, num_components, bit_size, - nir_imm_int(b, 0), /* Root table */ + nir_imm_int(b, cbuf), nir_imm_int(b, 0), .align_mul = align_mul, .align_offset = 0, - .base = root_table_offset); + .base = base); } #define load_root_table(b, nc, bs, member, ctx) \ @@ -583,18 +593,32 @@ static nir_def * _load_root_table_array(nir_builder *b, unsigned num_components, unsigned bit_size, uint32_t 
root_table_offset, uint32_t stride, - nir_def *index, + uint32_t array_size, nir_def *index, const struct lower_descriptors_ctx *ctx) { + uint32_t base, cbuf; + if (nvk_use_hw_root_table(ctx->dev_info, + b->shader->info.stage != MESA_SHADER_COMPUTE)) { + assert(root_table_offset % NVK_HW_ROOT_TABLE_SIZE + array_size <= + NVK_HW_ROOT_TABLE_SIZE); + + cbuf = NVK_HW_ROOT_TABLE_FIRST_CB + + root_table_offset / NVK_HW_ROOT_TABLE_SIZE; + base = root_table_offset % NVK_HW_ROOT_TABLE_SIZE; + } else { + cbuf = 0; /* Root table */ + base = root_table_offset; + } return nir_ldc_nv(b, num_components, bit_size, - nir_imm_int(b, 0), /* Root table */ + nir_imm_int(b, cbuf), nir_imul_imm(b, index, stride), - .base = root_table_offset); + .base = base); } #define load_root_table_array(b, nc, bs, member, index, ctx) \ _load_root_table_array(b, nc, bs, nvk_root_descriptor_offset(member), \ sizeof(((struct nvk_root_descriptor_table){}).member[0]), \ + sizeof(((struct nvk_root_descriptor_table){}).member), \ index, ctx) static bool diff --git a/src/nouveau/vulkan/nvk_private.h b/src/nouveau/vulkan/nvk_private.h index 8a8acebb672..b7f4c0b56a0 100644 --- a/src/nouveau/vulkan/nvk_private.h +++ b/src/nouveau/vulkan/nvk_private.h @@ -42,6 +42,10 @@ #define NVK_MAX_IMAGE_PLANES 3 #define NVK_MAX_SAMPLER_PLANES 2 +#define NVK_HW_ROOT_TABLE_SIZE 256 +#define NVK_HW_ROOT_TABLE_FIRST_CB 24 +#define NVK_HW_ROOT_TABLE_COUNT 8 + /* Device Generated Commands */ #define NVK_DGC_ALIGN 0x100 diff --git a/src/nouveau/vulkan/nvk_queue.c b/src/nouveau/vulkan/nvk_queue.c index 68ef6b7addb..4061705eb56 100644 --- a/src/nouveau/vulkan/nvk_queue.c +++ b/src/nouveau/vulkan/nvk_queue.c @@ -331,7 +331,7 @@ nvk_queue_init_context_state(struct nvk_queue *queue) const struct nvk_physical_device *pdev = nvk_device_physical(dev); VkResult result; - uint32_t push_data[4096]; + uint32_t push_data[4096 + 1024]; struct nv_push push; nv_push_init(&push, push_data, ARRAY_SIZE(push_data), 
nvk_queue_subchannels_from_engines(queue->engines)); diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c index 550ee5b1f7c..8f7f781034b 100644 --- a/src/nouveau/vulkan/nvk_shader.c +++ b/src/nouveau/vulkan/nvk_shader.c @@ -42,6 +42,19 @@ const struct nak_constant_offset_info nak_const_offsets_base = { .printf_buffer_offset = nvk_root_descriptor_offset(printf_buffer_addr), }; +const struct nak_constant_offset_info nak_const_offsets_turing_graphics = { + .sample_info_cb = NVK_HW_ROOT_TABLE_FIRST_CB + + nvk_hw_root_table_index(draw.sample_locations), + .sample_locations_offset = nvk_hw_root_table_offset(draw.sample_locations), + .sample_masks_offset = nvk_hw_root_table_offset(draw.sample_masks), + .printf_cb = NVK_HW_ROOT_TABLE_FIRST_CB + + nvk_hw_root_table_index(printf_buffer_addr), + .printf_buffer_offset = nvk_hw_root_table_offset(printf_buffer_addr), +}; +static_assert(nvk_hw_root_table_index(draw.sample_locations) == + nvk_hw_root_table_index(draw.sample_masks), + "Sample info is in same root table"); + static void shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) {