nvk: Allow for larger QMDs

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Backport-to: 25.2
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36995>
Faith Ekstrand 2025-08-25 17:23:21 -04:00 committed by Marge Bot
parent 238534e069
commit 0e268dad00
4 changed files with 59 additions and 47 deletions

View file

@@ -163,6 +163,7 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
    const struct nvk_physical_device *pdev = nvk_device_physical(dev);
    const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
+   const uint32_t qmd_size_B = nak_qmd_size_B(&pdev->info);
    VkResult result;

    /* pre Pascal the constant buffer sizes need to be 0x100 aligned. As we
@@ -223,8 +224,9 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
       }
    }

-   uint32_t qmd[64];
-   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
+   uint32_t qmd[NAK_MAX_QMD_DWORDS];
+   assert(qmd_size_B <= sizeof(qmd));
+   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, qmd_size_B);

    void *qmd_map;
    result = nvk_cmd_buffer_alloc_qmd(cmd, sizeof(qmd), 0x100,
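
The hunk above switches from a hard-coded 64-dword QMD to the compile-time worst case declared by NAK plus a runtime size query. A minimal, self-contained sketch of that pattern, with made-up stand-ins for the real nak.h definitions:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the real nak.h definitions; the values are made up. */
#define QMD_MAX_DWORDS_STUB 128

static uint32_t
qmd_size_B_stub(int gen)   /* plays the role of nak_qmd_size_B() */
{
   return gen >= 10 ? 0x140 : 0x100;   /* made-up per-generation sizes */
}

int main(void)
{
   uint32_t qmd[QMD_MAX_DWORDS_STUB] = {0};
   const uint32_t qmd_size_B = qmd_size_B_stub(10);

   /* The compile-time maximum must bound every runtime size. */
   assert(qmd_size_B <= sizeof(qmd));
   printf("filling %u of %zu bytes\n", qmd_size_B, sizeof(qmd));
   return 0;
}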

View file

@@ -224,18 +224,6 @@ nvk_nir_build_pad_NOP(nir_builder *b, struct nvk_nir_push *p, uint32_t nop)
    nvk_nir_build_pad_NOP((b), (p), \
       NVC0_FIFO_PKHDR_IL(SUBC_##class, class##_NO_OPERATION, 0))

-#define QMD_ALIGN 0x100
-#define QMD_ALLOC_SIZE QMD_ALIGN
-#define QMD_ROOT_SIZE (sizeof(struct nvk_ies_cs_qmd) + \
-                       sizeof(struct nvk_root_descriptor_table))
-
-static_assert(sizeof(struct nvk_ies_cs_qmd) % QMD_ALIGN == 0,
-              "QMD size is not properly aligned");
-static_assert(sizeof(struct nvk_root_descriptor_table) % QMD_ALIGN == 0,
-              "Root descriptor table size is not aligned");
-static_assert(NVK_DGC_ALIGN >= QMD_ALIGN,
-              "QMD alignment requirement is a lower bound of DGC alignment");
-
 static void
 copy_repl_global_dw(nir_builder *b, nir_def *dst_addr, nir_def *src_addr,
                     nir_def **repl_dw, uint32_t dw_count)
@@ -250,6 +238,11 @@ copy_repl_global_dw(nir_builder *b, nir_def *dst_addr, nir_def *src_addr,
    }
 }

+#define QMD_ALLOC_CHUNK_SIZE_B NAK_QMD_ALIGN_B
+
+static_assert(NVK_DGC_ALIGN >= NAK_QMD_ALIGN_B,
+              "QMD alignment requirement is a lower bound of DGC alignment");
+
 static void
 build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
                          nir_def *in_addr, nir_def *seq_idx,
@@ -319,7 +312,18 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
    nir_def *disp_size_y = load_global_dw(b, token_addr, 1);
    nir_def *disp_size_z = load_global_dw(b, token_addr, 2);

-   *qmd_size_per_seq_B_out += QMD_ROOT_SIZE;
+   /* We need space for both a QMD and a root table */
+   const uint32_t qmd_size_B = nak_qmd_size_B(&pdev->info);
+   const uint32_t root_offset_B = align(qmd_size_B, NAK_QMD_ALIGN_B);
+   const uint32_t qmd_root_size_B =
+      root_offset_B + sizeof(struct nvk_root_descriptor_table);
+
+   /* The root table is already aligned */
+   static_assert(sizeof(struct nvk_root_descriptor_table) %
+                 NAK_QMD_ALIGN_B == 0,
+                 "Root descriptor table size is not aligned");
+
+   *qmd_size_per_seq_B_out += qmd_root_size_B;

    nir_push_if(b, nir_ior(b, nir_ior(b, nir_ine_imm(b, disp_size_x, 0),
                                         nir_ine_imm(b, disp_size_y, 0)),
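
For the layout math above: the root descriptor table is placed at the first NAK_QMD_ALIGN_B boundary past the QMD. A self-contained sketch of the arithmetic with assumed sizes (the real values come from NAK and the root descriptor table definition):

#include <stdint.h>
#include <stdio.h>

/* Same round-up-to-power-of-two that Mesa's align() performs. */
static uint32_t
align_pot(uint32_t x, uint32_t pow2)
{
   return (x + pow2 - 1) & ~(pow2 - 1);
}

int main(void)
{
   const uint32_t qmd_align_B  = 0x100;   /* assumed NAK_QMD_ALIGN_B */
   const uint32_t qmd_size_B   = 0x140;   /* a hypothetical "larger QMD" */
   const uint32_t root_table_B = 0x200;   /* assumed root table size */

   const uint32_t root_offset_B   = align_pot(qmd_size_B, qmd_align_B);
   const uint32_t qmd_root_size_B = root_offset_B + root_table_B;

   /* 0x140 rounds up to 0x200, so QMD + root table spans 0x400 bytes. */
   printf("root at +0x%x, pair size 0x%x\n", root_offset_B, qmd_root_size_B);
   return 0;
}

Because the root table size is itself a multiple of the alignment (the static_assert above), the QMD-plus-root pair stays a whole number of allocator chunks, which the next hunk asserts.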
@@ -328,15 +332,16 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
       /* The first dword in qmd_addr is an allocator in units of 256
        * bytes.
        */
+      assert(qmd_root_size_B % QMD_ALLOC_CHUNK_SIZE_B == 0);
       nir_def *qmd_idx =
          nir_global_atomic(b, 32, in->qmd_pool_addr,
-                           nir_imm_int(b, QMD_ROOT_SIZE / QMD_ALIGN),
+                           nir_imm_int(b, qmd_root_size_B /
+                                          QMD_ALLOC_CHUNK_SIZE_B),
                            .atomic_op = nir_atomic_op_iadd);
       nir_def *qmd_offset =
-         nir_imul_imm(b, nir_u2u64(b, qmd_idx), QMD_ALIGN);
+         nir_imul_imm(b, nir_u2u64(b, qmd_idx), QMD_ALLOC_CHUNK_SIZE_B);
       nir_def *qmd_addr = nir_iadd(b, in->qmd_pool_addr, qmd_offset);
-      nir_def *root_addr =
-         nir_iadd_imm(b, qmd_addr, sizeof(struct nvk_ies_cs_qmd));
+      nir_def *root_addr = nir_iadd_imm(b, qmd_addr, root_offset_B);

       /* Upload and patch the root descriptor table */
       root_repl[root_dw(cs.group_count[0])] = disp_size_x;
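
The NIR above implements a chunked bump allocator: dword 0 of the pool counts chunks, and each dispatch atomically advances it by its QMD-plus-root-table footprint. A CPU-side C sketch of the same logic, with QMD_ALLOC_CHUNK_SIZE_B assumed to be 0x100:

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define CHUNK_B 0x100   /* assumed QMD_ALLOC_CHUNK_SIZE_B */

/* Atomically grab size_B bytes from the pool; returns a byte offset.
 * The counter starts at 1 (see build_init_shader below) so the chunk
 * holding the counter itself is never handed out. */
static uint32_t
pool_alloc(_Atomic uint32_t *counter, uint32_t size_B)
{
   assert(size_B % CHUNK_B == 0);
   uint32_t idx = atomic_fetch_add(counter, size_B / CHUNK_B);
   return idx * CHUNK_B;
}

int main(void)
{
   _Atomic uint32_t counter = 1;   /* chunk 0 holds the counter */
   printf("first:  +0x%x\n", pool_alloc(&counter, 0x400));   /* +0x100 */
   printf("second: +0x%x\n", pool_alloc(&counter, 0x400));   /* +0x500 */
   return 0;
}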
@@ -353,6 +358,9 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
       assert(qmd_layout.y_start == qmd_layout.x_start + 32);

       nir_def *qmd_repl[sizeof(struct nvk_ies_cs_qmd) / 4] = {};
+      uint32_t qmd_repl_count = qmd_size_B / 4;
+      assert(qmd_repl_count <= ARRAY_SIZE(qmd_repl));
+
       qmd_repl[qmd_layout.x_start / 32] = disp_size_x;
       if (qmd_layout.z_start == qmd_layout.y_start + 32) {
@@ -382,7 +390,7 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
                          nir_unpack_64_2x32_split_y(b, root_addr_shifted));

       copy_repl_global_dw(b, qmd_addr, shader_qmd_addr,
-                          qmd_repl, ARRAY_SIZE(qmd_repl));
+                          qmd_repl, qmd_repl_count);

       /* Now emit commands */
       nir_def *invoc = nir_imul_2x32_64(b, disp_size_x, disp_size_y);
@@ -714,8 +722,8 @@ build_init_shader(struct nvk_device *dev,
    struct process_cmd_in in = load_process_cmd_in(b);

    if (qmd_size_per_seq_B > 0) {
-      /* Initialize the QMD allocator to 1 * QMD_ALIGN so that the QMDs we
-       * allocate don't stomp the allocator.
+      /* Initialize the QMD allocator to 1 * QMD_ALLOC_CHUNK_SIZE_B so that
+       * the QMDs we allocate don't stomp the allocator.
        */
       assert(info->shaderStages == VK_SHADER_STAGE_COMPUTE_BIT);
       store_global_dw(b, in.qmd_pool_addr, 0, nir_imm_int(b, 1));
@@ -927,14 +935,14 @@ nvk_GetGeneratedCommandsMemoryRequirementsEXT(
    uint64_t size = layout->cmd_seq_stride_B * (uint64_t)pInfo->maxSequenceCount;

    if (layout->qmd_size_per_seq_B > 0) {
-      size = align64(size, QMD_ALIGN);
-      size += QMD_ALLOC_SIZE;
+      size = align64(size, NAK_QMD_ALIGN_B);
+      size += QMD_ALLOC_CHUNK_SIZE_B; /* One for the allocator */
       size += layout->qmd_size_per_seq_B * pInfo->maxSequenceCount;
    }

    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
       .size = size,
-      .alignment = QMD_ALIGN,
+      .alignment = NAK_QMD_ALIGN_B,
       .memoryTypeBits = BITFIELD_MASK(pdev->mem_type_count),
    };
 }
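
The resulting buffer layout is: the command stream, rounded up to the pool alignment, then one chunk for the allocator dword, then per-sequence QMD space. A worked example with assumed numbers:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t
align64_pot(uint64_t x, uint64_t pow2)
{
   return (x + pow2 - 1) & ~(pow2 - 1);
}

int main(void)
{
   /* Assumed: 100 sequences, 512 B command stride, 0x100 B chunks,
    * 0x400 B of QMD + root table per sequence. */
   uint64_t size = 512 * UINT64_C(100);   /* command stream: 51200 B     */
   size = align64_pot(size, 0x100);       /* already a multiple of 0x100 */
   size += 0x100;                         /* the allocator's own chunk   */
   size += 0x400 * UINT64_C(100);         /* per-sequence QMD space      */
   printf("%" PRIu64 " bytes\n", size);   /* 153856 */
   return 0;
}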
@@ -975,10 +983,10 @@ nvk_cmd_process_cmds(struct nvk_cmd_buffer *cmd,
    }

    if (layout->qmd_size_per_seq_B > 0) {
-      assert(info->preprocessAddress % QMD_ALIGN == 0);
+      assert(info->preprocessAddress % NAK_QMD_ALIGN_B == 0);
       uint64_t qmd_offset =
          layout->cmd_seq_stride_B * (uint64_t)info->maxSequenceCount;
-      qmd_offset = align64(qmd_offset, QMD_ALIGN);
+      qmd_offset = align64(qmd_offset, NAK_QMD_ALIGN_B);
       push.qmd_pool_addr = info->preprocessAddress + qmd_offset;
    }

View file

@@ -17,7 +17,13 @@ nvk_ies_map(struct nvk_indirect_execution_set *ies, uint32_t index)
    return ies->mem->map + (index * (size_t)ies->stride_B);
 }

-void
+static uint32_t
+nvk_ies_stride_cs(const struct nvk_physical_device *pdev)
+{
+   return align(nak_qmd_size_B(&pdev->info), NAK_QMD_ALIGN_B);
+}
+
+static void
 nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev,
                     struct nvk_ies_cs_qmd *qmd,
                     struct nvk_shader *shader)
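
Rounding the stride up to NAK_QMD_ALIGN_B keeps every entry of the execution set on an alignment boundary under nvk_ies_map()'s map + index * stride_B addressing. A small sketch with assumed sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Assumed: a 0x140 B QMD rounded up to 0x100 B hardware alignment,
    * i.e. align(0x140, 0x100) = 0x200. */
   const uint32_t stride_B = 0x200;

   for (uint32_t i = 0; i < 3; i++)
      printf("entry %u at +0x%x\n", i, i * stride_B);   /* 0x0, 0x200, 0x400 */
   return 0;
}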
@@ -55,8 +61,9 @@ nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev,
       }
    }

-   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info,
-                qmd->qmd, sizeof(qmd->qmd));
+   const uint32_t qmd_size = nak_qmd_size_B(&pdev->info);
+   assert(qmd_size <= sizeof(qmd->qmd));
+   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd->qmd, qmd_size);
 }

 static void
@@ -65,11 +72,14 @@ nvk_ies_set_cs(struct nvk_device *dev,
                uint32_t index,
                struct nvk_shader *shader)
 {
-   struct nvk_ies_cs_qmd qmd = {};
-   nvk_ies_cs_qmd_init(nvk_device_physical(dev), &qmd, shader);
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+   const uint32_t qmd_size = nak_qmd_size_B(&pdev->info);

-   assert(sizeof(qmd) <= ies->stride_B);
-   memcpy(nvk_ies_map(ies, index), &qmd, sizeof(qmd));
+   struct nvk_ies_cs_qmd qmd = {};
+   nvk_ies_cs_qmd_init(pdev, &qmd, shader);
+
+   assert(qmd_size <= ies->stride_B);
+   memcpy(nvk_ies_map(ies, index), &qmd, qmd_size);
 }

 uint16_t
@@ -256,7 +266,7 @@ nvk_CreateIndirectExecutionSetEXT(VkDevice _device,
       if (pipeline->stages & VK_SHADER_STAGE_COMPUTE_BIT) {
          assert(pipeline->stages == VK_SHADER_STAGE_COMPUTE_BIT);
          ies->type = NVK_IES_TYPE_CS_QMD;
-         ies->stride_B = sizeof(struct nvk_ies_cs_qmd);
+         ies->stride_B = nvk_ies_stride_cs(pdev);
       } else if (pipeline->stages & NVK_SHADER_STAGE_GRAPHICS_BITS) {
          assert(!(pipeline->stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS));
          ies->type = NVK_IES_TYPE_GFX_PIPELINE;
@@ -281,7 +291,7 @@ nvk_CreateIndirectExecutionSetEXT(VkDevice _device,
       if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
          assert(stages == VK_SHADER_STAGE_COMPUTE_BIT);
          ies->type = NVK_IES_TYPE_CS_QMD;
-         ies->stride_B = sizeof(struct nvk_ies_cs_qmd);
+         ies->stride_B = nvk_ies_stride_cs(pdev);
       } else if (stages & NVK_SHADER_STAGE_GRAPHICS_BITS) {
          assert(!(stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS));
          ies->type = NVK_IES_TYPE_GFX_SHADER;

View file

@@ -7,6 +7,8 @@
 #include "nvk_private.h"

+#include "nak.h"
+
 struct nvk_physical_device;
 struct nvk_shader;
 struct nvkmd_mem;
@@ -20,20 +22,10 @@ enum nvk_ies_type {

 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
 struct nvk_ies_cs_qmd {
-   uint32_t qmd[64];
+   uint32_t qmd[NAK_MAX_QMD_DWORDS];
 };
 PRAGMA_DIAGNOSTIC_POP

-static inline uint16_t
-nvk_ies_cs_qmd_max_dw_count(const struct nvk_physical_device *pdev)
-{
-   return 64;
-}
-
-void nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev,
-                         struct nvk_ies_cs_qmd *qmd,
-                         struct nvk_shader *shader);
-
 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
 struct nvk_ies_gfx_shader {