nvk: Allow for larger QMDs
Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Backport-to: 25.2
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36995>
parent 238534e069
commit 0e268dad00

4 changed files with 59 additions and 47 deletions
@@ -163,6 +163,7 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
    struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
    const struct nvk_physical_device *pdev = nvk_device_physical(dev);
    const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
+   const uint32_t qmd_size_B = nak_qmd_size_B(&pdev->info);
    VkResult result;

    /* pre Pascal the constant buffer sizes need to be 0x100 aligned. As we
@@ -223,8 +224,9 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
       }
    }

-   uint32_t qmd[64];
-   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
+   uint32_t qmd[NAK_MAX_QMD_DWORDS];
+   assert(qmd_size_B <= sizeof(qmd));
+   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, qmd_size_B);

    void *qmd_map;
    result = nvk_cmd_buffer_alloc_qmd(cmd, sizeof(qmd), 0x100,
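The change above follows one pattern throughout this commit: size the scratch buffer for the largest QMD any supported GPU needs, then fill and upload only the size the current GPU actually uses. A minimal standalone sketch of that pattern, with made-up names and sizes standing in for NAK_MAX_QMD_DWORDS and nak_qmd_size_B():

   #include <assert.h>
   #include <stdint.h>
   #include <string.h>

   #define MAX_QMD_DWORDS 64            /* assumed compile-time upper bound */

   static uint32_t qmd_size_for_gpu(int gen)
   {
      return gen >= 12 ? 256 : 192;     /* hypothetical per-generation sizes */
   }

   static void upload_qmd(void *dst, int gen)
   {
      uint32_t qmd[MAX_QMD_DWORDS] = {0};            /* worst-case scratch */
      const uint32_t size_B = qmd_size_for_gpu(gen);

      assert(size_B <= sizeof(qmd));                 /* never overrun scratch */
      /* fill qmd[0 .. size_B / 4) here */
      memcpy(dst, qmd, size_B);                      /* copy only what's used */
   }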
@@ -224,18 +224,6 @@ nvk_nir_build_pad_NOP(nir_builder *b, struct nvk_nir_push *p, uint32_t nop)
    nvk_nir_build_pad_NOP((b), (p), \
                          NVC0_FIFO_PKHDR_IL(SUBC_##class, class##_NO_OPERATION, 0))

-#define QMD_ALIGN 0x100
-#define QMD_ALLOC_SIZE QMD_ALIGN
-#define QMD_ROOT_SIZE (sizeof(struct nvk_ies_cs_qmd) + \
-                       sizeof(struct nvk_root_descriptor_table))
-
-static_assert(sizeof(struct nvk_ies_cs_qmd) % QMD_ALIGN == 0,
-              "QMD size is not properly algined");
-static_assert(sizeof(struct nvk_root_descriptor_table) % QMD_ALIGN == 0,
-              "Root descriptor table size is not aligned");
-static_assert(NVK_DGC_ALIGN >= QMD_ALIGN,
-              "QMD alignment requirement is a lower bound of DGC alignment");
-
 static void
 copy_repl_global_dw(nir_builder *b, nir_def *dst_addr, nir_def *src_addr,
                     nir_def **repl_dw, uint32_t dw_count)
@@ -250,6 +238,11 @@ copy_repl_global_dw(nir_builder *b, nir_def *dst_addr, nir_def *src_addr,
    }
 }

+#define QMD_ALLOC_CHUNK_SIZE_B NAK_QMD_ALIGN_B
+
+static_assert(NVK_DGC_ALIGN >= NAK_QMD_ALIGN_B,
+              "QMD alignment requirement is a lower bound of DGC alignment");
+
 static void
 build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
                          nir_def *in_addr, nir_def *seq_idx,
@@ -319,7 +312,18 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
    nir_def *disp_size_y = load_global_dw(b, token_addr, 1);
    nir_def *disp_size_z = load_global_dw(b, token_addr, 2);

-   *qmd_size_per_seq_B_out += QMD_ROOT_SIZE;
+   /* We need space for both a QMD and a root table */
+   const uint32_t qmd_size_B = nak_qmd_size_B(&pdev->info);
+   const uint32_t root_offset_B = align(qmd_size_B, NAK_QMD_ALIGN_B);
+   const uint32_t qmd_root_size_B =
+      root_offset_B + sizeof(struct nvk_root_descriptor_table);
+
+   /* The root table is already aligned */
+   static_assert(sizeof(struct nvk_root_descriptor_table) %
+                 NAK_QMD_ALIGN_B == 0,
+                 "Root descriptor table size is not aligned");
+
+   *qmd_size_per_seq_B_out += qmd_root_size_B;

    nir_push_if(b, nir_ior(b, nir_ior(b, nir_ine_imm(b, disp_size_x, 0),
                                      nir_ine_imm(b, disp_size_y, 0)),
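To make the layout concrete, take assumed values (for illustration only): if nak_qmd_size_B() reports 0x180 on some generation and NAK_QMD_ALIGN_B is 0x100, then root_offset_B = align(0x180, 0x100) = 0x200, so the root descriptor table starts at byte 0x200 of each per-sequence block, and qmd_root_size_B = 0x200 + sizeof(struct nvk_root_descriptor_table).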
@@ -328,15 +332,16 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
       /* The first dword in qmd_addr is an allocator in units of 256
        * bytes.
        */
+      assert(qmd_root_size_B % QMD_ALLOC_CHUNK_SIZE_B == 0);
       nir_def *qmd_idx =
          nir_global_atomic(b, 32, in->qmd_pool_addr,
-                           nir_imm_int(b, QMD_ROOT_SIZE / QMD_ALIGN),
+                           nir_imm_int(b, qmd_root_size_B /
+                                          QMD_ALLOC_CHUNK_SIZE_B),
                            .atomic_op = nir_atomic_op_iadd);
       nir_def *qmd_offset =
-         nir_imul_imm(b, nir_u2u64(b, qmd_idx), QMD_ALIGN);
+         nir_imul_imm(b, nir_u2u64(b, qmd_idx), QMD_ALLOC_CHUNK_SIZE_B);
       nir_def *qmd_addr = nir_iadd(b, in->qmd_pool_addr, qmd_offset);
-      nir_def *root_addr =
-         nir_iadd_imm(b, qmd_addr, sizeof(struct nvk_ies_cs_qmd));
+      nir_def *root_addr = nir_iadd_imm(b, qmd_addr, root_offset_B);

       /* Upload and patch the root descriptor table */
       root_repl[root_dw(cs.group_count[0])] = disp_size_x;
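The NIR built here implements a simple GPU-side bump allocator: the first dword of the QMD pool is an atomically incremented counter in QMD_ALLOC_CHUNK_SIZE_B units. A CPU-side analogue of the same idea, as a sketch (the 256-byte chunk size is taken from the comment in the hunk above; the names are hypothetical):

   #include <stdatomic.h>
   #include <stdint.h>

   #define CHUNK_SIZE_B 256   /* assumed allocation granularity */

   /* Bump-allocate size_B bytes (a whole number of chunks) from a pool whose
    * first dword is the chunk counter. Returns the byte offset of the block. */
   static uint64_t pool_alloc(_Atomic uint32_t *counter, uint32_t size_B)
   {
      uint32_t chunks = size_B / CHUNK_SIZE_B;
      uint32_t idx = atomic_fetch_add(counter, chunks);
      return (uint64_t)idx * CHUNK_SIZE_B;
   }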
@@ -353,6 +358,9 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
       assert(qmd_layout.y_start == qmd_layout.x_start + 32);

       nir_def *qmd_repl[sizeof(struct nvk_ies_cs_qmd) / 4] = {};
+      uint32_t qmd_repl_count = qmd_size_B / 4;
+      assert(qmd_repl_count <= ARRAY_SIZE(qmd_repl));
+
       qmd_repl[qmd_layout.x_start / 32] = disp_size_x;

       if (qmd_layout.z_start == qmd_layout.y_start + 32) {
@@ -382,7 +390,7 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p,
                                  nir_unpack_64_2x32_split_y(b, root_addr_shifted));

       copy_repl_global_dw(b, qmd_addr, shader_qmd_addr,
-                          qmd_repl, ARRAY_SIZE(qmd_repl));
+                          qmd_repl, qmd_repl_count);

       /* Now emit commands */
       nir_def *invoc = nir_imul_2x32_64(b, disp_size_x, disp_size_y);
@@ -714,8 +722,8 @@ build_init_shader(struct nvk_device *dev,
    struct process_cmd_in in = load_process_cmd_in(b);

    if (qmd_size_per_seq_B > 0) {
-      /* Initialize the QMD allocator to 1 * QMD_ALIGN so that the QMDs we
-       * allocate don't stomp the allocator.
+      /* Initialize the QMD allocator to 1 * QMD_ALLOC_CHUNK_SIZE_B so that
+       * the QMDs we allocate don't stomp the allocator.
        */
       assert(info->shaderStages == VK_SHADER_STAGE_COMPUTE_BIT);
       store_global_dw(b, in.qmd_pool_addr, 0, nir_imm_int(b, 1));
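Concretely: the counter itself lives in chunk 0 of the pool, so starting it at 1 means the first atomic add returns 1 and the first QMD lands at byte offset 1 * QMD_ALLOC_CHUNK_SIZE_B (256 bytes under the chunk size noted above), safely past the dword holding the counter.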
@@ -927,14 +935,14 @@ nvk_GetGeneratedCommandsMemoryRequirementsEXT(

    uint64_t size = layout->cmd_seq_stride_B * (uint64_t)pInfo->maxSequenceCount;
    if (layout->qmd_size_per_seq_B > 0) {
-      size = align64(size, QMD_ALIGN);
-      size += QMD_ALLOC_SIZE;
+      size = align64(size, NAK_QMD_ALIGN_B);
+      size += QMD_ALLOC_CHUNK_SIZE_B; /* One for the allocator */
       size += layout->qmd_size_per_seq_B * pInfo->maxSequenceCount;
    }

    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
       .size = size,
-      .alignment = QMD_ALIGN,
+      .alignment = NAK_QMD_ALIGN_B,
       .memoryTypeBits = BITFIELD_MASK(pdev->mem_type_count),
    };
 }
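A worked example of this sizing, under assumed values: with cmd_seq_stride_B = 0x140, maxSequenceCount = 100, qmd_size_per_seq_B = 0x400 and NAK_QMD_ALIGN_B = 0x100, the command sequences take 0x140 * 100 = 0x7d00 bytes (already 0x100-aligned), the allocator chunk adds 0x100 for a running total of 0x7e00, and the per-sequence QMD-plus-root-table blocks add 0x400 * 100 = 0x19000, for a final size of 0x20e00 bytes.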
@@ -975,10 +983,10 @@ nvk_cmd_process_cmds(struct nvk_cmd_buffer *cmd,
    }

    if (layout->qmd_size_per_seq_B > 0) {
-      assert(info->preprocessAddress % QMD_ALIGN == 0);
+      assert(info->preprocessAddress % NAK_QMD_ALIGN_B == 0);
       uint64_t qmd_offset =
          layout->cmd_seq_stride_B * (uint64_t)info->maxSequenceCount;
-      qmd_offset = align64(qmd_offset, QMD_ALIGN);
+      qmd_offset = align64(qmd_offset, NAK_QMD_ALIGN_B);
       push.qmd_pool_addr = info->preprocessAddress + qmd_offset;
    }
@@ -17,7 +17,13 @@ nvk_ies_map(struct nvk_indirect_execution_set *ies, uint32_t index)
    return ies->mem->map + (index * (size_t)ies->stride_B);
 }

-void
+static uint32_t
+nvk_ies_stride_cs(const struct nvk_physical_device *pdev)
+{
+   return align(nak_qmd_size_B(&pdev->info), NAK_QMD_ALIGN_B);
+}
+
+static void
 nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev,
                     struct nvk_ies_cs_qmd *qmd,
                     struct nvk_shader *shader)
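For example (values assumed for illustration): if nak_qmd_size_B() reports 0x180 and NAK_QMD_ALIGN_B is 0x100, each indirect execution set slot gets a stride of align(0x180, 0x100) = 0x200 bytes, so every QMD starts on a 256-byte boundary regardless of its exact size.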
@@ -55,8 +61,9 @@ nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev,
       }
    }

-   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info,
-                qmd->qmd, sizeof(qmd->qmd));
+   const uint32_t qmd_size = nak_qmd_size_B(&pdev->info);
+   assert(qmd_size <= sizeof(qmd->qmd));
+   nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd->qmd, qmd_size);
 }

 static void
@@ -65,11 +72,14 @@ nvk_ies_set_cs(struct nvk_device *dev,
                uint32_t index,
                struct nvk_shader *shader)
 {
-   struct nvk_ies_cs_qmd qmd = {};
-   nvk_ies_cs_qmd_init(nvk_device_physical(dev), &qmd, shader);
+   const struct nvk_physical_device *pdev = nvk_device_physical(dev);
+   const uint32_t qmd_size = nak_qmd_size_B(&pdev->info);
+
+   struct nvk_ies_cs_qmd qmd = {};
+   nvk_ies_cs_qmd_init(pdev, &qmd, shader);

-   assert(sizeof(qmd) <= ies->stride_B);
-   memcpy(nvk_ies_map(ies, index), &qmd, sizeof(qmd));
+   assert(qmd_size <= ies->stride_B);
+   memcpy(nvk_ies_map(ies, index), &qmd, qmd_size);
 }

 uint16_t
@@ -256,7 +266,7 @@ nvk_CreateIndirectExecutionSetEXT(VkDevice _device,
    if (pipeline->stages & VK_SHADER_STAGE_COMPUTE_BIT) {
       assert(pipeline->stages == VK_SHADER_STAGE_COMPUTE_BIT);
       ies->type = NVK_IES_TYPE_CS_QMD;
-      ies->stride_B = sizeof(struct nvk_ies_cs_qmd);
+      ies->stride_B = nvk_ies_stride_cs(pdev);
    } else if (pipeline->stages & NVK_SHADER_STAGE_GRAPHICS_BITS) {
       assert(!(pipeline->stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS));
       ies->type = NVK_IES_TYPE_GFX_PIPELINE;
@@ -281,7 +291,7 @@ nvk_CreateIndirectExecutionSetEXT(VkDevice _device,
    if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
       assert(stages == VK_SHADER_STAGE_COMPUTE_BIT);
       ies->type = NVK_IES_TYPE_CS_QMD;
-      ies->stride_B = sizeof(struct nvk_ies_cs_qmd);
+      ies->stride_B = nvk_ies_stride_cs(pdev);
    } else if (stages & NVK_SHADER_STAGE_GRAPHICS_BITS) {
       assert(!(stages & ~NVK_SHADER_STAGE_GRAPHICS_BITS));
       ies->type = NVK_IES_TYPE_GFX_SHADER;
@@ -7,6 +7,8 @@

 #include "nvk_private.h"

+#include "nak.h"
+
 struct nvk_physical_device;
 struct nvk_shader;
 struct nvkmd_mem;
@@ -20,20 +22,10 @@ enum nvk_ies_type {
 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
 struct nvk_ies_cs_qmd {
-   uint32_t qmd[64];
+   uint32_t qmd[NAK_MAX_QMD_DWORDS];
 };
 PRAGMA_DIAGNOSTIC_POP

-static inline uint16_t
-nvk_ies_cs_qmd_max_dw_count(const struct nvk_physical_device *pdev)
-{
-   return 64;
-}
-
-void nvk_ies_cs_qmd_init(const struct nvk_physical_device *pdev,
-                         struct nvk_ies_cs_qmd *qmd,
-                         struct nvk_shader *shader);
-
 PRAGMA_DIAGNOSTIC_PUSH
 PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
 struct nvk_ies_gfx_shader {
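Taken together, the header and .c changes make nak_qmd_size_B() the single source of truth for the QMD size: the hard-coded nvk_ies_cs_qmd_max_dw_count() helper and the public nvk_ies_cs_qmd_init() declaration go away (the latter is now static in the .c file), while the struct keeps a fixed NAK_MAX_QMD_DWORDS bound so that sizeof(struct nvk_ies_cs_qmd) remains a valid worst case for the asserts that guard every runtime-sized fill and copy.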