mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-29 10:40:42 +02:00
nvk: Refactor compute shader dispatch
This splits state flushing from QMD/root upload.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30826>
This commit is contained in:
parent
db0c2aa796
commit
83feed1d22
1 changed file with 47 additions and 30 deletions
|
|
@@ -100,18 +100,12 @@ nvk_compute_local_size(struct nvk_cmd_buffer *cmd)
|
|||
shader->info.cs.local_size[2];
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
static void
|
||||
nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
||||
uint32_t base_workgroup[3],
|
||||
uint32_t global_size[3],
|
||||
uint64_t *root_desc_addr_out)
|
||||
uint32_t global_size[3])
|
||||
{
|
||||
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
|
||||
const struct nvk_shader *shader = cmd->state.cs.shader;
|
||||
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
VkResult result;
|
||||
|
||||
nvk_cmd_buffer_flush_push_descriptors(cmd, desc);
|
||||
|
||||
|
|
@@ -119,25 +113,37 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
0, 3, base_workgroup);
|
||||
nvk_descriptor_state_set_root_array(cmd, desc, cs.group_count,
|
||||
0, 3, global_size);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
|
||||
const struct nvk_shader *shader,
|
||||
const struct nvk_descriptor_state *desc,
|
||||
const struct nvk_root_descriptor_table *root,
|
||||
uint32_t global_size[3],
|
||||
uint64_t *qmd_addr_out,
|
||||
uint64_t *root_desc_addr_out)
|
||||
{
|
||||
struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
|
||||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
|
||||
VkResult result;
|
||||
|
||||
/* pre Pascal the constant buffer sizes need to be 0x100 aligned. As we
|
||||
* simply allocated a buffer and upload data to it, make sure its size is
|
||||
* 0x100 aligned.
|
||||
*/
|
||||
STATIC_ASSERT((sizeof(desc->root) & 0xff) == 0);
|
||||
assert(sizeof(desc->root) % min_cbuf_alignment == 0);
|
||||
STATIC_ASSERT((sizeof(*root) & 0xff) == 0);
|
||||
assert(sizeof(*root) % min_cbuf_alignment == 0);
|
||||
|
||||
void *root_desc_map;
|
||||
uint64_t root_desc_addr;
|
||||
result = nvk_cmd_buffer_upload_alloc(cmd, sizeof(desc->root),
|
||||
min_cbuf_alignment,
|
||||
result = nvk_cmd_buffer_upload_alloc(cmd, sizeof(*root), min_cbuf_alignment,
|
||||
&root_desc_addr, &root_desc_map);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return 0;
|
||||
}
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
memcpy(root_desc_map, &desc->root, sizeof(desc->root));
|
||||
memcpy(root_desc_map, root, sizeof(*root));
|
||||
|
||||
struct nak_qmd_info qmd_info = {
|
||||
.addr = shader->hdr_addr,
|
||||
|
|
@@ -158,7 +164,7 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
|
||||
ba = (struct nvk_buffer_address) {
|
||||
.base_addr = root_desc_addr,
|
||||
.size = sizeof(desc->root),
|
||||
.size = sizeof(*root),
|
||||
};
|
||||
} else {
|
||||
ASSERTED bool direct_descriptor =
|
||||
|
|
@@ -184,15 +190,14 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
|
|||
|
||||
uint64_t qmd_addr;
|
||||
result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return 0;
|
||||
}
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
*qmd_addr_out = qmd_addr;
|
||||
if (root_desc_addr_out != NULL)
|
||||
*root_desc_addr_out = root_desc_addr;
|
||||
|
||||
return qmd_addr;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@@ -233,13 +238,20 @@ nvk_CmdDispatchBase(VkCommandBuffer commandBuffer,
|
|||
uint32_t groupCountZ)
|
||||
{
|
||||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
|
||||
uint32_t base_workgroup[3] = { baseGroupX, baseGroupY, baseGroupZ };
|
||||
uint32_t global_size[3] = { groupCountX, groupCountY, groupCountZ };
|
||||
uint64_t qmd_addr = nvk_flush_compute_state(cmd, base_workgroup,
|
||||
global_size, NULL);
|
||||
if (unlikely(qmd_addr == 0))
|
||||
nvk_flush_compute_state(cmd, base_workgroup, global_size);
|
||||
|
||||
uint64_t qmd_addr;
|
||||
VkResult result = nvk_cmd_upload_qmd(cmd, cmd->state.cs.shader,
|
||||
desc, (void *)desc->root, global_size,
|
||||
&qmd_addr, NULL);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint32_t local_size = nvk_compute_local_size(cmd);
|
||||
const uint64_t cs_invocations =
|
||||
|
|
@@ -417,18 +429,23 @@ nvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
|||
{
|
||||
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
|
||||
VK_FROM_HANDLE(nvk_buffer, buffer, _buffer);
|
||||
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
|
||||
uint64_t dispatch_addr = nvk_buffer_address(buffer, offset);
|
||||
|
||||
/* We set these through the MME */
|
||||
uint32_t base_workgroup[3] = { 0, 0, 0 };
|
||||
uint32_t global_size[3] = { 0, 0, 0 };
|
||||
nvk_flush_compute_state(cmd, base_workgroup, global_size);
|
||||
|
||||
uint64_t root_desc_addr;
|
||||
uint64_t qmd_addr = nvk_flush_compute_state(cmd, base_workgroup,
|
||||
global_size, &root_desc_addr);
|
||||
if (unlikely(qmd_addr == 0))
|
||||
uint64_t qmd_addr, root_desc_addr;
|
||||
VkResult result = nvk_cmd_upload_qmd(cmd, cmd->state.cs.shader,
|
||||
desc, (void *)desc->root, global_size,
|
||||
&qmd_addr, &root_desc_addr);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
|
||||
struct nv_push *p;
|
||||
if (nvk_cmd_buffer_compute_cls(cmd) >= TURING_A) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue