nvk: Be much more conservative about rebinding cbufs

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
Faith Ekstrand 2024-05-15 15:32:21 -05:00 committed by Marge Bot
parent 8b5835af31
commit 091a945b57
4 changed files with 108 additions and 24 deletions

View file

@ -595,6 +595,53 @@ nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
} }
} }
#define NVK_VK_GRAPHICS_STAGE_BITS VK_SHADER_STAGE_ALL_GRAPHICS
/* Mark as dirty any graphics cbufs whose backing storage may have moved.
 *
 * Called after descriptor state changes: sets in the half-open range
 * [sets_start, sets_end) and dynamic buffer indices in [dyn_start, dyn_end)
 * are considered re-bound.  Any cbuf sourced from one of those is flagged in
 * its group's dirty mask so it gets re-bound on the next flush.
 */
void
nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
                                    VkShaderStageFlags stages,
                                    uint32_t sets_start, uint32_t sets_end,
                                    uint32_t dyn_start, uint32_t dyn_end)
{
   /* Only graphics cbuf groups are tracked here. */
   const VkShaderStageFlags gfx_stages = stages & VK_SHADER_STAGE_ALL_GRAPHICS;
   if (gfx_stages == 0)
      return;

   /* Collapse the stage mask to the set of HW cbuf binding groups it
    * touches so each group is walked at most once.
    */
   uint32_t group_mask = 0;
   u_foreach_bit(s, gfx_stages) {
      const gl_shader_stage mesa_stage = vk_to_mesa_shader_stage(1 << s);
      group_mask |= BITFIELD_BIT(nvk_cbuf_binding_for_stage(mesa_stage));
   }

   u_foreach_bit(g, group_mask) {
      struct nvk_cbuf_group *grp = &cmd->state.gfx.cbuf_groups[g];

      for (uint32_t slot = 0; slot < ARRAY_SIZE(grp->cbufs); slot++) {
         const struct nvk_cbuf *binding = &grp->cbufs[slot];

         switch (binding->type) {
         case NVK_CBUF_TYPE_INVALID:
         case NVK_CBUF_TYPE_ROOT_DESC:
         case NVK_CBUF_TYPE_SHADER_DATA:
            /* Not sourced from descriptor sets; never affected here. */
            break;

         case NVK_CBUF_TYPE_DESC_SET:
         case NVK_CBUF_TYPE_UBO_DESC:
            /* Dirty if this cbuf pulls from one of the re-bound sets. */
            if (binding->desc_set >= sets_start &&
                binding->desc_set < sets_end)
               grp->dirty |= BITFIELD_BIT(slot);
            break;

         case NVK_CBUF_TYPE_DYNAMIC_UBO:
            /* Dirty if this cbuf's dynamic buffer index was re-bound. */
            if (binding->dynamic_idx >= dyn_start &&
                binding->dynamic_idx < dyn_end)
               grp->dirty |= BITFIELD_BIT(slot);
            break;

         default:
            unreachable("Invalid cbuf type");
         }
      }
   }
}
static void static void
nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd, nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
struct nvk_descriptor_state *desc, struct nvk_descriptor_state *desc,
@ -621,8 +668,9 @@ nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
* range and it's only our responsibility to adjust all * range and it's only our responsibility to adjust all
* set_dynamic_buffer_start[p] for p > s as needed. * set_dynamic_buffer_start[p] for p > s as needed.
*/ */
uint8_t dyn_buffer_start = const uint8_t dyn_buffer_start =
desc->root.set_dynamic_buffer_start[info->firstSet]; desc->root.set_dynamic_buffer_start[info->firstSet];
uint8_t dyn_buffer_end = dyn_buffer_start;
uint32_t next_dyn_offset = 0; uint32_t next_dyn_offset = 0;
for (uint32_t i = 0; i < info->descriptorSetCount; ++i) { for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
@ -638,7 +686,7 @@ nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
desc->sets[s] = set; desc->sets[s] = set;
} }
desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start; desc->root.set_dynamic_buffer_start[s] = dyn_buffer_end;
if (pipeline_layout->set_layouts[s] != NULL) { if (pipeline_layout->set_layouts[s] != NULL) {
const struct nvk_descriptor_set_layout *set_layout = const struct nvk_descriptor_set_layout *set_layout =
@ -655,22 +703,26 @@ nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
} else { } else {
db.addr.base_addr += offset; db.addr.base_addr += offset;
} }
desc->root.dynamic_buffers[dyn_buffer_start + j] = db; desc->root.dynamic_buffers[dyn_buffer_end + j] = db;
} }
next_dyn_offset += set->layout->dynamic_buffer_count; next_dyn_offset += set->layout->dynamic_buffer_count;
} }
dyn_buffer_start += set_layout->dynamic_buffer_count; dyn_buffer_end += set_layout->dynamic_buffer_count;
} else { } else {
assert(set == NULL); assert(set == NULL);
} }
} }
assert(dyn_buffer_start <= NVK_MAX_DYNAMIC_BUFFERS); assert(dyn_buffer_end <= NVK_MAX_DYNAMIC_BUFFERS);
assert(next_dyn_offset <= info->dynamicOffsetCount); assert(next_dyn_offset <= info->dynamicOffsetCount);
for (uint32_t s = info->firstSet + info->descriptorSetCount; for (uint32_t s = info->firstSet + info->descriptorSetCount;
s < NVK_MAX_SETS; s++) s < NVK_MAX_SETS; s++)
desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start; desc->root.set_dynamic_buffer_start[s] = dyn_buffer_end;
nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags, info->firstSet,
info->firstSet + info->descriptorSetCount,
dyn_buffer_start, dyn_buffer_end);
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
@ -679,7 +731,7 @@ nvk_CmdBindDescriptorSets2KHR(VkCommandBuffer commandBuffer,
{ {
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) { if (pBindDescriptorSetsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors, nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
pBindDescriptorSetsInfo); pBindDescriptorSetsInfo);
} }
@ -705,7 +757,7 @@ nvk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
{ {
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) if (pPushConstantsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS)
nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo); nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);
if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
@ -754,6 +806,9 @@ nvk_push_descriptor_set(struct nvk_cmd_buffer *cmd,
nvk_push_descriptor_set_update(dev, push_set, set_layout, nvk_push_descriptor_set_update(dev, push_set, set_layout,
info->descriptorWriteCount, info->descriptorWriteCount,
info->pDescriptorWrites); info->pDescriptorWrites);
nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
info->set, info->set + 1, 0, 0);
} }
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
@ -762,7 +817,7 @@ nvk_CmdPushDescriptorSet2KHR(VkCommandBuffer commandBuffer,
{ {
VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) { if (pPushDescriptorSetInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors, nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
pPushDescriptorSetInfo); pPushDescriptorSetInfo);
} }

View file

@ -11,6 +11,7 @@
#include "nvk_cmd_pool.h" #include "nvk_cmd_pool.h"
#include "nvk_descriptor_set.h" #include "nvk_descriptor_set.h"
#include "nvk_image.h" #include "nvk_image.h"
#include "nvk_shader.h"
#include "util/u_dynarray.h" #include "util/u_dynarray.h"
@ -115,6 +116,11 @@ struct nvk_graphics_state {
uint32_t shaders_dirty; uint32_t shaders_dirty;
struct nvk_shader *shaders[MESA_SHADER_MESH + 1]; struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
struct nvk_cbuf_group {
uint16_t dirty;
struct nvk_cbuf cbufs[16];
} cbuf_groups[5];
/* Used for meta save/restore */ /* Used for meta save/restore */
struct nvk_addr_range vb0; struct nvk_addr_range vb0;
@ -231,6 +237,10 @@ void nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
void nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd, void nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
struct nvk_shader *shader); struct nvk_shader *shader);
void nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
VkShaderStageFlags stages,
uint32_t sets_start, uint32_t sets_end,
uint32_t dyn_start, uint32_t dyn_end);
void nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx, void nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx,
struct nvk_addr_range addr_range); struct nvk_addr_range addr_range);

View file

@ -1176,10 +1176,22 @@ nvk_flush_shaders(struct nvk_cmd_buffer *cmd)
/* Only copy non-NULL shaders because mesh/task alias with vertex and /* Only copy non-NULL shaders because mesh/task alias with vertex and
* tessellation stages. * tessellation stages.
*/ */
if (cmd->state.gfx.shaders[stage] != NULL) { struct nvk_shader *shader = cmd->state.gfx.shaders[stage];
if (shader != NULL) {
assert(type < ARRAY_SIZE(type_shader)); assert(type < ARRAY_SIZE(type_shader));
assert(type_shader[type] == NULL); assert(type_shader[type] == NULL);
type_shader[type] = cmd->state.gfx.shaders[stage]; type_shader[type] = shader;
const struct nvk_cbuf_map *cbuf_map = &shader->cbuf_map;
struct nvk_cbuf_group *cbuf_group =
&cmd->state.gfx.cbuf_groups[nvk_cbuf_binding_for_stage(stage)];
for (uint32_t i = 0; i < cbuf_map->cbuf_count; i++) {
if (memcmp(&cbuf_group->cbufs[i], &cbuf_map->cbufs[i],
sizeof(cbuf_group->cbufs[i])) != 0) {
cbuf_group->cbufs[i] = cbuf_map->cbufs[i];
cbuf_group->dirty |= BITFIELD_BIT(i);
}
}
} }
} }
@ -2369,22 +2381,26 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
cbuf_shaders[group] = shader; cbuf_shaders[group] = shader;
} }
uint32_t root_cbuf_count = 0; for (uint32_t g = 0; g < ARRAY_SIZE(cbuf_shaders); g++) {
for (uint32_t group = 0; group < ARRAY_SIZE(cbuf_shaders); group++) { if (cbuf_shaders[g] == NULL)
if (cbuf_shaders[group] == NULL)
continue; continue;
const struct nvk_shader *shader = cbuf_shaders[group]; const struct nvk_shader *shader = cbuf_shaders[g];
const struct nvk_cbuf_map *cbuf_map = &shader->cbuf_map; const struct nvk_cbuf_map *cbuf_map = &shader->cbuf_map;
struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];
for (uint32_t c = 0; c < cbuf_map->cbuf_count; c++) { /* We only bother to re-bind cbufs that are in use */
const struct nvk_cbuf *cbuf = &cbuf_map->cbufs[c]; const uint32_t rebind =
group->dirty & BITFIELD_MASK(cbuf_map->cbuf_count);
if (!rebind)
continue;
u_foreach_bit(c, rebind) {
const struct nvk_cbuf *cbuf = &group->cbufs[c];
/* We bind these at the very end */ /* We bind these at the very end */
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) { if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC)
root_cbuf_count++;
continue; continue;
}
struct nvk_buffer_address ba; struct nvk_buffer_address ba;
if (nvk_cmd_buffer_get_cbuf_addr(cmd, desc, shader, cbuf, &ba)) { if (nvk_cmd_buffer_get_cbuf_addr(cmd, desc, shader, cbuf, &ba)) {
@ -2401,7 +2417,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_C(p, ba.base_addr); P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_C(p, ba.base_addr);
} }
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), { P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(g), {
.valid = ba.size > 0, .valid = ba.size > 0,
.shader_slot = c, .shader_slot = c,
}); });
@ -2413,20 +2429,22 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4); struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC)); P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC));
P_INLINE_DATA(p, group | (c << 4)); P_INLINE_DATA(p, g | (c << 4));
P_INLINE_DATA(p, desc_addr >> 32); P_INLINE_DATA(p, desc_addr >> 32);
P_INLINE_DATA(p, desc_addr); P_INLINE_DATA(p, desc_addr);
} else { } else {
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2); struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC)); P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC));
P_INLINE_DATA(p, group | (c << 4)); P_INLINE_DATA(p, g | (c << 4));
nv_push_update_count(p, 3); nv_push_update_count(p, 3);
nvk_cmd_buffer_push_indirect(cmd, desc_addr, 3); nvk_cmd_buffer_push_indirect(cmd, desc_addr, 3);
} }
} }
} }
group->dirty &= ~rebind;
} }
/* We bind all root descriptors last so that CONSTANT_BUFFER_SELECTOR is /* We bind all root descriptors last so that CONSTANT_BUFFER_SELECTOR is
@ -2434,7 +2452,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
* parameters and similar MME root table updates always hit the root * parameters and similar MME root table updates always hit the root
* descriptor table and not some random UBO. * descriptor table and not some random UBO.
*/ */
struct nv_push *p = nvk_cmd_buffer_push(cmd, 4 + 2 * root_cbuf_count); struct nv_push *p = nvk_cmd_buffer_push(cmd, 14);
P_MTHD(p, NV9097, SET_CONSTANT_BUFFER_SELECTOR_A); P_MTHD(p, NV9097, SET_CONSTANT_BUFFER_SELECTOR_A);
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_A(p, sizeof(desc->root)); P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_A(p, sizeof(desc->root));
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_B(p, root_desc_addr >> 32); P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_B(p, root_desc_addr >> 32);

View file

@ -147,6 +147,7 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
*desc->push[0] = save->push_desc0; *desc->push[0] = save->push_desc0;
desc->push_dirty |= BITFIELD_BIT(0); desc->push_dirty |= BITFIELD_BIT(0);
} }
nvk_cmd_dirty_cbufs_for_descriptors(cmd, ~0, 0, 1, 0, 0);
/* Restore set_dynamic_buffer_start because meta binding set 0 can disturb /* Restore set_dynamic_buffer_start because meta binding set 0 can disturb
* all dynamic buffers starts for all sets. * all dynamic buffers starts for all sets.