mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 20:48:08 +02:00
nvk: Use inline constant buffer updates for CB0
This is what the old GL driver did and appears to be what the blob does as well. Inline updates should pipeline much better than full buffer re-binds, which appear to be causing stalling issues inside the GPU. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
parent
f716bab6b7
commit
9f7081b921
6 changed files with 117 additions and 44 deletions
|
|
@ -77,6 +77,10 @@ struct nvk_root_descriptor_table {
|
|||
|
||||
struct nvk_descriptor_state {
|
||||
alignas(16) char root[sizeof(struct nvk_root_descriptor_table)];
|
||||
void (*flush_root)(struct nvk_cmd_buffer *cmd,
|
||||
struct nvk_descriptor_state *desc,
|
||||
size_t offset, size_t size);
|
||||
|
||||
struct nvk_descriptor_set *sets[NVK_MAX_SETS];
|
||||
struct nvk_push_descriptor_set *push[NVK_MAX_SETS];
|
||||
uint32_t push_dirty;
|
||||
|
|
@ -103,6 +107,10 @@ struct nvk_descriptor_state {
|
|||
struct nvk_root_descriptor_table *root = \
|
||||
(struct nvk_root_descriptor_table *)_desc->root; \
|
||||
root->member = (src); \
|
||||
if (_desc->flush_root != NULL) { \
|
||||
size_t offset = (char *)&root->member - (char *)root; \
|
||||
_desc->flush_root((cmd), _desc, offset, sizeof(root->member)); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define nvk_descriptor_state_set_root_array(cmd, desc, member, \
|
||||
|
|
@ -114,6 +122,11 @@ struct nvk_descriptor_state {
|
|||
assert(_start + count <= ARRAY_SIZE(root->member)); \
|
||||
for (unsigned i = 0; i < count; i++) \
|
||||
root->member[i + _start] = (src)[i]; \
|
||||
if (_desc->flush_root != NULL) { \
|
||||
size_t offset = (char *)&root->member[_start] - (char *)root; \
|
||||
_desc->flush_root((cmd), _desc, offset, \
|
||||
count * sizeof(root->member[0])); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
struct nvk_attachment {
|
||||
|
|
|
|||
|
|
@ -88,6 +88,20 @@ nvk_mme_set_conservative_raster_state(struct mme_builder *b)
|
|||
}
|
||||
}
|
||||
|
||||
/* Size of the per-draw CB0 constant buffer: exactly one root descriptor
 * table.  This is also the size programmed into the constant buffer
 * selector below, so inline root-table updates can never write past CB0.
 */
#define NVK_DRAW_CB0_SIZE sizeof(struct nvk_root_descriptor_table)

/* MME macro: point the NV9097 constant buffer selector at CB0.
 *
 * The 64-bit GPU address of CB0 is recovered from the two MME shadow
 * scratch registers (CB0_ADDR_HI/LO) that were programmed at queue init
 * time.  After this macro runs, LOAD_CONSTANT_BUFFER_OFFSET + inline data
 * writes land in CB0, which is how root descriptor updates are flushed.
 */
void
nvk_mme_select_cb0(struct mme_builder *b)
{
   struct mme_value addr_hi = nvk_mme_load_scratch(b, CB0_ADDR_HI);
   struct mme_value addr_lo = nvk_mme_load_scratch(b, CB0_ADDR_LO);

   /* SELECTOR_A/B/C take size, address high, address low, in that order */
   mme_mthd(b, NV9097_SET_CONSTANT_BUFFER_SELECTOR_A);
   mme_emit(b, mme_imm(NVK_DRAW_CB0_SIZE));
   mme_emit(b, addr_hi);
   mme_emit(b, addr_lo);
}
|
||||
|
||||
VkResult
|
||||
nvk_push_draw_state_init(struct nvk_queue *queue, struct nv_push *p)
|
||||
{
|
||||
|
|
@ -490,6 +504,30 @@ nvk_push_draw_state_init(struct nvk_queue *queue, struct nv_push *p)
|
|||
if (pdev->info.cls_eng3d == MAXWELL_A)
|
||||
P_IMMD(p, NVB097, SET_SELECT_MAXWELL_TEXTURE_HEADERS, V_TRUE);
|
||||
|
||||
/* Store the address to CB0 in a pair of state registers */
|
||||
uint64_t cb0_addr = queue->draw_cb0->offset;
|
||||
P_MTHD(p, NV9097, SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CB0_ADDR_HI));
|
||||
P_NV9097_SET_MME_SHADOW_SCRATCH(p, NVK_MME_SCRATCH_CB0_ADDR_HI, cb0_addr >> 32);
|
||||
P_NV9097_SET_MME_SHADOW_SCRATCH(p, NVK_MME_SCRATCH_CB0_ADDR_LO, cb0_addr);
|
||||
|
||||
/* We leave CB0 selected by default */
|
||||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SELECT_CB0));
|
||||
P_INLINE_DATA(p, 0);
|
||||
|
||||
/* Bind CB0 to all shader groups */
|
||||
for (uint32_t group = 0; group < 5; group++) {
|
||||
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
|
||||
.valid = VALID_TRUE,
|
||||
.shader_slot = 0,
|
||||
});
|
||||
}
|
||||
|
||||
/* Zero out CB0 */
|
||||
P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET);
|
||||
P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, 0);
|
||||
for (uint32_t dw = 0; dw < NVK_DRAW_CB0_SIZE / 4; dw++)
|
||||
P_INLINE_DATA(p, 0);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -514,6 +552,23 @@ nvk_cmd_buffer_dirty_render_pass(struct nvk_cmd_buffer *cmd)
|
|||
BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
|
||||
}
|
||||
|
||||
/* flush_root callback for graphics descriptors: push the dirty byte range
 * [offset, offset + size) of the root descriptor table into CB0 as an
 * inline constant buffer update.
 *
 * Assumes CB0 is the currently selected constant buffer — it is selected
 * at queue init and re-selected after any cbuf binding that changes the
 * selector (see nvk_flush_descriptors).
 */
static void
nvk_cmd_flush_gfx_root_desc(struct nvk_cmd_buffer *cmd,
                            struct nvk_descriptor_state *desc,
                            size_t offset, size_t size)
{
   /* Constant uploads are dword-granular, so widen the byte range to whole
    * dwords: round the start down and the end up.
    */
   const uint32_t start_dw = offset / 4;
   const uint32_t end_dw = DIV_ROUND_UP(offset + size, 4);
   const uint32_t len_dw = end_dw - start_dw;

   /* 2 dwords for the method header + offset, then len_dw of payload */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2 + len_dw);
   P_1INC(p, NV9097, LOAD_CONSTANT_BUFFER_OFFSET);
   P_NV9097_LOAD_CONSTANT_BUFFER_OFFSET(p, start_dw * 4);

   /* The root table is just a dword array from the GPU's point of view;
    * root is alignas(16) so this cast is safe.
    */
   const uint32_t *root_dw = (uint32_t *)desc->root;
   P_INLINE_ARRAY(p, &root_dw[start_dw], len_dw);
}
|
||||
|
||||
void
|
||||
nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd,
|
||||
const VkCommandBufferBeginInfo *pBeginInfo)
|
||||
|
|
@ -533,6 +588,8 @@ nvk_cmd_buffer_begin_graphics(struct nvk_cmd_buffer *cmd,
|
|||
});
|
||||
}
|
||||
|
||||
cmd->state.gfx.descriptors.flush_root = nvk_cmd_flush_gfx_root_desc;
|
||||
|
||||
if (cmd->vk.level != VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
|
||||
(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
|
||||
char gcbiar_data[VK_GCBIARR_DATA_SIZE(NVK_MAX_RTS)];
|
||||
|
|
@ -590,6 +647,9 @@ nvk_cmd_invalidate_graphics_state(struct nvk_cmd_buffer *cmd)
|
|||
memset(&cmd->state.gfx, 0, sizeof(cmd->state.gfx));
|
||||
cmd->state.gfx.render = render_save;
|
||||
|
||||
/* We need to keep the flush_root callback */
|
||||
cmd->state.gfx.descriptors.flush_root = nvk_cmd_flush_gfx_root_desc;
|
||||
|
||||
cmd->state.gfx.shaders_dirty = ~0;
|
||||
}
|
||||
|
||||
|
|
@ -2348,29 +2408,9 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
|
|||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
|
||||
struct nvk_descriptor_state *desc = &cmd->state.gfx.descriptors;
|
||||
VkResult result;
|
||||
|
||||
nvk_cmd_buffer_flush_push_descriptors(cmd, desc);
|
||||
|
||||
/* pre Pascal the constant buffer sizes need to be 0x100 aligned. As we
|
||||
* simply allocated a buffer and upload data to it, make sure its size is
|
||||
* 0x100 aligned.
|
||||
*/
|
||||
STATIC_ASSERT((sizeof(desc->root) & 0xff) == 0);
|
||||
assert(sizeof(desc->root) % min_cbuf_alignment == 0);
|
||||
|
||||
void *root_desc_map;
|
||||
uint64_t root_desc_addr;
|
||||
result = nvk_cmd_buffer_upload_alloc(cmd, sizeof(desc->root),
|
||||
min_cbuf_alignment,
|
||||
&root_desc_addr, &root_desc_map);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(root_desc_map, &desc->root, sizeof(desc->root));
|
||||
|
||||
/* Find cbuf maps for the 5 cbuf groups */
|
||||
const struct nvk_shader *cbuf_shaders[5] = { NULL, };
|
||||
for (gl_shader_stage stage = 0; stage < MESA_SHADER_STAGES; stage++) {
|
||||
|
|
@ -2383,6 +2423,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
|
|||
cbuf_shaders[group] = shader;
|
||||
}
|
||||
|
||||
bool bound_any_cbuf = false;
|
||||
for (uint32_t g = 0; g < ARRAY_SIZE(cbuf_shaders); g++) {
|
||||
if (cbuf_shaders[g] == NULL)
|
||||
continue;
|
||||
|
|
@ -2404,6 +2445,8 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
|
|||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC)
|
||||
continue;
|
||||
|
||||
bound_any_cbuf = true;
|
||||
|
||||
struct nvk_buffer_address ba;
|
||||
if (nvk_cmd_buffer_get_cbuf_addr(cmd, desc, shader, cbuf, &ba)) {
|
||||
assert(ba.base_addr % min_cbuf_alignment == 0);
|
||||
|
|
@ -2454,27 +2497,10 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
|
|||
* parameters and similar MME root table updates always hit the root
|
||||
* descriptor table and not some random UBO.
|
||||
*/
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 14);
|
||||
P_MTHD(p, NV9097, SET_CONSTANT_BUFFER_SELECTOR_A);
|
||||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_A(p, sizeof(desc->root));
|
||||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_B(p, root_desc_addr >> 32);
|
||||
P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_C(p, root_desc_addr);
|
||||
|
||||
for (uint32_t group = 0; group < ARRAY_SIZE(cbuf_shaders); group++) {
|
||||
if (cbuf_shaders[group] == NULL)
|
||||
continue;
|
||||
|
||||
const struct nvk_cbuf_map *cbuf_map = &cbuf_shaders[group]->cbuf_map;
|
||||
|
||||
for (uint32_t c = 0; c < cbuf_map->cbuf_count; c++) {
|
||||
const struct nvk_cbuf *cbuf = &cbuf_map->cbufs[c];
|
||||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
|
||||
P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
|
||||
.valid = VALID_TRUE,
|
||||
.shader_slot = c,
|
||||
});
|
||||
}
|
||||
}
|
||||
if (bound_any_cbuf) {
|
||||
struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
|
||||
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_SELECT_CB0));
|
||||
P_INLINE_DATA(p, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "nvk_private.h"
|
||||
|
||||
static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
|
||||
[NVK_MME_SELECT_CB0] = nvk_mme_select_cb0,
|
||||
[NVK_MME_BIND_CBUF_DESC] = nvk_mme_bind_cbuf_desc,
|
||||
[NVK_MME_CLEAR] = nvk_mme_clear,
|
||||
[NVK_MME_DRAW] = nvk_mme_draw,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
struct nv_device_info;
|
||||
|
||||
enum nvk_mme {
|
||||
NVK_MME_SELECT_CB0,
|
||||
NVK_MME_BIND_CBUF_DESC,
|
||||
NVK_MME_CLEAR,
|
||||
NVK_MME_DRAW,
|
||||
|
|
@ -41,6 +42,10 @@ enum nvk_mme_scratch {
|
|||
NVK_MME_SCRATCH_WRITE_MASK_PIPELINE,
|
||||
NVK_MME_SCRATCH_CONSERVATIVE_RASTER_STATE,
|
||||
|
||||
/* Address of cb0 */
|
||||
NVK_MME_SCRATCH_CB0_ADDR_HI,
|
||||
NVK_MME_SCRATCH_CB0_ADDR_LO,
|
||||
|
||||
/* Must be at the end */
|
||||
NVK_MME_NUM_SCRATCH,
|
||||
};
|
||||
|
|
@ -113,6 +118,7 @@ uint32_t *nvk_build_mme(const struct nv_device_info *devinfo,
|
|||
|
||||
void nvk_test_build_all_mmes(const struct nv_device_info *devinfo);
|
||||
|
||||
void nvk_mme_select_cb0(struct mme_builder *b);
|
||||
void nvk_mme_bind_cbuf_desc(struct mme_builder *b);
|
||||
void nvk_mme_clear(struct mme_builder *b);
|
||||
void nvk_mme_draw(struct mme_builder *b);
|
||||
|
|
|
|||
|
|
@ -295,7 +295,7 @@ nvk_queue_init_context_state(struct nvk_queue *queue,
|
|||
struct nvk_physical_device *pdev = nvk_device_physical(dev);
|
||||
VkResult result;
|
||||
|
||||
uint32_t push_data[2048];
|
||||
uint32_t push_data[1024 * 3];
|
||||
struct nv_push push;
|
||||
nv_push_init(&push, push_data, ARRAY_SIZE(push_data));
|
||||
struct nv_push *p = &push;
|
||||
|
|
@ -358,9 +358,25 @@ nvk_queue_init(struct nvk_device *dev, struct nvk_queue *queue,
|
|||
|
||||
nvk_queue_state_init(&queue->state);
|
||||
|
||||
if (queue_flags & VK_QUEUE_GRAPHICS_BIT) {
|
||||
queue->draw_cb0 = nouveau_ws_bo_new(dev->ws_dev, 4096, 0,
|
||||
NOUVEAU_WS_BO_LOCAL |
|
||||
NOUVEAU_WS_BO_NO_SHARE);
|
||||
if (queue->draw_cb0 == NULL) {
|
||||
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
goto fail_state;
|
||||
}
|
||||
|
||||
result = nvk_upload_queue_fill(dev, &dev->upload,
|
||||
queue->draw_cb0->offset, 0,
|
||||
queue->draw_cb0->size);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_draw_cb0;
|
||||
}
|
||||
|
||||
result = nvk_queue_init_drm_nouveau(dev, queue, queue_flags);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_init;
|
||||
goto fail_draw_cb0;
|
||||
|
||||
result = nvk_queue_init_context_state(queue, queue_flags);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
@ -370,7 +386,11 @@ nvk_queue_init(struct nvk_device *dev, struct nvk_queue *queue,
|
|||
|
||||
fail_drm:
|
||||
nvk_queue_finish_drm_nouveau(dev, queue);
|
||||
fail_init:
|
||||
fail_draw_cb0:
|
||||
if (queue->draw_cb0 != NULL)
|
||||
nouveau_ws_bo_destroy(queue->draw_cb0);
|
||||
fail_state:
|
||||
nvk_queue_state_finish(dev, &queue->state);
|
||||
vk_queue_finish(&queue->vk);
|
||||
|
||||
return result;
|
||||
|
|
@ -379,6 +399,10 @@ fail_init:
|
|||
void
|
||||
nvk_queue_finish(struct nvk_device *dev, struct nvk_queue *queue)
|
||||
{
|
||||
if (queue->draw_cb0 != NULL) {
|
||||
nvk_upload_queue_sync(dev, &dev->upload);
|
||||
nouveau_ws_bo_destroy(queue->draw_cb0);
|
||||
}
|
||||
nvk_queue_state_finish(dev, &queue->state);
|
||||
nvk_queue_finish_drm_nouveau(dev, queue);
|
||||
vk_queue_finish(&queue->vk);
|
||||
|
|
|
|||
|
|
@ -51,6 +51,9 @@ struct nvk_queue {
|
|||
} drm;
|
||||
|
||||
struct nvk_queue_state state;
|
||||
|
||||
/* CB0 for all draw commands on this queue */
|
||||
struct nouveau_ws_bo *draw_cb0;
|
||||
};
|
||||
|
||||
static inline struct nvk_device *
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue