From f14dbc02fbb65f25b1a3a7cddfcf2af4f57bf2de Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 19 May 2021 18:34:25 -0400 Subject: [PATCH] panfrost: Don't duplicate attribute buffers If the (vbi, divisor) tuple matches, we can save an attribute buffer descriptor. We do the linking at CSO create time. This should be a bit more cache friendly. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 25 +++++++------- src/gallium/drivers/panfrost/pan_context.c | 34 ++++++++++++++++++++ src/gallium/drivers/panfrost/pan_context.h | 11 +++++++ 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index dd8591e51fd..fdd41cbb943 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1623,9 +1623,15 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, * Also, we allocate more memory than what's needed here if either instancing * is enabled or images are present, this can be improved. */ unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1; - unsigned nr_bufs = (vs->info.attribute_count * bufs_per_attrib) + + unsigned nr_bufs = ((so->nr_bufs + nr_images) * bufs_per_attrib) + (pan_is_bifrost(dev) ? 1 : 0); + /* Midgard needs vertexid/instanceid handled specially */ + bool special_vbufs = dev->arch < 6 && vs->info.attribute_count >= PAN_VERTEX_ID; + + if (special_vbufs) + nr_bufs += 2; + if (!nr_bufs) { *buffers = 0; return 0; @@ -1648,14 +1654,9 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 }; unsigned k = 0; - for (unsigned i = 0; i < so->num_elements; ++i) { - /* We map buffers 1:1 with the attributes, which - * means duplicating some vertex buffers (who cares? 
aside from - * maybe some caching implications but I somehow doubt that - * matters) */ - - struct pipe_vertex_element *elem = &so->pipe[i]; - unsigned vbi = elem->vertex_buffer_index; + for (unsigned i = 0; i < so->nr_bufs; ++i) { + unsigned vbi = so->buffers[i].vbi; + unsigned divisor = so->buffers[i].divisor; attrib_to_buffer[i] = k; if (!(ctx->vb_mask & (1 << vbi))) @@ -1685,7 +1686,6 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, /* When there is a divisor, the hardware-level divisor is * the product of the instance divisor and the padded count */ - unsigned divisor = elem->instance_divisor; unsigned stride = buf->stride; if (ctx->indirect_draw) { @@ -1766,8 +1766,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, } /* Add special gl_VertexID/gl_InstanceID buffers */ - - if (unlikely(vs->info.attribute_count >= PAN_VERTEX_ID)) { + if (unlikely(special_vbufs)) { panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1); pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) { @@ -1821,7 +1820,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, src_offset -= buf->stride * ctx->offset_start; pan_pack(out + i, ATTRIBUTE, cfg) { - cfg.buffer_index = attrib_to_buffer[i]; + cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]]; cfg.format = so->formats[i]; cfg.offset = src_offset; } diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 186b2d59251..de4113b5631 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -897,6 +897,31 @@ panfrost_set_shader_images( } } +/* Assigns a vertex buffer for a given (index, divisor) tuple */ + +static unsigned +pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, + unsigned *nr_bufs, + unsigned vbi, + unsigned divisor) +{ + /* Look up the buffer */ + for (unsigned i = 0; i < (*nr_bufs); ++i) { + if (buffers[i].vbi == vbi && buffers[i].divisor == divisor) + return i; + } + + /* 
Else, create a new buffer */ + unsigned idx = (*nr_bufs)++; + + buffers[idx] = (struct pan_vertex_buffer) { + .vbi = vbi, + .divisor = divisor + }; + + return idx; +} + static void * panfrost_create_vertex_elements_state( struct pipe_context *pctx, @@ -909,6 +934,15 @@ panfrost_create_vertex_elements_state( so->num_elements = num_elements; memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + /* Assign attribute buffers corresponding to the vertex buffers, keyed + * for a particular divisor since that's how instancing works on Mali */ + for (unsigned i = 0; i < num_elements; ++i) { + so->element_buffer[i] = pan_assign_vertex_buffer( + so->buffers, &so->nr_bufs, + elements[i].vertex_buffer_index, + elements[i].instance_divisor); + } + for (int i = 0; i < num_elements; ++i) { enum pipe_format fmt = elements[i].src_format; const struct util_format_description *desc = util_format_description(fmt); diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 9e3f86fb10d..dcc970db984 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -306,9 +306,20 @@ struct panfrost_shader_variants { unsigned active_variant; }; +struct pan_vertex_buffer { + unsigned vbi; + unsigned divisor; +}; + struct panfrost_vertex_state { unsigned num_elements; + /* buffers corresponds to the attribute buffers, element_buffer corresponds + * to an index in buffers for each vertex element */ + struct pan_vertex_buffer buffers[PIPE_MAX_ATTRIBS]; + unsigned element_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_bufs; + struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; unsigned formats[PIPE_MAX_ATTRIBS]; };