vbo: switch immediate Begin/End to DrawGallium

This makes gallium faster because st/mesa doesn't have to translate
_mesa_prim.

Reviewed-by: Zoltán Böszörményi <zboszor@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7679>
This commit is contained in:
Marek Olšák 2020-11-02 02:00:37 -05:00
parent bc6741832e
commit 375453bb8b
3 changed files with 92 additions and 50 deletions

View file

@ -37,6 +37,7 @@
#include "main/draw.h"
#include "main/macros.h"
#include "vbo_attrib.h"
#include "gallium/include/pipe/p_state.h"
#ifdef __cplusplus
extern "C" {
@ -80,20 +81,42 @@ struct vbo_exec_copied_vtx {
GLuint nr;
};
/**
* Begin/End markers recorded for one immediate-mode draw.
*
* vbo_exec_context keeps one of these per draw in vtx.markers[], indexed the
* same way as vtx.mode[] and vtx.draw[]. vbo_exec_Begin sets "begin" on the
* draw it opens and vbo_exec_End sets "end" on the draw it closes.
* vbo_exec_wrap_buffers clears "end" on a draw that is cut short inside
* Begin/End and normally starts the continuation draw with "begin" cleared.
*/
struct vbo_markers
{
/**
* If false and the primitive is a line loop, the first vertex is
* the beginning of the line loop and it won't be drawn.
* Instead, it will be moved to the end.
*
* Drivers shouldn't reset the line stipple pattern walker if begin is
* false and mode is a line strip.
*/
bool begin;
/**
* If true and the primitive is a line loop, it will be closed.
*/
bool end;
};
struct vbo_exec_context
{
GLvertexformat vtxfmt;
GLvertexformat vtxfmt_noop;
struct {
/* Multi draw where the mode can vary between draws. */
struct pipe_draw_info info;
struct pipe_draw_start_count draw[VBO_MAX_PRIM];
GLubyte mode[VBO_MAX_PRIM]; /**< primitive modes per draw */
struct vbo_markers markers[VBO_MAX_PRIM];
unsigned prim_count;
struct gl_buffer_object *bufferobj;
GLuint vertex_size; /* in dwords */
GLuint vertex_size_no_pos;
struct _mesa_prim prim[VBO_MAX_PRIM];
GLuint prim_count;
fi_type *buffer_map;
fi_type *buffer_ptr; /* cursor, points into buffer */
GLuint buffer_used; /* in bytes */

View file

@ -82,29 +82,30 @@ vbo_exec_wrap_buffers(struct vbo_exec_context *exec)
}
else {
struct gl_context *ctx = gl_context_from_vbo_exec(exec);
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
const GLuint last_begin = last_prim->begin;
unsigned last = exec->vtx.prim_count - 1;
struct pipe_draw_start_count *last_draw = &exec->vtx.draw[last];
const bool last_begin = exec->vtx.markers[last].begin;
GLuint last_count = 0;
if (_mesa_inside_begin_end(ctx)) {
last_prim->count = exec->vtx.vert_count - last_prim->start;
last_count = last_prim->count;
last_prim->end = 0;
last_draw->count = exec->vtx.vert_count - last_draw->start;
last_count = last_draw->count;
exec->vtx.markers[last].end = 0;
}
/* Special handling for wrapping GL_LINE_LOOP */
if (last_prim->mode == GL_LINE_LOOP &&
if (exec->vtx.mode[last] == GL_LINE_LOOP &&
last_count > 0 &&
!last_prim->end) {
!exec->vtx.markers[last].end) {
/* draw this section of the incomplete line loop as a line strip */
last_prim->mode = GL_LINE_STRIP;
if (!last_prim->begin) {
exec->vtx.mode[last] = GL_LINE_STRIP;
if (!last_begin) {
/* This is not the first section of the line loop, so don't
* draw the 0th vertex. We're saving it until we draw the
* very last section of the loop.
*/
last_prim->start++;
last_prim->count--;
last_draw->start++;
last_draw->count--;
}
}
@ -122,13 +123,13 @@ vbo_exec_wrap_buffers(struct vbo_exec_context *exec)
assert(exec->vtx.prim_count == 0);
if (_mesa_inside_begin_end(ctx)) {
exec->vtx.prim[0].mode = ctx->Driver.CurrentExecPrimitive;
exec->vtx.prim[0].begin = 0;
exec->vtx.prim[0].start = 0;
exec->vtx.mode[0] = ctx->Driver.CurrentExecPrimitive;
exec->vtx.draw[0].start = 0;
exec->vtx.markers[0].begin = 0;
exec->vtx.prim_count++;
if (exec->vtx.copied.nr == last_count)
exec->vtx.prim[0].begin = last_begin;
exec->vtx.markers[0].begin = last_begin;
}
}
}
@ -836,9 +837,9 @@ vbo_exec_Begin(GLenum mode)
vbo_exec_FlushVertices_internal(exec, FLUSH_STORED_VERTICES);
i = exec->vtx.prim_count++;
exec->vtx.prim[i].mode = mode;
exec->vtx.prim[i].begin = 1;
exec->vtx.prim[i].start = exec->vtx.vert_count;
exec->vtx.mode[i] = mode;
exec->vtx.draw[i].start = exec->vtx.vert_count;
exec->vtx.markers[i].begin = 1;
ctx->Driver.CurrentExecPrimitive = mode;
@ -864,22 +865,27 @@ vbo_exec_Begin(GLenum mode)
static void
try_vbo_merge(struct vbo_exec_context *exec)
{
struct _mesa_prim *cur = &exec->vtx.prim[exec->vtx.prim_count - 1];
unsigned cur = exec->vtx.prim_count - 1;
assert(exec->vtx.prim_count >= 1);
vbo_try_prim_conversion(&cur->mode, &cur->count);
vbo_try_prim_conversion(&exec->vtx.mode[cur], &exec->vtx.draw[cur].count);
if (exec->vtx.prim_count >= 2) {
struct gl_context *ctx = gl_context_from_vbo_exec(exec);
struct _mesa_prim *prev = &exec->vtx.prim[exec->vtx.prim_count - 2];
assert(prev == cur - 1);
unsigned prev = cur - 1;
if (vbo_merge_draws(ctx, false,
prev->mode, cur->mode, prev->start, cur->start,
&prev->count, cur->count,
prev->basevertex, cur->basevertex,
&prev->end, cur->begin, cur->end))
exec->vtx.mode[prev],
exec->vtx.mode[cur],
exec->vtx.draw[prev].start,
exec->vtx.draw[cur].start,
&exec->vtx.draw[prev].count,
exec->vtx.draw[cur].count,
0, 0,
&exec->vtx.markers[prev].end,
exec->vtx.markers[cur].begin,
exec->vtx.markers[cur].end))
exec->vtx.prim_count--; /* drop the last primitive */
}
}
@ -910,31 +916,33 @@ vbo_exec_End(void)
if (exec->vtx.prim_count > 0) {
/* close off current primitive */
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
unsigned count = exec->vtx.vert_count - last_prim->start;
unsigned last = exec->vtx.prim_count - 1;
struct pipe_draw_start_count *last_draw = &exec->vtx.draw[last];
unsigned count = exec->vtx.vert_count - last_draw->start;
last_prim->end = 1;
last_prim->count = count;
last_draw->count = count;
exec->vtx.markers[last].end = 1;
if (count)
ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
/* Special handling for GL_LINE_LOOP */
if (last_prim->mode == GL_LINE_LOOP && last_prim->begin == 0) {
if (exec->vtx.mode[last] == GL_LINE_LOOP &&
exec->vtx.markers[last].begin == 0) {
/* We're finishing drawing a line loop. Append 0th vertex onto
* end of vertex buffer so we can draw it as a line strip.
*/
const fi_type *src = exec->vtx.buffer_map +
last_prim->start * exec->vtx.vertex_size;
last_draw->start * exec->vtx.vertex_size;
fi_type *dst = exec->vtx.buffer_map +
exec->vtx.vert_count * exec->vtx.vertex_size;
/* copy 0th vertex to end of buffer */
memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type));
last_prim->start++; /* skip vertex0 */
/* note that last_prim->count stays unchanged */
last_prim->mode = GL_LINE_STRIP;
last_draw->start++; /* skip vertex0 */
/* note that the count stays unchanged */
exec->vtx.mode[last] = GL_LINE_STRIP;
/* Increment the vertex count so the next primitive doesn't
* overwrite the last vertex which we just added.
@ -1037,6 +1045,9 @@ vbo_exec_vtx_init(struct vbo_exec_context *exec, bool use_buffer_objects)
exec->vtx.enabled = u_bit_consecutive64(0, VBO_ATTRIB_MAX); /* reset all */
vbo_reset_all_attr(exec);
exec->vtx.info.instance_count = 1;
exec->vtx.info.max_index = ~0;
}

View file

@ -53,14 +53,13 @@ vbo_exec_debug_verts(struct vbo_exec_context *exec)
exec->vtx.vertex_size);
for (i = 0 ; i < exec->vtx.prim_count ; i++) {
struct _mesa_prim *prim = &exec->vtx.prim[i];
printf(" prim %d: %s %d..%d %s %s\n",
i,
_mesa_lookup_prim_by_nr(prim->mode),
prim->start,
prim->start + prim->count,
prim->begin ? "BEGIN" : "(wrap)",
prim->end ? "END" : "(wrap)");
_mesa_lookup_prim_by_nr(exec->vtx.mode[i]),
exec->vtx.draw[i].start,
exec->vtx.draw[i].start + exec->vtx.draw[i].count,
exec->vtx.markers[i].begin ? "BEGIN" : "(wrap)",
exec->vtx.markers[i].end ? "END" : "(wrap)");
}
}
@ -69,14 +68,17 @@ static GLuint
vbo_exec_copy_vertices(struct vbo_exec_context *exec)
{
struct gl_context *ctx = gl_context_from_vbo_exec(exec);
struct _mesa_prim *last_prim = &exec->vtx.prim[exec->vtx.prim_count - 1];
const GLuint sz = exec->vtx.vertex_size;
fi_type *dst = exec->vtx.copied.buffer;
const fi_type *src = exec->vtx.buffer_map + last_prim->start * sz;
unsigned last = exec->vtx.prim_count - 1;
unsigned start = exec->vtx.draw[last].start;
const fi_type *src = exec->vtx.buffer_map + start * sz;
return vbo_copy_vertices(ctx, ctx->Driver.CurrentExecPrimitive,
last_prim->start, &last_prim->count,
last_prim->begin, sz, false, dst, src);
start,
&exec->vtx.draw[last].count,
exec->vtx.markers[last].begin,
sz, false, dst, src);
}
@ -328,8 +330,14 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec)
printf("%s %d %d\n", __func__, exec->vtx.prim_count,
exec->vtx.vert_count);
ctx->Driver.Draw(ctx, exec->vtx.prim, exec->vtx.prim_count, NULL,
true, false, 0, 0, exec->vtx.vert_count - 1, 1, 0);
exec->vtx.info.vertices_per_patch =
ctx->TessCtrlProgram.patch_vertices;
ctx->Driver.DrawGalliumComplex(ctx, &exec->vtx.info,
exec->vtx.draw,
exec->vtx.mode,
NULL,
exec->vtx.prim_count);
/* Get new storage -- unless asked not to. */
if (!persistent_mapping)