mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-21 14:20:29 +01:00
st/mesa: set vertex arrays state only when necessary
The vertex arrays state should be set only when (_NEW_ARRAY | _NEW_PROGRAM)
is dirty. This assumes user buffer content is immutable, which will be
sorted out in the next commit. The following use case should be much faster
now:
for (i = 0; i < 1000; i++) {
glDrawElements(...);
}
Or even:
for (i = 0; i < 1000; i++) {
glSomeStateChangeOtherThanArraysOrProgram(...);
glDrawElements(...);
}
The performance increase from this may be significant in some apps and
negligible in others. It is especially noticeable in the Torcs game (r300g):
Before: 15.4 fps
After: 20 fps
Also, less looping over attribs in st_draw_vbo yields a slight speed-up
in apps with lots of glDraw* calls.
This commit is contained in:
parent
cdca3c58aa
commit
2a904fd6a0
1 changed files with 111 additions and 183 deletions
|
|
@ -243,13 +243,11 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
|
|||
static GLboolean
|
||||
is_interleaved_arrays(const struct st_vertex_program *vp,
|
||||
const struct st_vp_variant *vpv,
|
||||
const struct gl_client_array **arrays,
|
||||
GLboolean *userSpace)
|
||||
const struct gl_client_array **arrays)
|
||||
{
|
||||
GLuint attr;
|
||||
const struct gl_buffer_object *firstBufObj = NULL;
|
||||
GLint firstStride = -1;
|
||||
GLuint num_client_arrays = 0;
|
||||
const GLubyte *client_addr = NULL;
|
||||
|
||||
for (attr = 0; attr < vpv->num_inputs; attr++) {
|
||||
|
|
@ -263,9 +261,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
|
|||
else if (firstStride != stride) {
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
|
||||
if (!bufObj || !bufObj->Name) {
|
||||
num_client_arrays++;
|
||||
/* Try to detect if the client-space arrays are
|
||||
* "close" to each other.
|
||||
*/
|
||||
|
|
@ -285,56 +282,10 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
|
|||
}
|
||||
}
|
||||
|
||||
*userSpace = (num_client_arrays == vpv->num_inputs);
|
||||
/* debug_printf("user space: %s (%d arrays, %d inputs)\n",
|
||||
(int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the memory range occupied by the arrays.
|
||||
*/
|
||||
static void
|
||||
get_arrays_bounds(const struct st_vertex_program *vp,
|
||||
const struct st_vp_variant *vpv,
|
||||
const struct gl_client_array **arrays,
|
||||
GLuint max_index,
|
||||
const GLubyte **low, const GLubyte **high)
|
||||
{
|
||||
const GLubyte *low_addr = NULL;
|
||||
const GLubyte *high_addr = NULL;
|
||||
GLuint attr;
|
||||
|
||||
/* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */
|
||||
|
||||
for (attr = 0; attr < vpv->num_inputs; attr++) {
|
||||
const GLuint mesaAttr = vp->index_to_input[attr];
|
||||
const GLint stride = arrays[mesaAttr]->StrideB;
|
||||
const GLubyte *start = arrays[mesaAttr]->Ptr;
|
||||
const unsigned sz = (arrays[mesaAttr]->Size *
|
||||
_mesa_sizeof_type(arrays[mesaAttr]->Type));
|
||||
const GLubyte *end = start + (max_index * stride) + sz;
|
||||
|
||||
/* debug_printf("attr %u: stride %d size %u start %p end %p\n",
|
||||
attr, stride, sz, start, end); */
|
||||
|
||||
if (attr == 0) {
|
||||
low_addr = start;
|
||||
high_addr = end;
|
||||
}
|
||||
else {
|
||||
low_addr = MIN2(low_addr, start);
|
||||
high_addr = MAX2(high_addr, end);
|
||||
}
|
||||
}
|
||||
|
||||
*low = low_addr;
|
||||
*high = high_addr;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set up for drawing interleaved arrays that all live in one VBO
|
||||
* or all live in user space.
|
||||
|
|
@ -346,15 +297,21 @@ setup_interleaved_attribs(struct gl_context *ctx,
|
|||
const struct st_vertex_program *vp,
|
||||
const struct st_vp_variant *vpv,
|
||||
const struct gl_client_array **arrays,
|
||||
GLuint max_index,
|
||||
GLboolean userSpace,
|
||||
struct pipe_vertex_buffer *vbuffer,
|
||||
struct pipe_vertex_element velements[])
|
||||
struct pipe_vertex_element velements[],
|
||||
unsigned max_index)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
struct pipe_context *pipe = st->pipe;
|
||||
GLuint attr;
|
||||
const GLubyte *offset0 = NULL;
|
||||
const GLubyte *low_addr = NULL;
|
||||
|
||||
/* Find the lowest address. */
|
||||
for (attr = 0; attr < vpv->num_inputs; attr++) {
|
||||
const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
|
||||
|
||||
low_addr = !low_addr ? start : MIN2(low_addr, start);
|
||||
}
|
||||
|
||||
for (attr = 0; attr < vpv->num_inputs; attr++) {
|
||||
const GLuint mesaAttr = vp->index_to_input[attr];
|
||||
|
|
@ -362,39 +319,23 @@ setup_interleaved_attribs(struct gl_context *ctx,
|
|||
struct st_buffer_object *stobj = st_buffer_object(bufobj);
|
||||
GLsizei stride = arrays[mesaAttr]->StrideB;
|
||||
|
||||
/*printf("stobj %u = %p\n", attr, (void*)stobj);*/
|
||||
|
||||
if (attr == 0) {
|
||||
const GLubyte *low, *high;
|
||||
|
||||
get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high);
|
||||
/* debug_printf("buffer range: %p %p range %d max index %u\n",
|
||||
low, high, high - low, max_index); */
|
||||
|
||||
offset0 = low;
|
||||
if (userSpace) {
|
||||
vbuffer->buffer =
|
||||
pipe_user_buffer_create(pipe->screen, (void *) low, high - low,
|
||||
PIPE_BIND_VERTEX_BUFFER);
|
||||
vbuffer->buffer_offset = 0;
|
||||
}
|
||||
else {
|
||||
if (bufobj && bufobj->Name) {
|
||||
vbuffer->buffer = NULL;
|
||||
pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
|
||||
vbuffer->buffer_offset = pointer_to_offset(low);
|
||||
vbuffer->buffer_offset = pointer_to_offset(low_addr);
|
||||
} else {
|
||||
vbuffer->buffer =
|
||||
pipe_user_buffer_create(pipe->screen, (void*)low_addr,
|
||||
stride * (max_index + 1),
|
||||
PIPE_BIND_VERTEX_BUFFER);
|
||||
vbuffer->buffer_offset = 0;
|
||||
}
|
||||
vbuffer->stride = stride; /* in bytes */
|
||||
}
|
||||
|
||||
/*
|
||||
if (arrays[mesaAttr]->InstanceDivisor)
|
||||
vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
|
||||
else
|
||||
vbuffer[attr].max_index = max_index;
|
||||
*/
|
||||
|
||||
velements[attr].src_offset =
|
||||
(unsigned) (arrays[mesaAttr]->Ptr - offset0);
|
||||
(unsigned) (arrays[mesaAttr]->Ptr - low_addr);
|
||||
velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
|
||||
velements[attr].vertex_buffer_index = 0;
|
||||
velements[attr].src_format =
|
||||
|
|
@ -418,10 +359,9 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
|
|||
const struct st_vertex_program *vp,
|
||||
const struct st_vp_variant *vpv,
|
||||
const struct gl_client_array **arrays,
|
||||
GLuint max_index,
|
||||
GLboolean *userSpace,
|
||||
struct pipe_vertex_buffer vbuffer[],
|
||||
struct pipe_vertex_element velements[])
|
||||
struct pipe_vertex_element velements[],
|
||||
unsigned max_index)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
struct pipe_context *pipe = st->pipe;
|
||||
|
|
@ -432,8 +372,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
|
|||
struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
|
||||
GLsizei stride = arrays[mesaAttr]->StrideB;
|
||||
|
||||
*userSpace = GL_FALSE;
|
||||
|
||||
if (bufobj && bufobj->Name) {
|
||||
/* Attribute data is in a VBO.
|
||||
* Recall that for VBOs, the gl_client_array->Ptr field is
|
||||
|
|
@ -441,37 +379,23 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
|
|||
*/
|
||||
struct st_buffer_object *stobj = st_buffer_object(bufobj);
|
||||
assert(stobj->buffer);
|
||||
/*printf("stobj %u = %p\n", attr, (void*) stobj);*/
|
||||
|
||||
vbuffer[attr].buffer = NULL;
|
||||
pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
|
||||
vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
|
||||
}
|
||||
else {
|
||||
/* attribute data is in user-space memory, not a VBO */
|
||||
uint bytes;
|
||||
/*printf("user-space array %d stride %d\n", attr, stride);*/
|
||||
|
||||
*userSpace = GL_TRUE;
|
||||
|
||||
/* wrap user data */
|
||||
if (arrays[mesaAttr]->Ptr) {
|
||||
/* user's vertex array */
|
||||
if (arrays[mesaAttr]->StrideB) {
|
||||
bytes = arrays[mesaAttr]->StrideB * (max_index + 1);
|
||||
}
|
||||
else {
|
||||
bytes = arrays[mesaAttr]->Size
|
||||
* _mesa_sizeof_type(arrays[mesaAttr]->Type);
|
||||
}
|
||||
vbuffer[attr].buffer =
|
||||
pipe_user_buffer_create(pipe->screen,
|
||||
(void *) arrays[mesaAttr]->Ptr, bytes,
|
||||
(void *) arrays[mesaAttr]->Ptr,
|
||||
stride * (max_index + 1),
|
||||
PIPE_BIND_VERTEX_BUFFER);
|
||||
}
|
||||
else {
|
||||
/* no array, use ctx->Current.Attrib[] value */
|
||||
bytes = sizeof(ctx->Current.Attrib[0]);
|
||||
uint bytes = sizeof(ctx->Current.Attrib[0]);
|
||||
vbuffer[attr].buffer =
|
||||
pipe_user_buffer_create(pipe->screen,
|
||||
(void *) ctx->Current.Attrib[mesaAttr],
|
||||
|
|
@ -483,8 +407,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
|
|||
vbuffer[attr].buffer_offset = 0;
|
||||
}
|
||||
|
||||
assert(velements[attr].src_offset <= 2048); /* 11-bit field */
|
||||
|
||||
/* common-case setup */
|
||||
vbuffer[attr].stride = stride; /* in bytes */
|
||||
|
||||
|
|
@ -604,6 +526,54 @@ translate_prim(const struct gl_context *ctx, unsigned prim)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
st_validate_varrays(struct gl_context *ctx,
|
||||
const struct gl_client_array **arrays,
|
||||
unsigned max_index)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
const struct st_vertex_program *vp;
|
||||
const struct st_vp_variant *vpv;
|
||||
struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
|
||||
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vbuffers, num_velements;
|
||||
GLuint attr;
|
||||
|
||||
/* must get these after state validation! */
|
||||
vp = st->vp;
|
||||
vpv = st->vp_variant;
|
||||
|
||||
memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
|
||||
/*
|
||||
* Setup the vbuffer[] and velements[] arrays.
|
||||
*/
|
||||
if (is_interleaved_arrays(vp, vpv, arrays)) {
|
||||
setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
|
||||
max_index);
|
||||
num_vbuffers = 1;
|
||||
num_velements = vpv->num_inputs;
|
||||
if (num_velements == 0)
|
||||
num_vbuffers = 0;
|
||||
}
|
||||
else {
|
||||
setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
|
||||
vbuffer, velements, max_index);
|
||||
num_vbuffers = vpv->num_inputs;
|
||||
num_velements = vpv->num_inputs;
|
||||
}
|
||||
|
||||
cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
|
||||
cso_set_vertex_elements(st->cso_context, num_velements, velements);
|
||||
|
||||
/* unreference buffers (frees wrapped user-space buffer objects)
|
||||
* This is OK, because the pipe driver should reference buffers by itself
|
||||
* in set_vertex_buffers. */
|
||||
for (attr = 0; attr < num_vbuffers; attr++) {
|
||||
pipe_resource_reference(&vbuffer[attr].buffer, NULL);
|
||||
assert(!vbuffer[attr].buffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This function gets plugged into the VBO module and is called when
|
||||
|
|
@ -622,90 +592,59 @@ st_draw_vbo(struct gl_context *ctx,
|
|||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
struct pipe_context *pipe = st->pipe;
|
||||
const struct st_vertex_program *vp;
|
||||
const struct st_vp_variant *vpv;
|
||||
struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
|
||||
GLuint attr;
|
||||
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
|
||||
unsigned num_vbuffers, num_velements;
|
||||
struct pipe_index_buffer ibuffer;
|
||||
GLboolean userSpace = GL_FALSE;
|
||||
GLboolean vertDataEdgeFlags;
|
||||
struct pipe_draw_info info;
|
||||
unsigned i;
|
||||
GLboolean new_array =
|
||||
st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;
|
||||
|
||||
/* Mesa core state should have been validated already */
|
||||
assert(ctx->NewState == 0x0);
|
||||
|
||||
/* Gallium probably doesn't want this in some cases. */
|
||||
if (!index_bounds_valid)
|
||||
if (!vbo_all_varyings_in_vbos(arrays))
|
||||
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
|
||||
if (ib) {
|
||||
/* Gallium probably doesn't want this in some cases. */
|
||||
if (!index_bounds_valid)
|
||||
if (!vbo_all_varyings_in_vbos(arrays))
|
||||
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
|
||||
} else {
|
||||
/* Get min/max index for non-indexed drawing. */
|
||||
min_index = ~0;
|
||||
max_index = 0;
|
||||
|
||||
/* sanity check for pointer arithmetic below */
|
||||
assert(sizeof(arrays[0]->Ptr[0]) == 1);
|
||||
|
||||
vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
|
||||
arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
|
||||
if (vertDataEdgeFlags != st->vertdata_edgeflags) {
|
||||
st->vertdata_edgeflags = vertDataEdgeFlags;
|
||||
st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
|
||||
for (i = 0; i < nr_prims; i++) {
|
||||
min_index = MIN2(min_index, prims[i].start);
|
||||
max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
|
||||
}
|
||||
}
|
||||
|
||||
st_validate_state(st);
|
||||
/* Validate state. */
|
||||
if (st->dirty.st) {
|
||||
GLboolean vertDataEdgeFlags;
|
||||
|
||||
/* must get these after state validation! */
|
||||
vp = st->vp;
|
||||
vpv = st->vp_variant;
|
||||
/* sanity check for pointer arithmetic below */
|
||||
assert(sizeof(arrays[0]->Ptr[0]) == 1);
|
||||
|
||||
vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
|
||||
arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
|
||||
if (vertDataEdgeFlags != st->vertdata_edgeflags) {
|
||||
st->vertdata_edgeflags = vertDataEdgeFlags;
|
||||
st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
|
||||
}
|
||||
|
||||
st_validate_state(st);
|
||||
|
||||
if (new_array) {
|
||||
st_validate_varrays(ctx, arrays, max_index);
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (MESA_VERBOSE & VERBOSE_GLSL) {
|
||||
check_uniforms(ctx);
|
||||
}
|
||||
if (MESA_VERBOSE & VERBOSE_GLSL) {
|
||||
check_uniforms(ctx);
|
||||
}
|
||||
#else
|
||||
(void) check_uniforms;
|
||||
(void) check_uniforms;
|
||||
#endif
|
||||
|
||||
memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
|
||||
/*
|
||||
* Setup the vbuffer[] and velements[] arrays.
|
||||
*/
|
||||
if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) {
|
||||
/*printf("Draw interleaved\n");*/
|
||||
setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace,
|
||||
vbuffer, velements);
|
||||
num_vbuffers = 1;
|
||||
num_velements = vpv->num_inputs;
|
||||
if (num_velements == 0)
|
||||
num_vbuffers = 0;
|
||||
}
|
||||
else {
|
||||
/*printf("Draw non-interleaved\n");*/
|
||||
setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index,
|
||||
&userSpace, vbuffer, velements);
|
||||
num_vbuffers = vpv->num_inputs;
|
||||
num_velements = vpv->num_inputs;
|
||||
}
|
||||
|
||||
#if 0
|
||||
{
|
||||
GLuint i;
|
||||
for (i = 0; i < num_vbuffers; i++) {
|
||||
printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride);
|
||||
printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index);
|
||||
printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset);
|
||||
printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer);
|
||||
}
|
||||
for (i = 0; i < num_velements; i++) {
|
||||
printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
|
||||
printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
|
||||
printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
|
||||
cso_set_vertex_elements(st->cso_context, num_velements, velements);
|
||||
|
||||
setup_index_buffer(ctx, ib, &ibuffer);
|
||||
pipe->set_index_buffer(pipe, &ibuffer);
|
||||
|
|
@ -739,17 +678,6 @@ st_draw_vbo(struct gl_context *ctx,
|
|||
}
|
||||
|
||||
pipe_resource_reference(&ibuffer.buffer, NULL);
|
||||
|
||||
/* unreference buffers (frees wrapped user-space buffer objects) */
|
||||
for (attr = 0; attr < num_vbuffers; attr++) {
|
||||
pipe_resource_reference(&vbuffer[attr].buffer, NULL);
|
||||
assert(!vbuffer[attr].buffer);
|
||||
}
|
||||
|
||||
if (userSpace)
|
||||
{
|
||||
pipe->set_vertex_buffers(pipe, 0, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue