st/mesa: set vertex arrays state only when necessary

The vertex arrays state should be set only when (_NEW_ARRAY | _NEW_PROGRAM)
is dirty. This assumes user buffer content is mutable, which will be
sorted out in the next commit. The following use cases should be much faster
now:

for (i = 0; i < 1000; i++) {
   glDrawElements(...);
}

Or even:

for (i = 0; i < 1000; i++) {
   glSomeStateChangeOtherThanArraysOrProgram(...);
   glDrawElements(...);
}

The performance increase from this may be significant in some apps and
negligible in others. It is especially noticeable in the Torcs game (r300g):
    Before: 15.4 fps
    After: 20 fps

Also, less looping over attribs in st_draw_vbo yields a slight speed-up
in apps with lots of glDraw* calls.
This commit is contained in:
Marek Olšák 2010-12-26 04:30:51 +01:00
parent cdca3c58aa
commit 2a904fd6a0

View file

@ -243,13 +243,11 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
static GLboolean
is_interleaved_arrays(const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
GLboolean *userSpace)
const struct gl_client_array **arrays)
{
GLuint attr;
const struct gl_buffer_object *firstBufObj = NULL;
GLint firstStride = -1;
GLuint num_client_arrays = 0;
const GLubyte *client_addr = NULL;
for (attr = 0; attr < vpv->num_inputs; attr++) {
@ -263,9 +261,8 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
else if (firstStride != stride) {
return GL_FALSE;
}
if (!bufObj || !bufObj->Name) {
num_client_arrays++;
/* Try to detect if the client-space arrays are
* "close" to each other.
*/
@ -285,56 +282,10 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
}
}
*userSpace = (num_client_arrays == vpv->num_inputs);
/* debug_printf("user space: %s (%d arrays, %d inputs)\n",
(int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */
return GL_TRUE;
}
/**
 * Compute the memory range occupied by the arrays.
 *
 * For every vertex program input, the array start is its Ptr and its end is
 * start + max_index * StrideB + (Size * sizeof(Type)).  The result is the
 * lowest start and the highest end over all inputs.
 *
 * \param vp         vertex program; index_to_input[] maps an input slot to
 *                   the Mesa attribute used to index \p arrays
 * \param vpv        program variant; num_inputs is the number of inputs walked
 * \param arrays     client arrays, indexed by Mesa attribute
 * \param max_index  vertex index used to compute the end of each array
 * \param low        returns the lowest start address (NULL when there are
 *                   no inputs)
 * \param high       returns the highest end address (NULL when there are
 *                   no inputs)
 */
static void
get_arrays_bounds(const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
GLuint max_index,
const GLubyte **low, const GLubyte **high)
{
const GLubyte *low_addr = NULL;
const GLubyte *high_addr = NULL;
GLuint attr;
/* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
const GLint stride = arrays[mesaAttr]->StrideB;
const GLubyte *start = arrays[mesaAttr]->Ptr;
/* size in bytes of one element of this attribute */
const unsigned sz = (arrays[mesaAttr]->Size *
_mesa_sizeof_type(arrays[mesaAttr]->Type));
/* one past the last byte of the element at max_index */
const GLubyte *end = start + (max_index * stride) + sz;
/* debug_printf("attr %u: stride %d size %u start %p end %p\n",
attr, stride, sz, start, end); */
if (attr == 0) {
/* first input seeds the range */
low_addr = start;
high_addr = end;
}
else {
/* widen the range to cover this input as well */
low_addr = MIN2(low_addr, start);
high_addr = MAX2(high_addr, end);
}
}
*low = low_addr;
*high = high_addr;
}
/**
* Set up for drawing interleaved arrays that all live in one VBO
* or all live in user space.
@ -346,15 +297,21 @@ setup_interleaved_attribs(struct gl_context *ctx,
const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
GLuint max_index,
GLboolean userSpace,
struct pipe_vertex_buffer *vbuffer,
struct pipe_vertex_element velements[])
struct pipe_vertex_element velements[],
unsigned max_index)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
GLuint attr;
const GLubyte *offset0 = NULL;
const GLubyte *low_addr = NULL;
/* Find the lowest address. */
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
low_addr = !low_addr ? start : MIN2(low_addr, start);
}
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
@ -362,39 +319,23 @@ setup_interleaved_attribs(struct gl_context *ctx,
struct st_buffer_object *stobj = st_buffer_object(bufobj);
GLsizei stride = arrays[mesaAttr]->StrideB;
/*printf("stobj %u = %p\n", attr, (void*)stobj);*/
if (attr == 0) {
const GLubyte *low, *high;
get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high);
/* debug_printf("buffer range: %p %p range %d max index %u\n",
low, high, high - low, max_index); */
offset0 = low;
if (userSpace) {
vbuffer->buffer =
pipe_user_buffer_create(pipe->screen, (void *) low, high - low,
PIPE_BIND_VERTEX_BUFFER);
vbuffer->buffer_offset = 0;
}
else {
if (bufobj && bufobj->Name) {
vbuffer->buffer = NULL;
pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
vbuffer->buffer_offset = pointer_to_offset(low);
vbuffer->buffer_offset = pointer_to_offset(low_addr);
} else {
vbuffer->buffer =
pipe_user_buffer_create(pipe->screen, (void*)low_addr,
stride * (max_index + 1),
PIPE_BIND_VERTEX_BUFFER);
vbuffer->buffer_offset = 0;
}
vbuffer->stride = stride; /* in bytes */
}
/*
if (arrays[mesaAttr]->InstanceDivisor)
vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
else
vbuffer[attr].max_index = max_index;
*/
velements[attr].src_offset =
(unsigned) (arrays[mesaAttr]->Ptr - offset0);
(unsigned) (arrays[mesaAttr]->Ptr - low_addr);
velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
velements[attr].vertex_buffer_index = 0;
velements[attr].src_format =
@ -418,10 +359,9 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
GLuint max_index,
GLboolean *userSpace,
struct pipe_vertex_buffer vbuffer[],
struct pipe_vertex_element velements[])
struct pipe_vertex_element velements[],
unsigned max_index)
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
@ -432,8 +372,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
GLsizei stride = arrays[mesaAttr]->StrideB;
*userSpace = GL_FALSE;
if (bufobj && bufobj->Name) {
/* Attribute data is in a VBO.
* Recall that for VBOs, the gl_client_array->Ptr field is
@ -441,37 +379,23 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
*/
struct st_buffer_object *stobj = st_buffer_object(bufobj);
assert(stobj->buffer);
/*printf("stobj %u = %p\n", attr, (void*) stobj);*/
vbuffer[attr].buffer = NULL;
pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
}
else {
/* attribute data is in user-space memory, not a VBO */
uint bytes;
/*printf("user-space array %d stride %d\n", attr, stride);*/
*userSpace = GL_TRUE;
/* wrap user data */
if (arrays[mesaAttr]->Ptr) {
/* user's vertex array */
if (arrays[mesaAttr]->StrideB) {
bytes = arrays[mesaAttr]->StrideB * (max_index + 1);
}
else {
bytes = arrays[mesaAttr]->Size
* _mesa_sizeof_type(arrays[mesaAttr]->Type);
}
vbuffer[attr].buffer =
pipe_user_buffer_create(pipe->screen,
(void *) arrays[mesaAttr]->Ptr, bytes,
(void *) arrays[mesaAttr]->Ptr,
stride * (max_index + 1),
PIPE_BIND_VERTEX_BUFFER);
}
else {
/* no array, use ctx->Current.Attrib[] value */
bytes = sizeof(ctx->Current.Attrib[0]);
uint bytes = sizeof(ctx->Current.Attrib[0]);
vbuffer[attr].buffer =
pipe_user_buffer_create(pipe->screen,
(void *) ctx->Current.Attrib[mesaAttr],
@ -483,8 +407,6 @@ setup_non_interleaved_attribs(struct gl_context *ctx,
vbuffer[attr].buffer_offset = 0;
}
assert(velements[attr].src_offset <= 2048); /* 11-bit field */
/* common-case setup */
vbuffer[attr].stride = stride; /* in bytes */
@ -604,6 +526,54 @@ translate_prim(const struct gl_context *ctx, unsigned prim)
}
/**
 * Build the vertex buffer and vertex element state for the current vertex
 * program variant (st->vp / st->vp_variant) and bind it through the CSO
 * context.
 *
 * Interleaved arrays are described with a single vertex buffer (or none when
 * the variant has no inputs); otherwise one vertex buffer per input is used.
 * The local buffer references are dropped again before returning.
 *
 * \param ctx        GL context; the state tracker context is taken from it
 * \param arrays     client arrays handed on to the setup helpers
 * \param max_index  handed on to the setup helpers
 */
static void
st_validate_varrays(struct gl_context *ctx,
const struct gl_client_array **arrays,
unsigned max_index)
{
struct st_context *st = st_context(ctx);
const struct st_vertex_program *vp;
const struct st_vp_variant *vpv;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers, num_velements;
GLuint attr;
/* must get these after state validation! */
vp = st->vp;
vpv = st->vp_variant;
/* clear only the elements that will be filled in, one per input */
memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
/*
 * Setup the vbuffer[] and velements[] arrays.
 */
if (is_interleaved_arrays(vp, vpv, arrays)) {
setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
max_index);
/* all elements share vertex buffer 0 */
num_vbuffers = 1;
num_velements = vpv->num_inputs;
/* no inputs: bind no vertex buffer at all */
if (num_velements == 0)
num_vbuffers = 0;
}
else {
setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
vbuffer, velements, max_index);
/* one vertex buffer and one element per input */
num_vbuffers = vpv->num_inputs;
num_velements = vpv->num_inputs;
}
cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
cso_set_vertex_elements(st->cso_context, num_velements, velements);
/* unreference buffers (frees wrapped user-space buffer objects)
 * This is OK, because the pipe driver should reference buffers by itself
 * in set_vertex_buffers. */
for (attr = 0; attr < num_vbuffers; attr++) {
pipe_resource_reference(&vbuffer[attr].buffer, NULL);
assert(!vbuffer[attr].buffer);
}
}
/**
* This function gets plugged into the VBO module and is called when
@ -622,90 +592,59 @@ st_draw_vbo(struct gl_context *ctx,
{
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
const struct st_vertex_program *vp;
const struct st_vp_variant *vpv;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
GLuint attr;
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
unsigned num_vbuffers, num_velements;
struct pipe_index_buffer ibuffer;
GLboolean userSpace = GL_FALSE;
GLboolean vertDataEdgeFlags;
struct pipe_draw_info info;
unsigned i;
GLboolean new_array =
st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;
/* Mesa core state should have been validated already */
assert(ctx->NewState == 0x0);
/* Gallium probably doesn't want this in some cases. */
if (!index_bounds_valid)
if (!vbo_all_varyings_in_vbos(arrays))
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
if (ib) {
/* Gallium probably doesn't want this in some cases. */
if (!index_bounds_valid)
if (!vbo_all_varyings_in_vbos(arrays))
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
} else {
/* Get min/max index for non-indexed drawing. */
min_index = ~0;
max_index = 0;
/* sanity check for pointer arithmetic below */
assert(sizeof(arrays[0]->Ptr[0]) == 1);
vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
if (vertDataEdgeFlags != st->vertdata_edgeflags) {
st->vertdata_edgeflags = vertDataEdgeFlags;
st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
for (i = 0; i < nr_prims; i++) {
min_index = MIN2(min_index, prims[i].start);
max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
}
}
st_validate_state(st);
/* Validate state. */
if (st->dirty.st) {
GLboolean vertDataEdgeFlags;
/* must get these after state validation! */
vp = st->vp;
vpv = st->vp_variant;
/* sanity check for pointer arithmetic below */
assert(sizeof(arrays[0]->Ptr[0]) == 1);
vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
if (vertDataEdgeFlags != st->vertdata_edgeflags) {
st->vertdata_edgeflags = vertDataEdgeFlags;
st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
}
st_validate_state(st);
if (new_array) {
st_validate_varrays(ctx, arrays, max_index);
}
#if 0
if (MESA_VERBOSE & VERBOSE_GLSL) {
check_uniforms(ctx);
}
if (MESA_VERBOSE & VERBOSE_GLSL) {
check_uniforms(ctx);
}
#else
(void) check_uniforms;
(void) check_uniforms;
#endif
memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
/*
* Setup the vbuffer[] and velements[] arrays.
*/
if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) {
/*printf("Draw interleaved\n");*/
setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace,
vbuffer, velements);
num_vbuffers = 1;
num_velements = vpv->num_inputs;
if (num_velements == 0)
num_vbuffers = 0;
}
else {
/*printf("Draw non-interleaved\n");*/
setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index,
&userSpace, vbuffer, velements);
num_vbuffers = vpv->num_inputs;
num_velements = vpv->num_inputs;
}
#if 0
{
GLuint i;
for (i = 0; i < num_vbuffers; i++) {
printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride);
printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index);
printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset);
printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer);
}
for (i = 0; i < num_velements; i++) {
printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
}
}
#endif
cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
cso_set_vertex_elements(st->cso_context, num_velements, velements);
setup_index_buffer(ctx, ib, &ibuffer);
pipe->set_index_buffer(pipe, &ibuffer);
@ -739,17 +678,6 @@ st_draw_vbo(struct gl_context *ctx,
}
pipe_resource_reference(&ibuffer.buffer, NULL);
/* unreference buffers (frees wrapped user-space buffer objects) */
for (attr = 0; attr < num_vbuffers; attr++) {
pipe_resource_reference(&vbuffer[attr].buffer, NULL);
assert(!vbuffer[attr].buffer);
}
if (userSpace)
{
pipe->set_vertex_buffers(pipe, 0, NULL);
}
}