vbo: Avoid extra validation of DrawElements.

This saves mapping the index buffer to get bounds on the indices that
drivers just drop on the floor in the VBO case (cache win), saves a bonus
walk of the indices in the CheckArrayBounds case, and skips other
miscellaneous validation.  On Intel it's a particularly large win (50-100%
in my app) because even though we let the indices stay in both CPU and GPU
caches, we still end up waiting for the GPU to be done with the buffer
before reading from it.

Drivers that want the min_index/max_index fields must now check
index_bounds_valid and, when it is false, call vbo_get_minmax_index to
compute them before use.
This commit is contained in:
Eric Anholt 2009-08-11 12:31:01 -07:00
parent ef3ad412c7
commit 2708ddfb06
17 changed files with 158 additions and 119 deletions

View file

@ -422,54 +422,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
return retval;
}
/**
 * Decide whether a draw should be rebased before it is submitted.
 *
 * \param ctx        GL context (not consulted here).
 * \param arrays     client arrays handed to vbo_all_varyings_in_vbos().
 * \param ib         index buffer, or NULL; the result does not depend on it.
 * \param min_index  lowest index referenced by the draw.
 * \return GL_TRUE when min_index is non-zero and vbo_all_varyings_in_vbos()
 *         reports false for \p arrays; GL_FALSE otherwise.
 */
static GLboolean brw_need_rebase( GLcontext *ctx,
                                  const struct gl_client_array *arrays[],
                                  const struct _mesa_index_buffer *ib,
                                  GLuint min_index )
{
   (void) ctx;
   (void) ib;

   /* Indices already start at zero: nothing to rebase. */
   if (min_index == 0)
      return GL_FALSE;

   /* The same test applies to indexed and non-indexed draws.
    *
    * Hmm.  For the non-indexed case this isn't quite what I wanted.  BRW
    * can actually handle the mixed case well enough that we shouldn't
    * need to rebase.  However, it's probably not very common, nor hugely
    * expensive to do it this way.
    */
   return vbo_all_varyings_in_vbos(arrays) ? GL_FALSE : GL_TRUE;
}
void brw_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index )
{
GLboolean retval;
/* Decide if we want to rebase. If so we end up recursing once
* only into this function.
*/
if (brw_need_rebase( ctx, arrays, ib, min_index )) {
vbo_rebase_prims( ctx, arrays,
prim, nr_prims,
ib, min_index, max_index,
brw_draw_prims );
return;
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
/* Decide if we want to rebase. If so we end up recursing once
* only into this function.
*/
if (min_index != 0) {
vbo_rebase_prims(ctx, arrays,
prim, nr_prims,
ib, min_index, max_index,
brw_draw_prims );
return;
}
}
/* Make a first attempt at drawing:

View file

@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index );

View file

@ -462,6 +462,7 @@ static void r300DrawPrims(GLcontext *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index)
{
@ -476,6 +477,12 @@ static void r300DrawPrims(GLcontext *ctx,
limits.max_indices = 65535;
limits.max_vb_size = 1024*1024;
/* This check should get folded into just the places that
* min/max index are really needed.
*/
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
if (min_index) {
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims );
return;

View file

@ -251,7 +251,7 @@ st_RasterPos(GLcontext *ctx, const GLfloat v[4])
rs->array[0].Ptr = (GLubyte *) v;
/* draw the point */
st_feedback_draw_vbo(ctx, rs->arrays, &rs->prim, 1, NULL, 0, 1);
st_feedback_draw_vbo(ctx, rs->arrays, &rs->prim, 1, NULL, GL_TRUE, 0, 1);
}

View file

@ -533,6 +533,7 @@ st_draw_vbo(GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index)
{
@ -545,6 +546,10 @@ st_draw_vbo(GLcontext *ctx,
unsigned num_vbuffers, num_velements;
GLboolean userSpace;
/* Gallium probably doesn't want this in some cases. */
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
/* sanity check for pointer arithmetic below */
assert(sizeof(arrays[0]->Ptr[0]) == 1);

View file

@ -47,6 +47,7 @@ st_draw_vbo(GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index);
@ -56,6 +57,7 @@ st_feedback_draw_vbo(GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index);

View file

@ -96,6 +96,7 @@ st_feedback_draw_vbo(GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index)
{
@ -114,6 +115,9 @@ st_feedback_draw_vbo(GLcontext *ctx,
st_validate_state(ctx->st);
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
/* must get these after state validation! */
vp = ctx->st->vp;
vs = &st->vp->state;

View file

@ -81,7 +81,7 @@ _tnl_CreateContext( GLcontext *ctx )
tnl->nr_blocks = 0;
/* plug in the VBO drawing function */
vbo_set_draw_func(ctx, _tnl_draw_prims);
vbo_set_draw_func(ctx, _tnl_vbo_draw_prims);
_math_init_transformation();
_math_init_translate();

View file

@ -360,6 +360,20 @@ static void unmap_vbos( GLcontext *ctx,
}
/**
 * Draw entry point taking the index_bounds_valid flag, wrapping
 * _tnl_draw_prims().
 *
 * When index_bounds_valid is false the incoming min_index/max_index are
 * not used as-is: vbo_get_minmax_index() is called to fill them in first.
 * The draw is then forwarded to _tnl_draw_prims() with the resulting
 * bounds.
 */
void _tnl_vbo_draw_prims(GLcontext *ctx,
                         const struct gl_client_array *arrays[],
                         const struct _mesa_prim *prim,
                         GLuint nr_prims,
                         const struct _mesa_index_buffer *ib,
                         GLboolean index_bounds_valid,
                         GLuint min_index,
                         GLuint max_index)
{
   GLuint lo = min_index;
   GLuint hi = max_index;

   if (!index_bounds_valid) {
      vbo_get_minmax_index(ctx, prim, ib, &lo, &hi);
   }

   _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, lo, hi);
}
/* This is the main entrypoint into the slimmed-down software tnl
* module. In a regular swtnl driver, this can be plugged straight
@ -393,7 +407,7 @@ void _tnl_draw_prims( GLcontext *ctx,
*/
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib,
min_index, max_index,
_tnl_draw_prims );
_tnl_vbo_draw_prims );
return;
}
else if (max_index > max) {
@ -411,7 +425,7 @@ void _tnl_draw_prims( GLcontext *ctx,
*/
vbo_split_prims( ctx, arrays, prim, nr_prims, ib,
0, max_index,
_tnl_draw_prims,
_tnl_vbo_draw_prims,
&limits );
}
else {

View file

@ -81,6 +81,16 @@ _tnl_draw_prims( GLcontext *ctx,
GLuint min_index,
GLuint max_index);
/* Variant of _tnl_draw_prims() that additionally takes index_bounds_valid,
 * telling the callee whether min_index/max_index can be used as given.
 */
void
_tnl_vbo_draw_prims( GLcontext *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index);
extern void
_mesa_load_tracked_matrices(GLcontext *ctx);

View file

@ -69,6 +69,7 @@ typedef void (*vbo_draw_func)( GLcontext *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index );
@ -112,7 +113,10 @@ void vbo_rebase_prims( GLcontext *ctx,
GLuint min_index,
GLuint max_index,
vbo_draw_func draw );
/* Compute the min and max elements referenced by an indexed draw and
 * store them through min_index / max_index.
 */
void
vbo_get_minmax_index(GLcontext *ctx, const struct _mesa_prim *prim,
const struct _mesa_index_buffer *ib,
GLuint *min_index, GLuint *max_index);
void vbo_use_buffer_objects(GLcontext *ctx);

View file

@ -41,15 +41,29 @@
/**
* Compute min and max elements for glDraw[Range]Elements() calls.
*/
static void
get_minmax_index(GLuint count, GLuint type, const GLvoid *indices,
GLuint *min_index, GLuint *max_index)
void
vbo_get_minmax_index(GLcontext *ctx,
const struct _mesa_prim *prim,
const struct _mesa_index_buffer *ib,
GLuint *min_index, GLuint *max_index)
{
GLuint i;
GLsizei count = prim->count;
const void *indices;
switch(type) {
if (ib->obj->Name) {
const GLvoid *map = ctx->Driver.MapBuffer(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
GL_READ_ONLY,
ib->obj);
indices = ADD_POINTERS(map, ib->ptr);
} else {
indices = ib->ptr;
}
switch (ib->type) {
case GL_UNSIGNED_INT: {
const GLuint *ui_indices = (const GLuint *)indices;
const GLuint *ui_indices = (const GLuint *)ib->ptr;
GLuint max_ui = ui_indices[count-1];
GLuint min_ui = ui_indices[0];
for (i = 0; i < count; i++) {
@ -88,6 +102,12 @@ get_minmax_index(GLuint count, GLuint type, const GLvoid *indices,
assert(0);
break;
}
if (ib->obj->Name != 0) {
ctx->Driver.UnmapBuffer(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
ib->obj);
}
}
@ -500,7 +520,7 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
prim[0].indexed = 0;
vbo->draw_prims( ctx, exec->array.inputs, prim, 1, NULL,
start, start + count - 1 );
GL_TRUE, start, start + count - 1 );
#if 0
print_draw_arrays(ctx, exec, mode, start, count);
@ -566,53 +586,19 @@ dump_element_buffer(GLcontext *ctx, GLenum type)
ctx->Array.ElementArrayBufferObj);
}
static void GLAPIENTRY
vbo_exec_DrawRangeElements(GLenum mode,
GLuint start, GLuint end,
GLsizei count, GLenum type, const GLvoid *indices)
/* Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements */
static void
vbo_validated_drawrangeelements(GLcontext *ctx, GLenum mode,
GLboolean index_bounds_valid,
GLuint start, GLuint end,
GLsizei count, GLenum type,
const GLvoid *indices)
{
GET_CURRENT_CONTEXT(ctx);
struct vbo_context *vbo = vbo_context(ctx);
struct vbo_exec_context *exec = &vbo->exec;
struct _mesa_index_buffer ib;
struct _mesa_prim prim[1];
if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count,
type, indices ))
return;
if (end >= ctx->Array.ArrayObj->_MaxElement) {
/* the max element is out of bounds of one or more enabled arrays */
_mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, "
"type 0x%x, indices=%p)\n"
"\tindex=%u is out of bounds (max=%u) "
"Element Buffer %u (size %d)",
start, end, count, type, indices, end,
ctx->Array.ArrayObj->_MaxElement - 1,
ctx->Array.ElementArrayBufferObj->Name,
ctx->Array.ElementArrayBufferObj->Size);
if (0)
dump_element_buffer(ctx, type);
if (0)
_mesa_print_arrays(ctx);
return;
}
else if (0) {
_mesa_printf("glDraw[Range]Elements"
"(start %u, end %u, type 0x%x, count %d) ElemBuf %u\n",
start, end, type, count,
ctx->Array.ElementArrayBufferObj->Name);
}
#if 0
check_draw_elements_data(ctx, count, type, indices);
#else
(void) check_draw_elements_data;
#endif
FLUSH_CURRENT( ctx, 0 );
if (ctx->NewState)
@ -623,13 +609,13 @@ vbo_exec_DrawRangeElements(GLenum mode,
return;
}
bind_arrays( ctx );
if (ctx->NewState)
_mesa_update_state( ctx );
bind_arrays( ctx );
ib.count = count;
ib.type = type;
ib.type = type;
ib.obj = ctx->Array.ElementArrayBufferObj;
ib.ptr = indices;
@ -673,7 +659,54 @@ vbo_exec_DrawRangeElements(GLenum mode,
* for the latter case elsewhere.
*/
vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, start, end );
vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib,
index_bounds_valid, start, end );
}
/**
 * glDrawRangeElements() entry point.
 *
 * Performs the API-level validation, refuses (with a warning) any draw
 * whose 'end' index is at or beyond the array object's _MaxElement, and
 * otherwise forwards to vbo_validated_drawrangeelements() with
 * index_bounds_valid set to GL_TRUE so that the caller-supplied start/end
 * are passed along as the index bounds.
 */
static void GLAPIENTRY
vbo_exec_DrawRangeElements(GLenum mode,
                           GLuint start, GLuint end,
                           GLsizei count, GLenum type, const GLvoid *indices)
{
   GET_CURRENT_CONTEXT(ctx);

   if (!_mesa_validate_DrawRangeElements(ctx, mode, start, end, count,
                                         type, indices)) {
      return;
   }

   if (end >= ctx->Array.ArrayObj->_MaxElement) {
      /* 'end' lies outside the bounds of one or more enabled arrays */
      _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, "
                    "type 0x%x, indices=%p)\n"
                    "\tindex=%u is out of bounds (max=%u) "
                    "Element Buffer %u (size %d)",
                    start, end, count, type, indices, end,
                    ctx->Array.ArrayObj->_MaxElement - 1,
                    ctx->Array.ElementArrayBufferObj->Name,
                    ctx->Array.ElementArrayBufferObj->Size);

      /* Disabled debugging aids; kept so the helpers stay referenced. */
      if (0) {
         dump_element_buffer(ctx, type);
      }
      if (0) {
         _mesa_print_arrays(ctx);
      }
      return;
   }

   /* Disabled trace of every in-range call. */
   if (0) {
      _mesa_printf("glDraw[Range]Elements"
                   "(start %u, end %u, type 0x%x, count %d) ElemBuf %u\n",
                   start, end, type, count,
                   ctx->Array.ElementArrayBufferObj->Name);
   }

#if 0
   check_draw_elements_data(ctx, count, type, indices);
#else
   /* Reference the checker so it is not reported as unused. */
   (void) check_draw_elements_data;
#endif

   vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end,
                                   count, type, indices);
}
@ -682,35 +715,12 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
const GLvoid *indices)
{
GET_CURRENT_CONTEXT(ctx);
GLuint min_index = 0;
GLuint max_index = 0;
if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices ))
return;
if (!vbo_validate_shaders(ctx)) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glDrawElements(bad shader)");
return;
}
if (ctx->Array.ElementArrayBufferObj->Name) {
const GLvoid *map = ctx->Driver.MapBuffer(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
GL_READ_ONLY,
ctx->Array.ElementArrayBufferObj);
get_minmax_index(count, type, ADD_POINTERS(map, indices),
&min_index, &max_index);
ctx->Driver.UnmapBuffer(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
ctx->Array.ElementArrayBufferObj);
}
else {
get_minmax_index(count, type, indices, &min_index, &max_index);
}
vbo_exec_DrawRangeElements(mode, min_index, max_index, count, type, indices);
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
count, type, indices);
}

View file

@ -378,6 +378,7 @@ vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
exec->vtx.prim,
exec->vtx.prim_count,
NULL,
GL_TRUE,
0,
exec->vtx.vert_count - 1);

View file

@ -208,6 +208,7 @@ void vbo_rebase_prims( GLcontext *ctx,
prim,
nr_prims,
ib,
GL_TRUE,
0,
max_index - min_index );

View file

@ -279,6 +279,7 @@ void vbo_save_playback_vertex_list( GLcontext *ctx, void *data )
node->prim,
node->prim_count,
NULL,
GL_TRUE,
0, /* Node is a VBO, so this is ok */
node->count - 1);
}

View file

@ -194,6 +194,7 @@ flush( struct copy_context *copy )
copy->dstprim,
copy->dstprim_nr,
&copy->dstib,
GL_TRUE,
0,
copy->dstbuf_nr );

View file

@ -85,6 +85,7 @@ static void flush_vertex( struct split_context *split )
split->dstprim,
split->dstprim_nr,
NULL,
GL_TRUE,
min_index,
max_index);