i965: Use GTT maps when available to upload vertex arrays and system VBOs.

This speeds up OpenArena (OA) on my GM45 by 21% (more than the original CPU
cost of the upload path).  We might still be able to squeeze out a few more
percent by avoiding repeatedly mapping and unmapping buffers as we upload
elements into them.
This commit is contained in:
Eric Anholt 2009-04-06 09:38:16 -07:00
parent 5cca1ceb81
commit e7aef006e5
3 changed files with 55 additions and 18 deletions

View file

@ -277,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw,
struct brw_vertex_element *element,
GLuint dst_stride)
{
struct intel_context *intel = &brw->intel;
GLuint size = element->count * dst_stride;
get_space(brw, size, &element->bo, &element->offset);
@ -289,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw,
}
if (dst_stride == element->glarray->StrideB) {
dri_bo_subdata(element->bo,
element->offset,
size,
element->glarray->Ptr);
if (intel->intelScreen->kernel_exec_fencing) {
drm_intel_gem_bo_map_gtt(element->bo);
memcpy((char *)element->bo->virtual + element->offset,
element->glarray->Ptr, size);
drm_intel_gem_bo_unmap_gtt(element->bo);
} else {
dri_bo_subdata(element->bo,
element->offset,
size,
element->glarray->Ptr);
}
} else {
void *data;
char *dest;
const unsigned char *src = element->glarray->Ptr;
int i;
data = _mesa_malloc(dst_stride * element->count);
dest = data;
for (i = 0; i < element->count; i++) {
memcpy(dest, src, dst_stride);
src += element->glarray->StrideB;
dest += dst_stride;
}
if (intel->intelScreen->kernel_exec_fencing) {
drm_intel_gem_bo_map_gtt(element->bo);
dest = element->bo->virtual;
dest += element->offset;
dri_bo_subdata(element->bo,
element->offset,
size,
data);
_mesa_free(data);
for (i = 0; i < element->count; i++) {
memcpy(dest, src, dst_stride);
src += element->glarray->StrideB;
dest += dst_stride;
}
drm_intel_gem_bo_unmap_gtt(element->bo);
} else {
void *data;
data = _mesa_malloc(dst_stride * element->count);
dest = data;
for (i = 0; i < element->count; i++) {
memcpy(dest, src, dst_stride);
src += element->glarray->StrideB;
dest += dst_stride;
}
dri_bo_subdata(element->bo,
element->offset,
size,
data);
_mesa_free(data);
}
}
}
@ -563,7 +587,13 @@ static void brw_prepare_indices(struct brw_context *brw)
/* Straight upload
*/
dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
if (intel->intelScreen->kernel_exec_fencing) {
drm_intel_gem_bo_map_gtt(bo);
memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
drm_intel_gem_bo_unmap_gtt(bo);
} else {
dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
}
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;

View file

@ -563,6 +563,7 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
GLboolean gem_supported;
struct drm_i915_getparam gp;
__DRIscreenPrivate *spriv = intelScreen->driScrnPriv;
int num_fences;
intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
@ -613,6 +614,11 @@ intel_init_bufmgr(intelScreenPrivate *intelScreen)
&intelScreen->sarea->last_dispatch);
}
if (intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences))
intelScreen->kernel_exec_fencing = !!num_fences;
else
intelScreen->kernel_exec_fencing = GL_FALSE;
return GL_TRUE;
}

View file

@ -79,6 +79,7 @@ typedef struct
GLboolean no_vbo;
int ttm;
dri_bufmgr *bufmgr;
GLboolean kernel_exec_fencing;
/**
* Configuration cache with default values for all contexts