intel: Pack dynamic draws together

Dynamic arrays have the tendency to be small and so allocating a bo for
each one is overkill and we can exploit many efficiency gains by packing
them together.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2011-02-08 20:01:10 +00:00
parent d0809d7b15
commit e476e12220
7 changed files with 89 additions and 21 deletions

View file

@ -359,13 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(input->glarray->BufferObj);
GLuint offset;
/* Named buffer object: Just reference its contents directly. */
drm_intel_bo_unreference(input->bo);
input->bo = intel_bufferobj_buffer(intel, intel_buffer,
INTEL_READ);
input->bo = intel_bufferobj_source(intel, intel_buffer, &offset);
drm_intel_bo_reference(input->bo);
input->offset = (unsigned long)input->glarray->Ptr;
input->offset = offset + (unsigned long)input->glarray->Ptr;
input->stride = input->glarray->StrideB;
input->count = input->glarray->_MaxElement;
@ -633,16 +633,16 @@ static void brw_prepare_indices(struct brw_context *brw)
ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
} else {
bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
INTEL_READ);
drm_intel_bo_reference(bo);
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
* of our drawing.
*/
brw->ib.start_vertex_offset = offset / ib_type_size;
offset = 0;
bo = intel_bufferobj_source(intel, intel_buffer_object(bufferobj),
&offset);
drm_intel_bo_reference(bo);
ib_size = bo->size;
}
}

View file

@ -175,6 +175,12 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
if (intel->vtbl.finish_batch)
intel->vtbl.finish_batch(intel);
if (intel->upload.bo) {
drm_intel_bo_unreference(intel->upload.bo);
intel->upload.bo = NULL;
intel->upload.offset = 0;
}
/* Check that we didn't just wrap our batchbuffer at a bad time. */
assert(!intel->no_batch_wrap);

View file

@ -528,7 +528,8 @@ intel_bufferobj_unmap(struct gl_context * ctx,
drm_intel_bo *
intel_bufferobj_buffer(struct intel_context *intel,
struct intel_buffer_object *intel_obj, GLuint flag)
struct intel_buffer_object *intel_obj,
GLuint flag)
{
if (intel_obj->region) {
if (flag == INTEL_WRITE_PART)
@ -539,22 +540,68 @@ intel_bufferobj_buffer(struct intel_context *intel,
}
}
if (intel_obj->source) {
drm_intel_bo_unreference(intel_obj->buffer);
intel_obj->buffer = NULL;
intel_obj->source = 0;
}
if (intel_obj->buffer == NULL) {
/* XXX suballocate for DYNAMIC READ */
intel_bufferobj_alloc_buffer(intel, intel_obj);
drm_intel_bo_subdata(intel_obj->buffer,
0, intel_obj->Base.Size,
intel_obj->sys_buffer);
if (flag != INTEL_READ) {
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
}
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
intel_obj->offset = 0;
}
return intel_obj->buffer;
}
/* Minimum size, in bytes, of each shared upload bo. */
#define INTEL_UPLOAD_SIZE (64*1024)

/**
 * Replace the context's shared upload bo with a freshly allocated one.
 *
 * The context's reference on the current upload bo (if any) is dropped,
 * a new bo of at least INTEL_UPLOAD_SIZE bytes -- or \p size bytes when
 * that is larger -- is allocated, and the upload offset is reset to the
 * start of the new bo.
 */
static void wrap_buffers(struct intel_context *intel, GLuint size)
{
   const GLuint alloc_size =
      (size < INTEL_UPLOAD_SIZE) ? INTEL_UPLOAD_SIZE : size;

   if (intel->upload.bo != NULL) {
      drm_intel_bo_unreference(intel->upload.bo);
   }

   intel->upload.bo =
      drm_intel_bo_alloc(intel->bufmgr, "upload", alloc_size, 0);
   intel->upload.offset = 0;
}
/**
 * Return the bo holding the contents of \p intel_obj for reading, and
 * store the byte offset of the data within that bo in \p *offset.
 *
 * When the object has no bo yet, its sys_buffer contents are copied into
 * the context's shared upload bo (starting a new upload bo first if there
 * is none or the current one lacks room), and the object is flagged as
 * being sourced from that shared bo.
 */
drm_intel_bo *
intel_bufferobj_source(struct intel_context *intel,
                       struct intel_buffer_object *intel_obj,
                       GLuint *offset)
{
   GLuint padded_size;

   /* Already backed by a bo: just report where the data lives. */
   if (intel_obj->buffer != NULL) {
      *offset = intel_obj->offset;
      return intel_obj->buffer;
   }

   /* Each packed upload occupies a 64-byte aligned slot. */
   padded_size = ALIGN(intel_obj->Base.Size, 64);

   if (intel->upload.bo == NULL ||
       intel->upload.offset + padded_size > intel->upload.bo->size) {
      wrap_buffers(intel, padded_size);
   }

   /* The object takes its own reference on the shared upload bo. */
   drm_intel_bo_reference(intel->upload.bo);
   intel_obj->buffer = intel->upload.bo;
   intel_obj->offset = intel->upload.offset;
   intel_obj->source = 1;

   /* Reserve the slot before copying the data into it. */
   intel->upload.offset += padded_size;

   drm_intel_bo_subdata(intel_obj->buffer,
                        intel_obj->offset,
                        intel_obj->Base.Size,
                        intel_obj->sys_buffer);

   *offset = intel_obj->offset;
   return intel_obj->buffer;
}
static void
intel_bufferobj_copy_subdata(struct gl_context *ctx,
struct gl_buffer_object *src,
@ -566,6 +613,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
struct intel_buffer_object *intel_src = intel_buffer_object(src);
struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
drm_intel_bo *src_bo, *dst_bo;
GLuint src_offset;
if (size == 0)
return;
@ -600,11 +648,11 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx,
/* Otherwise, we have real BOs, so blit them. */
dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);
src_bo = intel_bufferobj_source(intel, intel_src, &src_offset);
intel_emit_linear_blit(intel,
dst_bo, write_offset,
src_bo, read_offset, size);
src_bo, read_offset + src_offset, size);
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a

View file

@ -42,6 +42,8 @@ struct intel_buffer_object
{
struct gl_buffer_object Base;
drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */
GLuint offset; /* any offset into that buffer */
/** System memory buffer data, if not using a BO to store the data. */
void *sys_buffer;
@ -55,6 +57,7 @@ struct intel_buffer_object
GLsizei range_map_size;
GLboolean mapped_gtt;
GLboolean source;
};
@ -63,6 +66,9 @@ struct intel_buffer_object
drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel,
struct intel_buffer_object *obj,
GLuint flag);
drm_intel_bo *intel_bufferobj_source(struct intel_context *intel,
struct intel_buffer_object *obj,
GLuint *offset);
/* Hook the bufferobject implementation into mesa:
*/

View file

@ -187,6 +187,11 @@ struct intel_context
unsigned int count; /**< Number of vertices in current primitive */
} prim;
struct {
drm_intel_bo *bo;
GLuint offset;
} upload;
GLuint stats_wm;
/* Offsets of fields within the current vertex:

View file

@ -78,6 +78,7 @@ do_blit_readpixels(struct gl_context * ctx,
GLuint dst_offset;
GLuint rowLength;
drm_intel_bo *dst_buffer;
GLuint offset;
GLboolean all;
GLint dst_x, dst_y;
@ -138,8 +139,8 @@ do_blit_readpixels(struct gl_context * ctx,
dst_y = 0;
dst_buffer = intel_bufferobj_buffer(intel, dst,
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
if (ctx->ReadBuffer->Name == 0)
y = ctx->ReadBuffer->Height - (y + height);
@ -147,7 +148,7 @@ do_blit_readpixels(struct gl_context * ctx,
if (!intelEmitCopyBlit(intel,
src->cpp,
src->pitch, src->buffer, 0, src->tiling,
rowLength, dst_buffer, dst_offset, GL_FALSE,
rowLength, dst_buffer, dst_offset + offset, GL_FALSE,
x, y,
dst_x, dst_y,
width, height,

View file

@ -235,11 +235,13 @@ try_pbo_upload(struct intel_context *intel,
intel_flush(&intel->ctx);
{
drm_intel_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
GLuint offset;
drm_intel_bo *src_buffer = intel_bufferobj_source(intel, pbo, &offset);
if (!intelEmitCopyBlit(intel,
intelImage->mt->cpp,
src_stride, src_buffer, src_offset, GL_FALSE,
src_stride, src_buffer,
src_offset + offset, GL_FALSE,
dst_stride, dst_buffer, 0,
intelImage->mt->region->tiling,
0, 0, dst_x, dst_y, width, height,