diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 2fee0ca502a..3d914fb3fbb 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -45,7 +45,6 @@ static void i915_render_start( struct intel_context *intel ) { - GLcontext *ctx = &intel->ctx; struct i915_context *i915 = i915_context(&intel->ctx); i915ValidateFragmentProgram( i915 ); diff --git a/src/mesa/drivers/dri/i915/intel_blit.c b/src/mesa/drivers/dri/i915/intel_blit.c index e55475d3583..5cbbea1005e 100644 --- a/src/mesa/drivers/dri/i915/intel_blit.c +++ b/src/mesa/drivers/dri/i915/intel_blit.c @@ -438,6 +438,10 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all, const struct intel_renderbuffer *irb = intel_renderbuffer(ctx->DrawBuffer-> Attachment[buf].Renderbuffer); + struct buffer *write_buffer = + intel_region_buffer(intel, irb->region, + all ? INTEL_WRITE_FULL : INTEL_WRITE_PART); + GLuint clearVal; GLint pitch, cpp; GLuint BR13, CMD; @@ -448,7 +452,7 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all, pitch = irb->region->pitch; cpp = irb->region->cpp; - DBG("%s dst:buf(%d)/%d+%d %d,%d sz:%dx%d\n", + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, irb->region->buffer, (pitch * cpp), irb->region->draw_offset, @@ -495,7 +499,7 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all, OUT_BATCH( BR13 ); OUT_BATCH( (b.y1 << 16) | b.x1 ); OUT_BATCH( (b.y2 << 16) | b.x2 ); - OUT_RELOC( irb->region->buffer, DRM_MM_TT|DRM_MM_WRITE, + OUT_RELOC( write_buffer, DRM_MM_TT|DRM_MM_WRITE, irb->region->draw_offset ); OUT_BATCH( clearVal ); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c index 9bd7d8b4169..6cb2a65035c 100644 --- a/src/mesa/drivers/dri/i915/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.c @@ -32,6 +32,7 @@ #include "intel_context.h" 
#include "intel_buffer_objects.h" +#include "intel_regions.h" #include "intel_bufmgr.h" @@ -46,18 +47,44 @@ static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx, GLenum target ) { struct intel_context *intel = intel_context(ctx); - struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object); + struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object); _mesa_initialize_buffer_object(&obj->Base, name, target); - /* XXX: We generate our own handle, which is different to 'name' above. - */ bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 0); return &obj->Base; } +/* Break the COW tie to the region. The region gets to keep the data. + */ +void intel_bufferobj_release_region( struct intel_context *intel, + struct intel_buffer_object *intel_obj ) +{ + assert(intel_obj->region->buffer == intel_obj->buffer); + intel_obj->region->pbo = NULL; + intel_obj->region = NULL; + intel_obj->buffer = NULL; /* refcount? */ + + /* This leads to a large number of buffer deletion/creation events. + * Currently the drm doesn't like that: + */ + bmGenBuffers(intel, "buffer object", 1, &intel_obj->buffer, 0); + bmBufferData(intel, intel_obj->buffer, intel_obj->Base.Size, NULL, 0); +} + +/* Break the COW tie to the region. Both the pbo and the region end + * up with a copy of the data. + */ +void intel_bufferobj_cow( struct intel_context *intel, + struct intel_buffer_object *intel_obj ) +{ + assert(intel_obj->region); + intel_region_cow( intel, intel_obj->region ); +} + + /** * Deallocate/free a vertex/pixel buffer object. * Called via glDeleteBuffersARB(). 
@@ -70,8 +97,12 @@ static void intel_bufferobj_free( GLcontext *ctx, assert(intel_obj); - if (intel_obj->buffer) + if (intel_obj->region) { + intel_bufferobj_release_region(intel, intel_obj); + } + if (intel_obj->buffer) { /* also frees the replacement buffer release_region allocated */ bmDeleteBuffers( intel, 1, &intel_obj->buffer ); + } _mesa_free(intel_obj); } @@ -94,13 +125,11 @@ static void intel_bufferobj_data( GLcontext *ctx, struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); - /* XXX: do something useful with 'usage' (eg. populate flags - * argument below) - */ - assert(intel_obj); + intel_obj->Base.Size = size; + intel_obj->Base.Usage = usage; - obj->Size = size; - obj->Usage = usage; + if (intel_obj->region) + intel_bufferobj_release_region(intel, intel_obj); bmBufferData(intel, intel_obj->buffer, size, data, 0); } @@ -123,6 +152,10 @@ static void intel_bufferobj_subdata( GLcontext *ctx, struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); + + if (intel_obj->region) + intel_bufferobj_cow(intel, intel_obj); + bmBufferSubData(intel, intel_obj->buffer, offset, size, data); } @@ -160,6 +193,10 @@ static void *intel_bufferobj_map( GLcontext *ctx, /* XXX: Translate access to flags arg below: */ assert(intel_obj); + + if (intel_obj->region) + intel_bufferobj_cow(intel, intel_obj); + obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0); return obj->Pointer; } @@ -182,8 +219,17 @@ static GLboolean intel_bufferobj_unmap( GLcontext *ctx, return GL_TRUE; } -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj ) +struct buffer *intel_bufferobj_buffer( struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint flag ) { + if (intel_obj->region) { + if (flag == INTEL_WRITE_PART) + intel_bufferobj_cow(intel, intel_obj); + else if (flag == INTEL_WRITE_FULL) + intel_bufferobj_release_region(intel, intel_obj); + } + return intel_obj->buffer; } diff --git 
a/src/mesa/drivers/dri/i915/intel_buffer_objects.h b/src/mesa/drivers/dri/i915/intel_buffer_objects.h index 445ddf4273b..93dfbbc3c92 100644 --- a/src/mesa/drivers/dri/i915/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.h @@ -31,6 +31,7 @@ #include "mtypes.h" struct intel_context; +struct intel_region; struct gl_buffer_object; @@ -40,12 +41,18 @@ struct gl_buffer_object; struct intel_buffer_object { struct gl_buffer_object Base; struct buffer *buffer; /* the low-level buffer manager's buffer handle */ + + struct intel_region *region; /* Is there a zero-copy texture + associated with this (pixel) + buffer object? */ }; /* Get the bm buffer associated with a GL bufferobject: */ -struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj ); +struct buffer *intel_bufferobj_buffer( struct intel_context *intel, + struct intel_buffer_object *obj, + GLuint flag ); /* Hook the bufferobject implementation into mesa: */ @@ -67,4 +74,12 @@ intel_buffer_object( struct gl_buffer_object *obj ) return NULL; } +/* Helpers for zerocopy image uploads. 
See also intel_regions.h: + */ +void intel_bufferobj_cow( struct intel_context *intel, + struct intel_buffer_object *intel_obj ); +void intel_bufferobj_release_region( struct intel_context *intel, + struct intel_buffer_object *intel_obj ); + + #endif diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h index c7a4855aee2..150baffc4ea 100644 --- a/src/mesa/drivers/dri/i915/intel_context.h +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -68,6 +68,9 @@ extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mo #define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) +#define INTEL_WRITE_PART 0x1 +#define INTEL_WRITE_FULL 0x2 +#define INTEL_READ 0x4 struct intel_texture_object { diff --git a/src/mesa/drivers/dri/i915/intel_pixel_draw.c b/src/mesa/drivers/dri/i915/intel_pixel_draw.c index 52d6856184f..b35ab635658 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_draw.c @@ -299,6 +299,7 @@ static GLboolean do_blit_drawpixels( GLcontext *ctx, drm_clip_rect_t *box = dPriv->pClipRects; drm_clip_rect_t rect; drm_clip_rect_t dest_rect; + struct buffer *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ); int i; dest_rect.x1 = dPriv->x + x; @@ -314,7 +315,7 @@ static GLboolean do_blit_drawpixels( GLcontext *ctx, intelEmitCopyBlit( intel, dest->cpp, rowLength, - intel_bufferobj_buffer(src), src_offset, + src_buffer, src_offset, dest->pitch, dest->buffer, 0, rect.x1 - dest_rect.x1, diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index 7a5740fae82..13890550d11 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -250,6 +250,13 @@ static GLboolean do_blit_readpixels( GLcontext *ctx, if (intel->driDrawable->numClipRects) { + GLboolean all = (width * height * src->cpp == dst->Base.Size && + x == 0 && + dst_offset == 0); + + 
struct buffer *dst_buffer = intel_bufferobj_buffer(intel, dst, + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); __DRIdrawablePrivate *dPriv = intel->driDrawable; int nbox = dPriv->numClipRects; drm_clip_rect_t *box = dPriv->pClipRects; @@ -273,7 +280,7 @@ static GLboolean do_blit_readpixels( GLcontext *ctx, src->cpp, src->pitch, src->buffer, 0, rowLength, - intel_bufferobj_buffer(dst), dst_offset, + dst_buffer, dst_offset, rect.x1, rect.y1, rect.x1 - src_rect.x1, diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c index 5c874625e75..2cde3df9822 100644 --- a/src/mesa/drivers/dri/i915/intel_regions.c +++ b/src/mesa/drivers/dri/i915/intel_regions.c @@ -43,6 +43,7 @@ #include "intel_regions.h" #include "intel_blit.h" #include "intel_bufmgr.h" +#include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BUFMGR @@ -53,6 +54,9 @@ GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *regi { DBG("%s\n", __FUNCTION__); if (!region->map_refcount++) { + if (region->pbo) + intel_region_cow(intel, region); + region->map = bmMapBuffer(intel, region->buffer, 0); } @@ -112,7 +116,12 @@ void intel_region_release( struct intel_context *intel, if ((*region)->refcount == 0) { assert((*region)->map_refcount == 0); - bmDeleteBuffers(intel, 1, &(*region)->buffer); + + if ((*region)->pbo) + intel_region_release_pbo( intel, *region ); + /* release_pbo hands the region a fresh buffer of its own, so always delete: */ + bmDeleteBuffers(intel, 1, &(*region)->buffer); + free(*region); } *region = NULL; @@ -203,6 +212,17 @@ void intel_region_data(struct intel_context *intel, { DBG("%s\n", __FUNCTION__); + if (dst->pbo) { + if (dstx == 0 && + dsty == 0 && + width == dst->pitch && + height == dst->height) + intel_region_release_pbo(intel, dst); + else + intel_region_cow(intel, dst); + } + + LOCK_HARDWARE(intel); _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset, @@ -234,6 +254,16 @@ void intel_region_copy( struct intel_context *intel, { DBG("%s\n", __FUNCTION__); + if (dst->pbo) { + if (dstx 
== 0 && + dsty == 0 && + width == dst->pitch && + height == dst->height) + intel_region_release_pbo(intel, dst); + else + intel_region_cow(intel, dst); + } + assert(src->cpp == dst->cpp); intelEmitCopyBlit(intel, @@ -257,6 +287,16 @@ void intel_region_fill( struct intel_context *intel, { DBG("%s\n", __FUNCTION__); + if (dst->pbo) { + if (dstx == 0 && + dsty == 0 && + width == dst->pitch && + height == dst->height) + intel_region_release_pbo(intel, dst); + else + intel_region_cow(intel, dst); + } + intelEmitFillBlit(intel, dst->cpp, dst->pitch, dst->buffer, dst_offset, @@ -265,3 +305,96 @@ void intel_region_fill( struct intel_context *intel, color ); } +/* Attach to a pbo, discarding our data. Effectively zero-copy upload + * the pbo's data. + */ +void intel_region_attach_pbo( struct intel_context *intel, + struct intel_region *region, + struct intel_buffer_object *pbo ) +{ + if (region->pbo == pbo) + return; + + /* If there is already a pbo attached, break the cow tie now. + * Don't call intel_region_release_pbo() as that would + * unnecessarily allocate a new buffer we would have to immediately + * discard. + */ + if (region->pbo) { + region->pbo->region = NULL; + region->pbo = NULL; + region->buffer = NULL; /* refcount? */ + } + + if (region->buffer) { + bmDeleteBuffers(intel, 1, &region->buffer); + } + + region->pbo = pbo; + region->pbo->region = region; + region->buffer = pbo->buffer; /* refcount? */ + + _mesa_printf("%s attach buffer %p from pbo\n", __FUNCTION__, region->buffer); +} + + +/* Break the COW tie to the pbo. The pbo gets to keep the data. + */ +void intel_region_release_pbo( struct intel_context *intel, + struct intel_region *region ) +{ + assert(region->buffer == region->pbo->buffer); + region->pbo->region = NULL; + region->pbo = NULL; + region->buffer = NULL; /* refcount? */ + + bmGenBuffers(intel, "region", 1, &region->buffer, 0); + bmBufferData(intel, region->buffer, + region->cpp * region->pitch * region->height, NULL, 0); +} + +/* Break the COW tie to the pbo. 
Both the pbo and the region end up + * with a copy of the data. + */ +void intel_region_cow( struct intel_context *intel, + struct intel_region *region ) +{ + struct intel_buffer_object *pbo = region->pbo; + + intel_region_release_pbo(intel, region); + + assert(region->cpp * + region->pitch * + region->height == pbo->Base.Size); + + _mesa_printf("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); + + /* Now blit from the pbo's buffer (source) to the region's new buffer (dest): + */ + + /* LOCKING??? */ + intelEmitCopyBlit( intel, + region->cpp, + region->pitch, + pbo->buffer, 0, + region->pitch, + region->buffer, 0, + 0,0, + 0,0, + region->pitch, + region->height ); +} + +struct buffer *intel_region_buffer( struct intel_context *intel, + struct intel_region *region, + GLuint flag ) +{ + if (region->pbo) { + if (flag == INTEL_WRITE_PART) + intel_region_cow(intel, region); + else if (flag == INTEL_WRITE_FULL) + intel_region_release_pbo(intel, region); + } + + return region->buffer; +} diff --git a/src/mesa/drivers/dri/i915/intel_regions.h b/src/mesa/drivers/dri/i915/intel_regions.h index eb744dfda55..a54de926d35 100644 --- a/src/mesa/drivers/dri/i915/intel_regions.h +++ b/src/mesa/drivers/dri/i915/intel_regions.h @@ -31,6 +31,7 @@ #include "mtypes.h" #include "intel_bufmgr.h" /* for DBG! 
*/ struct intel_context; +struct intel_buffer_object; /** * A layer on top of the bufmgr buffers that adds a few useful things: @@ -50,6 +51,8 @@ struct intel_region { GLuint map_refcount; /**< Reference count for mapping */ GLuint draw_offset; /**< Offset of drawing address within the region */ + + struct intel_buffer_object *pbo; /* zero-copy uploads */ }; @@ -115,5 +118,18 @@ void intel_region_fill( struct intel_context *intel, GLuint width, GLuint height, GLuint color ); +/* Helpers for zerocopy uploads, particularly texture image uploads: + */ +void intel_region_attach_pbo( struct intel_context *intel, + struct intel_region *region, + struct intel_buffer_object *pbo ); +void intel_region_release_pbo( struct intel_context *intel, + struct intel_region *region ); +void intel_region_cow( struct intel_context *intel, + struct intel_region *region ); + +struct buffer *intel_region_buffer( struct intel_context *intel, + struct intel_region *region, + GLuint flag ); #endif diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c index c67e58533fa..26bd0003c3c 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_image.c +++ b/src/mesa/drivers/dri/i915/intel_tex_image.c @@ -153,9 +153,9 @@ static GLuint target_to_face( GLenum target ) /* There are actually quite a few combinations this will work for, * more than what I've listed here. 
*/ -static GLboolean check_pbo_blit( GLint internalFormat, - GLenum format, GLenum type, - const struct gl_texture_format *mesa_format ) +static GLboolean check_pbo_format( GLint internalFormat, + GLenum format, GLenum type, + const struct gl_texture_format *mesa_format ) { switch (internalFormat) { case 4: @@ -178,7 +178,8 @@ static GLboolean check_pbo_blit( GLint internalFormat, } - +/* XXX: Do this for TexSubImage also: + */ static GLboolean try_pbo_upload( struct intel_context *intel, struct intel_texture_image *intelImage, const struct gl_pixelstore_attrib *unpack, @@ -187,11 +188,11 @@ static GLboolean try_pbo_upload( struct intel_context *intel, GLenum format, GLenum type, const void *pixels) { - struct intel_buffer_object *intelObj = intel_buffer_object(unpack->BufferObj); + struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (!intelObj || + if (!pbo || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { @@ -199,11 +200,6 @@ static GLboolean try_pbo_upload( struct intel_context *intel, return GL_FALSE; } - if (!check_pbo_blit(internalFormat, format, type, intelImage->base.TexFormat)) { - _mesa_printf("%s - bad format for blit\n", __FUNCTION__); - return GL_FALSE; - } - src_offset = (GLuint) pixels; if (unpack->RowLength > 0) @@ -219,19 +215,25 @@ static GLboolean try_pbo_upload( struct intel_context *intel, intelFlush( &intel->ctx ); LOCK_HARDWARE( intel ); + { + struct buffer *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); + struct buffer *dst_buffer = intel_region_buffer(intel, intelImage->mt->region, + INTEL_WRITE_FULL); - intelEmitCopyBlit( intel, - intelImage->mt->cpp, - src_stride, intel_bufferobj_buffer(intelObj), src_offset, - dst_stride, intelImage->mt->region->buffer, dst_offset, - 0, - 0, - 0, - 0, - width, - height ); - intel_batchbuffer_flush( intel->batch ); + intelEmitCopyBlit( intel, + intelImage->mt->cpp, + 
src_stride, src_buffer, src_offset, + dst_stride, dst_buffer, dst_offset, + 0, + 0, + 0, + 0, + width, + height ); + + intel_batchbuffer_flush( intel->batch ); + } UNLOCK_HARDWARE( intel ); return GL_TRUE; @@ -239,6 +241,58 @@ static GLboolean try_pbo_upload( struct intel_context *intel, +static GLboolean try_pbo_zcopy( struct intel_context *intel, + struct intel_texture_image *intelImage, + const struct gl_pixelstore_attrib *unpack, + GLint internalFormat, + GLint width, GLint height, + GLenum format, GLenum type, + const void *pixels) +{ + struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); + GLuint src_offset, src_stride; + GLuint dst_offset, dst_stride; + + if (!pbo || + intel->ctx._ImageTransferState || + unpack->SkipPixels || + unpack->SkipRows) { + _mesa_printf("%s: failure 1\n", __FUNCTION__); + return GL_FALSE; + } + + src_offset = (GLuint) pixels; + + if (unpack->RowLength > 0) + src_stride = unpack->RowLength; + else + src_stride = width; + + dst_offset = intel_miptree_image_offset(intelImage->mt, + intelImage->face, + intelImage->level); + + dst_stride = intelImage->mt->pitch; + + if (src_stride != dst_stride || + dst_offset != 0 || + src_offset != 0) { + _mesa_printf("%s: failure 2\n", __FUNCTION__); + return GL_FALSE; + } + + intel_region_attach_pbo( intel, + intelImage->mt->region, + pbo ); + + return GL_TRUE; +} + + + + + + static void intelTexImage(GLcontext *ctx, GLint dims, GLenum target, GLint level, @@ -345,43 +399,57 @@ static void intelTexImage(GLcontext *ctx, } + assert(!intelImage->mt); + if (intelObj->mt && - intelObj->mt != intelImage->mt && intel_miptree_match_image(intelObj->mt, &intelImage->base, intelImage->face, intelImage->level)) { - if (intelImage->mt) { - intel_miptree_release(intel, &intelImage->mt); - } - intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); } - if (!intelImage->mt) { - if (INTEL_DEBUG & DEBUG_TEXTURE) - _mesa_printf("XXX: Image did not fit into tree - storing in 
local memory!\n"); - } + if (!intelImage->mt) + DBG("XXX: Image did not fit into tree - storing in local memory!\n"); - - /* Attempt to use the blitter for PBO image uploads: - * - * Next step would be texturing directly from PBO's. + /* PBO fastpaths: */ if (dims <= 2 && intelImage->mt && - intel_buffer_object(unpack->BufferObj)) { + intel_buffer_object(unpack->BufferObj) && + check_pbo_format(internalFormat, format, + type, intelImage->base.TexFormat)) { + + DBG("trying pbo upload\n"); + + /* Attempt to texture directly from PBO data (zero copy upload). + * This is about twice as fast as regular uploads: + */ + if (intelObj->mt == intelImage->mt && + intelObj->mt->first_level == level && + intelObj->mt->last_level == level) { + + if (try_pbo_zcopy(intel, intelImage, unpack, + internalFormat, + width, height, format, type, pixels)) { + + DBG("pbo zcopy upload succeeded\n"); + return; + } + } - _mesa_printf("trying pbo upload\n"); + /* Otherwise, attempt to use the blitter for PBO image uploads. + * This is about 20% faster than regular uploads: + */ if (try_pbo_upload(intel, intelImage, unpack, internalFormat, width, height, format, type, pixels)) { - _mesa_printf("pbo upload succeeded\n"); + DBG("pbo upload succeeded\n"); return; } - _mesa_printf("pbo upload failed\n"); + DBG("pbo upload failed\n"); }