implement zero-copy uploads for glTexImage from suitable pixel buffer objects

This commit is contained in:
Keith Whitwell 2006-08-30 19:55:32 +00:00
parent fe239744aa
commit 5ac3ad7722
10 changed files with 349 additions and 57 deletions

View file

@ -45,7 +45,6 @@
static void i915_render_start( struct intel_context *intel )
{
GLcontext *ctx = &intel->ctx;
struct i915_context *i915 = i915_context(&intel->ctx);
i915ValidateFragmentProgram( i915 );

View file

@ -438,6 +438,10 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all,
const struct intel_renderbuffer *irb
= intel_renderbuffer(ctx->DrawBuffer->
Attachment[buf].Renderbuffer);
struct buffer *write_buffer =
intel_region_buffer(intel, irb->region,
all ? INTEL_WRITE_FULL : INTEL_WRITE_PART);
GLuint clearVal;
GLint pitch, cpp;
GLuint BR13, CMD;
@ -448,7 +452,7 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all,
pitch = irb->region->pitch;
cpp = irb->region->cpp;
DBG("%s dst:buf(%d)/%d+%d %d,%d sz:%dx%d\n",
DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
irb->region->buffer, (pitch * cpp),
irb->region->draw_offset,
@ -495,7 +499,7 @@ void intelClearWithBlit(GLcontext *ctx, GLbitfield mask, GLboolean all,
OUT_BATCH( BR13 );
OUT_BATCH( (b.y1 << 16) | b.x1 );
OUT_BATCH( (b.y2 << 16) | b.x2 );
OUT_RELOC( irb->region->buffer, DRM_MM_TT|DRM_MM_WRITE,
OUT_RELOC( write_buffer, DRM_MM_TT|DRM_MM_WRITE,
irb->region->draw_offset );
OUT_BATCH( clearVal );
ADVANCE_BATCH();

View file

@ -32,6 +32,7 @@
#include "intel_context.h"
#include "intel_buffer_objects.h"
#include "intel_regions.h"
#include "intel_bufmgr.h"
@ -46,18 +47,44 @@ static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx,
GLenum target )
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object);
struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
_mesa_initialize_buffer_object(&obj->Base, name, target);
/* XXX: We generate our own handle, which is different to 'name' above.
*/
bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 0);
return &obj->Base;
}
/* Break the COW tie to the region. The region gets to keep the data.
*/
void intel_bufferobj_release_region( struct intel_context *intel,
struct intel_buffer_object *intel_obj )
{
assert(intel_obj->region->buffer == intel_obj->buffer);
intel_obj->region->pbo = NULL;
intel_obj->region = NULL;
intel_obj->buffer = NULL; /* refcount? */
/* This leads to a large number of buffer deletion/creation events.
* Currently the drm doesn't like that:
*/
bmGenBuffers(intel, "buffer object", 1, &intel_obj->buffer, 0);
bmBufferData(intel, intel_obj->buffer, intel_obj->Base.Size, NULL, 0);
}
/* Buffer-object-side entry point for breaking the copy-on-write link.
 * Simply forwards to intel_region_cow() on the attached region, after
 * which both the pbo and the region are meant to hold their own copy
 * of the data.
 *
 * The buffer object must currently have a region attached.
 */
void intel_bufferobj_cow( struct intel_context *intel,
                          struct intel_buffer_object *intel_obj )
{
   struct intel_region *attached = intel_obj->region;

   assert(attached);
   intel_region_cow( intel, attached );
}
/**
* Deallocate/free a vertex/pixel buffer object.
* Called via glDeleteBuffersARB().
@ -70,8 +97,12 @@ static void intel_bufferobj_free( GLcontext *ctx,
assert(intel_obj);
if (intel_obj->buffer)
if (intel_obj->region) {
intel_bufferobj_release_region(intel, intel_obj);
}
else if (intel_obj->buffer) {
bmDeleteBuffers( intel, 1, &intel_obj->buffer );
}
_mesa_free(intel_obj);
}
@ -94,13 +125,11 @@ static void intel_bufferobj_data( GLcontext *ctx,
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
/* XXX: do something useful with 'usage' (eg. populate flags
* argument below)
*/
assert(intel_obj);
intel_obj->Base.Size = size;
intel_obj->Base.Usage = usage;
obj->Size = size;
obj->Usage = usage;
if (intel_obj->region)
intel_bufferobj_release_region(intel, intel_obj);
bmBufferData(intel, intel_obj->buffer, size, data, 0);
}
@ -123,6 +152,10 @@ static void intel_bufferobj_subdata( GLcontext *ctx,
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
assert(intel_obj);
if (intel_obj->region)
intel_bufferobj_cow(intel, intel_obj);
bmBufferSubData(intel, intel_obj->buffer, offset, size, data);
}
@ -160,6 +193,10 @@ static void *intel_bufferobj_map( GLcontext *ctx,
/* XXX: Translate access to flags arg below:
*/
assert(intel_obj);
if (intel_obj->region)
intel_bufferobj_cow(intel, intel_obj);
obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0);
return obj->Pointer;
}
@ -182,8 +219,17 @@ static GLboolean intel_bufferobj_unmap( GLcontext *ctx,
return GL_TRUE;
}
struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj )
/* Return the buffer manager handle backing a GL buffer object.
 *
 * 'flag' states how the caller intends to access the buffer.  When the
 * object is currently sharing its storage with a region, a write
 * intent breaks that sharing before the handle is returned:
 *   INTEL_WRITE_PART - intel_bufferobj_cow()
 *   INTEL_WRITE_FULL - intel_bufferobj_release_region()
 * Any other flag value leaves the sharing untouched.
 */
struct buffer *intel_bufferobj_buffer( struct intel_context *intel,
                                       struct intel_buffer_object *intel_obj,
                                       GLuint flag )
{
   if (intel_obj->region) {
      switch (flag) {
      case INTEL_WRITE_PART:
         intel_bufferobj_cow(intel, intel_obj);
         break;
      case INTEL_WRITE_FULL:
         intel_bufferobj_release_region(intel, intel_obj);
         break;
      default:
         break;
      }
   }

   return intel_obj->buffer;
}

View file

@ -31,6 +31,7 @@
#include "mtypes.h"
struct intel_context;
struct intel_region;
struct gl_buffer_object;
@ -40,12 +41,18 @@ struct gl_buffer_object;
/* Driver-side representation of a GL buffer object: the core Mesa
 * object plus the buffer manager handle that stores its data.
 */
struct intel_buffer_object {
struct gl_buffer_object Base;
struct buffer *buffer; /* the low-level buffer manager's buffer handle */
/* Region that currently shares 'buffer' for a zero-copy texture
 * upload from this (pixel) buffer object, or NULL when no region
 * is attached.
 */
struct intel_region *region;
};
/* Get the bm buffer associated with a GL bufferobject:
*/
struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj );
struct buffer *intel_bufferobj_buffer( struct intel_context *intel,
struct intel_buffer_object *obj,
GLuint flag );
/* Hook the bufferobject implementation into mesa:
*/
@ -67,4 +74,12 @@ intel_buffer_object( struct gl_buffer_object *obj )
return NULL;
}
/* Helpers for zerocopy image uploads. See also intel_regions.h:
*/

/* Break the tie between a buffer object and its attached region by
 * calling intel_region_cow() on that region.  A region must be
 * attached.
 */
void intel_bufferobj_cow( struct intel_context *intel,
struct intel_buffer_object *intel_obj );

/* Break the tie between a buffer object and its attached region; the
 * region keeps the shared buffer and the buffer object is given a newly
 * generated one.  A region must be attached.
 */
void intel_bufferobj_release_region( struct intel_context *intel,
struct intel_buffer_object *intel_obj );
#endif

View file

@ -68,6 +68,9 @@ extern void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mo
#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
/* Access-intent flags passed to intel_region_buffer() and
 * intel_bufferobj_buffer().  When the region/buffer object is sharing
 * its storage for a zero-copy upload:
 *   INTEL_WRITE_PART - the sharing is broken via the *_cow() helper
 *   INTEL_WRITE_FULL - the sharing is broken via the *_release_*() helper
 * Those functions take no action for any other value, including
 * INTEL_READ.
 */
#define INTEL_WRITE_PART 0x1
#define INTEL_WRITE_FULL 0x2
#define INTEL_READ 0x4
struct intel_texture_object
{

View file

@ -299,6 +299,7 @@ static GLboolean do_blit_drawpixels( GLcontext *ctx,
drm_clip_rect_t *box = dPriv->pClipRects;
drm_clip_rect_t rect;
drm_clip_rect_t dest_rect;
struct buffer *src_buffer = intel_bufferobj_buffer(intel, src, INTEL_READ);
int i;
dest_rect.x1 = dPriv->x + x;
@ -314,7 +315,7 @@ static GLboolean do_blit_drawpixels( GLcontext *ctx,
intelEmitCopyBlit( intel,
dest->cpp,
rowLength,
intel_bufferobj_buffer(src), src_offset,
src_buffer, src_offset,
dest->pitch,
dest->buffer, 0,
rect.x1 - dest_rect.x1,

View file

@ -250,6 +250,13 @@ static GLboolean do_blit_readpixels( GLcontext *ctx,
if (intel->driDrawable->numClipRects)
{
GLboolean all = (width * height * src->cpp == dst->Base.Size &&
x == 0 &&
dst_offset == 0);
struct buffer *dst_buffer = intel_bufferobj_buffer(intel, dst,
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
__DRIdrawablePrivate *dPriv = intel->driDrawable;
int nbox = dPriv->numClipRects;
drm_clip_rect_t *box = dPriv->pClipRects;
@ -273,7 +280,7 @@ static GLboolean do_blit_readpixels( GLcontext *ctx,
src->cpp,
src->pitch, src->buffer, 0,
rowLength,
intel_bufferobj_buffer(dst), dst_offset,
dst_buffer, dst_offset,
rect.x1,
rect.y1,
rect.x1 - src_rect.x1,

View file

@ -43,6 +43,7 @@
#include "intel_regions.h"
#include "intel_blit.h"
#include "intel_bufmgr.h"
#include "intel_buffer_objects.h"
#define FILE_DEBUG_FLAG DEBUG_BUFMGR
@ -53,6 +54,9 @@ GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *regi
{
DBG("%s\n", __FUNCTION__);
if (!region->map_refcount++) {
if (region->pbo)
intel_region_cow(intel, region);
region->map = bmMapBuffer(intel, region->buffer, 0);
}
@ -112,7 +116,12 @@ void intel_region_release( struct intel_context *intel,
if ((*region)->refcount == 0) {
assert((*region)->map_refcount == 0);
bmDeleteBuffers(intel, 1, &(*region)->buffer);
if ((*region)->pbo)
intel_region_release_pbo( intel, *region );
else
bmDeleteBuffers(intel, 1, &(*region)->buffer);
free(*region);
}
*region = NULL;
@ -203,6 +212,17 @@ void intel_region_data(struct intel_context *intel,
{
DBG("%s\n", __FUNCTION__);
if (dst->pbo) {
if (dstx == 0 &&
dsty == 0 &&
width == dst->pitch &&
height == dst->height)
intel_region_release_pbo(intel, dst);
else
intel_region_cow(intel, dst);
}
LOCK_HARDWARE(intel);
_mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
@ -234,6 +254,16 @@ void intel_region_copy( struct intel_context *intel,
{
DBG("%s\n", __FUNCTION__);
if (dst->pbo) {
if (dstx == 0 &&
dsty == 0 &&
width == dst->pitch &&
height == dst->height)
intel_region_release_pbo(intel, dst);
else
intel_region_cow(intel, dst);
}
assert(src->cpp == dst->cpp);
intelEmitCopyBlit(intel,
@ -257,6 +287,16 @@ void intel_region_fill( struct intel_context *intel,
{
DBG("%s\n", __FUNCTION__);
if (dst->pbo) {
if (dstx == 0 &&
dsty == 0 &&
width == dst->pitch &&
height == dst->height)
intel_region_release_pbo(intel, dst);
else
intel_region_cow(intel, dst);
}
intelEmitFillBlit(intel,
dst->cpp,
dst->pitch, dst->buffer, dst_offset,
@ -265,3 +305,96 @@ void intel_region_fill( struct intel_context *intel,
color );
}
/* Attach to a pbo, discarding our data.  Effectively zero-copy upload
 * the pbo's data.
 *
 * After this call the region and the pbo point at each other and both
 * reference the pbo's buffer.  Attaching the pbo that is already
 * attached is a no-op.
 */
void intel_region_attach_pbo( struct intel_context *intel,
                              struct intel_region *region,
                              struct intel_buffer_object *pbo )
{
   if (region->pbo == pbo)
      return;

   /* If there is already a pbo attached, break the cow tie now.
    * Don't call intel_region_release_pbo() as that would
    * unnecessarily allocate a new buffer we would have to immediately
    * discard.
    */
   if (region->pbo) {
      region->pbo->region = NULL;
      region->pbo = NULL;
      region->buffer = NULL;    /* refcount? */
   }

   /* Any buffer still held here is the region's own and is about to be
    * replaced.  bmDeleteBuffers() takes the address of the handle, as
    * at its other call sites.
    */
   if (region->buffer) {
      bmDeleteBuffers(intel, 1, &region->buffer);
   }

   region->pbo = pbo;
   region->pbo->region = region;
   region->buffer = pbo->buffer;        /* refcount? */

   /* "%s" needs the function name; without it the buffer pointer was
    * consumed as a string.
    */
   _mesa_printf("%s attach buffer %p from pbo\n", __FUNCTION__,
                (void *) region->buffer);
}
/* Break the COW tie to the pbo. The pbo gets to keep the data.
*/
void intel_region_release_pbo( struct intel_context *intel,
struct intel_region *region )
{
assert(region->buffer == region->pbo->buffer);
region->pbo->region = NULL;
region->pbo = NULL;
region->buffer = NULL; /* refcount? */
bmGenBuffers(intel, "region", 1, &region->buffer, 0);
bmBufferData(intel, region->buffer,
region->cpp * region->pitch * region->height, NULL, 0);
}
/* Break the COW tie to the pbo.  Both the pbo and the region end up
 * with a copy of the data.
 *
 * The tie is first released, which leaves the data in the pbo's buffer
 * and gives the region a new buffer with no contents; the pbo's
 * contents are then blitted into that new buffer.
 *
 * The caller must ensure region->pbo is non-NULL.
 */
void intel_region_cow( struct intel_context *intel,
                       struct intel_region *region )
{
   /* Remember the pbo: release_pbo() clears region->pbo. */
   struct intel_buffer_object *pbo = region->pbo;

   intel_region_release_pbo(intel, region);

   assert(region->cpp *
          region->pitch *
          region->height == pbo->Base.Size);

   /* Base.Size is not an int; cast so the argument matches "%d". */
   _mesa_printf("%s (%d bytes)\n", __FUNCTION__, (int) pbo->Base.Size);

   /* Now blit from the pbo's buffer (which still holds the data) to
    * the region's new buffer.  intelEmitCopyBlit() takes the source
    * pitch/buffer/offset before the destination ones, so the pbo must
    * come first; the previous order copied the region's new buffer
    * over the pbo's data.
    */

   /* LOCKING??? */
   intelEmitCopyBlit( intel,
                      region->cpp,
                      region->pitch,
                      pbo->buffer, 0,
                      region->pitch,
                      region->buffer, 0,
                      0,0,
                      0,0,
                      region->pitch,
                      region->height );
}
/* Return the buffer manager handle backing a region.
 *
 * 'flag' states how the caller intends to access the buffer.  When the
 * region is currently sharing a pbo's storage, a write intent breaks
 * that sharing before the handle is returned:
 *   INTEL_WRITE_PART - intel_region_cow()
 *   INTEL_WRITE_FULL - intel_region_release_pbo()
 * Any other flag value leaves the sharing untouched.
 */
struct buffer *intel_region_buffer( struct intel_context *intel,
                                    struct intel_region *region,
                                    GLuint flag )
{
   if (region->pbo) {
      switch (flag) {
      case INTEL_WRITE_PART:
         intel_region_cow(intel, region);
         break;
      case INTEL_WRITE_FULL:
         intel_region_release_pbo(intel, region);
         break;
      default:
         break;
      }
   }

   return region->buffer;
}

View file

@ -31,6 +31,7 @@
#include "mtypes.h"
#include "intel_bufmgr.h" /* for DBG! */
struct intel_context;
struct intel_buffer_object;
/**
* A layer on top of the bufmgr buffers that adds a few useful things:
@ -50,6 +51,8 @@ struct intel_region {
GLuint map_refcount; /**< Reference count for mapping */
GLuint draw_offset; /**< Offset of drawing address within the region */
struct intel_buffer_object *pbo; /* zero-copy uploads */
};
@ -115,5 +118,18 @@ void intel_region_fill( struct intel_context *intel,
GLuint width, GLuint height,
GLuint color );
/* Helpers for zerocopy uploads, particularly texture image uploads:
*/

/* Make 'region' share the buffer of 'pbo', dropping whatever buffer or
 * pbo tie the region had before.
 */
void intel_region_attach_pbo( struct intel_context *intel,
struct intel_region *region,
struct intel_buffer_object *pbo );

/* Undo the tie to the attached pbo; the pbo keeps the shared buffer and
 * the region is given a newly generated one.  A pbo must be attached.
 */
void intel_region_release_pbo( struct intel_context *intel,
struct intel_region *region );

/* Undo the tie to the attached pbo and emit a blit between the two
 * buffers so that both are meant to end up holding the data.  A pbo
 * must be attached.
 */
void intel_region_cow( struct intel_context *intel,
struct intel_region *region );

/* Return the region's buffer handle; 'flag' is one of the INTEL_WRITE_*
 * / INTEL_READ access intents, and a write intent breaks any pbo tie
 * first.
 */
struct buffer *intel_region_buffer( struct intel_context *intel,
struct intel_region *region,
GLuint flag );
#endif

View file

@ -153,9 +153,9 @@ static GLuint target_to_face( GLenum target )
/* There are actually quite a few combinations this will work for,
* more than what I've listed here.
*/
static GLboolean check_pbo_blit( GLint internalFormat,
GLenum format, GLenum type,
const struct gl_texture_format *mesa_format )
static GLboolean check_pbo_format( GLint internalFormat,
GLenum format, GLenum type,
const struct gl_texture_format *mesa_format )
{
switch (internalFormat) {
case 4:
@ -178,7 +178,8 @@ static GLboolean check_pbo_blit( GLint internalFormat,
}
/* XXX: Do this for TexSubImage also:
*/
static GLboolean try_pbo_upload( struct intel_context *intel,
struct intel_texture_image *intelImage,
const struct gl_pixelstore_attrib *unpack,
@ -187,11 +188,11 @@ static GLboolean try_pbo_upload( struct intel_context *intel,
GLenum format, GLenum type,
const void *pixels)
{
struct intel_buffer_object *intelObj = intel_buffer_object(unpack->BufferObj);
struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
GLuint src_offset, src_stride;
GLuint dst_offset, dst_stride;
if (!intelObj ||
if (!pbo ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels ||
unpack->SkipRows) {
@ -199,11 +200,6 @@ static GLboolean try_pbo_upload( struct intel_context *intel,
return GL_FALSE;
}
if (!check_pbo_blit(internalFormat, format, type, intelImage->base.TexFormat)) {
_mesa_printf("%s - bad format for blit\n", __FUNCTION__);
return GL_FALSE;
}
src_offset = (GLuint) pixels;
if (unpack->RowLength > 0)
@ -219,19 +215,25 @@ static GLboolean try_pbo_upload( struct intel_context *intel,
intelFlush( &intel->ctx );
LOCK_HARDWARE( intel );
{
struct buffer *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
struct buffer *dst_buffer = intel_region_buffer(intel, intelImage->mt->region,
INTEL_WRITE_FULL);
intelEmitCopyBlit( intel,
intelImage->mt->cpp,
src_stride, intel_bufferobj_buffer(intelObj), src_offset,
dst_stride, intelImage->mt->region->buffer, dst_offset,
0,
0,
0,
0,
width,
height );
intel_batchbuffer_flush( intel->batch );
intelEmitCopyBlit( intel,
intelImage->mt->cpp,
src_stride, src_buffer, src_offset,
dst_stride, dst_buffer, dst_offset,
0,
0,
0,
0,
width,
height );
intel_batchbuffer_flush( intel->batch );
}
UNLOCK_HARDWARE( intel );
return GL_TRUE;
@ -239,6 +241,58 @@ static GLboolean try_pbo_upload( struct intel_context *intel,
/* Try to satisfy a texture image upload by attaching the unpack pixel
 * buffer object directly to the image's miptree region, without
 * copying any data.
 *
 * This path is only taken when an intel buffer object is bound for
 * unpacking, no image transfer state or SkipPixels/SkipRows is in
 * effect, the source row stride equals the miptree pitch, and both the
 * source offset ('pixels') and the image's offset within the miptree
 * are zero.
 *
 * Returns GL_TRUE if the pbo was attached, GL_FALSE if the caller must
 * fall back to another upload path.  internalFormat, height, format
 * and type are not examined here.
 */
static GLboolean try_pbo_zcopy( struct intel_context *intel,
                                struct intel_texture_image *intelImage,
                                const struct gl_pixelstore_attrib *unpack,
                                GLint internalFormat,
                                GLint width, GLint height,
                                GLenum format, GLenum type,
                                const void *pixels)
{
   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
   GLuint src_offset, src_stride;
   GLuint dst_offset, dst_stride;

   if (!pbo ||
       intel->ctx._ImageTransferState ||
       unpack->SkipPixels ||
       unpack->SkipRows) {
      _mesa_printf("%s: failure 1\n", __FUNCTION__);
      return GL_FALSE;
   }

   /* With a pbo bound, 'pixels' is an offset into the buffer. */
   src_offset = (GLuint) pixels;
   src_stride = (unpack->RowLength > 0) ? unpack->RowLength : width;

   dst_offset = intel_miptree_image_offset(intelImage->mt,
                                           intelImage->face,
                                           intelImage->level);
   dst_stride = intelImage->mt->pitch;

   /* The pbo can only stand in for the region if the layouts line up
    * exactly.
    */
   if (src_stride == dst_stride &&
       dst_offset == 0 &&
       src_offset == 0) {
      intel_region_attach_pbo( intel,
                               intelImage->mt->region,
                               pbo );
      return GL_TRUE;
   }

   _mesa_printf("%s: failure 2\n", __FUNCTION__);
   return GL_FALSE;
}
static void intelTexImage(GLcontext *ctx,
GLint dims,
GLenum target, GLint level,
@ -345,43 +399,57 @@ static void intelTexImage(GLcontext *ctx,
}
assert(!intelImage->mt);
if (intelObj->mt &&
intelObj->mt != intelImage->mt &&
intel_miptree_match_image(intelObj->mt, &intelImage->base,
intelImage->face, intelImage->level)) {
if (intelImage->mt) {
intel_miptree_release(intel, &intelImage->mt);
}
intel_miptree_reference(&intelImage->mt, intelObj->mt);
assert(intelImage->mt);
}
if (!intelImage->mt) {
if (INTEL_DEBUG & DEBUG_TEXTURE)
_mesa_printf("XXX: Image did not fit into tree - storing in local memory!\n");
}
if (!intelImage->mt)
DBG("XXX: Image did not fit into tree - storing in local memory!\n");
/* Attempt to use the blitter for PBO image uploads:
*
* Next step would be texturing directly from PBO's.
/* PBO fastpaths:
*/
if (dims <= 2 &&
intelImage->mt &&
intel_buffer_object(unpack->BufferObj)) {
intel_buffer_object(unpack->BufferObj) &&
check_pbo_format(internalFormat, format,
type, intelImage->base.TexFormat)) {
DBG("trying pbo upload\n");
/* Attempt to texture directly from PBO data (zero copy upload).
* This is about twice as fast as regular uploads:
*/
if (intelObj->mt == intelImage->mt &&
intelObj->mt->first_level == level &&
intelObj->mt->last_level == level) {
if (try_pbo_zcopy(intel, intelImage, unpack,
internalFormat,
width, height, format, type, pixels)) {
DBG("pbo zcopy upload succeeded\n");
return;
}
}
_mesa_printf("trying pbo upload\n");
/* Otherwise, attempt to use the blitter for PBO image uploads.
* This is about 20% faster than regular uploads:
*/
if (try_pbo_upload(intel, intelImage, unpack,
internalFormat,
width, height, format, type, pixels)) {
_mesa_printf("pbo upload succeeded\n");
DBG("pbo upload succeeded\n");
return;
}
_mesa_printf("pbo upload failed\n");
DBG("pbo upload failed\n");
}