use a shadow buffer for vertex data to optimize memory access

This commit is contained in:
Christian König 2010-11-28 14:48:31 +01:00
parent a984c67b31
commit 4abe738288
6 changed files with 249 additions and 129 deletions

View file

@ -147,7 +147,8 @@ C_SOURCES = \
vl/vl_mpeg12_mc_renderer.c \
vl/vl_compositor.c \
vl/vl_csc.c \
vl/vl_idct.c
vl/vl_idct.c \
vl/vl_vertex_buffers.c
GALLIVM_SOURCES = \
gallivm/lp_bld_arit.c \

View file

@ -26,6 +26,7 @@
**************************************************************************/
#include "vl_idct.h"
#include "vl_vertex_buffers.h"
#include "util/u_draw.h"
#include <assert.h>
#include <pipe/p_context.h>
@ -78,11 +79,6 @@ static const float const_matrix[8][8] = {
{ 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f }
};
/* vertices for a quad covering a block */
static const struct vertex2f const_quad[4] = {
{0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
};
static void *
create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
{
@ -409,11 +405,6 @@ init_buffers(struct vl_idct *idct)
struct pipe_vertex_element vertex_elems[2];
unsigned i;
idct->max_blocks =
align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
idct->destination->depth0;
memset(&template, 0, sizeof(struct pipe_resource));
template.last_level = 0;
template.depth0 = 1;
@ -443,15 +434,7 @@ init_buffers(struct vl_idct *idct)
idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
}
idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
idct->vertex_bufs.individual.quad.buffer_offset = 0;
idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
(
idct->pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
sizeof(struct vertex2f) * 4 * idct->max_blocks
);
idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
if(idct->vertex_bufs.individual.quad.buffer == NULL)
return false;
@ -503,36 +486,12 @@ cleanup_buffers(struct vl_idct *idct)
pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
}
static void
init_constants(struct vl_idct *idct)
{
struct pipe_transfer *buf_transfer;
struct vertex2f *v;
unsigned i;
/* quad vectors */
v = pipe_buffer_map
(
idct->pipe,
idct->vertex_bufs.individual.quad.buffer,
PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
&buf_transfer
);
for ( i = 0; i < idct->max_blocks; ++i)
memcpy(v + i * 4, &const_quad, sizeof(const_quad));
pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
}
static void
init_state(struct vl_idct *idct)
{
struct pipe_sampler_state sampler;
unsigned i;
idct->num_blocks = 0;
idct->num_empty_blocks = 0;
idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
@ -674,26 +633,11 @@ xfer_buffers_map(struct vl_idct *idct)
);
idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
idct->vectors = pipe_buffer_map
(
idct->pipe,
idct->vertex_bufs.individual.pos.buffer,
PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
&idct->vec_transfer
);
idct->next_empty_block.l_x = ~1;
idct->next_empty_block.l_y = ~1;
idct->next_empty_block.r_x = ~1;
idct->next_empty_block.r_y = ~1;
}
static void
xfer_buffers_unmap(struct vl_idct *idct)
{
pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
}
@ -708,6 +652,11 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
pipe_resource_reference(&idct->textures.individual.transpose, matrix);
pipe_resource_reference(&idct->destination, dst);
idct->max_blocks =
align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
idct->destination->depth0;
if(!init_buffers(idct))
return false;
@ -716,9 +665,21 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
return false;
}
if(!vl_vb_init(&idct->blocks, idct->max_blocks)) {
cleanup_shaders(idct);
cleanup_buffers(idct);
return false;
}
if(!vl_vb_init(&idct->empty_blocks, idct->max_blocks)) {
vl_vb_cleanup(&idct->blocks);
cleanup_shaders(idct);
cleanup_buffers(idct);
return false;
}
init_state(idct);
init_constants(idct);
xfer_buffers_map(idct);
return true;
@ -727,6 +688,8 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
void
vl_idct_cleanup(struct vl_idct *idct)
{
vl_vb_cleanup(&idct->blocks);
vl_vb_cleanup(&idct->empty_blocks);
cleanup_shaders(idct);
cleanup_buffers(idct);
@ -735,43 +698,9 @@ vl_idct_cleanup(struct vl_idct *idct)
pipe_resource_reference(&idct->destination, NULL);
}
static void
flush_empty_block(struct vl_idct *idct, unsigned new_x, unsigned new_y)
{
if (idct->next_empty_block.l_x == ~1 ||
idct->next_empty_block.l_y == ~1) {
idct->next_empty_block.l_x = new_x;
idct->next_empty_block.l_y = new_y;
} else if (idct->next_empty_block.r_x != (new_x - 1) ||
idct->next_empty_block.r_y != new_y) {
struct vertex2f l, r, *v_dst;
v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
l.x = idct->next_empty_block.l_x;
l.y = idct->next_empty_block.l_y;
r.x = idct->next_empty_block.r_x;
r.y = idct->next_empty_block.r_y;
v_dst[0] = v_dst[3] = l;
v_dst[1] = v_dst[2] = r;
idct->next_empty_block.l_x = new_x;
idct->next_empty_block.l_y = new_y;
idct->num_empty_blocks++;
}
idct->next_empty_block.r_x = new_x;
idct->next_empty_block.r_y = new_y;
}
void
vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
{
struct vertex2f v, *v_dst;
unsigned tex_pitch;
short *texels;
@ -786,32 +715,38 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
for (i = 0; i < BLOCK_HEIGHT; ++i)
memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
/* non empty blocks fills the vector buffer from left to right */
v_dst = idct->vectors + idct->num_blocks * 4;
idct->num_blocks++;
v.x = x;
v.y = y;
for (i = 0; i < 4; ++i) {
v_dst[i] = v;
}
vl_vb_add_block(&idct->blocks, false, x, y);
} else {
/* while empty blocks fills the vector buffer from right to left */
flush_empty_block(idct, x, y);
vl_vb_add_block(&idct->empty_blocks, true, x, y);
}
}
void
vl_idct_flush(struct vl_idct *idct)
{
flush_empty_block(idct, ~1, ~1);
struct pipe_transfer *vec_transfer;
struct quadf *vectors;
unsigned num_blocks, num_empty_blocks;
assert(idct);
vectors = pipe_buffer_map
(
idct->pipe,
idct->vertex_bufs.individual.pos.buffer,
PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
&vec_transfer
);
num_blocks = vl_vb_upload(&idct->blocks, vectors);
num_empty_blocks = vl_vb_upload(&idct->empty_blocks, vectors + num_blocks);
pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
xfer_buffers_unmap(idct);
if(idct->num_blocks > 0) {
if(num_blocks > 0) {
/* first stage */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
@ -824,7 +759,7 @@ vl_idct_flush(struct vl_idct *idct)
idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
/* second stage */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@ -837,10 +772,10 @@ vl_idct_flush(struct vl_idct *idct)
idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
}
if(idct->num_empty_blocks > 0) {
if(num_empty_blocks > 0) {
/* empty block handling */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@ -851,12 +786,8 @@ vl_idct_flush(struct vl_idct *idct)
idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
(idct->max_blocks - idct->num_empty_blocks) * 4,
idct->num_empty_blocks * 4);
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, num_blocks * 4, num_empty_blocks * 4);
}
idct->num_blocks = 0;
idct->num_empty_blocks = 0;
xfer_buffers_map(idct);
}

View file

@ -29,6 +29,7 @@
#define vl_idct_h
#include <pipe/p_state.h>
#include "vl_vertex_buffers.h"
struct vl_idct
{
@ -82,20 +83,11 @@ struct vl_idct
struct { struct pipe_vertex_buffer quad, pos; } individual;
} vertex_bufs;
unsigned num_blocks;
struct
{
unsigned l_x, l_y, r_x, r_y;
} next_empty_block;
unsigned num_empty_blocks;
struct vl_vertex_buffer blocks;
struct vl_vertex_buffer empty_blocks;
struct pipe_transfer *tex_transfer;
short *texels;
struct pipe_transfer *vec_transfer;
struct vertex2f *vectors;
};
struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);

View file

@ -38,4 +38,9 @@ struct vertex4f
float x, y, z, w;
};
/*
 * Four 2D vertices describing one quad.
 * The field names presumably stand for bottom-left, top-left, top-right and
 * bottom-right -- NOTE(review): confirm against the shaders consuming them.
 * The struct is exactly four consecutive struct vertex2f, so an array of
 * quadf can be used wherever 4 vertex2f per block are expected.
 */
struct quadf
{
struct vertex2f bl, tl, tr, br;
};
#endif /* vl_types_h */

View file

@ -0,0 +1,116 @@
/**************************************************************************
*
* Copyright 2010 Christian König
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include <assert.h>
#include <pipe/p_context.h>
#include <pipe/p_screen.h>
#include <util/u_memory.h>
#include <util/u_inlines.h>
#include "vl_vertex_buffers.h"
#include "vl_types.h"
/*
 * Vertices for a quad covering a block.
 * The initializers are listed in struct quadf field order: bl, tl, tr, br.
 * One copy of this quad is written per block by vl_vb_upload_quads().
 */
static const struct quadf const_quad = {
{0.0f, 1.0f}, {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}
};
/**
 * Create a vertex buffer containing max_blocks copies of the constant unit
 * quad (four vertex2f per block) and return its description by value.
 *
 * @param pipe        context used to create, map and unmap the buffer
 * @param max_blocks  number of quads to store; must be non-zero
 * @return the filled-in pipe_vertex_buffer. On failure (the buffer could not
 *         be created or could not be mapped) the returned .buffer is NULL,
 *         which callers must check.
 */
struct pipe_vertex_buffer
vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
{
   struct pipe_vertex_buffer quad;
   struct pipe_transfer *buf_transfer;
   struct quadf *v;
   unsigned i;

   assert(pipe);
   assert(max_blocks);

   /* create buffer */
   quad.stride = sizeof(struct vertex2f);
   quad.max_index = 4 * max_blocks - 1;
   quad.buffer_offset = 0;
   quad.buffer = pipe_buffer_create
   (
      pipe->screen,
      PIPE_BIND_VERTEX_BUFFER,
      sizeof(struct vertex2f) * 4 * max_blocks
   );

   if(!quad.buffer)
      return quad;

   /* and fill it */
   v = pipe_buffer_map
   (
      pipe,
      quad.buffer,
      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
      &buf_transfer
   );

   /*
    * Mapping can fail; writing through a NULL mapping would crash.
    * Drop the buffer so the caller sees .buffer == NULL and reports failure
    * instead of drawing from uninitialized vertex data.
    */
   if(!v) {
      pipe_resource_reference(&quad.buffer, NULL);
      return quad;
   }

   for ( i = 0; i < max_blocks; ++i)
      memcpy(v + i, &const_quad, sizeof(const_quad));

   pipe_buffer_unmap(pipe, quad.buffer, buf_transfer);

   return quad;
}
/**
 * Prepare an empty shadow buffer with room for max_blocks quads.
 *
 * @param buffer      shadow buffer to initialize
 * @param max_blocks  capacity, in quads, of the allocated storage
 * @return true when the storage was allocated, false otherwise
 */
bool
vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks)
{
   struct quadf *storage;

   assert(buffer);

   storage = MALLOC(sizeof(struct quadf) * max_blocks);

   buffer->blocks = storage;
   buffer->num_blocks = 0;

   if (storage == NULL)
      return false;

   return true;
}
/**
 * Copy every quad collected so far into dst and empty the shadow buffer.
 *
 * @param buffer  shadow buffer to drain
 * @param dst     destination receiving the stored quads; not touched when
 *                the shadow buffer is empty
 * @return number of quads that were copied
 */
unsigned
vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst)
{
   unsigned count;

   assert(buffer);

   count = buffer->num_blocks;

   if (count != 0)
      memcpy(dst, buffer->blocks, count * sizeof(struct quadf));

   /* the shadow buffer starts over empty after every upload */
   buffer->num_blocks = 0;

   return count;
}
/**
 * Release the storage owned by a shadow buffer.
 *
 * The pointer is cleared and the count reset afterwards, so calling this
 * again on the same buffer (for example from an error path followed by the
 * regular cleanup) does not free the same memory twice.
 *
 * @param buffer  shadow buffer previously set up with vl_vb_init()
 */
void
vl_vb_cleanup(struct vl_vertex_buffer *buffer)
{
   assert(buffer);

   FREE(buffer->blocks);
   buffer->blocks = NULL;
   buffer->num_blocks = 0;
}

View file

@ -0,0 +1,75 @@
/**************************************************************************
*
* Copyright 2010 Christian König
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef vl_vertex_buffers_h
#define vl_vertex_buffers_h
#include <assert.h>
#include <pipe/p_state.h>
#include "vl_types.h"
/*
 * CPU-side ("shadow") list of quads that is filled block by block with
 * vl_vb_add_block() and later copied out in one go with vl_vb_upload().
 */
struct vl_vertex_buffer
{
/* number of quads currently stored in blocks; reset to 0 by vl_vb_upload() */
unsigned num_blocks;
/* storage allocated by vl_vb_init() and released by vl_vb_cleanup() */
struct quadf *blocks;
};
/*
 * Create a vertex buffer holding max_blocks copies of the constant quad.
 * The returned .buffer is NULL when the buffer could not be created.
 */
struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
/*
 * Reset buffer to empty and allocate storage for max_blocks quads.
 * Returns true when the allocation succeeded.
 */
bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks);
/**
 * Record the block at position (x, y) in the shadow buffer.
 *
 * With allow_merge set, a block that directly follows the previously stored
 * quad on the same row (its tr/br corners sit at (x - 1, y)) does not get a
 * quad of its own; instead the tr/br corners of the previous quad are moved
 * to (x, y), stretching it over the new block.
 *
 * Otherwise a new quad with all four corners at (x, y) is appended.
 * No capacity check is performed here; the caller must not add more quads
 * than were reserved with vl_vb_init().
 *
 * @return true if the block was merged into the previous quad,
 *         false if a new quad was appended
 */
static inline bool
vl_vb_add_block(struct vl_vertex_buffer *buffer, bool allow_merge, signed x, signed y)
{
   struct quadf *slot;

   assert(buffer);

   if (allow_merge && buffer->num_blocks > 0) {
      struct quadf *last = &buffer->blocks[buffer->num_blocks - 1];
      const signed prev_x = x - 1;

      if (last->tr.x == prev_x && last->br.x == prev_x &&
          last->tr.y == y && last->br.y == y) {
         /* extend the right edge of the previous quad over this block */
         last->br.x = x;
         last->tr.x = last->br.x;
         last->br.y = y;
         last->tr.y = last->br.y;
         return true;
      }
   }

   /* start a fresh quad with every corner at the block position */
   slot = &buffer->blocks[buffer->num_blocks];
   ++buffer->num_blocks;

   slot->br.x = x;
   slot->tr.x = slot->br.x;
   slot->tl.x = slot->tr.x;
   slot->bl.x = slot->tl.x;

   slot->br.y = y;
   slot->tr.y = slot->br.y;
   slot->tl.y = slot->tr.y;
   slot->bl.y = slot->tl.y;

   return false;
}
/*
 * Copy all stored quads to dst, reset the stored count to zero and
 * return how many quads were copied.
 */
unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst);
/* Free the storage allocated by vl_vb_init(). */
void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
#endif