r600g: do fine-grained vertex buffer updates

If only some buffers are changed, the other ones don't have to be re-emitted.
This uses bitmasks of enabled and dirty buffers just like
emit_constant_buffers does.
Marek Olšák 2012-07-06 03:18:06 +02:00
parent f4f2e8ebe1
commit 585baac652
8 changed files with 86 additions and 62 deletions
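
The scheme the message describes is easy to see in isolation: a per-slot state struct keeps one bit per bound buffer (enabled_mask) and one bit per buffer changed since the last emit (dirty_mask), and the emit path walks only the dirty bits. Below is a minimal standalone C sketch of that pattern, not driver code: slot_state, set_slot, emit_dirty_slots and bit_scan are hypothetical stand-ins for r600_vertexbuf_state, evergreen_cs_set_vertex_buffer, the *_emit_vertex_buffers callbacks and Gallium's u_bit_scan().

#include <stdint.h>
#include <stdio.h>
#include <strings.h> /* ffs() */

struct slot_state {
	uint32_t enabled_mask; /* one bit per slot that holds a buffer */
	uint32_t dirty_mask;   /* one bit per slot changed since the last emit */
};

/* Binding a buffer marks exactly one slot as enabled and dirty. */
static void set_slot(struct slot_state *s, unsigned index)
{
	s->enabled_mask |= 1u << index;
	s->dirty_mask |= 1u << index;
}

/* Stand-in for Gallium's u_bit_scan(): return the lowest set bit, clear it. */
static unsigned bit_scan(uint32_t *mask)
{
	unsigned i = ffs(*mask) - 1;
	*mask &= *mask - 1;
	return i;
}

/* Only dirty slots are re-emitted; clean ones are skipped entirely. */
static void emit_dirty_slots(struct slot_state *s)
{
	uint32_t dirty = s->dirty_mask;
	while (dirty) {
		unsigned i = bit_scan(&dirty);
		printf("emit slot %u\n", i); /* the driver writes PKT3 packets here */
	}
	s->dirty_mask = 0; /* everything emitted, nothing left dirty */
}

int main(void)
{
	struct slot_state s = {0, 0};

	/* Like the compute init below: slots 0 and 1, both masks == (1 | 2). */
	set_slot(&s, 0);
	set_slot(&s, 1);
	emit_dirty_slots(&s); /* emits slots 0 and 1 */

	set_slot(&s, 1);
	emit_dirty_slots(&s); /* emits only slot 1; slot 0 stays untouched */
	return 0;
}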

src/gallium/drivers/r600/evergreen_compute.c

@@ -89,14 +89,15 @@ static void evergreen_cs_set_vertex_buffer(
 	unsigned offset,
 	struct pipe_resource * buffer)
 {
-	struct pipe_vertex_buffer *vb = &rctx->cs_vertex_buffer[vb_index];
-	struct r600_vertexbuf_state * state = &rctx->cs_vertex_buffer_state;
+	struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state;
+	struct pipe_vertex_buffer *vb = &state->vb[vb_index];
 	vb->stride = 1;
 	vb->buffer_offset = offset;
 	vb->buffer = buffer;
 	vb->user_buffer = NULL;

 	r600_inval_vertex_cache(rctx);
+	state->enabled_mask |= 1 << vb_index;
 	state->dirty_mask |= 1 << vb_index;
 	r600_atom_dirty(rctx, &state->atom);
 }
@@ -369,7 +370,7 @@ static void compute_emit_cs(struct r600_context *ctx)
 	r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);

 	/* Emit vertex buffer state */
-	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * ctx->nr_cs_vertex_buffers;
+	ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
 	r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

 	for (i = 0; i < get_compute_resource_num(); i++) {
@@ -493,10 +494,8 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
 			evergreen_cs_set_vertex_buffer(ctx, vtx_id,
 				buffer->chunk->start_in_dw * 4,
 				resources[i]->base.texture);
-			ctx->nr_cs_vertex_buffers = vtx_id + 1;
 		}
 	}
 }

 static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
@@ -740,7 +739,8 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx)
 	/* We always use at least two vertex buffers for compute, one for
 	 * parameters and one for global memory */
-	ctx->nr_cs_vertex_buffers = 2;
+	ctx->cs_vertex_buffer_state.enabled_mask =
+	ctx->cs_vertex_buffer_state.dirty_mask = 1 | 2;
 }

src/gallium/drivers/r600/evergreen_state.c

@@ -1772,8 +1772,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 					  struct r600_vertexbuf_state *state,
-					  struct pipe_vertex_buffer *vertex_buffers,
-					  unsigned vb_count,
 					  unsigned resource_offset,
 					  unsigned pkt_flags)
 {
@@ -1784,13 +1782,11 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		struct pipe_vertex_buffer *vb;
 		struct r600_resource *rbuffer;
 		uint64_t va;
-		unsigned buffer_index = ffs(dirty_mask) - 1;
+		unsigned buffer_index = u_bit_scan(&dirty_mask);

-		vb = &vertex_buffers[buffer_index];
+		vb = &state->vb[buffer_index];
 		rbuffer = (struct r600_resource*)vb->buffer;
-		if (!rbuffer) {
-			goto next;
-		}
+		assert(rbuffer);

 		va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b);
 		va += vb->buffer_offset;
@@ -1816,26 +1812,19 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
 		r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
-next:
-		dirty_mask &= ~(1 << buffer_index);
 	}
+	state->dirty_mask = 0;
 }

 static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state,
-				      rctx->vertex_buffer,
-				      rctx->nr_vertex_buffers, 992, 0);
+	evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
 }

 static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state,
-				      rctx->cs_vertex_buffer,
-				      rctx->nr_cs_vertex_buffers, 816,
-				      RADEON_CP_PACKET3_COMPUTE_MODE);
+	evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+				      RADEON_CP_PACKET3_COMPUTE_MODE);
 }

 static void evergreen_emit_constant_buffers(struct r600_context *rctx,

src/gallium/drivers/r600/r600_blit.c

@@ -60,8 +60,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 		util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
 	}
 	util_blitter_save_vertex_buffers(rctx->blitter,
-					 rctx->nr_vertex_buffers,
-					 rctx->vertex_buffer);
+					 util_last_bit(rctx->vertex_buffer_state.enabled_mask),
+					 rctx->vertex_buffer_state.vb);
 	util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
 			(struct pipe_stream_output_target**)rctx->so_targets);
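
One subtlety in the hunk above: util_blitter_save_vertex_buffers() takes a count of slots, and the highest enabled slot, not the number of enabled slots, determines it, since the enabled mask may have gaps. A standalone sketch of the semantics this assumes of Mesa's util_last_bit(); last_bit below is a local stand-in, not the Mesa helper:

#include <stdint.h>
#include <stdio.h>

/* Stand-in mirroring util_last_bit(): one past the index of the highest
 * set bit, 0 for an empty mask. */
static unsigned last_bit(uint32_t mask)
{
	unsigned n = 0;
	while (mask) {
		n++;
		mask >>= 1;
	}
	return n;
}

int main(void)
{
	/* Buffers bound only in slots 0 and 5: the blitter still has to save
	 * six slots (0..5), even though just two of them are enabled. */
	uint32_t enabled_mask = (1u << 0) | (1u << 5);
	printf("%u slots to save\n", last_bit(enabled_mask)); /* prints 6 */
	return 0;
}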

src/gallium/drivers/r600/r600_buffer.c

@@ -93,7 +93,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 	/* Check if mapping this buffer would cause waiting for the GPU. */
 	if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
 	    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
-		unsigned i;
+		unsigned i, mask;

 		/* Discard the buffer. */
 		pb_reference(&rbuffer->buf, NULL);
@@ -105,13 +105,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 		/* We changed the buffer, now we need to bind it where the old one was bound. */
 		/* Vertex buffers. */
-		for (i = 0; i < rctx->nr_vertex_buffers; i++) {
-			if (rctx->vertex_buffer[i].buffer == &rbuffer->b.b) {
-				struct r600_vertexbuf_state * state =
-					&rctx->vertex_buffer_state;
-				state->dirty_mask |= 1 << i;
-				r600_inval_vertex_cache(rctx);
-				r600_atom_dirty(rctx, &state->atom);
+		mask = rctx->vertex_buffer_state.enabled_mask;
+		while (mask) {
+			i = u_bit_scan(&mask);
+			if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
+				rctx->vertex_buffer_state.dirty_mask |= 1 << i;
+				r600_vertex_buffers_dirty(rctx);
 			}
 		}
 		/* Streamout buffers. */

src/gallium/drivers/r600/r600_hw_context.c

@@ -1274,14 +1274,15 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);

 	/* Invalidate caches. */
 	r600_inval_vertex_cache(ctx);
 	r600_inval_texture_cache(ctx);
 	r600_flush_framebuffer(ctx, false);

 	/* Re-emit states. */
 	r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
 	r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
-	r600_atom_dirty(ctx, &ctx->vertex_buffer_state.atom);
+	ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
+	r600_vertex_buffers_dirty(ctx);
 	ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask;
 	ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask;

src/gallium/drivers/r600/r600_pipe.h

@@ -278,6 +278,8 @@ struct r600_constbuf_state
 struct r600_vertexbuf_state
 {
 	struct r600_atom atom;
+	struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+	uint32_t enabled_mask; /* non-NULL buffers */
 	uint32_t dirty_mask;
 };
@@ -399,13 +401,8 @@ struct r600_context {
 	boolean dual_src_blend;

 	/* Vertex and index buffers. */
-	bool vertex_buffers_dirty;
-	/* Index buffer. */
 	struct pipe_index_buffer index_buffer;
-	struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-	unsigned nr_vertex_buffers;
-	struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS];
-	unsigned nr_cs_vertex_buffers;
 };

 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
@@ -528,8 +525,9 @@ unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
 void r600_texture_barrier(struct pipe_context *ctx);
 void r600_set_index_buffer(struct pipe_context *ctx,
 			   const struct pipe_index_buffer *ib);
+void r600_vertex_buffers_dirty(struct r600_context *rctx);
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
-			     const struct pipe_vertex_buffer *buffers);
+			     const struct pipe_vertex_buffer *input);
 void *r600_create_vertex_elements(struct pipe_context *ctx,
 				  unsigned count,
 				  const struct pipe_vertex_element *elements);

src/gallium/drivers/r600/r600_state.c

@@ -1748,27 +1748,28 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = rctx->cs;
-	struct pipe_vertex_buffer *vb = rctx->vertex_buffer;
-	unsigned count = rctx->nr_vertex_buffers;
-	unsigned i, offset;
+	uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;

-	for (i = 0; i < count; i++) {
-		struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+	while (dirty_mask) {
+		struct pipe_vertex_buffer *vb;
+		struct r600_resource *rbuffer;
+		unsigned offset;
+		unsigned buffer_index = u_bit_scan(&dirty_mask);

-		if (!rbuffer) {
-			continue;
-		}
+		vb = &rctx->vertex_buffer_state.vb[buffer_index];
+		rbuffer = (struct r600_resource*)vb->buffer;
+		assert(rbuffer);

-		offset = vb[i].buffer_offset;
+		offset = vb->buffer_offset;

 		/* fetch resources start at index 320 */
 		r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
-		r600_write_value(cs, (320 + i) * 7);
+		r600_write_value(cs, (320 + buffer_index) * 7);
 		r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
 		r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
 		r600_write_value(cs, /* RESOURCEi_WORD2 */
 				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
-				 S_038008_STRIDE(vb[i].stride));
+				 S_038008_STRIDE(vb->stride));
 		r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
 		r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
 		r600_write_value(cs, 0); /* RESOURCEi_WORD5 */

src/gallium/drivers/r600/r600_state_common.c

@@ -403,22 +403,58 @@ void r600_set_index_buffer(struct pipe_context *ctx,
 	}
 }

+void r600_vertex_buffers_dirty(struct r600_context *rctx)
+{
+	if (rctx->vertex_buffer_state.dirty_mask) {
+		r600_inval_vertex_cache(rctx);
+		rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
+						       util_bitcount(rctx->vertex_buffer_state.dirty_mask);
+		r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
+	}
+}
+
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
-			     const struct pipe_vertex_buffer *buffers)
+			     const struct pipe_vertex_buffer *input)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct r600_vertexbuf_state * state = &rctx->vertex_buffer_state;
+	struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state;
+	struct pipe_vertex_buffer *vb = state->vb;
 	unsigned i;
+	/* This sets 1-bit for buffers with index >= count. */
+	uint32_t disable_mask = ~((1ull << count) - 1);
+	/* These are the new buffers set by this function. */
+	uint32_t new_buffer_mask = 0;

-	util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count);
+	/* Set buffers with index >= count to NULL. */
+	uint32_t remaining_buffers_mask =
+		rctx->vertex_buffer_state.enabled_mask & disable_mask;

-	r600_inval_vertex_cache(rctx);
-	state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
-			     rctx->nr_vertex_buffers;
-	for (i = 0 ; i < rctx->nr_vertex_buffers; i++) {
-		state->dirty_mask |= 1 << i;
+	while (remaining_buffers_mask) {
+		i = u_bit_scan(&remaining_buffers_mask);
+		pipe_resource_reference(&vb[i].buffer, NULL);
 	}
-	r600_atom_dirty(rctx, &state->atom);
+
+	/* Set vertex buffers. */
+	for (i = 0; i < count; i++) {
+		if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) {
+			if (input[i].buffer) {
+				vb[i].stride = input[i].stride;
+				vb[i].buffer_offset = input[i].buffer_offset;
+				pipe_resource_reference(&vb[i].buffer, input[i].buffer);
+				new_buffer_mask |= 1 << i;
+			} else {
+				pipe_resource_reference(&vb[i].buffer, NULL);
+				disable_mask |= 1 << i;
+			}
+		}
+	}
+
+	rctx->vertex_buffer_state.enabled_mask &= ~disable_mask;
+	rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask;
+	rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask;
+	rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask;
+
+	r600_vertex_buffers_dirty(rctx);
 }

 void *r600_create_vertex_elements(struct pipe_context *ctx,