r600g: only flush the caches that need to be flushed during CP DMA operations

This should increase performance if constant uploads are done with the CP DMA,
because only the cache that needs to be flushed is flushed.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Marek Olšák 2013-06-30 19:57:59 +02:00
parent 1b40398d02
commit 7948ed1250
3 changed files with 117 additions and 32 deletions

View file

@ -121,18 +121,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
offset += r600_resource_va(&rctx->screen->screen, dst);
/* We flush the caches, because we might read from or write
* to resources which are bound right now. */
rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_INV_VERTEX_CACHE |
R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_STREAMOUT_FLUSH |
R600_CONTEXT_WAIT_3D_IDLE;
/* Flush the cache where the resource is bound. */
r600_flag_resource_cache_flush(rctx, dst);
while (size) {
unsigned sync = 0;
@ -169,10 +159,9 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
offset += byte_count;
}
/* Invalidate the read caches. */
rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_INV_VERTEX_CACHE |
R600_CONTEXT_INV_TEX_CACHE;
/* Flush the cache again in case the 3D engine has been prefetching
* the resource. */
r600_flag_resource_cache_flush(rctx, dst);
util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
offset + size);

View file

@ -626,18 +626,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
dst_offset += r600_resource_va(&rctx->screen->screen, dst);
src_offset += r600_resource_va(&rctx->screen->screen, src);
/* We flush the caches, because we might read from or write
* to resources which are bound right now. */
rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_INV_VERTEX_CACHE |
R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV_CB_META |
R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_STREAMOUT_FLUSH |
R600_CONTEXT_WAIT_3D_IDLE;
/* Flush the caches where the resources are bound. */
r600_flag_resource_cache_flush(rctx, src);
r600_flag_resource_cache_flush(rctx, dst);
/* There are differences between R700 and EG in CP DMA,
* but we only use the common bits here. */
@ -679,10 +670,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
dst_offset += byte_count;
}
/* Invalidate the read caches. */
rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
R600_CONTEXT_INV_VERTEX_CACHE |
R600_CONTEXT_INV_TEX_CACHE;
/* Flush the cache of the dst resource again in case the 3D engine
* has been prefetching it. */
r600_flag_resource_cache_flush(rctx, dst);
util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
dst_offset + size);
@ -736,3 +726,107 @@ void r600_dma_copy(struct r600_context *rctx,
util_range_add(&rdst->valid_buffer_range, dst_offset,
dst_offset + size);
}
/* Flag the cache of the resource for it to be flushed later if the resource
* is bound. Otherwise do nothing. Used for synchronization between engines.
*/
void r600_flag_resource_cache_flush(struct r600_context *rctx,
struct pipe_resource *res)
{
/* Check vertex buffers. */
uint32_t mask = rctx->vertex_buffer_state.enabled_mask;
while (mask) {
uint32_t i = u_bit_scan(&mask);
if (rctx->vertex_buffer_state.vb[i].buffer == res) {
rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
}
}
/* Check vertex buffers for compute. */
mask = rctx->cs_vertex_buffer_state.enabled_mask;
while (mask) {
uint32_t i = u_bit_scan(&mask);
if (rctx->cs_vertex_buffer_state.vb[i].buffer == res) {
rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
}
}
/* Check constant buffers. */
unsigned shader;
for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
uint32_t mask = state->enabled_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
if (state->cb[i].buffer == res) {
rctx->flags |= R600_CONTEXT_INV_CONST_CACHE;
shader = PIPE_SHADER_TYPES; /* break the outer loop */
break;
}
}
}
/* Check textures. */
for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
struct r600_samplerview_state *state = &rctx->samplers[shader].views;
uint32_t mask = state->enabled_mask;
while (mask) {
uint32_t i = u_bit_scan(&mask);
if (&state->views[i]->tex_resource->b.b == res) {
rctx->flags |= R600_CONTEXT_INV_TEX_CACHE;
shader = PIPE_SHADER_TYPES; /* break the outer loop */
break;
}
}
}
/* Check streamout buffers. */
int i;
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (rctx->streamout.targets[i]->b.buffer == res) {
rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_WAIT_3D_IDLE;
break;
}
}
/* Check colorbuffers. */
for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
if (rctx->framebuffer.state.cbufs[i] &&
rctx->framebuffer.state.cbufs[i]->texture == res) {
struct r600_texture *tex =
(struct r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_WAIT_3D_IDLE;
if (tex->cmask_size || tex->fmask_size) {
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
break;
}
}
/* Check a depth buffer. */
if (rctx->framebuffer.state.zsbuf) {
if (rctx->framebuffer.state.zsbuf->texture == res) {
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_WAIT_3D_IDLE;
}
struct r600_texture *tex =
(struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
if (tex && tex->htile && &tex->htile->b.b == res) {
rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META |
R600_CONTEXT_FLUSH_AND_INV |
R600_CONTEXT_WAIT_3D_IDLE;
}
}
}

View file

@ -834,6 +834,8 @@ boolean r600_dma_blit(struct pipe_context *ctx,
const struct pipe_box *src_box);
void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
void r600_emit_streamout_end(struct r600_context *ctx);
void r600_flag_resource_cache_flush(struct r600_context *rctx,
struct pipe_resource *res);
/*
* evergreen_hw_context.c