radeonsi: avoid redundant CB and DB register updates

The main idea is to avoid setting CB_COLORi_INFO = 0 for i>0 repeatedly
when those colorbuffers aren't used. This is mainly for glamor.

Same for DB. Z_INFO and STENCIL_INFO need to be cleared only once.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
This commit is contained in:
Marek Olšák 2015-08-29 02:32:13 +02:00
parent c2a42d1f9f
commit 0c2eed0ede
7 changed files with 36 additions and 12 deletions

View file

@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
&buffers, color);
&buffers, NULL, color);
if (!buffers)
return; /* all buffers have been fast cleared */
}

View file

@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers,
unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);

View file

@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers,
unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color)
{
int i;
@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
if (dirty_cbufs)
*dirty_cbufs |= 1 << i;
rctx->set_atom_dirty(rctx, fb_state, true);
*buffers &= ~clear_bit;
}

View file

@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;
if (buffers & PIPE_CLEAR_COLOR) {
evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
&buffers, color);
evergreen_do_fast_color_clear(&sctx->b, fb,
&sctx->framebuffer.atom, &buffers,
&sctx->framebuffer.dirty_cbufs,
color);
if (!buffers)
return; /* all buffers have been fast cleared */
}
@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
}
zstex->depth_clear_value = depth;
sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);

View file

@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx)
/* The CS initialization should be emitted before everything else. */
si_pm4_emit(ctx, ctx->init_config);
si_mark_atom_dirty(ctx, &ctx->clip_regs);
ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
ctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
si_mark_atom_dirty(ctx, &ctx->clip_regs);
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->db_render_state);

View file

@ -127,6 +127,8 @@ struct si_framebuffer {
unsigned cb0_is_integer;
unsigned compressed_cb_mask;
unsigned export_16bpc;
unsigned dirty_cbufs;
bool dirty_zsbuf;
};
struct si_scissors {

View file

@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
sctx->framebuffer.dirty_cbufs |=
(1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
util_copy_framebuffer_state(&sctx->framebuffer.state, state);
sctx->framebuffer.export_16bpc = 0;
@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
if (!(sctx->framebuffer.dirty_cbufs & (1 << i)))
continue;
cb = (struct r600_surface*)state->cbufs[i];
if (!cb) {
r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0]) {
if (i == 1 && state->cbufs[0] &&
sctx->framebuffer.dirty_cbufs & (1 << 0)) {
r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
i++;
}
for (; i < 8 ; i++) {
r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
}
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
/* ZS buffer. */
if (state->zsbuf) {
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
} else {
} else if (sctx->framebuffer.dirty_zsbuf) {
r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
/* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
sctx->framebuffer.dirty_cbufs = 0;
sctx->framebuffer.dirty_zsbuf = false;
}
static void si_emit_msaa_sample_locs(struct si_context *sctx,