radeonsi: implement fast stencil clear

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Marek Olšák 2015-12-10 01:37:39 +01:00
parent 8ee96ce834
commit 1a24f443b4
4 changed files with 53 additions and 23 deletions

View file

@ -222,6 +222,8 @@ struct r600_texture {
struct r600_resource *htile_buffer;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
uint8_t stencil_clear_value;
bool non_disp_tiling; /* R600-Cayman only */
};

View file

@ -377,22 +377,39 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
}
}
if (buffers & PIPE_CLEAR_DEPTH &&
zstex && zstex->htile_buffer &&
if (zstex && zstex->htile_buffer &&
zsbuf->u.tex.level == 0 &&
zsbuf->u.tex.first_layer == 0 &&
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
/* Need to disable EXPCLEAR temporarily if clearing
* to a new value. */
if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
sctx->db_depth_disable_expclear = true;
if (buffers & PIPE_CLEAR_DEPTH) {
/* Need to disable EXPCLEAR temporarily if clearing
* to a new value. */
if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
sctx->db_depth_disable_expclear = true;
}
zstex->depth_clear_value = depth;
sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
zstex->depth_clear_value = depth;
sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
if (buffers & PIPE_CLEAR_STENCIL) {
stencil &= 0xff;
/* Need to disable EXPCLEAR temporarily if clearing
* to a new value. */
if (zstex->stencil_cleared && zstex->stencil_clear_value != stencil) {
sctx->db_stencil_disable_expclear = true;
}
zstex->stencil_clear_value = stencil;
sctx->framebuffer.dirty_zsbuf = true;
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
sctx->db_stencil_clear = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}
si_blitter_begin(ctx, SI_CLEAR);
@ -407,6 +424,13 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
zstex->depth_cleared = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
if (sctx->db_stencil_clear) {
sctx->db_stencil_clear = false;
sctx->db_stencil_disable_expclear = false;
zstex->stencil_cleared = true;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}
static void si_clear_render_target(struct pipe_context *ctx,

View file

@ -253,6 +253,8 @@ struct si_context {
bool db_flush_stencil_inplace;
bool db_depth_clear;
bool db_depth_disable_expclear;
bool db_stencil_clear;
bool db_stencil_disable_expclear;
unsigned ps_db_shader_control;
/* Emitted draw state. */

View file

@ -1090,10 +1090,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
radeon_emit(cs,
S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
} else if (sctx->db_depth_clear) {
radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
} else {
radeon_emit(cs, 0);
radeon_emit(cs,
S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
}
/* DB_COUNT_CONTROL (occlusion queries) */
@ -1120,12 +1120,9 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
}
/* DB_RENDER_OVERRIDE2 */
if (sctx->db_depth_disable_expclear) {
radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
} else {
radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
}
radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear));
db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
sctx->ps_db_shader_control;
@ -2217,7 +2214,10 @@ static void si_init_depth_surface(struct si_context *sctx,
z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
S_028040_ALLOW_EXPCLEAR(1);
if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
s_info |= S_028044_ALLOW_EXPCLEAR(1);
else
/* Use all of the htile_buffer for depth if there's no stencil. */
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
uint64_t va = rtex->htile_buffer->gpu_address;
@ -2486,8 +2486,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
zb->pa_su_poly_offset_db_fmt_cntl);
} else if (sctx->framebuffer.dirty_zsbuf) {
@ -3578,7 +3581,6 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);