radeonsi: remove the separate DCC optimization for Stoney
This removes some complexity from the driver.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10343>
commit 804e292440
parent 1f8fa96412
7 changed files with 7 additions and 394 deletions
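
For context, the deleted heuristic enabled separate DCC for shared displayable back buffers only when pipeline statistics suggested it would pay off. A minimal standalone restatement of the decision rule follows; the helper name and the worked numbers are illustrative, while the actual logic is the removed vi_should_enable_separate_dcc in the si_texture.c hunk below.

   #include <stdbool.h>

   /* Illustrative sketch, not Mesa API: separate DCC was enabled only when a
    * back buffer accumulated at least 5 full-screen draws' worth of PS
    * invocations plus slow clears since the last usage analysis. For example,
    * a 1920x1080 buffer with 10,368,000 PS invocations in a frame yields
    * ps_draw_ratio = 10368000 / (1920 * 1080) = 5 and passes the threshold. */
   static bool should_enable_separate_dcc(unsigned ps_draw_ratio, unsigned num_slow_clears)
   {
      return ps_draw_ratio + num_slow_clears >= 5;
   }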
src/gallium/drivers/radeonsi/si_blit.c

@@ -1276,52 +1276,16 @@ static void si_flush_resource(struct pipe_context *ctx, struct pipe_resource *res)
    struct si_texture *tex = (struct si_texture *)res;

    assert(res->target != PIPE_BUFFER);
-   assert(!tex->dcc_separate_buffer || tex->dcc_gather_statistics);
-
-   /* st/dri calls flush twice per frame (not a bug), this prevents double
-    * decompression. */
-   if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty)
-      return;

    if (!tex->is_depth && (tex->cmask_buffer || vi_dcc_enabled(tex, 0))) {
       si_blit_decompress_color(sctx, tex, 0, res->last_level, 0, util_max_layer(res, 0),
-                               tex->dcc_separate_buffer != NULL, false);
+                               false, false);

       if (tex->surface.display_dcc_offset && tex->displayable_dcc_dirty) {
          si_retile_dcc(sctx, tex);
          tex->displayable_dcc_dirty = false;
       }
    }
-
-   /* Always do the analysis even if DCC is disabled at the moment. */
-   if (tex->dcc_gather_statistics) {
-      bool separate_dcc_dirty = tex->separate_dcc_dirty;
-
-      /* If the color buffer hasn't been unbound and fast clear hasn't
-       * been used, separate_dcc_dirty is false, but there may have been
-       * new rendering. Check if the color buffer is bound and assume
-       * it's dirty.
-       *
-       * Note that DRI2 never unbinds window colorbuffers, which means
-       * the DCC pipeline statistics query would never be re-set and would
-       * keep adding new results until all free memory is exhausted if we
-       * didn't do this.
-       */
-      if (!separate_dcc_dirty) {
-         for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
-            if (sctx->framebuffer.state.cbufs[i] &&
-                sctx->framebuffer.state.cbufs[i]->texture == res) {
-               separate_dcc_dirty = true;
-               break;
-            }
-         }
-      }
-
-      if (separate_dcc_dirty) {
-         tex->separate_dcc_dirty = false;
-         vi_separate_dcc_process_and_reset_stats(ctx, tex);
-      }
-   }
 }

 void si_flush_implicit_resources(struct si_context *sctx)
src/gallium/drivers/radeonsi/si_clear.c

@@ -293,19 +293,12 @@ static bool vi_get_fast_clear_parameters(struct si_screen *sscreen, enum pipe_format
 bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level,
                            unsigned clear_value, struct si_clear_info *out)
 {
-   struct pipe_resource *dcc_buffer;
-   uint64_t dcc_offset, clear_size;
+   struct pipe_resource *dcc_buffer = &tex->buffer.b.b;
+   uint64_t dcc_offset = tex->surface.meta_offset;
+   uint32_t clear_size;

    assert(vi_dcc_enabled(tex, level));

-   if (tex->dcc_separate_buffer) {
-      dcc_buffer = &tex->dcc_separate_buffer->b.b;
-      dcc_offset = 0;
-   } else {
-      dcc_buffer = &tex->buffer.b.b;
-      dcc_offset = tex->surface.meta_offset;
-   }
-
    if (sctx->chip_class >= GFX10) {
       /* 4x and 8x MSAA needs a sophisticated compute shader for
        * the clear. */

@@ -606,21 +599,6 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
    bool eliminate_needed = false;
    bool fmask_decompress_needed = false;

-   /* Fast clear is the most appropriate place to enable DCC for
-    * displayable surfaces.
-    */
-   if (sctx->family == CHIP_STONEY && !too_small) {
-      vi_separate_dcc_try_enable(sctx, tex);
-
-      /* RB+ isn't supported with a CMASK clear only on Stoney,
-       * so all clears are considered to be hypothetically slow
-       * clears, which is weighed when determining whether to
-       * enable separate DCC.
-       */
-      if (tex->dcc_gather_statistics) /* only for Stoney */
-         tex->num_slow_clears++;
-   }
-
    /* Try to clear DCC first, otherwise try CMASK. */
    if (vi_dcc_enabled(tex, level)) {
       uint32_t reset_value;

@@ -666,7 +644,6 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
       num_clears++;
       clear_types |= SI_CLEAR_TYPE_DCC;

-      tex->separate_dcc_dirty = true;
       si_mark_display_dcc_dirty(sctx, tex);

       /* DCC fast clear with MSAA should clear CMASK to 0xC. */
src/gallium/drivers/radeonsi/si_descriptors.c

@@ -231,15 +231,6 @@ static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_resource *resource,

    priority = si_get_sampler_view_priority(&tex->buffer);
    radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage, priority, check_mem);
-
-   if (resource->target == PIPE_BUFFER)
-      return;
-
-   /* Add separate DCC. */
-   if (tex->dcc_separate_buffer) {
-      radeon_add_to_gfx_buffer_list_check_mem(sctx, tex->dcc_separate_buffer, usage,
-                                              RADEON_PRIO_SEPARATE_META, check_mem);
-   }
 }

 static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers)

@@ -331,8 +322,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex,
       state[6] &= C_008F28_COMPRESSION_EN;

       if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) {
-         meta_va =
-            (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.meta_offset;
+         meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;

          if (sscreen->info.chip_class == GFX8) {
             meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset;
src/gallium/drivers/radeonsi/si_pipe.c

@@ -284,17 +284,6 @@ static void si_destroy_context(struct pipe_context *context)
    if (sctx->blitter)
       util_blitter_destroy(sctx->blitter);

-   /* Release DCC stats. */
-   for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) {
-      assert(!sctx->dcc_stats[i].query_active);
-
-      for (int j = 0; j < ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++)
-         if (sctx->dcc_stats[i].ps_stats[j])
-            sctx->b.destroy_query(&sctx->b, sctx->dcc_stats[i].ps_stats[j]);
-
-      si_texture_reference(&sctx->dcc_stats[i].tex, NULL);
-   }
-
    if (sctx->query_result_shader)
       sctx->b.delete_compute_state(&sctx->b, sctx->query_result_shader);
    if (sctx->sh_query_result_shader)
src/gallium/drivers/radeonsi/si_pipe.h

@@ -385,37 +385,14 @@ struct si_texture {

    /* We need to track DCC dirtiness, because st/dri usually calls
     * flush_resource twice per frame (not a bug) and we don't wanna
-    * decompress DCC twice. Also, the dirty tracking must be done even
-    * if DCC isn't used, because it's required by the DCC usage analysis
-    * for a possible future enablement.
+    * decompress DCC twice.
     */
-   bool separate_dcc_dirty : 1;
    bool displayable_dcc_dirty : 1;

-   /* Statistics gathering for the DCC enablement heuristic. */
-   bool dcc_gather_statistics : 1;
    /* Counter that should be non-zero if the texture is bound to a
     * framebuffer.
     */
    unsigned framebuffers_bound;
-   /* Whether the texture is a displayable back buffer and needs DCC
-    * decompression, which is expensive. Therefore, it's enabled only
-    * if statistics suggest that it will pay off and it's allocated
-    * separately. It can't be bound as a sampler by apps. Limited to
-    * target == 2D and last_level == 0. If enabled, dcc_offset contains
-    * the absolute GPUVM address, not the relative one.
-    */
-   struct si_resource *dcc_separate_buffer;
-   /* When DCC is temporarily disabled, the separate buffer is here. */
-   struct si_resource *last_dcc_separate_buffer;
-   /* Estimate of how much this color buffer is written to in units of
-    * full-screen draws: ps_invocations / (width * height)
-    * Shader kills, late Z, and blending with trivial discards make it
-    * inaccurate (we need to count CB updates, not PS invocations).
-    */
-   unsigned ps_draw_ratio;
-   /* The number of clears since the last DCC usage analysis. */
-   unsigned num_slow_clears;
 };

 struct si_surface {

@@ -1281,25 +1258,6 @@ struct si_context {

    bool force_cb_shader_coherent;

-   /* Statistics gathering for the DCC enablement heuristic. It can't be
-    * in si_texture because si_texture can be shared by multiple
-    * contexts. This is for back buffers only. We shouldn't get too many
-    * of those.
-    *
-    * X11 DRI3 rotates among a finite set of back buffers. They should
-    * all fit in this array. If they don't, separate DCC might never be
-    * enabled by DCC stat gathering.
-    */
-   struct {
-      struct si_texture *tex;
-      /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
-      struct pipe_query *ps_stats[3];
-      /* If all slots are used and another slot is needed,
-       * the least recently used slot is evicted based on this. */
-      int64_t last_use_timestamp;
-      bool query_active;
-   } dcc_stats[5];
-
    struct si_tracked_regs tracked_regs;

    /* Resources that need to be flushed, but will not get an explicit

@@ -1617,10 +1575,6 @@ struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
                                               const struct pipe_surface *templ, unsigned width0,
                                               unsigned height0, unsigned width, unsigned height);
 unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
-void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex);
-void vi_separate_dcc_start_query(struct si_context *sctx, struct si_texture *tex);
-void vi_separate_dcc_stop_query(struct si_context *sctx, struct si_texture *tex);
-void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct si_texture *tex);
 bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex);
 void si_init_screen_texture_functions(struct si_screen *sscreen);
 void si_init_context_texture_functions(struct si_context *sctx);
src/gallium/drivers/radeonsi/si_state.c

@@ -2583,8 +2583,6 @@ void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
          tex->dirty_level_mask |= 1 << surf->u.tex.level;
          tex->fmask_is_identity = false;
       }
-      if (tex->dcc_gather_statistics)
-         tex->separate_dcc_dirty = true;
    }
 }

@@ -2656,15 +2654,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,

    si_update_fb_dirtiness_after_rendering(sctx);

-   for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
-      if (!sctx->framebuffer.state.cbufs[i])
-         continue;
-
-      tex = (struct si_texture *)sctx->framebuffer.state.cbufs[i]->texture;
-      if (tex->dcc_gather_statistics)
-         vi_separate_dcc_stop_query(sctx, tex);
-   }
-
    /* Disable DCC if the formats are incompatible. */
    for (i = 0; i < state->nr_cbufs; i++) {
       if (!state->cbufs[i])

@@ -2821,12 +2810,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,

       p_atomic_inc(&tex->framebuffers_bound);

-      if (tex->dcc_gather_statistics) {
-         /* Dirty tracking must be enabled for DCC usage analysis. */
-         sctx->framebuffer.compressed_cb_mask |= 1 << i;
-         vi_separate_dcc_start_query(sctx, tex);
-      }
-
       /* Update the minimum but don't keep 0. */
       if (!sctx->framebuffer.min_bytes_per_pixel ||
           tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel)

@@ -2967,11 +2950,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
                                    RADEON_PRIO_SEPARATE_META);
       }

-      if (tex->dcc_separate_buffer)
-         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->dcc_separate_buffer,
-                                   RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC,
-                                   RADEON_PRIO_SEPARATE_META);
-
       /* Compute mutable surface parameters. */
       cb_color_base = tex->buffer.gpu_address >> 8;
       cb_color_fmask = 0;

@@ -3011,9 +2989,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
          if (!is_msaa_resolve_dst)
            cb_color_info |= S_028C70_DCC_ENABLE(1);

-         cb_dcc_base =
-            ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.meta_offset) >>
-            8;
+         cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8;

          unsigned dcc_tile_swizzle = tex->surface.tile_swizzle;
          dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8;
src/gallium/drivers/radeonsi/si_texture.c

@@ -347,8 +347,6 @@ static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture *tex)
    if (!si_can_disable_dcc(tex))
       return false;

-   assert(tex->dcc_separate_buffer == NULL);
-
    /* Disable DCC. */
    ac_surface_zero_dcc_fields(&tex->surface);

@@ -491,11 +489,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_texture *tex,
    tex->can_sample_z = new_tex->can_sample_z;
    tex->can_sample_s = new_tex->can_sample_s;

-   tex->separate_dcc_dirty = new_tex->separate_dcc_dirty;
    tex->displayable_dcc_dirty = new_tex->displayable_dcc_dirty;
-   tex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
-   si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer);
-   si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer);

    if (new_bind_flag == PIPE_BIND_LINEAR) {
       assert(!tex->surface.meta_offset);

@@ -516,7 +510,6 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex,

    memset(&md, 0, sizeof(md));

-   assert(tex->dcc_separate_buffer == NULL);
    assert(tex->surface.fmask_size == 0);

    static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,

@@ -802,8 +795,6 @@ static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource
       si_resource_reference(&tex->cmask_buffer, NULL);
    }
    radeon_bo_reference(((struct si_screen*)screen)->ws, &resource->buf, NULL);
-   si_resource_reference(&tex->dcc_separate_buffer, NULL);
-   si_resource_reference(&tex->last_dcc_separate_buffer, NULL);
    FREE(tex);
 }

@@ -966,12 +957,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
    /* Applies to GCN. */
    tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode;

-   /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
-    * between frames, so the only thing that can enable separate DCC
-    * with DRI2 is multiple slow clears within a frame.
-    */
-   tex->ps_draw_ratio = 0;
-
    if (!ac_surface_override_offset_stride(&sscreen->info, &tex->surface,
                                           tex->buffer.b.b.last_level + 1,
                                           offset, pitch_in_bytes / tex->surface.bpe))

@@ -2189,228 +2174,6 @@ unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap)
    return ~0U;
 }

-/* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */
-
-static void vi_dcc_clean_up_context_slot(struct si_context *sctx, int slot)
-{
-   int i;
-
-   if (sctx->dcc_stats[slot].query_active)
-      vi_separate_dcc_stop_query(sctx, sctx->dcc_stats[slot].tex);
-
-   for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats[slot].ps_stats); i++)
-      if (sctx->dcc_stats[slot].ps_stats[i]) {
-         sctx->b.destroy_query(&sctx->b, sctx->dcc_stats[slot].ps_stats[i]);
-         sctx->dcc_stats[slot].ps_stats[i] = NULL;
-      }
-
-   si_texture_reference(&sctx->dcc_stats[slot].tex, NULL);
-}
-
-/**
- * Return the per-context slot where DCC statistics queries for the texture live.
- */
-static unsigned vi_get_context_dcc_stats_index(struct si_context *sctx, struct si_texture *tex)
-{
-   int i, empty_slot = -1;
-
-   /* Remove zombie textures (textures kept alive by this array only). */
-   for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++)
-      if (sctx->dcc_stats[i].tex && sctx->dcc_stats[i].tex->buffer.b.b.reference.count == 1)
-         vi_dcc_clean_up_context_slot(sctx, i);
-
-   /* Find the texture. */
-   for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) {
-      /* Return if found. */
-      if (sctx->dcc_stats[i].tex == tex) {
-         sctx->dcc_stats[i].last_use_timestamp = os_time_get();
-         return i;
-      }
-
-      /* Record the first seen empty slot. */
-      if (empty_slot == -1 && !sctx->dcc_stats[i].tex)
-         empty_slot = i;
-   }
-
-   /* Not found. Remove the oldest member to make space in the array. */
-   if (empty_slot == -1) {
-      int oldest_slot = 0;
-
-      /* Find the oldest slot. */
-      for (i = 1; i < ARRAY_SIZE(sctx->dcc_stats); i++)
-         if (sctx->dcc_stats[oldest_slot].last_use_timestamp >
-             sctx->dcc_stats[i].last_use_timestamp)
-            oldest_slot = i;
-
-      /* Clean up the oldest slot. */
-      vi_dcc_clean_up_context_slot(sctx, oldest_slot);
-      empty_slot = oldest_slot;
-   }
-
-   /* Add the texture to the new slot. */
-   si_texture_reference(&sctx->dcc_stats[empty_slot].tex, tex);
-   sctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get();
-   return empty_slot;
-}
-
-static struct pipe_query *vi_create_resuming_pipestats_query(struct si_context *sctx)
-{
-   struct si_query_hw *query =
-      (struct si_query_hw *)sctx->b.create_query(&sctx->b, PIPE_QUERY_PIPELINE_STATISTICS, 0);
-
-   query->flags |= SI_QUERY_HW_FLAG_BEGIN_RESUMES;
-   return (struct pipe_query *)query;
-}
-
-/**
- * Called when binding a color buffer.
- */
-void vi_separate_dcc_start_query(struct si_context *sctx, struct si_texture *tex)
-{
-   unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
-
-   assert(!sctx->dcc_stats[i].query_active);
-
-   if (!sctx->dcc_stats[i].ps_stats[0])
-      sctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(sctx);
-
-   /* begin or resume the query */
-   sctx->b.begin_query(&sctx->b, sctx->dcc_stats[i].ps_stats[0]);
-   sctx->dcc_stats[i].query_active = true;
-}
-
-/**
- * Called when unbinding a color buffer.
- */
-void vi_separate_dcc_stop_query(struct si_context *sctx, struct si_texture *tex)
-{
-   unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
-
-   assert(sctx->dcc_stats[i].query_active);
-   assert(sctx->dcc_stats[i].ps_stats[0]);
-
-   /* pause or end the query */
-   sctx->b.end_query(&sctx->b, sctx->dcc_stats[i].ps_stats[0]);
-   sctx->dcc_stats[i].query_active = false;
-}
-
-static bool vi_should_enable_separate_dcc(struct si_texture *tex)
-{
-   /* The minimum number of fullscreen draws per frame that is required
-    * to enable DCC. */
-   return tex->ps_draw_ratio + tex->num_slow_clears >= 5;
-}
-
-/* Called by fast clear. */
-void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex)
-{
-   /* The intent is to use this with shared displayable back buffers,
-    * but it's not strictly limited only to them.
-    */
-   if (!tex->buffer.b.is_shared ||
-       !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) ||
-       tex->buffer.b.b.target != PIPE_TEXTURE_2D || tex->buffer.b.b.last_level > 0 ||
-       !tex->surface.meta_size || sctx->screen->debug_flags & DBG(NO_DCC) ||
-       sctx->screen->debug_flags & DBG(NO_DCC_FB))
-      return;
-
-   assert(sctx->chip_class >= GFX8);
-   assert(!tex->is_depth);
-
-   if (tex->surface.meta_offset)
-      return; /* already enabled */
-
-   /* Enable the DCC stat gathering. */
-   if (!tex->dcc_gather_statistics) {
-      tex->dcc_gather_statistics = true;
-      vi_separate_dcc_start_query(sctx, tex);
-   }
-
-   if (!vi_should_enable_separate_dcc(tex))
-      return; /* stats show that DCC decompression is too expensive */
-
-   assert(tex->surface.num_meta_levels);
-   assert(!tex->dcc_separate_buffer);
-
-   si_texture_discard_cmask(sctx->screen, tex);
-
-   /* Get a DCC buffer. */
-   if (tex->last_dcc_separate_buffer) {
-      assert(tex->dcc_gather_statistics);
-      assert(!tex->dcc_separate_buffer);
-      tex->dcc_separate_buffer = tex->last_dcc_separate_buffer;
-      tex->last_dcc_separate_buffer = NULL;
-   } else {
-      tex->dcc_separate_buffer =
-         si_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT,
-                                  tex->surface.meta_size, 1 << tex->surface.meta_alignment_log2);
-      if (!tex->dcc_separate_buffer)
-         return;
-   }
-
-   /* dcc_offset is the absolute GPUVM address. */
-   tex->surface.meta_offset = tex->dcc_separate_buffer->gpu_address;
-
-   /* no need to flag anything since this is called by fast clear that
-    * flags framebuffer state
-    */
-}
-
-/**
- * Called by pipe_context::flush_resource, the place where DCC decompression
- * takes place.
- */
-void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct si_texture *tex)
-{
-   struct si_context *sctx = (struct si_context *)ctx;
-   struct pipe_query *tmp;
-   unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
-   bool query_active = sctx->dcc_stats[i].query_active;
-   bool disable = false;
-
-   if (sctx->dcc_stats[i].ps_stats[2]) {
-      union pipe_query_result result;
-
-      /* Read the results. */
-      struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2];
-      ctx->get_query_result(ctx, query, true, &result);
-      si_query_buffer_reset(sctx, &((struct si_query_hw *)query)->buffer);
-
-      /* Compute the approximate number of fullscreen draws. */
-      tex->ps_draw_ratio = result.pipeline_statistics.ps_invocations /
-                           (tex->buffer.b.b.width0 * tex->buffer.b.b.height0);
-      sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
-
-      disable = tex->dcc_separate_buffer && !vi_should_enable_separate_dcc(tex);
-   }
-
-   tex->num_slow_clears = 0;
-
-   /* stop the statistics query for ps_stats[0] */
-   if (query_active)
-      vi_separate_dcc_stop_query(sctx, tex);
-
-   /* Move the queries in the queue by one. */
-   tmp = sctx->dcc_stats[i].ps_stats[2];
-   sctx->dcc_stats[i].ps_stats[2] = sctx->dcc_stats[i].ps_stats[1];
-   sctx->dcc_stats[i].ps_stats[1] = sctx->dcc_stats[i].ps_stats[0];
-   sctx->dcc_stats[i].ps_stats[0] = tmp;
-
-   /* create and start a new query as ps_stats[0] */
-   if (query_active)
-      vi_separate_dcc_start_query(sctx, tex);
-
-   if (disable) {
-      assert(!tex->last_dcc_separate_buffer);
-      tex->last_dcc_separate_buffer = tex->dcc_separate_buffer;
-      tex->dcc_separate_buffer = NULL;
-      tex->surface.meta_offset = 0;
-      /* no need to flag anything since this is called after
-       * decompression that re-sets framebuffer state
-       */
-   }
-}
-
 static struct pipe_memory_object *
 si_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated)
 {
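
A closing note on the removed machinery: each dcc_stats slot pipelined its pipeline-statistics queries three deep (0 = recording, 1 = in flight, 2 = ready for readback), rotating the array on every flush so readback never stalled on the still-active query. A minimal sketch of that rotation, with illustrative types rather than Mesa's:

   /* Illustrative sketch, not Mesa code: mirrors the queue rotation in the
    * deleted vi_separate_dcc_process_and_reset_stats above. */
   struct stats_slot {
      void *ps_stats[3]; /* opaque query handles: [0] active, [1] waiting, [2] readback */
   };

   static void rotate_queries(struct stats_slot *slot)
   {
      /* The query whose results were just consumed becomes the next active one. */
      void *tmp = slot->ps_stats[2];
      slot->ps_stats[2] = slot->ps_stats[1];
      slot->ps_stats[1] = slot->ps_stats[0];
      slot->ps_stats[0] = tmp;
   }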