From 804e29244066f70d7111d8c07ff922a0e1362b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 5 Apr 2021 10:06:27 -0400 Subject: [PATCH] radeonsi: remove the separate DCC optimization for Stoney This removes some complexity from the driver. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_blit.c | 38 +-- src/gallium/drivers/radeonsi/si_clear.c | 29 +-- src/gallium/drivers/radeonsi/si_descriptors.c | 12 +- src/gallium/drivers/radeonsi/si_pipe.c | 11 - src/gallium/drivers/radeonsi/si_pipe.h | 48 +--- src/gallium/drivers/radeonsi/si_state.c | 26 +- src/gallium/drivers/radeonsi/si_texture.c | 237 ------------------ 7 files changed, 7 insertions(+), 394 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 653dfc343e7..bf95fa6c7c5 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1276,52 +1276,16 @@ static void si_flush_resource(struct pipe_context *ctx, struct pipe_resource *re struct si_texture *tex = (struct si_texture *)res; assert(res->target != PIPE_BUFFER); - assert(!tex->dcc_separate_buffer || tex->dcc_gather_statistics); - - /* st/dri calls flush twice per frame (not a bug), this prevents double - * decompression. */ - if (tex->dcc_separate_buffer && !tex->separate_dcc_dirty) - return; if (!tex->is_depth && (tex->cmask_buffer || vi_dcc_enabled(tex, 0))) { si_blit_decompress_color(sctx, tex, 0, res->last_level, 0, util_max_layer(res, 0), - tex->dcc_separate_buffer != NULL, false); + false, false); if (tex->surface.display_dcc_offset && tex->displayable_dcc_dirty) { si_retile_dcc(sctx, tex); tex->displayable_dcc_dirty = false; } } - - /* Always do the analysis even if DCC is disabled at the moment. */ - if (tex->dcc_gather_statistics) { - bool separate_dcc_dirty = tex->separate_dcc_dirty; - - /* If the color buffer hasn't been unbound and fast clear hasn't - * been used, separate_dcc_dirty is false, but there may have been - * new rendering. Check if the color buffer is bound and assume - * it's dirty. - * - * Note that DRI2 never unbinds window colorbuffers, which means - * the DCC pipeline statistics query would never be re-set and would - * keep adding new results until all free memory is exhausted if we - * didn't do this. - */ - if (!separate_dcc_dirty) { - for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { - if (sctx->framebuffer.state.cbufs[i] && - sctx->framebuffer.state.cbufs[i]->texture == res) { - separate_dcc_dirty = true; - break; - } - } - } - - if (separate_dcc_dirty) { - tex->separate_dcc_dirty = false; - vi_separate_dcc_process_and_reset_stats(ctx, tex); - } - } } void si_flush_implicit_resources(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index b6003d11921..538c4b18e92 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -293,19 +293,12 @@ static bool vi_get_fast_clear_parameters(struct si_screen *sscreen, enum pipe_fo bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level, unsigned clear_value, struct si_clear_info *out) { - struct pipe_resource *dcc_buffer; - uint64_t dcc_offset, clear_size; + struct pipe_resource *dcc_buffer = &tex->buffer.b.b; + uint64_t dcc_offset = tex->surface.meta_offset; + uint32_t clear_size; assert(vi_dcc_enabled(tex, level)); - if (tex->dcc_separate_buffer) { - dcc_buffer = &tex->dcc_separate_buffer->b.b; - dcc_offset = 0; - } else { - dcc_buffer = &tex->buffer.b.b; - dcc_offset = tex->surface.meta_offset; - } - if (sctx->chip_class >= GFX10) { /* 4x and 8x MSAA needs a sophisticated compute shader for * the clear. */ @@ -606,21 +599,6 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, bool eliminate_needed = false; bool fmask_decompress_needed = false; - /* Fast clear is the most appropriate place to enable DCC for - * displayable surfaces. - */ - if (sctx->family == CHIP_STONEY && !too_small) { - vi_separate_dcc_try_enable(sctx, tex); - - /* RB+ isn't supported with a CMASK clear only on Stoney, - * so all clears are considered to be hypothetically slow - * clears, which is weighed when determining whether to - * enable separate DCC. - */ - if (tex->dcc_gather_statistics) /* only for Stoney */ - tex->num_slow_clears++; - } - /* Try to clear DCC first, otherwise try CMASK. */ if (vi_dcc_enabled(tex, level)) { uint32_t reset_value; @@ -666,7 +644,6 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers, num_clears++; clear_types |= SI_CLEAR_TYPE_DCC; - tex->separate_dcc_dirty = true; si_mark_display_dcc_dirty(sctx, tex); /* DCC fast clear with MSAA should clear CMASK to 0xC. */ diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 6946f3419e5..78fb5d79b13 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -231,15 +231,6 @@ static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_reso priority = si_get_sampler_view_priority(&tex->buffer); radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage, priority, check_mem); - - if (resource->target == PIPE_BUFFER) - return; - - /* Add separate DCC. */ - if (tex->dcc_separate_buffer) { - radeon_add_to_gfx_buffer_list_check_mem(sctx, tex->dcc_separate_buffer, usage, - RADEON_PRIO_SEPARATE_META, check_mem); - } } static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers) @@ -331,8 +322,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture state[6] &= C_008F28_COMPRESSION_EN; if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) { - meta_va = - (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.meta_offset; + meta_va = tex->buffer.gpu_address + tex->surface.meta_offset; if (sscreen->info.chip_class == GFX8) { meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 3556c8a074d..ac57c8d13d1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -284,17 +284,6 @@ static void si_destroy_context(struct pipe_context *context) if (sctx->blitter) util_blitter_destroy(sctx->blitter); - /* Release DCC stats. */ - for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) { - assert(!sctx->dcc_stats[i].query_active); - - for (int j = 0; j < ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++) - if (sctx->dcc_stats[i].ps_stats[j]) - sctx->b.destroy_query(&sctx->b, sctx->dcc_stats[i].ps_stats[j]); - - si_texture_reference(&sctx->dcc_stats[i].tex, NULL); - } - if (sctx->query_result_shader) sctx->b.delete_compute_state(&sctx->b, sctx->query_result_shader); if (sctx->sh_query_result_shader) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 60a26230986..da316134201 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -385,37 +385,14 @@ struct si_texture { /* We need to track DCC dirtiness, because st/dri usually calls * flush_resource twice per frame (not a bug) and we don't wanna - * decompress DCC twice. Also, the dirty tracking must be done even - * if DCC isn't used, because it's required by the DCC usage analysis - * for a possible future enablement. + * decompress DCC twice. */ - bool separate_dcc_dirty : 1; bool displayable_dcc_dirty : 1; - /* Statistics gathering for the DCC enablement heuristic. */ - bool dcc_gather_statistics : 1; /* Counter that should be non-zero if the texture is bound to a * framebuffer. */ unsigned framebuffers_bound; - /* Whether the texture is a displayable back buffer and needs DCC - * decompression, which is expensive. Therefore, it's enabled only - * if statistics suggest that it will pay off and it's allocated - * separately. It can't be bound as a sampler by apps. Limited to - * target == 2D and last_level == 0. If enabled, dcc_offset contains - * the absolute GPUVM address, not the relative one. - */ - struct si_resource *dcc_separate_buffer; - /* When DCC is temporarily disabled, the separate buffer is here. */ - struct si_resource *last_dcc_separate_buffer; - /* Estimate of how much this color buffer is written to in units of - * full-screen draws: ps_invocations / (width * height) - * Shader kills, late Z, and blending with trivial discards make it - * inaccurate (we need to count CB updates, not PS invocations). - */ - unsigned ps_draw_ratio; - /* The number of clears since the last DCC usage analysis. */ - unsigned num_slow_clears; }; struct si_surface { @@ -1281,25 +1258,6 @@ struct si_context { bool force_cb_shader_coherent; - /* Statistics gathering for the DCC enablement heuristic. It can't be - * in si_texture because si_texture can be shared by multiple - * contexts. This is for back buffers only. We shouldn't get too many - * of those. - * - * X11 DRI3 rotates among a finite set of back buffers. They should - * all fit in this array. If they don't, separate DCC might never be - * enabled by DCC stat gathering. - */ - struct { - struct si_texture *tex; - /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */ - struct pipe_query *ps_stats[3]; - /* If all slots are used and another slot is needed, - * the least recently used slot is evicted based on this. */ - int64_t last_use_timestamp; - bool query_active; - } dcc_stats[5]; - struct si_tracked_regs tracked_regs; /* Resources that need to be flushed, but will not get an explicit @@ -1617,10 +1575,6 @@ struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe, const struct pipe_surface *templ, unsigned width0, unsigned height0, unsigned width, unsigned height); unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap); -void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex); -void vi_separate_dcc_start_query(struct si_context *sctx, struct si_texture *tex); -void vi_separate_dcc_stop_query(struct si_context *sctx, struct si_texture *tex); -void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct si_texture *tex); bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex); void si_init_screen_texture_functions(struct si_screen *sscreen); void si_init_context_texture_functions(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index c6ded8ea179..923ed3a25a3 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2583,8 +2583,6 @@ void si_update_fb_dirtiness_after_rendering(struct si_context *sctx) tex->dirty_level_mask |= 1 << surf->u.tex.level; tex->fmask_is_identity = false; } - if (tex->dcc_gather_statistics) - tex->separate_dcc_dirty = true; } } @@ -2656,15 +2654,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_update_fb_dirtiness_after_rendering(sctx); - for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { - if (!sctx->framebuffer.state.cbufs[i]) - continue; - - tex = (struct si_texture *)sctx->framebuffer.state.cbufs[i]->texture; - if (tex->dcc_gather_statistics) - vi_separate_dcc_stop_query(sctx, tex); - } - /* Disable DCC if the formats are incompatible. */ for (i = 0; i < state->nr_cbufs; i++) { if (!state->cbufs[i]) @@ -2821,12 +2810,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, p_atomic_inc(&tex->framebuffers_bound); - if (tex->dcc_gather_statistics) { - /* Dirty tracking must be enabled for DCC usage analysis. */ - sctx->framebuffer.compressed_cb_mask |= 1 << i; - vi_separate_dcc_start_query(sctx, tex); - } - /* Update the minimum but don't keep 0. */ if (!sctx->framebuffer.min_bytes_per_pixel || tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) @@ -2967,11 +2950,6 @@ static void si_emit_framebuffer_state(struct si_context *sctx) RADEON_PRIO_SEPARATE_META); } - if (tex->dcc_separate_buffer) - radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->dcc_separate_buffer, - RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC, - RADEON_PRIO_SEPARATE_META); - /* Compute mutable surface parameters. */ cb_color_base = tex->buffer.gpu_address >> 8; cb_color_fmask = 0; @@ -3011,9 +2989,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) if (!is_msaa_resolve_dst) cb_color_info |= S_028C70_DCC_ENABLE(1); - cb_dcc_base = - ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.meta_offset) >> - 8; + cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 50621e0ed8c..5ec7ed81ed3 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -347,8 +347,6 @@ static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture if (!si_can_disable_dcc(tex)) return false; - assert(tex->dcc_separate_buffer == NULL); - /* Disable DCC. */ ac_surface_zero_dcc_fields(&tex->surface); @@ -491,11 +489,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex tex->can_sample_z = new_tex->can_sample_z; tex->can_sample_s = new_tex->can_sample_s; - tex->separate_dcc_dirty = new_tex->separate_dcc_dirty; tex->displayable_dcc_dirty = new_tex->displayable_dcc_dirty; - tex->dcc_gather_statistics = new_tex->dcc_gather_statistics; - si_resource_reference(&tex->dcc_separate_buffer, new_tex->dcc_separate_buffer); - si_resource_reference(&tex->last_dcc_separate_buffer, new_tex->last_dcc_separate_buffer); if (new_bind_flag == PIPE_BIND_LINEAR) { assert(!tex->surface.meta_offset); @@ -516,7 +510,6 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture memset(&md, 0, sizeof(md)); - assert(tex->dcc_separate_buffer == NULL); assert(tex->surface.fmask_size == 0); static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, @@ -802,8 +795,6 @@ static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource si_resource_reference(&tex->cmask_buffer, NULL); } radeon_bo_reference(((struct si_screen*)screen)->ws, &resource->buf, NULL); - si_resource_reference(&tex->dcc_separate_buffer, NULL); - si_resource_reference(&tex->last_dcc_separate_buffer, NULL); FREE(tex); } @@ -966,12 +957,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* Applies to GCN. */ tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode; - /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers - * between frames, so the only thing that can enable separate DCC - * with DRI2 is multiple slow clears within a frame. - */ - tex->ps_draw_ratio = 0; - if (!ac_surface_override_offset_stride(&sscreen->info, &tex->surface, tex->buffer.b.b.last_level + 1, offset, pitch_in_bytes / tex->surface.bpe)) @@ -2189,228 +2174,6 @@ unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap) return ~0U; } -/* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */ - -static void vi_dcc_clean_up_context_slot(struct si_context *sctx, int slot) -{ - int i; - - if (sctx->dcc_stats[slot].query_active) - vi_separate_dcc_stop_query(sctx, sctx->dcc_stats[slot].tex); - - for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats[slot].ps_stats); i++) - if (sctx->dcc_stats[slot].ps_stats[i]) { - sctx->b.destroy_query(&sctx->b, sctx->dcc_stats[slot].ps_stats[i]); - sctx->dcc_stats[slot].ps_stats[i] = NULL; - } - - si_texture_reference(&sctx->dcc_stats[slot].tex, NULL); -} - -/** - * Return the per-context slot where DCC statistics queries for the texture live. - */ -static unsigned vi_get_context_dcc_stats_index(struct si_context *sctx, struct si_texture *tex) -{ - int i, empty_slot = -1; - - /* Remove zombie textures (textures kept alive by this array only). */ - for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) - if (sctx->dcc_stats[i].tex && sctx->dcc_stats[i].tex->buffer.b.b.reference.count == 1) - vi_dcc_clean_up_context_slot(sctx, i); - - /* Find the texture. */ - for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) { - /* Return if found. */ - if (sctx->dcc_stats[i].tex == tex) { - sctx->dcc_stats[i].last_use_timestamp = os_time_get(); - return i; - } - - /* Record the first seen empty slot. */ - if (empty_slot == -1 && !sctx->dcc_stats[i].tex) - empty_slot = i; - } - - /* Not found. Remove the oldest member to make space in the array. */ - if (empty_slot == -1) { - int oldest_slot = 0; - - /* Find the oldest slot. */ - for (i = 1; i < ARRAY_SIZE(sctx->dcc_stats); i++) - if (sctx->dcc_stats[oldest_slot].last_use_timestamp > - sctx->dcc_stats[i].last_use_timestamp) - oldest_slot = i; - - /* Clean up the oldest slot. */ - vi_dcc_clean_up_context_slot(sctx, oldest_slot); - empty_slot = oldest_slot; - } - - /* Add the texture to the new slot. */ - si_texture_reference(&sctx->dcc_stats[empty_slot].tex, tex); - sctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get(); - return empty_slot; -} - -static struct pipe_query *vi_create_resuming_pipestats_query(struct si_context *sctx) -{ - struct si_query_hw *query = - (struct si_query_hw *)sctx->b.create_query(&sctx->b, PIPE_QUERY_PIPELINE_STATISTICS, 0); - - query->flags |= SI_QUERY_HW_FLAG_BEGIN_RESUMES; - return (struct pipe_query *)query; -} - -/** - * Called when binding a color buffer. - */ -void vi_separate_dcc_start_query(struct si_context *sctx, struct si_texture *tex) -{ - unsigned i = vi_get_context_dcc_stats_index(sctx, tex); - - assert(!sctx->dcc_stats[i].query_active); - - if (!sctx->dcc_stats[i].ps_stats[0]) - sctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(sctx); - - /* begin or resume the query */ - sctx->b.begin_query(&sctx->b, sctx->dcc_stats[i].ps_stats[0]); - sctx->dcc_stats[i].query_active = true; -} - -/** - * Called when unbinding a color buffer. - */ -void vi_separate_dcc_stop_query(struct si_context *sctx, struct si_texture *tex) -{ - unsigned i = vi_get_context_dcc_stats_index(sctx, tex); - - assert(sctx->dcc_stats[i].query_active); - assert(sctx->dcc_stats[i].ps_stats[0]); - - /* pause or end the query */ - sctx->b.end_query(&sctx->b, sctx->dcc_stats[i].ps_stats[0]); - sctx->dcc_stats[i].query_active = false; -} - -static bool vi_should_enable_separate_dcc(struct si_texture *tex) -{ - /* The minimum number of fullscreen draws per frame that is required - * to enable DCC. */ - return tex->ps_draw_ratio + tex->num_slow_clears >= 5; -} - -/* Called by fast clear. */ -void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex) -{ - /* The intent is to use this with shared displayable back buffers, - * but it's not strictly limited only to them. - */ - if (!tex->buffer.b.is_shared || - !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) || - tex->buffer.b.b.target != PIPE_TEXTURE_2D || tex->buffer.b.b.last_level > 0 || - !tex->surface.meta_size || sctx->screen->debug_flags & DBG(NO_DCC) || - sctx->screen->debug_flags & DBG(NO_DCC_FB)) - return; - - assert(sctx->chip_class >= GFX8); - assert(!tex->is_depth); - - if (tex->surface.meta_offset) - return; /* already enabled */ - - /* Enable the DCC stat gathering. */ - if (!tex->dcc_gather_statistics) { - tex->dcc_gather_statistics = true; - vi_separate_dcc_start_query(sctx, tex); - } - - if (!vi_should_enable_separate_dcc(tex)) - return; /* stats show that DCC decompression is too expensive */ - - assert(tex->surface.num_meta_levels); - assert(!tex->dcc_separate_buffer); - - si_texture_discard_cmask(sctx->screen, tex); - - /* Get a DCC buffer. */ - if (tex->last_dcc_separate_buffer) { - assert(tex->dcc_gather_statistics); - assert(!tex->dcc_separate_buffer); - tex->dcc_separate_buffer = tex->last_dcc_separate_buffer; - tex->last_dcc_separate_buffer = NULL; - } else { - tex->dcc_separate_buffer = - si_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, - tex->surface.meta_size, 1 << tex->surface.meta_alignment_log2); - if (!tex->dcc_separate_buffer) - return; - } - - /* dcc_offset is the absolute GPUVM address. */ - tex->surface.meta_offset = tex->dcc_separate_buffer->gpu_address; - - /* no need to flag anything since this is called by fast clear that - * flags framebuffer state - */ -} - -/** - * Called by pipe_context::flush_resource, the place where DCC decompression - * takes place. - */ -void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct si_texture *tex) -{ - struct si_context *sctx = (struct si_context *)ctx; - struct pipe_query *tmp; - unsigned i = vi_get_context_dcc_stats_index(sctx, tex); - bool query_active = sctx->dcc_stats[i].query_active; - bool disable = false; - - if (sctx->dcc_stats[i].ps_stats[2]) { - union pipe_query_result result; - - /* Read the results. */ - struct pipe_query *query = sctx->dcc_stats[i].ps_stats[2]; - ctx->get_query_result(ctx, query, true, &result); - si_query_buffer_reset(sctx, &((struct si_query_hw *)query)->buffer); - - /* Compute the approximate number of fullscreen draws. */ - tex->ps_draw_ratio = result.pipeline_statistics.ps_invocations / - (tex->buffer.b.b.width0 * tex->buffer.b.b.height0); - sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio; - - disable = tex->dcc_separate_buffer && !vi_should_enable_separate_dcc(tex); - } - - tex->num_slow_clears = 0; - - /* stop the statistics query for ps_stats[0] */ - if (query_active) - vi_separate_dcc_stop_query(sctx, tex); - - /* Move the queries in the queue by one. */ - tmp = sctx->dcc_stats[i].ps_stats[2]; - sctx->dcc_stats[i].ps_stats[2] = sctx->dcc_stats[i].ps_stats[1]; - sctx->dcc_stats[i].ps_stats[1] = sctx->dcc_stats[i].ps_stats[0]; - sctx->dcc_stats[i].ps_stats[0] = tmp; - - /* create and start a new query as ps_stats[0] */ - if (query_active) - vi_separate_dcc_start_query(sctx, tex); - - if (disable) { - assert(!tex->last_dcc_separate_buffer); - tex->last_dcc_separate_buffer = tex->dcc_separate_buffer; - tex->dcc_separate_buffer = NULL; - tex->surface.meta_offset = 0; - /* no need to flag anything since this is called after - * decompression that re-sets framebuffer state - */ - } -} - static struct pipe_memory_object * si_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated) {