radeonsi: enable fast FB clears for conditional rendering

They use compute shaders, which always support the render condition.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28725>
This commit is contained in:
Marek Olšák 2024-04-12 19:13:02 -04:00 committed by Marge Bot
parent 9a47fbecd7
commit eccaba9dfa
5 changed files with 18 additions and 14 deletions

View file

@@ -1164,7 +1164,7 @@ bool si_msaa_resolve_blit_via_CB(struct pipe_context *ctx, const struct pipe_bli
if (!vi_dcc_get_clear_info(sctx, dst, info->dst.level, DCC_UNCOMPRESSED, &clear_info))
goto resolve_to_temp;
si_execute_clears(sctx, &clear_info, 1, SI_CLEAR_TYPE_DCC);
si_execute_clears(sctx, &clear_info, 1, SI_CLEAR_TYPE_DCC, info->render_condition_enable);
dst->dirty_level_mask &= ~(1 << info->dst.level);
}

View file

@@ -48,7 +48,7 @@ static void si_init_clear_image_dcc_single(struct si_clear_info *info, struct si
}
void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
unsigned num_clears, unsigned types)
unsigned num_clears, unsigned types, bool render_condition_enable)
{
if (!num_clears)
return;
@@ -74,18 +74,21 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
unsigned flags = SI_OP_SKIP_CACHE_INV_BEFORE |
(render_condition_enable ? SI_OP_CS_RENDER_COND_ENABLE : 0);
/* Execute clears. */
for (unsigned i = 0; i < num_clears; i++) {
if (info[i].format) {
si_compute_clear_image_dcc_single(sctx, (struct si_texture*)info[i].resource,
info[i].level, info[i].format, &info[i].color,
SI_OP_SKIP_CACHE_INV_BEFORE);
flags);
continue;
}
if (info[i].is_dcc_msaa) {
gfx9_clear_dcc_msaa(sctx, info[i].resource, info[i].clear_value,
SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);
flags, SI_COHERENCY_CP);
continue;
}
@@ -94,12 +97,12 @@ void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
if (info[i].writemask != 0xffffffff) {
si_compute_clear_buffer_rmw(sctx, info[i].resource, info[i].offset, info[i].size,
info[i].clear_value, info[i].writemask,
SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);
flags, SI_COHERENCY_CP);
} else {
/* Compute shaders are much faster on both dGPUs and APUs. Don't use CP DMA. */
si_clear_buffer(sctx, info[i].resource, info[i].offset, info[i].size,
&info[i].clear_value, 4, SI_OP_SKIP_CACHE_INV_BEFORE,
SI_COHERENCY_CP, SI_COMPUTE_CLEAR_METHOD);
&info[i].clear_value, 4, flags, SI_COHERENCY_CP,
SI_COMPUTE_CLEAR_METHOD);
}
}
@@ -708,9 +711,6 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
return;
#endif
if (sctx->render_cond)
return;
/* Gather information about what to clear. */
unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
while (color_buffer_mask) {
@@ -1145,7 +1145,7 @@ static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
}
}
si_execute_clears(sctx, info, num_clears, clear_types);
si_execute_clears(sctx, info, num_clears, clear_types, sctx->render_cond_enabled);
}
static void si_clear(struct pipe_context *ctx, unsigned buffers,

View file

@@ -434,7 +434,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
}
/* TODO: use compute for unaligned big sizes */
if (method == SI_AUTO_SELECT_CLEAR_METHOD && (
if (method == SI_AUTO_SELECT_CLEAR_METHOD &&
(flags & SI_OP_CS_RENDER_COND_ENABLE ||
clear_value_size > 4 ||
(clear_value_size == 4 && offset % 4 == 0 && size > compute_min_size))) {
method = SI_COMPUTE_CLEAR_METHOD;
@@ -444,6 +445,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
clear_value_size, flags, coher);
} else {
assert(clear_value_size == 4);
assert(!(flags & SI_OP_CS_RENDER_COND_ENABLE));
si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, dst, offset, aligned_size, *clear_value,
flags, coher, get_cache_policy(sctx, coher, size));
}
@@ -454,6 +456,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
/* Handle non-dword alignment. */
if (size) {
assert(!(flags & SI_OP_CS_RENDER_COND_ENABLE));
assert(dst);
assert(dst->target == PIPE_BUFFER);
assert(size < 4);

View file

@@ -1484,7 +1484,7 @@ void si_init_buffer_clear(struct si_clear_info *info,
struct pipe_resource *resource, uint64_t offset,
uint32_t size, uint32_t clear_value);
void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
unsigned num_clears, unsigned types);
unsigned num_clears, unsigned types, bool render_condition_enabled);
void si_gfx_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst,
const union pipe_color_union *color, unsigned dstx,
unsigned dsty, unsigned width, unsigned height,

View file

@@ -1194,7 +1194,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
/* Execute the clears. */
if (num_clears) {
si_execute_clears(si_get_aux_context(&sscreen->aux_context.general), clears, num_clears, 0);
si_execute_clears(si_get_aux_context(&sscreen->aux_context.general), clears, num_clears, 0,
false);
si_put_aux_context_flush(&sscreen->aux_context.general);
}