From 5ce74df3f397a7436fd4fd34f2a7b4e1b5f9e408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 12 Jan 2026 07:45:38 -0500 Subject: [PATCH] radeonsi: add faster color clear for gfx12 Compute shader image clears are pretty good on gfx12. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_clear.c | 55 +++++++++++++++++-------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index d73604dc185..06a033446d7 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -1068,35 +1068,50 @@ static void si_fb_clear_via_compute(struct si_context *sctx, unsigned *buffers, const union pipe_color_union *color) { struct pipe_framebuffer_state *fb = &sctx->framebuffer.state; - unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0); + + /* Don't do anything if we are clearing multiple render targets because we would wait + * unnecessarily between clears. + * + * TODO: Use compute for those too but don't wait between compute clears. Do all compute clears + * in parallel with each other and in parallel with the gfx color/Z/S clear as well. + */ + if (sctx->gfx_level >= GFX12 && util_bitcount(color_buffer_mask) > 1) + return; + while (color_buffer_mask) { unsigned i = u_bit_scan(&color_buffer_mask); - struct pipe_surface *surf = &fb->cbufs[i]; struct si_texture *tex = (struct si_texture *)surf->texture; - - /* If DCC is enable (which can happen with thick tiling on gfx8, don't use compute to get - * compressed clears. 
- */ - if (vi_dcc_enabled(tex, surf->level)) - continue; - unsigned width = u_minify(tex->buffer.b.b.width0, surf->level); unsigned height = u_minify(tex->buffer.b.b.height0, surf->level); unsigned depth = surf->last_layer - surf->first_layer + 1; + bool compute_clear = false; - /* Clears of thick and linear layouts are fastest with compute. */ - if (tex->surface.thick_tiling || - (tex->surface.is_linear && (height > 1 || depth > 1 || width >= 8192))) { - struct pipe_box box; + if (sctx->gfx_level >= GFX12) { + if (tex->surface.is_linear || tex->surface.thick_tiling || tex->surface.bpe <= 4 || + (tex->surface.bpe == 16 && tex->buffer.b.b.nr_samples <= 2)) + compute_clear = true; + } else { + /* If DCC is enabled (which can happen with thick tiling on gfx8), don't use compute to get + * compressed clears. + */ + if (vi_dcc_enabled(tex, surf->level)) + continue; - u_box_3d(0, 0, surf->first_layer, width, height, depth, &box); - - if (si_compute_clear_image(sctx, &tex->buffer.b.b, surf->format, surf->level, &box, - color, sctx->render_cond_enabled, true)) - *buffers &= ~(PIPE_CLEAR_COLOR0 << i); /* success */ + /* Clears of thick and linear layouts are fastest with compute. 
*/ + if (tex->surface.thick_tiling || + (tex->surface.is_linear && (height > 1 || depth > 1 || width >= 8192))) + compute_clear = true; } + + struct pipe_box box; + u_box_3d(0, 0, surf->first_layer, width, height, depth, &box); + + if (compute_clear && + si_compute_clear_image(sctx, &tex->buffer.b.b, surf->format, surf->level, &box, + color, sctx->render_cond_enabled, true)) + *buffers &= ~(PIPE_CLEAR_COLOR0 << i); /* success */ } } @@ -1237,6 +1252,10 @@ static void gfx12_clear(struct pipe_context *ctx, unsigned buffers, else if (!util_format_has_stencil(util_format_description(fb->zsbuf.format))) buffers &= ~PIPE_CLEAR_STENCIL; + si_fb_clear_via_compute(sctx, &buffers, color); + if (!buffers) + return; /* all buffers have been cleared */ + if (unlikely(sctx->sqtt_enabled)) { if (buffers & PIPE_CLEAR_COLOR) sctx->sqtt_next_event = EventCmdClearColorImage;