From a4602395d218c5920ef3b9e61491ad1867f0b429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 25 Mar 2024 18:46:08 -0400 Subject: [PATCH] radeonsi: switch compute image clears to the compute blit shader The compute blit shader is faster and handles more stuff. This removes the old clear_render_target shader. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_clear.c | 7 +- .../drivers/radeonsi/si_compute_blit.c | 76 ------------------- src/gallium/drivers/radeonsi/si_pipe.c | 4 - src/gallium/drivers/radeonsi/si_pipe.h | 7 -- .../drivers/radeonsi/si_shaderlib_nir.c | 59 -------------- 5 files changed, 2 insertions(+), 151 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index 5cdd3f9e187..9edbed6d3d3 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -1421,12 +1421,9 @@ static void si_clear_render_target(struct pipe_context *ctx, struct pipe_surface render_condition_enabled, true)) return; - if (dst->texture->nr_samples <= 1 && - (sctx->gfx_level >= GFX10 || !vi_dcc_enabled(sdst, dst->u.tex.level))) { - si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, height, - render_condition_enabled); + if (si_compute_clear_image(sctx, dst->texture, dst->format, dst->u.tex.level, &box, color, + render_condition_enabled, true)) return; - } si_gfx_clear_render_target(ctx, dst, color, dstx, dsty, width, height, render_condition_enabled); diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 5699ba87de5..92c026af899 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -933,82 +933,6 @@ void si_init_compute_blit_functions(struct si_context *sctx) sctx->b.clear_buffer = si_pipe_clear_buffer; } -/* Clear a region of a color surface to a constant value. */ -void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dstsurf, - const union pipe_color_union *color, unsigned dstx, - unsigned dsty, unsigned width, unsigned height, - bool render_condition_enabled) -{ - struct si_context *sctx = (struct si_context *)ctx; - unsigned num_layers = dstsurf->u.tex.last_layer - dstsurf->u.tex.first_layer + 1; - unsigned data[4 + sizeof(color->ui)] = {dstx, dsty, dstsurf->u.tex.first_layer, 0}; - - if (width == 0 || height == 0) - return; - - if (util_format_is_srgb(dstsurf->format)) { - union pipe_color_union color_srgb; - for (int i = 0; i < 3; i++) - color_srgb.f[i] = util_format_linear_to_srgb_float(color->f[i]); - color_srgb.f[3] = color->f[3]; - memcpy(data + 4, color_srgb.ui, sizeof(color->ui)); - } else { - memcpy(data + 4, color->ui, sizeof(color->ui)); - } - - struct pipe_constant_buffer saved_cb = {}; - si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); - - struct pipe_constant_buffer cb = {}; - cb.buffer_size = sizeof(data); - cb.user_buffer = data; - ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, false, &cb); - - struct pipe_image_view image = {0}; - image.resource = dstsurf->texture; - image.shader_access = image.access = PIPE_IMAGE_ACCESS_WRITE; - image.format = util_format_linear(dstsurf->format); - image.u.tex.level = dstsurf->u.tex.level; - image.u.tex.first_layer = 0; /* 3D images ignore first_layer (BASE_ARRAY) */ - image.u.tex.last_layer = dstsurf->u.tex.last_layer; - - struct pipe_grid_info info = {0}; - void *shader; - - if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) { - if (!sctx->cs_clear_render_target) - sctx->cs_clear_render_target = si_clear_render_target_shader(sctx, PIPE_TEXTURE_2D_ARRAY); - shader = sctx->cs_clear_render_target; - - info.block[0] = 8; - info.last_block[0] = width % 8; - info.block[1] = 8; - info.last_block[1] = height % 8; - info.block[2] = 1; - info.grid[0] = DIV_ROUND_UP(width, 8); - info.grid[1] = DIV_ROUND_UP(height, 8); - info.grid[2] = num_layers; - } else { - if (!sctx->cs_clear_render_target_1d_array) - sctx->cs_clear_render_target_1d_array = si_clear_render_target_shader(sctx, PIPE_TEXTURE_1D_ARRAY); - shader = sctx->cs_clear_render_target_1d_array; - - info.block[0] = 64; - info.last_block[0] = width % 64; - info.block[1] = 1; - info.block[2] = 1; - info.grid[0] = DIV_ROUND_UP(width, 64); - info.grid[1] = num_layers; - info.grid[2] = 1; - } - - si_launch_grid_internal_images(sctx, &image, 1, &info, shader, - SI_OP_SYNC_BEFORE_AFTER | - (render_condition_enabled ? SI_OP_CS_RENDER_COND_ENABLE : 0)); - - ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, true, &saved_cb); -} - static bool si_should_blit_clamp_xy(const struct pipe_blit_info *info) { int src_width = u_minify(info->src.resource->width0, info->src.level); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index aba1b71a7bd..7dad6182cb2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -280,10 +280,6 @@ static void si_destroy_context(struct pipe_context *context) } } } - if (sctx->cs_clear_render_target) - sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target); - if (sctx->cs_clear_render_target_1d_array) - sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array); if (sctx->cs_clear_12bytes_buffer) sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer); for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_dcc_retile); i++) { diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3b47ab18d9f..b8a72413c8d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -991,8 +991,6 @@ struct si_context { void *cs_copy_buffer; void *cs_ubyte_to_ushort; void *cs_copy_image[3][2][2]; /* [wg_dim-1][src_is_1d][dst_is_1d] */ - void *cs_clear_render_target; - void *cs_clear_render_target_1d_array; void *cs_clear_12bytes_buffer; void *cs_dcc_retile[32]; void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */ @@ -1511,10 +1509,6 @@ bool si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u void si_compute_clear_image_dcc_single(struct si_context *sctx, struct si_texture *tex, unsigned level, enum pipe_format format, const union pipe_color_union *color, unsigned flags); -void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dstsurf, - const union pipe_color_union *color, unsigned dstx, - unsigned dsty, unsigned width, unsigned height, - bool render_condition_enabled); void si_retile_dcc(struct si_context *sctx, struct si_texture *tex); void gfx9_clear_dcc_msaa(struct si_context *sctx, struct pipe_resource *res, uint32_t clear_value, unsigned flags, enum si_coherency coher); @@ -1682,7 +1676,6 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_ bool is_clear); void *si_create_ubyte_to_ushort_compute_shader(struct si_context *sctx); void *si_create_clear_buffer_rmw_cs(struct si_context *sctx); -void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type); void *si_create_fmask_expand_cs(struct si_context *sctx, unsigned num_samples, bool is_array); void *si_create_query_result_cs(struct si_context *sctx); void *gfx11_create_sh_query_result_cs(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index cb51418b2d5..26dc52c6059 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -569,65 +569,6 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha return create_shader_state(sctx, b.shader); } -void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type) -{ - nir_def *address; - enum glsl_sampler_dim sampler_type; - - const nir_shader_compiler_options *options = - sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); - - nir_builder b = - nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_render_target"); - b.shader->info.num_ubos = 1; - b.shader->info.num_images = 1; - b.shader->num_uniforms = 2; - - switch (type) { - case PIPE_TEXTURE_1D_ARRAY: - b.shader->info.workgroup_size[0] = 64; - b.shader->info.workgroup_size[1] = 1; - b.shader->info.workgroup_size[2] = 1; - sampler_type = GLSL_SAMPLER_DIM_1D; - address = get_global_ids(&b, 2); - break; - case PIPE_TEXTURE_2D_ARRAY: - b.shader->info.workgroup_size[0] = 8; - b.shader->info.workgroup_size[1] = 8; - b.shader->info.workgroup_size[2] = 1; - sampler_type = GLSL_SAMPLER_DIM_2D; - address = get_global_ids(&b, 3); - break; - default: - unreachable("unsupported texture target type"); - } - - const struct glsl_type *img_type = glsl_image_type(sampler_type, true, GLSL_TYPE_FLOAT); - nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "image"); - output_img->data.image.format = PIPE_FORMAT_R32G32B32A32_FLOAT; - - nir_def *zero = nir_imm_int(&b, 0); - nir_def *ubo = nir_load_ubo(&b, 4, 32, zero, zero, .range_base = 0, .range = 16); - - /* TODO: No GL CTS tests for 1D arrays, relying on OpenCL CTS for now. - * As a sanity check, "OpenCL-CTS/test_conformance/images/clFillImage" tests should pass - */ - if (type == PIPE_TEXTURE_1D_ARRAY) { - unsigned swizzle[4] = {0, 2, 0, 0}; - ubo = nir_swizzle(&b, ubo, swizzle, 4); - } - - address = nir_iadd(&b, address, ubo); - nir_def *coord = nir_pad_vector(&b, address, 4); - - nir_def *data = nir_load_ubo(&b, 4, 32, zero, nir_imm_int(&b, 16), .range_base = 16, .range = 16); - - nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, zero, data, zero, - .image_dim = sampler_type, .image_array = true); - - return create_shader_state(sctx, b.shader); -} - /* Store the clear color at the beginning of every 256B block. This is required when we clear DCC * to GFX11_DCC_CLEAR_SINGLE. */