From d2ce5fc07ab618cf0737ed3f145ce844925bee09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 4 Apr 2024 04:56:55 -0400
Subject: [PATCH] radeonsi: split xy_clamp_to_edge to separate X and Y flags
 for the compute blit

to generate less shader code if only one of the axes needs clamping.

Use util_is_box_out_of_bounds instead of doing it manually.

Reviewed-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 .../drivers/radeonsi/si_compute_blit.c        | 28 ++++---------------
 src/gallium/drivers/radeonsi/si_pipe.h        |  3 +-
 .../drivers/radeonsi/si_shaderlib_nir.c       | 21 +++++++-------
 3 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c
index f3580221031..f0adffb7a9c 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -721,29 +721,10 @@ void si_init_compute_blit_functions(struct si_context *sctx)
    sctx->b.clear_buffer = si_pipe_clear_buffer;
 }
 
-static bool si_should_blit_clamp_xy(const struct pipe_blit_info *info)
+static bool si_should_blit_clamp_to_edge(const struct pipe_blit_info *info, unsigned coord_mask)
 {
-   int src_width = u_minify(info->src.resource->width0, info->src.level);
-   int src_height = u_minify(info->src.resource->height0, info->src.level);
-   struct pipe_box box = info->src.box;
-
-   /* Eliminate negative width/height/depth. */
-   if (box.width < 0) {
-      box.x += box.width;
-      box.width *= -1;
-   }
-   if (box.height < 0) {
-      box.y += box.height;
-      box.height *= -1;
-   }
-
-   bool in_bounds = box.x >= 0 && box.x < src_width &&
-                    box.y >= 0 && box.y < src_height &&
-                    box.x + box.width > 0 && box.x + box.width <= src_width &&
-                    box.y + box.height > 0 && box.y + box.height <= src_height;
-
-   /* Return if the box is not in bounds. */
-   return !in_bounds;
+   return util_is_box_out_of_bounds(&info->src.box, coord_mask, info->src.resource->width0,
+                                    info->src.resource->height0, info->src.level);
 }
 
 bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex,
@@ -1140,7 +1121,8 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
    options.sample0_only = sample0_only;
    unsigned num_samples = MAX2(src_samples, dst_samples);
    options.log2_samples = sample0_only ? 0 : util_logbase2(num_samples);
-   options.xy_clamp_to_edge = si_should_blit_clamp_xy(info);
+   options.x_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(0));
+   options.y_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(1));
    options.flip_x = info->src.box.width < 0;
    options.flip_y = info->src.box.height < 0;
    options.sint_to_uint = util_format_is_pure_sint(info->src.format) &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index cb3b04973b0..3468742eb05 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1649,7 +1649,8 @@ union si_compute_blit_shader_key {
       uint8_t log2_samples:4;
       bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
       /* Source coordinate modifiers. */
-      bool xy_clamp_to_edge:1;
+      bool x_clamp_to_edge:1;
+      bool y_clamp_to_edge:1;
       bool flip_x:1;
       bool flip_y:1;
       /* Output modifiers. */
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
index 7e97d057214..0df3ca85199 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -407,24 +407,25 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha
    }
 
    /* Add box.xyz. */
-   nir_def *coord_src = NULL, *coord_dst = NULL;
+   nir_def *coord_src = NULL, *coord_dst = NULL, *dim = NULL;
    unpack_2x16_signed(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3),
                       &coord_src, &coord_dst);
    coord_dst = nir_iadd(&b, coord_dst, dst_xyz);
    coord_src = nir_iadd(&b, coord_src, src_xyz);
 
    /* Clamp to edge for src, only X and Y because Z can't be out of bounds. */
-   if (options->xy_clamp_to_edge) {
-      unsigned src_clamp_channels = options->src_is_1d ? 0x1 : 0x3;
-      nir_def *dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
-      dim = nir_channels(&b, dim, src_clamp_channels);
+   for (unsigned i = 0; i < 2; i++) {
+      if (i ? options->y_clamp_to_edge : options->x_clamp_to_edge) {
+         assert(!options->src_is_1d || i == 0);
 
-      nir_def *coord_src_clamped = nir_channels(&b, coord_src, src_clamp_channels);
-      coord_src_clamped = nir_imax(&b, coord_src_clamped, nir_imm_int(&b, 0));
-      coord_src_clamped = nir_imin(&b, coord_src_clamped, nir_iadd_imm(&b, dim, -1));
+         if (!dim)
+            dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
 
-      for (unsigned i = 0; i < util_bitcount(src_clamp_channels); i++)
-         coord_src = nir_vector_insert_imm(&b, coord_src, nir_channel(&b, coord_src_clamped, i), i);
+         nir_def *tmp = nir_channel(&b, coord_src, i);
+         tmp = nir_imax(&b, tmp, nir_imm_int(&b, 0));
+         tmp = nir_imin(&b, tmp, nir_iadd_imm(&b, nir_channel(&b, dim, i), -1));
+         coord_src = nir_vector_insert_imm(&b, coord_src, tmp, i);
+      }
    }
 
    /* Swizzle coordinates for 1D_ARRAY. */