radeonsi: split xy_clamp_to_edge to separate X and Y flags for the compute blit

to generate less shader code if only one of the axes needs clamping.

Use util_is_box_out_of_bounds instead of doing it manually.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28917>
This commit is contained in:
Marek Olšák 2024-04-04 04:56:55 -04:00 committed by Marge Bot
parent 7ee936bf65
commit d2ce5fc07a
3 changed files with 18 additions and 34 deletions

View file

@@ -721,29 +721,10 @@ void si_init_compute_blit_functions(struct si_context *sctx)
 sctx->b.clear_buffer = si_pipe_clear_buffer;
 }
-static bool si_should_blit_clamp_xy(const struct pipe_blit_info *info)
+static bool si_should_blit_clamp_to_edge(const struct pipe_blit_info *info, unsigned coord_mask)
 {
-int src_width = u_minify(info->src.resource->width0, info->src.level);
-int src_height = u_minify(info->src.resource->height0, info->src.level);
-struct pipe_box box = info->src.box;
-/* Eliminate negative width/height/depth. */
-if (box.width < 0) {
-box.x += box.width;
-box.width *= -1;
-}
-if (box.height < 0) {
-box.y += box.height;
-box.height *= -1;
-}
-bool in_bounds = box.x >= 0 && box.x < src_width &&
-box.y >= 0 && box.y < src_height &&
-box.x + box.width > 0 && box.x + box.width <= src_width &&
-box.y + box.height > 0 && box.y + box.height <= src_height;
-/* Return if the box is not in bounds. */
-return !in_bounds;
+return util_is_box_out_of_bounds(&info->src.box, coord_mask, info->src.resource->width0,
+info->src.resource->height0, info->src.level);
 }
 bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex,
@@ -1140,7 +1121,8 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
 options.sample0_only = sample0_only;
 unsigned num_samples = MAX2(src_samples, dst_samples);
 options.log2_samples = sample0_only ? 0 : util_logbase2(num_samples);
-options.xy_clamp_to_edge = si_should_blit_clamp_xy(info);
+options.x_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(0));
+options.y_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(1));
 options.flip_x = info->src.box.width < 0;
 options.flip_y = info->src.box.height < 0;
 options.sint_to_uint = util_format_is_pure_sint(info->src.format) &&

View file

@@ -1649,7 +1649,8 @@ union si_compute_blit_shader_key {
 uint8_t log2_samples:4;
 bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
 /* Source coordinate modifiers. */
-bool xy_clamp_to_edge:1;
+bool x_clamp_to_edge:1;
+bool y_clamp_to_edge:1;
 bool flip_x:1;
 bool flip_y:1;
 /* Output modifiers. */

View file

@@ -407,24 +407,25 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha
 }
 /* Add box.xyz. */
-nir_def *coord_src = NULL, *coord_dst = NULL;
+nir_def *coord_src = NULL, *coord_dst = NULL, *dim = NULL;
 unpack_2x16_signed(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3),
 &coord_src, &coord_dst);
 coord_dst = nir_iadd(&b, coord_dst, dst_xyz);
 coord_src = nir_iadd(&b, coord_src, src_xyz);
 /* Clamp to edge for src, only X and Y because Z can't be out of bounds. */
-if (options->xy_clamp_to_edge) {
-unsigned src_clamp_channels = options->src_is_1d ? 0x1 : 0x3;
-nir_def *dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
-dim = nir_channels(&b, dim, src_clamp_channels);
+for (unsigned i = 0; i < 2; i++) {
+if (i ? options->y_clamp_to_edge : options->x_clamp_to_edge) {
+assert(!options->src_is_1d || i == 0);
-nir_def *coord_src_clamped = nir_channels(&b, coord_src, src_clamp_channels);
-coord_src_clamped = nir_imax(&b, coord_src_clamped, nir_imm_int(&b, 0));
-coord_src_clamped = nir_imin(&b, coord_src_clamped, nir_iadd_imm(&b, dim, -1));
+if (!dim)
+dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
-for (unsigned i = 0; i < util_bitcount(src_clamp_channels); i++)
-coord_src = nir_vector_insert_imm(&b, coord_src, nir_channel(&b, coord_src_clamped, i), i);
+nir_def *tmp = nir_channel(&b, coord_src, i);
+tmp = nir_imax(&b, tmp, nir_imm_int(&b, 0));
+tmp = nir_imin(&b, tmp, nir_iadd_imm(&b, nir_channel(&b, dim, i), -1));
+coord_src = nir_vector_insert_imm(&b, coord_src, tmp, i);
+}
 }
 /* Swizzle coordinates for 1D_ARRAY. */