mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 00:40:09 +01:00
radeonsi: split xy_clamp_to_edge to separate X and Y flags for the compute blit
to generate less shader code if only one of the axes needs clamping.
Use util_is_box_out_of_bounds instead of doing it manually.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28917>
This commit is contained in:
parent
7ee936bf65
commit
d2ce5fc07a
3 changed files with 18 additions and 34 deletions
|
|
@@ -721,29 +721,10 @@ void si_init_compute_blit_functions(struct si_context *sctx)
|
|||
sctx->b.clear_buffer = si_pipe_clear_buffer;
|
||||
}
|
||||
|
||||
static bool si_should_blit_clamp_xy(const struct pipe_blit_info *info)
|
||||
static bool si_should_blit_clamp_to_edge(const struct pipe_blit_info *info, unsigned coord_mask)
|
||||
{
|
||||
int src_width = u_minify(info->src.resource->width0, info->src.level);
|
||||
int src_height = u_minify(info->src.resource->height0, info->src.level);
|
||||
struct pipe_box box = info->src.box;
|
||||
|
||||
/* Eliminate negative width/height/depth. */
|
||||
if (box.width < 0) {
|
||||
box.x += box.width;
|
||||
box.width *= -1;
|
||||
}
|
||||
if (box.height < 0) {
|
||||
box.y += box.height;
|
||||
box.height *= -1;
|
||||
}
|
||||
|
||||
bool in_bounds = box.x >= 0 && box.x < src_width &&
|
||||
box.y >= 0 && box.y < src_height &&
|
||||
box.x + box.width > 0 && box.x + box.width <= src_width &&
|
||||
box.y + box.height > 0 && box.y + box.height <= src_height;
|
||||
|
||||
/* Return if the box is not in bounds. */
|
||||
return !in_bounds;
|
||||
return util_is_box_out_of_bounds(&info->src.box, coord_mask, info->src.resource->width0,
|
||||
info->src.resource->height0, info->src.level);
|
||||
}
|
||||
|
||||
bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex,
|
||||
|
|
@@ -1140,7 +1121,8 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
|
|||
options.sample0_only = sample0_only;
|
||||
unsigned num_samples = MAX2(src_samples, dst_samples);
|
||||
options.log2_samples = sample0_only ? 0 : util_logbase2(num_samples);
|
||||
options.xy_clamp_to_edge = si_should_blit_clamp_xy(info);
|
||||
options.x_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(0));
|
||||
options.y_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(1));
|
||||
options.flip_x = info->src.box.width < 0;
|
||||
options.flip_y = info->src.box.height < 0;
|
||||
options.sint_to_uint = util_format_is_pure_sint(info->src.format) &&
|
||||
|
|
|
|||
|
|
@@ -1649,7 +1649,8 @@ union si_compute_blit_shader_key {
|
|||
uint8_t log2_samples:4;
|
||||
bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
|
||||
/* Source coordinate modifiers. */
|
||||
bool xy_clamp_to_edge:1;
|
||||
bool x_clamp_to_edge:1;
|
||||
bool y_clamp_to_edge:1;
|
||||
bool flip_x:1;
|
||||
bool flip_y:1;
|
||||
/* Output modifiers. */
|
||||
|
|
|
|||
|
|
@@ -407,24 +407,25 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha
|
|||
}
|
||||
|
||||
/* Add box.xyz. */
|
||||
nir_def *coord_src = NULL, *coord_dst = NULL;
|
||||
nir_def *coord_src = NULL, *coord_dst = NULL, *dim = NULL;
|
||||
unpack_2x16_signed(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3),
|
||||
&coord_src, &coord_dst);
|
||||
coord_dst = nir_iadd(&b, coord_dst, dst_xyz);
|
||||
coord_src = nir_iadd(&b, coord_src, src_xyz);
|
||||
|
||||
/* Clamp to edge for src, only X and Y because Z can't be out of bounds. */
|
||||
if (options->xy_clamp_to_edge) {
|
||||
unsigned src_clamp_channels = options->src_is_1d ? 0x1 : 0x3;
|
||||
nir_def *dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
|
||||
dim = nir_channels(&b, dim, src_clamp_channels);
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (i ? options->y_clamp_to_edge : options->x_clamp_to_edge) {
|
||||
assert(!options->src_is_1d || i == 0);
|
||||
|
||||
nir_def *coord_src_clamped = nir_channels(&b, coord_src, src_clamp_channels);
|
||||
coord_src_clamped = nir_imax(&b, coord_src_clamped, nir_imm_int(&b, 0));
|
||||
coord_src_clamped = nir_imin(&b, coord_src_clamped, nir_iadd_imm(&b, dim, -1));
|
||||
if (!dim)
|
||||
dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
|
||||
|
||||
for (unsigned i = 0; i < util_bitcount(src_clamp_channels); i++)
|
||||
coord_src = nir_vector_insert_imm(&b, coord_src, nir_channel(&b, coord_src_clamped, i), i);
|
||||
nir_def *tmp = nir_channel(&b, coord_src, i);
|
||||
tmp = nir_imax(&b, tmp, nir_imm_int(&b, 0));
|
||||
tmp = nir_imin(&b, tmp, nir_iadd_imm(&b, nir_channel(&b, dim, i), -1));
|
||||
coord_src = nir_vector_insert_imm(&b, coord_src, tmp, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Swizzle coordinates for 1D_ARRAY. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue