radeonsi: split xy_clamp_to_edge to separate X and Y flags for the compute blit

This generates less shader code when only one of the axes needs clamping.
Use util_is_box_out_of_bounds instead of doing the bounds check manually.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28917>
2026-02-23 07:30:30 +01:00 · 2024-04-04 04:56:55 -04:00 · 2024-04-04 04:56:55 -04:00 · d2ce5fc07a
commit d2ce5fc07a
parent 7ee936bf65
3 changed files with 18 additions and 34 deletions
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -721,29 +721,10 @@ void si_init_compute_blit_functions(struct si_context *sctx)
   sctx->b.clear_buffer = si_pipe_clear_buffer;
 }

-static bool si_should_blit_clamp_xy(const struct pipe_blit_info *info)
+static bool si_should_blit_clamp_to_edge(const struct pipe_blit_info *info, unsigned coord_mask)
 {
-   int src_width = u_minify(info->src.resource->width0, info->src.level);
-   int src_height = u_minify(info->src.resource->height0, info->src.level);
-   struct pipe_box box = info->src.box;
-
-   /* Eliminate negative width/height/depth. */
-   if (box.width < 0) {
-      box.x += box.width;
-      box.width *= -1;
-   }
-   if (box.height < 0) {
-      box.y += box.height;
-      box.height *= -1;
-   }
-
-   bool in_bounds = box.x >= 0 && box.x < src_width &&
-                    box.y >= 0 && box.y < src_height &&
-                    box.x + box.width > 0 && box.x + box.width <= src_width &&
-                    box.y + box.height > 0 && box.y + box.height <= src_height;
-
-   /* Return if the box is not in bounds. */
-   return !in_bounds;
+   return util_is_box_out_of_bounds(&info->src.box, coord_mask, info->src.resource->width0,
+                                    info->src.resource->height0, info->src.level);
 }

 bool si_compute_clear_image(struct si_context *sctx, struct pipe_resource *tex,
@@ -1140,7 +1121,8 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
      options.sample0_only = sample0_only;
      unsigned num_samples = MAX2(src_samples, dst_samples);
      options.log2_samples = sample0_only ? 0 : util_logbase2(num_samples);
-      options.xy_clamp_to_edge = si_should_blit_clamp_xy(info);
+      options.x_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(0));
+      options.y_clamp_to_edge = si_should_blit_clamp_to_edge(info, BITFIELD_BIT(1));
      options.flip_x = info->src.box.width < 0;
      options.flip_y = info->src.box.height < 0;
      options.sint_to_uint = util_format_is_pure_sint(info->src.format) &&
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1649,7 +1649,8 @@ union si_compute_blit_shader_key {
      uint8_t log2_samples:4;
      bool sample0_only:1; /* src is MSAA, dst is not MSAA, log2_samples is ignored */
      /* Source coordinate modifiers. */
-      bool xy_clamp_to_edge:1;
+      bool x_clamp_to_edge:1;
+      bool y_clamp_to_edge:1;
      bool flip_x:1;
      bool flip_y:1;
      /* Output modifiers. */
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -407,24 +407,25 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha
   }

   /* Add box.xyz. */
-   nir_def *coord_src = NULL, *coord_dst = NULL;
+   nir_def *coord_src = NULL, *coord_dst = NULL, *dim = NULL;
   unpack_2x16_signed(&b, nir_trim_vector(&b, nir_load_user_data_amd(&b), 3),
                      &coord_src, &coord_dst);
   coord_dst = nir_iadd(&b, coord_dst, dst_xyz);
   coord_src = nir_iadd(&b, coord_src, src_xyz);

   /* Clamp to edge for src, only X and Y because Z can't be out of bounds. */
-   if (options->xy_clamp_to_edge) {
-      unsigned src_clamp_channels = options->src_is_1d ? 0x1 : 0x3;
-      nir_def *dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);
-      dim = nir_channels(&b, dim, src_clamp_channels);
+   for (unsigned i = 0; i < 2; i++) {
+      if (i ? options->y_clamp_to_edge : options->x_clamp_to_edge) {
+         assert(!options->src_is_1d || i == 0);

-      nir_def *coord_src_clamped = nir_channels(&b, coord_src, src_clamp_channels);
-      coord_src_clamped = nir_imax(&b, coord_src_clamped, nir_imm_int(&b, 0));
-      coord_src_clamped = nir_imin(&b, coord_src_clamped, nir_iadd_imm(&b, dim, -1));
+         if (!dim)
+            dim = nir_image_deref_size(&b, 4, 32, deref_ssa(&b, img_src), zero);

-      for (unsigned i = 0; i < util_bitcount(src_clamp_channels); i++)
-         coord_src = nir_vector_insert_imm(&b, coord_src, nir_channel(&b, coord_src_clamped, i), i);
+         nir_def *tmp = nir_channel(&b, coord_src, i);
+         tmp = nir_imax(&b, tmp, nir_imm_int(&b, 0));
+         tmp = nir_imin(&b, tmp, nir_iadd_imm(&b, nir_channel(&b, dim, i), -1));
+         coord_src = nir_vector_insert_imm(&b, coord_src, tmp, i);
+      }
   }

   /* Swizzle coordinates for 1D_ARRAY. */