radeonsi: don't load/resolve/store non-existent src/dst channels in blit shaders

RGBX only loads and resolves 3 components, etc.

v2: bug fixes to make AMD_TEST=computeblit pass

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19477>
2026-03-06 07:50:30 +01:00 · 2022-11-03 18:39:00 -04:00 · 2022-11-03 18:39:00 -04:00 · 11993185a2
commit 11993185a2
parent 8956682810
3 changed files with 38 additions and 0 deletions
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -1036,6 +1036,22 @@ void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surfac
   ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, true, &saved_cb);
 }

+/* Return the last component that a compute blit should load and store.
+ *
+ * The format description returned by util_format_description() is scanned
+ * for channel indices 1..3 and the highest qualifying index is returned.
+ * Channel 0 is never tested: the result starts at 0, so 0 is returned when
+ * none of the channels 1..3 qualifies.
+ *
+ * A channel qualifies when its swizzle is <= PIPE_SWIZZLE_W (presumably
+ * meaning it maps to a real X/Y/Z/W component; this relies on the enum
+ * ordering, TODO confirm), or, for a destination format only, when its
+ * swizzle is PIPE_SWIZZLE_1.
+ *
+ * \param format  format whose swizzle is inspected
+ * \param is_dst  true if the format is the blit destination, which makes
+ *                PIPE_SWIZZLE_1 channels count as well
+ * \return index in the range 0..3 of the last channel to load/store
+ */
+static unsigned si_format_get_last_blit_component(enum pipe_format format, bool is_dst)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   unsigned num = 0;
+
+   for (unsigned i = 1; i < 4; i++) {
+      if (desc->swizzle[i] <= PIPE_SWIZZLE_W ||
+          /* If the swizzle is 1 for dst, we need to store 1 explicitly.
+           * The hardware stores 0 by default. */
+          (is_dst && desc->swizzle[i] == PIPE_SWIZZLE_1))
+         num = i;
+   }
+   return num;
+}
+
 bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info)
 {
   /* Compute blits require D16 right now (see the ISA).
@@ -1109,6 +1125,12 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info)
   options.uint_to_sint = util_format_is_pure_uint(info->src.format) &&
                          util_format_is_pure_sint(info->dst.format);
   options.dst_is_srgb = util_format_is_srgb(info->dst.format);
+   options.last_dst_channel = si_format_get_last_blit_component(info->dst.format, true);
+   options.last_src_channel = MIN2(si_format_get_last_blit_component(info->src.format, false),
+                                   options.last_dst_channel);
+   options.use_integer_one = util_format_is_pure_integer(info->dst.format) &&
+                             options.last_src_channel < options.last_dst_channel &&
+                             options.last_dst_channel == 3;
   options.fp16_rtz = !util_format_is_pure_integer(info->dst.format) &&
                      (dst_desc->channel[i].size <= 10 ||
                       (dst_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT &&
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1579,6 +1579,9 @@ union si_compute_blit_shader_key {
      bool sint_to_uint:1;
      bool uint_to_sint:1;
      bool dst_is_srgb:1;
+      bool use_integer_one:1;
+      uint8_t last_src_channel:2;
+      uint8_t last_dst_channel:2;
      bool fp16_rtz:1; /* only for equality with pixel shaders, not necessary otherwise */
   };
   uint32_t key;
--- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
@@ -392,6 +392,19 @@ static nir_ssa_def *apply_blit_output_modifiers(nir_builder *b, nir_ssa_def *col
   if (options->dst_is_srgb)
      color = convert_linear_to_srgb(b, color);

+   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_ssa_def *one = options->use_integer_one ? nir_imm_int(b, 1) : nir_imm_float(b, 1);
+
+   /* Set channels not present in src to 0 or 1. This will eliminate code loading and resolving
+    * those channels.
+    */
+   for (unsigned chan = options->last_src_channel + 1; chan <= options->last_dst_channel; chan++)
+      color = nir_vector_insert_imm(b, color, chan == 3 ? one : zero, chan);
+
+   /* Discard channels not present in dst. The hardware fills unstored channels with 0. */
+   if (options->last_dst_channel < 3)
+      color = nir_trim_vector(b, color, options->last_dst_channel + 1);
+
   /* Convert to FP16 with rtz to match the pixel shader. Not necessary, but it helps verify
    * the behavior of the whole shader by comparing it to the gfx blit.
    */