radeonsi: don't load/resolve/store non-existent src/dst channels in blit shaders

RGBX only loads and resolves 3 components, etc.

v2: bug fixes to make AMD_TEST=computeblit pass

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19477>
This commit is contained in:
Marek Olšák 2022-11-03 18:39:00 -04:00
parent 8956682810
commit 11993185a2
3 changed files with 38 additions and 0 deletions

View file

@ -1036,6 +1036,22 @@ void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surfac
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, true, &saved_cb);
}
/* Find the index of the last component that a compute blit has to load (src)
 * or store (dst) for "format".
 *
 * format: the pipe format whose swizzle is inspected.
 * is_dst: true when the format describes the blit destination; a component
 *         whose swizzle is PIPE_SWIZZLE_1 then also counts as present.
 *
 * Returns a component index in the range 0..3. Component 0 is always treated
 * as present, so 0 is returned when none of the components 1..3 qualifies.
 */
static unsigned si_format_get_last_blit_component(enum pipe_format format, bool is_dst)
{
   const struct util_format_description *desc = util_format_description(format);

   /* Scan from the highest component down; the first hit is the last one needed. */
   for (unsigned chan = 3; chan > 0; chan--) {
      /* Swizzle values up to PIPE_SWIZZLE_W make the component count as present. */
      if (desc->swizzle[chan] <= PIPE_SWIZZLE_W)
         return chan;

      /* If the swizzle is 1 for dst, we need to store 1 explicitly.
       * The hardware stores 0 by default.
       */
      if (is_dst && desc->swizzle[chan] == PIPE_SWIZZLE_1)
         return chan;
   }

   return 0;
}
bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info)
{
/* Compute blits require D16 right now (see the ISA).
@ -1109,6 +1125,12 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info)
options.uint_to_sint = util_format_is_pure_uint(info->src.format) &&
util_format_is_pure_sint(info->dst.format);
options.dst_is_srgb = util_format_is_srgb(info->dst.format);
options.last_dst_channel = si_format_get_last_blit_component(info->dst.format, true);
options.last_src_channel = MIN2(si_format_get_last_blit_component(info->src.format, false),
options.last_dst_channel);
options.use_integer_one = util_format_is_pure_integer(info->dst.format) &&
options.last_src_channel < options.last_dst_channel &&
options.last_dst_channel == 3;
options.fp16_rtz = !util_format_is_pure_integer(info->dst.format) &&
(dst_desc->channel[i].size <= 10 ||
(dst_desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT &&

View file

@ -1579,6 +1579,9 @@ union si_compute_blit_shader_key {
bool sint_to_uint:1;
bool uint_to_sint:1;
bool dst_is_srgb:1;
bool use_integer_one:1;
uint8_t last_src_channel:2;
uint8_t last_dst_channel:2;
bool fp16_rtz:1; /* only for equality with pixel shaders, not necessary otherwise */
};
uint32_t key;

View file

@ -392,6 +392,19 @@ static nir_ssa_def *apply_blit_output_modifiers(nir_builder *b, nir_ssa_def *col
if (options->dst_is_srgb)
color = convert_linear_to_srgb(b, color);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *one = options->use_integer_one ? nir_imm_int(b, 1) : nir_imm_float(b, 1);
/* Set channels not present in src to 0 or 1. This will eliminate code loading and resolving
* those channels.
*/
for (unsigned chan = options->last_src_channel + 1; chan <= options->last_dst_channel; chan++)
color = nir_vector_insert_imm(b, color, chan == 3 ? one : zero, chan);
/* Discard channels not present in dst. The hardware fills unstored channels with 0. */
if (options->last_dst_channel < 3)
color = nir_trim_vector(b, color, options->last_dst_channel + 1);
/* Convert to FP16 with rtz to match the pixel shader. Not necessary, but it helps verify
* the behavior of the whole shader by comparing it to the gfx blit.
*/