radeonsi: don't use threadID.yz/blockID.yz for compute_blit if they're always 0

This can improve performance because fewer VGPRs and SGPRs need to be
initialized.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24732>
This commit is contained in:
Marek Olšák 2023-08-06 22:15:42 -04:00 committed by Marge Bot
parent 3952b89ebb
commit f3398683f2
3 changed files with 9 additions and 4 deletions

View file

@ -1121,6 +1121,11 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
image[1].u.tex.first_layer = 0;
image[1].u.tex.last_layer = util_max_layer(info->dst.resource, info->dst.level);
struct pipe_grid_info grid = {0};
unsigned wg_dim =
set_work_size(&grid, 8, 8, 1, info->dst.box.width, info->dst.box.height,
info->dst.box.depth);
/* Get the shader key. */
const struct util_format_description *dst_desc = util_format_description(info->dst.format);
unsigned i = util_format_get_first_non_void_channel(info->dst.format);
@ -1128,6 +1133,7 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
options.key = 0;
options.always_true = true;
options.wg_dim = wg_dim;
options.src_is_1d = info->src.resource->target == PIPE_TEXTURE_1D ||
info->src.resource->target == PIPE_TEXTURE_1D_ARRAY;
options.dst_is_1d = info->dst.resource->target == PIPE_TEXTURE_1D ||
@ -1177,9 +1183,6 @@ bool si_compute_blit(struct si_context *sctx, const struct pipe_blit_info *info,
sctx->cs_user_data[1] = (info->src.box.y & 0xffff) | ((info->dst.box.y & 0xffff) << 16);
sctx->cs_user_data[2] = (info->src.box.z & 0xffff) | ((info->dst.box.z & 0xffff) << 16);
struct pipe_grid_info grid = {0};
set_work_size(&grid, 8, 8, 1, info->dst.box.width, info->dst.box.height, info->dst.box.depth);
si_launch_grid_internal_images(sctx, image, 2, &grid, shader,
SI_OP_SYNC_BEFORE_AFTER |
(info->render_condition_enable ? SI_OP_CS_RENDER_COND_ENABLE : 0));

View file

@ -1593,6 +1593,7 @@ union si_compute_blit_shader_key {
/* The key saved in _mesa_hash_table_create_u32_keys() can't be 0. */
bool always_true:1;
/* Declaration modifiers. */
uint8_t wg_dim:2; /* 1, 2, or 3 */
bool src_is_1d:1;
bool dst_is_1d:1;
bool src_is_msaa:1;

View file

@ -438,6 +438,7 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha
BITSET_SET(b.shader->info.msaa_images, 0);
if (options->dst_is_msaa)
BITSET_SET(b.shader->info.msaa_images, 1);
/* TODO: 1D blits are 8x slower because the workgroup size is 8x8 */
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
b.shader->info.workgroup_size[2] = 1;
@ -462,7 +463,7 @@ void *si_create_blit_cs(struct si_context *sctx, const union si_compute_blit_sha
/* Instructions. */
/* Let's work with 0-based src and dst coordinates (thread IDs) first. */
nir_def *dst_xyz = get_global_ids(&b, 3);
nir_def *dst_xyz = nir_pad_vector_imm_int(&b, get_global_ids(&b, options->wg_dim), 0, 3);
nir_def *src_xyz = dst_xyz;
/* Flip src coordinates. */