mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 22:38:05 +02:00
radeonsi: implement compute_copy_image between 1D_ARRAY and other texture types
And set more optimal compute block sizes. The compute copy is required to preserve NaNs, so this fixes a lot of AMD_TEST=copyimage cases. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16215>
This commit is contained in:
parent
01d994f5e6
commit
8c0669fe3f
5 changed files with 50 additions and 39 deletions
|
|
@ -954,9 +954,7 @@ void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst
|
|||
if (si_can_use_compute_blit(sctx, dst->format, dst->nr_samples, true,
|
||||
vi_dcc_enabled(sdst, dst_level)) &&
|
||||
si_can_use_compute_blit(sctx, src->format, src->nr_samples, false,
|
||||
vi_dcc_enabled(ssrc, src_level)) &&
|
||||
!(dst->target != src->target &&
|
||||
(src->target == PIPE_TEXTURE_1D_ARRAY || dst->target == PIPE_TEXTURE_1D_ARRAY))) {
|
||||
vi_dcc_enabled(ssrc, src_level))) {
|
||||
si_compute_copy_image(sctx, dst, dst_level, src, src_level, dstx, dsty, dstz,
|
||||
src_box, false, SI_OP_SYNC_BEFORE_AFTER);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -504,7 +504,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
|||
enum pipe_format src_format = util_format_linear(src->format);
|
||||
enum pipe_format dst_format = util_format_linear(dst->format);
|
||||
bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear;
|
||||
bool is_1D = dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY;
|
||||
|
||||
assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
|
||||
|
||||
|
|
@ -624,31 +623,37 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
|
|||
|
||||
si_launch_grid_internal(sctx, &info, sctx->cs_dcc_decompress, flags | SI_OP_CS_IMAGE);
|
||||
} else {
|
||||
sctx->cs_user_data[0] = src_box->x | (dstx << 16);
|
||||
|
||||
int block_x = is_1D || is_linear ? 64 : 8;
|
||||
int block_y = is_1D || is_linear ? 1 : 8;
|
||||
bool dst_is_1d = dst->target == PIPE_TEXTURE_1D ||
|
||||
dst->target == PIPE_TEXTURE_1D_ARRAY;
|
||||
bool src_is_1d = src->target == PIPE_TEXTURE_1D ||
|
||||
src->target == PIPE_TEXTURE_1D_ARRAY;
|
||||
int block_x, block_y;
|
||||
int block_z = 1;
|
||||
|
||||
if (is_1D) {
|
||||
assert(height == 1); /* height is not used for 1D images */
|
||||
assert(src_box->y == 0 && dsty == 0);
|
||||
|
||||
sctx->cs_user_data[1] = src_box->z | (dstz << 16);
|
||||
|
||||
/* We pass array index in 'y' for 1D images. */
|
||||
height = depth;
|
||||
depth = 1;
|
||||
/* Choose the block dimensions based on the copy area size. */
|
||||
if (src_box->height <= 4) {
|
||||
block_y = util_next_power_of_two(src_box->height);
|
||||
block_x = 64 / block_y;
|
||||
} else if (src_box->width <= 4) {
|
||||
block_x = util_next_power_of_two(src_box->width);
|
||||
block_y = 64 / block_x;
|
||||
} else if (is_linear) {
|
||||
block_x = 64;
|
||||
block_y = 1;
|
||||
} else {
|
||||
sctx->cs_user_data[1] = src_box->y | (dsty << 16);
|
||||
sctx->cs_user_data[2] = src_box->z | (dstz << 16);
|
||||
block_x = 8;
|
||||
block_y = 8;
|
||||
}
|
||||
|
||||
sctx->cs_user_data[0] = src_box->x | (dstx << 16);
|
||||
sctx->cs_user_data[1] = src_box->y | (dsty << 16);
|
||||
sctx->cs_user_data[2] = src_box->z | (dstz << 16);
|
||||
|
||||
set_work_size(&info, block_x, block_y, block_z, width, height, depth);
|
||||
|
||||
void **copy_image_cs_ptr = is_1D ? &sctx->cs_copy_image_1D : &sctx->cs_copy_image_2D;
|
||||
void **copy_image_cs_ptr = &sctx->cs_copy_image[src_is_1d][dst_is_1d];
|
||||
if (!*copy_image_cs_ptr)
|
||||
*copy_image_cs_ptr = si_create_copy_image_cs(sctx, is_1D);
|
||||
*copy_image_cs_ptr = si_create_copy_image_cs(sctx, src_is_1d, dst_is_1d);
|
||||
|
||||
assert(*copy_image_cs_ptr);
|
||||
|
||||
|
|
|
|||
|
|
@ -258,10 +258,12 @@ static void si_destroy_context(struct pipe_context *context)
|
|||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer_rmw);
|
||||
if (sctx->cs_copy_buffer)
|
||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);
|
||||
if (sctx->cs_copy_image_1D)
|
||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1D);
|
||||
if (sctx->cs_copy_image_2D)
|
||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_2D);
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_copy_image); i++) {
|
||||
for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_copy_image[i]); j++) {
|
||||
if (sctx->cs_copy_image[i][j])
|
||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image[i][j]);
|
||||
}
|
||||
}
|
||||
if (sctx->cs_clear_render_target)
|
||||
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target);
|
||||
if (sctx->cs_clear_render_target_1d_array)
|
||||
|
|
|
|||
|
|
@ -965,8 +965,7 @@ struct si_context {
|
|||
void *cs_clear_buffer;
|
||||
void *cs_clear_buffer_rmw;
|
||||
void *cs_copy_buffer;
|
||||
void *cs_copy_image_1D;
|
||||
void *cs_copy_image_2D;
|
||||
void *cs_copy_image[2][2]; /* [src_is_1d][dst_is_1d] */
|
||||
void *cs_clear_render_target;
|
||||
void *cs_clear_render_target_1d_array;
|
||||
void *cs_clear_12bytes_buffer;
|
||||
|
|
@ -1519,7 +1518,7 @@ void si_suspend_queries(struct si_context *sctx);
|
|||
void si_resume_queries(struct si_context *sctx);
|
||||
|
||||
/* si_shaderlib_nir.c */
|
||||
void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D);
|
||||
void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array);
|
||||
void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf);
|
||||
void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ deref_ssa(nir_builder *b, nir_variable *var)
|
|||
* It expects the source and destination (x,y,z) coords as user_data_amd,
|
||||
* packed into 3 SGPRs as 2x16bits per component.
|
||||
*/
|
||||
void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
|
||||
void *si_create_copy_image_cs(struct si_context *sctx, bool src_is_1d_array, bool dst_is_1d_array)
|
||||
{
|
||||
const nir_shader_compiler_options *options =
|
||||
sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
|
||||
|
|
@ -78,12 +78,8 @@ void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
|
|||
*/
|
||||
b.shader->info.workgroup_size_variable = true;
|
||||
|
||||
/* 1D uses 'x' as image coord, and 'y' as array index.
|
||||
* 2D uses 'x'&'y' as image coords, and 'z' as array index.
|
||||
*/
|
||||
int n_components = is_1D ? 2 : 3;
|
||||
b.shader->info.cs.user_data_components_amd = n_components;
|
||||
nir_ssa_def *ids = get_global_ids(&b, n_components);
|
||||
b.shader->info.cs.user_data_components_amd = 3;
|
||||
nir_ssa_def *ids = get_global_ids(&b, 3);
|
||||
|
||||
nir_ssa_def *coord_src = NULL, *coord_dst = NULL;
|
||||
unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst);
|
||||
|
|
@ -91,13 +87,24 @@ void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
|
|||
coord_src = nir_iadd(&b, coord_src, ids);
|
||||
coord_dst = nir_iadd(&b, coord_dst, ids);
|
||||
|
||||
const struct glsl_type *img_type = glsl_image_type(is_1D ? GLSL_SAMPLER_DIM_1D : GLSL_SAMPLER_DIM_2D,
|
||||
/*is_array*/ true, GLSL_TYPE_FLOAT);
|
||||
static unsigned swizzle_xz[] = {0, 2, 0, 0};
|
||||
|
||||
nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, img_type, "img_src");
|
||||
if (src_is_1d_array)
|
||||
coord_src = nir_swizzle(&b, coord_src, swizzle_xz, 4);
|
||||
if (dst_is_1d_array)
|
||||
coord_dst = nir_swizzle(&b, coord_dst, swizzle_xz, 4);
|
||||
|
||||
const struct glsl_type *src_img_type = glsl_image_type(src_is_1d_array ? GLSL_SAMPLER_DIM_1D
|
||||
: GLSL_SAMPLER_DIM_2D,
|
||||
/*is_array*/ true, GLSL_TYPE_FLOAT);
|
||||
const struct glsl_type *dst_img_type = glsl_image_type(dst_is_1d_array ? GLSL_SAMPLER_DIM_1D
|
||||
: GLSL_SAMPLER_DIM_2D,
|
||||
/*is_array*/ true, GLSL_TYPE_FLOAT);
|
||||
|
||||
nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, src_img_type, "img_src");
|
||||
img_src->data.binding = 0;
|
||||
|
||||
nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, img_type, "img_dst");
|
||||
nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, dst_img_type, "img_dst");
|
||||
img_dst->data.binding = 1;
|
||||
|
||||
nir_ssa_def *undef32 = nir_ssa_undef(&b, 1, 32);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue