diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index ae9b05271e1..cca67ab693c 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -620,7 +620,7 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u si_launch_grid_internal(sctx, &info, sctx->cs_dcc_decompress, flags | SI_OP_CS_IMAGE); } else if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) { if (!sctx->cs_copy_image_1d_array) - sctx->cs_copy_image_1d_array = si_create_copy_image_compute_shader_1d_array(ctx); + sctx->cs_copy_image_1d_array = si_create_copy_image_1d_array_cs(ctx); info.block[0] = 64; info.last_block[0] = width % 64; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6e68c6dc3e4..1c994d6833b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1518,6 +1518,7 @@ void si_resume_queries(struct si_context *sctx); /* si_shaderlib_nir.c */ void *si_create_copy_image_cs(struct pipe_context *ctx); +void *si_create_copy_image_1d_array_cs(struct pipe_context *ctx); void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf); void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex); @@ -1528,7 +1529,6 @@ void *si_create_fixed_func_tcs(struct si_context *sctx); void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy); void *si_create_clear_buffer_rmw_cs(struct si_context *sctx); -void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx); void *si_create_dcc_decompress_cs(struct pipe_context *ctx); void *si_clear_render_target_shader(struct pipe_context *ctx); void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index b0259347d0c..3d8758d456d 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -53,6 +53,51 @@ static void unpack_2x16(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_s *y = nir_ushr(b, src, nir_imm_int(b, 16)); } +/* Create a NIR compute shader implementing copy_image for 1D_ARRAY images. + */ +void *si_create_copy_image_1d_array_cs(struct pipe_context *ctx) +{ + struct si_context *sctx = (struct si_context *) ctx; + + const nir_shader_compiler_options *options = + sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); + + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_1d_array_cs"); + + b.shader->info.workgroup_size[0] = 64; + b.shader->info.workgroup_size[1] = 1; + b.shader->info.workgroup_size[2] = 1; + + b.shader->info.cs.user_data_components_amd = 3; + b.shader->info.num_images = 2; + + nir_ssa_def *coord_src = NULL, *coord_dst = NULL; + unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst); + + nir_ssa_def *ids = get_global_ids(&b, 3); + coord_src = nir_channels(&b, nir_iadd(&b, coord_src, ids), /*xz*/ 0x5); + coord_dst = nir_channels(&b, nir_iadd(&b, coord_dst, ids), /*xz*/ 0x5); + + const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_1D, /*is_array*/ true, GLSL_TYPE_FLOAT); + + nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, img_type, "img_src"); + img_src->data.binding = 0; + + nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, img_type, "img_dst"); + img_dst->data.binding = 1; + + nir_ssa_def *undef32 = nir_ssa_undef(&b, 1, 32); + nir_ssa_def *zero = nir_imm_int(&b, 0); + + nir_ssa_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32, + &nir_build_deref_var(&b, img_src)->dest.ssa, coord_src, undef32, zero); + + nir_image_deref_store(&b, + &nir_build_deref_var(&b, img_dst)->dest.ssa, coord_dst, undef32, data, zero); + + return create_nir_cs(sctx, &b); +} + /* Create a NIR compute shader implementing copy_image. * * This is the NIR version of the removed si_create_copy_image_compute_shader() [TGSI]. diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index 90e34fe0b36..045ea934cf4 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -429,45 +429,6 @@ void *si_create_query_result_cs(struct si_context *sctx) return sctx->b.create_compute_state(&sctx->b, &state); } -void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx) -{ - static const char text[] = - "COMP\n" - "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n" - "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" - "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" - "PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n" - "DCL SV[0], THREAD_ID\n" - "DCL SV[1], BLOCK_ID\n" - "DCL SV[2], CS_USER_DATA_AMD\n" - "DCL IMAGE[0], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" - "DCL IMAGE[1], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" - "DCL TEMP[0..4], LOCAL\n" - "IMM[0] UINT32 {64, 1, 65535, 16}\n" - - "UMAD TEMP[0].xz, SV[1].xyyy, IMM[0].xyyy, SV[0].xyyy\n" /* threadID.xz */ - "AND TEMP[1].xz, SV[2], IMM[0].zzzz\n" /* src.xz */ - "UADD TEMP[1].xz, TEMP[1], TEMP[0]\n" /* src.xz + threadID.xz */ - "LOAD TEMP[3], IMAGE[0], TEMP[1].xzzz, 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" - "USHR TEMP[2].xz, SV[2], IMM[0].wwww\n" /* dst.xz */ - "UADD TEMP[2].xz, TEMP[2], TEMP[0]\n" /* dst.xz + threadID.xz */ - "STORE IMAGE[1], TEMP[2].xzzz, TEMP[3], 1D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" - "END\n"; - - struct tgsi_token tokens[1024]; - struct pipe_compute_state state = {0}; - - if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { - assert(false); - return NULL; - } - - state.ir_type = PIPE_SHADER_IR_TGSI; - state.prog = tokens; - - return ctx->create_compute_state(ctx, &state); -} - /* Create a compute shader implementing DCC decompression via a blit. * This is a trivial copy_image shader except that it has a variable block * size and a barrier.