diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 00e6a98ff76..b2e552c3a31 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -132,8 +132,11 @@ static void si_create_compute_state_async(void *job, int thread_index) program->shader.is_monolithic = true; + /* Variable block sizes need 10 bits (1 + log2(SI_MAX_VARIABLE_THREADS_PER_BLOCK)) per dim. + * We pack them into a single user SGPR. + */ unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel->info.uses_grid_size ? 3 : 0) + - (sel->info.uses_variable_block_size ? 3 : 0) + + (sel->info.uses_variable_block_size ? 1 : 0) + sel->info.base.cs.user_data_components_amd; /* Fast path for compute shaders - some descriptors passed via user SGPRs. */ @@ -707,7 +710,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr unsigned block_size_reg = grid_size_reg + /* 12 bytes = 3 dwords. */ 12 * sel->info.uses_grid_size; - unsigned cs_user_data_reg = block_size_reg + 12 * program->sel.info.uses_variable_block_size; + unsigned cs_user_data_reg = block_size_reg + 4 * program->sel.info.uses_variable_block_size; radeon_begin(cs); @@ -730,10 +733,8 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr } if (sel->info.uses_variable_block_size) { - radeon_set_sh_reg_seq(cs, block_size_reg, 3); - radeon_emit(cs, info->block[0]); - radeon_emit(cs, info->block[1]); - radeon_emit(cs, info->block[2]); + radeon_set_sh_reg(cs, block_size_reg, + info->block[0] | (info->block[1] << 10) | (info->block[2] << 20)); } if (sel->info.base.cs.user_data_components_amd) { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b1691cf6a4d..65c574a60fb 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -702,7 +702,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) if (shader->selector->info.uses_grid_size) ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->args.num_work_groups); if (shader->selector->info.uses_variable_block_size) - ac_add_arg(&ctx->args, AC_ARG_SGPR, 3, AC_ARG_INT, &ctx->block_size); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->block_size); unsigned cs_user_data_dwords = shader->selector->info.base.cs.user_data_components_amd; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 0729d95a67e..74ae2889d2b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -412,8 +412,15 @@ static LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - assert(ctx->shader->selector->info.base.cs.local_size_variable); - return ac_get_arg(&ctx->ac, ctx->block_size); + assert(ctx->shader->selector->info.base.cs.local_size_variable && + ctx->shader->selector->info.uses_variable_block_size); + + LLVMValueRef chan[3] = { + si_unpack_param(ctx, ctx->block_size, 0, 10), + si_unpack_param(ctx, ctx->block_size, 10, 10), + si_unpack_param(ctx, ctx->block_size, 20, 10), + }; + return ac_build_gather_values(&ctx->ac, chan, 3); } static void si_llvm_declare_compute_memory(struct si_shader_context *ctx)