radeonsi: refine si_llvm_ls_build_end

1. Merge si_set_ls_return_value_for_tcs into si_llvm_ls_build_end, because both
   functions do the same job of building the LS return value.
2. Stop building a return value in mono mode with a different thread count; in
   that case the TCS uses the LS input as its input instead of the LS output.
3. Use si_insert_input_ret_float instead of open-coded
   LLVMBuildInsertValue/ac_to_float calls.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24204>
This commit is contained in:
Qiang Yu 2023-07-12 15:21:43 +08:00 committed by Marge Bot
parent 07fcb4aa19
commit 401a40a5f4
2 changed files with 29 additions and 30 deletions

View file

@ -478,16 +478,18 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
declare_vb_descriptor_input_sgprs(args, shader);
/* LS return values are inputs to the TCS main shader part. */
for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 2; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
/* VS outputs passed via VGPRs to TCS. */
if (shader->key.ge.opt.same_patch_vertices && !shader->use_aco) {
unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written);
for (i = 0; i < num_outputs * 4; i++)
if (!shader->is_monolithic || shader->key.ge.opt.same_patch_vertices) {
for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
for (i = 0; i < 2; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
/* VS outputs passed via VGPRs to TCS. */
if (shader->key.ge.opt.same_patch_vertices && !shader->use_aco) {
unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written);
for (i = 0; i < num_outputs * 4; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
}
}
} else {
/* TCS inputs are passed via VGPRs from VS. */

View file

@ -482,9 +482,15 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
ctx->return_value = ret;
}
/* Pass TCS inputs from LS to TCS on GFX9. */
static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
void si_llvm_ls_build_end(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
bool same_thread_count = shader->key.ge.opt.same_patch_vertices;
/* Only need return value when merged shader on part mode or mono mode with same thread count. */
if (ctx->screen->info.gfx_level < GFX9 || (shader->is_monolithic && !same_thread_count))
return;
if (!ctx->shader->is_monolithic)
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
@ -508,23 +514,16 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR);
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id)),
vgpr++, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids)),
vgpr++, "");
ctx->return_value = ret;
}
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.tcs_patch_id, vgpr++);
ret = si_insert_input_ret_float(ctx, ret, ctx->args->ac.tcs_rel_ids, vgpr++);
void si_llvm_ls_build_end(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
struct si_shader_info *info = &shader->selector->info;
LLVMValueRef *addrs = ctx->abi.outputs;
unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
if (same_thread_count) {
/* Same thread count is set only when mono mode. */
assert(shader->is_monolithic);
struct si_shader_info *info = &shader->selector->info;
LLVMValueRef *addrs = ctx->abi.outputs;
if (shader->key.ge.opt.same_patch_vertices) {
for (unsigned i = 0; i < info->num_outputs; i++) {
unsigned semantic = info->output_semantic[i];
int param = si_shader_io_get_unique_index(semantic);
@ -535,14 +534,12 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
value, ret_offset + param * 4 + chan, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, value, vgpr + param * 4 + chan, "");
}
}
}
if (ctx->screen->info.gfx_level >= GFX9)
si_set_ls_return_value_for_tcs(ctx);
ctx->return_value = ret;
}
/**