From eb249bb18e250dca198332def96a1ad921cfd77b Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Wed, 17 Jan 2024 15:58:13 +0100 Subject: [PATCH] aco: Only fix used variables to registers Reviewed-by: Friedrich Vock Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 16 +++++++++++++--- src/amd/compiler/aco_instruction_selection.h | 2 ++ .../compiler/aco_instruction_selection_setup.cpp | 2 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4c00d971a0c..939f2e6fc49 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9308,11 +9308,13 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) break; } case nir_intrinsic_store_scalar_arg_amd: { + BITSET_SET(ctx->output_args, nir_intrinsic_base(instr)); ctx->arg_temps[nir_intrinsic_base(instr)] = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); break; } case nir_intrinsic_store_vector_arg_amd: { + BITSET_SET(ctx->output_args, nir_intrinsic_base(instr)); ctx->arg_temps[nir_intrinsic_base(instr)] = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)); break; @@ -11668,19 +11670,27 @@ merged_wave_info_to_mask(isel_context* ctx, unsigned i) static void insert_rt_jump_next(isel_context& ctx, const struct ac_shader_args* args) { - unsigned src_count = ctx.args->arg_count; + unsigned src_count = 0; + for (unsigned i = 0; i < ctx.args->arg_count; i++) + src_count += !!BITSET_TEST(ctx.output_args, i); + Pseudo_instruction* ret = create_instruction(aco_opcode::p_return, Format::PSEUDO, src_count, 0); ctx.block->instructions.emplace_back(ret); - for (unsigned i = 0; i < src_count; i++) { + src_count = 0; + for (unsigned i = 0; i < ctx.args->arg_count; i++) { + if (!BITSET_TEST(ctx.output_args, i)) + continue; + enum ac_arg_regfile file = ctx.args->args[i].file; unsigned size = ctx.args->args[i].size; unsigned reg = ctx.args->args[i].offset + (file == AC_ARG_SGPR ? 0 : 256); RegClass type = RegClass(file == AC_ARG_SGPR ? RegType::sgpr : RegType::vgpr, size); Operand op = ctx.arg_temps[i].id() ? Operand(ctx.arg_temps[i], PhysReg{reg}) : Operand(PhysReg{reg}, type); - ret->operands[i] = op; + ret->operands[src_count] = op; + src_count++; } Builder bld(ctx.program, ctx.block); diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index 4c5115bbd22..31803e5ce3b 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -106,6 +106,8 @@ struct isel_context { /* WQM information */ uint32_t wqm_block_idx; uint32_t wqm_instruction_idx; + + BITSET_DECLARE(output_args, AC_MAX_ARGS); }; inline Temp diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 65b4468290e..ec7a29e8d55 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -635,6 +635,8 @@ init_context(isel_context* ctx, nir_shader* shader) ctx->program->constant_data.insert(ctx->program->constant_data.end(), (uint8_t*)shader->constant_data, (uint8_t*)shader->constant_data + shader->constant_data_size); + + BITSET_CLEAR_RANGE(ctx->output_args, 0, BITSET_SIZE(ctx->output_args)); } void