diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 78f13f8a79a..3bef9354e60 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2933,21 +2933,20 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector sgpr_operands; for (unsigned i = 0; i < parallelcopy.size(); i++) { linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr(); - if (!sgpr_operands_alias_defs && parallelcopy[i].first.isTemp() && + if (!may_swap_sgprs && parallelcopy[i].first.isTemp() && parallelcopy[i].first.getTemp().type() == RegType::sgpr) { - unsigned reg = parallelcopy[i].first.physReg().reg(); - unsigned size = parallelcopy[i].first.getTemp().size(); - sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size); - - reg = parallelcopy[i].second.physReg().reg(); - size = parallelcopy[i].second.getTemp().size(); - if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size)) - sgpr_operands_alias_defs = true; + unsigned op_reg = parallelcopy[i].first.physReg().reg(); + unsigned def_reg = parallelcopy[i].second.physReg().reg(); + for (unsigned j = 0; j < parallelcopy[i].first.size(); j++) { + sgpr_operands.set(op_reg + j); + if (sgpr_operands.test(def_reg + j)) + may_swap_sgprs = true; + } } pc->operands[i] = parallelcopy[i].first; @@ -2961,7 +2960,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vectordefinitions[i].getTemp()); } - if (temp_in_scc && (sgpr_operands_alias_defs || linear_vgpr)) { + if (temp_in_scc && (may_swap_sgprs || linear_vgpr)) { /* disable definitions and re-enable operands */ RegisterFile tmp_file(register_file); for (const Definition& def : instr->definitions) { @@ -2975,7 +2974,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vectorpseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr; + pc->pseudo().needs_scratch_reg = may_swap_sgprs || linear_vgpr; pc->pseudo().tmp_in_scc = false; pc->pseudo().scratch_sgpr = scc; }