diff --git a/.pick_status.json b/.pick_status.json
index b1689716c00..e3d88d199cf 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -534,7 +534,7 @@
         "description": "aco/ra: don't write to exec/ttmp with mulk/addk/cmovk",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "e06773281b3ff7fff86a50e3d2ec4a58b3e035cb",
         "notes": null
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 3bef9354e60..98543a72fba 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -514,6 +514,23 @@ print_regs(ra_ctx& ctx, PhysRegInterval regs, const RegisterFile& reg_file)
    }
 }
 
+/* Returns true if "reg" passes every check below for "gfx_level": it must be at most
+ * vcc_hi or be m0, must not be flat_scr_lo/flat_scr_hi on GFX8-GFX9, and must not be
+ * register 104 or 105 on GFX9 and older (flat_scr on GFX7 or xnack_mask, per the flag name).
+ */
+bool
+is_sgpr_writable_without_side_effects(amd_gfx_level gfx_level, PhysReg reg)
+{
+   assert(reg < 256);
+   bool has_flat_scr_lo_gfx89 = gfx_level >= GFX8 && gfx_level <= GFX9;
+   bool has_flat_scr_lo_gfx7_or_xnack_mask = gfx_level <= GFX9;
+   /* 104 and 105 are both excluded, so the two comparisons have to be joined with &&;
+    * joining them with || would accept every register. */
+   return (reg <= vcc_hi || reg == m0) &&
+          (!has_flat_scr_lo_gfx89 || (reg != flat_scr_lo && reg != flat_scr_hi)) &&
+          (!has_flat_scr_lo_gfx7_or_xnack_mask || (reg != 104 && reg != 105));
+}
+
 unsigned
 get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr& instr,
                             unsigned idx, RegClass rc)
@@ -2883,7 +2900,8 @@ optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptroperands[1].isLiteral();
-   if (instr->operands[!literal_idx].physReg() >= 128)
+   PhysReg op_reg = instr->operands[!literal_idx].physReg();
+   if (!is_sgpr_writable_without_side_effects(ctx.program->gfx_level, op_reg))
       return;
 
    unsigned def_id = instr->definitions[0].tempId();