From 2ba6a1f3002db92df797d3b295fbcb3214d4dc09 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Mon, 9 Dec 2024 10:15:38 +0100
Subject: [PATCH] aco/ra: don't write to exec/ttmp with mulk/addk/cmovk

ttmp sgprs are readonly outside of trap handlers, so the instructions were
probably skipped.
RA should also never create additional exec writes.

Fixes: e06773281b3 ("aco/ra: Optimize some SOP2 instructions with literal to SOPK.")
Reviewed-by: Rhys Perry
(cherry picked from commit fe0c72caec78d9181e446d7a1b81dd6eac515b89)
Part-of:
---
 .pick_status.json                            |  2 +-
 src/amd/compiler/aco_register_allocation.cpp | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index b1689716c00..e3d88d199cf 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -534,7 +534,7 @@
         "description": "aco/ra: don't write to exec/ttmp with mulk/addk/cmovk",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "e06773281b3ff7fff86a50e3d2ec4a58b3e035cb",
         "notes": null
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 3bef9354e60..98543a72fba 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -514,6 +514,23 @@ print_regs(ra_ctx& ctx, PhysRegInterval regs, const RegisterFile& reg_file)
    }
 }
 
+/* Returns true when "reg" may be used as the destination of an SGPR write that
+ * register allocation introduces on its own: only registers up to vcc_hi, or
+ * m0, qualify, which keeps exec and the ttmp registers out. The flat_scr pair
+ * (GFX8-GFX9) and registers 104/105 (GFX9 and older; flat_scr on GFX7 or
+ * xnack_mask, going by the flag name) are excluded as well.
+ */
+bool
+is_sgpr_writable_without_side_effects(amd_gfx_level gfx_level, PhysReg reg)
+{
+   assert(reg < 256);
+   bool has_flat_scr_lo_gfx89 = gfx_level >= GFX8 && gfx_level <= GFX9;
+   bool has_flat_scr_lo_gfx7_or_xnack_mask = gfx_level <= GFX9;
+   return (reg <= vcc_hi || reg == m0) &&
+          (!has_flat_scr_lo_gfx89 || (reg != flat_scr_lo && reg != flat_scr_hi)) &&
+          (!has_flat_scr_lo_gfx7_or_xnack_mask || (reg != 104 && reg != 105));
+}
+
 unsigned
 get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
                             unsigned idx, RegClass rc)
@@ -2883,7 +2900,8 @@
optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptroperands[1].isLiteral(); - if (instr->operands[!literal_idx].physReg() >= 128) + PhysReg op_reg = instr->operands[!literal_idx].physReg(); + if (!is_sgpr_writable_without_side_effects(ctx.program->gfx_level, op_reg)) return; unsigned def_id = instr->definitions[0].tempId();