aco/ra: don't write to exec/ttmp with mulk/addk/cmovk

ttmp sgprs are readonly outside of trap handlers, so the instructions were
probably skipped. RA should also never create additional exec writes.

Fixes: e06773281b ("aco/ra: Optimize some SOP2 instructions with literal to SOPK.")

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
(cherry picked from commit fe0c72caec)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32730>
This commit is contained in:
Georg Lehmann 2024-12-09 10:15:38 +01:00 committed by Dylan Baker
parent 851f519db6
commit 2ba6a1f300
2 changed files with 14 additions and 2 deletions

View file

@ -534,7 +534,7 @@
"description": "aco/ra: don't write to exec/ttmp with mulk/addk/cmovk",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "e06773281b3ff7fff86a50e3d2ec4a58b3e035cb",
"notes": null

View file

@ -514,6 +514,17 @@ print_regs(ra_ctx& ctx, PhysRegInterval regs, const RegisterFile& reg_file)
}
}
/**
 * Returns true if \p reg is an SGPR that may be used as an instruction
 * destination without the write touching a special register.
 *
 * Accepted registers are everything up to and including vcc_hi, plus m0.
 * Anything else below 256 (e.g. ttmp or exec) is rejected. Within the accepted
 * range, the registers that alias special state on older hardware are
 * excluded as well:
 *  - flat_scr_lo/flat_scr_hi on GFX8-GFX9,
 *  - SGPR 104/105 on GFX9 and older (flat_scr on GFX7 or xnack_mask, going by
 *    the naming used here).
 *
 * \param gfx_level hardware generation the shader is compiled for.
 * \param reg       physical register to check; must be < 256 (asserted).
 */
bool
is_sgpr_writable_without_side_effects(amd_gfx_level gfx_level, PhysReg reg)
{
   assert(reg < 256);
   bool has_flat_scr_lo_gfx89 = gfx_level >= GFX8 && gfx_level <= GFX9;
   bool has_flat_scr_lo_gfx7_or_xnack_mask = gfx_level <= GFX9;
   /* Both halves of each 64-bit special register pair must be excluded, so the
    * inner comparisons are joined with &&. Using || here would be a tautology
    * (no register equals both 104 and 105) and would never reject anything.
    */
   return (reg <= vcc_hi || reg == m0) &&
          (!has_flat_scr_lo_gfx89 || (reg != flat_scr_lo && reg != flat_scr_hi)) &&
          (!has_flat_scr_lo_gfx7_or_xnack_mask || (reg != 104 && reg != 105));
}
unsigned
get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
unsigned idx, RegClass rc)
@ -2883,7 +2894,8 @@ optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruc
return;
unsigned literal_idx = instr->operands[1].isLiteral();
if (instr->operands[!literal_idx].physReg() >= 128)
PhysReg op_reg = instr->operands[!literal_idx].physReg();
if (!is_sgpr_writable_without_side_effects(ctx.program->gfx_level, op_reg))
return;
unsigned def_id = instr->definitions[0].tempId();