aco/ra: use SDWA for 16bit instructions when the second byte is blocked

Found by inspection. I think this can happen with pack_32_4x8(f2u8(a@16)),
which will use v_cvt_u16_f16 (a 16bit instruction) with a v1b definition.

No Foz-DB changes on Navi21.

Cc: mesa-stable

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28443>
This commit is contained in:
Georg Lehmann 2024-03-28 14:19:46 +01:00 committed by Marge Bot
parent e215200617
commit 80652de67b

View file

@@ -47,7 +47,8 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
RegClass rc);
std::pair<unsigned, unsigned>
get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr, RegClass rc);
void add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg);
void add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg,
bool allow_16bit_write);
struct assignment {
PhysReg reg;
@@ -697,7 +698,8 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr
}
void
add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg)
add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg reg,
bool allow_16bit_write)
{
if (instr->isPseudo())
return;
@@ -706,7 +708,7 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
amd_gfx_level gfx_level = program->gfx_level;
assert(instr->definitions[0].bytes() <= 2);
if (reg.byte() == 0 && instr_is_16bit(gfx_level, instr->opcode))
if (reg.byte() == 0 && allow_16bit_write && instr_is_16bit(gfx_level, instr->opcode))
return;
/* use SDWA */
@@ -715,6 +717,8 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
return;
}
assert(allow_16bit_write);
if (instr->opcode == aco_opcode::v_fma_mixlo_f16) {
instr->opcode = aco_opcode::v_fma_mixhi_f16;
return;
@@ -3230,7 +3234,8 @@ register_allocation(Program* program, live& live_vars, ra_test_policy policy)
PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, instr);
definition->setFixed(reg);
if (reg.byte() || register_file.test(reg, 4)) {
add_subdword_definition(program, instr, reg);
bool allow_16bit_write = reg.byte() % 2 == 0 && !register_file.test(reg, 2);
add_subdword_definition(program, instr, reg, allow_16bit_write);
definition = &instr->definitions[i]; /* add_subdword_definition can invalidate
the reference */
}