aco: rework subdword definition RA validation a bit

Some issues fixed:
- check that byte=0 for p_as_uniform/etc
- validate 5+ byte definitions
- fix v3b pseudo definition validation
- fix validation when the index of the definition is not 0

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41018>
This commit is contained in:
Rhys Perry 2026-04-13 16:29:09 +01:00 committed by Marge Bot
parent 4249daeedd
commit 7c4429d21a

View file

@ -1430,17 +1430,20 @@ validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& i
}
bool
validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
validate_subdword_definition(Program* program, const aco_ptr<Instruction>& instr, unsigned idx)
{
Definition def = instr->definitions[0];
Definition def = instr->definitions[idx];
unsigned byte = def.physReg().byte();
if (instr->isPseudo() && gfx_level >= GFX8)
return true;
if (instr->isPseudo()) {
return byte % get_subdword_definition_caps(program, instr.get(), idx, def.regClass())
.placement_stride ==
0;
}
if (instr->isSDWA())
return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
byte % instr->sdwa().dst_sel.size() == 0;
if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
if (byte == 2 && can_use_opsel(program->gfx_level, instr->opcode, -1))
return true;
switch (instr->opcode) {
@ -1469,29 +1472,44 @@ validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>
return byte == 0;
}
unsigned
struct SubdwordDef {
unsigned offset;
unsigned bytes;
};
SubdwordDef
get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
{
amd_gfx_level gfx_level = program->gfx_level;
Definition def = instr->definitions[index];
SubdwordDef res;
res.offset = def.physReg().byte();
res.bytes = def.bytes();
if (instr->isPseudo())
return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
return res;
if (instr->isVALU() || instr->isVINTRP()) {
if (instr->isSDWA())
return instr->sdwa().dst_sel.size();
if (instr->isSDWA()) {
res.bytes = instr->sdwa().dst_sel.size();
return res;
}
if (instr_is_16bit(gfx_level, instr->opcode))
return 2;
if (instr_is_16bit(gfx_level, instr->opcode)) {
res.bytes = 2;
return res;
}
return 4;
/* The instruction still might write the upper half. In that case, it's the lower half that
* isn't preserved */
return {0, 4};
}
if (instr->isMIMG()) {
assert(instr->mimg().d16);
return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
}
if (program->dev.sram_ecc_enabled)
return {0, def.size() * 4};
if (instr->isMIMG() && instr->mimg().d16)
return res;
switch (instr->opcode) {
case aco_opcode::buffer_load_ubyte_d16:
@ -1526,10 +1544,16 @@ get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr,
case aco_opcode::global_load_short_d16_hi:
case aco_opcode::ds_read_u8_d16_hi:
case aco_opcode::ds_read_i8_d16_hi:
case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
case aco_opcode::ds_read_u16_d16_hi: {
res.bytes = 2;
return res;
}
case aco_opcode::buffer_load_format_d16_xyz:
case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
default: return def.size() * 4;
case aco_opcode::tbuffer_load_format_d16_xyz: {
res.bytes = 6;
return res;
}
default: return {0, def.size() * 4};
}
}
@ -1554,11 +1578,9 @@ validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
tmp.id(), regs[reg.reg_b + j]);
regs[reg.reg_b + j] = tmp.id();
}
if (def.regClass().is_subdword() && def.bytes() < 4) {
unsigned written = get_subdword_bytes_written(program, instr, i);
/* If written=4, the instruction still might write the upper half. In that case, it's
* the lower half that isn't preserved */
for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
if (def.regClass().is_subdword()) {
SubdwordDef written = get_subdword_bytes_written(program, instr, i);
for (unsigned j = written.offset; j < written.bytes; j++) {
unsigned written_reg = reg.reg() * 4u + j;
if (regs[written_reg] && regs[written_reg] != def.tempId())
err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
@ -1705,8 +1727,7 @@ validate_ra(Program* program)
if (def.physReg() == vcc && !program->needs_vcc)
err |= ra_fail(program, loc, Location(),
"Definition %d fixed to vcc but needs_vcc=false", i);
if (def.regClass().is_subdword() &&
!validate_subdword_definition(program->gfx_level, instr))
if (def.regClass().is_subdword() && !validate_subdword_definition(program, instr, i))
err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
if (!assignments[def.tempId()].firstloc.block)
assignments[def.tempId()].firstloc = loc;