intel/brw: Make register coalescing obey the g112-g127 restriction

opt_register_coalesce can sometimes unpleasantly coalesce both
SENDS payload sources into the larger of the two registers.
This can break the assumption that the VGRFs for sources 2-3
must occupy no more than 16 registers, so they fit in g112-127.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27876>
This commit is contained in:
Kenneth Graunke 2024-01-22 02:06:02 -08:00 committed by Marge Bot
parent 1c1e79d75a
commit e8ef184677

View file

@ -188,6 +188,42 @@ can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
return true;
}
/**
* Check if coalescing this register would expand the size of the last
* SEND instruction's payload to more than would fit in g112-g127.
*/
static bool
would_violate_eot_restriction(const brw::simple_allocator &alloc,
const cfg_t *cfg,
unsigned dst_reg, unsigned src_reg)
{
if (alloc.sizes[dst_reg] > alloc.sizes[src_reg]) {
foreach_inst_in_block_reverse(fs_inst, send, cfg->last_block()) {
if (send->opcode != SHADER_OPCODE_SEND || !send->eot)
continue;
if ((send->src[2].file == VGRF && send->src[2].nr == src_reg) ||
(send->sources >= 4 &&
send->src[3].file == VGRF && send->src[3].nr == src_reg)) {
const unsigned s2 =
send->src[2].file == VGRF ? alloc.sizes[send->src[2].nr] : 0;
const unsigned s3 = send->sources >= 4 &&
send->src[3].file == VGRF ?
alloc.sizes[send->src[3].nr] : 0;
const unsigned increase =
alloc.sizes[dst_reg] - alloc.sizes[src_reg];
if (s2 + s3 + increase > 15)
return true;
}
break;
}
}
return false;
}
bool
brw_fs_opt_register_coalesce(fs_visitor &s)
{
@ -267,7 +303,8 @@ brw_fs_opt_register_coalesce(fs_visitor &s)
dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i];
src_var[i] = live.var_from_vgrf[src_reg] + i;
if (!can_coalesce_vars(live, s.cfg, block, inst, dst_var[i], src_var[i])) {
if (!can_coalesce_vars(live, s.cfg, block, inst, dst_var[i], src_var[i]) ||
would_violate_eot_restriction(s.alloc, s.cfg, dst_reg, src_reg)) {
can_coalesce = false;
src_reg = ~0u;
break;