From e8ef184677f36cc729fc5893d7a8fa579923b21c Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Mon, 22 Jan 2024 02:06:02 -0800
Subject: [PATCH] intel/brw: Make register coalescing obey the g112-g127 restriction

opt_register_coalesce can sometimes unpleasantly coalesce both SENDS
payload sources into the larger of the two registers. This can break
the assumption that the VGRFs for sources 2-3 must occupy no more than
16 registers, so they fit in g112-127.

Reviewed-by: Ian Romanick
Part-of:
---
 .../compiler/brw_fs_register_coalesce.cpp | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp
index 80ac7c2fd14..18b0d25d1ac 100644
--- a/src/intel/compiler/brw_fs_register_coalesce.cpp
+++ b/src/intel/compiler/brw_fs_register_coalesce.cpp
@@ -188,6 +188,42 @@ can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg,
    return true;
 }
 
+/**
+ * Return true if coalescing src_reg into a larger dst_reg would grow the
+ * last block's EOT SEND payload (src[2] + src[3] VGRFs) beyond g112-g127.
+ */
+static bool
+would_violate_eot_restriction(const brw::simple_allocator &alloc,
+                              const cfg_t *cfg,
+                              unsigned dst_reg, unsigned src_reg)
+{
+   if (alloc.sizes[dst_reg] > alloc.sizes[src_reg]) {
+      foreach_inst_in_block_reverse(fs_inst, send, cfg->last_block()) {
+         if (send->opcode != SHADER_OPCODE_SEND || !send->eot)
+            continue; /* skip everything that is not an EOT SEND */
+
+         if ((send->src[2].file == VGRF && send->src[2].nr == src_reg) ||
+             (send->sources >= 4 &&
+              send->src[3].file == VGRF && send->src[3].nr == src_reg)) {
+            const unsigned s2 = /* size of src[2], 0 if not a VGRF */
+               send->src[2].file == VGRF ? alloc.sizes[send->src[2].nr] : 0;
+            const unsigned s3 = send->sources >= 4 && /* same for src[3] */
+               send->src[3].file == VGRF ?
+               alloc.sizes[send->src[3].nr] : 0;
+
+            const unsigned increase = /* growth if src_reg becomes dst_reg */
+               alloc.sizes[dst_reg] - alloc.sizes[src_reg];
+
+            if (s2 + s3 + increase > 15) /* NOTE(review): why 15, not 16? */
+               return true;
+         }
+         break; /* only the first EOT SEND encountered is examined */
+      }
+   }
+
+   return false;
+}
+
 bool
 brw_fs_opt_register_coalesce(fs_visitor &s)
 {
@@ -267,7 +303,8 @@ brw_fs_opt_register_coalesce(fs_visitor &s)
          dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i];
          src_var[i] = live.var_from_vgrf[src_reg] + i;
 
-         if (!can_coalesce_vars(live, s.cfg, block, inst, dst_var[i], src_var[i])) {
+         if (!can_coalesce_vars(live, s.cfg, block, inst, dst_var[i], src_var[i]) ||
+             would_violate_eot_restriction(s.alloc, s.cfg, dst_reg, src_reg)) {
             can_coalesce = false;
             src_reg = ~0u;
             break;