brw: Bound register coalesce rewrites by live range

When updating a register after successfully finding a pair to coalesce,
use the live range of the source register to walk only the instructions
that might use it.  Depending on the shader, this allows skipping a bunch
of blocks -- and also terminating the walk early.

Below are the differences in fossil compilation time on an MTL machine
compiling shaders for a BMG GPU.  The big win here was for Cyberpunk 2077.

```
// Differences at 95.0% confidence.

// Rise of the Tomb Raider (n=20)
   -0.0095 +/- 0.00706877
   -1.90572% +/- 1.40609%

// Alan Wake (n=20)
   -0.031 +/- 0.0172806
   -0.93599% +/- 0.51952%

// Borderlands 3 (n=15)
   -0.353333 +/- 0.118679
   -2.44307% +/- 0.80787%

// Oblivion Remastered (n=15)
   -0.134 +/- 0.026008
   -2.76898% +/- 0.531637%

// Baldur's Gate 3 (n=15)
   -0.954286 +/- 0.163625
   -2.21713% +/- 0.377562%

// Cyberpunk 2077 (n=20)
   -2.8665 +/- 0.228489
   -8.08661% +/- 0.621779%
```

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41495>
This commit is contained in:
Caio Oliveira 2026-05-09 19:14:27 -07:00 committed by Marge Bot
parent 821a812c7d
commit e2402f6a07

View file

@ -373,20 +373,40 @@ brw_opt_register_coalesce(brw_shader &s)
i += written;
}
foreach_block_and_inst(block, brw_inst, scan_inst, s.cfg) {
if (scan_inst->dst.file == VGRF &&
scan_inst->dst.nr == src_reg) {
scan_inst->dst.nr = dst_reg;
scan_inst->dst.offset = scan_inst->dst.offset % REG_SIZE +
dst_reg_offset[scan_inst->dst.offset / REG_SIZE] * REG_SIZE;
}
brw_range rewrite_range = { 0, 0 };
for (int i = 0; i < src_size; i++)
rewrite_range = merge(rewrite_range, live.vars_range[src_var[i]]);
assert(!rewrite_range.is_empty());
for (int j = 0; j < scan_inst->sources; j++) {
if (scan_inst->src[j].file == VGRF &&
scan_inst->src[j].nr == src_reg) {
scan_inst->src[j].nr = dst_reg;
scan_inst->src[j].offset = scan_inst->src[j].offset % REG_SIZE +
dst_reg_offset[scan_inst->src[j].offset / REG_SIZE] * REG_SIZE;
foreach_block(block, s.cfg) {
if (ips.range(block).last() < rewrite_range.start)
continue;
if (ips.range(block).start > rewrite_range.last())
break;
int scan_ip = ips.range(block).start - 1;
foreach_inst_in_block(brw_inst, scan_inst, block) {
scan_ip++;
if (scan_ip < rewrite_range.start)
continue;
if (scan_ip > rewrite_range.last())
break;
if (scan_inst->dst.file == VGRF &&
scan_inst->dst.nr == src_reg) {
scan_inst->dst.nr = dst_reg;
scan_inst->dst.offset = scan_inst->dst.offset % REG_SIZE +
dst_reg_offset[scan_inst->dst.offset / REG_SIZE] * REG_SIZE;
}
for (int j = 0; j < scan_inst->sources; j++) {
if (scan_inst->src[j].file == VGRF &&
scan_inst->src[j].nr == src_reg) {
scan_inst->src[j].nr = dst_reg;
scan_inst->src[j].offset = scan_inst->src[j].offset % REG_SIZE +
dst_reg_offset[scan_inst->src[j].offset / REG_SIZE] * REG_SIZE;
}
}
}
}