From e2402f6a0794544476bf3d6f490a1cc18d262a1b Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Sat, 9 May 2026 19:14:27 -0700 Subject: [PATCH] brw: Bound register coalesce rewrites by live range When updating a register after successfully finding a pair to coalesce, use the live range of the source register to walk only the instructions that might use it. Depending on the shader this allows skipping a bunch of blocks -- and also terminating early. Below are fossil compilation times on an MTL machine compiling shaders for a BMG GPU; the big win here was for Cyberpunk 2077. ``` // Differences at 95.0% confidence. // Rise of the Tomb Raider (n=20) -0.0095 +/- 0.00706877 -1.90572% +/- 1.40609% // Alan Wake (n=20) -0.031 +/- 0.0172806 -0.93599% +/- 0.51952% // Borderlands 3 (n=15) -0.353333 +/- 0.118679 -2.44307% +/- 0.80787% // Oblivion Remastered (n=15) -0.134 +/- 0.026008 -2.76898% +/- 0.531637% // Baldur's Gate 3 (n=15) -0.954286 +/- 0.163625 -2.21713% +/- 0.377562% // Cyberpunk 2077 (n=20) -2.8665 +/- 0.228489 -8.08661% +/- 0.621779% ``` Reviewed-by: Ian Romanick Part-of: --- .../brw/brw_opt_register_coalesce.cpp | 46 +++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/intel/compiler/brw/brw_opt_register_coalesce.cpp b/src/intel/compiler/brw/brw_opt_register_coalesce.cpp index 336a37afdf0..448d6480240 100644 --- a/src/intel/compiler/brw/brw_opt_register_coalesce.cpp +++ b/src/intel/compiler/brw/brw_opt_register_coalesce.cpp @@ -373,20 +373,40 @@ brw_opt_register_coalesce(brw_shader &s) i += written; } - foreach_block_and_inst(block, brw_inst, scan_inst, s.cfg) { - if (scan_inst->dst.file == VGRF && - scan_inst->dst.nr == src_reg) { - scan_inst->dst.nr = dst_reg; - scan_inst->dst.offset = scan_inst->dst.offset % REG_SIZE + - dst_reg_offset[scan_inst->dst.offset / REG_SIZE] * REG_SIZE; - } + brw_range rewrite_range = { 0, 0 }; + for (int i = 0; i < src_size; i++) + rewrite_range = merge(rewrite_range, live.vars_range[src_var[i]]); 
+ assert(!rewrite_range.is_empty()); - for (int j = 0; j < scan_inst->sources; j++) { - if (scan_inst->src[j].file == VGRF && - scan_inst->src[j].nr == src_reg) { - scan_inst->src[j].nr = dst_reg; - scan_inst->src[j].offset = scan_inst->src[j].offset % REG_SIZE + - dst_reg_offset[scan_inst->src[j].offset / REG_SIZE] * REG_SIZE; + foreach_block(block, s.cfg) { + if (ips.range(block).last() < rewrite_range.start) + continue; + if (ips.range(block).start > rewrite_range.last()) + break; + + int scan_ip = ips.range(block).start - 1; + foreach_inst_in_block(brw_inst, scan_inst, block) { + scan_ip++; + + if (scan_ip < rewrite_range.start) + continue; + if (scan_ip > rewrite_range.last()) + break; + + if (scan_inst->dst.file == VGRF && + scan_inst->dst.nr == src_reg) { + scan_inst->dst.nr = dst_reg; + scan_inst->dst.offset = scan_inst->dst.offset % REG_SIZE + + dst_reg_offset[scan_inst->dst.offset / REG_SIZE] * REG_SIZE; + } + + for (int j = 0; j < scan_inst->sources; j++) { + if (scan_inst->src[j].file == VGRF && + scan_inst->src[j].nr == src_reg) { + scan_inst->src[j].nr = dst_reg; + scan_inst->src[j].offset = scan_inst->src[j].offset % REG_SIZE + + dst_reg_offset[scan_inst->src[j].offset / REG_SIZE] * REG_SIZE; + } } } }