intel/fs: Don't add MOV instructions to DO blocks in combine constants

There was a subtle bug related to CFG tracking. Namely, some branch
instructions may point *only* to the block after the DO instruction
for the loop. If the MOV instructions are in the DO block, the may not
have liveness properly tracked.

Like in !25132, having the MOV instructions in blocks that might
contain other instructions helps scheduling.

shader-db:

All Broadwell and newer Intel GPUs had similar results (Ice Lake shown)
total cycles in shared programs: 848577248 -> 848557268 (<.01%)
cycles in affected programs: 78256396 -> 78236416 (-0.03%)
helped: 361 / HURT: 18

fossil-db:

All Skylake and newer Intel GPUs had similar results (Ice Lake shown)
Totals:
Cycles: 15021501924 -> 15021372904 (-0.00%); split: -0.00%, +0.00%

Totals from 735 (0.11% of 656080) affected shaders:
Cycles: 676429502 -> 676300482 (-0.02%); split: -0.02%, +0.00%

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26439>
This commit is contained in:
Ian Romanick 2023-11-29 11:15:26 -08:00 committed by Marge Bot
parent 23f07f4942
commit 65237f8bbc

View file

@ -1591,14 +1591,61 @@ fs_visitor::opt_combine_constants()
} }
} }
bool rebuild_cfg = false;
/* Insert MOVs to load the constant values into GRFs. */ /* Insert MOVs to load the constant values into GRFs. */
for (int i = 0; i < table.len; i++) { for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i]; struct imm *imm = &table.imm[i];
/* Insert it either before the instruction that generated the immediate /* Insert it either before the instruction that generated the immediate
* or after the last non-control flow instruction of the common ancestor. * or after the last non-control flow instruction of the common ancestor.
*/ */
exec_node *n = (imm->inst ? imm->inst : exec_node *n;
imm->block->last_non_control_flow_inst()->next); bblock_t *insert_block;
if (imm->inst != nullptr) {
n = imm->inst;
insert_block = imm->block;
} else {
if (imm->block->start()->opcode == BRW_OPCODE_DO) {
/* DO blocks are weird. They can contain only the single DO
* instruction. As a result, MOV instructions cannot be added to
* the DO block.
*/
bblock_t *next_block = imm->block->next();
if (next_block->starts_with_control_flow()) {
/* This is the difficult case. This occurs for code like
*
* do {
* do {
* ...
* } while (...);
* } while (...);
*
* when the MOV instructions need to be inserted between the
* two DO instructions.
*
* To properly handle this scenario, a new block would need to
* be inserted. Doing so would require modifying arbitrary many
* CONTINUE, BREAK, and WHILE instructions to point to the new
* block.
*
* It is unlikely that this would ever be correct. Instead,
* insert the MOV instructions in the known wrong place and
* rebuild the CFG at the end of the pass.
*/
insert_block = imm->block;
n = insert_block->last_non_control_flow_inst()->next;
rebuild_cfg = true;
} else {
insert_block = next_block;
n = insert_block->start();
}
} else {
insert_block = imm->block;
n = insert_block->last_non_control_flow_inst()->next;
}
}
/* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions: /* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:
* *
@ -1613,7 +1660,7 @@ fs_visitor::opt_combine_constants()
* both HF slots within a DWord with the constant. * both HF slots within a DWord with the constant.
*/ */
const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1; const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
const fs_builder ibld = bld.at(imm->block, n).exec_all().group(width, 0); const fs_builder ibld = bld.at(insert_block, n).exec_all().group(width, 0);
fs_reg reg(VGRF, imm->nr); fs_reg reg(VGRF, imm->nr);
reg.offset = imm->subreg_offset; reg.offset = imm->subreg_offset;
@ -1785,8 +1832,16 @@ fs_visitor::opt_combine_constants()
} }
} }
if (rebuild_cfg) {
delete cfg;
cfg = NULL;
calculate_cfg();
}
ralloc_free(const_ctx); ralloc_free(const_ctx);
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES |
(rebuild_cfg ? DEPENDENCY_BLOCKS : DEPENDENCY_NOTHING));
return true; return true;
} }