mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
intel/fs: Don't add MOV instructions to DO blocks in combine constants
There was a subtle bug related to CFG tracking. Namely, some branch instructions may point *only* to the block after the DO instruction for the loop. If the MOV instructions are in the DO block, the may not have liveness properly tracked. Like in !25132, having the MOV instructions in blocks that might contain other instructions helps scheduling. shader-db: All Broadwell and newer Intel GPUs had similar results (Ice Lake shown) total cycles in shared programs: 848577248 -> 848557268 (<.01%) cycles in affected programs: 78256396 -> 78236416 (-0.03%) helped: 361 / HURT: 18 fossil-db: All Skylake and newer Intel GPUs had similar results (Ice Lake shown) Totals: Cycles: 15021501924 -> 15021372904 (-0.00%); split: -0.00%, +0.00% Totals from 735 (0.11% of 656080) affected shaders: Cycles: 676429502 -> 676300482 (-0.02%); split: -0.02%, +0.00% Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26439>
This commit is contained in:
parent
23f07f4942
commit
65237f8bbc
1 changed files with 59 additions and 4 deletions
|
|
@ -1591,14 +1591,61 @@ fs_visitor::opt_combine_constants()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool rebuild_cfg = false;
|
||||||
|
|
||||||
/* Insert MOVs to load the constant values into GRFs. */
|
/* Insert MOVs to load the constant values into GRFs. */
|
||||||
for (int i = 0; i < table.len; i++) {
|
for (int i = 0; i < table.len; i++) {
|
||||||
struct imm *imm = &table.imm[i];
|
struct imm *imm = &table.imm[i];
|
||||||
|
|
||||||
/* Insert it either before the instruction that generated the immediate
|
/* Insert it either before the instruction that generated the immediate
|
||||||
* or after the last non-control flow instruction of the common ancestor.
|
* or after the last non-control flow instruction of the common ancestor.
|
||||||
*/
|
*/
|
||||||
exec_node *n = (imm->inst ? imm->inst :
|
exec_node *n;
|
||||||
imm->block->last_non_control_flow_inst()->next);
|
bblock_t *insert_block;
|
||||||
|
if (imm->inst != nullptr) {
|
||||||
|
n = imm->inst;
|
||||||
|
insert_block = imm->block;
|
||||||
|
} else {
|
||||||
|
if (imm->block->start()->opcode == BRW_OPCODE_DO) {
|
||||||
|
/* DO blocks are weird. They can contain only the single DO
|
||||||
|
* instruction. As a result, MOV instructions cannot be added to
|
||||||
|
* the DO block.
|
||||||
|
*/
|
||||||
|
bblock_t *next_block = imm->block->next();
|
||||||
|
if (next_block->starts_with_control_flow()) {
|
||||||
|
/* This is the difficult case. This occurs for code like
|
||||||
|
*
|
||||||
|
* do {
|
||||||
|
* do {
|
||||||
|
* ...
|
||||||
|
* } while (...);
|
||||||
|
* } while (...);
|
||||||
|
*
|
||||||
|
* when the MOV instructions need to be inserted between the
|
||||||
|
* two DO instructions.
|
||||||
|
*
|
||||||
|
* To properly handle this scenario, a new block would need to
|
||||||
|
* be inserted. Doing so would require modifying arbitrary many
|
||||||
|
* CONTINUE, BREAK, and WHILE instructions to point to the new
|
||||||
|
* block.
|
||||||
|
*
|
||||||
|
* It is unlikely that this would ever be correct. Instead,
|
||||||
|
* insert the MOV instructions in the known wrong place and
|
||||||
|
* rebuild the CFG at the end of the pass.
|
||||||
|
*/
|
||||||
|
insert_block = imm->block;
|
||||||
|
n = insert_block->last_non_control_flow_inst()->next;
|
||||||
|
|
||||||
|
rebuild_cfg = true;
|
||||||
|
} else {
|
||||||
|
insert_block = next_block;
|
||||||
|
n = insert_block->start();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
insert_block = imm->block;
|
||||||
|
n = insert_block->last_non_control_flow_inst()->next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:
|
/* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:
|
||||||
*
|
*
|
||||||
|
|
@ -1613,7 +1660,7 @@ fs_visitor::opt_combine_constants()
|
||||||
* both HF slots within a DWord with the constant.
|
* both HF slots within a DWord with the constant.
|
||||||
*/
|
*/
|
||||||
const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
|
const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
|
||||||
const fs_builder ibld = bld.at(imm->block, n).exec_all().group(width, 0);
|
const fs_builder ibld = bld.at(insert_block, n).exec_all().group(width, 0);
|
||||||
|
|
||||||
fs_reg reg(VGRF, imm->nr);
|
fs_reg reg(VGRF, imm->nr);
|
||||||
reg.offset = imm->subreg_offset;
|
reg.offset = imm->subreg_offset;
|
||||||
|
|
@ -1785,8 +1832,16 @@ fs_visitor::opt_combine_constants()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rebuild_cfg) {
|
||||||
|
delete cfg;
|
||||||
|
cfg = NULL;
|
||||||
|
calculate_cfg();
|
||||||
|
}
|
||||||
|
|
||||||
ralloc_free(const_ctx);
|
ralloc_free(const_ctx);
|
||||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
|
||||||
|
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES |
|
||||||
|
(rebuild_cfg ? DEPENDENCY_BLOCKS : DEPENDENCY_NOTHING));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue