mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 02:58:05 +02:00
i965: Make DCE set null destinations on messages with side effects.
(Co-authored by Matt Turner.) Image atomics, for example, return a value - but the shader may not want to use it. We assigned a useless VGRF destination. This seemed harmless, but it can actually be quite harmful. The register allocator has to assign that VGRF to a real register. It may assign the same actual GRF to the destination of an instruction that follows soon after. This results in a write-after-write (WAW) dependency, and stall. A number of "Deus Ex: Mankind Divided" shaders use image atomics, but don't use the return value. Several of these were hitting WAW stalls for nearly 14,000 (poorly estimated) cycles a pop. Making dead code elimination null out the destination avoids this issue. This patch cuts one shader's estimated cycles by -98.39%! Removing the message response should also help with data cluster bandwidth. On Skylake: (instruction counts remain identical) total cycles in shared programs: 255413890 -> 248081010 (-2.87%) cycles in affected programs: 12019948 -> 4687068 (-61.01%) helped: 24 HURT: 10 v2: Make can_omit_write independent of can_eliminate (Curro). Signed-off-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
90bf39cd2b
commit
9919542f1c
1 changed files with 41 additions and 13 deletions
|
|
@ -34,6 +34,42 @@
|
|||
* yet in the tail end of this block.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Is it safe to eliminate the instruction?
|
||||
*/
|
||||
static bool
|
||||
can_eliminate(const fs_inst *inst, BITSET_WORD *flag_live)
|
||||
{
|
||||
return !inst->is_control_flow() &&
|
||||
!inst->has_side_effects() &&
|
||||
!(flag_live[0] & inst->flags_written()) &&
|
||||
!inst->writes_accumulator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is it safe to omit the write, making the destination ARF null?
|
||||
*/
|
||||
static bool
|
||||
can_omit_write(const fs_inst *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_UNTYPED_ATOMIC:
|
||||
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||
return true;
|
||||
default:
|
||||
/* We can eliminate the destination write for ordinary instructions,
|
||||
* but not most SENDs.
|
||||
*/
|
||||
if (inst->opcode < 128 && inst->mlen == 0)
|
||||
return true;
|
||||
|
||||
/* It might not be safe for other virtual opcodes. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::dead_code_eliminate()
|
||||
{
|
||||
|
|
@ -52,29 +88,21 @@ fs_visitor::dead_code_eliminate()
|
|||
sizeof(BITSET_WORD));
|
||||
|
||||
foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
|
||||
if (inst->dst.file == VGRF && !inst->has_side_effects()) {
|
||||
if (inst->dst.file == VGRF) {
|
||||
const unsigned var = live_intervals->var_from_reg(inst->dst);
|
||||
bool result_live = false;
|
||||
|
||||
for (unsigned i = 0; i < regs_written(inst); i++)
|
||||
result_live |= BITSET_TEST(live, var + i);
|
||||
|
||||
if (!result_live) {
|
||||
if (!result_live &&
|
||||
(can_omit_write(inst) || can_eliminate(inst, flag_live))) {
|
||||
inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
|
||||
progress = true;
|
||||
|
||||
if (inst->writes_accumulator || inst->flags_written()) {
|
||||
inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
|
||||
} else {
|
||||
inst->opcode = BRW_OPCODE_NOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->dst.is_null() &&
|
||||
!inst->is_control_flow() &&
|
||||
!inst->has_side_effects() &&
|
||||
!(flag_live[0] & inst->flags_written()) &&
|
||||
!inst->writes_accumulator) {
|
||||
if (inst->dst.is_null() && can_eliminate(inst, flag_live)) {
|
||||
inst->opcode = BRW_OPCODE_NOP;
|
||||
progress = true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue