i965/fs: Reimplement dead_code_elimination().

total instructions in shared programs: 1653399 -> 1651790 (-0.10%)
instructions in affected programs:     92157 -> 90548 (-1.75%)
GAINED:                                2
LOST:                                  2

Also significantly reduces the number of optimization loop iterations:

total loop iterations in shared programs: 39724 -> 31651 (-20.32%)
loop iterations in affected programs:     21617 -> 13544 (-37.35%)

Including some great pathological cases, like 29 -> 3 in Strike Suit
Zero and 24 -> 3 in Dota2.

Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
Matt Turner 2014-04-12 17:40:18 -07:00
parent 596737ee91
commit f34f39330b
3 changed files with 117 additions and 56 deletions

View file

@ -58,6 +58,7 @@ i965_FILES = \
brw_fs_channel_expressions.cpp \
brw_fs_copy_propagation.cpp \
brw_fs_cse.cpp \
brw_fs_dead_code_eliminate.cpp \
brw_fs_fp.cpp \
brw_fs_generator.cpp \
brw_fs_live_variables.cpp \

View file

@ -2085,61 +2085,6 @@ fs_visitor::opt_algebraic()
return progress;
}
/**
 * Drops every instruction that writes a VGRF which no later instruction
 * goes on to use, based on the computed live intervals.
 *
 * Returns true when at least one instruction was removed from the list.
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool removed_any = false;
   int ip = 0;

   calculate_live_intervals();

   foreach_list_safe(node, &this->instructions) {
      fs_inst *cur = (fs_inst *)node;

      /* Only side-effect-free GRF writes are candidates for removal. */
      bool is_dead = cur->dst.file == GRF && !cur->has_side_effects();

      if (is_dead) {
         const int first_var =
            live_intervals->var_from_vgrf[cur->dst.reg] + cur->dst.reg_offset;

         /* The write is dead only if every register it touches has its
          * interval ending at this very instruction; stop at the first one
          * that does not.
          */
         for (int r = 0; is_dead && r < cur->regs_written; r++) {
            assert(live_intervals->end[first_var + r] >= ip);
            is_dead = live_intervals->end[first_var + r] == ip;
         }
      }

      if (is_dead) {
         const bool writes_accumulator =
            cur->opcode == BRW_OPCODE_ADDC ||
            cur->opcode == BRW_OPCODE_SUBB ||
            cur->opcode == BRW_OPCODE_MACH;

         if (writes_accumulator) {
            /* These opcodes also write the accumulator as a side effect,
             * so the instruction has to stay.  Point its destination at
             * the null register instead, which frees the GRF.
             */
            cur->dst = fs_reg(retype(brw_null_reg(), cur->dst.type));
         } else {
            cur->remove();
            removed_any = true;
         }
      }

      ip++;
   }

   if (removed_any)
      invalidate_live_intervals();

   return removed_any;
}
struct dead_code_hash_key
{
int vgrf;
@ -3249,8 +3194,8 @@ fs_visitor::run()
progress = opt_cse() || progress;
progress = opt_copy_propagate() || progress;
progress = opt_peephole_predicated_break() || progress;
progress = dead_code_eliminate() || progress;
progress = dead_code_eliminate_local() || progress;
progress = dead_code_eliminate() || progress;
progress = opt_peephole_sel() || progress;
progress = dead_control_flow_eliminate(this) || progress;
progress = opt_saturate_propagation() || progress;

View file

@ -0,0 +1,115 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_fs.h"
#include "brw_fs_live_variables.h"
#include "brw_cfg.h"
/** @file brw_fs_dead_code_eliminate.cpp
*
* Dataflow-aware dead code elimination.
*
* Walks the instruction list from the bottom, removing instructions that
* have results that both aren't used in later blocks and haven't been read
* yet in the tail end of this block.
*/
/**
 * Removes instructions whose GRF results are never read.
 *
 * Each basic block is processed on its own: a working liveness bitset is
 * seeded from the block's live-out set and the block is then walked from its
 * last instruction to its first.  An instruction that writes only GRF
 * variables absent from the working set (and that has no side effects and
 * does not write the flag register) is dead.  Every surviving instruction
 * marks the GRF variables it reads as live for the instructions above it.
 *
 * Dead instructions are first turned into NOPs and stripped from the
 * instruction list in a separate pass once all blocks have been visited.
 *
 * Returns true if any instruction was eliminated or had its destination
 * replaced by the null register.
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;

   cfg_t cfg(&instructions);

   calculate_live_intervals();

   int num_vars = live_intervals->num_vars;
   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));

   for (int b = 0; b < cfg.num_blocks; b++) {
      bblock_t *block = cfg.blocks[b];

      /* Start from whatever is live on exit from this block. */
      memcpy(live, live_intervals->bd[b].liveout,
             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));

      /* Walk the block bottom-up so that reads are seen before the writes
       * that feed them.
       */
      for (fs_inst *inst = (fs_inst *)block->end;
           inst != block->start->prev;
           inst = (fs_inst *)inst->prev) {
         if (inst->dst.file == GRF &&
             !inst->has_side_effects() &&
             !inst->writes_flag()) {
            bool result_live = false;

            if (inst->regs_written == 1) {
               int var = live_intervals->var_from_reg(&inst->dst);
               result_live = BITSET_TEST(live, var) != 0;
            } else {
               /* The write may begin part-way into the VGRF, so the
                * destination's register offset must be included, exactly as
                * is done for the sources below.
                */
               int var = live_intervals->var_from_vgrf[inst->dst.reg] +
                         inst->dst.reg_offset;

               /* One live register is enough to keep the instruction. */
               for (int i = 0; i < inst->regs_written && !result_live; i++) {
                  result_live = BITSET_TEST(live, var + i) != 0;
               }
            }

            if (!result_live) {
               progress = true;

               switch (inst->opcode) {
               case BRW_OPCODE_ADDC:
               case BRW_OPCODE_SUBB:
               case BRW_OPCODE_MACH:
                  /* These opcodes are kept even when their GRF result is
                   * dead; only the destination is redirected to the null
                   * register.  The instruction still executes, so fall out
                   * of the switch and record its sources as live.
                   */
                  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
                  break;
               default:
                  /* Mark for removal.  A dead instruction must not make its
                   * sources live, hence the continue.
                   */
                  inst->opcode = BRW_OPCODE_NOP;
                  continue;
               }
            }
         }

         /* Everything this instruction reads is live above it. */
         for (int i = 0; i < 3; i++) {
            if (inst->src[i].file == GRF) {
               int var = live_intervals->var_from_vgrf[inst->src[i].reg];
               const int num_read = inst->regs_read(this, i);

               for (int j = 0; j < num_read; j++) {
                  BITSET_SET(live, var + inst->src[i].reg_offset + j);
               }
            }
         }
      }
   }

   ralloc_free(live);

   if (progress) {
      /* Sweep out the instructions that were flagged as NOPs above. */
      foreach_list_safe(node, &this->instructions) {
         fs_inst *inst = (fs_inst *)node;

         if (inst->opcode == BRW_OPCODE_NOP) {
            inst->remove();
         }
      }

      invalidate_live_intervals();
   }

   return progress;
}