i965/vec4: Rewrite dead code elimination to use live in/out.

Improves 359 shaders by >=10%
         114 shaders by >=20%
          91 shaders by >=30%
          82 shaders by >=40%
          22 shaders by >=50%
           4 shaders by >=60%
           2 shaders by >=80%

total instructions in shared programs: 5845346 -> 5822422 (-0.39%)
instructions in affected programs:     364979 -> 342055 (-6.28%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Matt Turner 2014-11-02 20:36:53 -08:00
parent 7a5cc789de
commit 5df88c2096
3 changed files with 170 additions and 155 deletions

View file

@ -103,6 +103,7 @@ i965_FILES = \
brw_vec4.cpp \ brw_vec4.cpp \
brw_vec4_copy_propagation.cpp \ brw_vec4_copy_propagation.cpp \
brw_vec4_cse.cpp \ brw_vec4_cse.cpp \
brw_vec4_dead_code_eliminate.cpp \
brw_vec4_generator.cpp \ brw_vec4_generator.cpp \
brw_vec4_gs_visitor.cpp \ brw_vec4_gs_visitor.cpp \
brw_vec4_live_variables.cpp \ brw_vec4_live_variables.cpp \

View file

@ -429,161 +429,6 @@ vec4_visitor::opt_reduce_swizzle()
return progress; return progress;
} }
/**
 * Try to shrink the set of channels written by \p inst to \p new_writemask.
 *
 * \param inst           instruction to modify in place
 * \param new_writemask  channels of the destination that are still needed
 * \param brw            context, consulted for the hardware generation
 *
 * \return true if the instruction was changed (destination retargeted to
 *         the null register, opcode replaced by NOP, or writemask narrowed),
 *         false if it was left untouched.
 */
static bool
try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
                          const struct brw_context *brw)
{
   /* Instructions with side effects are never touched. */
   if (inst->has_side_effects())
      return false;

   if (new_writemask == 0) {
      /* No channel of the result is needed.  An instruction that also
       * writes the accumulator or the flag register cannot simply vanish,
       * so only its destination is redirected to the null register;
       * anything else becomes a NOP.
       */
      const bool has_implicit_write =
         inst->writes_accumulator || inst->writes_flag();

      if (has_implicit_write)
         inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
      else
         inst->opcode = BRW_OPCODE_NOP;

      return true;
   }

   /* Nothing to narrow when the mask is already what was asked for. */
   if (inst->dst.writemask == new_writemask)
      return false;

   /* These opcodes never get their writemask narrowed. */
   switch (inst->opcode) {
   case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_GEN4_SCRATCH_READ:
   case VS_OPCODE_PULL_CONSTANT_LOAD:
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return false;
   default:
      break;
   }

   /* Do not set a writemask on Gen6 for math instructions, those are
    * executed using align1 mode that does not support a destination mask.
    */
   if (brw->gen == 6 && inst->is_math())
      return false;

   /* Texturing instructions keep their writemask as well. */
   if (inst->is_tex())
      return false;

   inst->dst.writemask = new_writemask;
   return true;
}
/**
* Interval-based dead code elimination.
*
* Removes unused writes to registers -- register allocation will fail
* otherwise because something defined but never used won't be considered
* to interfere with other regs.
*
* Relies on the data produced by calculate_live_intervals(), which this
* function calls itself before walking the program.
*
* Two things happen per instruction:
*  1. Channels of a GRF destination whose virtual_grf_end entry equals the
*     current instruction index are dropped from the writemask.
*  2. Earlier instructions in the same block that write the same register
*     (or the flag) are trimmed if the value is overwritten here before
*     anything reads it.
*
* \return true if any instruction was modified or removed.
*/
bool
vec4_visitor::dead_code_eliminate()
{
bool progress = false;
/* Index of the current instruction; incremented once per instruction
 * visited, across all blocks.
 */
int pc = -1;
calculate_live_intervals();
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
pc++;
bool inst_writes_flag = false;
/* Only GRF destinations are considered, with one exception: a null
 * destination on an instruction that writes the flag register.
 */
if (inst->dst.file != GRF) {
if (inst->dst.is_null() && inst->writes_flag()) {
inst_writes_flag = true;
} else {
continue;
}
}
if (inst->dst.file == GRF) {
/* Drop every written channel whose recorded end (virtual_grf_end)
 * is this very instruction.
 * NOTE(review): presumably virtual_grf_end holds the last instruction
 * index at which the channel is live -- confirm against
 * calculate_live_intervals().
 */
int write_mask = inst->dst.writemask;
for (int c = 0; c < 4; c++) {
if (write_mask & (1 << c)) {
assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc);
if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) {
write_mask &= ~(1 << c);
}
}
}
progress = try_eliminate_instruction(inst, write_mask, brw) ||
progress;
}
/* The backward scan below is skipped for predicated instructions and
 * for an instruction whose prev pointer is NULL.
 */
if (inst->predicate || inst->prev == NULL)
continue;
/* Channels written by this instruction that it does not read itself;
 * an earlier write to any of them is dead unless something in between
 * reads it.
 */
int dead_channels;
if (inst_writes_flag) {
/* Arbitrarily chosen, other than not being an xyzw writemask. */
#define FLAG_WRITEMASK (1 << 5)
dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK;
} else {
dead_channels = inst->dst.writemask;
/* Remove every channel of the destination register that one of the
 * three sources reads through its swizzle.
 */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != GRF ||
inst->src[i].reg != inst->dst.reg)
continue;
for (int j = 0; j < 4; j++) {
int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j);
dead_channels &= ~(1 << swiz);
}
}
}
/* NOTE(review): the macro name suggests this walks the instructions
 * preceding inst within the same block, in reverse -- confirm against
 * its definition.
 */
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
inst, block) {
if (dead_channels == 0)
break;
if (inst_writes_flag) {
/* A scanned null-destination flag write becomes a NOP; a scanned
 * flag read ends the scan.
 */
if (scan_inst->dst.is_null() && scan_inst->writes_flag()) {
scan_inst->opcode = BRW_OPCODE_NOP;
progress = true;
continue;
} else if (scan_inst->reads_flag()) {
break;
}
}
/* Same destination (file, register and offset): strip the channels
 * known to be dead from the scanned write.
 */
if (inst->dst.file == scan_inst->dst.file &&
inst->dst.reg == scan_inst->dst.reg &&
inst->dst.reg_offset == scan_inst->dst.reg_offset) {
int new_writemask = scan_inst->dst.writemask & ~dead_channels;
progress = try_eliminate_instruction(scan_inst, new_writemask, brw) ||
progress;
}
/* Any channel of the register that scan_inst reads is no longer
 * considered dead for the rest of the scan.
 */
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file != inst->dst.file ||
scan_inst->src[i].reg != inst->dst.reg)
continue;
for (int j = 0; j < 4; j++) {
int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j);
dead_channels &= ~(1 << swiz);
}
}
}
}
if (progress) {
/* Physically remove everything that was turned into a NOP above, then
 * invalidate the live intervals since the program changed.
 */
foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
if (inst->opcode == BRW_OPCODE_NOP) {
inst->remove(block);
}
}
invalidate_live_intervals();
}
return progress;
}
void void
vec4_visitor::split_uniform_registers() vec4_visitor::split_uniform_registers()
{ {

View file

@ -0,0 +1,169 @@
/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_vec4.h"
#include "brw_vec4_live_variables.h"
#include "brw_cfg.h"
/** @file brw_vec4_dead_code_eliminate.cpp
*
* Dataflow-aware dead code elimination.
*
* Walks each basic block's instruction list from the bottom, removing
* instructions (or individual destination channels) whose results are
* neither live on exit from the block nor read by a later instruction in
* the same block.
*/
using namespace brw;
/**
 * Report whether the destination writemask of \p inst may be narrowed.
 *
 * \param brw   context, consulted for the hardware generation
 * \param inst  instruction being examined
 *
 * \return false for the opcodes and instruction classes listed below,
 *         true for everything else.
 */
static bool
can_do_writemask(const struct brw_context *brw,
                 const vec4_instruction *inst)
{
   /* Opcodes that are excluded outright. */
   const bool opcode_forbids_mask =
      inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ ||
      inst->opcode == VS_OPCODE_PULL_CONSTANT_LOAD ||
      inst->opcode == VS_OPCODE_PULL_CONSTANT_LOAD_GEN7;

   if (opcode_forbids_mask)
      return false;

   /* The MATH instruction on Gen6 only executes in align1 mode, which does
    * not support writemasking.
    */
   const bool gen6_math = brw->gen == 6 && inst->is_math();

   /* Texturing instructions are excluded as well. */
   return !gen6_math && !inst->is_tex();
}
/**
 * Dead code elimination driven by per-block live-out sets.
 *
 * For every block the working sets \c live (one bit per register channel,
 * indexed as reg * 4 + channel) and \c flag_live (bit 0) start as copies of
 * the block's liveout / flag_liveout data.  The block is then walked
 * backwards; at each instruction:
 *
 *  - channels of a GRF destination that are not in \c live are removed from
 *    the writemask (all-or-nothing when can_do_writemask() says the
 *    instruction cannot be masked);
 *  - an instruction left with no live channel becomes a NOP, unless it also
 *    writes the accumulator or the flag register, in which case only its
 *    destination is redirected to the null register;
 *  - a null-destination flag write whose flag result is not live becomes a
 *    NOP;
 *  - for surviving instructions, written channels are cleared from the
 *    working sets and read channels are added.
 *
 * All NOPs are removed at the end and the live intervals are invalidated.
 *
 * \return true if any instruction was modified or removed.
 */
bool
vec4_visitor::dead_code_eliminate()
{
   bool progress = false;

   calculate_live_intervals();

   int num_vars = live_intervals->num_vars;
   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
   BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);

   foreach_block(block, cfg) {
      /* Start from what is live on exit from this block. */
      memcpy(live, live_intervals->block_data[block->num].liveout,
             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
      memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
             sizeof(BITSET_WORD));

      foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
         if (inst->dst.file == GRF && !inst->has_side_effects()) {
            bool result_live[4] = { false };

            for (int c = 0; c < 4; c++) {
               int var = inst->dst.reg * 4 + c;
               result_live[c] = BITSET_TEST(live, var);
            }

            /* If the instruction can't do writemasking, then it's all or
             * nothing.
             */
            if (!can_do_writemask(brw, inst)) {
               bool result = result_live[0] | result_live[1] |
                             result_live[2] | result_live[3];
               result_live[0] = result;
               result_live[1] = result;
               result_live[2] = result;
               result_live[3] = result;
            }

            /* Strip dead channels first; decide what to do with an empty
             * writemask only after the loop.  Deciding inside the loop would
             * make a "continue" bind to the channel loop, and retargeting
             * the destination mid-loop would let later iterations trim the
             * replacement register's writemask.
             */
            bool trimmed = false;
            for (int c = 0; c < 4; c++) {
               if (!result_live[c] && inst->dst.writemask & (1 << c)) {
                  inst->dst.writemask &= ~(1 << c);
                  trimmed = true;
               }
            }

            if (trimmed) {
               progress = true;

               if (inst->dst.writemask == 0) {
                  if (inst->writes_accumulator || inst->writes_flag()) {
                     /* The accumulator or flag write may still be needed:
                      * keep the instruction and only free its destination.
                      * A flag writer is re-examined by the null-destination
                      * check below.
                      */
                     inst->dst = dst_reg(retype(brw_null_reg(),
                                                inst->dst.type));
                  } else {
                     /* Fully dead.  Skip the liveness update so that the
                      * sources of an instruction about to be removed are
                      * not marked live.
                      */
                     inst->opcode = BRW_OPCODE_NOP;
                     continue;
                  }
               }
            }
         }

         /* A flag write with a null destination is dead when nothing later
          * reads the flag.
          */
         if (inst->dst.is_null() && inst->writes_flag()) {
            if (!BITSET_TEST(flag_live, 0)) {
               inst->opcode = BRW_OPCODE_NOP;
               progress = true;
               continue;
            }
         }

         /* An unpredicated write kills the channels it writes. */
         if (inst->dst.file == GRF && !inst->predicate) {
            for (int c = 0; c < 4; c++) {
               if (inst->dst.writemask & (1 << c)) {
                  int var = inst->dst.reg * 4 + c;
                  BITSET_CLEAR(live, var);
               }
            }
         }

         if (inst->writes_flag()) {
            BITSET_CLEAR(flag_live, 0);
         }

         /* Every channel selected by a GRF source swizzle becomes live. */
         for (int i = 0; i < 3; i++) {
            if (inst->src[i].file == GRF) {
               for (int c = 0; c < 4; c++) {
                  int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
                  int var = inst->src[i].reg * 4 + swiz;

                  BITSET_SET(live, var);
               }
            }
         }

         if (inst->reads_flag()) {
            BITSET_SET(flag_live, 0);
         }
      }
   }

   ralloc_free(live);
   ralloc_free(flag_live);

   if (progress) {
      /* Physically drop everything that was turned into a NOP above. */
      foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
         if (inst->opcode == BRW_OPCODE_NOP) {
            inst->remove(block);
         }
      }

      invalidate_live_intervals();
   }

   return progress;
}