mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
i965/vec4: Rewrite dead code elimination to use live in/out.
Improves 359 shaders by >=10%
114 shaders by >=20%
91 shaders by >=30%
82 shaders by >=40%
22 shaders by >=50%
4 shaders by >=60%
2 shaders by >=80%
total instructions in shared programs: 5845346 -> 5822422 (-0.39%)
instructions in affected programs: 364979 -> 342055 (-6.28%)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
7a5cc789de
commit
5df88c2096
3 changed files with 170 additions and 155 deletions
|
|
@ -103,6 +103,7 @@ i965_FILES = \
|
||||||
brw_vec4.cpp \
|
brw_vec4.cpp \
|
||||||
brw_vec4_copy_propagation.cpp \
|
brw_vec4_copy_propagation.cpp \
|
||||||
brw_vec4_cse.cpp \
|
brw_vec4_cse.cpp \
|
||||||
|
brw_vec4_dead_code_eliminate.cpp \
|
||||||
brw_vec4_generator.cpp \
|
brw_vec4_generator.cpp \
|
||||||
brw_vec4_gs_visitor.cpp \
|
brw_vec4_gs_visitor.cpp \
|
||||||
brw_vec4_live_variables.cpp \
|
brw_vec4_live_variables.cpp \
|
||||||
|
|
|
||||||
|
|
@ -429,161 +429,6 @@ vec4_visitor::opt_reduce_swizzle()
|
||||||
return progress;
|
return progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
|
||||||
try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
|
|
||||||
const struct brw_context *brw)
|
|
||||||
{
|
|
||||||
if (inst->has_side_effects())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (new_writemask == 0) {
|
|
||||||
/* Don't dead code eliminate instructions that write to the
|
|
||||||
* accumulator as a side-effect. Instead just set the destination
|
|
||||||
* to the null register to free it.
|
|
||||||
*/
|
|
||||||
if (inst->writes_accumulator || inst->writes_flag()) {
|
|
||||||
inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
|
|
||||||
} else {
|
|
||||||
inst->opcode = BRW_OPCODE_NOP;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
} else if (inst->dst.writemask != new_writemask) {
|
|
||||||
switch (inst->opcode) {
|
|
||||||
case SHADER_OPCODE_TXF_CMS:
|
|
||||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
|
||||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
|
||||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
/* Do not set a writemask on Gen6 for math instructions, those are
|
|
||||||
* executed using align1 mode that does not support a destination mask.
|
|
||||||
*/
|
|
||||||
if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) {
|
|
||||||
inst->dst.writemask = new_writemask;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Must be called after calculate_live_intervals() to remove unused
|
|
||||||
* writes to registers -- register allocation will fail otherwise
|
|
||||||
* because something deffed but not used won't be considered to
|
|
||||||
* interfere with other regs.
|
|
||||||
*/
|
|
||||||
bool
|
|
||||||
vec4_visitor::dead_code_eliminate()
|
|
||||||
{
|
|
||||||
bool progress = false;
|
|
||||||
int pc = -1;
|
|
||||||
|
|
||||||
calculate_live_intervals();
|
|
||||||
|
|
||||||
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
|
||||||
pc++;
|
|
||||||
|
|
||||||
bool inst_writes_flag = false;
|
|
||||||
if (inst->dst.file != GRF) {
|
|
||||||
if (inst->dst.is_null() && inst->writes_flag()) {
|
|
||||||
inst_writes_flag = true;
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inst->dst.file == GRF) {
|
|
||||||
int write_mask = inst->dst.writemask;
|
|
||||||
|
|
||||||
for (int c = 0; c < 4; c++) {
|
|
||||||
if (write_mask & (1 << c)) {
|
|
||||||
assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc);
|
|
||||||
if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) {
|
|
||||||
write_mask &= ~(1 << c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
progress = try_eliminate_instruction(inst, write_mask, brw) ||
|
|
||||||
progress;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inst->predicate || inst->prev == NULL)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
int dead_channels;
|
|
||||||
if (inst_writes_flag) {
|
|
||||||
/* Arbitrarily chosen, other than not being an xyzw writemask. */
|
|
||||||
#define FLAG_WRITEMASK (1 << 5)
|
|
||||||
dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK;
|
|
||||||
} else {
|
|
||||||
dead_channels = inst->dst.writemask;
|
|
||||||
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
if (inst->src[i].file != GRF ||
|
|
||||||
inst->src[i].reg != inst->dst.reg)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for (int j = 0; j < 4; j++) {
|
|
||||||
int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j);
|
|
||||||
dead_channels &= ~(1 << swiz);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
|
|
||||||
inst, block) {
|
|
||||||
if (dead_channels == 0)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (inst_writes_flag) {
|
|
||||||
if (scan_inst->dst.is_null() && scan_inst->writes_flag()) {
|
|
||||||
scan_inst->opcode = BRW_OPCODE_NOP;
|
|
||||||
progress = true;
|
|
||||||
continue;
|
|
||||||
} else if (scan_inst->reads_flag()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inst->dst.file == scan_inst->dst.file &&
|
|
||||||
inst->dst.reg == scan_inst->dst.reg &&
|
|
||||||
inst->dst.reg_offset == scan_inst->dst.reg_offset) {
|
|
||||||
int new_writemask = scan_inst->dst.writemask & ~dead_channels;
|
|
||||||
|
|
||||||
progress = try_eliminate_instruction(scan_inst, new_writemask, brw) ||
|
|
||||||
progress;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
if (scan_inst->src[i].file != inst->dst.file ||
|
|
||||||
scan_inst->src[i].reg != inst->dst.reg)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for (int j = 0; j < 4; j++) {
|
|
||||||
int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j);
|
|
||||||
dead_channels &= ~(1 << swiz);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (progress) {
|
|
||||||
foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
|
|
||||||
if (inst->opcode == BRW_OPCODE_NOP) {
|
|
||||||
inst->remove(block);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
invalidate_live_intervals();
|
|
||||||
}
|
|
||||||
|
|
||||||
return progress;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
vec4_visitor::split_uniform_registers()
|
vec4_visitor::split_uniform_registers()
|
||||||
{
|
{
|
||||||
|
|
|
||||||
169
src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
Normal file
169
src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
Normal file
|
|
@ -0,0 +1,169 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2014 Intel Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "brw_vec4.h"
|
||||||
|
#include "brw_vec4_live_variables.h"
|
||||||
|
#include "brw_cfg.h"
|
||||||
|
|
||||||
|
/** @file brw_vec4_dead_code_eliminate.cpp
|
||||||
|
*
|
||||||
|
* Dataflow-aware dead code elimination.
|
||||||
|
*
|
||||||
|
* Walks the instruction list from the bottom, removing instructions that
|
||||||
|
* have results that both aren't used in later blocks and haven't been read
|
||||||
|
* yet in the tail end of this block.
|
||||||
|
*/
|
||||||
|
|
||||||
|
using namespace brw;
|
||||||
|
|
||||||
|
static bool
|
||||||
|
can_do_writemask(const struct brw_context *brw,
|
||||||
|
const vec4_instruction *inst)
|
||||||
|
{
|
||||||
|
switch (inst->opcode) {
|
||||||
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||||
|
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||||
|
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
/* The MATH instruction on Gen6 only executes in align1 mode, which does
|
||||||
|
* not support writemasking.
|
||||||
|
*/
|
||||||
|
if (brw->gen == 6 && inst->is_math())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (inst->is_tex())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
vec4_visitor::dead_code_eliminate()
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
calculate_live_intervals();
|
||||||
|
|
||||||
|
int num_vars = live_intervals->num_vars;
|
||||||
|
BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
|
||||||
|
BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);
|
||||||
|
|
||||||
|
foreach_block(block, cfg) {
|
||||||
|
memcpy(live, live_intervals->block_data[block->num].liveout,
|
||||||
|
sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
|
||||||
|
memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
|
||||||
|
sizeof(BITSET_WORD));
|
||||||
|
|
||||||
|
foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
|
||||||
|
if (inst->dst.file == GRF && !inst->has_side_effects()) {
|
||||||
|
bool result_live[4] = { false };
|
||||||
|
|
||||||
|
for (int c = 0; c < 4; c++) {
|
||||||
|
int var = inst->dst.reg * 4 + c;
|
||||||
|
result_live[c] = BITSET_TEST(live, var);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the instruction can't do writemasking, then it's all or
|
||||||
|
* nothing.
|
||||||
|
*/
|
||||||
|
if (!can_do_writemask(brw, inst)) {
|
||||||
|
bool result = result_live[0] | result_live[1] |
|
||||||
|
result_live[2] | result_live[3];
|
||||||
|
result_live[0] = result;
|
||||||
|
result_live[1] = result;
|
||||||
|
result_live[2] = result;
|
||||||
|
result_live[3] = result;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int c = 0; c < 4; c++) {
|
||||||
|
if (!result_live[c] && inst->dst.writemask & (1 << c)) {
|
||||||
|
inst->dst.writemask &= ~(1 << c);
|
||||||
|
progress = true;
|
||||||
|
|
||||||
|
if (inst->dst.writemask == 0) {
|
||||||
|
if (inst->writes_accumulator) {
|
||||||
|
inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
|
||||||
|
} else {
|
||||||
|
inst->opcode = BRW_OPCODE_NOP;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->dst.is_null() && inst->writes_flag()) {
|
||||||
|
if (!BITSET_TEST(flag_live, 0)) {
|
||||||
|
inst->opcode = BRW_OPCODE_NOP;
|
||||||
|
progress = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->dst.file == GRF && !inst->predicate) {
|
||||||
|
for (int c = 0; c < 4; c++) {
|
||||||
|
if (inst->dst.writemask & (1 << c)) {
|
||||||
|
int var = inst->dst.reg * 4 + c;
|
||||||
|
BITSET_CLEAR(live, var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->writes_flag()) {
|
||||||
|
BITSET_CLEAR(flag_live, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
if (inst->src[i].file == GRF) {
|
||||||
|
for (int c = 0; c < 4; c++) {
|
||||||
|
int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
|
||||||
|
int var = inst->src[i].reg * 4 + swiz;
|
||||||
|
|
||||||
|
BITSET_SET(live, var);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->reads_flag()) {
|
||||||
|
BITSET_SET(flag_live, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ralloc_free(live);
|
||||||
|
ralloc_free(flag_live);
|
||||||
|
|
||||||
|
if (progress) {
|
||||||
|
foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
|
||||||
|
if (inst->opcode == BRW_OPCODE_NOP) {
|
||||||
|
inst->remove(block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
invalidate_live_intervals();
|
||||||
|
}
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue