i965/fs: Move constant propagation to the same codebase as copy prop.

This means that constant propagation no longer reaches into the first block
after a BRW_OPCODE_IF or a BRW_OPCODE_DO, but there is hope of doing it
properly across control flow at some point.  More importantly, together with
the next commit it will help avoid runtime that is O(n^2) in the instruction
count for shaders that have many constant moves.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Eric Anholt 2012-09-21 13:11:54 +02:00
parent 098acf6c84
commit fb5bf03a20
3 changed files with 124 additions and 165 deletions

View file

@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
c->prog_data.nr_pull_params = pull_uniform_count;
}
/**
 * Pushes immediate constants forward into the instructions that consume them.
 *
 * For every qualifying MOV of an immediate into a GRF, the instructions that
 * follow are scanned and reads of that GRF are rewritten to use the
 * immediate directly.
 *
 * Immediate constants are a bit tricky -- they have to be in the last
 * operand slot, and you can't do abs/negate on them.  Where the constant
 * would land in operand 0 of a two-source instruction, the operands are
 * swapped (adjusting the conditional mod or predicate as needed) so that the
 * immediate ends up in operand 1.
 *
 * \return true if at least one instruction was rewritten.
 */
bool
fs_visitor::propagate_constants()
{
   bool changed = false;

   calculate_live_intervals();

   foreach_list(node, &this->instructions) {
      fs_inst *mov = (fs_inst *)node;

      /* Candidate filter: an unpredicated MOV of an immediate into a GRF
       * with matching source and destination types.
       */
      if (mov->opcode != BRW_OPCODE_MOV || mov->predicated)
         continue;
      if (mov->dst.file != GRF || mov->src[0].file != IMM)
         continue;
      if (mov->dst.type != mov->src[0].type)
         continue;
      if (c->dispatch_width == 16 &&
          (mov->force_uncompressed || mov->force_sechalf))
         continue;

      /* Don't bother with cases where we should have had the
       * operation on the constant folded in GLSL already.
       */
      if (mov->saturate)
         continue;

      /* Walk forward from the MOV, replacing reads of its destination with
       * the constant until the register is overwritten or the scan hits a
       * control flow instruction.
       */
      fs_inst *user = (fs_inst *)mov->next;
      while (!user->is_tail_sentinel()) {
         const bool stops_scan = user->opcode == BRW_OPCODE_DO ||
                                 user->opcode == BRW_OPCODE_WHILE ||
                                 user->opcode == BRW_OPCODE_ELSE ||
                                 user->opcode == BRW_OPCODE_ENDIF;
         if (stops_scan)
            break;

         for (int i = 2; i >= 0; i--) {
            const bool reads_mov_dst =
               user->src[i].file == GRF &&
               user->src[i].reg == mov->dst.reg &&
               user->src[i].reg_offset == mov->dst.reg_offset;
            if (!reads_mov_dst)
               continue;

            /* Don't bother with cases where we should have had the
             * operation on the constant folded in GLSL already.
             */
            if (user->src[i].abs || user->src[i].negate)
               continue;

            switch (user->opcode) {
            case BRW_OPCODE_MOV:
            case FS_OPCODE_PULL_CONSTANT_LOAD:
               /* The constant can be dropped straight into the operand. */
               user->src[i] = mov->src[0];
               changed = true;
               break;

            case BRW_OPCODE_MUL:
            case BRW_OPCODE_ADD:
            case BRW_OPCODE_CMP:
            case BRW_OPCODE_IF:
            case BRW_OPCODE_SEL:
               if (i == 1) {
                  /* Already the last operand slot: substitute directly. */
                  user->src[1] = mov->src[0];
                  changed = true;
                  break;
               }

               /* Only operand 0 remains interesting, and only while
                * operand 1 is still free to receive the immediate.
                */
               if (i != 0 || user->src[1].file == IMM)
                  break;

               if (user->opcode == BRW_OPCODE_MUL &&
                   (user->src[1].type == BRW_REGISTER_TYPE_D ||
                    user->src[1].type == BRW_REGISTER_TYPE_UD)) {
                  /* We can't commute the operands of a 32-bit integer MUL
                   * because it's asymmetric.
                   */
                  break;
               }

               if (user->opcode == BRW_OPCODE_CMP ||
                   user->opcode == BRW_OPCODE_IF) {
                  /* Swapping the operands means flipping the test; give up
                   * if there is no swapped form of the condition.
                   */
                  uint32_t swapped_cmod = brw_swap_cmod(user->conditional_mod);
                  if (swapped_cmod == ~0u)
                     break;
                  user->conditional_mod = swapped_cmod;
               } else if (user->opcode == BRW_OPCODE_SEL &&
                          user->conditional_mod == BRW_CONDITIONAL_NONE) {
                  /* If this was predicated, flipping operands means
                   * we also need to flip the predicate.
                   */
                  user->predicate_inverse = !user->predicate_inverse;
               }

               /* Fit the constant in by moving the old operand 1 into slot 0
                * and placing the immediate in slot 1.
                */
               user->src[0] = user->src[1];
               user->src[1] = mov->src[0];
               changed = true;
               break;

            case SHADER_OPCODE_RCP:
               /* The hardware doesn't do math on immediate values
                * (because why are you doing that, seriously?), but
                * the correct answer is to just constant fold it
                * anyway.
                */
               assert(i == 0);
               if (mov->src[0].imm.f != 0.0f) {
                  user->opcode = BRW_OPCODE_MOV;
                  user->src[0] = mov->src[0];
                  user->src[0].imm.f = 1.0f / user->src[0].imm.f;
                  changed = true;
               }
               break;

            default:
               break;
            }
         }

         /* Once the register is rewritten, later reads no longer see the
          * constant from this MOV.
          */
         if (user->dst.file == GRF && user->overwrites_reg(mov->dst))
            break;

         user = (fs_inst *)user->next;
      }
   }

   if (changed)
      this->live_intervals_valid = false;

   return changed;
}
bool
fs_visitor::opt_algebraic()
{
@ -2025,7 +1863,6 @@ fs_visitor::run()
progress = remove_duplicate_mrf_writes() || progress;
progress = propagate_constants() || progress;
progress = opt_algebraic() || progress;
progress = opt_cse() || progress;
progress = opt_copy_propagate() || progress;

View file

@ -240,12 +240,12 @@ public:
void split_virtual_grfs();
void setup_pull_constants();
void calculate_live_intervals();
bool propagate_constants();
bool opt_algebraic();
bool opt_cse();
bool opt_cse_local(fs_bblock *block, exec_list *aeb);
bool opt_copy_propagate();
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
exec_list *acp);
bool register_coalesce();

View file

@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
if (entry->src.file == IMM)
return false;
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
return true;
}
/**
 * Tries to replace reads of a copy's destination with the copy's immediate
 * source.
 *
 * Every source operand of \p inst that matches \p entry->dst (file, reg and
 * reg_offset) is considered.  Immediates have to end up in the last operand
 * slot of a two-source instruction, so when the match is in operand 0 the
 * operands are swapped where the opcode allows it, adjusting the conditional
 * mod or predicate to keep the same meaning.
 *
 * \param inst   instruction whose sources may be rewritten in place.
 * \param entry  available-copy entry; only entries whose source is an
 *               immediate (entry->src.file == IMM) are handled here.
 * \return true if at least one operand of \p inst was rewritten.
 */
bool
fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
{
   bool progress = false;

   if (entry->src.file != IMM)
      return false;

   for (int i = 2; i >= 0; i--) {
      if (inst->src[i].file != entry->dst.file ||
          inst->src[i].reg != entry->dst.reg ||
          inst->src[i].reg_offset != entry->dst.reg_offset)
         continue;

      /* Don't bother with cases that should have been taken care of by the
       * GLSL compiler's constant folding pass.
       */
      if (inst->src[i].negate || inst->src[i].abs)
         continue;

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         inst->src[i] = entry->src;
         progress = true;
         break;

      case BRW_OPCODE_MUL:
      case BRW_OPCODE_ADD:
         if (i == 1) {
            inst->src[i] = entry->src;
            progress = true;
         } else if (i == 0 && inst->src[1].file != IMM) {
            /* Fit this constant in by commuting the operands.
             * Exception: we can't do this for 32-bit integer MUL
             * because it's asymmetric.
             */
            if (inst->opcode == BRW_OPCODE_MUL &&
                (inst->src[1].type == BRW_REGISTER_TYPE_D ||
                 inst->src[1].type == BRW_REGISTER_TYPE_UD))
               break;
            inst->src[0] = inst->src[1];
            inst->src[1] = entry->src;
            progress = true;
         }
         break;

      case BRW_OPCODE_CMP:
      case BRW_OPCODE_IF:
         if (i == 1) {
            inst->src[i] = entry->src;
            progress = true;
         } else if (i == 0 && inst->src[1].file != IMM) {
            uint32_t new_cmod;

            new_cmod = brw_swap_cmod(inst->conditional_mod);
            if (new_cmod != ~0u) {
               /* Fit this constant in by swapping the operands and
                * flipping the test
                */
               inst->src[0] = inst->src[1];
               inst->src[1] = entry->src;
               inst->conditional_mod = new_cmod;
               progress = true;
            }
         }
         break;

      case BRW_OPCODE_SEL:
         if (i == 1) {
            inst->src[i] = entry->src;
            progress = true;
         } else if (i == 0 && inst->src[1].file != IMM) {
            inst->src[0] = inst->src[1];
            inst->src[1] = entry->src;

            /* If this was predicated, flipping operands means
             * we also need to flip the predicate.
             */
            if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
               inst->predicate_inverse =
                  !inst->predicate_inverse;
            }
            progress = true;
         }
         break;

      case SHADER_OPCODE_RCP:
         /* The hardware doesn't do math on immediate values
          * (because why are you doing that, seriously?), but
          * the correct answer is to just constant fold it
          * anyway.
          */
         assert(i == 0);

         /* Guard the division using the propagated constant itself.  At
          * this point inst->src[0] is still the operand that matched
          * entry->dst, so its imm field does not hold the value being
          * folded; the immediate lives in entry->src.
          */
         if (entry->src.imm.f != 0.0f) {
            inst->opcode = BRW_OPCODE_MOV;
            inst->src[0] = entry->src;
            inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
            progress = true;
         }
         break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
         inst->src[i] = entry->src;
         progress = true;
         break;

      default:
         break;
      }
   }

   return progress;
}
/** @file brw_fs_copy_propagation.cpp
*
* Support for local copy propagation by walking the list of instructions
@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
foreach_list(entry_node, acp) {
acp_entry *entry = (acp_entry *)entry_node;
if (try_constant_propagate(inst, entry))
progress = true;
for (int i = 0; i < 3; i++) {
if (try_copy_propagate(inst, i, entry))
progress = true;
@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
((inst->src[0].file == GRF &&
(inst->src[0].reg != inst->dst.reg ||
inst->src[0].reg_offset != inst->dst.reg_offset)) ||
inst->src[0].file == UNIFORM) &&
inst->src[0].file == UNIFORM ||
inst->src[0].file == IMM) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
!inst->predicated &&