mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
i965/fs: Move constant propagation to the same codebase as copy prop.
This means that we don't get constant prop across into the first block after a BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it across control flow at some point. More importantly, with the next commit it will help avoid O(n^2) with instruction count runtime for shaders that have many constant moves. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
098acf6c84
commit
fb5bf03a20
3 changed files with 124 additions and 165 deletions
|
|
@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
|
|||
c->prog_data.nr_pull_params = pull_uniform_count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to move immediate constants into the immediate
|
||||
* constant slot of following instructions.
|
||||
*
|
||||
* Immediate constants are a bit tricky -- they have to be in the last
|
||||
* operand slot, you can't do abs/negate on them,
|
||||
*/
|
||||
|
||||
bool
|
||||
fs_visitor::propagate_constants()
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
calculate_live_intervals();
|
||||
|
||||
foreach_list(node, &this->instructions) {
|
||||
fs_inst *inst = (fs_inst *)node;
|
||||
|
||||
if (inst->opcode != BRW_OPCODE_MOV ||
|
||||
inst->predicated ||
|
||||
inst->dst.file != GRF || inst->src[0].file != IMM ||
|
||||
inst->dst.type != inst->src[0].type ||
|
||||
(c->dispatch_width == 16 &&
|
||||
(inst->force_uncompressed || inst->force_sechalf)))
|
||||
continue;
|
||||
|
||||
/* Don't bother with cases where we should have had the
|
||||
* operation on the constant folded in GLSL already.
|
||||
*/
|
||||
if (inst->saturate)
|
||||
continue;
|
||||
|
||||
/* Found a move of a constant to a GRF. Find anything else using the GRF
|
||||
* before it's written, and replace it with the constant if we can.
|
||||
*/
|
||||
for (fs_inst *scan_inst = (fs_inst *)inst->next;
|
||||
!scan_inst->is_tail_sentinel();
|
||||
scan_inst = (fs_inst *)scan_inst->next) {
|
||||
if (scan_inst->opcode == BRW_OPCODE_DO ||
|
||||
scan_inst->opcode == BRW_OPCODE_WHILE ||
|
||||
scan_inst->opcode == BRW_OPCODE_ELSE ||
|
||||
scan_inst->opcode == BRW_OPCODE_ENDIF) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 2; i >= 0; i--) {
|
||||
if (scan_inst->src[i].file != GRF ||
|
||||
scan_inst->src[i].reg != inst->dst.reg ||
|
||||
scan_inst->src[i].reg_offset != inst->dst.reg_offset)
|
||||
continue;
|
||||
|
||||
/* Don't bother with cases where we should have had the
|
||||
* operation on the constant folded in GLSL already.
|
||||
*/
|
||||
if (scan_inst->src[i].negate || scan_inst->src[i].abs)
|
||||
continue;
|
||||
|
||||
switch (scan_inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
scan_inst->src[i] = inst->src[0];
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
case BRW_OPCODE_ADD:
|
||||
if (i == 1) {
|
||||
scan_inst->src[i] = inst->src[0];
|
||||
progress = true;
|
||||
} else if (i == 0 && scan_inst->src[1].file != IMM) {
|
||||
/* Fit this constant in by commuting the operands.
|
||||
* Exception: we can't do this for 32-bit integer MUL
|
||||
* because it's asymmetric.
|
||||
*/
|
||||
if (scan_inst->opcode == BRW_OPCODE_MUL &&
|
||||
(scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
|
||||
scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
|
||||
break;
|
||||
scan_inst->src[0] = scan_inst->src[1];
|
||||
scan_inst->src[1] = inst->src[0];
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
case BRW_OPCODE_IF:
|
||||
if (i == 1) {
|
||||
scan_inst->src[i] = inst->src[0];
|
||||
progress = true;
|
||||
} else if (i == 0 && scan_inst->src[1].file != IMM) {
|
||||
uint32_t new_cmod;
|
||||
|
||||
new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
|
||||
if (new_cmod != ~0u) {
|
||||
/* Fit this constant in by swapping the operands and
|
||||
* flipping the test
|
||||
*/
|
||||
scan_inst->src[0] = scan_inst->src[1];
|
||||
scan_inst->src[1] = inst->src[0];
|
||||
scan_inst->conditional_mod = new_cmod;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
if (i == 1) {
|
||||
scan_inst->src[i] = inst->src[0];
|
||||
progress = true;
|
||||
} else if (i == 0 && scan_inst->src[1].file != IMM) {
|
||||
scan_inst->src[0] = scan_inst->src[1];
|
||||
scan_inst->src[1] = inst->src[0];
|
||||
|
||||
/* If this was predicated, flipping operands means
|
||||
* we also need to flip the predicate.
|
||||
*/
|
||||
if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
|
||||
scan_inst->predicate_inverse =
|
||||
!scan_inst->predicate_inverse;
|
||||
}
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_RCP:
|
||||
/* The hardware doesn't do math on immediate values
|
||||
* (because why are you doing that, seriously?), but
|
||||
* the correct answer is to just constant fold it
|
||||
* anyway.
|
||||
*/
|
||||
assert(i == 0);
|
||||
if (inst->src[0].imm.f != 0.0f) {
|
||||
scan_inst->opcode = BRW_OPCODE_MOV;
|
||||
scan_inst->src[0] = inst->src[0];
|
||||
scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case FS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
scan_inst->src[i] = inst->src[0];
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (scan_inst->dst.file == GRF &&
|
||||
scan_inst->overwrites_reg(inst->dst)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
this->live_intervals_valid = false;
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
fs_visitor::opt_algebraic()
|
||||
{
|
||||
|
|
@ -2025,7 +1863,6 @@ fs_visitor::run()
|
|||
|
||||
progress = remove_duplicate_mrf_writes() || progress;
|
||||
|
||||
progress = propagate_constants() || progress;
|
||||
progress = opt_algebraic() || progress;
|
||||
progress = opt_cse() || progress;
|
||||
progress = opt_copy_propagate() || progress;
|
||||
|
|
|
|||
|
|
@ -240,12 +240,12 @@ public:
|
|||
void split_virtual_grfs();
|
||||
void setup_pull_constants();
|
||||
void calculate_live_intervals();
|
||||
bool propagate_constants();
|
||||
bool opt_algebraic();
|
||||
bool opt_cse();
|
||||
bool opt_cse_local(fs_bblock *block, exec_list *aeb);
|
||||
bool opt_copy_propagate();
|
||||
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
|
||||
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
|
||||
bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
|
||||
exec_list *acp);
|
||||
bool register_coalesce();
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
|
|||
bool
|
||||
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
{
|
||||
if (entry->src.file == IMM)
|
||||
return false;
|
||||
|
||||
if (inst->src[arg].file != entry->dst.file ||
|
||||
inst->src[arg].reg != entry->dst.reg ||
|
||||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
|
||||
|
|
@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
|||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
if (entry->src.file != IMM)
|
||||
return false;
|
||||
|
||||
for (int i = 2; i >= 0; i--) {
|
||||
if (inst->src[i].file != entry->dst.file ||
|
||||
inst->src[i].reg != entry->dst.reg ||
|
||||
inst->src[i].reg_offset != entry->dst.reg_offset)
|
||||
continue;
|
||||
|
||||
/* Don't bother with cases that should have been taken care of by the
|
||||
* GLSL compiler's constant folding pass.
|
||||
*/
|
||||
if (inst->src[i].negate || inst->src[i].abs)
|
||||
continue;
|
||||
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
inst->src[i] = entry->src;
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MUL:
|
||||
case BRW_OPCODE_ADD:
|
||||
if (i == 1) {
|
||||
inst->src[i] = entry->src;
|
||||
progress = true;
|
||||
} else if (i == 0 && inst->src[1].file != IMM) {
|
||||
/* Fit this constant in by commuting the operands.
|
||||
* Exception: we can't do this for 32-bit integer MUL
|
||||
* because it's asymmetric.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_MUL &&
|
||||
(inst->src[1].type == BRW_REGISTER_TYPE_D ||
|
||||
inst->src[1].type == BRW_REGISTER_TYPE_UD))
|
||||
break;
|
||||
inst->src[0] = inst->src[1];
|
||||
inst->src[1] = entry->src;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
case BRW_OPCODE_IF:
|
||||
if (i == 1) {
|
||||
inst->src[i] = entry->src;
|
||||
progress = true;
|
||||
} else if (i == 0 && inst->src[1].file != IMM) {
|
||||
uint32_t new_cmod;
|
||||
|
||||
new_cmod = brw_swap_cmod(inst->conditional_mod);
|
||||
if (new_cmod != ~0u) {
|
||||
/* Fit this constant in by swapping the operands and
|
||||
* flipping the test
|
||||
*/
|
||||
inst->src[0] = inst->src[1];
|
||||
inst->src[1] = entry->src;
|
||||
inst->conditional_mod = new_cmod;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_SEL:
|
||||
if (i == 1) {
|
||||
inst->src[i] = entry->src;
|
||||
progress = true;
|
||||
} else if (i == 0 && inst->src[1].file != IMM) {
|
||||
inst->src[0] = inst->src[1];
|
||||
inst->src[1] = entry->src;
|
||||
|
||||
/* If this was predicated, flipping operands means
|
||||
* we also need to flip the predicate.
|
||||
*/
|
||||
if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
|
||||
inst->predicate_inverse =
|
||||
!inst->predicate_inverse;
|
||||
}
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_RCP:
|
||||
/* The hardware doesn't do math on immediate values
|
||||
* (because why are you doing that, seriously?), but
|
||||
* the correct answer is to just constant fold it
|
||||
* anyway.
|
||||
*/
|
||||
assert(i == 0);
|
||||
if (inst->src[0].imm.f != 0.0f) {
|
||||
inst->opcode = BRW_OPCODE_MOV;
|
||||
inst->src[0] = entry->src;
|
||||
inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case FS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
inst->src[i] = entry->src;
|
||||
progress = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/** @file brw_fs_copy_propagation.cpp
|
||||
*
|
||||
* Support for local copy propagation by walking the list of instructions
|
||||
|
|
@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
|
|||
foreach_list(entry_node, acp) {
|
||||
acp_entry *entry = (acp_entry *)entry_node;
|
||||
|
||||
if (try_constant_propagate(inst, entry))
|
||||
progress = true;
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (try_copy_propagate(inst, i, entry))
|
||||
progress = true;
|
||||
|
|
@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
|
|||
((inst->src[0].file == GRF &&
|
||||
(inst->src[0].reg != inst->dst.reg ||
|
||||
inst->src[0].reg_offset != inst->dst.reg_offset)) ||
|
||||
inst->src[0].file == UNIFORM) &&
|
||||
inst->src[0].file == UNIFORM ||
|
||||
inst->src[0].file == IMM) &&
|
||||
inst->src[0].type == inst->dst.type &&
|
||||
!inst->saturate &&
|
||||
!inst->predicated &&
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue