i965/fs: Try to avoid generating extra MOVs to do saturates.

This change (before the previous two) produced a .23% +/- .11%
performance improvement in Unigine Tropics at 1024x768 on IVB.

Total instructions: 269270 -> 262649
614/2148 programs affected (28.6%)
179386 -> 172765 instructions in affected programs (3.7% reduction)

v2: Move some of the logic of finding the instruction that produced
    the result of an expression tree to a helper.
This commit is contained in:
Eric Anholt 2012-03-10 13:48:42 -08:00
parent 3bdccbc3e0
commit 32ae8d3b32
3 changed files with 54 additions and 12 deletions

View file

@ -1717,6 +1717,35 @@ fs_visitor::virtual_grf_interferes(int a, int b)
return start < end;
}
/**
 * Returns the instruction that produced @p reg, if it can be identified.
 *
 * Callers sometimes want to take the result of an expression/variable
 * dereference tree and rewrite the instruction that generated it.  While
 * such a tree is being processed, the instructions emitted for it only
 * write temporaries that are dead outside of the tree, so an instruction
 * writing one of those temporaries may be redirected to write elsewhere.
 *
 * Only the final instruction, @p end, is considered as a candidate;
 * @p start is used solely to detect that nothing was emitted.
 *
 * Note that a non-NULL result does not guarantee that the instruction
 * wrote only @p reg -- it might be the size=4 destination of a texture
 * instruction.
 *
 * @param start  Instruction at the list tail before the tree was processed.
 * @param end    Instruction at the list tail after the tree was processed.
 * @param reg    Register holding the result of the tree.
 * @return @p end if it is a usable candidate, NULL otherwise.
 */
fs_inst *
fs_visitor::get_instruction_generating_reg(fs_inst *start,
                                           fs_inst *end,
                                           fs_reg reg)
{
   /* The tail did not move: there is no new instruction to hand back. */
   if (end == start)
      return NULL;

   /* Instructions carrying any of these flags are never returned. */
   if (end->predicated)
      return NULL;
   if (end->force_uncompressed)
      return NULL;
   if (end->force_sechalf)
      return NULL;

   /* The candidate has to write exactly the register we were asked about. */
   if (!reg.equals(&end->dst))
      return NULL;

   return end;
}
bool
fs_visitor::run()
{

View file

@ -487,6 +487,9 @@ public:
}
int type_size(const struct glsl_type *type);
/* Returns `end` when it differs from `start`, is not predicated, has
 * neither force_uncompressed nor force_sechalf set, and its dst equals
 * `reg`; returns NULL otherwise.
 */
fs_inst *get_instruction_generating_reg(fs_inst *start,
fs_inst *end,
fs_reg reg);
bool run();
void setup_paramvalues_refs();

View file

@ -172,12 +172,25 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
if (!sat_val)
return false;
fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
sat_val->accept(this);
fs_reg src = this->result;
this->result = fs_reg(this, ir->type);
fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
inst->saturate = true;
fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
/* If the last instruction from our accept() didn't generate our
* src, generate a saturated MOV
*/
fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
if (!modify || modify->regs_written() != 1) {
fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
inst->saturate = true;
} else {
modify->saturate = true;
this->result = src;
}
return true;
}
@ -591,9 +604,6 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
fs_inst *pre_rhs_inst,
fs_inst *last_rhs_inst)
{
if (pre_rhs_inst == last_rhs_inst)
return false; /* No instructions generated to work with. */
/* Only attempt if we're doing a direct assignment. */
if (ir->condition ||
!(ir->lhs->type->is_scalar() ||
@ -602,20 +612,20 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
return false;
/* Make sure the last instruction generated our source reg. */
if (last_rhs_inst->predicated ||
last_rhs_inst->force_uncompressed ||
last_rhs_inst->force_sechalf ||
!src.equals(&last_rhs_inst->dst))
fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
last_rhs_inst,
src);
if (!modify)
return false;
/* If last_rhs_inst wrote a different number of components than our LHS,
* we can't safely rewrite it.
*/
if (ir->lhs->type->vector_elements != last_rhs_inst->regs_written())
if (ir->lhs->type->vector_elements != modify->regs_written())
return false;
/* Success! Rewrite the instruction. */
last_rhs_inst->dst = dst;
modify->dst = dst;
return true;
}