mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 20:00:11 +01:00
i965: Generalize VS compute-to-MRF for compute-to-another-GRF, too.
No statistically significant performance difference on glbenchmark 2.7 (n=60). It reduces cycles spent in the vertex shader by 3.3% +/- 0.8% (n=5), but that's only about .3% of all cycles spent according to the fixed shader_time. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
471af25fc5
commit
c9e48e5b08
3 changed files with 127 additions and 60 deletions
|
|
@ -680,12 +680,12 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
|
|||
}
|
||||
|
||||
/*
|
||||
* Tries to reduce extra MOV instructions by taking GRFs that get just
|
||||
* written and then MOVed into an MRF and making the original write of
|
||||
* the GRF write directly to the MRF instead.
|
||||
* Tries to reduce extra MOV instructions by taking temporary GRFs that get
|
||||
* just written and then MOVed into another reg and making the original write
|
||||
* of the GRF write directly to the final destination instead.
|
||||
*/
|
||||
bool
|
||||
vec4_visitor::opt_compute_to_mrf()
|
||||
vec4_visitor::opt_register_coalesce()
|
||||
{
|
||||
bool progress = false;
|
||||
int next_ip = 0;
|
||||
|
|
@ -699,24 +699,25 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
next_ip++;
|
||||
|
||||
if (inst->opcode != BRW_OPCODE_MOV ||
|
||||
(inst->dst.file != GRF && inst->dst.file != MRF) ||
|
||||
inst->predicate ||
|
||||
inst->dst.file != MRF || inst->src[0].file != GRF ||
|
||||
inst->src[0].file != GRF ||
|
||||
inst->dst.type != inst->src[0].type ||
|
||||
inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
|
||||
continue;
|
||||
|
||||
int mrf = inst->dst.reg;
|
||||
bool to_mrf = (inst->dst.file == MRF);
|
||||
|
||||
/* Can't compute-to-MRF this GRF if someone else was going to
|
||||
/* Can't coalesce this GRF if someone else was going to
|
||||
* read it later.
|
||||
*/
|
||||
if (this->virtual_grf_use[inst->src[0].reg] > ip)
|
||||
continue;
|
||||
|
||||
/* We need to check interference with the MRF between this
|
||||
* instruction and the earliest instruction involved in writing
|
||||
* the GRF we're eliminating. To do that, keep track of which
|
||||
* of our source channels we've seen initialized.
|
||||
/* We need to check interference with the final destination between this
|
||||
* instruction and the earliest instruction involved in writing the GRF
|
||||
* we're eliminating. To do that, keep track of which of our source
|
||||
* channels we've seen initialized.
|
||||
*/
|
||||
bool chans_needed[4] = {false, false, false, false};
|
||||
int chans_remaining = 0;
|
||||
|
|
@ -735,8 +736,9 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
}
|
||||
}
|
||||
|
||||
/* Now walk up the instruction stream trying to see if we can
|
||||
* rewrite everything writing to the GRF into the MRF instead.
|
||||
/* Now walk up the instruction stream trying to see if we can rewrite
|
||||
* everything writing to the temporary to write into the destination
|
||||
* instead.
|
||||
*/
|
||||
vec4_instruction *scan_inst;
|
||||
for (scan_inst = (vec4_instruction *)inst->prev;
|
||||
|
|
@ -745,22 +747,21 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
if (scan_inst->dst.file == GRF &&
|
||||
scan_inst->dst.reg == inst->src[0].reg &&
|
||||
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
|
||||
/* Found something writing to the reg we want to turn into
|
||||
* a compute-to-MRF.
|
||||
*/
|
||||
/* Found something writing to the reg we want to coalesce away. */
|
||||
if (to_mrf) {
|
||||
/* SEND instructions can't have MRF as a destination. */
|
||||
if (scan_inst->mlen)
|
||||
break;
|
||||
|
||||
/* SEND instructions can't have MRF as a destination. */
|
||||
if (scan_inst->mlen)
|
||||
break;
|
||||
|
||||
if (intel->gen >= 6) {
|
||||
/* gen6 math instructions must have the destination be
|
||||
* GRF, so no compute-to-MRF for them.
|
||||
*/
|
||||
if (scan_inst->is_math()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (intel->gen >= 6) {
|
||||
/* gen6 math instructions must have the destination be
|
||||
* GRF, so no compute-to-MRF for them.
|
||||
*/
|
||||
if (scan_inst->is_math()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we can't handle the swizzle, bail. */
|
||||
if (!scan_inst->can_reswizzle_dst(inst->dst.writemask,
|
||||
|
|
@ -784,9 +785,8 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
break;
|
||||
}
|
||||
|
||||
/* We don't handle flow control here. Most computation of
|
||||
* values that end up in MRFs are shortly before the MRF
|
||||
* write anyway.
|
||||
/* We don't handle flow control here. Most computation of values
|
||||
* that could be coalesced happens just before their use.
|
||||
*/
|
||||
if (scan_inst->opcode == BRW_OPCODE_DO ||
|
||||
scan_inst->opcode == BRW_OPCODE_WHILE ||
|
||||
|
|
@ -795,9 +795,11 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
break;
|
||||
}
|
||||
|
||||
/* You can't read from an MRF, so if someone else reads our
|
||||
* MRF's source GRF that we wanted to rewrite, that stops us.
|
||||
*/
|
||||
/* You can't read from an MRF, so if someone else reads our MRF's
|
||||
* source GRF that we wanted to rewrite, that stops us. If it's a
|
||||
* GRF we're trying to coalesce to, we don't actually handle
|
||||
* rewriting sources so bail in that case as well.
|
||||
*/
|
||||
bool interfered = false;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (scan_inst->src[i].file == GRF &&
|
||||
|
|
@ -809,30 +811,41 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
if (interfered)
|
||||
break;
|
||||
|
||||
/* If somebody else writes our MRF here, we can't
|
||||
* compute-to-MRF before that.
|
||||
*/
|
||||
if (scan_inst->dst.file == MRF && mrf == scan_inst->dst.reg)
|
||||
/* If somebody else writes our destination here, we can't coalesce
|
||||
* before that.
|
||||
*/
|
||||
if (scan_inst->dst.file == inst->dst.file &&
|
||||
scan_inst->dst.reg == inst->dst.reg) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (scan_inst->mlen > 0) {
|
||||
/* Found a SEND instruction, which means that there are
|
||||
* live values in MRFs from base_mrf to base_mrf +
|
||||
* scan_inst->mlen - 1. Don't go pushing our MRF write up
|
||||
* above it.
|
||||
*/
|
||||
if (mrf >= scan_inst->base_mrf &&
|
||||
mrf < scan_inst->base_mrf + scan_inst->mlen) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Check for reads of the register we're trying to coalesce into. We
|
||||
* can't go rewriting instructions above that to put some other value
|
||||
* in the register instead.
|
||||
*/
|
||||
if (to_mrf && scan_inst->mlen > 0) {
|
||||
if (inst->dst.reg >= scan_inst->base_mrf &&
|
||||
inst->dst.reg < scan_inst->base_mrf + scan_inst->mlen) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (scan_inst->src[i].file == inst->dst.file &&
|
||||
scan_inst->src[i].reg == inst->dst.reg &&
|
||||
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
|
||||
interfered = true;
|
||||
}
|
||||
}
|
||||
if (interfered)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (chans_remaining == 0) {
|
||||
/* If we've made it here, we have an inst we want to
|
||||
* compute-to-MRF, and a scan_inst pointing to the earliest
|
||||
* instruction involved in computing the value. Now go
|
||||
* rewrite the instruction stream between the two.
|
||||
/* If we've made it here, we have an MOV we want to coalesce out, and
|
||||
* a scan_inst pointing to the earliest instruction involved in
|
||||
* computing the value. Now go rewrite the instruction stream
|
||||
* between the two.
|
||||
*/
|
||||
|
||||
while (scan_inst != inst) {
|
||||
|
|
@ -841,9 +854,9 @@ vec4_visitor::opt_compute_to_mrf()
|
|||
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
|
||||
scan_inst->reswizzle_dst(inst->dst.writemask,
|
||||
inst->src[0].swizzle);
|
||||
scan_inst->dst.file = MRF;
|
||||
scan_inst->dst.reg = mrf;
|
||||
scan_inst->dst.reg_offset = 0;
|
||||
scan_inst->dst.file = inst->dst.file;
|
||||
scan_inst->dst.reg = inst->dst.reg;
|
||||
scan_inst->dst.reg_offset = inst->dst.reg_offset;
|
||||
scan_inst->saturate |= inst->saturate;
|
||||
}
|
||||
scan_inst = (vec4_instruction *)scan_inst->next;
|
||||
|
|
@ -1277,7 +1290,7 @@ vec4_visitor::run()
|
|||
progress = dead_code_eliminate() || progress;
|
||||
progress = opt_copy_propagation() || progress;
|
||||
progress = opt_algebraic() || progress;
|
||||
progress = opt_compute_to_mrf() || progress;
|
||||
progress = opt_register_coalesce() || progress;
|
||||
} while (progress);
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -334,7 +334,7 @@ public:
|
|||
bool virtual_grf_interferes(int a, int b);
|
||||
bool opt_copy_propagation();
|
||||
bool opt_algebraic();
|
||||
bool opt_compute_to_mrf();
|
||||
bool opt_register_coalesce();
|
||||
|
||||
vec4_instruction *emit(vec4_instruction *inst);
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
|
|||
v->dump_instructions();
|
||||
}
|
||||
|
||||
v->opt_compute_to_mrf();
|
||||
v->opt_register_coalesce();
|
||||
|
||||
if (print) {
|
||||
printf("%s: instructions after:\n", func);
|
||||
|
|
@ -78,7 +78,7 @@ _register_coalesce(vec4_visitor *v, const char *func)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_F(register_coalesce_test, test_easy_success)
|
||||
TEST_F(register_coalesce_test, test_compute_to_mrf)
|
||||
{
|
||||
src_reg something = src_reg(v, glsl_type::float_type);
|
||||
dst_reg temp = dst_reg(v, glsl_type::float_type);
|
||||
|
|
@ -143,3 +143,57 @@ TEST_F(register_coalesce_test, test_dp4_mrf)
|
|||
EXPECT_EQ(dp4->dst.file, MRF);
|
||||
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
|
||||
}
|
||||
|
||||
/* GRF-to-GRF coalescing of a DP4 result.
 *
 * A DP4 writes a scalar temporary which is then MOVed into the Y channel of
 * another GRF ("to").  After the pass the DP4 is expected to write "to"
 * directly, using the Y-only writemask of the MOV it replaced.
 */
TEST_F(register_coalesce_test, test_dp4_grf)
{
   src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
   src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);

   dst_reg to = dst_reg(v, glsl_type::vec4_type);
   dst_reg temp = dst_reg(v, glsl_type::float_type);

   vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
   to.writemask = WRITEMASK_Y;
   v->emit(v->MOV(to, src_reg(temp)));

   /* if we don't do something with the result, the automatic dead code
    * elimination will remove all our instructions.
    *
    * The source is negated so that this consuming MOV is not itself a
    * coalescing candidate: the pass skips MOVs whose source carries a
    * negate modifier.
    */
   src_reg src = src_reg(to);
   src.negate = true;
   v->emit(v->MOV(dst_reg(MRF, 0), src));

   register_coalesce(v);

   EXPECT_EQ(dp4->dst.reg, to.reg);
   EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
|
||||
|
||||
/* Negative test: a scalar MUL feeding a single-channel GRF MOV.
 *
 * Same shape as the DP4 case, but with a MUL writing the temporary.  The
 * pass is expected to leave the MUL's destination register alone.
 */
TEST_F(register_coalesce_test, test_channel_mul_grf)
{
   src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
   src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);

   dst_reg to = dst_reg(v, glsl_type::vec4_type);
   dst_reg temp = dst_reg(v, glsl_type::float_type);

   vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
   to.writemask = WRITEMASK_Y;
   v->emit(v->MOV(to, src_reg(temp)));

   /* if we don't do something with the result, the automatic dead code
    * elimination will remove all our instructions.
    *
    * The source is negated so that this consuming MOV is not itself a
    * coalescing candidate: the pass skips MOVs whose source carries a
    * negate modifier.
    */
   src_reg src = src_reg(to);
   src.negate = true;
   v->emit(v->MOV(dst_reg(MRF, 0), src));

   register_coalesce(v);

   /* This path isn't supported yet in the reswizzling code, so we're checking
    * that we haven't done anything bad to scalar non-DP[234]s.
    */
   EXPECT_NE(mul->dst.reg, to.reg);
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue