mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
vc4: Add support for coalescing ALU ops into tex_[srtb] MOVs.
This isn't as complete as I would like (can't merge interpolation because of the implicit r5 dependency, doesn't work with control flow), but this was cheap and easy.

Improves 3DMMES Taiji performance by 1.15353% +/- 0.299896% (n=29, 16)

total instructions in shared programs: 99810 -> 99059 (-0.75%)
instructions in affected programs: 10705 -> 9954 (-7.02%)
This commit is contained in:
parent
f4baf80993
commit
4690a93b12
4 changed files with 37 additions and 29 deletions
|
|
@ -24,8 +24,8 @@
|
|||
/**
|
||||
* @file vc4_opt_coalesce_ff_writes.c
|
||||
*
|
||||
* This modifies instructions that generate the value consumed by a VPM write
|
||||
* to write directly into the VPM.
|
||||
* This modifies instructions that generate the value consumed by a VPM or TMU
|
||||
* coordinate write to write directly into the VPM or TMU.
|
||||
*/
|
||||
|
||||
#include "vc4_qir.h"
|
||||
|
|
@ -33,9 +33,6 @@
|
|||
bool
|
||||
qir_opt_coalesce_ff_writes(struct vc4_compile *c)
|
||||
{
|
||||
if (c->stage == QSTAGE_FRAG)
|
||||
return false;
|
||||
|
||||
/* For now, only do this pass when we don't have control flow. */
|
||||
struct qblock *block = qir_entry_block(c);
|
||||
if (block != qir_exit_block(c))
|
||||
|
|
@ -60,7 +57,7 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
|
|||
if (mov_inst->src[0].file != QFILE_TEMP)
|
||||
continue;
|
||||
|
||||
if (mov_inst->dst.file != QFILE_VPM)
|
||||
if (!(mov_inst->dst.file == QFILE_VPM || qir_is_tex(mov_inst)))
|
||||
continue;
|
||||
|
||||
uint32_t temp = mov_inst->src[0].index;
|
||||
|
|
@ -71,24 +68,37 @@ qir_opt_coalesce_ff_writes(struct vc4_compile *c)
|
|||
if (!inst)
|
||||
continue;
|
||||
|
||||
/* Don't bother trying to fold in an ALU op using a uniform to
|
||||
* a texture op, as we'll just have to lower the uniform back
|
||||
* out.
|
||||
*/
|
||||
if (qir_is_tex(mov_inst) && qir_has_uniform_read(inst))
|
||||
continue;
|
||||
|
||||
if (qir_depends_on_flags(inst) || inst->sf)
|
||||
continue;
|
||||
|
||||
if (qir_has_side_effects(c, inst) ||
|
||||
qir_has_side_effect_reads(c, inst)) {
|
||||
qir_has_side_effect_reads(c, inst) ||
|
||||
inst->op == QOP_VARY_ADD_C) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Move the generating instruction to the end of the program
|
||||
* to maintain the order of the VPM writes.
|
||||
/* Move the generating instruction into the position of the FF
|
||||
* write.
|
||||
*/
|
||||
c->defs[inst->dst.index] = NULL;
|
||||
inst->dst.file = mov_inst->dst.file;
|
||||
inst->dst.index = mov_inst->dst.index;
|
||||
if (qir_has_implicit_tex_uniform(mov_inst)) {
|
||||
inst->src[qir_get_tex_uniform_src(inst)] =
|
||||
mov_inst->src[qir_get_tex_uniform_src(mov_inst)];
|
||||
}
|
||||
|
||||
list_del(&inst->link);
|
||||
list_addtail(&inst->link, &mov_inst->link);
|
||||
qir_remove_instruction(c, mov_inst);
|
||||
|
||||
c->defs[inst->dst.index] = NULL;
|
||||
inst->dst.file = QFILE_VPM;
|
||||
inst->dst.index = 0;
|
||||
qir_remove_instruction(c, mov_inst);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -179,6 +179,17 @@ qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst)
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Reports whether an instruction reads a uniform through one of its
 * explicit sources.
 *
 * Walks the sources of @inst (count taken from qir_get_nsrc()) and returns
 * true as soon as one of them is in the QFILE_UNIF register file; returns
 * false when none is.  Only inst->src[] is inspected here.
 */
bool
|
||||
qir_has_uniform_read(struct qinst *inst)
|
||||
{
|
||||
for (int i = 0; i < qir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file == QFILE_UNIF)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
qir_is_mul(struct qinst *inst)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -577,6 +577,7 @@ int qir_get_tex_uniform_src(struct qinst *inst);
|
|||
bool qir_reg_equals(struct qreg a, struct qreg b);
|
||||
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
|
||||
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
|
||||
bool qir_has_uniform_read(struct qinst *inst);
|
||||
bool qir_is_mul(struct qinst *inst);
|
||||
bool qir_is_raw_mov(struct qinst *inst);
|
||||
bool qir_is_tex(struct qinst *inst);
|
||||
|
|
|
|||
|
|
@ -35,25 +35,11 @@
|
|||
#include "util/hash_table.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
/**
 * Reports whether an instruction is counted as reading a uniform.
 *
 * Returns true when qir_is_tex() is true for @inst, or when any of its
 * sources (count taken from qir_get_nsrc()) is in the QFILE_UNIF register
 * file; returns false otherwise.
 *
 * NOTE(review): texture ops are treated as uniform readers regardless of
 * their sources -- presumably because they carry an implicit uniform;
 * confirm against the texture-op definitions.
 */
static bool
|
||||
inst_reads_a_uniform(struct qinst *inst)
|
||||
{
|
||||
if (qir_is_tex(inst))
|
||||
return true;
|
||||
|
||||
for (int i = 0; i < qir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file == QFILE_UNIF)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
block_reads_any_uniform(struct qblock *block)
|
||||
{
|
||||
qir_for_each_inst(inst, block) {
|
||||
if (inst_reads_a_uniform(inst))
|
||||
if (qir_has_uniform_read(inst))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +80,7 @@ qir_emit_uniform_stream_resets(struct vc4_compile *c)
|
|||
}
|
||||
|
||||
qir_for_each_inst(inst, block) {
|
||||
if (inst_reads_a_uniform(inst))
|
||||
if (qir_has_uniform_read(inst))
|
||||
uniform_count++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue