mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 23:09:05 +02:00
vc4: Coalesce instructions using VPM reads into the VPM read.
This is done instead of copy propagating the VPM reads into the instructions using them, because VPM reads have to stay in order.

shader-db results:
total instructions in shared programs: 78509 -> 78114 (-0.50%)
instructions in affected programs: 5203 -> 4808 (-7.59%)
total estimated cycles in shared programs: 234670 -> 234318 (-0.15%)
estimated cycles in affected programs: 5345 -> 4993 (-6.59%)

Signed-off-by: Varad Gautam <varadgautam@gmail.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Tested-by: Rhys Kidd <rhyskidd@gmail.com>
This commit is contained in:
parent
00bdbb22a9
commit
e103b52aec
3 changed files with 71 additions and 7 deletions
|
|
@ -24,14 +24,16 @@
|
|||
/**
|
||||
* @file vc4_opt_vpm.c
|
||||
*
|
||||
* This modifies instructions that generate the value consumed by a VPM write
|
||||
* to write directly into the VPM.
|
||||
* This modifies instructions that:
|
||||
* 1. exclusively consume a value read from the VPM to directly read the VPM if
|
||||
* other operands allow it.
|
||||
* 2. generate the value consumed by a VPM write to write directly into the VPM.
|
||||
*/
|
||||
|
||||
#include "vc4_qir.h"
|
||||
|
||||
bool
|
||||
qir_opt_vpm_writes(struct vc4_compile *c)
|
||||
qir_opt_vpm(struct vc4_compile *c)
|
||||
{
|
||||
if (c->stage == QSTAGE_FRAG)
|
||||
return false;
|
||||
|
|
@ -52,8 +54,70 @@ qir_opt_vpm_writes(struct vc4_compile *c)
|
|||
}
|
||||
|
||||
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
|
||||
if (inst->src[i].file == QFILE_TEMP)
|
||||
use_count[inst->src[i].index]++;
|
||||
if (inst->src[i].file == QFILE_TEMP) {
|
||||
uint32_t temp = inst->src[i].index;
|
||||
use_count[temp]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* For instructions reading from a temporary that contains a VPM read
|
||||
* result, try to move the instruction up in place of the VPM read.
|
||||
*/
|
||||
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
|
||||
if (!inst || qir_is_multi_instruction(inst))
|
||||
continue;
|
||||
|
||||
if (qir_depends_on_flags(inst) || inst->sf)
|
||||
continue;
|
||||
|
||||
if (qir_has_side_effects(c, inst) ||
|
||||
qir_has_side_effect_reads(c, inst) ||
|
||||
qir_is_tex(inst))
|
||||
continue;
|
||||
|
||||
for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) {
|
||||
if (inst->src[j].file != QFILE_TEMP ||
|
||||
inst->src[j].pack)
|
||||
continue;
|
||||
|
||||
uint32_t temp = inst->src[j].index;
|
||||
|
||||
/* Since VPM reads pull from a FIFO, we only get to
|
||||
* read each VPM entry once (unless we reset the read
|
||||
* pointer). That means we can't copy-propagate a VPM
|
||||
* read to multiple locations.
|
||||
*/
|
||||
if (use_count[temp] != 1)
|
||||
continue;
|
||||
|
||||
struct qinst *mov = c->defs[temp];
|
||||
if (!mov ||
|
||||
(mov->op != QOP_MOV &&
|
||||
mov->op != QOP_FMOV &&
|
||||
mov->op != QOP_MMOV) ||
|
||||
mov->src[0].file != QFILE_VPM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t temps = 0;
|
||||
for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) {
|
||||
if (inst->src[k].file == QFILE_TEMP)
|
||||
temps++;
|
||||
}
|
||||
|
||||
/* The instruction is safe to reorder if its other
|
||||
* sources are independent of previous instructions
|
||||
*/
|
||||
if (temps == 1) {
|
||||
list_del(&inst->link);
|
||||
inst->src[j] = mov->src[0];
|
||||
list_replace(&mov->link, &inst->link);
|
||||
c->defs[temp] = NULL;
|
||||
free(mov);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -526,7 +526,7 @@ qir_optimize(struct vc4_compile *c)
|
|||
OPTPASS(qir_opt_copy_propagation);
|
||||
OPTPASS(qir_opt_dead_code);
|
||||
OPTPASS(qir_opt_small_immediates);
|
||||
OPTPASS(qir_opt_vpm_writes);
|
||||
OPTPASS(qir_opt_vpm);
|
||||
|
||||
if (!progress)
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -484,7 +484,7 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
|
|||
bool qir_opt_cse(struct vc4_compile *c);
|
||||
bool qir_opt_dead_code(struct vc4_compile *c);
|
||||
bool qir_opt_small_immediates(struct vc4_compile *c);
|
||||
bool qir_opt_vpm_writes(struct vc4_compile *c);
|
||||
bool qir_opt_vpm(struct vc4_compile *c);
|
||||
void vc4_nir_lower_blend(struct vc4_compile *c);
|
||||
void vc4_nir_lower_io(struct vc4_compile *c);
|
||||
nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue