mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-05 04:40:11 +01:00
agx/opt_preamble: improve rewrite cost est
this keeps us from hoisting piles of iadd for no benefit withthe new vertex path. results on shaderdb without HW VS: total bytes in shared programs: 13975632 -> 13975666 (<.01%) bytes in affected programs: 3298 -> 3332 (1.03%) helped: 0 HURT: 3 total uniforms in shared programs: 1516540 -> 1516522 (<.01%) uniforms in affected programs: 234 -> 216 (-7.69%) helped: 3 HURT: 0 Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
parent
fd76caf5d3
commit
43613314ab
1 changed files with 14 additions and 4 deletions
|
|
@ -241,25 +241,35 @@ instr_cost(nir_instr *instr, const void *data)
|
|||
static float
|
||||
rewrite_cost(nir_def *def, const void *data)
|
||||
{
|
||||
bool mov_needed = false;
|
||||
bool mov_needed = false, vectorizable = true;
|
||||
nir_foreach_use(use, def) {
|
||||
nir_instr *parent_instr = nir_src_parent_instr(use);
|
||||
if (parent_instr->type != nir_instr_type_alu) {
|
||||
if (parent_instr->type == nir_instr_type_tex) {
|
||||
/* TODO: Maybe check the source index, but biases can be uniform */
|
||||
break;
|
||||
} else if (parent_instr->type == nir_instr_type_phi) {
|
||||
/* Assume we'd eat a move anyway */
|
||||
} else if (parent_instr->type != nir_instr_type_alu) {
|
||||
mov_needed = true;
|
||||
vectorizable = false;
|
||||
break;
|
||||
} else {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
|
||||
if (alu->op == nir_op_vec2 || alu->op == nir_op_vec3 ||
|
||||
alu->op == nir_op_vec4 || alu->op == nir_op_mov) {
|
||||
alu->op == nir_op_vec4) {
|
||||
mov_needed = true;
|
||||
break;
|
||||
} else if (alu->op == nir_op_mov) {
|
||||
mov_needed = true;
|
||||
vectorizable = false;
|
||||
} else {
|
||||
/* Assume for non-moves that the const is folded into the src */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mov_needed ? ((float)(def->num_components * def->bit_size) / 32.0)
|
||||
return mov_needed ? ((float)(def->num_components * def->bit_size) /
|
||||
(vectorizable ? 32.0 : 16.0))
|
||||
: 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue