From 43613314ab5b5d0a2d8f57e021e16177fd7b05ab Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Tue, 5 Mar 2024 18:42:46 -0400
Subject: [PATCH] agx/opt_preamble: improve rewrite cost estimation

This keeps us from hoisting piles of iadd for no benefit with the new
vertex path. Results on shader-db without HW VS:

   total bytes in shared programs: 13975632 -> 13975666 (<.01%)
   bytes in affected programs: 3298 -> 3332 (1.03%)
   helped: 0
   HURT: 3

   total uniforms in shared programs: 1516540 -> 1516522 (<.01%)
   uniforms in affected programs: 234 -> 216 (-7.69%)
   helped: 3
   HURT: 0

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_nir_opt_preamble.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c
index 33d2c6d14b2..7936b8a10f8 100644
--- a/src/asahi/compiler/agx_nir_opt_preamble.c
+++ b/src/asahi/compiler/agx_nir_opt_preamble.c
@@ -241,25 +241,35 @@ instr_cost(nir_instr *instr, const void *data)
 static float
 rewrite_cost(nir_def *def, const void *data)
 {
-   bool mov_needed = false;
+   bool mov_needed = false, vectorizable = true;
    nir_foreach_use(use, def) {
       nir_instr *parent_instr = nir_src_parent_instr(use);
-      if (parent_instr->type != nir_instr_type_alu) {
+      if (parent_instr->type == nir_instr_type_tex) {
+         /* TODO: Maybe check the source index, but biases can be uniform */
+         break;
+      } else if (parent_instr->type == nir_instr_type_phi) {
+         /* Assume we'd eat a move anyway */
+      } else if (parent_instr->type != nir_instr_type_alu) {
          mov_needed = true;
+         vectorizable = false;
          break;
       } else {
          nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
          if (alu->op == nir_op_vec2 || alu->op == nir_op_vec3 ||
-             alu->op == nir_op_vec4 || alu->op == nir_op_mov) {
+             alu->op == nir_op_vec4) {
             mov_needed = true;
             break;
+         } else if (alu->op == nir_op_mov) {
+            mov_needed = true;
+            vectorizable = false;
          } else {
             /* Assume for non-moves that the const is folded into the src */
          }
       }
    }
 
-   return mov_needed ? ((float)(def->num_components * def->bit_size) / 32.0)
+   return mov_needed ? ((float)(def->num_components * def->bit_size) /
+                           (vectorizable ? 32.0 : 16.0))
                      : 0;
 }
 
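
For illustration, a minimal standalone C sketch of the cost model the new hunk
implements, assuming the same per-register-unit accounting as the patched
return expression; model_rewrite_cost and the sample values below are
hypothetical stand-ins, not part of the patch or of the real NIR-based
rewrite_cost().

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical standalone model of the patched rewrite_cost(): a def whose
 * uses still need a move is charged per 32-bit register unit when the move
 * can be vectorized (it feeds a vecN source) and per 16-bit unit otherwise
 * (plain movs and other non-ALU uses), doubling the estimated cost. */
static float
model_rewrite_cost(unsigned num_components, unsigned bit_size, bool mov_needed,
                   bool vectorizable)
{
   if (!mov_needed)
      return 0.0f;

   return (float)(num_components * bit_size) / (vectorizable ? 32.0f : 16.0f);
}

int
main(void)
{
   /* A 32-bit scalar feeding only a mov: the old estimate was 1.0, the
    * patched estimate is 2.0, so hoisting a lone iadd no longer looks free. */
   printf("mov-only use: %.1f\n", model_rewrite_cost(1, 32, true, false));

   /* The same scalar feeding a vec4 source keeps the old estimate of 1.0. */
   printf("vec4 use:     %.1f\n", model_rewrite_cost(1, 32, true, true));

   return 0;
}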