From 43613314ab5b5d0a2d8f57e021e16177fd7b05ab Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Tue, 5 Mar 2024 18:42:46 -0400
Subject: [PATCH] agx/opt_preamble: improve rewrite cost estimation

This keeps us from hoisting piles of iadd for no benefit with the new
vertex path. Results on shader-db without HW VS:

   total bytes in shared programs: 13975632 -> 13975666 (<.01%)
   bytes in affected programs: 3298 -> 3332 (1.03%)
   helped: 0
   HURT: 3

   total uniforms in shared programs: 1516540 -> 1516522 (<.01%)
   uniforms in affected programs: 234 -> 216 (-7.69%)
   helped: 3
   HURT: 0

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_nir_opt_preamble.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/asahi/compiler/agx_nir_opt_preamble.c b/src/asahi/compiler/agx_nir_opt_preamble.c
index 33d2c6d14b2..7936b8a10f8 100644
--- a/src/asahi/compiler/agx_nir_opt_preamble.c
+++ b/src/asahi/compiler/agx_nir_opt_preamble.c
@@ -241,25 +241,35 @@ instr_cost(nir_instr *instr, const void *data)
 static float
 rewrite_cost(nir_def *def, const void *data)
 {
-   bool mov_needed = false;
+   bool mov_needed = false, vectorizable = true;
    nir_foreach_use(use, def) {
       nir_instr *parent_instr = nir_src_parent_instr(use);
-      if (parent_instr->type != nir_instr_type_alu) {
+      if (parent_instr->type == nir_instr_type_tex) {
+         /* TODO: Maybe check the source index, but biases can be uniform */
+         break;
+      } else if (parent_instr->type == nir_instr_type_phi) {
+         /* Assume we'd eat a move anyway */
+      } else if (parent_instr->type != nir_instr_type_alu) {
          mov_needed = true;
+         vectorizable = false;
          break;
       } else {
          nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
          if (alu->op == nir_op_vec2 || alu->op == nir_op_vec3 ||
-             alu->op == nir_op_vec4 || alu->op == nir_op_mov) {
+             alu->op == nir_op_vec4) {
             mov_needed = true;
             break;
+         } else if (alu->op == nir_op_mov) {
+            mov_needed = true;
+            vectorizable = false;
          } else {
             /* Assume for non-moves that the const is folded into the src */
          }
       }
    }
 
-   return mov_needed ? ((float)(def->num_components * def->bit_size) / 32.0)
+   return mov_needed ? ((float)(def->num_components * def->bit_size) /
+                           (vectorizable ? 32.0 : 16.0))
                      : 0;
 }
 
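
For illustration, a minimal standalone C sketch of the cost model the new hunk
implements, assuming the same per-register-unit accounting as the patched
return expression; model_rewrite_cost and the sample values below are
hypothetical stand-ins, not part of the patch or of the real NIR-based
rewrite_cost().

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical standalone model of the patched rewrite_cost(): a def whose
 * uses still need a move is charged per 32-bit register unit when the move
 * can be vectorized (it feeds a vecN source) and per 16-bit unit otherwise
 * (plain movs and other non-ALU uses), doubling the estimated cost. */
static float
model_rewrite_cost(unsigned num_components, unsigned bit_size, bool mov_needed,
                   bool vectorizable)
{
   if (!mov_needed)
      return 0.0f;

   return (float)(num_components * bit_size) / (vectorizable ? 32.0f : 16.0f);
}

int
main(void)
{
   /* A 32-bit scalar feeding only a mov: the old estimate was 1.0, the
    * patched estimate is 2.0, so hoisting a lone iadd no longer looks free. */
   printf("mov-only use: %.1f\n", model_rewrite_cost(1, 32, true, false));

   /* The same scalar feeding a vec4 source keeps the old estimate of 1.0. */
   printf("vec4 use:     %.1f\n", model_rewrite_cost(1, 32, true, true));

   return 0;
}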