From e80d451e55af8fecd8b98a24fa06cfa9324351f3 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Wed, 28 Feb 2024 14:47:42 -0400
Subject: [PATCH] agx: move spill/fills accounting to shaderdb

don't bother the compiler proper about it. this now counts NIR scratch
access as spills/fills, which I think is probably the right call

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_compile.c             | 12 ++++++---
 src/asahi/compiler/agx_compiler.h            |  2 --
 src/asahi/compiler/agx_lower_spill.c         |  2 --
 src/asahi/compiler/test/test-lower-spill.cpp | 28 ++++++++------------
 4 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 1c58da7c4ec..54836acee4c 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -2295,12 +2295,18 @@ agx_set_st_vary_final(agx_context *ctx)
 static int
 agx_dump_stats(agx_context *ctx, unsigned size, char **out)
 {
-   unsigned nr_ins = 0;
+   unsigned nr_ins = 0, spills = 0, fills = 0;
 
    /* Count instructions */
-   agx_foreach_instr_global(ctx, I)
+   agx_foreach_instr_global(ctx, I) {
       nr_ins++;
 
+      if (I->op == AGX_OPCODE_STACK_STORE)
+         spills++;
+      else if (I->op == AGX_OPCODE_STACK_LOAD)
+         fills++;
+   }
+
    unsigned nr_threads =
       agx_occupancy_for_register_count(ctx->max_reg).max_threads;
 
@@ -2308,7 +2314,7 @@ agx_dump_stats(agx_context *ctx, unsigned size, char **out)
       "%s shader: %u inst, %u bytes, %u halfregs, %u threads, "
       "%u loops, %u:%u spills:fills",
       gl_shader_stage_name(ctx->stage), nr_ins, size, ctx->max_reg,
-      nr_threads, ctx->loop_count, ctx->spills, ctx->fills);
+      nr_threads, ctx->loop_count, spills, fills);
 }
 
 static bool
diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h
index d8330eace08..ed02645fd20 100644
--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h
@@ -494,8 +494,6 @@ typedef struct {
 
    /* Stats for shader-db */
    unsigned loop_count;
-   unsigned spills;
-   unsigned fills;
    unsigned max_reg;
 } agx_context;
 
diff --git a/src/asahi/compiler/agx_lower_spill.c b/src/asahi/compiler/agx_lower_spill.c
index 5548690ab27..b865f4bf0cf 100644
--- a/src/asahi/compiler/agx_lower_spill.c
+++ b/src/asahi/compiler/agx_lower_spill.c
@@ -44,10 +44,8 @@ spill_fill(agx_builder *b, agx_instr *I, enum agx_size size, unsigned channels,
    /* Emit the spill/fill */
    if (I->dest[0].memory) {
       agx_stack_store(b, reg, agx_immediate(stack_offs_B), format, mask);
-      b->shader->spills++;
    } else {
       agx_stack_load_to(b, reg, agx_immediate(stack_offs_B), format, mask);
-      b->shader->fills++;
    }
 }
 
diff --git a/src/asahi/compiler/test/test-lower-spill.cpp b/src/asahi/compiler/test/test-lower-spill.cpp
index 675de8f80c0..8c2daf77fc0 100644
--- a/src/asahi/compiler/test/test-lower-spill.cpp
+++ b/src/asahi/compiler/test/test-lower-spill.cpp
@@ -11,7 +11,7 @@
 #include "util/macros.h"
 #include <gtest/gtest.h>
 
-#define CASE(expected_spills, expected_fills, instr, expected)                \
+#define CASE(instr, expected)                                                 \
    do {                                                                       \
       agx_builder *A = agx_test_builder(mem_ctx);                             \
       agx_builder *B = agx_test_builder(mem_ctx);                             \
@@ -25,8 +25,6 @@
       }                                                                       \
       agx_lower_spill(A->shader);                                             \
       ASSERT_SHADER_EQUAL(A->shader, B->shader);                              \
-      ASSERT_EQ(A->shader->spills, expected_spills);                          \
-      ASSERT_EQ(A->shader->fills, expected_fills);                            \
    } while (0)
 
 class LowerSpill : public testing::Test {
@@ -68,44 +66,43 @@
 
 TEST_F(LowerSpill, ScalarSpills)
 {
-   CASE(1, 0, agx_mov_to(b, agx_memory_register(11, AGX_SIZE_16), hy),
+   CASE(agx_mov_to(b, agx_memory_register(11, AGX_SIZE_16), hy),
         agx_stack_store(b, hy, agx_immediate(22), i16, scalar));
 
-   CASE(1, 0, agx_mov_to(b, agx_memory_register(18, AGX_SIZE_32), wx),
+   CASE(agx_mov_to(b, agx_memory_register(18, AGX_SIZE_32), wx),
        agx_stack_store(b, wx, agx_immediate(36), i32, scalar));
 }
 
 TEST_F(LowerSpill, ScalarFills)
 {
-   CASE(0, 1, agx_mov_to(b, hy, agx_memory_register(11, AGX_SIZE_16)),
+   CASE(agx_mov_to(b, hy, agx_memory_register(11, AGX_SIZE_16)),
        agx_stack_load_to(b, hy, agx_immediate(22), i16, scalar));
 
-   CASE(0, 1, agx_mov_to(b, wx, agx_memory_register(18, AGX_SIZE_32)),
+   CASE(agx_mov_to(b, wx, agx_memory_register(18, AGX_SIZE_32)),
        agx_stack_load_to(b, wx, agx_immediate(36), i32, scalar));
 }
 
 TEST_F(LowerSpill, VectorSpills)
 {
-   CASE(1, 0, agx_mov_to(b, mh4, hy4),
+   CASE(agx_mov_to(b, mh4, hy4),
        agx_stack_store(b, hy4, agx_immediate(0), i16, vec4));
 
-   CASE(1, 0, agx_mov_to(b, mw4, wx4),
+   CASE(agx_mov_to(b, mw4, wx4),
        agx_stack_store(b, wx4, agx_immediate(0), i32, vec4));
 }
 
 TEST_F(LowerSpill, VectorFills)
 {
-   CASE(0, 1, agx_mov_to(b, hy4, mh4),
+   CASE(agx_mov_to(b, hy4, mh4),
        agx_stack_load_to(b, hy4, agx_immediate(0), i16, vec4));
 
-   CASE(0, 1, agx_mov_to(b, wx4, mw4),
+   CASE(agx_mov_to(b, wx4, mw4),
        agx_stack_load_to(b, wx4, agx_immediate(0), i32, vec4));
 }
 
 TEST_F(LowerSpill, ScalarSpill64)
 {
-   CASE(1, 0,
-        agx_mov_to(b, agx_memory_register(16, AGX_SIZE_64),
+   CASE(agx_mov_to(b, agx_memory_register(16, AGX_SIZE_64),
                   agx_register(8, AGX_SIZE_64)),
        agx_stack_store(b, agx_register(8, AGX_SIZE_64), agx_immediate(32),
                        i32, BITFIELD_MASK(2)));
@@ -113,8 +110,7 @@ TEST_F(LowerSpill, ScalarSpill64)
 
 TEST_F(LowerSpill, ScalarFill64)
 {
-   CASE(0, 1,
-        agx_mov_to(b, agx_register(16, AGX_SIZE_64),
+   CASE(agx_mov_to(b, agx_register(16, AGX_SIZE_64),
                   agx_memory_register(8, AGX_SIZE_64)),
        agx_stack_load_to(b, agx_register(16, AGX_SIZE_64), agx_immediate(16),
                          i32, BITFIELD_MASK(2)));
@@ -123,7 +119,6 @@ TEST_F(LowerSpill, ScalarFill64)
 TEST_F(LowerSpill, Vec6Spill)
 {
    CASE(
-      2, 0,
       {
          agx_index mvec6 = agx_memory_register(16, AGX_SIZE_32);
          agx_index vec6 = agx_register(8, AGX_SIZE_32);
@@ -147,7 +142,6 @@ TEST_F(LowerSpill, Vec6Spill)
 TEST_F(LowerSpill, Vec6Fill)
 {
    CASE(
-      0, 2,
       {
          agx_index mvec6 = agx_memory_register(16, AGX_SIZE_32);
         agx_index vec6 = agx_register(8, AGX_SIZE_32);