From 8b7389b1a9bfd43b56bd7dabe57fd681403083cc Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 27 Feb 2025 17:30:19 -0500 Subject: [PATCH] asahi: port to common stats framework Signed-off-by: Alyssa Rosenzweig Reviewed-by: Mary Guillemard Part-of: --- src/asahi/clc/asahi_clc.c | 2 +- src/asahi/compiler/agx_compile.c | 59 +++++++++++---------------- src/asahi/compiler/agx_compile.h | 4 +- src/asahi/lib/agx_bg_eot.c | 2 +- src/asahi/vulkan/hk_cmd_draw.c | 2 +- src/asahi/vulkan/hk_shader.c | 21 +--------- src/gallium/drivers/asahi/agx_state.c | 46 ++++++++++----------- src/util/shader_stats.xml | 14 +++++++ 8 files changed, 68 insertions(+), 82 deletions(-) diff --git a/src/asahi/clc/asahi_clc.c b/src/asahi/clc/asahi_clc.c index b19b30f0a22..4bee42e1b9e 100644 --- a/src/asahi/clc/asahi_clc.c +++ b/src/asahi/clc/asahi_clc.c @@ -353,7 +353,7 @@ main(int argc, char **argv) } nir_shader *clone = nir_shader_clone(NULL, s); - agx_compile_shader_nir(clone, &key, NULL, &compiled); + agx_compile_shader_nir(clone, &key, &compiled); print_shader(fp_c, libfunc->name, *target, v, &compiled); free(compiled.binary); ralloc_free(clone); diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index aad6c09ff20..14a9f82c6b1 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2744,34 +2744,32 @@ agx_set_st_vary_final(agx_context *ctx) agx_no_varyings(&_b); } -static int -agx_dump_stats(agx_context *ctx, unsigned size, char **out) +static void +agx_calc_stats(agx_context *ctx, unsigned size, struct agx2_stats *stats) { - unsigned nr_ins = 0, spills = 0, fills = 0; + struct agx_cycle_estimate cycles = agx_estimate_cycles(ctx); + + *stats = (struct agx2_stats){ + .alu = cycles.alu, + .fscib = cycles.f_scib, + .ic = cycles.ic, + .code_size = size, + .gprs = ctx->max_reg, + .uniforms = ctx->out->push_count, + .scratch = ctx->scratch_size_B, + .threads = agx_occupancy_for_register_count(ctx->max_reg).max_threads, + .loops = ctx->loop_count, + }; /* Count instructions */ agx_foreach_instr_global(ctx, I) { - nr_ins++; + stats->instrs++; if (I->op == AGX_OPCODE_STACK_STORE) - spills++; + stats->spills++; else if (I->op == AGX_OPCODE_STACK_LOAD) - fills++; + stats->fills++; } - - struct agx_cycle_estimate cycles = agx_estimate_cycles(ctx); - - unsigned nr_threads = - agx_occupancy_for_register_count(ctx->max_reg).max_threads; - - return asprintf( - out, - "%s shader: %u inst, %u alu, %u fscib, %u ic, %u bytes, %u regs, " - "%u uniforms, %u scratch, %u threads, %u loops, " - "%u:%u spills:fills", - gl_shader_stage_name(ctx->stage), nr_ins, cycles.alu, cycles.f_scib, - cycles.ic, size, ctx->max_reg, ctx->out->push_count, ctx->scratch_size_B, - nr_threads, ctx->loop_count, spills, fills); } static bool @@ -3461,7 +3459,6 @@ agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit) static unsigned agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl, struct agx_shader_key *key, - struct util_debug_callback *debug, struct util_dynarray *binary, struct agx_shader_info *out) { @@ -3629,19 +3626,12 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl, /* Don't dump statistics for preambles, since they're not worth optimizing */ if (!impl->function->is_preamble) { - char *stats; - int ret = agx_dump_stats(ctx, binary->size, &stats); + agx_calc_stats(ctx, binary->size, &ctx->out->stats); - if (ret >= 0) { - if (agx_should_dump(nir, AGX_DBG_SHADERDB)) { - fprintf(stderr, "SHADER-DB: %s - %s\n", nir->info.label ?: "", - stats); - } - - if (debug) - util_debug_message(debug, SHADER_INFO, "%s", stats); - - free(stats); + if (agx_should_dump(nir, AGX_DBG_SHADERDB)) { + agx2_stats_fprintf(stderr, + _mesa_shader_stage_to_abbrev(nir->info.stage), + &ctx->out->stats); } } @@ -3769,7 +3759,6 @@ agx_preprocess_nir(nir_shader *nir) void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, - struct util_debug_callback *debug, struct agx_shader_part *out) { agx_compiler_debug = agx_get_compiler_debug(); @@ -3849,7 +3838,7 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, nir_foreach_function_with_impl(func, impl, nir) { unsigned offset = - agx_compile_function_nir(nir, impl, key, debug, &binary, &out->info); + agx_compile_function_nir(nir, impl, key, &binary, &out->info); if (func->is_preamble) { info->preamble_offset = offset; diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index ca4979f3be5..92857285c67 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -6,6 +6,7 @@ #pragma once #include "compiler/nir/nir.h" +#include "util/shader_stats.h" #include "util/u_dynarray.h" #include "util/u_tristate.h" #include "shader_enums.h" @@ -154,6 +155,8 @@ struct agx_shader_info { * registers as specified hre. */ struct agx_rodata rodata; + + struct agx2_stats stats; }; struct agx_precompiled_kernel_info { @@ -305,7 +308,6 @@ bool agx_mem_vectorize_cb(unsigned align_mul, unsigned align_offset, nir_intrinsic_instr *high, void *data); void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, - struct util_debug_callback *debug, struct agx_shader_part *out); struct agx_occupancy { diff --git a/src/asahi/lib/agx_bg_eot.c b/src/asahi/lib/agx_bg_eot.c index 68b8a9782d2..2b1325a49d6 100644 --- a/src/asahi/lib/agx_bg_eot.c +++ b/src/asahi/lib/agx_bg_eot.c @@ -52,7 +52,7 @@ agx_compile_bg_eot_shader(struct agx_bg_eot_cache *cache, nir_shader *shader, struct agx_bg_eot_shader *res = rzalloc(cache->ht, struct agx_bg_eot_shader); struct agx_shader_part bin; - agx_compile_shader_nir(shader, key, NULL, &bin); + agx_compile_shader_nir(shader, key, &bin); res->info = bin.info; res->ptr = agx_pool_upload_aligned_with_bo( diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index 41330624092..54c0b50cfd6 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -1672,7 +1672,7 @@ hk_get_prolog_epilog_locked(struct hk_device *dev, struct hk_internal_key *key, struct agx_shader_part *part = rzalloc(dev->prolog_epilog.ht, struct agx_shader_part); - agx_compile_shader_nir(b.shader, &backend_key, NULL, part); + agx_compile_shader_nir(b.shader, &backend_key, part); ralloc_free(b.shader); diff --git a/src/asahi/vulkan/hk_shader.c b/src/asahi/vulkan/hk_shader.c index 695ad0a6a7f..bd0d965087d 100644 --- a/src/asahi/vulkan/hk_shader.c +++ b/src/asahi/vulkan/hk_shader.c @@ -963,7 +963,7 @@ hk_compile_nir(struct hk_device *dev, const VkAllocationCallbacks *pAllocator, if (lock) simple_mtx_lock(lock); - agx_compile_shader_nir(nir, &backend_key, NULL, &shader->b); + agx_compile_shader_nir(nir, &backend_key, &shader->b); if (lock) simple_mtx_unlock(lock); @@ -1460,24 +1460,7 @@ hk_shader_get_executable_statistics( * with zink. */ struct hk_shader *shader = hk_any_variant(obj); - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) - { - VK_COPY_STR(stat->name, "Code Size"); - VK_COPY_STR(stat->description, - "Size of the compiled shader binary, in bytes"); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = shader->code_size; - } - - vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) - { - VK_COPY_STR(stat->name, "Number of GPRs"); - VK_COPY_STR(stat->description, "Number of GPRs used by this pipeline"); - stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; - stat->value.u64 = shader->b.info.nr_gprs; - } - + vk_add_agx2_stats(out, &shader->b.info.stats); return vk_outarray_status(&out); } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index b33564fca7a..479b4ac15fd 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1554,7 +1554,10 @@ agx_compile_nir(struct agx_device *dev, nir_shader *nir, key.fs.cf_base = cf_base; } - agx_compile_shader_nir(nir, &key, debug, &compiled->b); + agx_compile_shader_nir(nir, &key, &compiled->b); + + agx2_stats_util_debug(debug, _mesa_shader_stage_to_abbrev(nir->info.stage), + &compiled->b.info.stats); if (compiled->b.info.binary_size && !secondary) { compiled->bo = agx_bo_create(dev, compiled->b.info.binary_size, 0, @@ -1577,7 +1580,6 @@ agx_build_meta_shader_internal(struct agx_context *ctx, static struct agx_compiled_shader * agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, struct agx_uncompiled_shader *so, - struct util_debug_callback *debug, union asahi_shader_key *key_) { struct blob_reader reader; @@ -1679,8 +1681,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS(_, nir, agx_nir_lower_multisampled_image_store); struct agx_compiled_shader *compiled = agx_compile_nir( - dev, nir, debug, so->type, false, so->type != PIPE_SHADER_FRAGMENT, false, - 0, attrib_components_read); + dev, nir, &pctx->debug, so->type, false, so->type != PIPE_SHADER_FRAGMENT, + false, 0, attrib_components_read); if (so->type == PIPE_SHADER_FRAGMENT) { /* XXX: don't replicate this all over the driver */ @@ -1696,14 +1698,15 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, /* Compile auxiliary programs */ if (gs_count) { - compiled->gs_count = agx_compile_nir(dev, gs_count, debug, so->type, - false, true, false, 0, NULL); + compiled->gs_count = agx_compile_nir( + dev, gs_count, &pctx->debug, so->type, false, true, false, 0, NULL); compiled->gs_count->so = so; } if (pre_gs) { - compiled->pre_gs = agx_compile_nir( - dev, pre_gs, debug, PIPE_SHADER_COMPUTE, false, true, false, 0, NULL); + compiled->pre_gs = + agx_compile_nir(dev, pre_gs, &pctx->debug, PIPE_SHADER_COMPUTE, false, + true, false, 0, NULL); } if (gs_copy) { @@ -1723,8 +1726,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS(_, gs_copy, agx_nir_lower_uvs, &uvs); compiled->gs_copy = - agx_compile_nir(dev, gs_copy, debug, PIPE_SHADER_GEOMETRY, false, true, - false, 0, NULL); + agx_compile_nir(dev, gs_copy, &pctx->debug, PIPE_SHADER_GEOMETRY, + false, true, false, 0, NULL); compiled->gs_copy->so = so; compiled->gs_copy->stage = so->type; compiled->gs_copy->uvs = uvs; @@ -1743,14 +1746,13 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, static struct agx_compiled_shader * agx_get_shader_variant(struct agx_screen *screen, struct pipe_context *pctx, struct agx_uncompiled_shader *so, - struct util_debug_callback *debug, union asahi_shader_key *key) { struct agx_compiled_shader *compiled = agx_disk_cache_retrieve(screen, so, key); if (!compiled) { - compiled = agx_compile_variant(&screen->dev, pctx, so, debug, key); + compiled = agx_compile_variant(&screen->dev, pctx, so, key); agx_disk_cache_store(screen->disk_cache, so, key, compiled); } @@ -2001,19 +2003,16 @@ agx_create_shader_state(struct pipe_context *pctx, if ((so->type == PIPE_SHADER_TESS_CTRL) || (so->type == PIPE_SHADER_FRAGMENT && !so->info.uses_fbfetch)) { union asahi_shader_key key = {0}; - agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &pctx->debug, - &key); + agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key); } else if (so->type == PIPE_SHADER_VERTEX) { union asahi_shader_key key = { .vs.hw = next_stage == MESA_SHADER_FRAGMENT, }; - agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &pctx->debug, - &key); + agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key); if (!next_stage) { key.vs.hw = true; - agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, - &pctx->debug, &key); + agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key); } } else if (dev->debug & AGX_DBG_PRECOMPILE) { union asahi_shader_key key = {0}; @@ -2033,7 +2032,7 @@ agx_create_shader_state(struct pipe_context *pctx, unreachable("Unknown shader stage in shader-db precompile"); } - agx_compile_variant(dev, pctx, so, &pctx->debug, &key); + agx_compile_variant(dev, pctx, so, &key); } return so; @@ -2060,8 +2059,7 @@ agx_create_compute_state(struct pipe_context *pctx, nir_shader *nir = (void *)cso->prog; agx_shader_initialize(dev, so, nir, ctx->support_lod_bias, ctx->robust); - agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &pctx->debug, - &key); + agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key); /* We're done with the NIR, throw it away */ ralloc_free(nir); @@ -2073,8 +2071,8 @@ agx_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info) { union asahi_shader_key key = {0}; - struct agx_compiled_shader *so = agx_get_shader_variant( - agx_screen(pctx->screen), pctx, cso, &pctx->debug, &key); + struct agx_compiled_shader *so = + agx_get_shader_variant(agx_screen(pctx->screen), pctx, cso, &key); info->max_threads = agx_occupancy_for_register_count(so->b.info.nr_gprs).max_threads; @@ -2102,7 +2100,7 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, } struct agx_screen *screen = agx_screen(ctx->base.screen); - *out = agx_get_shader_variant(screen, &ctx->base, so, &ctx->base.debug, key); + *out = agx_get_shader_variant(screen, &ctx->base, so, key); return true; } diff --git a/src/util/shader_stats.xml b/src/util/shader_stats.xml index 467fc4afdf2..89b37e1e3fa 100644 --- a/src/util/shader_stats.xml +++ b/src/util/shader_stats.xml @@ -1,2 +1,16 @@ + + Instruction count + Estimated ALU cycle count + Estimated F16/F32/SCIB cycle count + Estimated IC cycle count + Binary size in bytes + Number of 16-bit GPRs + Number of 16-bit uniform registers + Scratch size per thread in bytes + Maximum number of threads in flight on a compute unit + Number of hardware loops + Number of spill (stack store) instructions + Number of fill (stack load) instructions +