asahi: port to common stats framework

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33814>
This commit is contained in:
Alyssa Rosenzweig 2025-02-27 17:30:19 -05:00 committed by Marge Bot
parent 722b83434a
commit 8b7389b1a9
8 changed files with 68 additions and 82 deletions

View file

@ -353,7 +353,7 @@ main(int argc, char **argv)
}
nir_shader *clone = nir_shader_clone(NULL, s);
agx_compile_shader_nir(clone, &key, NULL, &compiled);
agx_compile_shader_nir(clone, &key, &compiled);
print_shader(fp_c, libfunc->name, *target, v, &compiled);
free(compiled.binary);
ralloc_free(clone);

View file

@ -2744,34 +2744,32 @@ agx_set_st_vary_final(agx_context *ctx)
agx_no_varyings(&_b);
}
static int
agx_dump_stats(agx_context *ctx, unsigned size, char **out)
static void
agx_calc_stats(agx_context *ctx, unsigned size, struct agx2_stats *stats)
{
unsigned nr_ins = 0, spills = 0, fills = 0;
struct agx_cycle_estimate cycles = agx_estimate_cycles(ctx);
*stats = (struct agx2_stats){
.alu = cycles.alu,
.fscib = cycles.f_scib,
.ic = cycles.ic,
.code_size = size,
.gprs = ctx->max_reg,
.uniforms = ctx->out->push_count,
.scratch = ctx->scratch_size_B,
.threads = agx_occupancy_for_register_count(ctx->max_reg).max_threads,
.loops = ctx->loop_count,
};
/* Count instructions */
agx_foreach_instr_global(ctx, I) {
nr_ins++;
stats->instrs++;
if (I->op == AGX_OPCODE_STACK_STORE)
spills++;
stats->spills++;
else if (I->op == AGX_OPCODE_STACK_LOAD)
fills++;
stats->fills++;
}
struct agx_cycle_estimate cycles = agx_estimate_cycles(ctx);
unsigned nr_threads =
agx_occupancy_for_register_count(ctx->max_reg).max_threads;
return asprintf(
out,
"%s shader: %u inst, %u alu, %u fscib, %u ic, %u bytes, %u regs, "
"%u uniforms, %u scratch, %u threads, %u loops, "
"%u:%u spills:fills",
gl_shader_stage_name(ctx->stage), nr_ins, cycles.alu, cycles.f_scib,
cycles.ic, size, ctx->max_reg, ctx->out->push_count, ctx->scratch_size_B,
nr_threads, ctx->loop_count, spills, fills);
}
static bool
@ -3461,7 +3459,6 @@ agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
static unsigned
agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
struct agx_shader_key *key,
struct util_debug_callback *debug,
struct util_dynarray *binary,
struct agx_shader_info *out)
{
@ -3629,19 +3626,12 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
/* Don't dump statistics for preambles, since they're not worth optimizing */
if (!impl->function->is_preamble) {
char *stats;
int ret = agx_dump_stats(ctx, binary->size, &stats);
agx_calc_stats(ctx, binary->size, &ctx->out->stats);
if (ret >= 0) {
if (agx_should_dump(nir, AGX_DBG_SHADERDB)) {
fprintf(stderr, "SHADER-DB: %s - %s\n", nir->info.label ?: "",
stats);
}
if (debug)
util_debug_message(debug, SHADER_INFO, "%s", stats);
free(stats);
if (agx_should_dump(nir, AGX_DBG_SHADERDB)) {
agx2_stats_fprintf(stderr,
_mesa_shader_stage_to_abbrev(nir->info.stage),
&ctx->out->stats);
}
}
@ -3769,7 +3759,6 @@ agx_preprocess_nir(nir_shader *nir)
void
agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
struct util_debug_callback *debug,
struct agx_shader_part *out)
{
agx_compiler_debug = agx_get_compiler_debug();
@ -3849,7 +3838,7 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
nir_foreach_function_with_impl(func, impl, nir) {
unsigned offset =
agx_compile_function_nir(nir, impl, key, debug, &binary, &out->info);
agx_compile_function_nir(nir, impl, key, &binary, &out->info);
if (func->is_preamble) {
info->preamble_offset = offset;

View file

@ -6,6 +6,7 @@
#pragma once
#include "compiler/nir/nir.h"
#include "util/shader_stats.h"
#include "util/u_dynarray.h"
#include "util/u_tristate.h"
#include "shader_enums.h"
@ -154,6 +155,8 @@ struct agx_shader_info {
* registers as specified here.
*/
struct agx_rodata rodata;
struct agx2_stats stats;
};
struct agx_precompiled_kernel_info {
@ -305,7 +308,6 @@ bool agx_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
nir_intrinsic_instr *high, void *data);
void agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
struct util_debug_callback *debug,
struct agx_shader_part *out);
struct agx_occupancy {

View file

@ -52,7 +52,7 @@ agx_compile_bg_eot_shader(struct agx_bg_eot_cache *cache, nir_shader *shader,
struct agx_bg_eot_shader *res = rzalloc(cache->ht, struct agx_bg_eot_shader);
struct agx_shader_part bin;
agx_compile_shader_nir(shader, key, NULL, &bin);
agx_compile_shader_nir(shader, key, &bin);
res->info = bin.info;
res->ptr = agx_pool_upload_aligned_with_bo(

View file

@ -1672,7 +1672,7 @@ hk_get_prolog_epilog_locked(struct hk_device *dev, struct hk_internal_key *key,
struct agx_shader_part *part =
rzalloc(dev->prolog_epilog.ht, struct agx_shader_part);
agx_compile_shader_nir(b.shader, &backend_key, NULL, part);
agx_compile_shader_nir(b.shader, &backend_key, part);
ralloc_free(b.shader);

View file

@ -963,7 +963,7 @@ hk_compile_nir(struct hk_device *dev, const VkAllocationCallbacks *pAllocator,
if (lock)
simple_mtx_lock(lock);
agx_compile_shader_nir(nir, &backend_key, NULL, &shader->b);
agx_compile_shader_nir(nir, &backend_key, &shader->b);
if (lock)
simple_mtx_unlock(lock);
@ -1460,24 +1460,7 @@ hk_shader_get_executable_statistics(
* with zink.
*/
struct hk_shader *shader = hk_any_variant(obj);
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat)
{
VK_COPY_STR(stat->name, "Code Size");
VK_COPY_STR(stat->description,
"Size of the compiled shader binary, in bytes");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->code_size;
}
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat)
{
VK_COPY_STR(stat->name, "Number of GPRs");
VK_COPY_STR(stat->description, "Number of GPRs used by this pipeline");
stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
stat->value.u64 = shader->b.info.nr_gprs;
}
vk_add_agx2_stats(out, &shader->b.info.stats);
return vk_outarray_status(&out);
}

View file

@ -1554,7 +1554,10 @@ agx_compile_nir(struct agx_device *dev, nir_shader *nir,
key.fs.cf_base = cf_base;
}
agx_compile_shader_nir(nir, &key, debug, &compiled->b);
agx_compile_shader_nir(nir, &key, &compiled->b);
agx2_stats_util_debug(debug, _mesa_shader_stage_to_abbrev(nir->info.stage),
&compiled->b.info.stats);
if (compiled->b.info.binary_size && !secondary) {
compiled->bo = agx_bo_create(dev, compiled->b.info.binary_size, 0,
@ -1577,7 +1580,6 @@ agx_build_meta_shader_internal(struct agx_context *ctx,
static struct agx_compiled_shader *
agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
struct agx_uncompiled_shader *so,
struct util_debug_callback *debug,
union asahi_shader_key *key_)
{
struct blob_reader reader;
@ -1679,8 +1681,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
NIR_PASS(_, nir, agx_nir_lower_multisampled_image_store);
struct agx_compiled_shader *compiled = agx_compile_nir(
dev, nir, debug, so->type, false, so->type != PIPE_SHADER_FRAGMENT, false,
0, attrib_components_read);
dev, nir, &pctx->debug, so->type, false, so->type != PIPE_SHADER_FRAGMENT,
false, 0, attrib_components_read);
if (so->type == PIPE_SHADER_FRAGMENT) {
/* XXX: don't replicate this all over the driver */
@ -1696,14 +1698,15 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
/* Compile auxiliary programs */
if (gs_count) {
compiled->gs_count = agx_compile_nir(dev, gs_count, debug, so->type,
false, true, false, 0, NULL);
compiled->gs_count = agx_compile_nir(
dev, gs_count, &pctx->debug, so->type, false, true, false, 0, NULL);
compiled->gs_count->so = so;
}
if (pre_gs) {
compiled->pre_gs = agx_compile_nir(
dev, pre_gs, debug, PIPE_SHADER_COMPUTE, false, true, false, 0, NULL);
compiled->pre_gs =
agx_compile_nir(dev, pre_gs, &pctx->debug, PIPE_SHADER_COMPUTE, false,
true, false, 0, NULL);
}
if (gs_copy) {
@ -1723,8 +1726,8 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
NIR_PASS(_, gs_copy, agx_nir_lower_uvs, &uvs);
compiled->gs_copy =
agx_compile_nir(dev, gs_copy, debug, PIPE_SHADER_GEOMETRY, false, true,
false, 0, NULL);
agx_compile_nir(dev, gs_copy, &pctx->debug, PIPE_SHADER_GEOMETRY,
false, true, false, 0, NULL);
compiled->gs_copy->so = so;
compiled->gs_copy->stage = so->type;
compiled->gs_copy->uvs = uvs;
@ -1743,14 +1746,13 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
static struct agx_compiled_shader *
agx_get_shader_variant(struct agx_screen *screen, struct pipe_context *pctx,
struct agx_uncompiled_shader *so,
struct util_debug_callback *debug,
union asahi_shader_key *key)
{
struct agx_compiled_shader *compiled =
agx_disk_cache_retrieve(screen, so, key);
if (!compiled) {
compiled = agx_compile_variant(&screen->dev, pctx, so, debug, key);
compiled = agx_compile_variant(&screen->dev, pctx, so, key);
agx_disk_cache_store(screen->disk_cache, so, key, compiled);
}
@ -2001,19 +2003,16 @@ agx_create_shader_state(struct pipe_context *pctx,
if ((so->type == PIPE_SHADER_TESS_CTRL) ||
(so->type == PIPE_SHADER_FRAGMENT && !so->info.uses_fbfetch)) {
union asahi_shader_key key = {0};
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &pctx->debug,
&key);
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key);
} else if (so->type == PIPE_SHADER_VERTEX) {
union asahi_shader_key key = {
.vs.hw = next_stage == MESA_SHADER_FRAGMENT,
};
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &pctx->debug,
&key);
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key);
if (!next_stage) {
key.vs.hw = true;
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so,
&pctx->debug, &key);
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key);
}
} else if (dev->debug & AGX_DBG_PRECOMPILE) {
union asahi_shader_key key = {0};
@ -2033,7 +2032,7 @@ agx_create_shader_state(struct pipe_context *pctx,
unreachable("Unknown shader stage in shader-db precompile");
}
agx_compile_variant(dev, pctx, so, &pctx->debug, &key);
agx_compile_variant(dev, pctx, so, &key);
}
return so;
@ -2060,8 +2059,7 @@ agx_create_compute_state(struct pipe_context *pctx,
nir_shader *nir = (void *)cso->prog;
agx_shader_initialize(dev, so, nir, ctx->support_lod_bias, ctx->robust);
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &pctx->debug,
&key);
agx_get_shader_variant(agx_screen(pctx->screen), pctx, so, &key);
/* We're done with the NIR, throw it away */
ralloc_free(nir);
@ -2073,8 +2071,8 @@ agx_get_compute_state_info(struct pipe_context *pctx, void *cso,
struct pipe_compute_state_object_info *info)
{
union asahi_shader_key key = {0};
struct agx_compiled_shader *so = agx_get_shader_variant(
agx_screen(pctx->screen), pctx, cso, &pctx->debug, &key);
struct agx_compiled_shader *so =
agx_get_shader_variant(agx_screen(pctx->screen), pctx, cso, &key);
info->max_threads =
agx_occupancy_for_register_count(so->b.info.nr_gprs).max_threads;
@ -2102,7 +2100,7 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
}
struct agx_screen *screen = agx_screen(ctx->base.screen);
*out = agx_get_shader_variant(screen, &ctx->base, so, &ctx->base.debug, key);
*out = agx_get_shader_variant(screen, &ctx->base, so, key);
return true;
}

View file

@ -1,2 +1,16 @@
<shaderdb>
<!--
  Shader statistics reported for the AGX2 ISA. Each <stat> element declares
  one statistic: the "name" attribute identifies it and the element text is
  its human-readable description.
  NOTE(review): the optional "display", "type" and "more" attributes are
  presumably interpreted by the common stats generator (short label, storage
  type, and whether larger values count as improvements); confirm against
  that tool before relying on it.
-->
<isa name="AGX2">
<stat name="Instructions" display="Instrs">Instruction count</stat>
<stat name="ALU">Estimated ALU cycle count</stat>
<stat name="FSCIB">Estimated F16/F32/SCIB cycle count</stat>
<stat name="IC">Estimated IC cycle count</stat>
<stat name="Code size">Binary size in bytes</stat>
<stat name="GPRs" type="u16">Number of 16-bit GPRs</stat>
<stat name="Uniforms" type="u16">Number of 16-bit uniform registers</stat>
<stat name="Scratch">Scratch size per thread in bytes</stat>
<stat name="Threads" more="better" type="u16">Maximum number of threads in flight on a compute unit</stat>
<stat name="Loops">Number of hardware loops</stat>
<stat name="Spills">Number of spill (stack store) instructions</stat>
<stat name="Fills">Number of fill (stack load) instructions</stat>
</isa>
</shaderdb>