agx: Add helper for calculating occupancy

Add information about the relationship between program register usage and
program occupancy (the maximum number of threads that may execute concurrently
on a single shader core). This table is derived from studying the
maxTotalThreadsPerThreadgroup property in Metal while varying the register
usage, something I blogged about a few years back. It's probably not 100%
accurate and it hasn't been tested against hardware, but it matters "only" for
performance (not correctness) so I'm not super stressed about the details.

In the (near) future, RA will be able to make use of this information to know
exactly when it can use more registers without hurting performance. In the
present, it's just used for better shader-db statistics.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22353>
This commit is contained in:
Alyssa Rosenzweig 2023-03-11 15:39:09 -05:00 committed by Marge Bot
parent 05e614cc31
commit e713983875
4 changed files with 41 additions and 2 deletions

View file

@ -1805,8 +1805,8 @@ agx_dump_stats(agx_context *ctx, unsigned size, char **out)
agx_foreach_instr_global(ctx, I)
nr_ins++;
/* TODO: Pipe through occupancy */
unsigned nr_threads = 1;
unsigned nr_threads =
agx_occupancy_for_register_count(ctx->max_reg).max_threads;
return asprintf(out,
"%s shader: %u inst, %u bytes, %u halfregs, %u threads, "

View file

@ -802,6 +802,13 @@ bool agx_nir_lower_ubo(nir_shader *shader);
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
bool agx_nir_lower_frag_sidefx(nir_shader *s);
/* One row of the register-usage/occupancy table: a register budget paired
 * with the thread count available to programs that fit within that budget.
 */
struct agx_occupancy {
/* Inclusive upper bound on register usage covered by this entry. Presumably
 * counted in 16-bit half-registers, matching the `halfregs` argument of the
 * lookup helper -- TODO confirm.
 */
unsigned max_registers;
/* Best-case number of threads for a program within max_registers. */
unsigned max_threads;
};
/* Look up the occupancy entry that applies to a program using `halfregs`
 * registers. Returns the entry with the smallest max_registers that is
 * greater than or equal to `halfregs`. Callers must not pass a count larger
 * than the biggest budget in the table; that case is treated as unreachable.
 */
struct agx_occupancy agx_occupancy_for_register_count(unsigned halfregs);
#ifdef __cplusplus
} /* extern C */
#endif

View file

@ -0,0 +1,31 @@
/*
* Copyright 2023 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#include "agx_compiler.h"
/* Register pressure versus occupancy.
 *
 * Every entry pairs a register budget (max_registers) with the best-case
 * thread count (max_threads) for a program that stays within that budget.
 *
 * Entries are kept in strictly ascending order of max_registers so that a
 * linear scan can stop at the first budget that is large enough.
 */
static const struct agx_occupancy occupancies[] = {
   {.max_registers = 104, .max_threads = 1024},
   {.max_registers = 112, .max_threads = 896},
   {.max_registers = 128, .max_threads = 832},
   {.max_registers = 136, .max_threads = 768},
   {.max_registers = 144, .max_threads = 704},
   {.max_registers = 160, .max_threads = 640},
   {.max_registers = 184, .max_threads = 576},
   {.max_registers = 208, .max_threads = 512},
   {.max_registers = 232, .max_threads = 448},
   {.max_registers = 256, .max_threads = 384},
};
struct agx_occupancy
agx_occupancy_for_register_count(unsigned halfregs)
{
for (unsigned i = 0; i < ARRAY_SIZE(occupancies); ++i) {
unsigned max = occupancies[i].max_registers;
assert((i == 0 || max > occupancies[i - 1].max_registers) && "ascending");
if (halfregs <= max)
return occupancies[i];
}
unreachable("Register count must be less than the maximum");
}

View file

@ -20,6 +20,7 @@ libasahi_agx_files = files(
'agx_lower_pseudo.c',
'agx_lower_uniform_sources.c',
'agx_pack.c',
'agx_performance.c',
'agx_print.c',
'agx_ir.c',
'agx_opt_cse.c',