util/u_printf: add singleton implementation

Currently, nir_lower_printf depends on a per-nir_shader table, writing out
indices into the printf buffer. This works for real OpenCL implementations
(rusticl, microsoft) which can associate the printf buffer with a particular
kernel, I guess. (Actually it's not clear to me that it works well there either
but that's not my problem.)

This mechanism is unsuitable for internal driver shaders, where printfs with
unique format strings can come from many different nir_shaders. There are two
current solutions in tree to this for driver CL:

* Honeykrisp: Only use one single nir_shader (libagx). This prevents us from
  using printf in common CL and requires extra driver tracking. It won't work
  with my upcoming vtn_bindgen rework, which is why I'm addressing this now.

* Anv: Offset format-string indices by a dynamic "base identifier" using relocs
  or a push constant, then pool format strings into a table from nir_shaders
  across the device. The problem here is that these indices now depend on the
  order that nir_shaders are seen (which causes a mess for caching if relocs are
  used, or requires extra push constants and extra bookkeeping if relocs aren't
  used). And the driver tracking required to do this pooling correctly is even
  more complicated than what Honeykrisp does. I do not want every driver in-tree
  needing to go down this path, and it wouldn't work with my upcoming
  vtn_bindgen.

This MR introduces an alternate approach: rather than writing indices into the
table, we instead hash the format string itself and write the hash. That doesn't
depend on what nir_shader we came from, so we can freely mix & match and get
consistent hashes. That greatly alleviates driver tracking burden. To make that
possible, we need a global hash table mapping hashed format identifiers to the
format strings themselves.

That approach still requires a step to "register" format strings into the table.
That step would not be required if we wrote the actual strings themselves into
the table, but that was ruled out for performance/code size reasons. However, we
do not want drivers to need to explicitly register all the strings they use,
because once we have OpenCL in common code via vtn_bindgen2, drivers won't know
all the strings they use. Fortunately, there's a neat solution for that too.

By making this global table a singleton (with internal locking), vtn_bindgen2
can automatically register format strings via a static constructor. In
conjunction with the infrastructure added here, that eliminates all driver
bookkeeping required for format-strings.

The code itself is inspired by the glsl type singleton. Is it pretty? Not
really, but it gets the job done well.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33067>
This commit is contained in:
Alyssa Rosenzweig 2025-01-15 15:40:20 -05:00 committed by Marge Bot
parent 48dbfba17a
commit 007f60c8b8
2 changed files with 117 additions and 0 deletions

View file

@ -31,6 +31,7 @@
#include "hash_table.h"
#include "macros.h"
#include "ralloc.h"
#include "simple_mtx.h"
#include "strndup.h"
#include "u_math.h"
#include "u_printf.h"
@ -360,3 +361,113 @@ u_printf_hash(const u_printf_info *info)
assert(hash != 0);
return hash;
}
/* Process-wide printf format-string table. All accesses in this file are
 * made with u_printf_lock held.
 */
static struct {
   /* Number of outstanding u_printf_singleton_init_or_ref() references. */
   uint32_t users;

   /* Maps a u_printf_hash() value to the registered u_printf_info. */
   struct hash_table_u64 *ht;
} u_printf_cache = {
   .users = 0,
   .ht = NULL,
};

/* Guards every field of u_printf_cache. */
static simple_mtx_t u_printf_lock = SIMPLE_MTX_INITIALIZER;
/**
 * Take a reference on the global printf singleton.
 *
 * The first reference creates the backing hash table; later references only
 * bump the user count. The whole operation runs under u_printf_lock.
 */
void
u_printf_singleton_init_or_ref(void)
{
   simple_mtx_lock(&u_printf_lock);

   /* Going from zero to one user: the table does not exist yet. */
   if (u_printf_cache.users == 0)
      u_printf_cache.ht = _mesa_hash_table_u64_create(NULL);

   u_printf_cache.users++;

   simple_mtx_unlock(&u_printf_lock);
}
/**
 * Drop one reference on the global printf singleton.
 *
 * Asserts that at least one reference is outstanding. When the count reaches
 * zero the hash table is freed and the cache state is zeroed, so a later
 * u_printf_singleton_init_or_ref() starts from scratch.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype matching the declaration in the header.
 */
void
u_printf_singleton_decref(void)
{
   simple_mtx_lock(&u_printf_lock);
   assert(u_printf_cache.users > 0);

   if ((--u_printf_cache.users) == 0) {
      /* Registered entries use the table as their ralloc context (see
       * u_printf_singleton_add_locked), so they go away with it.
       */
      ralloc_free(u_printf_cache.ht);
      memset(&u_printf_cache, 0, sizeof(u_printf_cache));
   }

   simple_mtx_unlock(&u_printf_lock);
}
/* Sanity check shared by the *_locked helpers: the caller must already hold
 * u_printf_lock and the singleton must have at least one user.
 *
 * Uses an explicit (void) parameter list so this is a proper prototype
 * rather than an old-style definition.
 */
static void
assert_singleton_exists_and_is_locked(void)
{
   simple_mtx_assert_locked(&u_printf_lock);
   assert(u_printf_cache.users > 0);
}
/* Look up a registered entry by its hash. The caller must hold
 * u_printf_lock. Returns the table lookup result unchanged.
 */
static const u_printf_info *
u_printf_singleton_search_locked(uint32_t hash)
{
   assert_singleton_exists_and_is_locked();

   const u_printf_info *entry =
      _mesa_hash_table_u64_search(u_printf_cache.ht, hash);

   return entry;
}
/* Register one format-string descriptor in the singleton table. The caller
 * must hold u_printf_lock. Entries are keyed by u_printf_hash(); if the key
 * is already present the table is left untouched.
 */
static void
u_printf_singleton_add_locked(const u_printf_info *info)
{
   assert_singleton_exists_and_is_locked();

   uint32_t hash = u_printf_hash(info);
   const u_printf_info *cached = u_printf_singleton_search_locked(hash);

   if (cached == NULL) {
      /* First time we see this format string. The table has to own its own
       * deep copy, since the singleton will probably outlive the caller's
       * descriptor. Every allocation uses the table as its ralloc context.
       */
      void *owner = u_printf_cache.ht;
      const size_t arg_sizes_bytes =
         sizeof(info->arg_sizes[0]) * info->num_args;

      u_printf_info *clone = rzalloc(owner, u_printf_info);

      clone->num_args = info->num_args;
      clone->string_size = info->string_size;
      clone->arg_sizes = ralloc_memdup(owner, info->arg_sizes, arg_sizes_bytes);
      clone->strings = ralloc_memdup(owner, info->strings, info->string_size);

      assert(_mesa_hash_table_u64_search(u_printf_cache.ht, hash) == NULL &&
             "no duplicates at this point");

      _mesa_hash_table_u64_insert(u_printf_cache.ht, hash, clone);
   } else {
      /* Already known: nothing to insert, only sanity-check the entry.
       * NOTE(review): strcmp stops at the first NUL, so this compares only
       * the leading string of `strings` -- confirm that is sufficient.
       */
      assert(u_printf_hash(cached) == hash && "hash table invariant");
      assert(!strcmp(cached->strings, info->strings) && "assume no collisions");
   }
}
/**
 * Look up a registered format-string descriptor by hash.
 *
 * Takes u_printf_lock for the duration of the lookup and returns the
 * result of the locked search helper.
 */
const u_printf_info *
u_printf_singleton_search(uint32_t hash)
{
   const u_printf_info *result;

   simple_mtx_lock(&u_printf_lock);
   result = u_printf_singleton_search_locked(hash);
   simple_mtx_unlock(&u_printf_lock);

   return result;
}
/**
 * Register an array of format-string descriptors.
 *
 * \param info   array of descriptors to register
 * \param count  number of elements in \p info
 *
 * The lock is taken once for the whole batch; each element is handed to
 * the locked add helper in array order.
 */
void
u_printf_singleton_add(const u_printf_info *info, unsigned count)
{
   simple_mtx_lock(&u_printf_lock);

   unsigned idx = 0;
   while (idx < count) {
      u_printf_singleton_add_locked(info + idx);
      idx++;
   }

   simple_mtx_unlock(&u_printf_lock);
}
void
u_printf_singleton_add_serialized(const void *data, size_t data_size)
{
struct blob_reader blob;
blob_reader_init(&blob, data, data_size);
unsigned count = 0;
u_printf_info *info = u_printf_deserialize_info(NULL, &blob, &count);
u_printf_singleton_add(info, count);
ralloc_free(info);
}

View file

@ -51,6 +51,12 @@ u_printf_info *u_printf_deserialize_info(void *mem_ctx,
uint32_t u_printf_hash(const u_printf_info *info);
/* Global (process-wide) table of printf format strings keyed by
 * u_printf_hash(). The implementation takes an internal lock around every
 * operation below.
 */

/* Take a reference on the table, creating it on the first reference. */
void u_printf_singleton_init_or_ref(void);
/* Drop a reference; the table is freed when the last reference is dropped. */
void u_printf_singleton_decref(void);
/* Register `count` descriptors from `info`. Each new entry is deep-copied
 * into the table; entries whose hash is already present are left as-is.
 */
void u_printf_singleton_add(const u_printf_info *info, unsigned count);
/* Deserialize descriptors from `data` (`data_size` bytes) and register them. */
void u_printf_singleton_add_serialized(const void *data, size_t data_size);
/* Look up a descriptor by hash. NOTE(review): presumably NULL when nothing
 * is registered for `hash`, and the result points into table-owned storage
 * that is released by the final decref -- confirm against callers.
 */
const u_printf_info *u_printf_singleton_search(uint32_t hash);
struct u_printf_ctx {
simple_mtx_t lock;
void *bo;