asahi: Add compute kernel scaffolding

This adds the basic scaffolding for compute kernels. There's a bit of churn to
make sure we don't need to hang onto the kernel NIR, since it's never used for
anything other than looking up the shader stage.

The compute kernels aren't actually wired up here, but they do get compiled.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21062>
Author: Alyssa Rosenzweig, 2022-10-22 11:07:02 -04:00 (committed by Marge Bot)
Parent: 60121e3a42
Commit: 227d4f6d75

3 changed files with 126 additions and 15 deletions
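
The churn described in the message reduces to one pattern, visible in the
agx_create_shader_state and agx_create_compute_state hunks below: when the CSO
is created, record the shader stage and a SHA-1 of the serialized NIR, and from
then on the NIR itself is disposable. A minimal standalone sketch of that
pattern follows; it assumes Mesa's internal NIR, blob, and SHA-1 helpers
(include paths approximate), and the struct and function names here are
illustrative rather than the driver's own:

/* Sketch of the "note the stage, hash the NIR, then drop it" pattern.
 * Assumes Mesa's internal utilities; kernel_record and
 * record_and_discard_nir() are illustrative names, not driver API. */
#include "nir.h"
#include "nir_serialize.h"
#include "util/blob.h"
#include "util/mesa-sha1.h"
#include "util/ralloc.h"

struct kernel_record {
   gl_shader_stage stage;  /* the only thing ever read from the NIR later */
   uint8_t nir_sha1[20];   /* identifies the shader, e.g. for keying variants */
};

static void
record_and_discard_nir(struct kernel_record *rec, nir_shader *nir)
{
   /* Remember the stage before the NIR goes away. */
   rec->stage = nir->info.stage;

   /* Hash the serialized NIR so later lookups don't need the NIR itself. */
   struct blob blob;
   blob_init(&blob);
   nir_serialize(&blob, nir, true /* strip debug info */);
   _mesa_sha1_compute(blob.data, blob.size, rec->nir_sha1);
   blob_finish(&blob);

   /* Done with the NIR; free it rather than storing it in the CSO. */
   ralloc_free(nir);
}

agx_create_compute_state in the diff does essentially this, except it keeps
so->nir alive just long enough to precompile the default variant before
NULLing the pointer and freeing the cloned NIR.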


@@ -1437,8 +1437,10 @@ agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
                      enum pipe_shader_cap param)
 {
    bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;
+   bool is_deqp = agx_device(pscreen)->debug & AGX_DBG_DEQP;
 
-   if (shader != PIPE_SHADER_VERTEX && shader != PIPE_SHADER_FRAGMENT)
+   if (shader != PIPE_SHADER_VERTEX && shader != PIPE_SHADER_FRAGMENT &&
+       !(shader == PIPE_SHADER_COMPUTE && is_deqp))
       return 0;
 
    /* this is probably not totally correct.. but it's a start: */
@@ -1525,6 +1527,66 @@ static int
 agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
                       enum pipe_compute_cap param, void *ret)
 {
+   if (!(agx_device(pscreen)->debug & AGX_DBG_DEQP))
+      return 0;
+
+#define RET(x) \
+   do { \
+      if (ret) \
+         memcpy(ret, x, sizeof(x)); \
+      return sizeof(x); \
+   } while (0)
+
+   switch (param) {
+   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+      RET((uint32_t[]){64});
+
+   case PIPE_COMPUTE_CAP_IR_TARGET:
+      if (ret)
+         sprintf(ret, "agx");
+      return strlen("agx") * sizeof(char);
+
+   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+      RET((uint64_t[]){3});
+
+   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+      RET(((uint64_t[]){65535, 65535, 65535}));
+
+   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+      RET(((uint64_t[]){256, 256, 256}));
+
+   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+      RET((uint64_t[]){256});
+
+   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+      RET((uint64_t[]){1024 * 1024 * 512 /* Maybe get memory */});
+
+   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+      RET((uint64_t[]){32768});
+
+   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+      RET((uint64_t[]){4096});
+
+   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+      RET((uint64_t[]){1024 * 1024 * 512 /* Maybe get memory */});
+
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+      RET((uint32_t[]){800 /* MHz -- TODO */});
+
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+      RET((uint32_t[]){4 /* TODO */});
+
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+      RET((uint32_t[]){1});
+
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      RET((uint32_t[]){32});
+
+   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+      RET((uint64_t[]){1024}); // TODO
+   }
+
    return 0;
 }


@@ -1168,6 +1168,17 @@ asahi_fs_shader_key_equal(const void *a, const void *b)
    return memcmp(a, b, sizeof(struct asahi_fs_shader_key)) == 0;
 }
 
+/* No compute variants */
+static uint32_t asahi_cs_shader_key_hash(const void *key)
+{
+   return 0;
+}
+
+static bool asahi_cs_shader_key_equal(const void *a, const void *b)
+{
+   return true;
+}
+
 static unsigned
 agx_find_linked_slot(struct agx_varyings_vs *vs, struct agx_varyings_fs *fs,
                      gl_varying_slot slot, unsigned offset)
@@ -1397,14 +1408,11 @@ agx_create_shader_state(struct pipe_context *pctx,
    so->base = *cso;
 
-   if (cso->type == PIPE_SHADER_IR_NIR) {
-      so->nir = cso->ir.nir;
-   } else {
-      assert(cso->type == PIPE_SHADER_IR_TGSI);
-      so->nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
-   }
+   nir_shader *nir = cso->type == PIPE_SHADER_IR_NIR
+                        ? cso->ir.nir
+                        : tgsi_to_nir(cso->tokens, pctx->screen, false);
 
-   if (so->nir->info.stage == MESA_SHADER_VERTEX) {
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
       so->variants = _mesa_hash_table_create(NULL, asahi_vs_shader_key_hash,
                                              asahi_vs_shader_key_equal);
    } else {
@@ -1412,12 +1420,16 @@ agx_create_shader_state(struct pipe_context *pctx,
                                              asahi_fs_shader_key_equal);
    }
 
+   so->type = pipe_shader_type_from_mesa(nir->info.stage);
+
    struct blob blob;
    blob_init(&blob);
-   nir_serialize(&blob, so->nir, true);
+   nir_serialize(&blob, nir, true);
    _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
    blob_finish(&blob);
 
+   so->nir = nir;
+
    /* For shader-db, precompile a shader with a default key. This could be
     * improved but hopefully this is acceptable for now.
     */
@@ -1451,6 +1463,40 @@ agx_create_shader_state(struct pipe_context *pctx,
    return so;
 }
 
+static void *
+agx_create_compute_state(struct pipe_context *pctx,
+                         const struct pipe_compute_state *cso)
+{
+   struct agx_uncompiled_shader *so = CALLOC_STRUCT(agx_uncompiled_shader);
+
+   if (!so)
+      return NULL;
+
+   so->variants = _mesa_hash_table_create(NULL, asahi_cs_shader_key_hash,
+                                          asahi_cs_shader_key_equal);
+
+   union asahi_shader_key key = { 0 };
+
+   assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
+   nir_shader *nir = nir_shader_clone(NULL, cso->prog);
+
+   so->type = pipe_shader_type_from_mesa(nir->info.stage);
+
+   struct blob blob;
+   blob_init(&blob);
+   nir_serialize(&blob, nir, true);
+   _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
+   blob_finish(&blob);
+
+   so->nir = nir;
+   agx_get_shader_variant(agx_screen(pctx->screen), so, &pctx->debug, &key);
+
+   /* We're done with the NIR, throw it away */
+   so->nir = NULL;
+   ralloc_free(nir);
+
+   return so;
+}
+
 /* Does not take ownership of key. Clones if necessary. */
 static bool
 agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
@@ -1542,13 +1588,12 @@ agx_bind_shader_state(struct pipe_context *pctx, void *cso)
    struct agx_context *ctx = agx_context(pctx);
    struct agx_uncompiled_shader *so = cso;
 
-   enum pipe_shader_type type = pipe_shader_type_from_mesa(so->nir->info.stage);
-   ctx->stage[type].shader = so;
-
-   if (type == PIPE_SHADER_VERTEX)
+   if (so->type == PIPE_SHADER_VERTEX)
       ctx->dirty |= AGX_DIRTY_VS_PROG;
-   else
+   else if (so->type == PIPE_SHADER_FRAGMENT)
       ctx->dirty |= AGX_DIRTY_FS_PROG;
+
+   ctx->stage[so->type].shader = so;
 }
 
 static void
@@ -2445,6 +2490,7 @@ agx_init_state_functions(struct pipe_context *ctx)
    ctx->create_surface = agx_create_surface;
    ctx->create_vertex_elements_state = agx_create_vertex_elements;
    ctx->create_vs_state = agx_create_shader_state;
+   ctx->create_compute_state = agx_create_compute_state;
    ctx->bind_blend_state = agx_bind_blend_state;
    ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state;
    ctx->bind_sampler_states = agx_bind_sampler_states;
@@ -2452,9 +2498,11 @@ agx_init_state_functions(struct pipe_context *ctx)
    ctx->bind_rasterizer_state = agx_bind_rasterizer_state;
    ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state;
    ctx->bind_vs_state = agx_bind_shader_state;
+   ctx->bind_compute_state = agx_bind_shader_state;
    ctx->delete_blend_state = agx_delete_state;
    ctx->delete_depth_stencil_alpha_state = agx_delete_state;
    ctx->delete_fs_state = agx_delete_shader_state;
+   ctx->delete_compute_state = agx_delete_shader_state;
    ctx->delete_rasterizer_state = agx_delete_state;
    ctx->delete_sampler_state = agx_delete_sampler_state;
    ctx->delete_vertex_elements_state = agx_delete_state;


@@ -116,7 +116,8 @@ struct agx_compiled_shader {
 struct agx_uncompiled_shader {
    struct pipe_shader_state base;
-   struct nir_shader *nir;
+   enum pipe_shader_type type;
+   const struct nir_shader *nir;
    uint8_t nir_sha1[20];
 
    struct hash_table *variants;