libagx: use common heap allocs

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34326>
2025-12-23 09:00:10 +01:00 · 2025-04-01 12:34:29 -04:00 · 2025-04-01 12:34:29 -04:00 · 1a36d0b5d7
commit 1a36d0b5d7
parent 55046d0293
3 changed files with 32 additions and 23 deletions
--- a/src/asahi/libagx/geometry.cl
+++ b/src/asahi/libagx/geometry.cl
@@ -459,8 +459,7 @@ setup_unroll_for_draw(global struct agx_geometry_state *heap,
    * TODO: For multidraw, should be atomic. But multidraw+unroll isn't
    * currently wired up in any driver.
    */
-   uint old_heap_bottom_B = heap->heap_bottom;
-   heap->heap_bottom += align(alloc_size, 8);
+   uint old_heap_bottom_B = agx_heap_alloc_nonatomic_offs(heap, alloc_size);

   /* Setup most of the descriptor. Count will be determined after unroll. */
   out[1] = in_draw[1];                       /* instance count */
@@ -654,32 +653,29 @@ libagx_gs_setup_indirect(
      libagx_tcs_in_size(vertex_count * instance_count, vs_outputs);

   if (is_prefix_summing) {
-      p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
-      state->heap_bottom +=
-         align(p->input_primitives * p->count_buffer_stride, 16);
+      p->count_buffer = agx_heap_alloc_nonatomic(
+         state, p->input_primitives * p->count_buffer_stride);
   }

-   p->input_buffer = (uintptr_t)(state->heap + state->heap_bottom);
+   p->input_buffer =
+      (uintptr_t)agx_heap_alloc_nonatomic(state, vertex_buffer_size);
   *vertex_buffer = p->input_buffer;
-   state->heap_bottom += align(vertex_buffer_size, 4);
-   assert(state->heap_bottom < state->heap_size);

   p->input_mask = vs_outputs;

   /* Allocate the index buffer and write the draw consuming it */
   global VkDrawIndexedIndirectCommand *cmd = (global void *)p->indirect_desc;
-   uint index_buffer_offset_B = state->heap_bottom;
+   uint count = p->input_primitives * indices_per_in_prim;
+   uint index_buffer_offset_B = agx_heap_alloc_nonatomic_offs(state, count * 4);

   *cmd = (VkDrawIndexedIndirectCommand){
-      .indexCount = p->input_primitives * indices_per_in_prim,
+      .indexCount = count,
      .instanceCount = 1,
      .firstIndex = index_buffer_offset_B / 4,
   };

   p->output_index_buffer =
      (global uint *)(state->heap + index_buffer_offset_B);
-   state->heap_bottom += (cmd->indexCount * 4);
-   assert(state->heap_bottom < state->heap_size);
 }

 /*
@@ -778,10 +774,7 @@ libagx_prefix_sum_tess(global struct libagx_tess_args *p)
   /* Allocate 4-byte indices */
   uint32_t elsize_B = sizeof(uint32_t);
   uint32_t size_B = total * elsize_B;
-   uint alloc_B = p->heap->heap_bottom;
-   p->heap->heap_bottom += size_B;
-   p->heap->heap_bottom = align(p->heap->heap_bottom, 8);
-
+   uint alloc_B = agx_heap_alloc_nonatomic_offs(p->heap, size_B);
   p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->heap) + alloc_B);

   /* ...and now we can generate the API indexed draw */
--- a/src/asahi/libagx/geometry.h
+++ b/src/asahi/libagx/geometry.h
@@ -32,12 +32,29 @@ struct agx_geometry_state {
 static_assert(sizeof(struct agx_geometry_state) == 4 * 4);

 #ifdef __OPENCL_VERSION__
-static inline global void *
-agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, size_t size)
+static inline uint
+agx_heap_alloc_nonatomic_offs(global struct agx_geometry_state *heap,
+                              uint size_B)
 {
-   global void *out = heap->heap + heap->heap_bottom;
-   heap->heap_bottom += size;
-   return out;
+   uint offs = heap->heap_bottom;
+   heap->heap_bottom += align(size_B, 16);
+
+   // Use printf+abort because assert is stripped from release builds.
+   if (heap->heap_bottom >= heap->heap_size) {
+      printf(
+         "FATAL: GPU heap overflow, allocating size %u, at offset %u, heap size %u!",
+         size_B, offs, heap->heap_size);
+
+      abort();
+   }
+
+   return offs;
+}
+
+static inline global void *
+agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, uint size_B)
+{
+   return heap->heap + agx_heap_alloc_nonatomic_offs(heap, size_B);
 }
 #endif

--- a/src/asahi/libagx/tessellation.cl
+++ b/src/asahi/libagx/tessellation.cl
@@ -178,8 +178,7 @@ libagx_tess_setup_indirect(
   alloc += vb_size;

   /* Allocate all patch calculations in one go */
-   global uchar *blob = p->heap->heap + p->heap->heap_bottom;
-   p->heap->heap_bottom += alloc;
+   global uchar *blob = agx_heap_alloc_nonatomic(p->heap, alloc);

   p->tcs_buffer = (global float *)(blob + tcs_out_offs);
   p->patches_per_instance = in_patches;