diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl
index eb1bc16cf3c..ba884d651c7 100644
--- a/src/asahi/libagx/geometry.cl
+++ b/src/asahi/libagx/geometry.cl
@@ -459,8 +459,7 @@ setup_unroll_for_draw(global struct agx_geometry_state *heap,
     * TODO: For multidraw, should be atomic. But multidraw+unroll isn't
     * currently wired up in any driver.
     */
-   uint old_heap_bottom_B = heap->heap_bottom;
-   heap->heap_bottom += align(alloc_size, 8);
+   uint old_heap_bottom_B = agx_heap_alloc_nonatomic_offs(heap, alloc_size);
 
    /* Setup most of the descriptor. Count will be determined after unroll. */
    out[1] = in_draw[1]; /* instance count */
@@ -654,32 +653,29 @@ libagx_gs_setup_indirect(
       libagx_tcs_in_size(vertex_count * instance_count, vs_outputs);
 
    if (is_prefix_summing) {
-      p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
-      state->heap_bottom +=
-         align(p->input_primitives * p->count_buffer_stride, 16);
+      p->count_buffer = agx_heap_alloc_nonatomic(
+         state, p->input_primitives * p->count_buffer_stride);
    }
 
-   p->input_buffer = (uintptr_t)(state->heap + state->heap_bottom);
+   p->input_buffer =
+      (uintptr_t)agx_heap_alloc_nonatomic(state, vertex_buffer_size);
    *vertex_buffer = p->input_buffer;
-   state->heap_bottom += align(vertex_buffer_size, 4);
-   assert(state->heap_bottom < state->heap_size);
 
    p->input_mask = vs_outputs;
 
    /* Allocate the index buffer and write the draw consuming it */
   global VkDrawIndexedIndirectCommand *cmd = (global void *)p->indirect_desc;
-   uint index_buffer_offset_B = state->heap_bottom;
+   uint count = p->input_primitives * indices_per_in_prim;
+   uint index_buffer_offset_B = agx_heap_alloc_nonatomic_offs(state, count * 4);
 
    *cmd = (VkDrawIndexedIndirectCommand){
-      .indexCount = p->input_primitives * indices_per_in_prim,
+      .indexCount = count,
       .instanceCount = 1,
       .firstIndex = index_buffer_offset_B / 4,
    };
 
    p->output_index_buffer =
       (global uint *)(state->heap + index_buffer_offset_B);
-   state->heap_bottom += (cmd->indexCount * 4);
-   assert(state->heap_bottom < state->heap_size);
 }
 
 /*
@@ -778,10 +774,7 @@ libagx_prefix_sum_tess(global struct libagx_tess_args *p)
    /* Allocate 4-byte indices */
    uint32_t elsize_B = sizeof(uint32_t);
    uint32_t size_B = total * elsize_B;
-   uint alloc_B = p->heap->heap_bottom;
-   p->heap->heap_bottom += size_B;
-   p->heap->heap_bottom = align(p->heap->heap_bottom, 8);
-
+   uint alloc_B = agx_heap_alloc_nonatomic_offs(p->heap, size_B);
    p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->heap) + alloc_B);
 
    /* ...and now we can generate the API indexed draw */
diff --git a/src/asahi/libagx/geometry.h b/src/asahi/libagx/geometry.h
index 42b3d7d2845..7deee8f3093 100644
--- a/src/asahi/libagx/geometry.h
+++ b/src/asahi/libagx/geometry.h
@@ -32,12 +32,29 @@ struct agx_geometry_state {
 static_assert(sizeof(struct agx_geometry_state) == 4 * 4);
 
 #ifdef __OPENCL_VERSION__
-static inline global void *
-agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, size_t size)
+static inline uint
+agx_heap_alloc_nonatomic_offs(global struct agx_geometry_state *heap,
+                              uint size_B)
 {
-   global void *out = heap->heap + heap->heap_bottom;
-   heap->heap_bottom += size;
-   return out;
+   uint offs = heap->heap_bottom;
+   heap->heap_bottom += align(size_B, 16);
+
+   // Use printf+abort because assert is stripped from release builds.
+   if (heap->heap_bottom >= heap->heap_size) {
+      printf(
+         "FATAL: GPU heap overflow, allocating size %u, at offset %u, heap size %u!",
+         size_B, offs, heap->heap_size);
+
+      abort();
+   }
+
+   return offs;
+}
+
+static inline global void *
+agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, uint size_B)
+{
+   return heap->heap + agx_heap_alloc_nonatomic_offs(heap, size_B);
 }
 #endif
 
diff --git a/src/asahi/libagx/tessellation.cl b/src/asahi/libagx/tessellation.cl
index ce6dd1aedb4..244158f3d38 100644
--- a/src/asahi/libagx/tessellation.cl
+++ b/src/asahi/libagx/tessellation.cl
@@ -178,8 +178,7 @@ libagx_tess_setup_indirect(
    alloc += vb_size;
 
    /* Allocate all patch calculations in one go */
-   global uchar *blob = p->heap->heap + p->heap->heap_bottom;
-   p->heap->heap_bottom += alloc;
+   global uchar *blob = agx_heap_alloc_nonatomic(p->heap, alloc);
 
    p->tcs_buffer = (global float *)(blob + tcs_out_offs);
    p->patches_per_instance = in_patches;