diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl
index 4505ae7cdee..ac7eb0fe0ec 100644
--- a/src/asahi/libagx/geometry.cl
+++ b/src/asahi/libagx/geometry.cl
@@ -443,9 +443,8 @@ first_true_thread_in_workgroup(bool cond, local uint *scratch)
  * sets up most of the new draw descriptor.
  */
 static global void *
-setup_unroll_for_draw(global struct agx_geometry_state *heap,
-                      constant uint *in_draw, global uint *out,
-                      enum mesa_prim mode, uint index_size_B)
+setup_unroll_for_draw(global struct agx_heap *heap, constant uint *in_draw,
+                      global uint *out, enum mesa_prim mode, uint index_size_B)
 {
    /* Determine an upper bound on the memory required for the index buffer.
     * Restarts only decrease the unrolled index buffer size, so the maximum size
@@ -469,16 +468,15 @@ setup_unroll_for_draw(global struct agx_geometry_state *heap,
    out[4] = in_draw[4]; /* base instance */
 
    /* Return the index buffer we allocated */
-   return (global uchar *)heap->heap + old_heap_bottom_B;
+   return (global uchar *)heap->base + old_heap_bottom_B;
 }
 
 KERNEL(1024)
-libagx_unroll_restart(global struct agx_geometry_state *heap,
-                      uint64_t index_buffer, constant uint *in_draw,
-                      global uint32_t *out_draw, uint32_t max_draws,
-                      uint32_t restart_index, uint32_t index_buffer_size_el,
-                      uint32_t index_size_log2, uint32_t flatshade_first,
-                      uint mode__11)
+libagx_unroll_restart(global struct agx_heap *heap, uint64_t index_buffer,
+                      constant uint *in_draw, global uint32_t *out_draw,
+                      uint32_t max_draws, uint32_t restart_index,
+                      uint32_t index_buffer_size_el, uint32_t index_size_log2,
+                      uint32_t flatshade_first, uint mode__11)
 {
    uint32_t index_size_B = 1 << index_size_log2;
    enum mesa_prim mode = libagx_uncompact_prim(mode__11);
@@ -579,7 +577,7 @@ libagx_gs_setup_indirect(
    global uintptr_t *vertex_buffer /* output */,
    global struct agx_ia_state *ia /* output */,
    global struct agx_geometry_params *p /* output */,
-   global struct agx_geometry_state *state,
+   global struct agx_heap *heap,
    uint64_t vs_outputs /* Vertex (TES) output mask */,
    uint32_t index_size_B /* 0 if no index bffer */,
    uint32_t index_buffer_range_el,
@@ -624,11 +622,11 @@ libagx_gs_setup_indirect(
 
    if (is_prefix_summing) {
       p->count_buffer = agx_heap_alloc_nonatomic(
-         state, p->input_primitives * p->count_buffer_stride);
+         heap, p->input_primitives * p->count_buffer_stride);
    }
 
    p->input_buffer =
-      (uintptr_t)agx_heap_alloc_nonatomic(state, vertex_buffer_size);
+      (uintptr_t)agx_heap_alloc_nonatomic(heap, vertex_buffer_size);
    *vertex_buffer = p->input_buffer;
 
    p->input_mask = vs_outputs;
@@ -645,10 +643,10 @@ libagx_gs_setup_indirect(
 
    if (shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
       cmd->firstIndex =
-         agx_heap_alloc_nonatomic_offs(state, cmd->indexCount * 4) / 4;
+         agx_heap_alloc_nonatomic_offs(heap, cmd->indexCount * 4) / 4;
 
       p->output_index_buffer =
-         (global uint *)(state->heap + (cmd->firstIndex * 4));
+         (global uint *)(heap->base + (cmd->firstIndex * 4));
    }
 }
 
@@ -750,7 +748,7 @@ libagx_prefix_sum_tess(global struct libagx_tess_args *p, global uint *c_prims,
    uint32_t elsize_B = sizeof(uint32_t);
    uint32_t size_B = total * elsize_B;
    uint alloc_B = agx_heap_alloc_nonatomic_offs(p->heap, size_B);
-   p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->heap) + alloc_B);
+   p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->base) + alloc_B);
 
    /* ...and now we can generate the API indexed draw */
    global uint32_t *desc = p->out_draws;
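A note on the descriptor being built here: setup_unroll_for_draw fills a
five-word indexed indirect draw, the same draw_stride_el = 5 layout the
tessellation path emits. A minimal host-side sketch of that layout, assuming
the standard VkDrawIndexedIndirectCommand field order; the struct and field
names are hypothetical, and only out[4] (base instance) is spelled out in the
hunk above:

#include <stdint.h>

/* Hypothetical mirror of the five uints written by setup_unroll_for_draw,
 * assuming VkDrawIndexedIndirectCommand ordering. */
struct unrolled_draw {
   uint32_t index_count;    /* out[0], patched once unrolling completes */
   uint32_t instance_count; /* out[1] */
   uint32_t first_index;    /* out[2], start of the unrolled index buffer */
   uint32_t vertex_offset;  /* out[3] */
   uint32_t base_instance;  /* out[4] = in_draw[4] */
};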
diff --git a/src/asahi/libagx/geometry.h b/src/asahi/libagx/geometry.h
index 8fbc380bcd6..103f6b0fcd1 100644
--- a/src/asahi/libagx/geometry.h
+++ b/src/asahi/libagx/geometry.h
@@ -104,27 +104,25 @@ agx_gs_index_size(enum agx_gs_shape shape)
    }
 }
 
-/* Packed geometry state buffer */
-struct agx_geometry_state {
-   /* Heap to allocate from. */
-   DEVICE(uchar) heap;
-   uint32_t heap_bottom, heap_size;
+/* Heap to allocate from. */
+struct agx_heap {
+   DEVICE(uchar) base;
+   uint32_t bottom, size;
 } PACKED;
-static_assert(sizeof(struct agx_geometry_state) == 4 * 4);
+static_assert(sizeof(struct agx_heap) == 4 * 4);
 
 #ifdef __OPENCL_VERSION__
 static inline uint
-agx_heap_alloc_nonatomic_offs(global struct agx_geometry_state *heap,
-                              uint size_B)
+agx_heap_alloc_nonatomic_offs(global struct agx_heap *heap, uint size_B)
 {
-   uint offs = heap->heap_bottom;
-   heap->heap_bottom += align(size_B, 16);
+   uint offs = heap->bottom;
+   heap->bottom += align(size_B, 16);
 
-   // Use printf+abort because assert is stripped from release builds.
-   if (heap->heap_bottom >= heap->heap_size) {
+   /* Use printf+abort because assert is stripped from release builds. */
+   if (heap->bottom >= heap->size) {
       printf(
          "FATAL: GPU heap overflow, allocating size %u, at offset %u, heap size %u!",
-         size_B, offs, heap->heap_size);
+         size_B, offs, heap->size);
       abort();
    }
 
@@ -133,9 +131,9 @@ agx_heap_alloc_nonatomic_offs(global struct agx_geometry_state *heap, uint size_B)
 }
 
 static inline global void *
-agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, uint size_B)
+agx_heap_alloc_nonatomic(global struct agx_heap *heap, uint size_B)
 {
-   return heap->heap + agx_heap_alloc_nonatomic_offs(heap, size_B);
+   return heap->base + agx_heap_alloc_nonatomic_offs(heap, size_B);
 }
 
 #endif
diff --git a/src/asahi/libagx/tessellator.cl b/src/asahi/libagx/tessellator.cl
index bfbe862d7b1..090064b7cf3 100644
--- a/src/asahi/libagx/tessellator.cl
+++ b/src/asahi/libagx/tessellator.cl
@@ -118,10 +118,10 @@ tess_factors(constant struct libagx_tess_args *p, uint patch)
 }
 
 static inline uint
-libagx_heap_alloc(global struct agx_geometry_state *heap, uint size_B)
+libagx_heap_alloc(global struct agx_heap *heap, uint size_B)
 {
    // TODO: drop align to 4 I think
-   return atomic_fetch_add((volatile atomic_uint *)(&heap->heap_bottom),
+   return atomic_fetch_add((volatile atomic_uint *)(&heap->bottom),
                            align(size_B, 8));
 }
 
@@ -200,7 +200,7 @@ libagx_heap_alloc_points(constant struct libagx_tess_args *p, uint patch,
    uint32_t alloc_el = alloc_B / elsize_B;
    p->coord_allocs[patch] = alloc_el;
 
-   return (global struct libagx_tess_point *)(((uintptr_t)p->heap->heap) +
+   return (global struct libagx_tess_point *)(((uintptr_t)p->heap->base) +
                                               alloc_B);
 }
 
diff --git a/src/asahi/libagx/tessellator.h b/src/asahi/libagx/tessellator.h
index ced21ad563e..5841d5578f1 100644
--- a/src/asahi/libagx/tessellator.h
+++ b/src/asahi/libagx/tessellator.h
@@ -29,7 +29,7 @@ static_assert(sizeof(struct libagx_tess_point) == 8);
 
 struct libagx_tess_args {
    /* Heap to allocate tessellator outputs in */
-   DEVICE(struct agx_geometry_state) heap;
+   DEVICE(struct agx_heap) heap;
 
    /* Patch coordinate buffer, indexed as:
     *
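A side note on the two allocators this rename touches: the geometry.h helper
bumps heap->bottom non-atomically, since its callers run in single-threaded
setup kernels, while the tessellator's libagx_heap_alloc bumps the same field
with atomic_fetch_add because many patches allocate concurrently. Only the
non-atomic path checks for overflow. For illustration, a sketch of what an
atomic variant with the same overflow check could look like in OpenCL C;
agx_heap_alloc_atomic_offs is hypothetical and not part of this patch:

static inline uint
agx_heap_alloc_atomic_offs(global struct agx_heap *heap, uint size_B)
{
   /* Atomically bump the allocation point, as libagx_heap_alloc does */
   uint offs = atomic_fetch_add((volatile atomic_uint *)(&heap->bottom),
                                align(size_B, 16));

   /* Overflow check mirroring agx_heap_alloc_nonatomic_offs */
   if (offs + size_B > heap->size) {
      printf("FATAL: GPU heap overflow, allocating size %u, at offset %u, "
             "heap size %u!",
             size_B, offs, heap->size);
      abort();
   }

   return offs;
}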
diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c
index 9a558965bdb..1c83dd90f72 100644
--- a/src/asahi/vulkan/hk_cmd_draw.c
+++ b/src/asahi/vulkan/hk_cmd_draw.c
@@ -994,11 +994,10 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
 }
 
 static uint64_t
-hk_geometry_state(struct hk_cmd_buffer *cmd)
+hk_heap(struct hk_cmd_buffer *cmd)
 {
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
 
-   /* We tie heap allocation to geometry state allocation, so allocate now. */
    if (unlikely(!dev->heap)) {
       perf_debug(cmd, "Allocating heap");
 
@@ -1008,29 +1007,28 @@ hk_geometry_state(struct hk_cmd_buffer *cmd)
       /* The geometry state buffer is initialized here and then is treated by
       * the CPU as rodata, even though the GPU uses it for scratch internally.
       */
-      off_t off = dev->rodata.geometry_state - dev->rodata.bo->va->addr;
-      struct agx_geometry_state *map = agx_bo_map(dev->rodata.bo) + off;
+      off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
+      struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
 
-      *map = (struct agx_geometry_state){
-         .heap = dev->heap->va->addr,
-         .heap_size = size,
+      *map = (struct agx_heap){
+         .base = dev->heap->va->addr,
+         .size = size,
       };
    }
 
    /* We need to free all allocations after each command buffer execution */
    if (!cmd->uses_heap) {
       perf_debug(cmd, "Freeing heap");
-      uint64_t addr = dev->rodata.geometry_state;
+      uint64_t addr = dev->rodata.heap;
 
       /* Zeroing the allocated index frees everything */
-      hk_queue_write(cmd,
-                     addr + offsetof(struct agx_geometry_state, heap_bottom), 0,
+      hk_queue_write(cmd, addr + offsetof(struct agx_heap, bottom), 0,
                      true /* after gfx */);
 
       cmd->uses_heap = true;
    }
 
-   return dev->rodata.geometry_state;
+   return dev->rodata.heap;
 }
 
 static uint64_t
@@ -1222,7 +1220,7 @@ hk_upload_tess_params(struct hk_cmd_buffer *cmd, struct libagx_tess_args *out,
       : LIBAGX_TESS_PARTITIONING_FRACTIONAL_EVEN;
 
    struct libagx_tess_args args = {
-      .heap = hk_geometry_state(cmd),
+      .heap = hk_heap(cmd),
       .tcs_stride_el = tcs->info.tess.tcs_output_stride / 4,
       .statistic = hk_pipeline_stat_addr(
          cmd,
@@ -1246,7 +1244,7 @@ hk_upload_tess_params(struct hk_cmd_buffer *cmd, struct libagx_tess_args *out,
    uint32_t draw_stride_el = 5;
    size_t draw_stride_B = draw_stride_el * sizeof(uint32_t);
 
-   /* heap is allocated by hk_geometry_state */
+   /* heap is allocated by hk_heap */
    args.patch_coord_buffer = dev->heap->va->addr;
 
    if (!agx_is_indirect(draw.b)) {
@@ -1389,7 +1387,7 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct agx_draw draw,
    assert(draw_count == 1 && "TODO: multidraw");
 
    struct libagx_unroll_restart_args ia = {
-      .heap = hk_geometry_state(cmd),
+      .heap = hk_heap(cmd),
       .index_buffer = draw.index_buffer,
       .in_draw = draw.b.ptr,
       .out_draw = hk_pool_alloc(cmd, 5 * sizeof(uint32_t) * draw_count, 4).gpu,
@@ -1449,7 +1447,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
    /* Setup grids */
    if (agx_is_indirect(draw.b)) {
       struct libagx_gs_setup_indirect_args gsi = {
-         .state = hk_geometry_state(cmd),
+         .heap = hk_heap(cmd),
          .index_buffer = draw.index_buffer,
         .draw = draw.b.ptr,
         .ia = desc->root.draw.input_assembly,
@@ -3534,7 +3532,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
      uint64_t target = hk_cs_alloc_for_indirect(cs, size_B);
 
      libagx_draw_robust_index(cmd, agx_1d(32), AGX_BARRIER_ALL | AGX_PREGFX,
-                               target, hk_geometry_state(cmd), draw.b.ptr,
+                               target, hk_heap(cmd), draw.b.ptr,
                                draw.index_buffer, draw.index_buffer_range_B,
                                draw.restart, topology, draw.index_size);
   } else {
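Worth spelling out the free-everything trick above: because allocation is a
pure bump of 'bottom', rewinding that one 32-bit field to zero releases every
allocation while 'base' and 'size' stay valid device-wide. A self-contained
host-side sketch of the layout and the reset write; heap_reset and
gpu_write_u32 are hypothetical stand-ins for the hk_queue_write call:

#include <stddef.h>
#include <stdint.h>

/* CPU-side mirror of the packed descriptor in geometry.h: a 64-bit GPU
 * address followed by two 32-bit words, 16 bytes total. */
struct agx_heap {
   uint64_t base;
   uint32_t bottom, size;
} __attribute__((packed));

/* Resetting the heap is a single 4-byte write of zero to 'bottom';
 * gpu_write_u32 stands in for hk_queue_write with after_gfx set. */
static void
heap_reset(uint64_t heap_va, void (*gpu_write_u32)(uint64_t va, uint32_t v))
{
   gpu_write_u32(heap_va + offsetof(struct agx_heap, bottom), 0);
}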
diff --git a/src/asahi/vulkan/hk_device.c b/src/asahi/vulkan/hk_device.c
index efd3ada4099..ccbc62fa7f8 100644
--- a/src/asahi/vulkan/hk_device.c
+++ b/src/asahi/vulkan/hk_device.c
@@ -101,7 +101,7 @@ hk_upload_rodata(struct hk_device *dev)
    *image_heap_ptr = dev->images.bo->va->addr;
    offs += sizeof(uint64_t);
 
-   /* The geometry state buffer isn't strictly readonly data, but we only have a
+   /* The heap descriptor isn't strictly readonly data, but we only have a
     * single instance of it device-wide and -- after initializing at heap
     * allocate time -- it is read-only from the CPU perspective. The GPU uses it
     * for scratch, but is required to reset it after use to ensure resubmitting
@@ -110,8 +110,8 @@ hk_upload_rodata(struct hk_device *dev)
    * So, we allocate it here for convenience.
    */
   offs = align(offs, sizeof(uint64_t));
-   dev->rodata.geometry_state = dev->rodata.bo->va->addr + offs;
-   offs += sizeof(struct agx_geometry_state);
+   dev->rodata.heap = dev->rodata.bo->va->addr + offs;
+   offs += sizeof(struct agx_heap);
 
    /* For null storage descriptors, we need to reserve 16 bytes to catch writes.
    * No particular content is required; we cannot get robustness2 semantics
diff --git a/src/asahi/vulkan/hk_device.h b/src/asahi/vulkan/hk_device.h
index 651e865e2d0..1582793c68e 100644
--- a/src/asahi/vulkan/hk_device.h
+++ b/src/asahi/vulkan/hk_device.h
@@ -85,7 +85,7 @@ struct hk_device {
       struct agx_bo *bo;
       struct agx_usc_uniform_packed image_heap;
       uint64_t null_sink;
-      uint64_t geometry_state;
+      uint64_t heap;
   } rodata;
 
   /* Pages for backing sparse resources */
diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c
index 6d61c85470d..20274478b47 100644
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -133,7 +133,7 @@ agx_batch_init(struct agx_context *ctx,
    batch->clear_depth = 0;
    batch->clear_stencil = 0;
    batch->varyings = 0;
-   batch->geometry_state = 0;
+   batch->heap = 0;
    batch->initialized = false;
    batch->draws = 0;
    batch->incoherent_writes = false;
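For clarity, the rodata change above just carves the 16-byte agx_heap
descriptor out of the shared rodata BO at an 8-byte-aligned offset. A
condensed sketch of that carve-out; carve_heap_descriptor and its parameters
are hypothetical stand-ins for the locals in hk_upload_rodata:

#include <stdint.h>

/* Align the running offset, record the descriptor's GPU VA, then advance
 * past the descriptor, as hk_upload_rodata does. */
static uint64_t
carve_heap_descriptor(uint64_t bo_va, uint64_t *offs)
{
   /* offs = align(offs, sizeof(uint64_t)) */
   *offs = (*offs + sizeof(uint64_t) - 1) & ~(uint64_t)(sizeof(uint64_t) - 1);

   uint64_t heap_va = bo_va + *offs;
   *offs += 16; /* sizeof(struct agx_heap), static_assert'd to be 4 * 4 */
   return heap_va;
}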
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 0f22d97d03c..22b7c12cffa 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -3938,11 +3938,11 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
 }
 
 static uint64_t
-agx_batch_geometry_state(struct agx_batch *batch)
+agx_batch_heap(struct agx_batch *batch)
 {
    struct agx_context *ctx = batch->ctx;
 
-   if (!batch->geometry_state) {
+   if (!batch->heap) {
       uint32_t size = 128 * 1024 * 1024;
 
       if (!ctx->heap) {
@@ -3950,18 +3950,18 @@ agx_batch_geometry_state(struct agx_batch *batch)
                                  PIPE_USAGE_DEFAULT, size);
       }
 
-      struct agx_geometry_state state = {
-         .heap = agx_resource(ctx->heap)->bo->va->addr,
-         .heap_size = size,
+      struct agx_heap heap = {
+         .base = agx_resource(ctx->heap)->bo->va->addr,
+         .size = size,
       };
 
       agx_batch_writes(batch, agx_resource(ctx->heap), 0);
 
-      batch->geometry_state =
-         agx_pool_upload_aligned(&batch->pool, &state, sizeof(state), 8);
+      batch->heap =
+         agx_pool_upload_aligned(&batch->pool, &heap, sizeof(heap), 8);
    }
 
-   return batch->geometry_state;
+   return batch->heap;
 }
 
 static uint64_t
@@ -4154,7 +4154,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
       .vertex_buffer = batch->uniforms.vertex_output_buffer_ptr,
       .ia = batch->uniforms.input_assembly,
       .p = batch->uniforms.geometry_params,
-      .state = agx_batch_geometry_state(batch),
+      .heap = agx_batch_heap(batch),
       .vs_outputs = batch->uniforms.vertex_outputs,
       .index_size_B = info->index_size,
       .prim = info->mode,
@@ -4274,7 +4274,7 @@ agx_draw_without_restart(struct agx_batch *batch,
                                               &out_draws_rsrc.bo);
 
    struct libagx_unroll_restart_args unroll = {
-      .heap = agx_batch_geometry_state(batch),
+      .heap = agx_batch_heap(batch),
       .index_buffer = ib,
       .out_draw = out_draws.gpu,
       .restart_index = info->restart_index,
@@ -4610,11 +4610,11 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
    agx_upload_draw_params(batch, indirect, draws, info);
 
    /* Setup parameters */
-   uint64_t geom_state = agx_batch_geometry_state(batch);
+   uint64_t heap = agx_batch_heap(batch);
    assert((tcs->tess.output_stride & 3) == 0 && "must be aligned");
 
    struct libagx_tess_args args = {
-      .heap = geom_state,
+      .heap = heap,
       .tcs_stride_el = tcs->tess.output_stride / 4,
       .statistic = agx_get_query_address(
          batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_DS_INVOCATIONS]),
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index e6dd33aaf27..5ae930d4af4 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -419,8 +419,8 @@ struct agx_batch {
    uint64_t geom_indirect;
    struct agx_bo *geom_indirect_bo;
 
-   /* Geometry state buffer if geometry/etc shaders are used */
-   uint64_t geometry_state;
+   /* Heap descriptor if dynamic allocation is required */
+   uint64_t heap;
 
    /* Uploaded descriptors */
    uint32_t texture_count[PIPE_SHADER_TYPES];
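One design note on the Gallium side: unlike the Vulkan driver, which keeps a
single device-wide agx_heap descriptor in rodata and must explicitly rewind
'bottom' after each command buffer, agx_batch_heap uploads a fresh descriptor
into the batch pool on first use, so its 'bottom' starts at zero and no reset
write is needed. A condensed sketch of that lazy per-batch pattern;
get_batch_heap and its upload callback are stand-ins, not the driver's exact
API:

#include <stddef.h>
#include <stdint.h>

struct agx_heap {
   uint64_t base;
   uint32_t bottom, size;
} __attribute__((packed));

/* On first use, build a descriptor whose 'bottom' is zero (designated
 * initializers zero the omitted field), upload it, and cache the GPU
 * address; 'upload' stands in for agx_pool_upload_aligned. */
static uint64_t
get_batch_heap(uint64_t *cached, uint64_t heap_va, uint32_t heap_size,
               uint64_t (*upload)(const void *data, size_t size_B))
{
   if (!*cached) {
      struct agx_heap heap = {
         .base = heap_va,
         .size = heap_size, /* .bottom stays 0: the heap starts empty */
      };

      *cached = upload(&heap, sizeof(heap));
   }

   return *cached;
}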