libagx: rename agx_geometry_state to agx_heap

No other state persists. This cleans up a lot of naming.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34661>
Alyssa Rosenzweig 2025-04-23 10:54:44 -04:00 committed by Marge Bot
parent 29cc2b6d42
commit d339bf7a98
10 changed files with 64 additions and 70 deletions
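
For reference, the renamed descriptor as it reads after this change, taken from the header diff below (the struct loses the "geometry state" name and its fields drop the redundant heap_ prefix):

/* Heap to allocate from. */
struct agx_heap {
   DEVICE(uchar) base;    /* was: DEVICE(uchar) heap */
   uint32_t bottom, size; /* was: heap_bottom, heap_size */
} PACKED;
static_assert(sizeof(struct agx_heap) == 4 * 4);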

@@ -443,9 +443,8 @@ first_true_thread_in_workgroup(bool cond, local uint *scratch)
  * sets up most of the new draw descriptor.
  */
 static global void *
-setup_unroll_for_draw(global struct agx_geometry_state *heap,
-                      constant uint *in_draw, global uint *out,
-                      enum mesa_prim mode, uint index_size_B)
+setup_unroll_for_draw(global struct agx_heap *heap, constant uint *in_draw,
+                      global uint *out, enum mesa_prim mode, uint index_size_B)
 {
    /* Determine an upper bound on the memory required for the index buffer.
     * Restarts only decrease the unrolled index buffer size, so the maximum size
@@ -469,16 +468,15 @@ setup_unroll_for_draw(global struct agx_geometry_state *heap,
    out[4] = in_draw[4]; /* base instance */

    /* Return the index buffer we allocated */
-   return (global uchar *)heap->heap + old_heap_bottom_B;
+   return (global uchar *)heap->base + old_heap_bottom_B;
 }

 KERNEL(1024)
-libagx_unroll_restart(global struct agx_geometry_state *heap,
-                      uint64_t index_buffer, constant uint *in_draw,
-                      global uint32_t *out_draw, uint32_t max_draws,
-                      uint32_t restart_index, uint32_t index_buffer_size_el,
-                      uint32_t index_size_log2, uint32_t flatshade_first,
-                      uint mode__11)
+libagx_unroll_restart(global struct agx_heap *heap, uint64_t index_buffer,
+                      constant uint *in_draw, global uint32_t *out_draw,
+                      uint32_t max_draws, uint32_t restart_index,
+                      uint32_t index_buffer_size_el, uint32_t index_size_log2,
+                      uint32_t flatshade_first, uint mode__11)
 {
    uint32_t index_size_B = 1 << index_size_log2;
    enum mesa_prim mode = libagx_uncompact_prim(mode__11);
@@ -579,7 +577,7 @@ libagx_gs_setup_indirect(
    global uintptr_t *vertex_buffer /* output */,
    global struct agx_ia_state *ia /* output */,
    global struct agx_geometry_params *p /* output */,
-   global struct agx_geometry_state *state,
+   global struct agx_heap *heap,
    uint64_t vs_outputs /* Vertex (TES) output mask */,
    uint32_t index_size_B /* 0 if no index bffer */,
    uint32_t index_buffer_range_el,
@ -624,11 +622,11 @@ libagx_gs_setup_indirect(
if (is_prefix_summing) {
p->count_buffer = agx_heap_alloc_nonatomic(
state, p->input_primitives * p->count_buffer_stride);
heap, p->input_primitives * p->count_buffer_stride);
}
p->input_buffer =
(uintptr_t)agx_heap_alloc_nonatomic(state, vertex_buffer_size);
(uintptr_t)agx_heap_alloc_nonatomic(heap, vertex_buffer_size);
*vertex_buffer = p->input_buffer;
p->input_mask = vs_outputs;
@@ -645,10 +643,10 @@ libagx_gs_setup_indirect(

    if (shape == AGX_GS_SHAPE_DYNAMIC_INDEXED) {
       cmd->firstIndex =
-         agx_heap_alloc_nonatomic_offs(state, cmd->indexCount * 4) / 4;
+         agx_heap_alloc_nonatomic_offs(heap, cmd->indexCount * 4) / 4;

       p->output_index_buffer =
-         (global uint *)(state->heap + (cmd->firstIndex * 4));
+         (global uint *)(heap->base + (cmd->firstIndex * 4));
    }
 }
@@ -750,7 +748,7 @@ libagx_prefix_sum_tess(global struct libagx_tess_args *p, global uint *c_prims,
    uint32_t elsize_B = sizeof(uint32_t);
    uint32_t size_B = total * elsize_B;
    uint alloc_B = agx_heap_alloc_nonatomic_offs(p->heap, size_B);
-   p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->heap) + alloc_B);
+   p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->base) + alloc_B);

    /* ...and now we can generate the API indexed draw */
    global uint32_t *desc = p->out_draws;

@@ -104,27 +104,25 @@ agx_gs_index_size(enum agx_gs_shape shape)
    }
 }

-/* Packed geometry state buffer */
-struct agx_geometry_state {
-   /* Heap to allocate from. */
-   DEVICE(uchar) heap;
-   uint32_t heap_bottom, heap_size;
+/* Heap to allocate from. */
+struct agx_heap {
+   DEVICE(uchar) base;
+   uint32_t bottom, size;
 } PACKED;
-static_assert(sizeof(struct agx_geometry_state) == 4 * 4);
+static_assert(sizeof(struct agx_heap) == 4 * 4);

 #ifdef __OPENCL_VERSION__
 static inline uint
-agx_heap_alloc_nonatomic_offs(global struct agx_geometry_state *heap,
-                              uint size_B)
+agx_heap_alloc_nonatomic_offs(global struct agx_heap *heap, uint size_B)
 {
-   uint offs = heap->heap_bottom;
-   heap->heap_bottom += align(size_B, 16);
+   uint offs = heap->bottom;
+   heap->bottom += align(size_B, 16);

-   // Use printf+abort because assert is stripped from release builds.
-   if (heap->heap_bottom >= heap->heap_size) {
+   /* Use printf+abort because assert is stripped from release builds. */
+   if (heap->bottom >= heap->size) {
       printf(
          "FATAL: GPU heap overflow, allocating size %u, at offset %u, heap size %u!",
-         size_B, offs, heap->heap_size);
+         size_B, offs, heap->size);

       abort();
    }
@@ -133,9 +131,9 @@ agx_heap_alloc_nonatomic_offs(global struct agx_geometry_state *heap,
 }

 static inline global void *
-agx_heap_alloc_nonatomic(global struct agx_geometry_state *heap, uint size_B)
+agx_heap_alloc_nonatomic(global struct agx_heap *heap, uint size_B)
 {
-   return heap->heap + agx_heap_alloc_nonatomic_offs(heap, size_B);
+   return heap->base + agx_heap_alloc_nonatomic_offs(heap, size_B);
 }

 #endif
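
A minimal usage sketch of the helpers above (hypothetical kernel, not part of this change): agx_heap_alloc_nonatomic() bumps heap->bottom by the 16-byte-aligned size, aborts on overflow, and returns a pointer into heap->base.

KERNEL(1)
libagx_example_fill_indices(global struct agx_heap *heap, uint count)
{
   /* Carve a scratch index buffer out of the shared heap. */
   global uint *indices =
      (global uint *)agx_heap_alloc_nonatomic(heap, count * sizeof(uint));

   for (uint i = 0; i < count; ++i)
      indices[i] = i;
}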

@@ -118,10 +118,10 @@ tess_factors(constant struct libagx_tess_args *p, uint patch)
 }

 static inline uint
-libagx_heap_alloc(global struct agx_geometry_state *heap, uint size_B)
+libagx_heap_alloc(global struct agx_heap *heap, uint size_B)
 {
    // TODO: drop align to 4 I think
-   return atomic_fetch_add((volatile atomic_uint *)(&heap->heap_bottom),
+   return atomic_fetch_add((volatile atomic_uint *)(&heap->bottom),
                            align(size_B, 8));
 }
@@ -200,7 +200,7 @@ libagx_heap_alloc_points(constant struct libagx_tess_args *p, uint patch,

    uint32_t alloc_el = alloc_B / elsize_B;
    p->coord_allocs[patch] = alloc_el;
-   return (global struct libagx_tess_point *)(((uintptr_t)p->heap->heap) +
+   return (global struct libagx_tess_point *)(((uintptr_t)p->heap->base) +
                                               alloc_B);
 }

@@ -29,7 +29,7 @@ static_assert(sizeof(struct libagx_tess_point) == 8);

 struct libagx_tess_args {
    /* Heap to allocate tessellator outputs in */
-   DEVICE(struct agx_geometry_state) heap;
+   DEVICE(struct agx_heap) heap;

    /* Patch coordinate buffer, indexed as:
     *

@@ -994,11 +994,10 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
 }

 static uint64_t
-hk_geometry_state(struct hk_cmd_buffer *cmd)
+hk_heap(struct hk_cmd_buffer *cmd)
 {
    struct hk_device *dev = hk_cmd_buffer_device(cmd);

-   /* We tie heap allocation to geometry state allocation, so allocate now. */
    if (unlikely(!dev->heap)) {
       perf_debug(cmd, "Allocating heap");
@@ -1008,29 +1007,28 @@ hk_geometry_state(struct hk_cmd_buffer *cmd)
       /* The geometry state buffer is initialized here and then is treated by
        * the CPU as rodata, even though the GPU uses it for scratch internally.
        */
-      off_t off = dev->rodata.geometry_state - dev->rodata.bo->va->addr;
-      struct agx_geometry_state *map = agx_bo_map(dev->rodata.bo) + off;
+      off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
+      struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;

-      *map = (struct agx_geometry_state){
-         .heap = dev->heap->va->addr,
-         .heap_size = size,
+      *map = (struct agx_heap){
+         .base = dev->heap->va->addr,
+         .size = size,
       };
    }

    /* We need to free all allocations after each command buffer execution */
    if (!cmd->uses_heap) {
       perf_debug(cmd, "Freeing heap");
-      uint64_t addr = dev->rodata.geometry_state;
+      uint64_t addr = dev->rodata.heap;

       /* Zeroing the allocated index frees everything */
-      hk_queue_write(cmd,
-                     addr + offsetof(struct agx_geometry_state, heap_bottom), 0,
+      hk_queue_write(cmd, addr + offsetof(struct agx_heap, bottom), 0,
                      true /* after gfx */);

       cmd->uses_heap = true;
    }

-   return dev->rodata.geometry_state;
+   return dev->rodata.heap;
 }

 static uint64_t
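
For context, a hypothetical host-side illustration (not part of this change) of why the hk_queue_write above frees everything: agx_heap is a bump allocator, so resetting the bottom cursor to zero invalidates every prior allocation at once.

static inline void
example_agx_heap_reset(struct agx_heap *heap)
{
   /* Same effect as the GPU-visible write of 0 to offsetof(struct agx_heap,
    * bottom): any pointer previously handed out by the heap is now stale. */
   heap->bottom = 0;
}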
@@ -1222,7 +1220,7 @@ hk_upload_tess_params(struct hk_cmd_buffer *cmd, struct libagx_tess_args *out,
          : LIBAGX_TESS_PARTITIONING_FRACTIONAL_EVEN;

    struct libagx_tess_args args = {
-      .heap = hk_geometry_state(cmd),
+      .heap = hk_heap(cmd),
       .tcs_stride_el = tcs->info.tess.tcs_output_stride / 4,
       .statistic = hk_pipeline_stat_addr(
          cmd,
@@ -1246,7 +1244,7 @@ hk_upload_tess_params(struct hk_cmd_buffer *cmd, struct libagx_tess_args *out,

    uint32_t draw_stride_el = 5;
    size_t draw_stride_B = draw_stride_el * sizeof(uint32_t);
-   /* heap is allocated by hk_geometry_state */
+   /* heap is allocated by hk_heap */
    args.patch_coord_buffer = dev->heap->va->addr;

    if (!agx_is_indirect(draw.b)) {
@@ -1389,7 +1387,7 @@ hk_draw_without_restart(struct hk_cmd_buffer *cmd, struct agx_draw draw,
    assert(draw_count == 1 && "TODO: multidraw");

    struct libagx_unroll_restart_args ia = {
-      .heap = hk_geometry_state(cmd),
+      .heap = hk_heap(cmd),
       .index_buffer = draw.index_buffer,
       .in_draw = draw.b.ptr,
       .out_draw = hk_pool_alloc(cmd, 5 * sizeof(uint32_t) * draw_count, 4).gpu,
@@ -1449,7 +1447,7 @@ hk_launch_gs_prerast(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
    /* Setup grids */
    if (agx_is_indirect(draw.b)) {
       struct libagx_gs_setup_indirect_args gsi = {
-         .state = hk_geometry_state(cmd),
+         .heap = hk_heap(cmd),
          .index_buffer = draw.index_buffer,
          .draw = draw.b.ptr,
          .ia = desc->root.draw.input_assembly,
@@ -3534,7 +3532,7 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
       uint64_t target = hk_cs_alloc_for_indirect(cs, size_B);

       libagx_draw_robust_index(cmd, agx_1d(32), AGX_BARRIER_ALL | AGX_PREGFX,
-                               target, hk_geometry_state(cmd), draw.b.ptr,
+                               target, hk_heap(cmd), draw.b.ptr,
                                draw.index_buffer, draw.index_buffer_range_B,
                                draw.restart, topology, draw.index_size);
    } else {

@@ -101,7 +101,7 @@ hk_upload_rodata(struct hk_device *dev)
    *image_heap_ptr = dev->images.bo->va->addr;
    offs += sizeof(uint64_t);

-   /* The geometry state buffer isn't strictly readonly data, but we only have a
+   /* The heap descriptor isn't strictly readonly data, but we only have a
     * single instance of it device-wide and -- after initializing at heap
     * allocate time -- it is read-only from the CPU perspective. The GPU uses it
     * for scratch, but is required to reset it after use to ensure resubmitting
@@ -110,8 +110,8 @@ hk_upload_rodata(struct hk_device *dev)
     * So, we allocate it here for convenience.
     */
    offs = align(offs, sizeof(uint64_t));
-   dev->rodata.geometry_state = dev->rodata.bo->va->addr + offs;
-   offs += sizeof(struct agx_geometry_state);
+   dev->rodata.heap = dev->rodata.bo->va->addr + offs;
+   offs += sizeof(struct agx_heap);

    /* For null storage descriptors, we need to reserve 16 bytes to catch writes.
     * No particular content is required; we cannot get robustness2 semantics

@@ -85,7 +85,7 @@ struct hk_device {
       struct agx_bo *bo;
       struct agx_usc_uniform_packed image_heap;
       uint64_t null_sink;
-      uint64_t geometry_state;
+      uint64_t heap;
    } rodata;

    /* Pages for backing sparse resources */

@@ -133,7 +133,7 @@ agx_batch_init(struct agx_context *ctx,
    batch->clear_depth = 0;
    batch->clear_stencil = 0;
    batch->varyings = 0;
-   batch->geometry_state = 0;
+   batch->heap = 0;
    batch->initialized = false;
    batch->draws = 0;
    batch->incoherent_writes = false;

@@ -3938,11 +3938,11 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info,
 }

 static uint64_t
-agx_batch_geometry_state(struct agx_batch *batch)
+agx_batch_heap(struct agx_batch *batch)
 {
    struct agx_context *ctx = batch->ctx;

-   if (!batch->geometry_state) {
+   if (!batch->heap) {
       uint32_t size = 128 * 1024 * 1024;

       if (!ctx->heap) {
@@ -3950,18 +3950,18 @@ agx_batch_geometry_state(struct agx_batch *batch)
                                       PIPE_USAGE_DEFAULT, size);
       }

-      struct agx_geometry_state state = {
-         .heap = agx_resource(ctx->heap)->bo->va->addr,
-         .heap_size = size,
+      struct agx_heap heap = {
+         .base = agx_resource(ctx->heap)->bo->va->addr,
+         .size = size,
       };

       agx_batch_writes(batch, agx_resource(ctx->heap), 0);

-      batch->geometry_state =
-         agx_pool_upload_aligned(&batch->pool, &state, sizeof(state), 8);
+      batch->heap =
+         agx_pool_upload_aligned(&batch->pool, &heap, sizeof(heap), 8);
    }

-   return batch->geometry_state;
+   return batch->heap;
 }

 static uint64_t
@@ -4154,7 +4154,7 @@ agx_launch_gs_prerast(struct agx_batch *batch,
       .vertex_buffer = batch->uniforms.vertex_output_buffer_ptr,
       .ia = batch->uniforms.input_assembly,
       .p = batch->uniforms.geometry_params,
-      .state = agx_batch_geometry_state(batch),
+      .heap = agx_batch_heap(batch),
       .vs_outputs = batch->uniforms.vertex_outputs,
       .index_size_B = info->index_size,
       .prim = info->mode,
@@ -4274,7 +4274,7 @@ agx_draw_without_restart(struct agx_batch *batch,
                            &out_draws_rsrc.bo);

    struct libagx_unroll_restart_args unroll = {
-      .heap = agx_batch_geometry_state(batch),
+      .heap = agx_batch_heap(batch),
       .index_buffer = ib,
       .out_draw = out_draws.gpu,
       .restart_index = info->restart_index,
@@ -4610,11 +4610,11 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
    agx_upload_draw_params(batch, indirect, draws, info);

    /* Setup parameters */
-   uint64_t geom_state = agx_batch_geometry_state(batch);
+   uint64_t heap = agx_batch_heap(batch);
    assert((tcs->tess.output_stride & 3) == 0 && "must be aligned");

    struct libagx_tess_args args = {
-      .heap = geom_state,
+      .heap = heap,
       .tcs_stride_el = tcs->tess.output_stride / 4,
       .statistic = agx_get_query_address(
          batch, ctx->pipeline_statistics[PIPE_STAT_QUERY_DS_INVOCATIONS]),

@@ -419,8 +419,8 @@ struct agx_batch {
    uint64_t geom_indirect;
    struct agx_bo *geom_indirect_bo;

-   /* Geometry state buffer if geometry/etc shaders are used */
-   uint64_t geometry_state;
+   /* Heap descriptor if dynamic allocation is required */
+   uint64_t heap;

    /* Uploaded descriptors */
    uint32_t texture_count[PIPE_SHADER_TYPES];