hk: drop image heap

now everything is just inline in the set. saves 24MiB. fossil changes from start
of series to here:

Totals from 45270 (83.80% of 54019) affected shaders:
MaxWaves: 44151552 -> 44167808 (+0.04%); split: +0.05%, -0.01%
Instrs: 22630059 -> 22599471 (-0.14%); split: -0.31%, +0.17%
CodeSize: 160981702 -> 160987912 (+0.00%); split: -0.29%, +0.29%
Spills: 66105 -> 66276 (+0.26%); split: -1.09%, +1.35%
Fills: 43894 -> 43987 (+0.21%); split: -1.82%, +2.03%
Scratch: 375176 -> 374896 (-0.07%); split: -0.20%, +0.13%
ALU: 17656552 -> 17640750 (-0.09%); split: -0.29%, +0.20%
FSCIB: 17601880 -> 17586092 (-0.09%); split: -0.29%, +0.20%
IC: 4990178 -> 4995334 (+0.10%); split: -0.05%, +0.15%
GPRs: 3417893 -> 3413589 (-0.13%); split: -0.17%, +0.04%
Uniforms: 9260606 -> 9474203 (+2.31%); split: -0.22%, +2.52%
Preamble instrs: 9679627 -> 9282629 (-4.10%); split: -4.22%, +0.12%

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36127>
This commit is contained in:
Alyssa Rosenzweig 2025-07-12 20:25:26 -04:00 committed by Marge Bot
parent 1ea1b2d360
commit 5598d6aa10
3 changed files with 1 additions and 53 deletions

View file

@ -76,20 +76,6 @@ hk_upload_rodata(struct hk_device *dev)
agx_pack_txf_sampler((struct agx_sampler_packed *)(map + offs));
offs += AGX_SAMPLER_LENGTH;
/* The image heap is allocated on the device prior to the rodata. The heap
* lives as long as the device does and has a stable address (requiring
* sparse binding to grow dynamically). That means its address is effectively
* rodata and can be uploaded now. agx_usc_uniform requires an indirection to
* push the heap address, so this takes care of that indirection up front to
* cut an alloc/upload at draw time.
*/
offs = align(offs, sizeof(uint64_t));
dev->rodata.image_heap_ptr = dev->rodata.bo->va->addr + offs;
uint64_t *image_heap_ptr = (void *)map + offs;
*image_heap_ptr = dev->images.bo->va->addr;
offs += sizeof(uint64_t);
/* The heap descriptor isn't strictly readonly data, but we only have a
* single instance of it device-wide and -- after initializing at heap
* allocate time -- it is read-only from the CPU perspective. The GPU uses it
@ -276,28 +262,6 @@ hk_get_timestamp(struct vk_device *device, uint64_t *timestamp)
return VK_SUCCESS;
}
/*
* To implement nullDescriptor, the descriptor set code will reference
* preuploaded null descriptors at fixed offsets in the image heap. Here we
* upload those descriptors, initializing the image heap.
*/
static void
hk_upload_null_descriptors(struct hk_device *dev)
{
struct agx_texture_packed null_tex;
struct agx_pbe_packed null_pbe;
uint32_t offset_tex, offset_pbe;
agx_set_null_texture(&null_tex);
agx_set_null_pbe(&null_pbe);
hk_descriptor_table_add(dev, &dev->images, &null_tex, sizeof(null_tex),
&offset_tex);
hk_descriptor_table_add(dev, &dev->images, &null_pbe, sizeof(null_pbe),
&offset_pbe);
}
VKAPI_ATTR VkResult VKAPI_CALL
hk_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
@ -395,14 +359,9 @@ hk_CreateDevice(VkPhysicalDevice physicalDevice,
dev->dev.user_timestamp_to_ns.den &&
"user timestamps are in ns");
result = hk_descriptor_table_init(dev, &dev->images, AGX_TEXTURE_LENGTH,
1024, 1024 * 1024);
if (result != VK_SUCCESS)
goto fail_dev;
result = hk_init_sampler_heap(dev, &dev->samplers);
if (result != VK_SUCCESS)
goto fail_images;
goto fail_dev;
result = hk_descriptor_table_init(
dev, &dev->occlusion_queries, sizeof(uint64_t), AGX_MAX_OCCLUSION_QUERIES,
@ -414,9 +373,6 @@ hk_CreateDevice(VkPhysicalDevice physicalDevice,
if (result != VK_SUCCESS)
goto fail_queries;
/* Depends on rodata */
hk_upload_null_descriptors(dev);
/* XXX: error handling, and should this even go on the device? */
agx_bg_eot_init(&dev->bg_eot, &dev->dev);
if (!dev->bg_eot.ht) {
@ -498,8 +454,6 @@ fail_queries:
hk_descriptor_table_finish(dev, &dev->occlusion_queries);
fail_samplers:
hk_destroy_sampler_heap(dev, &dev->samplers);
fail_images:
hk_descriptor_table_finish(dev, &dev->images);
fail_dev:
agx_close_device(&dev->dev);
fail_fd:
@ -541,7 +495,6 @@ hk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
}
hk_destroy_sampler_heap(dev, &dev->samplers);
hk_descriptor_table_finish(dev, &dev->images);
hk_descriptor_table_finish(dev, &dev->occlusion_queries);
agx_bo_unreference(&dev->dev, dev->rodata.bo);
agx_bo_unreference(&dev->dev, dev->heap);

View file

@ -66,7 +66,6 @@ struct hk_device {
struct agx_device dev;
struct agxdecode_ctx *decode_ctx;
struct hk_descriptor_table images;
struct hk_descriptor_table occlusion_queries;
struct hk_sampler_heap samplers;
@ -79,7 +78,6 @@ struct hk_device {
struct {
struct agx_bo *bo;
uint64_t image_heap_ptr;
uint64_t heap;
} rodata;

View file

@ -868,9 +868,6 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
agxdecode_drm_cmdbuf(dev->dev.agxdecode, &dev->dev.params, &payload,
true);
agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr,
dev->images.alloc);
agxdecode_next_frame();
}