mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-23 06:10:23 +01:00
hk: drop image heap
now everything is just inline in the set. saves 24MiB. fossil changes from start of series to here: Totals from 45270 (83.80% of 54019) affected shaders: MaxWaves: 44151552 -> 44167808 (+0.04%); split: +0.05%, -0.01% Instrs: 22630059 -> 22599471 (-0.14%); split: -0.31%, +0.17% CodeSize: 160981702 -> 160987912 (+0.00%); split: -0.29%, +0.29% Spills: 66105 -> 66276 (+0.26%); split: -1.09%, +1.35% Fills: 43894 -> 43987 (+0.21%); split: -1.82%, +2.03% Scratch: 375176 -> 374896 (-0.07%); split: -0.20%, +0.13% ALU: 17656552 -> 17640750 (-0.09%); split: -0.29%, +0.20% FSCIB: 17601880 -> 17586092 (-0.09%); split: -0.29%, +0.20% IC: 4990178 -> 4995334 (+0.10%); split: -0.05%, +0.15% GPRs: 3417893 -> 3413589 (-0.13%); split: -0.17%, +0.04% Uniforms: 9260606 -> 9474203 (+2.31%); split: -0.22%, +2.52% Preamble instrs: 9679627 -> 9282629 (-4.10%); split: -4.22%, +0.12% Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36127>
This commit is contained in:
parent
1ea1b2d360
commit
5598d6aa10
3 changed files with 1 additions and 53 deletions
|
|
@ -76,20 +76,6 @@ hk_upload_rodata(struct hk_device *dev)
|
|||
agx_pack_txf_sampler((struct agx_sampler_packed *)(map + offs));
|
||||
offs += AGX_SAMPLER_LENGTH;
|
||||
|
||||
/* The image heap is allocated on the device prior to the rodata. The heap
|
||||
* lives as long as the device does and has a stable address (requiring
|
||||
* sparse binding to grow dynamically). That means its address is effectively
|
||||
* rodata and can be uploaded now. agx_usc_uniform requires an indirection to
|
||||
* push the heap address, so this takes care of that indirection up front to
|
||||
* cut an alloc/upload at draw time.
|
||||
*/
|
||||
offs = align(offs, sizeof(uint64_t));
|
||||
dev->rodata.image_heap_ptr = dev->rodata.bo->va->addr + offs;
|
||||
|
||||
uint64_t *image_heap_ptr = (void *)map + offs;
|
||||
*image_heap_ptr = dev->images.bo->va->addr;
|
||||
offs += sizeof(uint64_t);
|
||||
|
||||
/* The heap descriptor isn't strictly readonly data, but we only have a
|
||||
* single instance of it device-wide and -- after initializing at heap
|
||||
* allocate time -- it is read-only from the CPU perspective. The GPU uses it
|
||||
|
|
@ -276,28 +262,6 @@ hk_get_timestamp(struct vk_device *device, uint64_t *timestamp)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* To implement nullDescriptor, the descriptor set code will reference
|
||||
* preuploaded null descriptors at fixed offsets in the image heap. Here we
|
||||
* upload those descriptors, initializing the image heap.
|
||||
*/
|
||||
static void
|
||||
hk_upload_null_descriptors(struct hk_device *dev)
|
||||
{
|
||||
struct agx_texture_packed null_tex;
|
||||
struct agx_pbe_packed null_pbe;
|
||||
uint32_t offset_tex, offset_pbe;
|
||||
|
||||
agx_set_null_texture(&null_tex);
|
||||
agx_set_null_pbe(&null_pbe);
|
||||
|
||||
hk_descriptor_table_add(dev, &dev->images, &null_tex, sizeof(null_tex),
|
||||
&offset_tex);
|
||||
|
||||
hk_descriptor_table_add(dev, &dev->images, &null_pbe, sizeof(null_pbe),
|
||||
&offset_pbe);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
hk_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
const VkDeviceCreateInfo *pCreateInfo,
|
||||
|
|
@ -395,14 +359,9 @@ hk_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
dev->dev.user_timestamp_to_ns.den &&
|
||||
"user timestamps are in ns");
|
||||
|
||||
result = hk_descriptor_table_init(dev, &dev->images, AGX_TEXTURE_LENGTH,
|
||||
1024, 1024 * 1024);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_dev;
|
||||
|
||||
result = hk_init_sampler_heap(dev, &dev->samplers);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_images;
|
||||
goto fail_dev;
|
||||
|
||||
result = hk_descriptor_table_init(
|
||||
dev, &dev->occlusion_queries, sizeof(uint64_t), AGX_MAX_OCCLUSION_QUERIES,
|
||||
|
|
@ -414,9 +373,6 @@ hk_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
if (result != VK_SUCCESS)
|
||||
goto fail_queries;
|
||||
|
||||
/* Depends on rodata */
|
||||
hk_upload_null_descriptors(dev);
|
||||
|
||||
/* XXX: error handling, and should this even go on the device? */
|
||||
agx_bg_eot_init(&dev->bg_eot, &dev->dev);
|
||||
if (!dev->bg_eot.ht) {
|
||||
|
|
@ -498,8 +454,6 @@ fail_queries:
|
|||
hk_descriptor_table_finish(dev, &dev->occlusion_queries);
|
||||
fail_samplers:
|
||||
hk_destroy_sampler_heap(dev, &dev->samplers);
|
||||
fail_images:
|
||||
hk_descriptor_table_finish(dev, &dev->images);
|
||||
fail_dev:
|
||||
agx_close_device(&dev->dev);
|
||||
fail_fd:
|
||||
|
|
@ -541,7 +495,6 @@ hk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
}
|
||||
|
||||
hk_destroy_sampler_heap(dev, &dev->samplers);
|
||||
hk_descriptor_table_finish(dev, &dev->images);
|
||||
hk_descriptor_table_finish(dev, &dev->occlusion_queries);
|
||||
agx_bo_unreference(&dev->dev, dev->rodata.bo);
|
||||
agx_bo_unreference(&dev->dev, dev->heap);
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ struct hk_device {
|
|||
struct agx_device dev;
|
||||
struct agxdecode_ctx *decode_ctx;
|
||||
|
||||
struct hk_descriptor_table images;
|
||||
struct hk_descriptor_table occlusion_queries;
|
||||
struct hk_sampler_heap samplers;
|
||||
|
||||
|
|
@ -79,7 +78,6 @@ struct hk_device {
|
|||
|
||||
struct {
|
||||
struct agx_bo *bo;
|
||||
uint64_t image_heap_ptr;
|
||||
uint64_t heap;
|
||||
} rodata;
|
||||
|
||||
|
|
|
|||
|
|
@ -868,9 +868,6 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
|
|||
agxdecode_drm_cmdbuf(dev->dev.agxdecode, &dev->dev.params, &payload,
|
||||
true);
|
||||
|
||||
agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr,
|
||||
dev->images.alloc);
|
||||
|
||||
agxdecode_next_frame();
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue