Per-subqueue CS register save area (panvk, CSF).

Summary of the hunks below:
 * panvk_cs_subqueue_context: reg_dump_addr moves out of tiler_oom_ctx to the
   top level of the context, right after the iter_sb/pad pair.
 * panvk_subqueue gains a regs_save allocation and panvk_queue loses
   tiler_oom_regs_save. The allocation and its release move from
   init_queue()/cleanup_queue() to init_subqueue()/finish_subqueue(), and the
   allocation is skipped when the subqueue's dump region size is zero.
 * panvk_device: tiler_oom.dump_region_size is replaced by a dump_region_size
   array indexed by subqueue, heap-allocated when PAN_ARCH >= 10.
   init_tiler_oom() keeps the largest handler dump size in the
   PANVK_SUBQUEUE_FRAGMENT slot.
 * init_subqueue() zero-initializes alloc_info before filling it member by
   member, so no member is left indeterminate when the struct is passed by
   value to panvk_pool_alloc_mem(). The designated initializer this patch
   removes used to provide that guarantee implicitly.

diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h index 33b79e14061..4863d04c2e2 100644 --- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h @@ -94,6 +94,7 @@ struct panvk_cs_subqueue_context { uint64_t syncobjs; uint32_t iter_sb; uint32_t pad; + uint64_t reg_dump_addr; struct { struct panvk_cs_desc_ringbuf desc_ringbuf; uint64_t tiler_heap; @@ -105,7 +106,6 @@ struct panvk_cs_subqueue_context { uint64_t fbds[PANVK_IR_PASS_COUNT]; uint32_t td_count; uint32_t layer_count; - uint64_t reg_dump_addr; } tiler_oom_ctx; struct { uint64_t syncobjs; diff --git a/src/panfrost/vulkan/csf/panvk_queue.h b/src/panfrost/vulkan/csf/panvk_queue.h index 09041a748e6..1d39b05c2bf 100644 --- a/src/panfrost/vulkan/csf/panvk_queue.h +++ b/src/panfrost/vulkan/csf/panvk_queue.h @@ -38,6 +38,12 @@ struct panvk_subqueue { struct panvk_priv_mem context; uint32_t *reg_file; + /* Memory to save/restore CS registers in functions/exception handlers. + * Because registers are dumped to a fixed address rather than a moving + * stack pointer, nested function/exception handler calls are not supported. 
+ */ + struct panvk_priv_mem regs_save; + struct { struct pan_kmod_bo *bo; size_t size; @@ -68,7 +74,6 @@ struct panvk_queue { struct panvk_desc_ringbuf render_desc_ringbuf; struct panvk_priv_mem syncobjs; struct panvk_priv_mem debug_syncobjs; - struct panvk_priv_mem tiler_oom_regs_save; struct { struct vk_sync *sync; diff --git a/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c b/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c index 3b4f1238a7e..5a746db7737 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c +++ b/src/panfrost/vulkan/csf/panvk_vX_exception_handler.c @@ -47,7 +47,8 @@ generate_tiler_oom_handler(struct panvk_device *dev, struct cs_function handler; struct cs_function_ctx handler_ctx = { .ctx_reg = cs_subqueue_ctx_reg(&b), - .dump_addr_offset = TILER_OOM_CTX_FIELD_OFFSET(reg_dump_addr), + .dump_addr_offset = + offsetof(struct panvk_cs_subqueue_context, reg_dump_addr), }; struct cs_tracing_ctx tracing_ctx = { .enabled = tracing_enabled, @@ -172,10 +173,10 @@ panvk_per_arch(init_tiler_oom)(struct panvk_device *device) /* All handlers must have the same length */ assert(idx == 0 || handler_length == device->tiler_oom.handler_stride); - assert(idx == 0 || - dump_region_size == device->tiler_oom.dump_region_size); device->tiler_oom.handler_stride = handler_length; - device->tiler_oom.dump_region_size = dump_region_size; + device->dump_region_size[PANVK_SUBQUEUE_FRAGMENT] = + MAX2(device->dump_region_size[PANVK_SUBQUEUE_FRAGMENT], + dump_region_size); } } diff --git a/src/panfrost/vulkan/csf/panvk_vX_queue.c b/src/panfrost/vulkan/csf/panvk_vX_queue.c index 7f02bc1a520..86e50d98366 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_queue.c @@ -321,6 +321,7 @@ static void finish_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue) { panvk_pool_free_mem(&queue->subqueues[subqueue].context); + panvk_pool_free_mem(&queue->subqueues[subqueue].regs_save); 
finish_subqueue_tracing(queue, subqueue); } @@ -362,10 +363,20 @@ init_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue) if (result != VK_SUCCESS) return result; - struct panvk_pool_alloc_info alloc_info = { - .size = sizeof(struct panvk_cs_subqueue_context), - .alignment = 64, - }; + struct panvk_pool_alloc_info alloc_info = {0}; + + if (dev->dump_region_size[subqueue]) { + alloc_info.size = dev->dump_region_size[subqueue]; + alloc_info.alignment = sizeof(uint32_t); + subq->regs_save = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info); + if (!panvk_priv_mem_host_addr(subq->regs_save)) { + return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, + "Failed to allocate register save area"); + } + } + + alloc_info.size = sizeof(struct panvk_cs_subqueue_context); + alloc_info.alignment = 64; /* When tracing is enabled, we want to use a non-cached pool, so can get * up-to-date context even if the CS crashed in the middle. */ @@ -385,8 +396,7 @@ init_subqueue(struct panvk_queue *queue, enum panvk_subqueue_id subqueue) .debug.syncobjs = panvk_priv_mem_dev_addr(queue->debug_syncobjs), .debug.tracebuf.cs = subq->tracebuf.addr.dev, .iter_sb = 0, - .tiler_oom_ctx.reg_dump_addr = - panvk_priv_mem_dev_addr(queue->tiler_oom_regs_save), + .reg_dump_addr = panvk_priv_mem_dev_addr(subq->regs_save), }; /* We use the geometry buffer for our temporary CS buffer. 
*/ @@ -524,7 +534,6 @@ cleanup_queue(struct panvk_queue *queue) finish_render_desc_ringbuf(queue); - panvk_pool_free_mem(&queue->tiler_oom_regs_save); panvk_pool_free_mem(&queue->debug_syncobjs); panvk_pool_free_mem(&queue->syncobjs); } @@ -561,16 +570,6 @@ init_queue(struct panvk_queue *queue) } } - alloc_info.size = dev->tiler_oom.dump_region_size; - alloc_info.alignment = sizeof(uint32_t); - queue->tiler_oom_regs_save = - panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info); - if (!panvk_priv_mem_host_addr(queue->tiler_oom_regs_save)) { - result = panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, - "Failed to allocate tiler oom register save area"); - goto err_cleanup_queue; - } - result = init_render_desc_ringbuf(queue); if (result != VK_SUCCESS) goto err_cleanup_queue; diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index 33c1f78a973..cda025c2a91 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -51,7 +51,6 @@ struct panvk_device { struct { struct panvk_priv_bo *handlers_bo; uint32_t handler_stride; - uint32_t dump_region_size; } tiler_oom; struct vk_meta_device meta; @@ -62,6 +61,10 @@ struct panvk_device { struct panvk_pool exec; } mempools; + /* For each subqueue, maximum size of the register dump region needed by + * exception handlers or functions */ + uint32_t *dump_region_size; + struct vk_device_dispatch_table cmd_dispatch; struct panvk_queue *queues[PANVK_MAX_QUEUE_FAMILIES]; diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index aa89d7a09fd..f4397f6d058 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -321,6 +321,18 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, panvk_device_init_mempools(device); +#if PAN_ARCH >= 10 + /* The only reason this is a heap allocation is that PANVK_SUBQUEUE_COUNT + * isn't available in the header */ + 
device->dump_region_size = + vk_zalloc(&device->vk.alloc, PANVK_SUBQUEUE_COUNT * sizeof(uint32_t), + alignof(uint32_t), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device->dump_region_size) { + result = panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto err_free_priv_bos; + } +#endif + #if PAN_ARCH <= 9 result = panvk_priv_bo_create( device, 128 * 1024 * 1024, @@ -423,6 +435,7 @@ err_free_priv_bos: panvk_priv_bo_unref(device->sample_positions); panvk_priv_bo_unref(device->tiler_heap); panvk_device_cleanup_mempools(device); + vk_free(&device->vk.alloc, device->dump_region_size); pan_kmod_vm_destroy(device->kmod.vm); util_vma_heap_finish(&device->as.heap); simple_mtx_destroy(&device->as.lock); @@ -462,6 +475,7 @@ panvk_per_arch(destroy_device)(struct panvk_device *device, panvk_priv_bo_unref(device->tiler_heap); panvk_priv_bo_unref(device->sample_positions); panvk_device_cleanup_mempools(device); + vk_free(&device->vk.alloc, device->dump_region_size); pan_kmod_vm_destroy(device->kmod.vm); util_vma_heap_finish(&device->as.heap); simple_mtx_destroy(&device->as.lock);