From db1073357a4e934f89352a3802f494b544b9584f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 10 Sep 2024 12:27:16 +0200 Subject: [PATCH] panvk: Be robust against allocation failures in a command buffer context When an allocation failure happens, the command buffer should be flagged as invalid, and anything using this memory from the CPU side should be skipped to avoid segfaults. For allocations going through memory pools owned by a command buffer we automate that with panvk_cmd_alloc_xx() macros. Signed-off-by: Boris Brezillon Reviewed-by: Rebecca Mckeever Reviewed-by: Mary Guillemard Reviewed-by: Lars-Ivar Hesselberg Simonsen Reviewed-by: John Anthony Acked-by: Erik Faye-Lund Part-of: --- .../vulkan/bifrost/panvk_vX_meta_desc_copy.c | 54 ++-- src/panfrost/vulkan/jm/panvk_cmd_buffer.h | 11 +- src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c | 74 ++++-- .../vulkan/jm/panvk_vX_cmd_dispatch.c | 44 ++-- src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c | 239 ++++++++++++------ src/panfrost/vulkan/panvk_cmd_alloc.h | 48 ++++ src/panfrost/vulkan/panvk_cmd_desc_state.h | 23 +- src/panfrost/vulkan/panvk_cmd_push_constant.h | 21 +- src/panfrost/vulkan/panvk_meta.h | 8 +- src/panfrost/vulkan/panvk_shader.h | 8 +- src/panfrost/vulkan/panvk_vX_cmd_desc_state.c | 72 ++++-- .../vulkan/panvk_vX_cmd_push_constant.c | 21 +- src/panfrost/vulkan/panvk_vX_device.c | 6 +- src/panfrost/vulkan/panvk_vX_shader.c | 40 +-- 14 files changed, 451 insertions(+), 218 deletions(-) create mode 100644 src/panfrost/vulkan/panvk_cmd_alloc.h diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index dd2551eb5fa..f0173f46722 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -15,6 +15,7 @@ #include "pan_encoder.h" #include "pan_shader.h" +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_device.h" #include "panvk_shader.h" @@ 
-302,27 +303,31 @@ panvk_per_arch(meta_desc_copy_cleanup)(struct panvk_device *dev) panvk_pool_free_mem(&dev->mempools.exec, dev->desc_copy.shader); } -struct panfrost_ptr +VkResult panvk_per_arch(meta_get_copy_desc_job)( - struct panvk_device *dev, struct pan_pool *desc_pool, - const struct panvk_shader *shader, + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader *shader, const struct panvk_descriptor_state *desc_state, const struct panvk_shader_desc_state *shader_desc_state, - uint32_t attrib_buf_idx_offset) + uint32_t attrib_buf_idx_offset, struct panfrost_ptr *job_desc) { + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); + + *job_desc = (struct panfrost_ptr){0}; + if (!shader) - return (struct panfrost_ptr){0}; + return VK_SUCCESS; mali_ptr copy_table = panvk_priv_mem_dev_addr(shader->desc_info.others.map); if (!copy_table) - return (struct panfrost_ptr){0}; + return VK_SUCCESS; struct pan_nir_desc_copy_info copy_info = { .img_attrib_table = shader_desc_state->img_attrib_table, - .desc_copy = { - .table = copy_table, - .attrib_buf_idx_offset = attrib_buf_idx_offset, - }, + .desc_copy = + { + .table = copy_table, + .attrib_buf_idx_offset = attrib_buf_idx_offset, + }, }; for (uint32_t i = 0; i < ARRAY_SIZE(copy_info.desc_copy.limits); i++) @@ -349,10 +354,17 @@ panvk_per_arch(meta_get_copy_desc_job)( copy_info.tables[i] = shader_desc_state->tables[i]; } - mali_ptr push_uniforms = - pan_pool_upload_aligned(desc_pool, &copy_info, sizeof(copy_info), 16); + struct panfrost_ptr push_uniforms = + panvk_cmd_alloc_dev_mem(cmdbuf, desc, sizeof(copy_info), 16); - struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, COMPUTE_JOB); + if (!push_uniforms.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + memcpy(push_uniforms.cpu, &copy_info, sizeof(copy_info)); + + *job_desc = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB); + if (!job_desc->gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* Given the per-stage max descriptors limit, we should never * reach the 
workgroup dimension limit. */ @@ -362,25 +374,27 @@ panvk_per_arch(meta_get_copy_desc_job)( assert(copy_count - 1 < BITFIELD_MASK(10)); panfrost_pack_work_groups_compute( - pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), 1, 1, 1, copy_count, 1, - 1, false, false); + pan_section_ptr(job_desc->cpu, COMPUTE_JOB, INVOCATION), 1, 1, 1, + copy_count, 1, 1, false, false); - pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { + pan_section_pack(job_desc->cpu, COMPUTE_JOB, PARAMETERS, cfg) { cfg.job_task_split = util_logbase2_ceil(copy_count + 1) + util_logbase2_ceil(1 + 1) + util_logbase2_ceil(1 + 1); } struct pan_tls_info tlsinfo = {0}; - struct panfrost_ptr tls = pan_pool_alloc_desc(desc_pool, LOCAL_STORAGE); + struct panfrost_ptr tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE); + if (!tls.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; GENX(pan_emit_tls)(&tlsinfo, tls.cpu); - pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { + pan_section_pack(job_desc->cpu, COMPUTE_JOB, DRAW, cfg) { cfg.state = panvk_priv_mem_dev_addr(dev->desc_copy.rsd); - cfg.push_uniforms = push_uniforms; + cfg.push_uniforms = push_uniforms.gpu; cfg.thread_storage = tls.gpu; } - return job; + return VK_SUCCESS; } diff --git a/src/panfrost/vulkan/jm/panvk_cmd_buffer.h b/src/panfrost/vulkan/jm/panvk_cmd_buffer.h index 55d3c64b36a..a0eba523662 100644 --- a/src/panfrost/vulkan/jm/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/jm/panvk_cmd_buffer.h @@ -234,13 +234,14 @@ struct panvk_batch * void panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf); -void panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf); +VkResult panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf); -void panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, - bool gfx); +VkResult panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, + bool gfx); -void panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf, - uint32_t layer_idx); +VkResult 
+ panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf, + uint32_t layer_idx); void panvk_per_arch(cmd_preload_fb_after_batch_split)( struct panvk_cmd_buffer *cmdbuf); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index b2ae88849e2..e6e5a77d6da 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -29,6 +29,7 @@ #include "genxml/gen_macros.h" #include "panvk_buffer.h" +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_cmd_desc_state.h" #include "panvk_cmd_pool.h" @@ -48,13 +49,15 @@ #include "vk_descriptor_update_template.h" #include "vk_format.h" -static void +static VkResult panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, mali_ptr fbd) { const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; struct panvk_batch *batch = cmdbuf->cur_batch; - struct panfrost_ptr job_ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB); + struct panfrost_ptr job_ptr = panvk_cmd_alloc_desc(cmdbuf, FRAGMENT_JOB); + + if (!job_ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu); @@ -66,6 +69,7 @@ panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, mali_ptr fbd) pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0, &job_ptr, false); util_dynarray_append(&batch->jobs, void *, job_ptr.cpu); + return VK_SUCCESS; } void @@ -89,11 +93,14 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) /* Batch has no jobs but is needed for synchronization, let's add a * NULL job so the SUBMIT ioctl doesn't choke on it. 
*/ - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, JOB_HEADER); - util_dynarray_append(&batch->jobs, void *, ptr.cpu); - pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0, 0, - &ptr, false); + struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, JOB_HEADER); + + if (ptr.gpu) { + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0, + 0, &ptr, false); + } + list_addtail(&batch->node, &cmdbuf->batches); } cmdbuf->cur_batch = NULL; @@ -116,15 +123,13 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) unsigned size = panfrost_get_total_stack_size( batch->tlsinfo.tls.size, thread_tls_alloc, core_id_range); batch->tlsinfo.tls.ptr = - pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu; + panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu; } if (batch->tlsinfo.wls.size) { assert(batch->wls_total_size); batch->tlsinfo.wls.ptr = - pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size, - 4096) - .gpu; + panvk_cmd_alloc_dev_mem(cmdbuf, tls, batch->wls_total_size, 4096).gpu; } if (batch->tls.cpu) @@ -136,6 +141,8 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) pan_sample_pattern(fbinfo->nr_samples)); for (uint32_t i = 0; i < batch->fb.layer_count; i++) { + VkResult result; + mali_ptr fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * i); if (batch->vtc_jc.first_tiler) { cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds.gpu = 0; @@ -150,26 +157,31 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) assert(!num_preload_jobs); } - panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, i); + result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, i); + if (result != VK_SUCCESS) + break; + fbd |= GENX(pan_emit_fbd)( &cmdbuf->state.gfx.render.fb.info, i, &batch->tlsinfo, &batch->tiler.ctx, batch->fb.desc.cpu + (batch->fb.desc_stride * i)); - panvk_cmd_prepare_fragment_job(cmdbuf, fbd); 
+ result = panvk_cmd_prepare_fragment_job(cmdbuf, fbd); + if (result != VK_SUCCESS) + break; } } cmdbuf->cur_batch = NULL; } -void +VkResult panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf) { struct panvk_batch *batch = cmdbuf->cur_batch; if (batch->fb.desc.gpu) - return; + return VK_SUCCESS; const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s; @@ -187,27 +199,33 @@ panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf) memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos, batch->fb.bo_count * sizeof(batch->fb.bos[0])); - batch->fb.desc = pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, - fbd_size * batch->fb.layer_count, - pan_alignment(FRAMEBUFFER)); + batch->fb.desc = + panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbd_size * batch->fb.layer_count, + pan_alignment(FRAMEBUFFER)); batch->fb.desc_stride = fbd_size; memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0, sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds)); + + return batch->fb.desc.gpu ? 
VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; } -void +VkResult panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx) { struct panvk_batch *batch = cmdbuf->cur_batch; assert(batch); if (!batch->tls.gpu) { - batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); + batch->tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE); + if (!batch->tls.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; } + + return VK_SUCCESS; } -void +VkResult panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf, uint32_t layer_idx) { @@ -223,11 +241,11 @@ panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf, const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info; uint32_t layer_count = cmdbuf->state.gfx.render.layer_count; - batch->tiler.heap_desc = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_HEAP); - - batch->tiler.ctx_descs = pan_pool_alloc_desc_array( - &cmdbuf->desc_pool.base, layer_count, TILER_CONTEXT); + batch->tiler.heap_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_HEAP); + batch->tiler.ctx_descs = + panvk_cmd_alloc_desc_array(cmdbuf, layer_count, TILER_CONTEXT); + if (!batch->tiler.heap_desc.gpu || !batch->tiler.ctx_descs.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; tiler_desc = batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx); @@ -265,6 +283,8 @@ out_set_layer_ctx: batch->tiler.ctx.valhall.desc = tiler_desc; else batch->tiler.ctx.bifrost.desc = tiler_desc; + + return VK_SUCCESS; } struct panvk_batch * diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index a87879150d5..1cc7669545a 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -11,6 +11,7 @@ #include "genxml/gen_macros.h" +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_cmd_desc_state.h" #include "panvk_device.h" @@ -21,7 +22,6 @@ #include "pan_desc.h" #include 
"pan_encoder.h" #include "pan_jc.h" -#include "pan_pool.h" #include "pan_props.h" #include @@ -40,6 +40,7 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); const struct panvk_shader *shader = cmdbuf->state.compute.shader; + VkResult result; if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0) return; @@ -66,8 +67,10 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); dispatch.tsd = batch->tls.gpu; - panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, desc_state, - shader->desc_info.used_set_mask); + result = panvk_per_arch(cmd_prepare_push_descs)( + cmdbuf, desc_state, shader->desc_info.used_set_mask); + if (result != VK_SUCCESS) + return; struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals; sysvals->base.x = baseGroupX; @@ -79,8 +82,12 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, sysvals->local_group_size.x = shader->local_size.x; sysvals->local_group_size.y = shader->local_size.y; sysvals->local_group_size.z = shader->local_size.z; - panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state, - shader, cs_desc_state); + + result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader, + cs_desc_state); + if (result != VK_SUCCESS) + return; + sysvals->desc.dyn_ssbos = cs_desc_state->dyn_ssbos; for (uint32_t i = 0; i < MAX_SETS; i++) { @@ -91,25 +98,32 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, cmdbuf->state.compute.push_uniforms = 0; if (!cmdbuf->state.compute.push_uniforms) { - cmdbuf->state.compute.push_uniforms = panvk_cmd_prepare_push_uniforms( - &cmdbuf->desc_pool.base, &cmdbuf->state.push_constants, - &cmdbuf->state.compute.sysvals, sizeof(cmdbuf->state.compute.sysvals)); + cmdbuf->state.compute.push_uniforms = panvk_per_arch( + cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.compute.sysvals, + 
sizeof(cmdbuf->state.compute.sysvals)); + if (!cmdbuf->state.compute.push_uniforms) + return; } dispatch.push_uniforms = cmdbuf->state.compute.push_uniforms; - panvk_per_arch(cmd_prepare_shader_desc_tables)( - &cmdbuf->desc_pool.base, desc_state, shader, cs_desc_state); + result = panvk_per_arch(cmd_prepare_shader_desc_tables)( + cmdbuf, desc_state, shader, cs_desc_state); - struct panfrost_ptr copy_desc_job = panvk_per_arch(meta_get_copy_desc_job)( - dev, &cmdbuf->desc_pool.base, shader, &cmdbuf->state.compute.desc_state, - cs_desc_state, 0); + struct panfrost_ptr copy_desc_job; + result = panvk_per_arch(meta_get_copy_desc_job)( + cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0, + &copy_desc_job); + if (result != VK_SUCCESS) + return; if (copy_desc_job.cpu) util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu); - struct panfrost_ptr job = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); + struct panfrost_ptr job = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB); + if (!job.gpu) + return; + util_dynarray_append(&batch->jobs, void *, job.cpu); panfrost_pack_work_groups_compute( diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index 6cf66bbbbe7..a8c6c03abcd 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -12,6 +12,7 @@ #include "genxml/gen_macros.h" #include "panvk_buffer.h" +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_cmd_desc_state.h" #include "panvk_cmd_meta.h" @@ -28,7 +29,6 @@ #include "pan_encoder.h" #include "pan_format.h" #include "pan_jc.h" -#include "pan_pool.h" #include "pan_props.h" #include "pan_shader.h" @@ -88,7 +88,7 @@ struct panvk_draw_info { BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty, \ MESA_VK_DYNAMIC_##__name) -static void +static VkResult panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { @@ -152,11 +152,17 @@ 
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, cmdbuf->state.gfx.push_uniforms = 0; } - panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state, - vs, vs_desc_state); + VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, + vs, vs_desc_state); + if (result != VK_SUCCESS) + return result; + sysvals->desc.vs_dyn_ssbos = vs_desc_state->dyn_ssbos; - panvk_per_arch(cmd_prepare_dyn_ssbos)(&cmdbuf->desc_pool.base, desc_state, - fs, fs_desc_state); + result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, fs, + fs_desc_state); + if (result != VK_SUCCESS) + return result; + sysvals->desc.fs_dyn_ssbos = fs_desc_state->dyn_ssbos; for (uint32_t i = 0; i < MAX_SETS; i++) { @@ -166,6 +172,8 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, if (used_set_mask & BITFIELD_BIT(i)) sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; } + + return VK_SUCCESS; } static bool @@ -304,7 +312,7 @@ fs_required(struct panvk_cmd_buffer *cmdbuf) return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil); } -static void +static VkResult panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { @@ -336,7 +344,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, if (!dirty) { draw->fs.rsd = cmdbuf->state.gfx.fs.rsd; - return; + return VK_SUCCESS; } struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); @@ -354,9 +362,11 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, bool writes_s = writes_stencil(cmdbuf); bool needs_fs = fs_required(cmdbuf); - struct panfrost_ptr ptr = pan_pool_alloc_desc_aggregate( - &cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE), - PAN_DESC_ARRAY(bd_count, BLEND)); + struct panfrost_ptr ptr = panvk_cmd_alloc_desc_aggregate( + cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND)); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + struct mali_renderer_state_packed *rsd = ptr.cpu; struct 
mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE); struct panvk_blend_info binfo = {0}; @@ -472,16 +482,21 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, cmdbuf->state.gfx.fs.rsd = ptr.gpu; draw->fs.rsd = cmdbuf->state.gfx.fs.rsd; + return VK_SUCCESS; } -static void +static VkResult panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; + VkResult result = + panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->layer_id); + if (result != VK_SUCCESS) + return result; - panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->layer_id); draw->tiler_ctx = &batch->tiler.ctx; + return VK_SUCCESS; } static mali_pixel_format @@ -514,14 +529,17 @@ panvk_varying_hw_format(gl_shader_stage stage, gl_varying_slot loc, } } -static void +static VkResult panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader_link *link = &cmdbuf->state.gfx.link; - struct panfrost_ptr bufs = pan_pool_alloc_desc_array( - &cmdbuf->desc_pool.base, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER); + struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array( + cmdbuf, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER); + if (!bufs.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + struct mali_attribute_buffer_packed *buf_descs = bufs.cpu; const struct vk_input_assembly_state *ia = &cmdbuf->vk.dynamic_graphics_state.ia; @@ -534,10 +552,10 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { unsigned buf_size = vertex_count * link->buf_strides[i]; mali_ptr buf_addr = - buf_size - ? pan_pool_alloc_aligned(&cmdbuf->varying_pool.base, buf_size, 64) - .gpu - : 0; + buf_size ? 
panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu + : 0; + if (buf_size && !buf_addr) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) { cfg.stride = link->buf_strides[i]; @@ -567,6 +585,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, draw->varying_bufs = bufs.gpu; draw->vs.varyings = panvk_priv_mem_dev_addr(link->vs.attribs); draw->fs.varyings = panvk_priv_mem_dev_addr(link->fs.attribs); + return VK_SUCCESS; } static void @@ -662,7 +681,7 @@ panvk_draw_emit_attrib(const struct panvk_draw_info *draw, } } -static void +static VkResult panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { @@ -682,16 +701,19 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, (attrib_count && !cmdbuf->state.gfx.vs.attribs); if (!dirty) - return; + return VK_SUCCESS; unsigned attrib_buf_count = (num_vbs + num_imgs) * 2; - struct panfrost_ptr bufs = pan_pool_alloc_desc_array( - &cmdbuf->desc_pool.base, attrib_buf_count + 1, ATTRIBUTE_BUFFER); + struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array( + cmdbuf, attrib_buf_count + 1, ATTRIBUTE_BUFFER); struct mali_attribute_buffer_packed *attrib_buf_descs = bufs.cpu; - struct panfrost_ptr attribs = pan_pool_alloc_desc_array( - &cmdbuf->desc_pool.base, attrib_count, ATTRIBUTE); + struct panfrost_ptr attribs = + panvk_cmd_alloc_desc_array(cmdbuf, attrib_count, ATTRIBUTE); struct mali_attribute_packed *attrib_descs = attribs.cpu; + if (!bufs.gpu || (attrib_count && !attribs.gpu)) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + for (unsigned i = 0; i < num_vbs; i++) { if (vi->bindings_valid & BITFIELD_BIT(i)) { panvk_draw_emit_attrib_buf(draw, &vi->bindings[i], @@ -726,6 +748,8 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] = bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2); } + + return VK_SUCCESS; } static void @@ -783,7 +807,7 @@ 
panvk_emit_viewport(const struct vk_viewport_state *vp, void *vpd) } } -static void +static VkResult panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { @@ -793,8 +817,9 @@ panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, */ if (!cmdbuf->state.gfx.vpd || is_dirty(cmdbuf, VP_VIEWPORTS) || is_dirty(cmdbuf, VP_SCISSORS)) { - struct panfrost_ptr vp = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT); + struct panfrost_ptr vp = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT); + if (!vp.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; const struct vk_viewport_state *vps = &cmdbuf->vk.dynamic_graphics_state.vp; @@ -805,6 +830,7 @@ panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, } draw->viewport = cmdbuf->state.gfx.vpd; + return VK_SUCCESS; } static void @@ -832,13 +858,14 @@ panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf, } } -static void +static VkResult panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); + struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.vertex = ptr; @@ -852,6 +879,7 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, panvk_emit_vertex_dcd(cmdbuf, draw, pan_section_ptr(ptr.cpu, COMPUTE_JOB, DRAW)); + return VK_SUCCESS; } static enum mali_draw_mode @@ -1000,24 +1028,26 @@ panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, } } -static void +static VkResult panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; struct panvk_shader_desc_state 
*fs_desc_state = &cmdbuf->state.gfx.fs.desc; - struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)( - dev, &cmdbuf->desc_pool.base, fs, &cmdbuf->state.gfx.desc_state, - fs_desc_state, 0); + struct panfrost_ptr ptr; + VkResult result = panvk_per_arch(meta_get_copy_desc_job)( + cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr); + + if (result != VK_SUCCESS) + return result; if (ptr.cpu) util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.frag_copy_desc = ptr; - ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB); + ptr = panvk_cmd_alloc_desc(cmdbuf, TILER_JOB); util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.tiler = ptr; @@ -1040,15 +1070,18 @@ panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, pan_section_pack(ptr.cpu, TILER_JOB, PADDING, padding) ; + + return VK_SUCCESS; } -static void +static VkResult panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, INDEXED_VERTEX_JOB); + struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, INDEXED_VERTEX_JOB); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.idvs = ptr; @@ -1077,13 +1110,13 @@ panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf, panvk_emit_vertex_dcd( cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW)); + return VK_SUCCESS; } -static void +static VkResult panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader_desc_state *vs_desc_state = @@ -1091,32 +1124,39 @@ panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, 
const struct vk_vertex_input_state *vi = cmdbuf->vk.dynamic_graphics_state.vi; unsigned num_vbs = util_last_bit(vi->bindings_valid); - struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)( - dev, &cmdbuf->desc_pool.base, vs, &cmdbuf->state.gfx.desc_state, - vs_desc_state, num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2); + struct panfrost_ptr ptr; + VkResult result = panvk_per_arch(meta_get_copy_desc_job)( + cmdbuf, vs, &cmdbuf->state.gfx.desc_state, vs_desc_state, + num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2, &ptr); + if (result != VK_SUCCESS) + return result; if (ptr.cpu) util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.vertex_copy_desc = ptr; + return VK_SUCCESS; } -static void +static VkResult panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { - struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; struct panvk_batch *batch = cmdbuf->cur_batch; - struct panfrost_ptr ptr = panvk_per_arch(meta_get_copy_desc_job)( - dev, &cmdbuf->desc_pool.base, fs, &cmdbuf->state.gfx.desc_state, - fs_desc_state, 0); + struct panfrost_ptr ptr; + VkResult result = panvk_per_arch(meta_get_copy_desc_job)( + cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr); + + if (result != VK_SUCCESS) + return result; if (ptr.cpu) util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.frag_copy_desc = ptr; + return VK_SUCCESS; } void @@ -1143,18 +1183,23 @@ panvk_per_arch(cmd_preload_fb_after_batch_split)(struct panvk_cmd_buffer *cmdbuf } } -static void +static VkResult panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd) { struct panvk_cmd_graphics_state *gfx = &cmd->state.gfx; if (gfx->linked) - return; + return VK_SUCCESS; - panvk_per_arch(link_shaders)(&cmd->desc_pool, gfx->vs.shader, gfx->fs.shader, - &gfx->link); + VkResult result = 
panvk_per_arch(link_shaders)( + &cmd->desc_pool, gfx->vs.shader, gfx->fs.shader, &gfx->link); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd->vk, result); + return result; + } gfx->linked = true; + return VK_SUCCESS; } static void @@ -1170,6 +1215,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) const struct vk_rasterization_state *rs = &cmdbuf->vk.dynamic_graphics_state.rs; bool idvs = vs->info.vs.idvs; + VkResult result; /* If there's no vertex shader, we can skip the draw. */ if (!panvk_priv_mem_dev_addr(vs->rsd)) @@ -1185,24 +1231,35 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) batch = panvk_per_arch(cmd_open_batch)(cmdbuf); } - panvk_cmd_prepare_draw_link_shaders(cmdbuf); + result = panvk_cmd_prepare_draw_link_shaders(cmdbuf); + if (result != VK_SUCCESS) + return; - if (!rs->rasterizer_discard_enable) - panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); + if (!rs->rasterizer_discard_enable) { + result = panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); + if (result != VK_SUCCESS) + return; + } - panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); + result = panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); + if (result != VK_SUCCESS) + return; panvk_draw_prepare_attributes(cmdbuf, draw); uint32_t used_set_mask = vs->desc_info.used_set_mask | (fs ? 
fs->desc_info.used_set_mask : 0); - panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, desc_state, - used_set_mask); + result = + panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, used_set_mask); + if (result != VK_SUCCESS) + return; + + result = panvk_per_arch(cmd_prepare_shader_desc_tables)( + cmdbuf, &cmdbuf->state.gfx.desc_state, vs, vs_desc_state); + if (result != VK_SUCCESS) + return; - panvk_per_arch(cmd_prepare_shader_desc_tables)(&cmdbuf->desc_pool.base, - &cmdbuf->state.gfx.desc_state, - vs, vs_desc_state); panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw); unsigned copy_desc_job_id = @@ -1217,10 +1274,14 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) /* No need to setup the FS desc tables if the FS is not executed. */ if (needs_tiling && fs_required(cmdbuf)) { - panvk_per_arch(cmd_prepare_shader_desc_tables)( - &cmdbuf->desc_pool.base, &cmdbuf->state.gfx.desc_state, fs, - fs_desc_state); - panvk_draw_prepare_fs_copy_desc_job(cmdbuf, draw); + result = panvk_per_arch(cmd_prepare_shader_desc_tables)( + cmdbuf, &cmdbuf->state.gfx.desc_state, fs, fs_desc_state); + if (result != VK_SUCCESS) + return; + + result = panvk_draw_prepare_fs_copy_desc_job(cmdbuf, draw); + if (result != VK_SUCCESS) + return; if (draw->jobs.frag_copy_desc.gpu) { /* We don't need to add frag_copy_desc as a dependency because the @@ -1240,27 +1301,49 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) draw->instance_count, 1, 1, 1, true, false); - panvk_draw_prepare_fs_rsd(cmdbuf, draw); - panvk_draw_prepare_viewport(cmdbuf, draw); + result = panvk_draw_prepare_fs_rsd(cmdbuf, draw); + if (result != VK_SUCCESS) + return; + + result = panvk_draw_prepare_viewport(cmdbuf, draw); + if (result != VK_SUCCESS) + return; + batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? 
fs->info.tls_size : 0, batch->tlsinfo.tls.size); for (uint32_t i = 0; i < layer_count; i++) { draw->layer_id = i; - panvk_draw_prepare_varyings(cmdbuf, draw); - panvk_cmd_prepare_draw_sysvals(cmdbuf, draw); - cmdbuf->state.gfx.push_uniforms = panvk_cmd_prepare_push_uniforms( - &cmdbuf->desc_pool.base, &cmdbuf->state.push_constants, - &cmdbuf->state.gfx.sysvals, sizeof(cmdbuf->state.gfx.sysvals)); + result = panvk_draw_prepare_varyings(cmdbuf, draw); + if (result != VK_SUCCESS) + return; + + result = panvk_cmd_prepare_draw_sysvals(cmdbuf, draw); + if (result != VK_SUCCESS) + return; + + cmdbuf->state.gfx.push_uniforms = panvk_per_arch( + cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.gfx.sysvals, + sizeof(cmdbuf->state.gfx.sysvals)); + if (!cmdbuf->state.gfx.push_uniforms) + return; + draw->push_uniforms = cmdbuf->state.gfx.push_uniforms; - panvk_draw_prepare_tiler_context(cmdbuf, draw); + result = panvk_draw_prepare_tiler_context(cmdbuf, draw); + if (result != VK_SUCCESS) + return; if (idvs) { - panvk_draw_prepare_idvs_job(cmdbuf, draw); + result = panvk_draw_prepare_idvs_job(cmdbuf, draw); + if (result != VK_SUCCESS) + return; + pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 0, copy_desc_job_id, &draw->jobs.idvs, false); } else { - panvk_draw_prepare_vertex_job(cmdbuf, draw); + result = panvk_draw_prepare_vertex_job(cmdbuf, draw); + if (result != VK_SUCCESS) + return; unsigned vjob_id = pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, false, diff --git a/src/panfrost/vulkan/panvk_cmd_alloc.h b/src/panfrost/vulkan/panvk_cmd_alloc.h new file mode 100644 index 00000000000..f3e24a5b9cc --- /dev/null +++ b/src/panfrost/vulkan/panvk_cmd_alloc.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2024 Collabora Ltd. 
+ * SPDX-License-Identifier: MIT + */ + +#ifndef PANVK_CMD_ALLOC_H +#define PANVK_CMD_ALLOC_H + +#include "panvk_cmd_buffer.h" +#include "panvk_mempool.h" + +static inline struct panfrost_ptr +panvk_cmd_alloc_from_pool(struct panvk_cmd_buffer *cmdbuf, + struct panvk_pool *pool, + struct panvk_pool_alloc_info info) +{ + if (!info.size) + return (struct panfrost_ptr){0}; + + struct panfrost_ptr ptr = + pan_pool_alloc_aligned(&pool->base, info.size, info.alignment); + + if (!ptr.gpu) + vk_command_buffer_set_error(&cmdbuf->vk, + VK_ERROR_OUT_OF_DEVICE_MEMORY); + + return ptr; +} + +#define panvk_cmd_alloc_dev_mem(__cmdbuf, __poolnm, __sz, __alignment) \ + panvk_cmd_alloc_from_pool(__cmdbuf, &(__cmdbuf)->__poolnm##_pool, \ + (struct panvk_pool_alloc_info){ \ + .size = __sz, \ + .alignment = __alignment, \ + }) + +#define panvk_cmd_alloc_desc_aggregate(__cmdbuf, ...) \ + panvk_cmd_alloc_from_pool( \ + __cmdbuf, &(__cmdbuf)->desc_pool, \ + panvk_pool_descs_to_alloc_info(PAN_DESC_AGGREGATE(__VA_ARGS__))) + +#define panvk_cmd_alloc_desc(__cmdbuf, __desc) \ + panvk_cmd_alloc_desc_aggregate(__cmdbuf, PAN_DESC(__desc)) + +#define panvk_cmd_alloc_desc_array(__cmdbuf, __count, __desc) \ + panvk_cmd_alloc_desc_aggregate(__cmdbuf, PAN_DESC_ARRAY(__count, __desc)) + +#endif diff --git a/src/panfrost/vulkan/panvk_cmd_desc_state.h b/src/panfrost/vulkan/panvk_cmd_desc_state.h index 0dec17842c1..ddf5c9e1036 100644 --- a/src/panfrost/vulkan/panvk_cmd_desc_state.h +++ b/src/panfrost/vulkan/panvk_cmd_desc_state.h @@ -25,6 +25,8 @@ #include "pan_pool.h" +struct panvk_cmd_buffer; + struct panvk_shader_desc_state { #if PAN_ARCH <= 7 mali_ptr tables[PANVK_BIFROST_DESC_TABLE_COUNT]; @@ -53,28 +55,31 @@ struct panvk_descriptor_state { }; #if PAN_ARCH <= 7 -void panvk_per_arch(cmd_prepare_dyn_ssbos)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, +VkResult panvk_per_arch(cmd_prepare_dyn_ssbos)( + struct panvk_cmd_buffer *cmdbuf, + const struct 
panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state); -void panvk_per_arch(cmd_prepare_shader_desc_tables)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, +VkResult panvk_per_arch(cmd_prepare_shader_desc_tables)( + struct panvk_cmd_buffer *cmdbuf, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state); #else void panvk_per_arch(cmd_fill_dyn_bufs)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct mali_buffer_packed *buffers); -void panvk_per_arch(cmd_prepare_shader_res_table)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, +VkResult panvk_per_arch(cmd_prepare_shader_res_table)( + struct panvk_cmd_buffer *cmdbuf, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state); #endif -void panvk_per_arch(cmd_prepare_push_descs)( - struct pan_pool *desc_pool, struct panvk_descriptor_state *desc_state, +VkResult panvk_per_arch(cmd_prepare_push_descs)( + struct panvk_cmd_buffer *cmdbuf, struct panvk_descriptor_state *desc_state, uint32_t used_set_mask); #endif diff --git a/src/panfrost/vulkan/panvk_cmd_push_constant.h b/src/panfrost/vulkan/panvk_cmd_push_constant.h index 5cdd3953659..d58446c139b 100644 --- a/src/panfrost/vulkan/panvk_cmd_push_constant.h +++ b/src/panfrost/vulkan/panvk_cmd_push_constant.h @@ -10,7 +10,7 @@ #include "genxml/gen_macros.h" -#include "pan_pool.h" +struct panvk_cmd_buffer; #define MAX_PUSH_CONSTANTS_SIZE 128 @@ -18,21 +18,8 @@ struct panvk_push_constant_state { uint8_t data[MAX_PUSH_CONSTANTS_SIZE]; }; -static inline mali_ptr -panvk_cmd_prepare_push_uniforms(struct pan_pool *desc_pool_base, - struct panvk_push_constant_state *push, - void 
*sysvals, unsigned sysvals_sz) -{ - struct panfrost_ptr push_uniforms = - pan_pool_alloc_aligned(desc_pool_base, 512, 16); - - /* The first half is used for push constants. */ - memcpy(push_uniforms.cpu, push->data, sizeof(push->data)); - - /* The second half is used for sysvals. */ - memcpy((uint8_t *)push_uniforms.cpu + 256, sysvals, sysvals_sz); - - return push_uniforms.gpu; -} +mali_ptr +panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf, + void *sysvals, unsigned sysvals_sz); #endif diff --git a/src/panfrost/vulkan/panvk_meta.h b/src/panfrost/vulkan/panvk_meta.h index 2e8e182dabb..508217de15f 100644 --- a/src/panfrost/vulkan/panvk_meta.h +++ b/src/panfrost/vulkan/panvk_meta.h @@ -138,17 +138,17 @@ void panvk_per_arch(meta_desc_copy_init)(struct panvk_device *dev); void panvk_per_arch(meta_desc_copy_cleanup)(struct panvk_device *dev); +struct panvk_cmd_buffer; struct panvk_descriptor_state; struct panvk_device; struct panvk_shader; struct panvk_shader_desc_state; -struct panfrost_ptr panvk_per_arch(meta_get_copy_desc_job)( - struct panvk_device *dev, struct pan_pool *desc_pool, - const struct panvk_shader *shader, +VkResult panvk_per_arch(meta_get_copy_desc_job)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader *shader, const struct panvk_descriptor_state *desc_state, const struct panvk_shader_desc_state *shader_desc_state, - uint32_t attrib_buf_idx_offset); + uint32_t attrib_buf_idx_offset, struct panfrost_ptr *job_desc); #endif #endif diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 65724b2de83..eceef42ebe2 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -180,10 +180,10 @@ panvk_shader_get_dev_addr(const struct panvk_shader *shader) return shader != NULL ? 
panvk_priv_mem_dev_addr(shader->code_mem) : 0; } -void panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, - const struct panvk_shader *vs, - const struct panvk_shader *fs, - struct panvk_shader_link *link); +VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, + const struct panvk_shader *vs, + const struct panvk_shader *fs, + struct panvk_shader_link *link); static inline void panvk_shader_link_cleanup(struct panvk_pool *desc_pool, diff --git a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c index cb939855e97..6fc4c0ba13e 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c @@ -12,6 +12,7 @@ #include "genxml/gen_macros.h" #include "panvk_buffer.h" +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_cmd_desc_state.h" #include "panvk_entrypoints.h" @@ -107,19 +108,22 @@ cmd_get_push_desc_set(struct vk_command_buffer *vk_cmdbuf, } #if PAN_ARCH <= 7 -void +VkResult panvk_per_arch(cmd_prepare_dyn_ssbos)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + struct panvk_cmd_buffer *cmdbuf, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state) { if (!shader || !shader->desc_info.dyn_ssbos.count || shader_desc_state->dyn_ssbos) - return; + return VK_SUCCESS; - struct panfrost_ptr ptr = pan_pool_alloc_aligned( - desc_pool, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE, + struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem( + cmdbuf, desc, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; struct panvk_ssbo_addr *ssbos = ptr.cpu; for (uint32_t i = 0; i < shader->desc_info.dyn_ssbos.count; i++) { @@ -140,6 +144,7 @@ panvk_per_arch(cmd_prepare_dyn_ssbos)( } shader_desc_state->dyn_ssbos = ptr.gpu; + return VK_SUCCESS; } 
static void @@ -169,14 +174,15 @@ panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state, } } -void +VkResult panvk_per_arch(cmd_prepare_shader_desc_tables)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + struct panvk_cmd_buffer *cmdbuf, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state) { if (!shader) - return; + return VK_SUCCESS; for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) { uint32_t desc_count = @@ -189,8 +195,10 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( if (!desc_count || shader_desc_state->tables[i]) continue; - struct panfrost_ptr ptr = pan_pool_alloc_aligned( - desc_pool, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE); + struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem( + cmdbuf, desc, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; shader_desc_state->tables[i] = ptr.gpu; @@ -203,7 +211,10 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( shader->info.stage != MESA_SHADER_VERTEX) { assert(!shader_desc_state->img_attrib_table); - ptr = pan_pool_alloc_desc_array(desc_pool, desc_count, ATTRIBUTE); + ptr = panvk_cmd_alloc_desc_array(cmdbuf, desc_count, ATTRIBUTE); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + shader_desc_state->img_attrib_table = ptr.gpu; } } @@ -214,7 +225,9 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_SAMPLER]; if (tex_count && !sampler_count) { - struct panfrost_ptr sampler = pan_pool_alloc_desc(desc_pool, SAMPLER); + struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER); + if (!sampler.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* Emit a dummy sampler if we have to. 
*/ pan_pack(sampler.cpu, SAMPLER, _) { @@ -222,11 +235,13 @@ panvk_per_arch(cmd_prepare_shader_desc_tables)( shader_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER] = sampler.gpu; } + + return VK_SUCCESS; } #else void panvk_per_arch(cmd_fill_dyn_bufs)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct mali_buffer_packed *buffers) { if (!shader) @@ -250,19 +265,23 @@ panvk_per_arch(cmd_fill_dyn_bufs)( } } -void +VkResult panvk_per_arch(cmd_prepare_shader_res_table)( - struct pan_pool *desc_pool, const struct panvk_descriptor_state *desc_state, + struct panvk_cmd_buffer *cmdbuf, + const struct panvk_descriptor_state *desc_state, const struct panvk_shader *shader, struct panvk_shader_desc_state *shader_desc_state) { if (!shader || shader_desc_state->res_table) - return; + return VK_SUCCESS; uint32_t first_unused_set = util_last_bit(shader->desc_info.used_set_mask); uint32_t res_count = 1 + first_unused_set; struct panfrost_ptr ptr = - pan_pool_alloc_desc_array(desc_pool, res_count, RESOURCE); + panvk_cmd_alloc_desc_array(cmdbuf, res_count, RESOURCE); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + struct mali_resource_packed *res_table = ptr.cpu; /* First entry is the driver set table, where we store the vertex attributes, @@ -290,11 +309,12 @@ panvk_per_arch(cmd_prepare_shader_res_table)( } shader_desc_state->res_table = ptr.gpu | res_count; + return VK_SUCCESS; } #endif -void -panvk_per_arch(cmd_prepare_push_descs)(struct pan_pool *desc_pool, +VkResult +panvk_per_arch(cmd_prepare_push_descs)(struct panvk_cmd_buffer *cmdbuf, struct panvk_descriptor_state *desc_state, uint32_t used_set_mask) { @@ -305,10 +325,18 @@ panvk_per_arch(cmd_prepare_push_descs)(struct pan_pool *desc_pool, desc_state->sets[i] != push_set || push_set->descs.dev) continue; - push_set->descs.dev = pan_pool_upload_aligned( - desc_pool, push_set->descs.host, - 
push_set->desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE); + struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem( + cmdbuf, desc, push_set->desc_count * PANVK_DESCRIPTOR_SIZE, + PANVK_DESCRIPTOR_SIZE); + if (!ptr.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + memcpy(ptr.cpu, push_set->descs.host, + push_set->desc_count * PANVK_DESCRIPTOR_SIZE); + push_set->descs.dev = ptr.gpu; } + + return VK_SUCCESS; } VKAPI_ATTR void VKAPI_CALL diff --git a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c index ed711548ee7..a67a66fe4c2 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c @@ -3,10 +3,29 @@ * SPDX-License-Identifier: MIT */ +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" -#include "panvk_cmd_push_constant.h" #include "panvk_entrypoints.h" +mali_ptr +panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf, + void *sysvals, unsigned sysvals_sz) +{ + struct panfrost_ptr push_uniforms = + panvk_cmd_alloc_dev_mem(cmdbuf, desc, 512, 16); + + if (push_uniforms.gpu) { + /* The first half is used for push constants. */ + memcpy(push_uniforms.cpu, cmdbuf->state.push_constants.data, + sizeof(cmdbuf->state.push_constants.data)); + + /* The second half is used for sysvals. 
*/ + memcpy((uint8_t *)push_uniforms.cpu + 256, sysvals, sysvals_sz); + } + + return push_uniforms.gpu; +} + VKAPI_ATTR void VKAPI_CALL panvk_per_arch(CmdPushConstants2KHR)( VkCommandBuffer commandBuffer, diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index 72b70bcb641..d9f89a019aa 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -13,6 +13,7 @@ #include "vk_common_entrypoints.h" #include "panvk_buffer.h" +#include "panvk_cmd_alloc.h" #include "panvk_cmd_buffer.h" #include "panvk_device.h" #include "panvk_entrypoints.h" @@ -96,7 +97,10 @@ panvk_meta_cmd_bind_map_buffer(struct vk_command_buffer *cmd, struct panvk_cmd_buffer *cmdbuf = container_of(cmd, struct panvk_cmd_buffer, vk); struct panfrost_ptr mem = - pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, buffer->vk.size, 64); + panvk_cmd_alloc_dev_mem(cmdbuf, desc, buffer->vk.size, 64); + + if (!mem.gpu) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; buffer->dev_addr = mem.gpu; *map_out = mem.cpu; diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 8ca59e47102..e3e85a9c588 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1185,16 +1185,20 @@ varying_format(gl_varying_slot loc, enum pipe_format pfmt) } } -static struct panvk_priv_mem +static VkResult emit_varying_attrs(struct panvk_pool *desc_pool, const struct pan_shader_varying *varyings, unsigned varying_count, const struct varyings_info *info, - unsigned *buf_offsets) + unsigned *buf_offsets, struct panvk_priv_mem *mem) { unsigned attr_count = BITSET_COUNT(info->active); - struct panvk_priv_mem mem = - panvk_pool_alloc_desc_array(desc_pool, attr_count, ATTRIBUTE); - struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(mem); + + *mem = panvk_pool_alloc_desc_array(desc_pool, attr_count, ATTRIBUTE); + + if (attr_count && !panvk_priv_mem_dev_addr(*mem)) + return 
VK_ERROR_OUT_OF_DEVICE_MEMORY; + + struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(*mem); unsigned attr_idx = 0; for (unsigned i = 0; i < varying_count; i++) { @@ -1219,10 +1223,10 @@ emit_varying_attrs(struct panvk_pool *desc_pool, } } - return mem; + return VK_SUCCESS; } -void +VkResult panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, const struct panvk_shader *vs, const struct panvk_shader *fs, @@ -1241,7 +1245,7 @@ panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, if (PAN_ARCH >= 9) { link->buf_strides[PANVK_VARY_BUF_GENERAL] = MAX2(fs->info.varyings.input_count, vs->info.varyings.output_count); - return; + return VK_SUCCESS; } collect_varyings_info(vs->info.varyings.output, @@ -1302,16 +1306,22 @@ panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, buf_strides[buf_idx] += ALIGN_POT(out_size, 4); } - link->vs.attribs = emit_varying_attrs(desc_pool, vs->info.varyings.output, - vs->info.varyings.output_count, - &out_vars, buf_offsets); + VkResult result = emit_varying_attrs( + desc_pool, vs->info.varyings.output, vs->info.varyings.output_count, + &out_vars, buf_offsets, &link->vs.attribs); + if (result != VK_SUCCESS) + return result; - if (fs) - link->fs.attribs = emit_varying_attrs(desc_pool, fs->info.varyings.input, - fs->info.varyings.input_count, - &in_vars, buf_offsets); + if (fs) { + result = emit_varying_attrs(desc_pool, fs->info.varyings.input, + fs->info.varyings.input_count, &in_vars, + buf_offsets, &link->fs.attribs); + if (result != VK_SUCCESS) + return result; + } memcpy(link->buf_strides, buf_strides, sizeof(link->buf_strides)); + return VK_SUCCESS; } static const struct vk_shader_ops panvk_shader_ops = {