From 02906baba745e6a6d2f27c1654e361665db4ac77 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 23 Sep 2021 17:54:08 +0200 Subject: [PATCH] panvk: Implement vkCmdDispatch() Signed-off-by: Boris Brezillon Reviewed-by: Jason Ekstrand Part-of: --- src/panfrost/ci/deqp-panfrost-g52-vk.toml | 8 +++ src/panfrost/vulkan/panvk_cmd_buffer.c | 9 ---- src/panfrost/vulkan/panvk_private.h | 15 ++++++ src/panfrost/vulkan/panvk_vX_cmd_buffer.c | 66 +++++++++++++++++++++++ src/panfrost/vulkan/panvk_vX_cs.c | 33 ++++++++++++ src/panfrost/vulkan/panvk_vX_cs.h | 5 ++ 6 files changed, 127 insertions(+), 9 deletions(-) diff --git a/src/panfrost/ci/deqp-panfrost-g52-vk.toml b/src/panfrost/ci/deqp-panfrost-g52-vk.toml index c93421fd7f9..b43b5450fdf 100644 --- a/src/panfrost/ci/deqp-panfrost-g52-vk.toml +++ b/src/panfrost/ci/deqp-panfrost-g52-vk.toml @@ -5,4 +5,12 @@ renderer_check = "Mali-G52" include = [ "dEQP-VK.pipeline.blend.*", "dEQP-VK.api.copy_and_blit.core.*", + "dEQP-VK.compute.builtin_var.*", + "dEQP-VK.image.load_store.with_format.1d.*", + "dEQP-VK.image.load_store.with_format.1d_array.*", + "dEQP-VK.image.load_store.with_format.2d.*", + "dEQP-VK.image.load_store.with_format.2d_array.*", + "dEQP-VK.image.load_store.with_format.3d.*", + "dEQP-VK.image.load_store.with_format.cube.*", + "dEQP-VK.ssbo.layout.single_basic_type.*", ] diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.c b/src/panfrost/vulkan/panvk_cmd_buffer.c index ba20cb94cd3..16ed06f90c2 100644 --- a/src/panfrost/vulkan/panvk_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_cmd_buffer.c @@ -573,15 +573,6 @@ panvk_CmdDispatchBase(VkCommandBuffer commandBuffer, panvk_stub(); } -void -panvk_CmdDispatch(VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - panvk_stub(); -} - void panvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h index 66566142b27..d7162fee975 100644 --- 
a/src/panfrost/vulkan/panvk_private.h +++ b/src/panfrost/vulkan/panvk_private.h @@ -554,6 +554,17 @@ struct panvk_draw_info { } jobs; }; +struct panvk_dispatch_info { /* Inputs for one compute dispatch: filled in by CmdDispatch and read by emit_compute_job. */ + struct pan_compute_dim wg_count; /* The x, y, z arguments of CmdDispatch. */ + mali_ptr attributes; + mali_ptr attribute_bufs; + mali_ptr tsd; /* Copied from batch->tls.gpu; ends up in the thread_storage field of the DRAW section. */ + mali_ptr ubos; + mali_ptr push_uniforms; /* NOTE(review): CmdDispatch never assigns this field, so it stays zero - confirm that is intended. */ + mali_ptr textures; + mali_ptr samplers; +}; + struct panvk_attrib_info { unsigned buf; unsigned offset; @@ -630,6 +641,10 @@ struct panvk_cmd_state { bool crc_valid[MAX_RTS]; } fb; + struct { + struct pan_compute_dim wg_count; /* Written by CmdDispatch; read back when uploading PAN_SYSVAL_NUM_WORK_GROUPS. */ + } compute; + const struct panvk_render_pass *pass; const struct panvk_subpass *subpass; const struct panvk_framebuffer *framebuffer; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c index 0ec3f407361..fbfe088d559 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c @@ -349,6 +349,16 @@ panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf, */ panvk_sysval_upload_ssbo_info(cmdbuf, PAN_SYSVAL_ID(id), bind_point_state, data); break; + case PAN_SYSVAL_NUM_WORK_GROUPS: /* Workgroup counts stored in the command buffer state by CmdDispatch. */ + data->u32[0] = cmdbuf->state.compute.wg_count.x; + data->u32[1] = cmdbuf->state.compute.wg_count.y; + data->u32[2] = cmdbuf->state.compute.wg_count.z; + break; + case PAN_SYSVAL_LOCAL_GROUP_SIZE: /* cs.local_size of the pipeline bound at this bind point. */ + data->u32[0] = bind_point_state->pipeline->cs.local_size.x; + data->u32[1] = bind_point_state->pipeline->cs.local_size.y; + data->u32[2] = bind_point_state->pipeline->cs.local_size.z; + break; default: unreachable("Invalid static sysval"); } @@ -1368,3 +1378,59 @@ panvk_per_arch(TrimCommandPool)(VkDevice device, &pool->free_cmd_buffers, pool_link) panvk_destroy_cmdbuf(cmdbuf); } + +void +panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ /* Records one COMPUTE job with workgroup counts x, y, z into a batch opened and closed by this function. */ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + const struct panfrost_device *pdev = + &cmdbuf->device->physical_device->pdev; + struct panvk_dispatch_info dispatch = { +
.wg_count = { x, y, z }, + }; + + panvk_per_arch(cmd_close_batch)(cmdbuf); /* Presumably finishes any batch already being recorded; the job below goes into the batch opened next. */ + struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); + + struct panvk_cmd_bind_point_state *bind_point_state = + panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE); + struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; + const struct panvk_pipeline *pipeline = bind_point_state->pipeline; + struct panfrost_ptr job = + pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); + + cmdbuf->state.compute.wg_count = dispatch.wg_count; /* Saved so the PAN_SYSVAL_NUM_WORK_GROUPS upload can read it. */ + panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); + dispatch.tsd = batch->tls.gpu; + + panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state); + dispatch.attributes = desc_state->non_vs_attribs; + dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs; + + panvk_cmd_prepare_ubos(cmdbuf, bind_point_state); + dispatch.ubos = desc_state->ubos; + + panvk_cmd_prepare_textures(cmdbuf, bind_point_state); + dispatch.textures = desc_state->textures; + + panvk_cmd_prepare_samplers(cmdbuf, bind_point_state); + dispatch.samplers = desc_state->samplers; + + panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu); /* NOTE(review): dispatch.push_uniforms is still zero at this point (zero-initialized, never assigned) - confirm. */ + panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, + MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, + &job, false); + + batch->tlsinfo.tls.size = pipeline->tls_size; + batch->tlsinfo.wls.size = pipeline->wls_size; + if (batch->tlsinfo.wls.size) { /* wls_total_size is only computed when the pipeline reports a non-zero wls_size. */ + batch->wls_total_size = + pan_wls_mem_size(pdev, &dispatch.wg_count, batch->tlsinfo.wls.size); + } + + panvk_per_arch(cmd_close_batch)(cmdbuf); + desc_state->dirty = 0; /* Reset desc_state->dirty after the batch has been closed. */ +} diff --git a/src/panfrost/vulkan/panvk_vX_cs.c b/src/panfrost/vulkan/panvk_vX_cs.c index 36c0bdbce79..caabb217fc8 100644 --- a/src/panfrost/vulkan/panvk_vX_cs.c +++ b/src/panfrost/vulkan/panvk_vX_cs.c @@ -416,6 +416,39 @@ panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline, } } +void +panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline, + const struct panvk_dispatch_info
*dispatch, + void *job) +{ /* Packs the INVOCATION, PARAMETERS and DRAW sections of the COMPUTE_JOB descriptor at job from the pipeline and the dispatch info. */ + panfrost_pack_work_groups_compute(pan_section_ptr(job, COMPUTE_JOB, INVOCATION), + dispatch->wg_count.x, + dispatch->wg_count.y, + dispatch->wg_count.z, + pipeline->cs.local_size.x, + pipeline->cs.local_size.y, + pipeline->cs.local_size.z, + false, false); + + pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { /* job_task_split is the sum over x, y, z of util_logbase2_ceil(local_size plus one). */ + cfg.job_task_split = + util_logbase2_ceil(pipeline->cs.local_size.x + 1) + + util_logbase2_ceil(pipeline->cs.local_size.y + 1) + + util_logbase2_ceil(pipeline->cs.local_size.z + 1); + } + + pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { /* Every field below is copied verbatim from the pipeline or from dispatch. */ + cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE]; + cfg.attributes = dispatch->attributes; + cfg.attribute_buffers = dispatch->attribute_bufs; + cfg.thread_storage = dispatch->tsd; + cfg.uniform_buffers = dispatch->ubos; + cfg.push_uniforms = dispatch->push_uniforms; + cfg.textures = dispatch->textures; + cfg.samplers = dispatch->samplers; + } +} + static void panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline, const struct panvk_draw_info *draw, diff --git a/src/panfrost/vulkan/panvk_vX_cs.h b/src/panfrost/vulkan/panvk_vX_cs.h index 66c12c88279..f551410782b 100644 --- a/src/panfrost/vulkan/panvk_vX_cs.h +++ b/src/panfrost/vulkan/panvk_vX_cs.h @@ -79,6 +79,11 @@ panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline, const struct panvk_draw_info *draw, void *job); +void +panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline, + const struct panvk_dispatch_info *dispatch, + void *job); /* Fills a COMPUTE_JOB descriptor; the definition is added to panvk_vX_cs.c by this patch. */ + void panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline, const struct panvk_draw_info *draw,