Mirror of https://gitlab.freedesktop.org/mesa/mesa.git, synced 2026-01-01 16:10:09 +01:00
panvk: Implement indirect dispatch on JM
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32839>
parent bf9331b3fe
commit 8b4e515ca2
3 changed files with 96 additions and 49 deletions
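For context, indirect dispatch is the Vulkan path where the workgroup counts are read at execution time from a VkDispatchIndirectCommand stored in a buffer instead of being baked into the command. A minimal, hedged sketch of the application-side pattern this enables follows; the Vulkan entry points are standard, while the helper name and handles are illustrative only.

/* Application-side sketch of an indirect dispatch. record_indirect_dispatch,
 * cmd, pipeline, args_buf and args_offset are placeholder names; the buffer is
 * assumed to hold a VkDispatchIndirectCommand {x, y, z} at args_offset,
 * typically written by an earlier GPU pass. */
#include <vulkan/vulkan.h>

void record_indirect_dispatch(VkCommandBuffer cmd, VkPipeline pipeline,
                              VkBuffer args_buf, VkDeviceSize args_offset)
{
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   vkCmdDispatchIndirect(cmd, args_buf, args_offset);
}

On the driver side, the change below wires this up for the Job Manager (JM) backend with the help of a precomp (panlib) job.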
@@ -283,26 +283,6 @@ spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount-multithread,Crash
 dEQP-VK.api.object_management.max_concurrent.device,Fail
 dEQP-VK.api.object_management.max_concurrent.device_group,Fail
 
-# CmdDispatchIndirect not supported yet
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.empty_command,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.large_offset,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.large_offset_multiple_invocations,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.multi_dispatch,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.multi_dispatch_reuse_command,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.multiple_groups,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.multiple_groups_multiple_invocations,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.single_invocation,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.gen_in_compute.small_offset,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.empty_command,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.large_offset,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.large_offset_multiple_invocations,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.multi_dispatch,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.multi_dispatch_reuse_command,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.multiple_groups,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.multiple_groups_multiple_invocations,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.single_invocation,Crash
-dEQP-VK.compute.pipeline.indirect_dispatch.upload_buffer.small_offset,Crash
-
 # CTS bug, see https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/5296
 dEQP-VK.api.device_init.create_device_unsupported_features.protected_memory_features,Fail
@@ -71,7 +71,7 @@ shaders@glsl-bug-110796
 dEQP-VK.dynamic_rendering.primary_cmd_buff.random.seed*
 
-# indirect dispatch and draw not supported yet
+# indirect draw not supported yet
 dEQP-VK.synchronization.*indirect*
 dEQP-VK.synchronization2.*indirect*
 dEQP-VK.draw.renderpass.indirect_draw.*
@@ -9,11 +9,14 @@
  * SPDX-License-Identifier: MIT
  */
 
+#include <stdint.h>
 #include "genxml/gen_macros.h"
 
+#include "panvk_buffer.h"
 #include "panvk_cmd_alloc.h"
 #include "panvk_cmd_buffer.h"
 #include "panvk_cmd_desc_state.h"
+#include "panvk_cmd_precomp.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
 #include "panvk_macros.h"
@@ -36,7 +39,6 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf,
    struct panvk_batch *batch = cmdbuf->cur_batch;
 
    assert(batch);
-   assert(!indirect && "Indirect not supported yet!");
 
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(cmdbuf->vk.base.device->physical);
@@ -45,6 +47,7 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf,
    batch->tlsinfo.tls.size = shader->info.tls_size;
    batch->tlsinfo.wls.size = shader->info.wls_size;
 
    if (batch->tlsinfo.wls.size) {
       unsigned core_id_range;
@@ -58,29 +61,16 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf,
    return batch->tls.gpu;
 }
 
-VKAPI_ATTR void VKAPI_CALL
-panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
-                                uint32_t baseGroupX, uint32_t baseGroupY,
-                                uint32_t baseGroupZ, uint32_t groupCountX,
-                                uint32_t groupCountY, uint32_t groupCountZ)
+static void
+cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
 {
-   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
    const struct panvk_shader *shader = cmdbuf->state.compute.shader;
    VkResult result;
 
-   if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0)
-      return;
-
    /* If there's no compute shader, we can skip the dispatch. */
    if (!panvk_priv_mem_dev_addr(shader->rsd))
       return;
 
-   struct panvk_dispatch_info info = {
-      .wg_base = {baseGroupX, baseGroupY, baseGroupZ},
-      .direct.wg_count = {groupCountX, groupCountY, groupCountZ},
-   };
-   struct pan_compute_dim wg_count = {groupCountX, groupCountY, groupCountZ};
-
    panvk_per_arch(cmd_close_batch)(cmdbuf);
    struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
@@ -89,8 +79,14 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
    struct panvk_shader_desc_state *cs_desc_state =
       &cmdbuf->state.compute.cs.desc;
 
+   struct pan_compute_dim wg_count = {
+      info->direct.wg_count.x,
+      info->direct.wg_count.y,
+      info->direct.wg_count.z,
+   };
+   bool indirect = info->indirect.buffer_dev_addr != 0;
    uint64_t tsd = panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader,
-                                                           &wg_count, false);
+                                                           &wg_count, indirect);
 
    result = panvk_per_arch(cmd_prepare_push_descs)(
       cmdbuf, desc_state, shader->desc_info.used_set_mask);
@@ -105,7 +101,7 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
       return;
    }
 
-   panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, &info);
+   panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, info);
 
    result = panvk_per_arch(cmd_prepare_push_uniforms)(
       cmdbuf, cmdbuf->state.compute.shader, 1);
@@ -135,10 +131,12 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
    util_dynarray_append(&batch->jobs, void *, job.cpu);
 
-   pan_pack_work_groups_compute(
-      pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y,
-      wg_count.z, shader->cs.local_size.x, shader->cs.local_size.y,
-      shader->cs.local_size.z, false, false);
+   if (!indirect) {
+      pan_pack_work_groups_compute(
+         pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x,
+         wg_count.y, wg_count.z, shader->cs.local_size.x,
+         shader->cs.local_size.y, shader->cs.local_size.z, false, false);
+   }
 
    pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
       cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) +
@@ -158,22 +156,91 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
       cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
    }
 
+   unsigned indirect_dep = 0;
+   if (indirect) {
+      /* We redirect write to memory sink for null pointers */
+      uint64_t num_work_groups_x_sysval_addr = 0x8ull << 60;
+      uint64_t num_work_groups_y_sysval_addr = 0x8ull << 60;
+      uint64_t num_work_groups_z_sysval_addr = 0x8ull << 60;
+
+      if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
+         num_work_groups_x_sysval_addr =
+            cmdbuf->state.compute.push_uniforms +
+            shader_remapped_sysval_offset(
+               shader, sysval_offset(compute, num_work_groups.x));
+      }
+
+      if (shader_uses_sysval(shader, compute, num_work_groups.y)) {
+         num_work_groups_y_sysval_addr =
+            cmdbuf->state.compute.push_uniforms +
+            shader_remapped_sysval_offset(
+               shader, sysval_offset(compute, num_work_groups.y));
+      }
+
+      if (shader_uses_sysval(shader, compute, num_work_groups.z)) {
+         num_work_groups_z_sysval_addr =
+            cmdbuf->state.compute.push_uniforms +
+            shader_remapped_sysval_offset(
+               shader, sysval_offset(compute, num_work_groups.z));
+      }
+
+      struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmdbuf);
+      enum panlib_barrier precomp_barrier =
+         copy_desc_job.gpu == 0 ? PANLIB_BARRIER_JM_SUPPRESS_PREFETCH
+                                : PANLIB_BARRIER_NONE;
+
+      panlib_indirect_dispatch(
+         &precomp_ctx, panlib_1d(1), precomp_barrier,
+         info->indirect.buffer_dev_addr, shader->cs.local_size.x,
+         shader->cs.local_size.y, shader->cs.local_size.z, job.gpu,
+         num_work_groups_x_sysval_addr, num_work_groups_y_sysval_addr,
+         num_work_groups_z_sysval_addr);
+      indirect_dep = batch->vtc_jc.job_index;
+   }
+
+   util_dynarray_append(&batch->jobs, void *, job.cpu);
 
    unsigned copy_desc_dep =
       copy_desc_job.gpu
-         ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
-                          0, 0, &copy_desc_job, false)
-         : 0;
+         ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false,
+                          indirect, 0, 0, &copy_desc_job, false)
+         : indirect_dep;
 
-   pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
-                  copy_desc_dep, &job, false);
+   pan_jc_add_job(&batch->vtc_jc,
+                  indirect ? MALI_JOB_TYPE_NOT_STARTED : MALI_JOB_TYPE_COMPUTE,
+                  indirect, false, 0, copy_desc_dep, &job, false);
 
    panvk_per_arch(cmd_close_batch)(cmdbuf);
    clear_dirty_after_dispatch(cmdbuf);
 }
 
+VKAPI_ATTR void VKAPI_CALL
+panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
+                                uint32_t baseGroupX, uint32_t baseGroupY,
+                                uint32_t baseGroupZ, uint32_t groupCountX,
+                                uint32_t groupCountY, uint32_t groupCountZ)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0)
+      return;
+
+   struct panvk_dispatch_info info = {
+      .wg_base = {baseGroupX, baseGroupY, baseGroupZ},
+      .direct.wg_count = {groupCountX, groupCountY, groupCountZ},
+   };
+   cmd_dispatch(cmdbuf, &info);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
                                     VkBuffer _buffer, VkDeviceSize offset)
 {
-   panvk_stub();
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(panvk_buffer, buffer, _buffer);
+   uint64_t buffer_gpu = panvk_buffer_gpu_ptr(buffer, offset);
+   struct panvk_dispatch_info info = {
+      .indirect.buffer_dev_addr = buffer_gpu,
+   };
+   cmd_dispatch(cmdbuf, &info);
 }
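A note on the mechanism, as far as the diff shows: for an indirect dispatch, the driver enqueues a panlib precomp job (panlib_indirect_dispatch) ahead of the compute job; it reads the three group counts from the user buffer, patches the COMPUTE_JOB invocation, and stores the counts to the num_work_groups sysvals in the push-uniform buffer. Sysval components the shader does not read are pointed at a write-only memory sink address (0x8ull << 60) so the helper can store unconditionally instead of branching per component. The sketch below only illustrates that sink trick on the CPU; dispatch_args, patch_indirect_args, and sink are hypothetical names, not panvk API.

/* CPU-side illustration of the "write unused outputs to a sink" pattern.
 * dispatch_args mirrors VkDispatchIndirectCommand (three u32 group counts);
 * the real kernel uses a dedicated GPU sink address instead of a variable. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct dispatch_args { uint32_t x, y, z; };

static uint32_t sink; /* stand-in for the GPU memory sink */

static void
patch_indirect_args(const struct dispatch_args *args,
                    uint32_t *num_wg_x, uint32_t *num_wg_y, uint32_t *num_wg_z)
{
   /* Always store; NULL destinations are redirected to the sink, so there is
    * no per-component branching on the store path. */
   *(num_wg_x ? num_wg_x : &sink) = args->x;
   *(num_wg_y ? num_wg_y : &sink) = args->y;
   *(num_wg_z ? num_wg_z : &sink) = args->z;
}

int main(void)
{
   struct dispatch_args args = {8, 4, 1};
   uint32_t x, z; /* pretend the shader only reads num_work_groups.x and .z */

   patch_indirect_args(&args, &x, NULL, &z);
   printf("x=%" PRIu32 " z=%" PRIu32 "\n", x, z);
   return 0;
}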