diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h index 8ce46c7d4c2..434ef846e37 100644 --- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h @@ -434,15 +434,15 @@ panvk_per_arch(calculate_task_axis_and_increment)( { /* Pick the task_axis and task_increment to maximize thread * utilization. */ - unsigned threads_per_wg = - shader->local_size.x * shader->local_size.y * shader->local_size.z; + unsigned threads_per_wg = shader->cs.local_size.x * shader->cs.local_size.y * + shader->cs.local_size.z; unsigned max_thread_cnt = panfrost_compute_max_thread_count( &phys_dev->kmod.props, shader->info.work_reg_count); unsigned threads_per_task = threads_per_wg; unsigned local_size[3] = { - shader->local_size.x, - shader->local_size.y, - shader->local_size.z, + shader->cs.local_size.x, + shader->cs.local_size.y, + shader->cs.local_size.z, }; for (unsigned i = 0; i < 3; i++) { diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 22880b29d78..2934bd999a1 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -76,8 +76,8 @@ calculate_workgroups_per_task(const struct panvk_shader *shader, /* To achieve the best utilization, we should aim for as many workgroups * per tasks as we can fit without exceeding the above thread limit */ - unsigned threads_per_wg = - shader->local_size.x * shader->local_size.y * shader->local_size.z; + unsigned threads_per_wg = shader->cs.local_size.x * shader->cs.local_size.y * + shader->cs.local_size.z; assert(threads_per_wg > 0 && threads_per_wg <= phys_dev->kmod.props.max_threads_per_wg); unsigned wg_per_task = DIV_ROUND_UP(max_threads_per_task, threads_per_wg); @@ -257,19 +257,19 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) struct mali_compute_size_workgroup_packed wg_size; pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { - cfg.workgroup_size_x = shader->local_size.x; - cfg.workgroup_size_y = shader->local_size.y; - cfg.workgroup_size_z = shader->local_size.z; + cfg.workgroup_size_x = shader->cs.local_size.x; + cfg.workgroup_size_y = shader->cs.local_size.y; + cfg.workgroup_size_z = shader->cs.local_size.z; cfg.allow_merging_workgroups = false; } cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]); cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X), - info->wg_base.x * shader->local_size.x); + info->wg_base.x * shader->cs.local_size.x); cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y), - info->wg_base.y * shader->local_size.y); + info->wg_base.y * shader->cs.local_size.y); cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z), - info->wg_base.z * shader->local_size.z); + info->wg_base.z * shader->cs.local_size.z); if (indirect) { /* Load parameters from indirect buffer and update workgroup count * registers and sysvals */ diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c index 2c70d56b439..aafe0917fee 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_precomp.c @@ -120,9 +120,9 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, struct mali_compute_size_workgroup_packed wg_size; pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { - cfg.workgroup_size_x = shader->local_size.x; - cfg.workgroup_size_y = shader->local_size.y; - cfg.workgroup_size_z = shader->local_size.z; + cfg.workgroup_size_x = shader->cs.local_size.x; + cfg.workgroup_size_y = shader->cs.local_size.y; + cfg.workgroup_size_z = shader->cs.local_size.z; cfg.allow_merging_workgroups = false; } cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index 4cc70b911fe..7e5e050f248 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -136,13 +136,13 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, panfrost_pack_work_groups_compute( pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y, - wg_count.z, shader->local_size.x, shader->local_size.y, - shader->local_size.z, false, false); + wg_count.z, shader->cs.local_size.x, shader->cs.local_size.y, + shader->cs.local_size.z, false, false); pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) + - util_logbase2_ceil(shader->local_size.y + 1) + - util_logbase2_ceil(shader->local_size.z + 1); + cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) + + util_logbase2_ceil(shader->cs.local_size.y + 1) + + util_logbase2_ceil(shader->cs.local_size.z + 1); } pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c index cae03ea726b..075680c6a63 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_precomp.c @@ -49,13 +49,13 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx, panfrost_pack_work_groups_compute( pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), grid.count[0], - grid.count[1], grid.count[2], shader->local_size.x, shader->local_size.y, - shader->local_size.z, false, false); + grid.count[1], grid.count[2], shader->cs.local_size.x, + shader->cs.local_size.y, shader->cs.local_size.z, false, false); pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) + - util_logbase2_ceil(shader->local_size.y + 1) + - util_logbase2_ceil(shader->local_size.z + 1); + cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) + + util_logbase2_ceil(shader->cs.local_size.y + 1) + + util_logbase2_ceil(shader->cs.local_size.z + 1); } struct pan_compute_dim dim = {.x = grid.count[0], diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 62dfa51cb3d..b73fc28c9bf 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -256,7 +256,12 @@ struct panvk_shader_fau_info { struct panvk_shader { struct vk_shader vk; struct pan_shader_info info; - struct pan_compute_dim local_size; + + union { + struct { + struct pan_compute_dim local_size; + } cs; + }; struct { uint32_t used_set_mask; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c index 55f9c640997..6b19d9b856f 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c @@ -37,11 +37,11 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( set_compute_sysval(cmdbuf, dirty_sysvals, base.y, info->wg_base.y); set_compute_sysval(cmdbuf, dirty_sysvals, base.z, info->wg_base.z); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.x, - shader->local_size.x); + shader->cs.local_size.x); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.y, - shader->local_size.y); + shader->cs.local_size.y); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z, - shader->local_size.z); + shader->cs.local_size.z); set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address, dev->printf.bo->addr.dev); diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 227231c88b3..7f6aa42044a 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -913,9 +913,17 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, (nir->info.stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0); #endif - shader->local_size.x = nir->info.workgroup_size[0]; - shader->local_size.y = nir->info.workgroup_size[1]; - shader->local_size.z = nir->info.workgroup_size[2]; + switch (shader->info.stage) { + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + shader->cs.local_size.x = nir->info.workgroup_size[0]; + shader->cs.local_size.y = nir->info.workgroup_size[1]; + shader->cs.local_size.z = nir->info.workgroup_size[2]; + break; + + default: + break; + } return VK_SUCCESS; } @@ -1169,7 +1177,7 @@ panvk_per_arch(create_shader_from_binary)(struct panvk_device *dev, return panvk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); shader->info = *info; - shader->local_size = local_size; + shader->cs.local_size = local_size; shader->bin_ptr = bin_ptr; shader->bin_size = bin_size; shader->own_bin = false; @@ -1318,7 +1326,18 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob, shader->info = info; blob_copy_bytes(blob, &shader->fau, sizeof(shader->fau)); - blob_copy_bytes(blob, &shader->local_size, sizeof(shader->local_size)); + + switch (shader->info.stage) { + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + blob_copy_bytes(blob, &shader->cs.local_size, + sizeof(shader->cs.local_size)); + break; + + default: + break; + } + shader->bin_size = blob_read_uint32(blob); if (blob->overrun) { @@ -1406,7 +1425,18 @@ panvk_shader_serialize(struct vk_device *vk_dev, blob_write_bytes(blob, &shader->info, sizeof(shader->info)); blob_write_bytes(blob, &shader->fau, sizeof(shader->fau)); - blob_write_bytes(blob, &shader->local_size, sizeof(shader->local_size)); + + switch (shader->info.stage) { + case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: + blob_write_bytes(blob, &shader->cs.local_size, + sizeof(shader->cs.local_size)); + break; + + default: + break; + } + blob_write_uint32(blob, shader->bin_size); blob_write_bytes(blob, shader->bin_ptr, shader->bin_size); shader_desc_info_serialize(blob, shader);