panvk: Isolate CS specific bits in panvk_shader

We are about to add FS specific info there, so let's make sure all the
per-stage bits are part of a union and are conditionally
filled/[de]serialized based on the shader type.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
This commit is contained in:
Boris Brezillon 2025-03-06 13:17:12 +01:00 committed by Marge Bot
parent 8a16636444
commit b8174b21d2
8 changed files with 71 additions and 36 deletions

View file

@ -434,15 +434,15 @@ panvk_per_arch(calculate_task_axis_and_increment)(
{
/* Pick the task_axis and task_increment to maximize thread
* utilization. */
unsigned threads_per_wg =
shader->local_size.x * shader->local_size.y * shader->local_size.z;
unsigned threads_per_wg = shader->cs.local_size.x * shader->cs.local_size.y *
shader->cs.local_size.z;
unsigned max_thread_cnt = panfrost_compute_max_thread_count(
&phys_dev->kmod.props, shader->info.work_reg_count);
unsigned threads_per_task = threads_per_wg;
unsigned local_size[3] = {
shader->local_size.x,
shader->local_size.y,
shader->local_size.z,
shader->cs.local_size.x,
shader->cs.local_size.y,
shader->cs.local_size.z,
};
for (unsigned i = 0; i < 3; i++) {

View file

@ -76,8 +76,8 @@ calculate_workgroups_per_task(const struct panvk_shader *shader,
/* To achieve the best utilization, we should aim for as many workgroups
* per tasks as we can fit without exceeding the above thread limit */
unsigned threads_per_wg =
shader->local_size.x * shader->local_size.y * shader->local_size.z;
unsigned threads_per_wg = shader->cs.local_size.x * shader->cs.local_size.y *
shader->cs.local_size.z;
assert(threads_per_wg > 0 &&
threads_per_wg <= phys_dev->kmod.props.max_threads_per_wg);
unsigned wg_per_task = DIV_ROUND_UP(max_threads_per_task, threads_per_wg);
@ -257,19 +257,19 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
struct mali_compute_size_workgroup_packed wg_size;
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
cfg.workgroup_size_x = shader->local_size.x;
cfg.workgroup_size_y = shader->local_size.y;
cfg.workgroup_size_z = shader->local_size.z;
cfg.workgroup_size_x = shader->cs.local_size.x;
cfg.workgroup_size_y = shader->cs.local_size.y;
cfg.workgroup_size_z = shader->cs.local_size.z;
cfg.allow_merging_workgroups = false;
}
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE),
wg_size.opaque[0]);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X),
info->wg_base.x * shader->local_size.x);
info->wg_base.x * shader->cs.local_size.x);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y),
info->wg_base.y * shader->local_size.y);
info->wg_base.y * shader->cs.local_size.y);
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z),
info->wg_base.z * shader->local_size.z);
info->wg_base.z * shader->cs.local_size.z);
if (indirect) {
/* Load parameters from indirect buffer and update workgroup count
* registers and sysvals */

View file

@ -120,9 +120,9 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
struct mali_compute_size_workgroup_packed wg_size;
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
cfg.workgroup_size_x = shader->local_size.x;
cfg.workgroup_size_y = shader->local_size.y;
cfg.workgroup_size_z = shader->local_size.z;
cfg.workgroup_size_x = shader->cs.local_size.x;
cfg.workgroup_size_y = shader->cs.local_size.y;
cfg.workgroup_size_z = shader->cs.local_size.z;
cfg.allow_merging_workgroups = false;
}
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]);

View file

@ -136,13 +136,13 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
panfrost_pack_work_groups_compute(
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y,
wg_count.z, shader->local_size.x, shader->local_size.y,
shader->local_size.z, false, false);
wg_count.z, shader->cs.local_size.x, shader->cs.local_size.y,
shader->cs.local_size.z, false, false);
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
util_logbase2_ceil(shader->local_size.y + 1) +
util_logbase2_ceil(shader->local_size.z + 1);
cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) +
util_logbase2_ceil(shader->cs.local_size.y + 1) +
util_logbase2_ceil(shader->cs.local_size.z + 1);
}
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {

View file

@ -49,13 +49,13 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
panfrost_pack_work_groups_compute(
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), grid.count[0],
grid.count[1], grid.count[2], shader->local_size.x, shader->local_size.y,
shader->local_size.z, false, false);
grid.count[1], grid.count[2], shader->cs.local_size.x,
shader->cs.local_size.y, shader->cs.local_size.z, false, false);
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
util_logbase2_ceil(shader->local_size.y + 1) +
util_logbase2_ceil(shader->local_size.z + 1);
cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) +
util_logbase2_ceil(shader->cs.local_size.y + 1) +
util_logbase2_ceil(shader->cs.local_size.z + 1);
}
struct pan_compute_dim dim = {.x = grid.count[0],

View file

@ -256,7 +256,12 @@ struct panvk_shader_fau_info {
struct panvk_shader {
struct vk_shader vk;
struct pan_shader_info info;
struct pan_compute_dim local_size;
union {
struct {
struct pan_compute_dim local_size;
} cs;
};
struct {
uint32_t used_set_mask;

View file

@ -37,11 +37,11 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
set_compute_sysval(cmdbuf, dirty_sysvals, base.y, info->wg_base.y);
set_compute_sysval(cmdbuf, dirty_sysvals, base.z, info->wg_base.z);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.x,
shader->local_size.x);
shader->cs.local_size.x);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.y,
shader->local_size.y);
shader->cs.local_size.y);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z,
shader->local_size.z);
shader->cs.local_size.z);
set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
dev->printf.bo->addr.dev);

View file

@ -913,9 +913,17 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
(nir->info.stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0);
#endif
shader->local_size.x = nir->info.workgroup_size[0];
shader->local_size.y = nir->info.workgroup_size[1];
shader->local_size.z = nir->info.workgroup_size[2];
switch (shader->info.stage) {
case MESA_SHADER_COMPUTE:
case MESA_SHADER_KERNEL:
shader->cs.local_size.x = nir->info.workgroup_size[0];
shader->cs.local_size.y = nir->info.workgroup_size[1];
shader->cs.local_size.z = nir->info.workgroup_size[2];
break;
default:
break;
}
return VK_SUCCESS;
}
@ -1169,7 +1177,7 @@ panvk_per_arch(create_shader_from_binary)(struct panvk_device *dev,
return panvk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
shader->info = *info;
shader->local_size = local_size;
shader->cs.local_size = local_size;
shader->bin_ptr = bin_ptr;
shader->bin_size = bin_size;
shader->own_bin = false;
@ -1318,7 +1326,18 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
shader->info = info;
blob_copy_bytes(blob, &shader->fau, sizeof(shader->fau));
blob_copy_bytes(blob, &shader->local_size, sizeof(shader->local_size));
switch (shader->info.stage) {
case MESA_SHADER_COMPUTE:
case MESA_SHADER_KERNEL:
blob_copy_bytes(blob, &shader->cs.local_size,
sizeof(shader->cs.local_size));
break;
default:
break;
}
shader->bin_size = blob_read_uint32(blob);
if (blob->overrun) {
@ -1406,7 +1425,18 @@ panvk_shader_serialize(struct vk_device *vk_dev,
blob_write_bytes(blob, &shader->info, sizeof(shader->info));
blob_write_bytes(blob, &shader->fau, sizeof(shader->fau));
blob_write_bytes(blob, &shader->local_size, sizeof(shader->local_size));
switch (shader->info.stage) {
case MESA_SHADER_COMPUTE:
case MESA_SHADER_KERNEL:
blob_write_bytes(blob, &shader->cs.local_size,
sizeof(shader->cs.local_size));
break;
default:
break;
}
blob_write_uint32(blob, shader->bin_size);
blob_write_bytes(blob, shader->bin_ptr, shader->bin_size);
shader_desc_info_serialize(blob, shader);