mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 11:30:21 +01:00
panvk: Isolate CS specific bits in panvk_shader
We are about to add FS specific info there, so let's make sure all the per-stage bits are part of a union and are conditionally filled/[de]serialized based on the shader type. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32540>
This commit is contained in:
parent
8a16636444
commit
b8174b21d2
8 changed files with 71 additions and 36 deletions
|
|
@ -434,15 +434,15 @@ panvk_per_arch(calculate_task_axis_and_increment)(
|
|||
{
|
||||
/* Pick the task_axis and task_increment to maximize thread
|
||||
* utilization. */
|
||||
unsigned threads_per_wg =
|
||||
shader->local_size.x * shader->local_size.y * shader->local_size.z;
|
||||
unsigned threads_per_wg = shader->cs.local_size.x * shader->cs.local_size.y *
|
||||
shader->cs.local_size.z;
|
||||
unsigned max_thread_cnt = panfrost_compute_max_thread_count(
|
||||
&phys_dev->kmod.props, shader->info.work_reg_count);
|
||||
unsigned threads_per_task = threads_per_wg;
|
||||
unsigned local_size[3] = {
|
||||
shader->local_size.x,
|
||||
shader->local_size.y,
|
||||
shader->local_size.z,
|
||||
shader->cs.local_size.x,
|
||||
shader->cs.local_size.y,
|
||||
shader->cs.local_size.z,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
|
|
|
|||
|
|
@ -76,8 +76,8 @@ calculate_workgroups_per_task(const struct panvk_shader *shader,
|
|||
|
||||
/* To achieve the best utilization, we should aim for as many workgroups
|
||||
* per tasks as we can fit without exceeding the above thread limit */
|
||||
unsigned threads_per_wg =
|
||||
shader->local_size.x * shader->local_size.y * shader->local_size.z;
|
||||
unsigned threads_per_wg = shader->cs.local_size.x * shader->cs.local_size.y *
|
||||
shader->cs.local_size.z;
|
||||
assert(threads_per_wg > 0 &&
|
||||
threads_per_wg <= phys_dev->kmod.props.max_threads_per_wg);
|
||||
unsigned wg_per_task = DIV_ROUND_UP(max_threads_per_task, threads_per_wg);
|
||||
|
|
@ -257,19 +257,19 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
|
||||
cfg.workgroup_size_x = shader->local_size.x;
|
||||
cfg.workgroup_size_y = shader->local_size.y;
|
||||
cfg.workgroup_size_z = shader->local_size.z;
|
||||
cfg.workgroup_size_x = shader->cs.local_size.x;
|
||||
cfg.workgroup_size_y = shader->cs.local_size.y;
|
||||
cfg.workgroup_size_z = shader->cs.local_size.z;
|
||||
cfg.allow_merging_workgroups = false;
|
||||
}
|
||||
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE),
|
||||
wg_size.opaque[0]);
|
||||
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_X),
|
||||
info->wg_base.x * shader->local_size.x);
|
||||
info->wg_base.x * shader->cs.local_size.x);
|
||||
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Y),
|
||||
info->wg_base.y * shader->local_size.y);
|
||||
info->wg_base.y * shader->cs.local_size.y);
|
||||
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, JOB_OFFSET_Z),
|
||||
info->wg_base.z * shader->local_size.z);
|
||||
info->wg_base.z * shader->cs.local_size.z);
|
||||
if (indirect) {
|
||||
/* Load parameters from indirect buffer and update workgroup count
|
||||
* registers and sysvals */
|
||||
|
|
|
|||
|
|
@ -120,9 +120,9 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
|
||||
struct mali_compute_size_workgroup_packed wg_size;
|
||||
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
|
||||
cfg.workgroup_size_x = shader->local_size.x;
|
||||
cfg.workgroup_size_y = shader->local_size.y;
|
||||
cfg.workgroup_size_z = shader->local_size.z;
|
||||
cfg.workgroup_size_x = shader->cs.local_size.x;
|
||||
cfg.workgroup_size_y = shader->cs.local_size.y;
|
||||
cfg.workgroup_size_z = shader->cs.local_size.z;
|
||||
cfg.allow_merging_workgroups = false;
|
||||
}
|
||||
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), wg_size.opaque[0]);
|
||||
|
|
|
|||
|
|
@ -136,13 +136,13 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
|
|||
|
||||
panfrost_pack_work_groups_compute(
|
||||
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y,
|
||||
wg_count.z, shader->local_size.x, shader->local_size.y,
|
||||
shader->local_size.z, false, false);
|
||||
wg_count.z, shader->cs.local_size.x, shader->cs.local_size.y,
|
||||
shader->cs.local_size.z, false, false);
|
||||
|
||||
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
|
||||
cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
|
||||
util_logbase2_ceil(shader->local_size.y + 1) +
|
||||
util_logbase2_ceil(shader->local_size.z + 1);
|
||||
cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) +
|
||||
util_logbase2_ceil(shader->cs.local_size.y + 1) +
|
||||
util_logbase2_ceil(shader->cs.local_size.z + 1);
|
||||
}
|
||||
|
||||
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
|
||||
|
|
|
|||
|
|
@ -49,13 +49,13 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
|
||||
panfrost_pack_work_groups_compute(
|
||||
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), grid.count[0],
|
||||
grid.count[1], grid.count[2], shader->local_size.x, shader->local_size.y,
|
||||
shader->local_size.z, false, false);
|
||||
grid.count[1], grid.count[2], shader->cs.local_size.x,
|
||||
shader->cs.local_size.y, shader->cs.local_size.z, false, false);
|
||||
|
||||
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
|
||||
cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
|
||||
util_logbase2_ceil(shader->local_size.y + 1) +
|
||||
util_logbase2_ceil(shader->local_size.z + 1);
|
||||
cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) +
|
||||
util_logbase2_ceil(shader->cs.local_size.y + 1) +
|
||||
util_logbase2_ceil(shader->cs.local_size.z + 1);
|
||||
}
|
||||
|
||||
struct pan_compute_dim dim = {.x = grid.count[0],
|
||||
|
|
|
|||
|
|
@ -256,7 +256,12 @@ struct panvk_shader_fau_info {
|
|||
struct panvk_shader {
|
||||
struct vk_shader vk;
|
||||
struct pan_shader_info info;
|
||||
struct pan_compute_dim local_size;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct pan_compute_dim local_size;
|
||||
} cs;
|
||||
};
|
||||
|
||||
struct {
|
||||
uint32_t used_set_mask;
|
||||
|
|
|
|||
|
|
@ -37,11 +37,11 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
|
|||
set_compute_sysval(cmdbuf, dirty_sysvals, base.y, info->wg_base.y);
|
||||
set_compute_sysval(cmdbuf, dirty_sysvals, base.z, info->wg_base.z);
|
||||
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.x,
|
||||
shader->local_size.x);
|
||||
shader->cs.local_size.x);
|
||||
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.y,
|
||||
shader->local_size.y);
|
||||
shader->cs.local_size.y);
|
||||
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z,
|
||||
shader->local_size.z);
|
||||
shader->cs.local_size.z);
|
||||
set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
|
||||
dev->printf.bo->addr.dev);
|
||||
|
||||
|
|
|
|||
|
|
@ -913,9 +913,17 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
(nir->info.stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0);
|
||||
#endif
|
||||
|
||||
shader->local_size.x = nir->info.workgroup_size[0];
|
||||
shader->local_size.y = nir->info.workgroup_size[1];
|
||||
shader->local_size.z = nir->info.workgroup_size[2];
|
||||
switch (shader->info.stage) {
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_KERNEL:
|
||||
shader->cs.local_size.x = nir->info.workgroup_size[0];
|
||||
shader->cs.local_size.y = nir->info.workgroup_size[1];
|
||||
shader->cs.local_size.z = nir->info.workgroup_size[2];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
@ -1169,7 +1177,7 @@ panvk_per_arch(create_shader_from_binary)(struct panvk_device *dev,
|
|||
return panvk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
shader->info = *info;
|
||||
shader->local_size = local_size;
|
||||
shader->cs.local_size = local_size;
|
||||
shader->bin_ptr = bin_ptr;
|
||||
shader->bin_size = bin_size;
|
||||
shader->own_bin = false;
|
||||
|
|
@ -1318,7 +1326,18 @@ panvk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
|
|||
|
||||
shader->info = info;
|
||||
blob_copy_bytes(blob, &shader->fau, sizeof(shader->fau));
|
||||
blob_copy_bytes(blob, &shader->local_size, sizeof(shader->local_size));
|
||||
|
||||
switch (shader->info.stage) {
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_KERNEL:
|
||||
blob_copy_bytes(blob, &shader->cs.local_size,
|
||||
sizeof(shader->cs.local_size));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
shader->bin_size = blob_read_uint32(blob);
|
||||
|
||||
if (blob->overrun) {
|
||||
|
|
@ -1406,7 +1425,18 @@ panvk_shader_serialize(struct vk_device *vk_dev,
|
|||
|
||||
blob_write_bytes(blob, &shader->info, sizeof(shader->info));
|
||||
blob_write_bytes(blob, &shader->fau, sizeof(shader->fau));
|
||||
blob_write_bytes(blob, &shader->local_size, sizeof(shader->local_size));
|
||||
|
||||
switch (shader->info.stage) {
|
||||
case MESA_SHADER_COMPUTE:
|
||||
case MESA_SHADER_KERNEL:
|
||||
blob_write_bytes(blob, &shader->cs.local_size,
|
||||
sizeof(shader->cs.local_size));
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
blob_write_uint32(blob, shader->bin_size);
|
||||
blob_write_bytes(blob, shader->bin_ptr, shader->bin_size);
|
||||
shader_desc_info_serialize(blob, shader);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue