panvk/dispatch: s/shader/cs/g
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Even though "shader" uniquely means something here and we don't need to
specify what stage, "cs" is still shorter, obvious, and matches what we
do all over the 3D code so it saves some cognitive load when bouncing
back and forth.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38403>
This commit is contained in:
Faith Ekstrand 2025-11-06 09:51:54 -05:00 committed by Marge Bot
parent 1046f5ed48
commit 7411acaa77
3 changed files with 57 additions and 58 deletions

View file

@@ -70,7 +70,7 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf)
uint64_t
panvk_per_arch(cmd_dispatch_prepare_tls)(
struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader,
struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *cs,
const struct pan_compute_dim *dim, bool indirect)
{
struct panvk_physical_device *phys_dev =
@@ -81,8 +81,8 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
return tsd.gpu;
struct pan_tls_info tlsinfo = {
.tls.size = shader->info.tls_size,
.wls.size = shader->info.wls_size,
.tls.size = cs->info.tls_size,
.wls.size = cs->info.wls_size,
};
if (tlsinfo.wls.size) {
@@ -90,7 +90,7 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
tlsinfo.wls.instances = pan_calc_wls_instances(
&shader->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
&cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
unsigned wls_total_size = pan_calc_total_wls_size(
tlsinfo.wls.size, tlsinfo.wls.instances, core_id_range);
@@ -107,7 +107,7 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
}
cmdbuf->state.tls.info.tls.size =
MAX2(shader->info.tls_size, cmdbuf->state.tls.info.tls.size);
MAX2(cs->info.tls_size, cmdbuf->state.tls.info.tls.size);
if (!cmdbuf->state.tls.desc.gpu) {
cmdbuf->state.tls.desc = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
@@ -123,12 +123,12 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
static void
cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
{
const struct panvk_shader_variant *shader =
const struct panvk_shader_variant *cs =
panvk_shader_only_variant(cmdbuf->state.compute.shader);
VkResult result;
/* If there's no compute shader, we can skip the dispatch. */
if (!panvk_priv_mem_dev_addr(shader->spd))
if (!panvk_priv_mem_dev_addr(cs->spd))
return;
struct panvk_physical_device *phys_dev =
@@ -148,20 +148,20 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
bool indirect = info->indirect.buffer_dev_addr != 0;
uint64_t tsd =
panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader, &dim, indirect);
panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, cs, &dim, indirect);
if (!tsd)
return;
/* Only used for indirect dispatch */
unsigned wg_per_task = 0;
if (indirect)
wg_per_task = pan_calc_workgroups_per_task(&shader->cs.local_size,
wg_per_task = pan_calc_workgroups_per_task(&cs->cs.local_size,
&phys_dev->kmod.props);
if (compute_state_dirty(cmdbuf, DESC_STATE) ||
compute_state_dirty(cmdbuf, CS)) {
result = panvk_per_arch(cmd_prepare_push_descs)(
cmdbuf, desc_state, shader->desc_info.used_set_mask);
cmdbuf, desc_state, cs->desc_info.used_set_mask);
if (result != VK_SUCCESS)
return;
}
@@ -172,14 +172,14 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
if (result != VK_SUCCESS)
return;
result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, shader, 1);
result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1);
if (result != VK_SUCCESS)
return;
if (compute_state_dirty(cmdbuf, CS) ||
compute_state_dirty(cmdbuf, DESC_STATE)) {
result = panvk_per_arch(cmd_prepare_shader_res_table)(
cmdbuf, desc_state, shader, cs_desc_state, 1);
cmdbuf, desc_state, cs, cs_desc_state, 1);
if (result != VK_SUCCESS)
return;
}
@@ -187,7 +187,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
/* Copy the global TLS pointer to the per-job TSD. */
if (shader->info.tls_size) {
if (cs->info.tls_size) {
cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
@@ -203,13 +203,13 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) {
uint64_t fau_ptr = cmdbuf->state.compute.push_uniforms |
((uint64_t)shader->fau.total_count << 56);
((uint64_t)cs->fau.total_count << 56);
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);
}
if (compute_state_dirty(cmdbuf, CS))
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0),
panvk_priv_mem_dev_addr(shader->spd));
panvk_priv_mem_dev_addr(cs->spd));
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);
@@ -219,9 +219,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
struct mali_compute_size_workgroup_packed wg_size;
pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) {
cfg.workgroup_size_x = shader->cs.local_size.x;
cfg.workgroup_size_y = shader->cs.local_size.y;
cfg.workgroup_size_z = shader->cs.local_size.z;
cfg.workgroup_size_x = cs->cs.local_size.x;
cfg.workgroup_size_y = cs->cs.local_size.y;
cfg.workgroup_size_z = cs->cs.local_size.z;
cfg.allow_merging_workgroups = false;
}
cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE),
@@ -245,25 +245,25 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
cs_move64_to(b, cs_scratch_reg64(b, 0),
cmdbuf->state.compute.push_uniforms);
if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
if (shader_uses_sysval(cs, compute, num_work_groups.x)) {
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X),
cs_scratch_reg64(b, 0),
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.x)));
cs, sysval_offset(compute, num_work_groups.x)));
}
if (shader_uses_sysval(shader, compute, num_work_groups.y)) {
if (shader_uses_sysval(cs, compute, num_work_groups.y)) {
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y),
cs_scratch_reg64(b, 0),
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.y)));
cs, sysval_offset(compute, num_work_groups.y)));
}
if (shader_uses_sysval(shader, compute, num_work_groups.z)) {
if (shader_uses_sysval(cs, compute, num_work_groups.z)) {
cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z),
cs_scratch_reg64(b, 0),
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.z)));
cs, sysval_offset(compute, num_work_groups.z)));
}
cs_flush_stores(b);
@@ -297,7 +297,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
unsigned task_axis = MALI_TASK_AXIS_X;
unsigned task_increment = 0;
panvk_per_arch(calculate_task_axis_and_increment)(
shader, phys_dev, &task_axis, &task_increment);
cs, phys_dev, &task_axis, &task_increment);
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
task_increment, task_axis,
cs_shader_res_sel(0, 0, 0, 0));

View file

@@ -32,7 +32,7 @@
uint64_t
panvk_per_arch(cmd_dispatch_prepare_tls)(
struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader,
struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *cs,
const struct pan_compute_dim *dim, bool indirect)
{
struct panvk_batch *batch = cmdbuf->cur_batch;
@@ -44,15 +44,15 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
batch->tlsinfo.tls.size = shader->info.tls_size;
batch->tlsinfo.wls.size = shader->info.wls_size;
batch->tlsinfo.tls.size = cs->info.tls_size;
batch->tlsinfo.wls.size = cs->info.wls_size;
if (batch->tlsinfo.wls.size) {
unsigned core_id_range;
pan_query_core_count(&phys_dev->kmod.props, &core_id_range);
batch->tlsinfo.wls.instances = pan_calc_wls_instances(
&shader->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
&cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim);
batch->wls_total_size = pan_calc_total_wls_size(
batch->tlsinfo.wls.size, batch->tlsinfo.wls.instances, core_id_range);
}
@@ -63,12 +63,12 @@ panvk_per_arch(cmd_dispatch_prepare_tls)(
static void
cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
{
const struct panvk_shader_variant *shader =
const struct panvk_shader_variant *cs =
panvk_shader_only_variant(cmdbuf->state.compute.shader);
VkResult result;
/* If there's no compute shader, we can skip the dispatch. */
if (!panvk_priv_mem_dev_addr(shader->rsd))
if (!panvk_priv_mem_dev_addr(cs->rsd))
return;
panvk_per_arch(cmd_close_batch)(cmdbuf);
@@ -85,17 +85,17 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
info->direct.wg_count.z,
};
bool indirect = info->indirect.buffer_dev_addr != 0;
uint64_t tsd = panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader,
uint64_t tsd = panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, cs,
&wg_count, indirect);
result = panvk_per_arch(cmd_prepare_push_descs)(
cmdbuf, desc_state, shader->desc_info.used_set_mask);
cmdbuf, desc_state, cs->desc_info.used_set_mask);
if (result != VK_SUCCESS)
return;
if (compute_state_dirty(cmdbuf, CS) ||
compute_state_dirty(cmdbuf, DESC_STATE)) {
result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader,
result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, cs,
cs_desc_state);
if (result != VK_SUCCESS)
return;
@@ -103,8 +103,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, info);
result = panvk_per_arch(cmd_prepare_push_uniforms)(
cmdbuf, shader, 1);
result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1);
if (result != VK_SUCCESS)
return;
@@ -113,10 +112,10 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
if (compute_state_dirty(cmdbuf, CS) ||
compute_state_dirty(cmdbuf, DESC_STATE)) {
result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
cmdbuf, desc_state, shader, cs_desc_state);
cmdbuf, desc_state, cs, cs_desc_state);
result = panvk_per_arch(meta_get_copy_desc_job)(
cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0,
cmdbuf, cs, &cmdbuf->state.compute.desc_state, cs_desc_state, 0,
&copy_desc_job);
if (result != VK_SUCCESS)
return;
@@ -134,18 +133,18 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
if (!indirect) {
pan_pack_work_groups_compute(
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x,
wg_count.y, wg_count.z, shader->cs.local_size.x,
shader->cs.local_size.y, shader->cs.local_size.z, false, false);
wg_count.y, wg_count.z, cs->cs.local_size.x,
cs->cs.local_size.y, cs->cs.local_size.z, false, false);
}
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) +
util_logbase2_ceil(shader->cs.local_size.y + 1) +
util_logbase2_ceil(shader->cs.local_size.z + 1);
cfg.job_task_split = util_logbase2_ceil(cs->cs.local_size.x + 1) +
util_logbase2_ceil(cs->cs.local_size.y + 1) +
util_logbase2_ceil(cs->cs.local_size.z + 1);
}
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
cfg.state = panvk_priv_mem_dev_addr(shader->rsd);
cfg.state = panvk_priv_mem_dev_addr(cs->rsd);
cfg.attributes = cs_desc_state->img_attrib_table;
cfg.attribute_buffers =
cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
@@ -163,25 +162,25 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
uint64_t num_work_groups_y_sysval_addr = 0x8ull << 60;
uint64_t num_work_groups_z_sysval_addr = 0x8ull << 60;
if (shader_uses_sysval(shader, compute, num_work_groups.x)) {
if (shader_uses_sysval(cs, compute, num_work_groups.x)) {
num_work_groups_x_sysval_addr =
cmdbuf->state.compute.push_uniforms +
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.x));
cs, sysval_offset(compute, num_work_groups.x));
}
if (shader_uses_sysval(shader, compute, num_work_groups.y)) {
if (shader_uses_sysval(cs, compute, num_work_groups.y)) {
num_work_groups_y_sysval_addr =
cmdbuf->state.compute.push_uniforms +
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.y));
cs, sysval_offset(compute, num_work_groups.y));
}
if (shader_uses_sysval(shader, compute, num_work_groups.z)) {
if (shader_uses_sysval(cs, compute, num_work_groups.z)) {
num_work_groups_z_sysval_addr =
cmdbuf->state.compute.push_uniforms +
shader_remapped_sysval_offset(
shader, sysval_offset(compute, num_work_groups.z));
cs, sysval_offset(compute, num_work_groups.z));
}
struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmdbuf);
@@ -191,8 +190,8 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
panlib_indirect_dispatch(
&precomp_ctx, panlib_1d(1), precomp_barrier,
info->indirect.buffer_dev_addr, shader->cs.local_size.x,
shader->cs.local_size.y, shader->cs.local_size.z, job.gpu,
info->indirect.buffer_dev_addr, cs->cs.local_size.x,
cs->cs.local_size.y, cs->cs.local_size.z, job.gpu,
num_work_groups_x_sysval_addr, num_work_groups_y_sysval_addr,
num_work_groups_z_sysval_addr);
indirect_dep = batch->vtc_jc.job_index;

View file

@@ -12,7 +12,7 @@ void
panvk_per_arch(cmd_prepare_dispatch_sysvals)(
struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info)
{
const struct panvk_shader_variant *shader =
const struct panvk_shader_variant *cs =
panvk_shader_only_variant(cmdbuf->state.compute.shader);
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
@@ -38,11 +38,11 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
set_compute_sysval(cmdbuf, dirty_sysvals, base.y, info->wg_base.y);
set_compute_sysval(cmdbuf, dirty_sysvals, base.z, info->wg_base.z);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.x,
shader->cs.local_size.x);
cs->cs.local_size.x);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.y,
shader->cs.local_size.y);
cs->cs.local_size.y);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z,
shader->cs.local_size.z);
cs->cs.local_size.z);
set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
dev->printf.bo->addr.dev);
@@ -60,7 +60,7 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
}
for (uint32_t i = 0; i < MAX_SETS; i++) {
if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) {
if (cs->desc_info.used_set_mask & BITFIELD_BIT(i)) {
set_compute_sysval(cmdbuf, dirty_sysvals, desc.sets[i],
desc_state->sets[i]->descs.dev);
}
@@ -68,7 +68,7 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
#endif
/* Dirty push_uniforms if the used_sysvals/dirty_sysvals overlap. */
BITSET_AND(dirty_sysvals, dirty_sysvals, shader->fau.used_sysvals);
BITSET_AND(dirty_sysvals, dirty_sysvals, cs->fau.used_sysvals);
if (!BITSET_IS_EMPTY(dirty_sysvals))
compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
}