diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index eca3b1f4421..f61f5ca20a1 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -70,7 +70,7 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) uint64_t panvk_per_arch(cmd_dispatch_prepare_tls)( - struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *cs, const struct pan_compute_dim *dim, bool indirect) { struct panvk_physical_device *phys_dev = @@ -81,8 +81,8 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( return tsd.gpu; struct pan_tls_info tlsinfo = { - .tls.size = shader->info.tls_size, - .wls.size = shader->info.wls_size, + .tls.size = cs->info.tls_size, + .wls.size = cs->info.wls_size, }; if (tlsinfo.wls.size) { @@ -90,7 +90,7 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( pan_query_core_count(&phys_dev->kmod.props, &core_id_range); tlsinfo.wls.instances = pan_calc_wls_instances( - &shader->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim); + &cs->cs.local_size, &phys_dev->kmod.props, indirect ? 
NULL : dim); unsigned wls_total_size = pan_calc_total_wls_size( tlsinfo.wls.size, tlsinfo.wls.instances, core_id_range); @@ -107,7 +107,7 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( } cmdbuf->state.tls.info.tls.size = - MAX2(shader->info.tls_size, cmdbuf->state.tls.info.tls.size); + MAX2(cs->info.tls_size, cmdbuf->state.tls.info.tls.size); if (!cmdbuf->state.tls.desc.gpu) { cmdbuf->state.tls.desc = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE); @@ -123,12 +123,12 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( static void cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) { - const struct panvk_shader_variant *shader = + const struct panvk_shader_variant *cs = panvk_shader_only_variant(cmdbuf->state.compute.shader); VkResult result; /* If there's no compute shader, we can skip the dispatch. */ - if (!panvk_priv_mem_dev_addr(shader->spd)) + if (!panvk_priv_mem_dev_addr(cs->spd)) return; struct panvk_physical_device *phys_dev = @@ -148,20 +148,20 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) bool indirect = info->indirect.buffer_dev_addr != 0; uint64_t tsd = - panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader, &dim, indirect); + panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, cs, &dim, indirect); if (!tsd) return; /* Only used for indirect dispatch */ unsigned wg_per_task = 0; if (indirect) - wg_per_task = pan_calc_workgroups_per_task(&shader->cs.local_size, + wg_per_task = pan_calc_workgroups_per_task(&cs->cs.local_size, &phys_dev->kmod.props); if (compute_state_dirty(cmdbuf, DESC_STATE) || compute_state_dirty(cmdbuf, CS)) { result = panvk_per_arch(cmd_prepare_push_descs)( - cmdbuf, desc_state, shader->desc_info.used_set_mask); + cmdbuf, desc_state, cs->desc_info.used_set_mask); if (result != VK_SUCCESS) return; } @@ -172,14 +172,14 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) if (result != VK_SUCCESS) return; - result = 
panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, shader, 1); + result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1); if (result != VK_SUCCESS) return; if (compute_state_dirty(cmdbuf, CS) || compute_state_dirty(cmdbuf, DESC_STATE)) { result = panvk_per_arch(cmd_prepare_shader_res_table)( - cmdbuf, desc_state, shader, cs_desc_state, 1); + cmdbuf, desc_state, cs, cs_desc_state, 1); if (result != VK_SUCCESS) return; } @@ -187,7 +187,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE); /* Copy the global TLS pointer to the per-job TSD. */ - if (shader->info.tls_size) { + if (cs->info.tls_size) { cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu); cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8); cs_move64_to(b, cs_scratch_reg64(b, 0), tsd); @@ -203,13 +203,13 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) { uint64_t fau_ptr = cmdbuf->state.compute.push_uniforms | - ((uint64_t)shader->fau.total_count << 56); + ((uint64_t)cs->fau.total_count << 56); cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr); } if (compute_state_dirty(cmdbuf, CS)) cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0), - panvk_priv_mem_dev_addr(shader->spd)); + panvk_priv_mem_dev_addr(cs->spd)); cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd); @@ -219,9 +219,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) struct mali_compute_size_workgroup_packed wg_size; pan_pack(&wg_size, COMPUTE_SIZE_WORKGROUP, cfg) { - cfg.workgroup_size_x = shader->cs.local_size.x; - cfg.workgroup_size_y = shader->cs.local_size.y; - cfg.workgroup_size_z = shader->cs.local_size.z; + cfg.workgroup_size_x = cs->cs.local_size.x; + cfg.workgroup_size_y = cs->cs.local_size.y; + cfg.workgroup_size_z = cs->cs.local_size.z; cfg.allow_merging_workgroups = 
false; } cs_move32_to(b, cs_sr_reg32(b, COMPUTE, WG_SIZE), @@ -245,25 +245,25 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.compute.push_uniforms); - if (shader_uses_sysval(shader, compute, num_work_groups.x)) { + if (shader_uses_sysval(cs, compute, num_work_groups.x)) { cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_X), cs_scratch_reg64(b, 0), shader_remapped_sysval_offset( - shader, sysval_offset(compute, num_work_groups.x))); + cs, sysval_offset(compute, num_work_groups.x))); } - if (shader_uses_sysval(shader, compute, num_work_groups.y)) { + if (shader_uses_sysval(cs, compute, num_work_groups.y)) { cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Y), cs_scratch_reg64(b, 0), shader_remapped_sysval_offset( - shader, sysval_offset(compute, num_work_groups.y))); + cs, sysval_offset(compute, num_work_groups.y))); } - if (shader_uses_sysval(shader, compute, num_work_groups.z)) { + if (shader_uses_sysval(cs, compute, num_work_groups.z)) { cs_store32(b, cs_sr_reg32(b, COMPUTE, JOB_SIZE_Z), cs_scratch_reg64(b, 0), shader_remapped_sysval_offset( - shader, sysval_offset(compute, num_work_groups.z))); + cs, sysval_offset(compute, num_work_groups.z))); } cs_flush_stores(b); @@ -297,7 +297,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) unsigned task_axis = MALI_TASK_AXIS_X; unsigned task_increment = 0; panvk_per_arch(calculate_task_axis_and_increment)( - shader, phys_dev, &task_axis, &task_increment); + cs, phys_dev, &task_axis, &task_increment); cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), task_increment, task_axis, cs_shader_res_sel(0, 0, 0, 0)); diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index fb56cd85d7c..162b5d25657 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -32,7 +32,7 @@ uint64_t 
panvk_per_arch(cmd_dispatch_prepare_tls)( - struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, + struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *cs, const struct pan_compute_dim *dim, bool indirect) { struct panvk_batch *batch = cmdbuf->cur_batch; @@ -44,15 +44,15 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); - batch->tlsinfo.tls.size = shader->info.tls_size; - batch->tlsinfo.wls.size = shader->info.wls_size; + batch->tlsinfo.tls.size = cs->info.tls_size; + batch->tlsinfo.wls.size = cs->info.wls_size; if (batch->tlsinfo.wls.size) { unsigned core_id_range; pan_query_core_count(&phys_dev->kmod.props, &core_id_range); batch->tlsinfo.wls.instances = pan_calc_wls_instances( - &shader->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim); + &cs->cs.local_size, &phys_dev->kmod.props, indirect ? NULL : dim); batch->wls_total_size = pan_calc_total_wls_size( batch->tlsinfo.wls.size, batch->tlsinfo.wls.instances, core_id_range); } @@ -63,12 +63,12 @@ panvk_per_arch(cmd_dispatch_prepare_tls)( static void cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) { - const struct panvk_shader_variant *shader = + const struct panvk_shader_variant *cs = panvk_shader_only_variant(cmdbuf->state.compute.shader); VkResult result; /* If there's no compute shader, we can skip the dispatch. 
*/ - if (!panvk_priv_mem_dev_addr(shader->rsd)) + if (!panvk_priv_mem_dev_addr(cs->rsd)) return; panvk_per_arch(cmd_close_batch)(cmdbuf); @@ -85,17 +85,17 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) info->direct.wg_count.z, }; bool indirect = info->indirect.buffer_dev_addr != 0; - uint64_t tsd = panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader, + uint64_t tsd = panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, cs, &wg_count, indirect); result = panvk_per_arch(cmd_prepare_push_descs)( - cmdbuf, desc_state, shader->desc_info.used_set_mask); + cmdbuf, desc_state, cs->desc_info.used_set_mask); if (result != VK_SUCCESS) return; if (compute_state_dirty(cmdbuf, CS) || compute_state_dirty(cmdbuf, DESC_STATE)) { - result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader, + result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, cs, cs_desc_state); if (result != VK_SUCCESS) return; @@ -103,8 +103,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, info); - result = panvk_per_arch(cmd_prepare_push_uniforms)( - cmdbuf, shader, 1); + result = panvk_per_arch(cmd_prepare_push_uniforms)(cmdbuf, cs, 1); if (result != VK_SUCCESS) return; @@ -113,10 +112,10 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) if (compute_state_dirty(cmdbuf, CS) || compute_state_dirty(cmdbuf, DESC_STATE)) { result = panvk_per_arch(cmd_prepare_shader_desc_tables)( - cmdbuf, desc_state, shader, cs_desc_state); + cmdbuf, desc_state, cs, cs_desc_state); result = panvk_per_arch(meta_get_copy_desc_job)( - cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0, + cmdbuf, cs, &cmdbuf->state.compute.desc_state, cs_desc_state, 0, &copy_desc_job); if (result != VK_SUCCESS) return; @@ -134,18 +133,18 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) if (!indirect) { 
pan_pack_work_groups_compute( pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, - wg_count.y, wg_count.z, shader->cs.local_size.x, - shader->cs.local_size.y, shader->cs.local_size.z, false, false); + wg_count.y, wg_count.z, cs->cs.local_size.x, + cs->cs.local_size.y, cs->cs.local_size.z, false, false); } pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = util_logbase2_ceil(shader->cs.local_size.x + 1) + - util_logbase2_ceil(shader->cs.local_size.y + 1) + - util_logbase2_ceil(shader->cs.local_size.z + 1); + cfg.job_task_split = util_logbase2_ceil(cs->cs.local_size.x + 1) + + util_logbase2_ceil(cs->cs.local_size.y + 1) + + util_logbase2_ceil(cs->cs.local_size.z + 1); } pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = panvk_priv_mem_dev_addr(shader->rsd); + cfg.state = panvk_priv_mem_dev_addr(cs->rsd); cfg.attributes = cs_desc_state->img_attrib_table; cfg.attribute_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG]; @@ -163,25 +162,25 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) uint64_t num_work_groups_y_sysval_addr = 0x8ull << 60; uint64_t num_work_groups_z_sysval_addr = 0x8ull << 60; - if (shader_uses_sysval(shader, compute, num_work_groups.x)) { + if (shader_uses_sysval(cs, compute, num_work_groups.x)) { num_work_groups_x_sysval_addr = cmdbuf->state.compute.push_uniforms + shader_remapped_sysval_offset( - shader, sysval_offset(compute, num_work_groups.x)); + cs, sysval_offset(compute, num_work_groups.x)); } - if (shader_uses_sysval(shader, compute, num_work_groups.y)) { + if (shader_uses_sysval(cs, compute, num_work_groups.y)) { num_work_groups_y_sysval_addr = cmdbuf->state.compute.push_uniforms + shader_remapped_sysval_offset( - shader, sysval_offset(compute, num_work_groups.y)); + cs, sysval_offset(compute, num_work_groups.y)); } - if (shader_uses_sysval(shader, compute, num_work_groups.z)) { + if (shader_uses_sysval(cs, compute, num_work_groups.z)) { 
num_work_groups_z_sysval_addr = cmdbuf->state.compute.push_uniforms + shader_remapped_sysval_offset( - shader, sysval_offset(compute, num_work_groups.z)); + cs, sysval_offset(compute, num_work_groups.z)); } struct panvk_precomp_ctx precomp_ctx = panvk_per_arch(precomp_cs)(cmdbuf); @@ -191,8 +190,8 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) panlib_indirect_dispatch( &precomp_ctx, panlib_1d(1), precomp_barrier, - info->indirect.buffer_dev_addr, shader->cs.local_size.x, - shader->cs.local_size.y, shader->cs.local_size.z, job.gpu, + info->indirect.buffer_dev_addr, cs->cs.local_size.x, + cs->cs.local_size.y, cs->cs.local_size.z, job.gpu, num_work_groups_x_sysval_addr, num_work_groups_y_sysval_addr, num_work_groups_z_sysval_addr); indirect_dep = batch->vtc_jc.job_index; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c index ec9d2b02bc1..917f33ef38a 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c @@ -12,7 +12,7 @@ void panvk_per_arch(cmd_prepare_dispatch_sysvals)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info) { - const struct panvk_shader_variant *shader = + const struct panvk_shader_variant *cs = panvk_shader_only_variant(cmdbuf->state.compute.shader); const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); @@ -38,11 +38,11 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( set_compute_sysval(cmdbuf, dirty_sysvals, base.y, info->wg_base.y); set_compute_sysval(cmdbuf, dirty_sysvals, base.z, info->wg_base.z); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.x, - shader->cs.local_size.x); + cs->cs.local_size.x); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.y, - shader->cs.local_size.y); + cs->cs.local_size.y); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z, - shader->cs.local_size.z); + cs->cs.local_size.z); set_compute_sysval(cmdbuf, 
dirty_sysvals, printf_buffer_address, dev->printf.bo->addr.dev); @@ -60,7 +60,7 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( } for (uint32_t i = 0; i < MAX_SETS; i++) { - if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) { + if (cs->desc_info.used_set_mask & BITFIELD_BIT(i)) { set_compute_sysval(cmdbuf, dirty_sysvals, desc.sets[i], desc_state->sets[i]->descs.dev); } @@ -68,7 +68,7 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( #endif /* Dirty push_uniforms if the used_sysvals/dirty_sysvals overlap. */ - BITSET_AND(dirty_sysvals, dirty_sysvals, shader->fau.used_sysvals); + BITSET_AND(dirty_sysvals, dirty_sysvals, cs->fau.used_sysvals); if (!BITSET_IS_EMPTY(dirty_sysvals)) compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); }