diff --git a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h index b26c041d23b..b1fee33d36b 100644 --- a/src/panfrost/vulkan/csf/panvk_cmd_buffer.h +++ b/src/panfrost/vulkan/csf/panvk_cmd_buffer.h @@ -447,7 +447,49 @@ VkResult panvk_per_arch(cmd_prepare_exec_cmd_for_draws)( struct panvk_cmd_buffer *primary, struct panvk_cmd_buffer *secondary); void panvk_per_arch(cmd_inherit_render_state)( - struct panvk_cmd_buffer *cmdbuf, - const VkCommandBufferBeginInfo *pBeginInfo); + struct panvk_cmd_buffer *cmdbuf, const VkCommandBufferBeginInfo *pBeginInfo); + +static inline void +panvk_per_arch(calculate_task_axis_and_increment)( + const struct panvk_shader *shader, struct panvk_physical_device *phys_dev, + unsigned *task_axis, unsigned *task_increment) +{ + /* Pick the task_axis and task_increment to maximize thread + * utilization. */ + unsigned threads_per_wg = + shader->local_size.x * shader->local_size.y * shader->local_size.z; + unsigned max_thread_cnt = panfrost_compute_max_thread_count( + &phys_dev->kmod.props, shader->info.work_reg_count); + unsigned threads_per_task = threads_per_wg; + unsigned local_size[3] = { + shader->local_size.x, + shader->local_size.y, + shader->local_size.z, + }; + + for (unsigned i = 0; i < 3; i++) { + if (threads_per_task * local_size[i] >= max_thread_cnt) { + /* We reached out thread limit, stop at the current axis and + * calculate the increment so it doesn't exceed the per-core + * thread capacity. + */ + *task_increment = max_thread_cnt / threads_per_task; + break; + } else if (*task_axis == MALI_TASK_AXIS_Z) { + /* We reached the Z axis, and there's still room to stuff more + * threads. Pick the current axis grid size as our increment + * as there's no point using something bigger. + */ + *task_increment = local_size[i]; + break; + } + + threads_per_task *= local_size[i]; + (*task_axis)++; + } + + assert(*task_axis <= MALI_TASK_AXIS_Z); + assert(*task_increment > 0); +} #endif /* PANVK_CMD_BUFFER_H */ diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 1dc2908073f..b8511557abc 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -65,49 +65,6 @@ prepare_driver_set(struct panvk_cmd_buffer *cmdbuf) return VK_SUCCESS; } -static void -calculate_task_axis_and_increment(const struct panvk_shader *shader, - struct panvk_physical_device *phys_dev, - unsigned *task_axis, unsigned *task_increment) -{ - /* Pick the task_axis and task_increment to maximize thread - * utilization. */ - unsigned threads_per_wg = - shader->local_size.x * shader->local_size.y * shader->local_size.z; - unsigned max_thread_cnt = panfrost_compute_max_thread_count( - &phys_dev->kmod.props, shader->info.work_reg_count); - unsigned threads_per_task = threads_per_wg; - unsigned local_size[3] = { - shader->local_size.x, - shader->local_size.y, - shader->local_size.z, - }; - - for (unsigned i = 0; i < 3; i++) { - if (threads_per_task * local_size[i] >= max_thread_cnt) { - /* We reached out thread limit, stop at the current axis and - * calculate the increment so it doesn't exceed the per-core - * thread capacity. - */ - *task_increment = max_thread_cnt / threads_per_task; - break; - } else if (*task_axis == MALI_TASK_AXIS_Z) { - /* We reached the Z axis, and there's still room to stuff more - * threads. Pick the current axis grid size as our increment - * as there's no point using something bigger. - */ - *task_increment = local_size[i]; - break; - } - - threads_per_task *= local_size[i]; - (*task_axis)++; - } - - assert(*task_axis <= MALI_TASK_AXIS_Z); - assert(*task_increment > 0); -} - static unsigned calculate_workgroups_per_task(const struct panvk_shader *shader, struct panvk_physical_device *phys_dev) @@ -357,8 +314,8 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) } else { unsigned task_axis = MALI_TASK_AXIS_X; unsigned task_increment = 0; - calculate_task_axis_and_increment(shader, phys_dev, &task_axis, - &task_increment); + panvk_per_arch(calculate_task_axis_and_increment)( + shader, phys_dev, &task_axis, &task_increment); cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4), task_increment, task_axis, false, cs_shader_res_sel(0, 0, 0, 0));