mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 07:20:10 +01:00
panvk: fix calculate_task_axis_and_increment
task_axis selects the dim of the global workgroup, not the dim of the local workgroup.

v2: fix assert for dEQP-VK.compute.pipeline.basic.empty_workgroup*

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Tested-by: Yiwei Zhang <zzyiwei@chromium.org> (v1)
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37988>
This commit is contained in:
parent
546d73721b
commit
5fd32d79ee
3 changed files with 15 additions and 12 deletions
|
|
@ -672,8 +672,8 @@ void panvk_per_arch(cmd_inherit_render_state)(
|
||||||
static inline void
|
static inline void
|
||||||
panvk_per_arch(calculate_task_axis_and_increment)(
|
panvk_per_arch(calculate_task_axis_and_increment)(
|
||||||
const struct panvk_shader_variant *shader,
|
const struct panvk_shader_variant *shader,
|
||||||
struct panvk_physical_device *phys_dev, unsigned *task_axis,
|
struct panvk_physical_device *phys_dev, const struct pan_compute_dim *wg_dim,
|
||||||
unsigned *task_increment)
|
unsigned *task_axis, unsigned *task_increment)
|
||||||
{
|
{
|
||||||
/* Pick the task_axis and task_increment to maximize thread
|
/* Pick the task_axis and task_increment to maximize thread
|
||||||
* utilization. */
|
* utilization. */
|
||||||
|
|
@ -682,14 +682,17 @@ panvk_per_arch(calculate_task_axis_and_increment)(
|
||||||
unsigned max_thread_cnt = pan_compute_max_thread_count(
|
unsigned max_thread_cnt = pan_compute_max_thread_count(
|
||||||
&phys_dev->kmod.props, shader->info.work_reg_count);
|
&phys_dev->kmod.props, shader->info.work_reg_count);
|
||||||
unsigned threads_per_task = threads_per_wg;
|
unsigned threads_per_task = threads_per_wg;
|
||||||
unsigned local_size[3] = {
|
const unsigned wg_count[3] = {wg_dim->x, wg_dim->y, wg_dim->z};
|
||||||
shader->cs.local_size.x,
|
const unsigned total_wgs = wg_dim->x * wg_dim->y * wg_dim->z;
|
||||||
shader->cs.local_size.y,
|
|
||||||
shader->cs.local_size.z,
|
if (!total_wgs) {
|
||||||
};
|
*task_axis = MALI_TASK_AXIS_X;
|
||||||
|
*task_increment = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
if (threads_per_task * local_size[i] >= max_thread_cnt) {
|
if (threads_per_task * wg_count[i] >= max_thread_cnt) {
|
||||||
/* We reached our thread limit, stop at the current axis and
|
/* We reached our thread limit, stop at the current axis and
|
||||||
* calculate the increment so it doesn't exceed the per-core
|
* calculate the increment so it doesn't exceed the per-core
|
||||||
* thread capacity.
|
* thread capacity.
|
||||||
|
|
@ -701,11 +704,11 @@ panvk_per_arch(calculate_task_axis_and_increment)(
|
||||||
* threads. Pick the current axis grid size as our increment
|
* threads. Pick the current axis grid size as our increment
|
||||||
* as there's no point using something bigger.
|
* as there's no point using something bigger.
|
||||||
*/
|
*/
|
||||||
*task_increment = local_size[i];
|
*task_increment = wg_count[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
threads_per_task *= local_size[i];
|
threads_per_task *= wg_count[i];
|
||||||
(*task_axis)++;
|
(*task_axis)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -296,7 +296,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
||||||
unsigned task_axis = MALI_TASK_AXIS_X;
|
unsigned task_axis = MALI_TASK_AXIS_X;
|
||||||
unsigned task_increment = 0;
|
unsigned task_increment = 0;
|
||||||
panvk_per_arch(calculate_task_axis_and_increment)(
|
panvk_per_arch(calculate_task_axis_and_increment)(
|
||||||
cs, phys_dev, &task_axis, &task_increment);
|
cs, phys_dev, &dim, &task_axis, &task_increment);
|
||||||
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||||
task_increment, task_axis,
|
task_increment, task_axis,
|
||||||
cs_shader_res_sel(0, 0, 0, 0));
|
cs_shader_res_sel(0, 0, 0, 0));
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
||||||
unsigned task_axis = MALI_TASK_AXIS_X;
|
unsigned task_axis = MALI_TASK_AXIS_X;
|
||||||
unsigned task_increment = 0;
|
unsigned task_increment = 0;
|
||||||
panvk_per_arch(calculate_task_axis_and_increment)(
|
panvk_per_arch(calculate_task_axis_and_increment)(
|
||||||
shader, phys_dev, &task_axis, &task_increment);
|
shader, phys_dev, &dim, &task_axis, &task_increment);
|
||||||
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
cs_trace_run_compute(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||||
task_increment, task_axis,
|
task_increment, task_axis,
|
||||||
cs_shader_res_sel(0, 0, 0, 0));
|
cs_shader_res_sel(0, 0, 0, 0));
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue