mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-29 09:30:20 +01:00
panfrost: Apply direct dispatch WLS instance limit
Apply the direct dispatch WLS instance limit to panfrost as well to keep compute jobs with large workgroup counts from running out of memory. Fixes: 1304f4578d ("panfrost: Adapt emit_shared_memory for indirect dispatch") Reviewed-by: Eric R. Smith <eric.smith@collabora.com> Reviewed-by: John Anthony <john.anthony@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34979> (cherry picked from commit 64ce37b2d9)
This commit is contained in:
parent
cc2e341a14
commit
c01db1fb7f
3 changed files with 13 additions and 30 deletions
|
|
@ -5134,7 +5134,7 @@
|
|||
"description": "panfrost: Apply direct dispatch WLS instance limit",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "1304f4578d2ee206be20bab8f9aa9a55ae4563b5",
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -1587,29 +1587,6 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
|
|||
return ubos.gpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Choose the number of WLS instances to allocate. This must be a power-of-two.
|
||||
* The number of WLS instances limits the number of concurrent tasks on a given
|
||||
* shader core, setting to the (rounded) total number of tasks avoids any
|
||||
* throttling. Smaller values save memory at the expense of possible throttling.
|
||||
*
|
||||
* With indirect dispatch, we don't know at launch-time how many tasks will be
|
||||
* needed, so we use a conservative value that's unlikely to cause slowdown in
|
||||
* practice without wasting too much memory.
|
||||
*/
|
||||
static unsigned
|
||||
panfrost_choose_wls_instance_count(const struct pipe_grid_info *grid)
|
||||
{
|
||||
if (grid->indirect) {
|
||||
/* May need tuning in the future, conservative guess */
|
||||
return 128;
|
||||
} else {
|
||||
return util_next_power_of_two(grid->grid[0]) *
|
||||
util_next_power_of_two(grid->grid[1]) *
|
||||
util_next_power_of_two(grid->grid[2]);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
||||
const struct pipe_grid_info *grid)
|
||||
|
|
@ -1620,10 +1597,15 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
struct panfrost_ptr t =
|
||||
pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
|
||||
|
||||
struct pan_compute_dim local_size = {grid->block[0], grid->block[1],
|
||||
grid->block[2]};
|
||||
struct pan_compute_dim dim = {grid->grid[0], grid->grid[1], grid->grid[2]};
|
||||
|
||||
struct pan_tls_info info = {
|
||||
.tls.size = ss->info.tls_size,
|
||||
.wls.size = ss->info.wls_size + grid->variable_shared_mem,
|
||||
.wls.instances = panfrost_choose_wls_instance_count(grid),
|
||||
.wls.instances = pan_calc_wls_instances(&local_size, &dev->kmod.props,
|
||||
grid->indirect ? NULL : &dim),
|
||||
};
|
||||
|
||||
if (ss->info.tls_size) {
|
||||
|
|
@ -1637,8 +1619,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
if (info.wls.size) {
|
||||
unsigned size = pan_wls_adjust_size(info.wls.size) * info.wls.instances *
|
||||
dev->core_id_range;
|
||||
unsigned size = pan_calc_total_wls_size(info.wls.size, info.wls.instances,
|
||||
dev->core_id_range);
|
||||
|
||||
struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch, size, 1);
|
||||
|
||||
|
|
|
|||
|
|
@ -200,7 +200,8 @@ emit_tls(struct panfrost_batch *batch,
|
|||
struct pan_tls_info info = {
|
||||
.tls.size = shader->info.tls_size,
|
||||
.wls.size = shader->info.wls_size,
|
||||
.wls.instances = pan_wls_instances(dim),
|
||||
.wls.instances =
|
||||
pan_calc_wls_instances(&shader->local_size, &dev->kmod.props, dim),
|
||||
};
|
||||
|
||||
if (info.tls.size) {
|
||||
|
|
@ -210,8 +211,8 @@ emit_tls(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
if (info.wls.size) {
|
||||
unsigned size = pan_wls_adjust_size(info.wls.size) * info.wls.instances *
|
||||
dev->core_id_range;
|
||||
unsigned size = pan_calc_total_wls_size(info.wls.size, info.wls.instances,
|
||||
dev->core_id_range);
|
||||
|
||||
struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch, size, 1);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue