mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-30 05:00:32 +01:00
panfrost: Adapt emit_shared_memory for indirect dispatch
Indirect dispatch does not actually require any dynamic memory allocation, even with shared memory. We just need to set wls_instances to some (mostly arbitrary) value, statically allocate memory based on that, and let the hardware throttle workgroups to fit if needed. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18661>
This commit is contained in:
parent
79b66a28cd
commit
1304f4578d
3 changed files with 32 additions and 15 deletions
|
|
@ -1587,6 +1587,29 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
|
|||
return ubos.gpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Choose the number of WLS instances to allocate. This must be a power-of-two.
|
||||
* The number of WLS instances limits the number of concurrent tasks on a given
|
||||
* shader core, setting to the (rounded) total number of tasks avoids any
|
||||
* throttling. Smaller values save memory at the expense of possible throttling.
|
||||
*
|
||||
* With indirect dispatch, we don't know at launch-time how many tasks will be
|
||||
* needed, so we use a conservative value that's unlikely to cause slowdown in
|
||||
* practice without wasting too much memory.
|
||||
*/
|
||||
static unsigned
|
||||
panfrost_choose_wls_instance_count(const struct pipe_grid_info *grid)
|
||||
{
|
||||
if (grid->indirect) {
|
||||
/* May need tuning in the future, conservative guess */
|
||||
return 128;
|
||||
} else {
|
||||
return util_next_power_of_two(grid->grid[0]) *
|
||||
util_next_power_of_two(grid->grid[1]) *
|
||||
util_next_power_of_two(grid->grid[2]);
|
||||
}
|
||||
}
|
||||
|
||||
static mali_ptr
|
||||
panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
||||
const struct pipe_grid_info *grid)
|
||||
|
|
@ -1601,9 +1624,7 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
struct pan_tls_info info = {
|
||||
.tls.size = ss->info.tls_size,
|
||||
.wls.size = ss->info.wls_size,
|
||||
.wls.dim.x = grid->grid[0],
|
||||
.wls.dim.y = grid->grid[1],
|
||||
.wls.dim.z = grid->grid[2],
|
||||
.wls.instances = panfrost_choose_wls_instance_count(grid),
|
||||
};
|
||||
|
||||
if (ss->info.tls_size) {
|
||||
|
|
@ -1616,10 +1637,8 @@ panfrost_emit_shared_memory(struct panfrost_batch *batch,
|
|||
}
|
||||
|
||||
if (ss->info.wls_size) {
|
||||
unsigned size =
|
||||
pan_wls_adjust_size(info.wls.size) *
|
||||
pan_wls_instances(&info.wls.dim) *
|
||||
dev->core_id_range;
|
||||
unsigned size = pan_wls_adjust_size(info.wls.size) *
|
||||
info.wls.instances * dev->core_id_range;
|
||||
|
||||
struct panfrost_bo *bo =
|
||||
panfrost_batch_get_shared_memory(batch, size, 1);
|
||||
|
|
@ -4138,12 +4157,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
|
|||
|
||||
struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
|
||||
|
||||
struct panfrost_shader_state *cs =
|
||||
&ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];
|
||||
|
||||
/* Indirect dispatch can't handle workgroup local storage since that
|
||||
* would require dynamic memory allocation. Bail in this case. */
|
||||
if (info->indirect && ((cs->info.wls_size != 0) || !PAN_GPU_INDIRECTS)) {
|
||||
if (info->indirect && !PAN_GPU_INDIRECTS) {
|
||||
struct pipe_transfer *transfer;
|
||||
uint32_t *params = pipe_buffer_map_range(pipe, info->indirect,
|
||||
info->indirect_offset,
|
||||
|
|
@ -4215,6 +4229,9 @@ panfrost_launch_grid(struct pipe_context *pipe,
|
|||
cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE];
|
||||
}
|
||||
#else
|
||||
struct panfrost_shader_state *cs =
|
||||
&ctx->shader[PIPE_SHADER_COMPUTE]->variants[0];
|
||||
|
||||
pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) {
|
||||
cfg.workgroup_size_x = info->block[0];
|
||||
cfg.workgroup_size_y = info->block[1];
|
||||
|
|
|
|||
|
|
@ -566,7 +566,7 @@ GENX(pan_emit_tls)(const struct pan_tls_info *info,
|
|||
assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
|
||||
cfg.wls_base_pointer = info->wls.ptr;
|
||||
unsigned wls_size = pan_wls_adjust_size(info->wls.size);
|
||||
cfg.wls_instances = pan_wls_instances(&info->wls.dim);
|
||||
cfg.wls_instances = info->wls.instances;
|
||||
cfg.wls_size_scale = util_logbase2(wls_size) + 1;
|
||||
} else {
|
||||
cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ struct pan_tls_info {
|
|||
} tls;
|
||||
|
||||
struct {
|
||||
struct pan_compute_dim dim;
|
||||
unsigned instances;
|
||||
mali_ptr ptr;
|
||||
unsigned size;
|
||||
} wls;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue