panfrost: Compile indirect dispatch shader on first use

For 2D UI workloads and even most 3D workloads, the indirect dispatch shader
won't actually be needed, but we currently compile it during eglInitialize() on
every v7 application. That hurts app start-up time, especially given that this
shader doesn't hit the disk cache. We can instead defer compiling this shader
until it's actually needed, when glDispatchComputeIndirect() gets called.

The tradeoff is that the first glDispatchComputeIndirect() call will be (much)
slower than successive calls, since we need to build and compile this internal
shader. I'm unconvinced that's a problem in practice.

An app would need to call glDispatchComputeIndirect() for the first time in the
middle of a scene. 2D apps would never call that, OpenCL doesn't have that, and
GL compute will have the same costs just moved around. So it's down to a 3D
GLES3.1 app that indirectly dispatches compute for the first time in the
middle of a scene. Which, meh? It's not entirely implausible but we have bigger
fish to fry, and this fixes a real problem (about 5% of eglInitialize time spent
building this shader that won't actually get used).

es2_info starts slightly faster with this change.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19622>
This commit is contained in:
Alyssa Rosenzweig 2022-11-09 12:10:37 -05:00 committed by Marge Bot
parent c6425d4490
commit 1827b4a2db
3 changed files with 36 additions and 36 deletions

View file

@ -4827,7 +4827,6 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base,
&screen->blitter.desc_pool.base);
#if PAN_GPU_INDIRECTS
GENX(pan_indirect_dispatch_init)(dev);
GENX(panfrost_init_indirect_draw_shaders)(dev, &screen->indirect_draw.bin_pool.base);
#endif
}

View file

@ -53,38 +53,8 @@ get_tls(const struct panfrost_device *dev)
pan_size(RENDERER_STATE);
}
unsigned
GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
const struct pan_indirect_dispatch_info *inputs)
{
struct panfrost_device *dev = pool->dev;
struct panfrost_ptr job =
pan_pool_alloc_desc(pool, COMPUTE_JOB);
void *invocation =
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
panfrost_pack_work_groups_compute(invocation,
1, 1, 1, 1, 1, 1,
false, false);
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = 2;
}
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
cfg.state = get_rsd(dev);
cfg.thread_storage = get_tls(pool->dev);
cfg.push_uniforms =
pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
}
return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
false, true, 0, 0, &job, false);
}
void
GENX(pan_indirect_dispatch_init)(struct panfrost_device *dev)
static void
pan_indirect_dispatch_init(struct panfrost_device *dev)
{
nir_builder b =
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
@ -192,6 +162,40 @@ GENX(pan_indirect_dispatch_init)(struct panfrost_device *dev)
};
}
unsigned
GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
const struct pan_indirect_dispatch_info *inputs)
{
struct panfrost_device *dev = pool->dev;
struct panfrost_ptr job =
pan_pool_alloc_desc(pool, COMPUTE_JOB);
void *invocation =
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
/* If we haven't compiled the indirect dispatch shader yet, do it now */
if (!dev->indirect_dispatch.bin)
pan_indirect_dispatch_init(dev);
panfrost_pack_work_groups_compute(invocation,
1, 1, 1, 1, 1, 1,
false, false);
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
cfg.job_task_split = 2;
}
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
cfg.state = get_rsd(dev);
cfg.thread_storage = get_tls(pool->dev);
cfg.push_uniforms =
pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
}
return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
false, true, 0, 0, &job, false);
}
void
GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev)
{

View file

@ -41,9 +41,6 @@ GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
const struct pan_indirect_dispatch_info *dispatch_info);
/* Build and compile the internal indirect dispatch compute shader for `dev`. */
void
GENX(pan_indirect_dispatch_init)(struct panfrost_device *dev);
/*
 * NOTE(review): presumably releases the indirect dispatch state held by
 * `dev`; the body is not visible here -- confirm against the definition.
 */
void
GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev);