diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 67f192a4c4e..d582b46945d 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -3634,9 +3634,6 @@ screen_destroy(struct pipe_screen *pscreen) { struct panfrost_device *dev = pan_device(pscreen); GENX(pan_blitter_cache_cleanup)(&dev->blitter); -#if PAN_GPU_INDIRECTS - GENX(pan_indirect_dispatch_cleanup)(dev); -#endif } static void @@ -3768,4 +3765,10 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen) GENX(pan_blitter_cache_init) (&dev->blitter, panfrost_device_gpu_id(dev), &dev->blend_shaders, &screen->blitter.bin_pool.base, &screen->blitter.desc_pool.base); + +#if PAN_GPU_INDIRECTS + pan_indirect_dispatch_meta_init( + &dev->indirect_dispatch, panfrost_device_gpu_id(dev), + &screen->blitter.bin_pool.base, &screen->blitter.desc_pool.base); +#endif } diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c index 4fb7b7ba524..c674c9177a8 100644 --- a/src/gallium/drivers/panfrost/pan_jm.c +++ b/src/gallium/drivers/panfrost/pan_jm.c @@ -335,6 +335,7 @@ GENX(jm_launch_grid)(struct panfrost_batch *batch, unsigned indirect_dep = 0; #if PAN_GPU_INDIRECTS if (info->indirect) { + struct panfrost_device *dev = pan_device(batch->ctx->base.screen); struct pan_indirect_dispatch_info indirect = { .job = t.gpu, .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu + @@ -348,7 +349,8 @@ GENX(jm_launch_grid)(struct panfrost_batch *batch, }; indirect_dep = GENX(pan_indirect_dispatch_emit)( - &batch->pool.base, &batch->jm.jobs.vtc_jc, &indirect); + &dev->indirect_dispatch, &batch->pool.base, &batch->jm.jobs.vtc_jc, + &indirect); } #endif diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index adf15d512ee..aec372f59df 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -40,6 +40,7 @@ #include "panfrost/util/pan_ir.h" #include "pan_blend.h" #include "pan_blitter.h" +#include "pan_indirect_dispatch.h" #include "pan_pool.h" #include "pan_util.h" @@ -64,12 +65,6 @@ extern "C" { /* Fencepost problem, hence the off-by-one */ #define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1) -struct pan_indirect_dispatch { - struct panfrost_ubo_push push; - struct panfrost_bo *bin; - struct panfrost_bo *descs; -}; - /** Implementation-defined tiler features */ struct panfrost_tiler_features { /** Number of bytes per tiler bin */ @@ -181,7 +176,7 @@ struct panfrost_device { struct pan_blitter_cache blitter; struct pan_blend_shader_cache blend_shaders; - struct pan_indirect_dispatch indirect_dispatch; + struct pan_indirect_dispatch_meta indirect_dispatch; /* Tiler heap shared across all tiler jobs, allocated against the * device since there's only a single tiler. Since this is invisible to diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c index 82ddaa4cee3..f1ccb1ef692 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.c +++ b/src/panfrost/lib/pan_indirect_dispatch.c @@ -27,7 +27,6 @@ #include "compiler/nir/nir_builder.h" #include "util/macros.h" #include "util/u_memory.h" -#include "pan_bo.h" #include "pan_encoder.h" #include "pan_jc.h" #include "pan_pool.h" @@ -40,20 +39,8 @@ nir_imm_int(b, 0), \ .base = offsetof(struct pan_indirect_dispatch_info, name)) -static mali_ptr -get_rsd(const struct panfrost_device *dev) -{ - return dev->indirect_dispatch.descs->ptr.gpu; -} - -static mali_ptr -get_tls(const struct panfrost_device *dev) -{ - return dev->indirect_dispatch.descs->ptr.gpu + pan_size(RENDERER_STATE); -} - static void -pan_indirect_dispatch_init(struct panfrost_device *dev) +pan_indirect_dispatch_init(struct pan_indirect_dispatch_meta *meta) { nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), "%s", @@ -121,7 +108,7 @@ pan_indirect_dispatch_init(struct panfrost_device *dev) nir_pop_if(&b, NULL); struct panfrost_compile_inputs inputs = { - .gpu_id = panfrost_device_gpu_id(dev), + .gpu_id = meta->gpu_id, .no_ubo_to_push = true, }; struct pan_shader_info shader_info; @@ -139,40 +126,40 @@ pan_indirect_dispatch_init(struct panfrost_device *dev) shader_info.push.count = DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4); - dev->indirect_dispatch.bin = panfrost_bo_create( - dev, binary.size, PAN_BO_EXECUTE, "Indirect dispatch shader"); + struct panfrost_ptr bin = + pan_pool_alloc_aligned(meta->bin_pool, binary.size, 64); - memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size); + memcpy(bin.cpu, binary.data, binary.size); util_dynarray_fini(&binary); - dev->indirect_dispatch.descs = panfrost_bo_create( - dev, pan_size(RENDERER_STATE) + pan_size(LOCAL_STORAGE), 0, - "Indirect dispatch descriptors"); + struct panfrost_ptr rsd = + pan_pool_alloc_desc(meta->desc_pool, RENDERER_STATE); + struct panfrost_ptr tsd = + pan_pool_alloc_desc(meta->desc_pool, LOCAL_STORAGE); - mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu; - - void *rsd = dev->indirect_dispatch.descs->ptr.cpu; - pan_pack(rsd, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(&shader_info, address, &cfg); + pan_pack(rsd.cpu, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(&shader_info, bin.gpu, &cfg); } - void *tsd = dev->indirect_dispatch.descs->ptr.cpu + pan_size(RENDERER_STATE); - pan_pack(tsd, LOCAL_STORAGE, ls) { + pan_pack(tsd.cpu, LOCAL_STORAGE, ls) { ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM; }; + + meta->rsd = rsd.gpu; + meta->tsd = tsd.gpu; } unsigned -GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_jc *jc, +GENX(pan_indirect_dispatch_emit)(struct pan_indirect_dispatch_meta *meta, + struct pan_pool *pool, struct pan_jc *jc, const struct pan_indirect_dispatch_info *inputs) { - struct panfrost_device *dev = pool->dev; struct panfrost_ptr job = pan_pool_alloc_desc(pool, COMPUTE_JOB); void *invocation = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION); /* If we haven't compiled the indirect dispatch shader yet, do it now */ - if (!dev->indirect_dispatch.bin) - pan_indirect_dispatch_init(dev); + if (!meta->rsd) + pan_indirect_dispatch_init(meta); panfrost_pack_work_groups_compute(invocation, 1, 1, 1, 1, 1, 1, false, false); @@ -182,8 +169,8 @@ GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_jc *jc, } pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = get_rsd(dev); - cfg.thread_storage = get_tls(pool->dev); + cfg.state = meta->rsd; + cfg.thread_storage = meta->tsd; cfg.push_uniforms = pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16); } @@ -191,10 +178,3 @@ GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_jc *jc, return pan_jc_add_job(pool, jc, MALI_JOB_TYPE_COMPUTE, false, true, 0, 0, &job, false); } - -void -GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev) -{ - panfrost_bo_unreference(dev->indirect_dispatch.bin); - panfrost_bo_unreference(dev->indirect_dispatch.descs); -} diff --git a/src/panfrost/lib/pan_indirect_dispatch.h b/src/panfrost/lib/pan_indirect_dispatch.h index c21f145d245..acbe4d6f0a2 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.h +++ b/src/panfrost/lib/pan_indirect_dispatch.h @@ -27,20 +27,54 @@ #include "genxml/gen_macros.h" #include "pan_jc.h" -struct pan_device; +#include "panfrost/util/pan_ir.h" + struct pan_jc; struct pan_pool; +struct pan_indirect_dispatch_meta { + struct panfrost_ubo_push push; + + unsigned gpu_id; + + /* Renderer state descriptor. */ + mali_ptr rsd; + + /* Thread storage descriptor. */ + mali_ptr tsd; + + /* Shader binary pool. */ + struct pan_pool *bin_pool; + + /* Shader desc pool for any descriptor that can be re-used across + * indirect dispatch calls. Job descriptors are allocated from the pool + * passed to pan_indirect_dispatch_emit(). + */ + struct pan_pool *desc_pool; +}; + struct pan_indirect_dispatch_info { mali_ptr job; mali_ptr indirect_dim; mali_ptr num_wg_sysval[3]; } PACKED; +static inline void +pan_indirect_dispatch_meta_init(struct pan_indirect_dispatch_meta *meta, + unsigned gpu_id, struct pan_pool *bin_pool, + struct pan_pool *desc_pool) +{ + memset(meta, 0, sizeof(*meta)); + meta->gpu_id = gpu_id; + meta->bin_pool = bin_pool; + meta->desc_pool = desc_pool; +} + +#ifdef PAN_ARCH unsigned GENX(pan_indirect_dispatch_emit)( + struct pan_indirect_dispatch_meta *meta, struct pan_pool *pool, struct pan_jc *jc, const struct pan_indirect_dispatch_info *dispatch_info); - -void GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev); +#endif #endif