panfrost: Make pan_indirect_dispatch panfrost_device agnostic

This is part of our effort to make libpanfrost panfrost_{device,bo}
agnostic.

Since we are now passed a pool for descriptor allocations, there's no
point in doing a single allocation for both the RSD and TSD. We can
replace the get_{tls,rsd}() helpers with two fields at the
pan_indirect_dispatch_meta level, which simplifies the logic a bit.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Constantine Shablya <constantine.shablya@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26698>
This commit is contained in:
Boris Brezillon 2023-12-08 09:33:28 +01:00 committed by Marge Bot
parent 2bdcef6506
commit 1b1f1a6d76
5 changed files with 69 additions and 55 deletions

View file

@ -3634,9 +3634,6 @@ screen_destroy(struct pipe_screen *pscreen)
{
struct panfrost_device *dev = pan_device(pscreen);
GENX(pan_blitter_cache_cleanup)(&dev->blitter);
#if PAN_GPU_INDIRECTS
GENX(pan_indirect_dispatch_cleanup)(dev);
#endif
}
static void
@ -3768,4 +3765,10 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
GENX(pan_blitter_cache_init)
(&dev->blitter, panfrost_device_gpu_id(dev), &dev->blend_shaders,
&screen->blitter.bin_pool.base, &screen->blitter.desc_pool.base);
#if PAN_GPU_INDIRECTS
pan_indirect_dispatch_meta_init(
&dev->indirect_dispatch, panfrost_device_gpu_id(dev),
&screen->blitter.bin_pool.base, &screen->blitter.desc_pool.base);
#endif
}

View file

@ -335,6 +335,7 @@ GENX(jm_launch_grid)(struct panfrost_batch *batch,
unsigned indirect_dep = 0;
#if PAN_GPU_INDIRECTS
if (info->indirect) {
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
struct pan_indirect_dispatch_info indirect = {
.job = t.gpu,
.indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
@ -348,7 +349,8 @@ GENX(jm_launch_grid)(struct panfrost_batch *batch,
};
indirect_dep = GENX(pan_indirect_dispatch_emit)(
&batch->pool.base, &batch->jm.jobs.vtc_jc, &indirect);
&dev->indirect_dispatch, &batch->pool.base, &batch->jm.jobs.vtc_jc,
&indirect);
}
#endif

View file

@ -40,6 +40,7 @@
#include "panfrost/util/pan_ir.h"
#include "pan_blend.h"
#include "pan_blitter.h"
#include "pan_indirect_dispatch.h"
#include "pan_pool.h"
#include "pan_util.h"
@ -64,12 +65,6 @@ extern "C" {
/* Fencepost problem, hence the off-by-one */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
struct pan_indirect_dispatch {
struct panfrost_ubo_push push;
struct panfrost_bo *bin;
struct panfrost_bo *descs;
};
/** Implementation-defined tiler features */
struct panfrost_tiler_features {
/** Number of bytes per tiler bin */
@ -181,7 +176,7 @@ struct panfrost_device {
struct pan_blitter_cache blitter;
struct pan_blend_shader_cache blend_shaders;
struct pan_indirect_dispatch indirect_dispatch;
struct pan_indirect_dispatch_meta indirect_dispatch;
/* Tiler heap shared across all tiler jobs, allocated against the
* device since there's only a single tiler. Since this is invisible to

View file

@ -27,7 +27,6 @@
#include "compiler/nir/nir_builder.h"
#include "util/macros.h"
#include "util/u_memory.h"
#include "pan_bo.h"
#include "pan_encoder.h"
#include "pan_jc.h"
#include "pan_pool.h"
@ -40,20 +39,8 @@
nir_imm_int(b, 0), \
.base = offsetof(struct pan_indirect_dispatch_info, name))
static mali_ptr
get_rsd(const struct panfrost_device *dev)
{
return dev->indirect_dispatch.descs->ptr.gpu;
}
static mali_ptr
get_tls(const struct panfrost_device *dev)
{
return dev->indirect_dispatch.descs->ptr.gpu + pan_size(RENDERER_STATE);
}
static void
pan_indirect_dispatch_init(struct panfrost_device *dev)
pan_indirect_dispatch_init(struct pan_indirect_dispatch_meta *meta)
{
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE, GENX(pan_shader_get_compiler_options)(), "%s",
@ -121,7 +108,7 @@ pan_indirect_dispatch_init(struct panfrost_device *dev)
nir_pop_if(&b, NULL);
struct panfrost_compile_inputs inputs = {
.gpu_id = panfrost_device_gpu_id(dev),
.gpu_id = meta->gpu_id,
.no_ubo_to_push = true,
};
struct pan_shader_info shader_info;
@ -139,40 +126,40 @@ pan_indirect_dispatch_init(struct panfrost_device *dev)
shader_info.push.count =
DIV_ROUND_UP(sizeof(struct pan_indirect_dispatch_info), 4);
dev->indirect_dispatch.bin = panfrost_bo_create(
dev, binary.size, PAN_BO_EXECUTE, "Indirect dispatch shader");
struct panfrost_ptr bin =
pan_pool_alloc_aligned(meta->bin_pool, binary.size, 64);
memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);
memcpy(bin.cpu, binary.data, binary.size);
util_dynarray_fini(&binary);
dev->indirect_dispatch.descs = panfrost_bo_create(
dev, pan_size(RENDERER_STATE) + pan_size(LOCAL_STORAGE), 0,
"Indirect dispatch descriptors");
struct panfrost_ptr rsd =
pan_pool_alloc_desc(meta->desc_pool, RENDERER_STATE);
struct panfrost_ptr tsd =
pan_pool_alloc_desc(meta->desc_pool, LOCAL_STORAGE);
mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;
void *rsd = dev->indirect_dispatch.descs->ptr.cpu;
pan_pack(rsd, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader_info, address, &cfg);
pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
pan_shader_prepare_rsd(&shader_info, bin.gpu, &cfg);
}
void *tsd = dev->indirect_dispatch.descs->ptr.cpu + pan_size(RENDERER_STATE);
pan_pack(tsd, LOCAL_STORAGE, ls) {
pan_pack(tsd.cpu, LOCAL_STORAGE, ls) {
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
};
meta->rsd = rsd.gpu;
meta->tsd = tsd.gpu;
}
unsigned
GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_jc *jc,
GENX(pan_indirect_dispatch_emit)(struct pan_indirect_dispatch_meta *meta,
struct pan_pool *pool, struct pan_jc *jc,
const struct pan_indirect_dispatch_info *inputs)
{
struct panfrost_device *dev = pool->dev;
struct panfrost_ptr job = pan_pool_alloc_desc(pool, COMPUTE_JOB);
void *invocation = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
/* If we haven't compiled the indirect dispatch shader yet, do it now */
if (!dev->indirect_dispatch.bin)
pan_indirect_dispatch_init(dev);
if (!meta->rsd)
pan_indirect_dispatch_init(meta);
panfrost_pack_work_groups_compute(invocation, 1, 1, 1, 1, 1, 1, false,
false);
@ -182,8 +169,8 @@ GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_jc *jc,
}
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
cfg.state = get_rsd(dev);
cfg.thread_storage = get_tls(pool->dev);
cfg.state = meta->rsd;
cfg.thread_storage = meta->tsd;
cfg.push_uniforms =
pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
}
@ -191,10 +178,3 @@ GENX(pan_indirect_dispatch_emit)(struct pan_pool *pool, struct pan_jc *jc,
return pan_jc_add_job(pool, jc, MALI_JOB_TYPE_COMPUTE, false, true, 0, 0,
&job, false);
}
void
GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev)
{
panfrost_bo_unreference(dev->indirect_dispatch.bin);
panfrost_bo_unreference(dev->indirect_dispatch.descs);
}

View file

@ -27,20 +27,54 @@
#include "genxml/gen_macros.h"
#include "pan_jc.h"
struct pan_device;
#include "panfrost/util/pan_ir.h"
struct pan_jc;
struct pan_pool;
/* Per-GPU state shared by every indirect dispatch emitted through
 * pan_indirect_dispatch_emit(). It is reset by
 * pan_indirect_dispatch_meta_init(); the descriptor addresses are filled in
 * the first time an indirect dispatch is emitted.
 */
struct pan_indirect_dispatch_meta {
/* NOTE(review): no user of this field is visible in the dispatch code shown
 * here; confirm it is still needed.
 */
struct panfrost_ubo_push push;
/* GPU ID handed to the compiler when the indirect dispatch shader is
 * built.
 */
unsigned gpu_id;
/* GPU address of the renderer state descriptor. Zero until the indirect
 * dispatch shader has been compiled; pan_indirect_dispatch_emit() tests
 * this field to decide whether that still has to happen.
 */
mali_ptr rsd;
/* GPU address of the thread storage (LOCAL_STORAGE) descriptor, set
 * together with rsd.
 */
mali_ptr tsd;
/* Pool the compiled shader binary is allocated from. */
struct pan_pool *bin_pool;
/* Pool for the descriptors that are re-used across indirect dispatch
 * calls (the RSD and TSD above). Job descriptors are not allocated here:
 * they come from the pool passed to pan_indirect_dispatch_emit().
 */
struct pan_pool *desc_pool;
};
/* Inputs of one indirect dispatch. pan_indirect_dispatch_emit() uploads this
 * structure as-is and points the job's push uniforms at the copy, and the
 * shader builder addresses the fields by their offsetof(), so the layout
 * (hence PACKED) is part of the contract with the shader.
 */
struct pan_indirect_dispatch_info {
/* GPU address of a job supplied by the caller (jm_launch_grid() passes
 * t.gpu) -- presumably the compute job the shader patches; confirm against
 * the shader body.
 */
mali_ptr job;
/* GPU address derived from the indirect resource's BO by the caller --
 * presumably where the indirect grid dimensions live; confirm.
 */
mali_ptr indirect_dim;
/* NOTE(review): by its name, one GPU address per grid dimension for the
 * num-workgroups sysval; the code that fills and consumes it is not visible
 * here -- confirm.
 */
mali_ptr num_wg_sysval[3];
} PACKED;
/* Put an indirect dispatch context into its initial state.
 *
 * Everything is zeroed first, which leaves the RSD/TSD addresses at zero so
 * that the first pan_indirect_dispatch_emit() call compiles the shader and
 * allocates the descriptors. The GPU ID and the two pools are then recorded
 * for that later step; the pools are only stored here, not used.
 */
static inline void
pan_indirect_dispatch_meta_init(struct pan_indirect_dispatch_meta *meta,
                                unsigned gpu_id, struct pan_pool *bin_pool,
                                struct pan_pool *desc_pool)
{
   /* Clear the whole structure, descriptor addresses included. */
   memset(meta, 0, sizeof *meta);

   meta->desc_pool = desc_pool;
   meta->bin_pool = bin_pool;
   meta->gpu_id = gpu_id;
}
#ifdef PAN_ARCH
unsigned GENX(pan_indirect_dispatch_emit)(
struct pan_indirect_dispatch_meta *meta,
struct pan_pool *pool, struct pan_jc *jc,
const struct pan_indirect_dispatch_info *dispatch_info);
void GENX(pan_indirect_dispatch_cleanup)(struct panfrost_device *dev);
#endif
#endif