From f1e0efff8dfcf3ef97e29c486ddc68d319fb0cc0 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 12 May 2021 18:32:31 -0400 Subject: [PATCH] panfrost: Pool shaders Now we can do so without leaking memory :-) Likewise use a pan_pool for RSDs, to share the common path. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 20 +++++++---------- src/gallium/drivers/panfrost/pan_cmdstream.c | 23 ++++++++------------ src/gallium/drivers/panfrost/pan_context.c | 22 +++++++++---------- src/gallium/drivers/panfrost/pan_context.h | 18 ++++----------- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 22198d8c68a..d34cb79dc71 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -44,18 +44,15 @@ pan_upload_shader_descriptor(struct panfrost_context *ctx, struct panfrost_shader_state *state) { const struct panfrost_device *dev = pan_device(ctx->base.screen); - struct mali_state_packed *out; + struct panfrost_ptr ptr = + panfrost_pool_alloc_desc(&ctx->descs, RENDERER_STATE); - u_upload_alloc(ctx->state_uploader, 0, MALI_RENDERER_STATE_LENGTH, MALI_RENDERER_STATE_LENGTH, - &state->upload.offset, &state->upload.rsrc, (void **) &out); + state->state = pan_take_ref(&ctx->descs, ptr.gpu); - pan_pack(out, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(dev, &state->info, - state->bo ? state->bo->ptr.gpu : 0, + pan_pack(ptr.cpu, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(dev, &state->info, state->bin.gpu, &cfg); } - - u_upload_unmap(ctx->state_uploader); } void @@ -95,11 +92,10 @@ panfrost_shader_compile(struct panfrost_context *ctx, util_dynarray_init(&binary, NULL); pan_shader_compile(dev, s, &inputs, &binary, &state->info); - /* Prepare the compiled binary for upload */ if (binary.size) { - state->bo = panfrost_bo_create(dev, binary.size, - PAN_BO_EXECUTE, "Shader binary"); - memcpy(state->bo->ptr.cpu, binary.data, binary.size); + state->bin = pan_take_ref(&ctx->shaders, + panfrost_pool_upload_aligned(&ctx->shaders, + binary.data, binary.size, 128)); } if (stage != MESA_SHADER_FRAGMENT) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 5dcbfc13f52..55bec2ba3d3 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -339,15 +339,15 @@ panfrost_emit_bifrost_blend(struct panfrost_batch *batch, * the same top 32 bit as the fragment shader. * TODO: Ensure that's always the case. */ - assert(!fs->bo || + assert(!fs->bin.bo || (blend[i].shader.gpu & (0xffffffffull << 32)) == - (fs->bo->ptr.gpu & (0xffffffffull << 32))); + (fs->bin.gpu & (0xffffffffull << 32))); cfg.bifrost.internal.shader.pc = (u32)blend[i].shader.gpu; unsigned ret_offset = fs->info.bifrost.blend[i].return_offset; if (ret_offset) { assert(!(ret_offset & 0x7)); cfg.bifrost.internal.shader.return_value = - fs->bo->ptr.gpu + ret_offset; + fs->bin.gpu + ret_offset; } cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_SHADER; } else { @@ -465,9 +465,7 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx, state->properties.bifrost.allow_forward_pixel_to_be_killed = true; state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; } else { - pan_shader_prepare_rsd(dev, &fs->info, - fs->bo ? fs->bo->ptr.gpu : 0, - state); + pan_shader_prepare_rsd(dev, &fs->info, fs->bin.gpu, state); /* Track if any colour buffer is reused across draws, either * from reading it directly, or from failing to write it */ @@ -513,9 +511,7 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx, state->properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; state->properties.midgard.force_early_z = true; } else { - pan_shader_prepare_rsd(dev, &fs->info, - fs->bo ? fs->bo->ptr.gpu : 0, - state); + pan_shader_prepare_rsd(dev, &fs->info, fs->bin.gpu, state); /* Reasons to disable early-Z from a shader perspective */ bool late_z = fs->info.fs.can_discard || fs->info.writes_global || @@ -659,19 +655,18 @@ mali_ptr panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage) { struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage); - struct panfrost_resource *rsrc = pan_resource(ss->upload.rsrc); - panfrost_batch_add_bo(batch, ss->bo, + panfrost_batch_add_bo(batch, ss->bin.bo, PAN_BO_ACCESS_PRIVATE | PAN_BO_ACCESS_READ | PAN_BO_ACCESS_VERTEX_TILER); - panfrost_batch_add_bo(batch, rsrc->image.data.bo, + panfrost_batch_add_bo(batch, ss->state.bo, PAN_BO_ACCESS_PRIVATE | PAN_BO_ACCESS_READ | PAN_BO_ACCESS_VERTEX_TILER); - return rsrc->image.data.bo->ptr.gpu + ss->upload.offset; + return ss->state.gpu; } mali_ptr @@ -681,7 +676,7 @@ panfrost_emit_frag_shader_meta(struct panfrost_batch *batch) struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); /* Add the shader BO to the batch. */ - panfrost_batch_add_bo(batch, ss->bo, + panfrost_batch_add_bo(batch, ss->bin.bo, PAN_BO_ACCESS_PRIVATE | PAN_BO_ACCESS_READ | PAN_BO_ACCESS_FRAGMENT); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 2e27cf3a0d6..233ae6ca918 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -893,16 +893,11 @@ panfrost_delete_shader_state( for (unsigned i = 0; i < cso->variant_count; ++i) { struct panfrost_shader_state *shader_state = &cso->variants[i]; - panfrost_bo_unreference(shader_state->bo); - - if (shader_state->upload.rsrc) - pipe_resource_reference(&shader_state->upload.rsrc, NULL); - - shader_state->bo = NULL; + panfrost_bo_unreference(shader_state->bin.bo); + panfrost_bo_unreference(shader_state->state.bo); } + free(cso->variants); - - free(so); } @@ -1548,7 +1543,9 @@ panfrost_destroy(struct pipe_context *pipe) util_unreference_framebuffer_state(&panfrost->pipe_framebuffer); u_upload_destroy(pipe->stream_uploader); - u_upload_destroy(panfrost->state_uploader); + + panfrost_pool_cleanup(&panfrost->descs); + panfrost_pool_cleanup(&panfrost->shaders); ralloc_free(pipe); } @@ -1852,8 +1849,11 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) gallium->stream_uploader = u_upload_create_default(gallium); gallium->const_uploader = gallium->stream_uploader; - ctx->state_uploader = u_upload_create(gallium, 4096, - PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_DYNAMIC, 0); + panfrost_pool_init(&ctx->descs, ctx, dev, + 0, 4096, "Descriptors", true, false); + + panfrost_pool_init(&ctx->shaders, ctx, dev, + PAN_BO_EXECUTE, 4096, "Shaders", true, false); /* All of our GPUs support ES mode. Midgard supports additionally * QUADS/QUAD_STRIPS/POLYGON. Bifrost supports just QUADS. */ diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 06d00c8bffa..b8bd701e8e2 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -104,10 +104,8 @@ struct panfrost_context { /* Gallium context */ struct pipe_context base; - /* Upload manager for small resident GPU-internal data structures, like - * sampler descriptors. We use an upload manager since the minimum BO - * size from the kernel is 4kb */ - struct u_upload_mgr *state_uploader; + /* Unowned pools, so manage yourself. */ + struct pan_pool descs, shaders; /* Sync obj used to keep track of in-flight jobs. */ uint32_t syncobj; @@ -213,22 +211,14 @@ struct panfrost_shader_state { /* Compiled, mapped descriptor, ready for the hardware */ bool compiled; - /* Uploaded shader descriptor (TODO: maybe stuff the packed unuploaded - * bits in a union to save some memory?) */ - - struct { - struct pipe_resource *rsrc; - uint32_t offset; - } upload; + /* Respectively, shader binary and Renderer State Descriptor */ + struct pan_pool_ref bin, state; struct pan_shader_info info; struct pipe_stream_output_info stream_output; uint64_t so_mask; - /* GPU-executable memory */ - struct panfrost_bo *bo; - /* Variants */ enum pipe_format rt_formats[8]; unsigned nr_cbufs;