mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 06:48:06 +02:00
panfrost: Allocate shaders in their own BOs
Instead of all shaders being stored in a single BO, have each shader in
its own.
This removes the need for a 16MB allocation per context, and allows us
to place transient blend shaders in BOs marked as executable (before
they were allocated in the transient pool, which shouldn't be
executable).
v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid
reading from GPU-accessible memory when patching (Alyssa).
- Free struct panfrost_blend_shader (Alyssa).
- Give the job a reference to regular shaders when emitting
(Alyssa).
v3: - Split out the allocation flags change (Rob).
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
parent
5804d75b9c
commit
950b5fc596
7 changed files with 62 additions and 33 deletions
|
|
@ -43,6 +43,7 @@ panfrost_shader_compile(
|
|||
gl_shader_stage stage,
|
||||
struct panfrost_shader_state *state)
|
||||
{
|
||||
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
|
||||
uint8_t *dst;
|
||||
|
||||
nir_shader *s;
|
||||
|
|
@ -80,7 +81,9 @@ panfrost_shader_compile(
|
|||
* I bet someone just thought that would be a cute pun. At least,
|
||||
* that's how I'd do it. */
|
||||
|
||||
meta->shader = panfrost_upload(&ctx->shaders, dst, size) | program.first_tag;
|
||||
state->bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE);
|
||||
memcpy(state->bo->cpu, dst, size);
|
||||
meta->shader = state->bo->gpu | program.first_tag;
|
||||
|
||||
util_dynarray_fini(&program.compiled);
|
||||
|
||||
|
|
|
|||
|
|
@ -33,8 +33,10 @@
|
|||
/* An internal blend shader descriptor, from the compiler */
|
||||
|
||||
struct panfrost_blend_shader {
|
||||
/* The compiled shader in GPU memory */
|
||||
struct panfrost_transfer shader;
|
||||
struct panfrost_context *ctx;
|
||||
|
||||
/* The compiled shader */
|
||||
void *buffer;
|
||||
|
||||
/* Byte count of the shader */
|
||||
unsigned size;
|
||||
|
|
@ -53,8 +55,11 @@ struct panfrost_blend_shader {
|
|||
/* A blend shader descriptor ready for actual use */
|
||||
|
||||
struct panfrost_blend_shader_final {
|
||||
/* The upload, possibly to transient memory */
|
||||
mali_ptr gpu;
|
||||
/* The compiled shader in GPU memory, possibly patched */
|
||||
struct panfrost_bo *bo;
|
||||
|
||||
/* First instruction tag (for tagging the pointer) */
|
||||
unsigned first_tag;
|
||||
|
||||
/* Same meaning as panfrost_blend_shader */
|
||||
unsigned work_count;
|
||||
|
|
|
|||
|
|
@ -152,10 +152,23 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
|
|||
}
|
||||
|
||||
static void
|
||||
panfrost_delete_blend_state(struct pipe_context *pipe,
|
||||
void *blend)
|
||||
panfrost_delete_blend_shader(struct hash_entry *entry)
|
||||
{
|
||||
/* TODO: Free shader binary? */
|
||||
struct panfrost_blend_shader *shader = (struct panfrost_blend_shader *)entry->data;
|
||||
free(shader->buffer);
|
||||
free(shader);
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_delete_blend_state(struct pipe_context *pipe,
|
||||
void *cso)
|
||||
{
|
||||
struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso;
|
||||
|
||||
for (unsigned c = 0; c < 4; ++c) {
|
||||
struct panfrost_blend_rt *rt = &blend->rt[c];
|
||||
_mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader);
|
||||
}
|
||||
ralloc_free(blend);
|
||||
}
|
||||
|
||||
|
|
@ -208,6 +221,9 @@ panfrost_blend_constant(float *out, float *in, unsigned mask)
|
|||
struct panfrost_blend_final
|
||||
panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
|
||||
{
|
||||
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
|
||||
struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
|
||||
|
||||
/* Grab the format, falling back gracefully if called invalidly (which
|
||||
* has to happen for no-color-attachment FBOs, for instance) */
|
||||
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
|
||||
|
|
@ -241,23 +257,24 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
|
|||
struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti);
|
||||
final.is_shader = true;
|
||||
final.shader.work_count = shader->work_count;
|
||||
final.shader.first_tag = shader->first_tag;
|
||||
|
||||
/* Upload the shader */
|
||||
final.shader.bo = panfrost_drm_create_bo(screen, shader->size, PAN_ALLOCATE_EXECUTE);
|
||||
memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
|
||||
|
||||
/* Pass BO ownership to job */
|
||||
panfrost_job_add_bo(job, final.shader.bo);
|
||||
panfrost_bo_unreference(ctx->base.screen, final.shader.bo);
|
||||
|
||||
if (shader->patch_index) {
|
||||
/* We have to specialize the blend shader to use constants, so
|
||||
* patch in the current constants and upload to transient
|
||||
* memory */
|
||||
* patch in the current constants */
|
||||
|
||||
float *patch = (float *) (shader->shader.cpu + shader->patch_index);
|
||||
float *patch = (float *) (final.shader.bo->cpu + shader->patch_index);
|
||||
memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
|
||||
|
||||
final.shader.gpu = panfrost_upload_transient(
|
||||
ctx, shader->shader.cpu, shader->size);
|
||||
} else {
|
||||
/* No need to specialize further, use the preuploaded */
|
||||
final.shader.gpu = shader->shader.gpu;
|
||||
}
|
||||
|
||||
final.shader.gpu |= shader->first_tag;
|
||||
return final;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -133,6 +133,8 @@ panfrost_compile_blend_shader(
|
|||
{
|
||||
struct panfrost_blend_shader res;
|
||||
|
||||
res.ctx = ctx;
|
||||
|
||||
/* Build the shader */
|
||||
|
||||
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
|
||||
|
|
@ -172,21 +174,14 @@ panfrost_compile_blend_shader(
|
|||
midgard_program program;
|
||||
midgard_compile_shader_nir(&ctx->compiler, shader, &program, true);
|
||||
|
||||
/* Upload the shader */
|
||||
|
||||
int size = program.compiled.size;
|
||||
uint8_t *dst = program.compiled.data;
|
||||
|
||||
res.shader.cpu = mem_dup(dst, size);
|
||||
res.shader.gpu = panfrost_upload(&ctx->shaders, dst, size);
|
||||
|
||||
/* At least two work registers are needed due to an encoding quirk */
|
||||
res.work_count = MAX2(program.work_register_count, 2);
|
||||
|
||||
/* Allow us to patch later */
|
||||
res.patch_index = program.blend_patch_offset;
|
||||
res.first_tag = program.first_tag;
|
||||
res.size = size;
|
||||
res.size = program.compiled.size;
|
||||
res.buffer = program.compiled.data;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1062,6 +1062,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
|
|||
|
||||
panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false);
|
||||
|
||||
panfrost_job_add_bo(job, variant->bo);
|
||||
|
||||
#define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
|
||||
|
||||
COPY(shader);
|
||||
|
|
@ -1140,7 +1142,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
|
|||
|
||||
if (blend.is_shader) {
|
||||
ctx->fragment_shader_core.blend.shader =
|
||||
blend.shader.gpu;
|
||||
blend.shader.bo->gpu | blend.shader.first_tag;
|
||||
} else {
|
||||
ctx->fragment_shader_core.blend.shader = 0;
|
||||
}
|
||||
|
|
@ -1215,7 +1217,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
|
|||
assert(!(is_srgb && blend.is_shader));
|
||||
|
||||
if (blend.is_shader) {
|
||||
rts[i].blend.shader = blend.shader.gpu;
|
||||
rts[i].blend.shader = blend.shader.bo->gpu | blend.shader.first_tag;
|
||||
} else {
|
||||
rts[i].blend.equation = *blend.equation.equation;
|
||||
rts[i].blend.constant = blend.equation.constant;
|
||||
|
|
@ -1904,6 +1906,12 @@ panfrost_delete_shader_state(
|
|||
DBG("Deleting TGSI shader leaks duplicated tokens\n");
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < cso->variant_count; ++i) {
|
||||
struct panfrost_shader_state *shader_state = &cso->variants[i];
|
||||
panfrost_bo_unreference(pctx->screen, shader_state->bo);
|
||||
shader_state->bo = NULL;
|
||||
}
|
||||
|
||||
free(so);
|
||||
}
|
||||
|
||||
|
|
@ -2528,7 +2536,6 @@ panfrost_destroy(struct pipe_context *pipe)
|
|||
util_blitter_destroy(panfrost->blitter_wallpaper);
|
||||
|
||||
panfrost_drm_free_slab(screen, &panfrost->scratchpad);
|
||||
panfrost_drm_free_slab(screen, &panfrost->shaders);
|
||||
panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
|
||||
panfrost_drm_free_slab(screen, &panfrost->tiler_dummy);
|
||||
|
||||
|
|
@ -2673,7 +2680,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
|
|||
struct panfrost_screen *screen = pan_screen(gallium->screen);
|
||||
|
||||
panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0);
|
||||
panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
|
||||
panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
|
||||
panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,7 +108,6 @@ struct panfrost_context {
|
|||
struct pipe_framebuffer_state pipe_framebuffer;
|
||||
|
||||
struct panfrost_memory cmdstream_persistent;
|
||||
struct panfrost_memory shaders;
|
||||
struct panfrost_memory scratchpad;
|
||||
struct panfrost_memory tiler_heap;
|
||||
struct panfrost_memory tiler_dummy;
|
||||
|
|
@ -224,6 +223,8 @@ struct panfrost_shader_state {
|
|||
|
||||
/* Should we enable helper invocations */
|
||||
bool helper_invocations;
|
||||
|
||||
struct panfrost_bo *bo;
|
||||
};
|
||||
|
||||
/* A collection of varyings (the CSO) */
|
||||
|
|
|
|||
|
|
@ -88,6 +88,9 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size,
|
|||
/* Kernel will fail (confusingly) with EPERM otherwise */
|
||||
assert(size > 0);
|
||||
|
||||
/* To maximize BO cache usage, don't allocate tiny BOs */
|
||||
size = MAX2(size, 4096);
|
||||
|
||||
unsigned translated_flags = 0;
|
||||
|
||||
/* TODO: translate flags to kernel flags, if the kernel supports */
|
||||
|
|
@ -285,7 +288,6 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool
|
|||
struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
|
||||
|
||||
/* TODO: Add here the transient pools */
|
||||
panfrost_job_add_bo(job, ctx->shaders.bo);
|
||||
panfrost_job_add_bo(job, ctx->scratchpad.bo);
|
||||
panfrost_job_add_bo(job, ctx->tiler_heap.bo);
|
||||
panfrost_job_add_bo(job, job->polygon_list);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue