diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 722b6229aac..2948bdeffcf 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -68,23 +68,28 @@ fd5_emit_shader_obj(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct ir3_shader_variant *so, uint32_t shader_obj_reg) { - ir3_get_private_mem(ctx, so); + struct fd_screen *screen = ctx->screen; + + fd_screen_lock(screen); /* pvtmem[] now lives in the screen: keep the lock from ir3_get_private_mem() (which asserts it) until the bo and sizes have been written to the ring */ + ir3_get_private_mem(screen, so); OUT_PKT4(ring, shader_obj_reg, 6); OUT_RELOC(ring, so->bo, 0, 0, 0); /* SP_VS_OBJ_START */ - uint32_t per_sp_size = ctx->pvtmem[so->pvtmem_per_wave].per_sp_size; + uint32_t per_sp_size = screen->pvtmem[so->pvtmem_per_wave].per_sp_size; OUT_RING(ring, A5XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM( - ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) | + screen->pvtmem[so->pvtmem_per_wave].per_fiber_size) | A5XX_SP_VS_PVT_MEM_PARAM_HWSTACKOFFSET(per_sp_size)); if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */ - OUT_RELOC(ring, ctx->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0); - fd_ringbuffer_attach_bo(ring, ctx->pvtmem[so->pvtmem_per_wave].bo); + OUT_RELOC(ring, screen->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0); + fd_ringbuffer_attach_bo(ring, screen->pvtmem[so->pvtmem_per_wave].bo); } else { OUT_RING(ring, 0); OUT_RING(ring, 0); } OUT_RING(ring, A5XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size)); + + fd_screen_unlock(screen); } /* TODO maybe some of this we could pre-compute once rather than having diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc index f651e146f9f..802401b1f65 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc @@ -45,6 +45,7 @@ template static void emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so) { + struct fd_screen *screen = ctx->screen; 
fd_crb crb(cs, 14); mesa_shader_stage type = so->type; @@ -85,13 +86,15 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari enum a6xx_threadsize thrsz = so->info.double_threadsize ? THREAD128 : THREAD64; - ir3_get_private_mem(ctx, so); + fd_screen_lock(screen); + ir3_get_private_mem(screen, so); - uint32_t per_sp_size = ctx->pvtmem[so->pvtmem_per_wave].per_sp_size; + auto pvtmem = &screen->pvtmem[so->pvtmem_per_wave]; + uint32_t per_sp_size = pvtmem->per_sp_size; struct fd_bo *pvtmem_bo = NULL; if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */ - pvtmem_bo = ctx->pvtmem[so->pvtmem_per_wave].bo; + pvtmem_bo = pvtmem->bo; crb.attach_bo(pvtmem_bo); } @@ -110,7 +113,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari crb.add(A6XX_SP_VS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_VS_BASE(so->bo)); crb.add(A6XX_SP_VS_PVT_MEM_PARAM( - .memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size, + .memsizeperitem = pvtmem->per_fiber_size, )); crb.add(A6XX_SP_VS_PVT_MEM_BASE(pvtmem_bo)); crb.add(A6XX_SP_VS_PVT_MEM_SIZE( @@ -132,7 +135,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari crb.add(A6XX_SP_HS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_HS_BASE(so->bo)); crb.add(A6XX_SP_HS_PVT_MEM_PARAM( - .memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size, + .memsizeperitem = pvtmem->per_fiber_size, )); crb.add(A6XX_SP_HS_PVT_MEM_BASE(pvtmem_bo)); crb.add(A6XX_SP_HS_PVT_MEM_SIZE( @@ -154,7 +157,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari crb.add(A6XX_SP_DS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_DS_BASE(so->bo)); crb.add(A6XX_SP_DS_PVT_MEM_PARAM( - .memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size, + .memsizeperitem = pvtmem->per_fiber_size, )); crb.add(A6XX_SP_DS_PVT_MEM_BASE(pvtmem_bo)); crb.add(A6XX_SP_DS_PVT_MEM_SIZE( @@ -176,7 +179,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const 
struct ir3_shader_vari crb.add(A6XX_SP_GS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_GS_BASE(so->bo)); crb.add(A6XX_SP_GS_PVT_MEM_PARAM( - .memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size, + .memsizeperitem = pvtmem->per_fiber_size, )); crb.add(A6XX_SP_GS_PVT_MEM_BASE(pvtmem_bo)); crb.add(A6XX_SP_GS_PVT_MEM_SIZE( @@ -210,7 +213,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari crb.add(A6XX_SP_PS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_PS_BASE(so->bo)); crb.add(A6XX_SP_PS_PVT_MEM_PARAM( - .memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size, + .memsizeperitem = pvtmem->per_fiber_size, )); crb.add(A6XX_SP_PS_PVT_MEM_BASE(pvtmem_bo)); crb.add(A6XX_SP_PS_PVT_MEM_SIZE( @@ -235,7 +238,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari crb.add(A6XX_SP_CS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_CS_BASE(so->bo)); crb.add(A6XX_SP_CS_PVT_MEM_PARAM( - .memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size, + .memsizeperitem = pvtmem->per_fiber_size, )); crb.add(A6XX_SP_CS_PVT_MEM_BASE(pvtmem_bo)); crb.add(A6XX_SP_CS_PVT_MEM_SIZE( @@ -249,6 +252,8 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari default: UNREACHABLE("bad shader stage"); } + + fd_screen_unlock(screen); } template diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 3e74e02b63e..d7ff010d713 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -401,11 +401,6 @@ fd_context_destroy(struct pipe_context *pctx) if (ctx->in_fence_fd != -1) close(ctx->in_fence_fd); - for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) { - if (ctx->pvtmem[i].bo) - fd_bo_del(ctx->pvtmem[i].bo); - } - util_copy_framebuffer_state(&ctx->framebuffer, NULL); fd_batch_reference(&ctx->batch, NULL); /* unref current batch */ diff --git 
a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 4626c76efbf..79529aac2e7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -506,28 +506,6 @@ struct fd_context { bool cond_cond dt; /* inverted rendering condition */ uint cond_mode dt; - /* Private memory is a memory space where each fiber gets its own piece of - * memory, in addition to registers. It is backed by a buffer which needs - * to be large enough to hold the contents of every possible wavefront in - * every core of the GPU. Because it allocates space via the internal - * wavefront ID which is shared between all currently executing shaders, - * the same buffer can be reused by all shaders, as long as all shaders - * sharing the same buffer use the exact same configuration. There are two - * inputs to the configuration, the amount of per-fiber space and whether - * to use the newer per-wave or older per-fiber layout. We only ever - * increase the size, and shaders with a smaller size requirement simply - * use the larger existing buffer, so that we only need to keep track of - * one buffer and its size, but we still need to keep track of per-fiber - * and per-wave buffers separately so that we never use the same buffer - * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for - * per-wave. 
- */ - struct { - struct fd_bo *bo; - uint32_t per_fiber_size; - uint32_t per_sp_size; - } pvtmem[2] dt; - /* maps per-shader-stage state plus variant key to hw * program stateobj: */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index dd725eb78dd..1d4f7183586 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -149,6 +149,11 @@ fd_screen_destroy(struct pipe_screen *pscreen) if (screen->tess_bo) fd_bo_del(screen->tess_bo); + for (int i = 0; i < ARRAY_SIZE(screen->pvtmem); i++) { /* release the private memory buffers that now belong to the screen */ + if (screen->pvtmem[i].bo) + fd_bo_del(screen->pvtmem[i].bo); + } + if (screen->pipe) fd_pipe_del(screen->pipe); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 82f0b1f6ebc..137fea1c5b8 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -155,6 +155,28 @@ struct fd_screen { struct fd_bo *tess_bo; + /* Private memory is a memory space where each fiber gets its own piece of + * memory, in addition to registers. It is backed by a buffer which needs + * to be large enough to hold the contents of every possible wavefront in + * every core of the GPU. Because it allocates space via the internal + * wavefront ID which is shared between all currently executing shaders, + * the same buffer can be reused by all shaders, as long as all shaders + * sharing the same buffer use the exact same configuration. There are two + * inputs to the configuration, the amount of per-fiber space and whether + * to use the newer per-wave or older per-fiber layout. 
We only ever + * increase the size, and shaders with a smaller size requirement simply + * use the larger existing buffer, so that we only need to keep track of + * one buffer and its size, but we still need to keep track of per-fiber + * and per-wave buffers separately so that we never use the same buffer + * for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for + * per-wave. + */ + struct { + struct fd_bo *bo; + uint32_t per_fiber_size; + uint32_t per_sp_size; + } pvtmem[2]; /* grown by ir3_get_private_mem(), which asserts that the screen lock is held */ + /* table with MESA_PRIM_COUNT+1 entries mapping MESA_PRIM_x to * DI_PT_x value to use for draw initiator. There are some * slight differences between generation. diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 61733aa210e..7eb3a6337b2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -15,6 +15,7 @@ #include "util/u_string.h" #include "nir/tgsi_to_nir.h" +#include "freedreno_screen.h" #include "nir_serialize.h" #include "freedreno_context.h" @@ -609,23 +610,25 @@ ir3_update_max_tf_vtx(struct fd_context *ctx, } void -ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so) +ir3_get_private_mem(struct fd_screen *screen, const struct ir3_shader_variant *so) { - uint32_t fibers_per_sp = ctx->screen->info->fibers_per_sp; - uint32_t num_sp_cores = ctx->screen->info->num_sp_cores; + uint32_t fibers_per_sp = screen->info->fibers_per_sp; + uint32_t num_sp_cores = screen->info->num_sp_cores; + + fd_screen_assert_locked(screen); uint32_t per_fiber_size = so->pvtmem_size; - if (per_fiber_size > ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) { - if (ctx->pvtmem[so->pvtmem_per_wave].bo) - fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo); + if (per_fiber_size > screen->pvtmem[so->pvtmem_per_wave].per_fiber_size) { + if 
(screen->pvtmem[so->pvtmem_per_wave].bo) + fd_bo_del(screen->pvtmem[so->pvtmem_per_wave].bo); uint32_t 
per_sp_size = align(per_fiber_size * fibers_per_sp, 1 << 12); uint32_t total_size = per_sp_size * num_sp_cores; - ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size; - ctx->pvtmem[so->pvtmem_per_wave].per_sp_size = per_sp_size; - ctx->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new( - ctx->screen->dev, total_size, FD_BO_NOMAP, "pvtmem_%s_%d", + screen->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size; + screen->pvtmem[so->pvtmem_per_wave].per_sp_size = per_sp_size; + screen->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new( + screen->dev, total_size, FD_BO_NOMAP, "pvtmem_%s_%d", so->pvtmem_per_wave ? "per_wave" : "per_fiber", per_fiber_size); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.h b/src/gallium/drivers/freedreno/ir3/ir3_gallium.h index c3f8e20e7f8..32857574dbf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.h @@ -66,7 +66,8 @@ ir3_point_sprite(const struct ir3_shader_variant *fs, int i, void ir3_update_max_tf_vtx(struct fd_context *ctx, const struct ir3_shader_variant *v) assert_dt; -void ir3_get_private_mem(struct fd_context *ctx, +struct fd_screen; +void ir3_get_private_mem(struct fd_screen *screen, const struct ir3_shader_variant *so) assert_dt; ENDC;