freedreno: Move pvtmem to screen

Since shader CSOs can be shared across contexts, we need the
corresponding stateobj to be shareable across contexts.  Otherwise
different ctxs could be racing with each other to build the stateobj.

Prep for next patch.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40352>
This commit is contained in:
Rob Clark 2026-03-09 10:48:45 -07:00 committed by Marge Bot
parent 9892e333d0
commit d2b497c4bb
8 changed files with 66 additions and 52 deletions

View file

@@ -68,23 +68,28 @@ fd5_emit_shader_obj(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct ir3_shader_variant *so,
uint32_t shader_obj_reg)
{
ir3_get_private_mem(ctx, so);
struct fd_screen *screen = ctx->screen;
fd_screen_lock(screen);
ir3_get_private_mem(screen, so);
OUT_PKT4(ring, shader_obj_reg, 6);
OUT_RELOC(ring, so->bo, 0, 0, 0); /* SP_VS_OBJ_START */
uint32_t per_sp_size = ctx->pvtmem[so->pvtmem_per_wave].per_sp_size;
uint32_t per_sp_size = screen->pvtmem[so->pvtmem_per_wave].per_sp_size;
OUT_RING(ring, A5XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(
ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) |
screen->pvtmem[so->pvtmem_per_wave].per_fiber_size) |
A5XX_SP_VS_PVT_MEM_PARAM_HWSTACKOFFSET(per_sp_size));
if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */
OUT_RELOC(ring, ctx->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0);
fd_ringbuffer_attach_bo(ring, ctx->pvtmem[so->pvtmem_per_wave].bo);
OUT_RELOC(ring, screen->pvtmem[so->pvtmem_per_wave].bo, 0, 0, 0);
fd_ringbuffer_attach_bo(ring, screen->pvtmem[so->pvtmem_per_wave].bo);
} else {
OUT_RING(ring, 0);
OUT_RING(ring, 0);
}
OUT_RING(ring, A5XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size));
fd_screen_unlock(screen);
}
/* TODO maybe some of this we could pre-compute once rather than having

View file

@@ -45,6 +45,7 @@ template <chip CHIP>
static void
emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so)
{
struct fd_screen *screen = ctx->screen;
fd_crb crb(cs, 14);
mesa_shader_stage type = so->type;
@@ -85,13 +86,15 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
enum a6xx_threadsize thrsz =
so->info.double_threadsize ? THREAD128 : THREAD64;
ir3_get_private_mem(ctx, so);
fd_screen_lock(screen);
ir3_get_private_mem(screen, so);
uint32_t per_sp_size = ctx->pvtmem[so->pvtmem_per_wave].per_sp_size;
auto pvtmem = &screen->pvtmem[so->pvtmem_per_wave];
uint32_t per_sp_size = pvtmem->per_sp_size;
struct fd_bo *pvtmem_bo = NULL;
if (so->pvtmem_size > 0) { /* SP_xS_PVT_MEM_ADDR */
pvtmem_bo = ctx->pvtmem[so->pvtmem_per_wave].bo;
pvtmem_bo = pvtmem->bo;
crb.attach_bo(pvtmem_bo);
}
@@ -110,7 +113,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(A6XX_SP_VS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_VS_BASE(so->bo));
crb.add(A6XX_SP_VS_PVT_MEM_PARAM(
.memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size,
.memsizeperitem = pvtmem->per_fiber_size,
));
crb.add(A6XX_SP_VS_PVT_MEM_BASE(pvtmem_bo));
crb.add(A6XX_SP_VS_PVT_MEM_SIZE(
@@ -132,7 +135,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(A6XX_SP_HS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_HS_BASE(so->bo));
crb.add(A6XX_SP_HS_PVT_MEM_PARAM(
.memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size,
.memsizeperitem = pvtmem->per_fiber_size,
));
crb.add(A6XX_SP_HS_PVT_MEM_BASE(pvtmem_bo));
crb.add(A6XX_SP_HS_PVT_MEM_SIZE(
@@ -154,7 +157,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(A6XX_SP_DS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_DS_BASE(so->bo));
crb.add(A6XX_SP_DS_PVT_MEM_PARAM(
.memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size,
.memsizeperitem = pvtmem->per_fiber_size,
));
crb.add(A6XX_SP_DS_PVT_MEM_BASE(pvtmem_bo));
crb.add(A6XX_SP_DS_PVT_MEM_SIZE(
@@ -176,7 +179,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(A6XX_SP_GS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_GS_BASE(so->bo));
crb.add(A6XX_SP_GS_PVT_MEM_PARAM(
.memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size,
.memsizeperitem = pvtmem->per_fiber_size,
));
crb.add(A6XX_SP_GS_PVT_MEM_BASE(pvtmem_bo));
crb.add(A6XX_SP_GS_PVT_MEM_SIZE(
@@ -210,7 +213,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(A6XX_SP_PS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_PS_BASE(so->bo));
crb.add(A6XX_SP_PS_PVT_MEM_PARAM(
.memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size,
.memsizeperitem = pvtmem->per_fiber_size,
));
crb.add(A6XX_SP_PS_PVT_MEM_BASE(pvtmem_bo));
crb.add(A6XX_SP_PS_PVT_MEM_SIZE(
@@ -235,7 +238,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(A6XX_SP_CS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_CS_BASE(so->bo));
crb.add(A6XX_SP_CS_PVT_MEM_PARAM(
.memsizeperitem = ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size,
.memsizeperitem = pvtmem->per_fiber_size,
));
crb.add(A6XX_SP_CS_PVT_MEM_BASE(pvtmem_bo));
crb.add(A6XX_SP_CS_PVT_MEM_SIZE(
@@ -249,6 +252,8 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
default:
UNREACHABLE("bad shader stage");
}
fd_screen_unlock(screen);
}
template <chip CHIP>

View file

@@ -401,11 +401,6 @@ fd_context_destroy(struct pipe_context *pctx)
if (ctx->in_fence_fd != -1)
close(ctx->in_fence_fd);
for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) {
if (ctx->pvtmem[i].bo)
fd_bo_del(ctx->pvtmem[i].bo);
}
util_copy_framebuffer_state(&ctx->framebuffer, NULL);
fd_batch_reference(&ctx->batch, NULL); /* unref current batch */

View file

@@ -506,28 +506,6 @@ struct fd_context {
bool cond_cond dt; /* inverted rendering condition */
uint cond_mode dt;
/* Private memory is a memory space where each fiber gets its own piece of
* memory, in addition to registers. It is backed by a buffer which needs
* to be large enough to hold the contents of every possible wavefront in
* every core of the GPU. Because it allocates space via the internal
* wavefront ID which is shared between all currently executing shaders,
* the same buffer can be reused by all shaders, as long as all shaders
* sharing the same buffer use the exact same configuration. There are two
* inputs to the configuration, the amount of per-fiber space and whether
* to use the newer per-wave or older per-fiber layout. We only ever
* increase the size, and shaders with a smaller size requirement simply
* use the larger existing buffer, so that we only need to keep track of
* one buffer and its size, but we still need to keep track of per-fiber
* and per-wave buffers separately so that we never use the same buffer
* for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
* per-wave.
*/
struct {
struct fd_bo *bo;
uint32_t per_fiber_size;
uint32_t per_sp_size;
} pvtmem[2] dt;
/* maps per-shader-stage state plus variant key to hw
* program stateobj:
*/

View file

@@ -149,6 +149,11 @@ fd_screen_destroy(struct pipe_screen *pscreen)
if (screen->tess_bo)
fd_bo_del(screen->tess_bo);
for (int i = 0; i < ARRAY_SIZE(screen->pvtmem); i++) {
if (screen->pvtmem[i].bo)
fd_bo_del(screen->pvtmem[i].bo);
}
if (screen->pipe)
fd_pipe_del(screen->pipe);

View file

@@ -155,6 +155,28 @@ struct fd_screen {
struct fd_bo *tess_bo;
/* Private memory is a memory space where each fiber gets its own piece of
* memory, in addition to registers. It is backed by a buffer which needs
* to be large enough to hold the contents of every possible wavefront in
* every core of the GPU. Because it allocates space via the internal
* wavefront ID which is shared between all currently executing shaders,
* the same buffer can be reused by all shaders, as long as all shaders
* sharing the same buffer use the exact same configuration. There are two
* inputs to the configuration, the amount of per-fiber space and whether
* to use the newer per-wave or older per-fiber layout. We only ever
* increase the size, and shaders with a smaller size requirement simply
* use the larger existing buffer, so that we only need to keep track of
* one buffer and its size, but we still need to keep track of per-fiber
* and per-wave buffers separately so that we never use the same buffer
* for different layouts. pvtmem[0] is for per-fiber, and pvtmem[1] is for
* per-wave.
*/
struct {
struct fd_bo *bo;
uint32_t per_fiber_size;
uint32_t per_sp_size;
} pvtmem[2];
/* table with MESA_PRIM_COUNT+1 entries mapping MESA_PRIM_x to
* DI_PT_x value to use for draw initiator. There are some
* slight differences between generation.

View file

@@ -15,6 +15,7 @@
#include "util/u_string.h"
#include "nir/tgsi_to_nir.h"
#include "freedreno_screen.h"
#include "nir_serialize.h"
#include "freedreno_context.h"
@@ -609,23 +610,25 @@ ir3_update_max_tf_vtx(struct fd_context *ctx,
}
void
ir3_get_private_mem(struct fd_context *ctx, const struct ir3_shader_variant *so)
ir3_get_private_mem(struct fd_screen *screen, const struct ir3_shader_variant *so)
{
uint32_t fibers_per_sp = ctx->screen->info->fibers_per_sp;
uint32_t num_sp_cores = ctx->screen->info->num_sp_cores;
uint32_t fibers_per_sp = screen->info->fibers_per_sp;
uint32_t num_sp_cores = screen->info->num_sp_cores;
fd_screen_assert_locked(screen);
uint32_t per_fiber_size = so->pvtmem_size;
if (per_fiber_size > ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size) {
if (ctx->pvtmem[so->pvtmem_per_wave].bo)
fd_bo_del(ctx->pvtmem[so->pvtmem_per_wave].bo);
if (per_fiber_size > screen->pvtmem[so->pvtmem_per_wave].per_fiber_size) {
if (screen->pvtmem[so->pvtmem_per_wave].bo)
fd_bo_del(screen->pvtmem[so->pvtmem_per_wave].bo);
uint32_t per_sp_size = align(per_fiber_size * fibers_per_sp, 1 << 12);
uint32_t total_size = per_sp_size * num_sp_cores;
ctx->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
ctx->pvtmem[so->pvtmem_per_wave].per_sp_size = per_sp_size;
ctx->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new(
ctx->screen->dev, total_size, FD_BO_NOMAP, "pvtmem_%s_%d",
screen->pvtmem[so->pvtmem_per_wave].per_fiber_size = per_fiber_size;
screen->pvtmem[so->pvtmem_per_wave].per_sp_size = per_sp_size;
screen->pvtmem[so->pvtmem_per_wave].bo = fd_bo_new(
screen->dev, total_size, FD_BO_NOMAP, "pvtmem_%s_%d",
so->pvtmem_per_wave ? "per_wave" : "per_fiber", per_fiber_size);
}
}

View file

@@ -66,7 +66,8 @@ ir3_point_sprite(const struct ir3_shader_variant *fs, int i,
void ir3_update_max_tf_vtx(struct fd_context *ctx,
const struct ir3_shader_variant *v) assert_dt;
void ir3_get_private_mem(struct fd_context *ctx,
struct fd_screen;
void ir3_get_private_mem(struct fd_screen *screen,
const struct ir3_shader_variant *so) assert_dt;
ENDC;