freedreno/a6xx: Fix sharable cs races

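For 3d draws, we have a per-ctx cache, which ensures program stateobjs
are not shared between contexts/threads.  We don't have this for compute
shaders, so the lazily created compute variant and stateobj can be
reached from more than one context/thread.  Create them under a lock,
and only set cp->v once the stateobj has been fully emitted.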

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40352>
This commit is contained in:
Rob Clark 2026-03-09 11:01:45 -07:00 committed by Marge Bot
parent d2b497c4bb
commit b91b0535d8
3 changed files with 43 additions and 30 deletions

View file

@ -9,6 +9,7 @@
#include "drm/freedreno_ringbuffer.h"
#include "pipe/p_state.h"
#include "util/simple_mtx.h"
#include "util/u_dump.h"
#include "u_tracepoints.h"
@ -25,7 +26,7 @@
/* nregs: 2 */
template <chip CHIP>
static void
cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
cs_program_emit_local_size(struct fd_screen *screen, fd_crb &crb,
struct ir3_shader_variant *v, uint16_t local_size[3])
{
/*
@ -34,7 +35,7 @@ cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
* which is always set to THREAD128.
*/
enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
enum a6xx_threadsize thrsz_cs = ctx->screen->info->props
enum a6xx_threadsize thrsz_cs = screen->info->props
.supports_double_threadsize ? thrsz : THREAD128;
if (CHIP >= A7XX) {
@ -62,7 +63,7 @@ cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
/* nregs: 9 */
template <chip CHIP>
static void
cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *v)
cs_program_emit(struct fd_screen *screen, fd_crb &crb, struct ir3_shader_variant *v)
assert_dt
{
crb.add(SP_UPDATE_CNTL(CHIP,
@ -97,7 +98,7 @@ cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *
* which is always set to THREAD128.
*/
enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
enum a6xx_threadsize thrsz_cs = ctx->screen->info->props
enum a6xx_threadsize thrsz_cs = screen->info->props
.supports_double_threadsize ? thrsz : THREAD128;
if (CHIP == A6XX) {
@ -112,11 +113,11 @@ cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *
.threadsize = thrsz_cs,
));
if (!ctx->screen->info->props.supports_double_threadsize) {
if (!screen->info->props.supports_double_threadsize) {
crb.add(SP_PS_WAVE_CNTL(CHIP, .threadsize = thrsz));
}
if (ctx->screen->info->props.has_lpac) {
if (screen->info->props.has_lpac) {
crb.add(A6XX_SP_CS_WIE_CNTL_0(
.wgidconstid = work_group_id,
.wgsizeconstid = INVALID_REG,
@ -147,7 +148,7 @@ cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *
}
if (!v->local_size_variable)
cs_program_emit_local_size<CHIP>(ctx, crb, v, v->local_size);
cs_program_emit_local_size<CHIP>(screen, crb, v, v->local_size);
}
template <chip CHIP>
@ -157,18 +158,29 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
struct fd6_compute_state *cp = (struct fd6_compute_state *)ctx->compute;
if (unlikely(!cp->v)) {
struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cp->hwcso;
struct ir3_shader_key key = {};
struct fd_screen *screen = ctx->screen;
static simple_mtx_t lock = SIMPLE_MTX_INITIALIZER;
cp->v = ir3_shader_variant(ir3_get_shader(hwcso), key, false, &ctx->debug);
if (!cp->v)
return;
simple_mtx_lock(&lock);
/* check again under lock: */
if (!cp->v) {
struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cp->hwcso;
struct ir3_shader_key key = {};
cp->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
fd_cs cs(cp->stateobj);
with_crb (cs, 9)
cs_program_emit<CHIP>(ctx, crb, cp->v);
fd6_emit_shader<CHIP>(ctx, cs, cp->v);
struct ir3_shader_variant *v =
ir3_shader_variant(ir3_get_shader(hwcso), key, false, &ctx->debug);
if (v) {
cp->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
fd_cs cs(cp->stateobj);
with_crb (cs, 9)
cs_program_emit<CHIP>(screen, crb, v);
fd6_emit_shader<CHIP>(screen, cs, v);
cp->v = v;
}
}
simple_mtx_unlock(&lock);
}
fd_cs cs(ctx->batch->draw);
@ -242,7 +254,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
if (cp->v->local_size_variable) {
uint16_t wg[] = {local_size[0], local_size[1], local_size[2]};
cs_program_emit_local_size<CHIP>(ctx, crb, cp->v, wg);
cs_program_emit_local_size<CHIP>(ctx->screen, crb, cp->v, wg);
}
crb.add(SP_CS_NDRANGE_0(CHIP,

View file

@ -43,9 +43,8 @@ struct program_builder {
template <chip CHIP>
static void
emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so)
emit_shader_regs(struct fd_screen *screen, fd_cs &cs, const struct ir3_shader_variant *so)
{
struct fd_screen *screen = ctx->screen;
fd_crb crb(cs, 14);
mesa_shader_stage type = so->type;
@ -225,7 +224,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
crb.add(SP_PS_VGS_CNTL(CHIP));
break;
case MESA_SHADER_COMPUTE:
thrsz = ctx->screen->info->props.supports_double_threadsize ? thrsz : THREAD128;
thrsz = screen->info->props.supports_double_threadsize ? thrsz : THREAD128;
crb.add(A6XX_SP_CS_CNTL_0(
.halfregfootprint = so->info.max_half_reg + 1,
.fullregfootprint = so->info.max_reg + 1,
@ -258,7 +257,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
template <chip CHIP>
void
fd6_emit_shader(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so)
fd6_emit_shader(struct fd_screen *screen, fd_cs &cs, const struct ir3_shader_variant *so)
{
if (!so) {
/* shader stage disabled */
@ -272,11 +271,11 @@ fd6_emit_shader(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_varia
fd_emit_string5(cs, name, strlen(name));
#endif
emit_shader_regs<CHIP>(ctx, cs, so);
emit_shader_regs<CHIP>(screen, cs, so);
if (CHIP == A6XX) {
uint32_t shader_preload_size =
MIN2(so->instrlen, ctx->screen->info->props.instr_cache_size);
MIN2(so->instrlen, screen->info->props.instr_cache_size);
fd_pkt7(cs, fd6_stage2opcode(so->type), 3)
.add(CP_LOAD_STATE6_0(
@ -1298,12 +1297,14 @@ static void
setup_stateobj(fd_cs &cs, const struct program_builder *b)
assert_dt
{
fd6_emit_shader<CHIP>(b->ctx, cs, b->vs);
fd6_emit_shader<CHIP>(b->ctx, cs, b->hs);
fd6_emit_shader<CHIP>(b->ctx, cs, b->ds);
fd6_emit_shader<CHIP>(b->ctx, cs, b->gs);
struct fd_screen *screen = b->ctx->screen;
fd6_emit_shader<CHIP>(screen, cs, b->vs);
fd6_emit_shader<CHIP>(screen, cs, b->hs);
fd6_emit_shader<CHIP>(screen, cs, b->ds);
fd6_emit_shader<CHIP>(screen, cs, b->gs);
if (!b->binning_pass)
fd6_emit_shader<CHIP>(b->ctx, cs, b->fs);
fd6_emit_shader<CHIP>(screen, cs, b->fs);
emit_linkmap<CHIP>(cs, b);

View file

@ -111,7 +111,7 @@ fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v)
}
template <chip CHIP>
void fd6_emit_shader(struct fd_context *ctx, fd_cs &cs,
void fd6_emit_shader(struct fd_screen *screen, fd_cs &cs,
const struct ir3_shader_variant *so) assert_dt;
template <chip CHIP>