mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 14:18:07 +02:00
freedreno/a6xx: Fix sharable cs races
For 3D draws, we have a per-context cache, which ensures that program state objects (stateobjs) are not shared between contexts/threads. We don't have this for compute shaders, so the compute shader variant and its stateobj are now created under a lock.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40352>
This commit is contained in:
parent
d2b497c4bb
commit
b91b0535d8
3 changed files with 43 additions and 30 deletions
|
|
@ -9,6 +9,7 @@
|
|||
#include "drm/freedreno_ringbuffer.h"
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/simple_mtx.h"
|
||||
#include "util/u_dump.h"
|
||||
#include "u_tracepoints.h"
|
||||
|
||||
|
|
@ -25,7 +26,7 @@
|
|||
/* nregs: 2 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
|
||||
cs_program_emit_local_size(struct fd_screen *screen, fd_crb &crb,
|
||||
struct ir3_shader_variant *v, uint16_t local_size[3])
|
||||
{
|
||||
/*
|
||||
|
|
@ -34,7 +35,7 @@ cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
|
|||
* which is always set to THREAD128.
|
||||
*/
|
||||
enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
|
||||
enum a6xx_threadsize thrsz_cs = ctx->screen->info->props
|
||||
enum a6xx_threadsize thrsz_cs = screen->info->props
|
||||
.supports_double_threadsize ? thrsz : THREAD128;
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
|
|
@ -62,7 +63,7 @@ cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
|
|||
/* nregs: 9 */
|
||||
template <chip CHIP>
|
||||
static void
|
||||
cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *v)
|
||||
cs_program_emit(struct fd_screen *screen, fd_crb &crb, struct ir3_shader_variant *v)
|
||||
assert_dt
|
||||
{
|
||||
crb.add(SP_UPDATE_CNTL(CHIP,
|
||||
|
|
@ -97,7 +98,7 @@ cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *
|
|||
* which is always set to THREAD128.
|
||||
*/
|
||||
enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
|
||||
enum a6xx_threadsize thrsz_cs = ctx->screen->info->props
|
||||
enum a6xx_threadsize thrsz_cs = screen->info->props
|
||||
.supports_double_threadsize ? thrsz : THREAD128;
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
|
|
@ -112,11 +113,11 @@ cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *
|
|||
.threadsize = thrsz_cs,
|
||||
));
|
||||
|
||||
if (!ctx->screen->info->props.supports_double_threadsize) {
|
||||
if (!screen->info->props.supports_double_threadsize) {
|
||||
crb.add(SP_PS_WAVE_CNTL(CHIP, .threadsize = thrsz));
|
||||
}
|
||||
|
||||
if (ctx->screen->info->props.has_lpac) {
|
||||
if (screen->info->props.has_lpac) {
|
||||
crb.add(A6XX_SP_CS_WIE_CNTL_0(
|
||||
.wgidconstid = work_group_id,
|
||||
.wgsizeconstid = INVALID_REG,
|
||||
|
|
@ -147,7 +148,7 @@ cs_program_emit(struct fd_context *ctx, fd_crb &crb, struct ir3_shader_variant *
|
|||
}
|
||||
|
||||
if (!v->local_size_variable)
|
||||
cs_program_emit_local_size<CHIP>(ctx, crb, v, v->local_size);
|
||||
cs_program_emit_local_size<CHIP>(screen, crb, v, v->local_size);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
@ -157,18 +158,29 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
struct fd6_compute_state *cp = (struct fd6_compute_state *)ctx->compute;
|
||||
|
||||
if (unlikely(!cp->v)) {
|
||||
struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cp->hwcso;
|
||||
struct ir3_shader_key key = {};
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
static simple_mtx_t lock = SIMPLE_MTX_INITIALIZER;
|
||||
|
||||
cp->v = ir3_shader_variant(ir3_get_shader(hwcso), key, false, &ctx->debug);
|
||||
if (!cp->v)
|
||||
return;
|
||||
simple_mtx_lock(&lock);
|
||||
/* check again under lock: */
|
||||
if (!cp->v) {
|
||||
struct ir3_shader_state *hwcso = (struct ir3_shader_state *)cp->hwcso;
|
||||
struct ir3_shader_key key = {};
|
||||
|
||||
cp->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||
fd_cs cs(cp->stateobj);
|
||||
with_crb (cs, 9)
|
||||
cs_program_emit<CHIP>(ctx, crb, cp->v);
|
||||
fd6_emit_shader<CHIP>(ctx, cs, cp->v);
|
||||
struct ir3_shader_variant *v =
|
||||
ir3_shader_variant(ir3_get_shader(hwcso), key, false, &ctx->debug);
|
||||
if (v) {
|
||||
cp->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||
fd_cs cs(cp->stateobj);
|
||||
with_crb (cs, 9)
|
||||
cs_program_emit<CHIP>(screen, crb, v);
|
||||
fd6_emit_shader<CHIP>(screen, cs, v);
|
||||
|
||||
cp->v = v;
|
||||
}
|
||||
}
|
||||
|
||||
simple_mtx_unlock(&lock);
|
||||
}
|
||||
|
||||
fd_cs cs(ctx->batch->draw);
|
||||
|
|
@ -242,7 +254,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
|
||||
if (cp->v->local_size_variable) {
|
||||
uint16_t wg[] = {local_size[0], local_size[1], local_size[2]};
|
||||
cs_program_emit_local_size<CHIP>(ctx, crb, cp->v, wg);
|
||||
cs_program_emit_local_size<CHIP>(ctx->screen, crb, cp->v, wg);
|
||||
}
|
||||
|
||||
crb.add(SP_CS_NDRANGE_0(CHIP,
|
||||
|
|
|
|||
|
|
@ -43,9 +43,8 @@ struct program_builder {
|
|||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so)
|
||||
emit_shader_regs(struct fd_screen *screen, fd_cs &cs, const struct ir3_shader_variant *so)
|
||||
{
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
fd_crb crb(cs, 14);
|
||||
|
||||
mesa_shader_stage type = so->type;
|
||||
|
|
@ -225,7 +224,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
|
|||
crb.add(SP_PS_VGS_CNTL(CHIP));
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
thrsz = ctx->screen->info->props.supports_double_threadsize ? thrsz : THREAD128;
|
||||
thrsz = screen->info->props.supports_double_threadsize ? thrsz : THREAD128;
|
||||
crb.add(A6XX_SP_CS_CNTL_0(
|
||||
.halfregfootprint = so->info.max_half_reg + 1,
|
||||
.fullregfootprint = so->info.max_reg + 1,
|
||||
|
|
@ -258,7 +257,7 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
|
|||
|
||||
template <chip CHIP>
|
||||
void
|
||||
fd6_emit_shader(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so)
|
||||
fd6_emit_shader(struct fd_screen *screen, fd_cs &cs, const struct ir3_shader_variant *so)
|
||||
{
|
||||
if (!so) {
|
||||
/* shader stage disabled */
|
||||
|
|
@ -272,11 +271,11 @@ fd6_emit_shader(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_varia
|
|||
fd_emit_string5(cs, name, strlen(name));
|
||||
#endif
|
||||
|
||||
emit_shader_regs<CHIP>(ctx, cs, so);
|
||||
emit_shader_regs<CHIP>(screen, cs, so);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
uint32_t shader_preload_size =
|
||||
MIN2(so->instrlen, ctx->screen->info->props.instr_cache_size);
|
||||
MIN2(so->instrlen, screen->info->props.instr_cache_size);
|
||||
|
||||
fd_pkt7(cs, fd6_stage2opcode(so->type), 3)
|
||||
.add(CP_LOAD_STATE6_0(
|
||||
|
|
@ -1298,12 +1297,14 @@ static void
|
|||
setup_stateobj(fd_cs &cs, const struct program_builder *b)
|
||||
assert_dt
|
||||
{
|
||||
fd6_emit_shader<CHIP>(b->ctx, cs, b->vs);
|
||||
fd6_emit_shader<CHIP>(b->ctx, cs, b->hs);
|
||||
fd6_emit_shader<CHIP>(b->ctx, cs, b->ds);
|
||||
fd6_emit_shader<CHIP>(b->ctx, cs, b->gs);
|
||||
struct fd_screen *screen = b->ctx->screen;
|
||||
|
||||
fd6_emit_shader<CHIP>(screen, cs, b->vs);
|
||||
fd6_emit_shader<CHIP>(screen, cs, b->hs);
|
||||
fd6_emit_shader<CHIP>(screen, cs, b->ds);
|
||||
fd6_emit_shader<CHIP>(screen, cs, b->gs);
|
||||
if (!b->binning_pass)
|
||||
fd6_emit_shader<CHIP>(b->ctx, cs, b->fs);
|
||||
fd6_emit_shader<CHIP>(screen, cs, b->fs);
|
||||
|
||||
emit_linkmap<CHIP>(cs, b);
|
||||
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v)
|
|||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void fd6_emit_shader(struct fd_context *ctx, fd_cs &cs,
|
||||
void fd6_emit_shader(struct fd_screen *screen, fd_cs &cs,
|
||||
const struct ir3_shader_variant *so) assert_dt;
|
||||
|
||||
template <chip CHIP>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue