freedreno/a6xx: Remove shader key from fd6_emit

Only construct the key on-demand if the PROG state is dirty.  The newly
added "virtual" PROG_KEY state is used to know when other state that the
shader key depends on changes.  Worth ~13% at drawoverhead test 0.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20572>
This commit is contained in:
Rob Clark 2023-01-02 13:36:55 -08:00 committed by Marge Bot
parent 7b734cc668
commit 1b8bed38eb
5 changed files with 80 additions and 54 deletions

View file

@ -132,7 +132,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
* size is dwords, since that's what LDG/STG use.
*/
unsigned num_vertices = emit->hs
? emit->key.patch_vertices
? ctx->patch_vertices
: emit->gs->gs.vertices_in;
uint32_t vs_params[4] = {
@ -146,7 +146,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
uint32_t hs_params[4] = {
emit->vs->output_size * num_vertices * 4, /* vs primitive stride */
emit->vs->output_size * 4, /* vs vertex stride */
emit->hs->output_size, emit->key.patch_vertices};
emit->hs->output_size, ctx->patch_vertices};
emit_stage_tess_consts(constobj, emit->hs, hs_params,
ARRAY_SIZE(hs_params));

View file

@ -151,7 +151,9 @@ setup_state_map(struct fd_context *ctx)
fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG,
BIT(FD6_GROUP_LRZ));
fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE,
BIT(FD6_GROUP_PROG));
BIT(FD6_GROUP_PROG) | BIT(FD6_GROUP_PROG_KEY));
fd_context_add_map(ctx, FD_DIRTY_RASTERIZER | FD_DIRTY_MIN_SAMPLES | FD_DIRTY_FRAMEBUFFER,
BIT(FD6_GROUP_PROG_KEY));
fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER));
fd_context_add_map(ctx,
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD |

View file

@ -131,22 +131,12 @@ fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit) assert_dt
}
}
static bool
fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draw,
unsigned index_offset) assert_dt
static const struct fd6_program_state *
get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info)
assert_dt
{
struct fd6_context *fd6_ctx = fd6_context(ctx);
struct shader_info *gs_info = ir3_get_shader_info(ctx->prog.gs);
struct fd6_emit emit;
emit.ctx = ctx;
emit.info = info;
emit.indirect = indirect;
emit.draw = draw;
emit.key = (struct ir3_cache_key){
struct ir3_cache_key key = {
.vs = ctx->prog.vs,
.gs = ctx->prog.gs,
.fs = ctx->prog.fs,
@ -159,6 +149,52 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
.clip_plane_enable = ctx->rasterizer->clip_plane_enable,
.patch_vertices = ctx->patch_vertices,
};
if (info->mode == PIPE_PRIM_PATCHES) {
struct shader_info *gs_info = ir3_get_shader_info(ctx->prog.gs);
key.hs = ctx->prog.hs;
key.ds = ctx->prog.ds;
struct shader_info *ds_info = ir3_get_shader_info(key.ds);
key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);
struct shader_info *fs_info = ir3_get_shader_info(key.fs);
key.key.tcs_store_primid =
BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
(gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
(fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
}
if (key.gs) {
key.key.has_gs = true;
}
ir3_fixup_shader_state(&ctx->base, &key.key);
if (ctx->gen_dirty & BIT(FD6_GROUP_PROG)) {
struct ir3_program_state *s = ir3_cache_lookup(
ctx->shader_cache, &key, &ctx->debug);
fd6_ctx->prog = fd6_program_state(s);
}
return fd6_ctx->prog;
}
static bool
fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draw,
unsigned index_offset) assert_dt
{
struct fd6_context *fd6_ctx = fd6_context(ctx);
struct fd6_emit emit;
emit.ctx = ctx;
emit.info = info;
emit.indirect = indirect;
emit.draw = draw;
emit.rasterflat = ctx->rasterizer->flatshade;
emit.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
emit.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode;
@ -170,39 +206,25 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
if (!(ctx->prog.vs && ctx->prog.fs))
return false;
if (info->mode == PIPE_PRIM_PATCHES) {
emit.key.hs = ctx->prog.hs;
emit.key.ds = ctx->prog.ds;
struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds);
emit.key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);
if ((info->mode == PIPE_PRIM_PATCHES) || ctx->prog.gs) {
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
struct shader_info *fs_info = ir3_get_shader_info(emit.key.fs);
emit.key.key.tcs_store_primid =
BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
(gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
(fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
}
if (emit.key.gs) {
emit.key.key.has_gs = true;
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
}
if (!(emit.key.hs || emit.key.ds || emit.key.gs || indirect))
} else if (!indirect) {
fd6_vsc_update_sizes(ctx->batch, info, draw);
}
ir3_fixup_shader_state(&ctx->base, &emit.key.key);
if (!(ctx->gen_dirty & BIT(FD6_GROUP_PROG))) {
emit.prog = fd6_ctx->prog;
/* If PROG state (which will mark PROG_KEY dirty) or any state that the
* key depends on, is dirty, then we actually need to construct the shader
* key, figure out if we need a new variant, and lookup the PROG state.
* Otherwise we can just use the previous prog state.
*/
if (unlikely(ctx->gen_dirty & BIT(FD6_GROUP_PROG_KEY))) {
emit.prog = get_program_state(ctx, info);
} else {
fd6_ctx->prog = fd6_emit_get_prog(&emit);
emit.prog = fd6_ctx->prog;
}
/* bail if compile failed: */
if (!fd6_ctx->prog)
if (!emit.prog)
return false;
fixup_draw_state(ctx, &emit);
@ -242,7 +264,7 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
struct CP_DRAW_INDX_OFFSET_0 draw0 = {
.prim_type = ctx->screen->primtypes[info->mode],
.vis_cull = USE_VISIBILITY,
.gs_enable = !!emit.key.gs,
.gs_enable = !!ctx->prog.gs,
};
if (indirect && indirect->count_from_stream_output) {
@ -255,12 +277,15 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
}
if (info->mode == PIPE_PRIM_PATCHES) {
uint32_t factor_stride = ir3_tess_factor_stride(emit.key.key.tessellation);
struct shader_info *ds_info = ir3_get_shader_info(ctx->prog.ds);
unsigned tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);
uint32_t factor_stride = ir3_tess_factor_stride(tessellation);
STATIC_ASSERT(IR3_TESS_ISOLINES == TESS_ISOLINES + 1);
STATIC_ASSERT(IR3_TESS_TRIANGLES == TESS_TRIANGLES + 1);
STATIC_ASSERT(IR3_TESS_QUADS == TESS_QUADS + 1);
draw0.patch_type = emit.key.key.tessellation - 1;
draw0.patch_type = tessellation - 1;
draw0.prim_type = DI_PT_PATCHES0 + ctx->patch_vertices;
draw0.tess_enable = true;
@ -298,7 +323,6 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
ctx->last.restart_index = restart_index;
}
// TODO move fd6_emit_streamout.. I think..
if (emit.dirty_groups)
fd6_emit_3d_state(ring, &emit);

View file

@ -902,7 +902,7 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_emit_non_ring(ring, emit);
break;
default:
unreachable("bad state group");
break;
}
}

View file

@ -66,6 +66,12 @@ enum fd6_state_id {
FD6_GROUP_BLEND_COLOR,
FD6_GROUP_SO,
FD6_GROUP_IBO,
/*
* Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group
*/
FD6_GROUP_PROG_KEY, /* Set for any state which could change shader key */
FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */
};
@ -148,7 +154,6 @@ struct fd6_emit {
const struct pipe_draw_info *info;
const struct pipe_draw_indirect_info *indirect;
const struct pipe_draw_start_count_bias *draw;
struct ir3_cache_key key;
uint32_t dirty_groups;
uint32_t sprite_coord_enable; /* bitmask */
@ -172,11 +177,6 @@ struct fd6_emit {
static inline const struct fd6_program_state *
fd6_emit_get_prog(struct fd6_emit *emit)
{
if (!emit->prog) {
struct ir3_program_state *s = ir3_cache_lookup(
emit->ctx->shader_cache, &emit->key, &emit->ctx->debug);
emit->prog = fd6_program_state(s);
}
return emit->prog;
}