freedreno/a6xx: Template specialization for pipeline type

There are a bunch of extra steps for draws that involve GS and/or tess
stages.  But we can use template specialization to skip all that when
there is no GS/tess shader stage bound.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23149>
This commit is contained in:
Rob Clark 2023-05-18 19:45:03 -07:00 committed by Marge Bot
parent 778cdb156e
commit f4fc45d63d
7 changed files with 121 additions and 65 deletions

View file

@ -258,6 +258,7 @@ emit_user_consts(const struct ir3_shader_variant *v,
fd6_emit_ubos(v, ring, constbuf);
}
template <fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_user_consts(struct fd6_emit *emit)
{
@ -267,21 +268,25 @@ fd6_build_user_consts(struct fd6_emit *emit)
struct fd_ringbuffer *constobj =
fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
/* TODO would be nice to templatize the variants (ie. HAS_GS and HAS_TESS) */
emit_user_consts(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]);
if (emit->hs) {
emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
}
if (emit->gs) {
emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
if (PIPELINE == HAS_TESS_GS) {
if (emit->hs) {
emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]);
emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]);
}
if (emit->gs) {
emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]);
}
}
emit_user_consts(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]);
return constobj;
}
template struct fd_ringbuffer * fd6_build_user_consts<HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_user_consts<NO_TESS_GS>(struct fd6_emit *emit);
template <fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit)
{
@ -303,18 +308,20 @@ fd6_build_driver_params(struct fd6_emit *emit)
emit->indirect, emit->draw, emit->draw_id);
}
if (emit->gs && emit->gs->need_driver_params) {
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
emit->indirect, emit->draw, 0);
}
if (PIPELINE == HAS_TESS_GS) {
if (emit->gs && emit->gs->need_driver_params) {
ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info,
emit->indirect, emit->draw, 0);
}
if (emit->hs && emit->hs->need_driver_params) {
ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx);
}
if (emit->hs && emit->hs->need_driver_params) {
ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx);
}
if (emit->ds && emit->ds->need_driver_params) {
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
emit->indirect, emit->draw, 0);
if (emit->ds && emit->ds->need_driver_params) {
ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info,
emit->indirect, emit->draw, 0);
}
}
fd6_ctx->has_dp_state = true;
@ -322,6 +329,9 @@ fd6_build_driver_params(struct fd6_emit *emit)
return dpconstobj;
}
template struct fd_ringbuffer * fd6_build_driver_params<HAS_TESS_GS>(struct fd6_emit *emit);
template struct fd_ringbuffer * fd6_build_driver_params<NO_TESS_GS>(struct fd6_emit *emit);
void
fd6_emit_cs_driver_params(struct fd_context *ctx,
struct fd_ringbuffer *ring,

View file

@ -27,11 +27,13 @@
#include "fd6_emit.h"
BEGINC;
struct fd_ringbuffer *fd6_build_tess_consts(struct fd6_emit *emit) assert_dt;
unsigned fd6_user_consts_cmdstream_size(struct ir3_shader_variant *v);
template <fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
template <fd6_pipeline_type PIPELINE>
struct fd_ringbuffer *
fd6_build_driver_params(struct fd6_emit *emit) assert_dt;
@ -50,6 +52,4 @@ void fd6_emit_link_map(struct fd_screen *screen,
const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring) assert_dt;
ENDC;
#endif /* FD6_CONST_H */

View file

@ -197,6 +197,7 @@ fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit) assert_dt
}
}
template <fd6_pipeline_type PIPELINE>
static const struct fd6_program_state *
get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info)
assert_dt
@ -218,25 +219,27 @@ get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info)
key.key.msaa = (ctx->framebuffer.samples > 1);
key.key.rasterflat = ctx->rasterizer->flatshade;
if (info->mode == PIPE_PRIM_PATCHES) {
struct shader_info *gs_info =
ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.gs);
if (PIPELINE == HAS_TESS_GS) {
if (info->mode == PIPE_PRIM_PATCHES) {
struct shader_info *gs_info =
ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.gs);
key.hs = (struct ir3_shader_state *)ctx->prog.hs;
key.ds = (struct ir3_shader_state *)ctx->prog.ds;
key.hs = (struct ir3_shader_state *)ctx->prog.hs;
key.ds = (struct ir3_shader_state *)ctx->prog.ds;
struct shader_info *ds_info = ir3_get_shader_info(key.ds);
key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);
struct shader_info *ds_info = ir3_get_shader_info(key.ds);
key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);
struct shader_info *fs_info = ir3_get_shader_info(key.fs);
key.key.tcs_store_primid =
BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
(gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
(fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
}
struct shader_info *fs_info = ir3_get_shader_info(key.fs);
key.key.tcs_store_primid =
BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
(gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
(fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
}
if (key.gs) {
key.key.has_gs = true;
if (key.gs) {
key.key.has_gs = true;
}
}
ir3_fixup_shader_state(&ctx->base, &key.key);
@ -267,7 +270,7 @@ flush_streamout(struct fd_context *ctx, struct fd6_emit *emit)
}
}
template <chip CHIP, draw_type DRAW>
template <chip CHIP, fd6_pipeline_type PIPELINE, draw_type DRAW>
static void
draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
unsigned drawid_offset,
@ -296,9 +299,13 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
if (!(ctx->prog.vs && ctx->prog.fs))
return;
if ((info->mode == PIPE_PRIM_PATCHES) || ctx->prog.gs) {
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
} else if (!is_indirect(DRAW)) {
if (PIPELINE == HAS_TESS_GS) {
if ((info->mode == PIPE_PRIM_PATCHES) || ctx->prog.gs) {
ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
}
}
if ((PIPELINE == NO_TESS_GS) && !is_indirect(DRAW)) {
fd6_vsc_update_sizes(ctx->batch, info, &draws[0]);
}
@ -308,7 +315,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
* Otherwise we can just use the previous prog state.
*/
if (unlikely(ctx->gen_dirty & BIT(FD6_GROUP_PROG_KEY))) {
emit.prog = get_program_state(ctx, info);
emit.prog = get_program_state<PIPELINE>(ctx, info);
} else {
emit.prog = fd6_ctx->prog;
}
@ -323,9 +330,11 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
emit.dirty_groups = ctx->gen_dirty;
emit.vs = fd6_emit_get_prog(&emit)->vs;
emit.hs = fd6_emit_get_prog(&emit)->hs;
emit.ds = fd6_emit_get_prog(&emit)->ds;
emit.gs = fd6_emit_get_prog(&emit)->gs;
if (PIPELINE == HAS_TESS_GS) {
emit.hs = fd6_emit_get_prog(&emit)->hs;
emit.ds = fd6_emit_get_prog(&emit)->ds;
emit.gs = fd6_emit_get_prog(&emit)->gs;
}
emit.fs = fd6_emit_get_prog(&emit)->fs;
if (emit.prog->num_driver_params || fd6_ctx->has_dp_state) {
@ -364,7 +373,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
draw0.source_select = DI_SRC_SEL_AUTO_INDEX;
}
if (info->mode == PIPE_PRIM_PATCHES) {
if ((PIPELINE == HAS_TESS_GS) && (info->mode == PIPE_PRIM_PATCHES)) {
struct shader_info *ds_info =
ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.ds);
unsigned tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);
@ -413,7 +422,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
}
if (emit.dirty_groups)
fd6_emit_3d_state<CHIP>(ring, &emit);
fd6_emit_3d_state<CHIP, PIPELINE>(ring, &emit);
/* All known firmware versions do not wait for WFI's with CP_DRAW_AUTO.
* Plus, for the common case where the counter buffer is written by
@ -489,7 +498,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
emit.state.num_groups = 0;
emit.draw = &draws[i];
emit.draw_id = info->increment_draw_id ? i : 0;
fd6_emit_3d_state<CHIP>(ring, &emit);
fd6_emit_3d_state<CHIP, PIPELINE>(ring, &emit);
}
assert(!index_offset); /* handled by util_draw_multi() */
@ -509,7 +518,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
fd_context_all_clean(ctx);
}
template <chip CHIP>
template <chip CHIP, fd6_pipeline_type PIPELINE>
static void
fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
unsigned drawid_offset,
@ -522,30 +531,44 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
/* Non-indirect case is where we are more likely to see a high draw rate: */
if (likely(!indirect)) {
if (info->index_size) {
draw_vbos<CHIP, DRAW_DIRECT_OP_INDEXED>(
draw_vbos<CHIP, PIPELINE, DRAW_DIRECT_OP_INDEXED>(
ctx, info, drawid_offset, NULL, draws, num_draws, index_offset);
} else {
draw_vbos<CHIP, DRAW_DIRECT_OP_NORMAL>(
draw_vbos<CHIP, PIPELINE, DRAW_DIRECT_OP_NORMAL>(
ctx, info, drawid_offset, NULL, draws, num_draws, index_offset);
}
} else if (indirect->count_from_stream_output) {
draw_vbos<CHIP, DRAW_INDIRECT_OP_XFB>(
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_XFB>(
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
} else if (indirect->indirect_draw_count && info->index_size) {
draw_vbos<CHIP, DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED>(
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED>(
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
} else if (indirect->indirect_draw_count) {
draw_vbos<CHIP, DRAW_INDIRECT_OP_INDIRECT_COUNT>(
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDIRECT_COUNT>(
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
} else if (info->index_size) {
draw_vbos<CHIP, DRAW_INDIRECT_OP_INDEXED>(
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDEXED>(
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
} else {
draw_vbos<CHIP, DRAW_INDIRECT_OP_NORMAL>(
draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_NORMAL>(
ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
}
}
/**
 * Pick the fd6_draw_vbos variant that matches the currently bound shader
 * stages and install it as ctx->draw_vbos.
 *
 * If any of the tess-ctrl, tess-eval or geometry stages is bound the
 * HAS_TESS_GS variant is used, otherwise the NO_TESS_GS variant is used.
 */
template <chip CHIP>
static void
fd6_update_draw(struct fd_context *ctx)
{
   const uint32_t amplification_stages = BIT(MESA_SHADER_TESS_CTRL) |
      BIT(MESA_SHADER_TESS_EVAL) | BIT(MESA_SHADER_GEOMETRY);

   /* Plain VS+FS pipeline: */
   if ((ctx->bound_shader_stages & amplification_stages) == 0) {
      ctx->draw_vbos = fd6_draw_vbos<CHIP, NO_TESS_GS>;
      return;
   }

   /* At least one GS/tess stage is bound: */
   ctx->draw_vbos = fd6_draw_vbos<CHIP, HAS_TESS_GS>;
}
static bool
do_lrz_clear(struct fd_context *ctx, enum fd_buffer_mask buffers)
{
@ -638,7 +661,8 @@ fd6_draw_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->clear = fd6_clear;
ctx->draw_vbos = fd6_draw_vbos<CHIP>;
ctx->update_draw = fd6_update_draw<CHIP>;
fd6_update_draw<CHIP>(ctx);
}
/* Teach the compiler about needed variants: */

View file

@ -535,7 +535,7 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem)
return ring;
}
template <chip CHIP>
template <chip CHIP, fd6_pipeline_type PIPELINE>
void
fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
{
@ -634,16 +634,18 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS);
break;
case FD6_GROUP_CONST:
state = fd6_build_user_consts(emit);
state = fd6_build_user_consts<PIPELINE>(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST);
break;
case FD6_GROUP_DRIVER_PARAMS:
state = fd6_build_driver_params(emit);
state = fd6_build_driver_params<PIPELINE>(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_DRIVER_PARAMS);
break;
case FD6_GROUP_PRIMITIVE_PARAMS:
state = fd6_build_tess_consts(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS);
if (PIPELINE == HAS_TESS_GS) {
state = fd6_build_tess_consts(emit);
fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS);
}
break;
case FD6_GROUP_VS_TEX:
state = tex_state(ctx, PIPE_SHADER_VERTEX);
@ -687,8 +689,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
fd6_state_emit(&emit->state, ring);
}
template void fd6_emit_3d_state<A6XX>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template void fd6_emit_3d_state<A7XX>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template void fd6_emit_3d_state<A6XX, NO_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template void fd6_emit_3d_state<A7XX, NO_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template void fd6_emit_3d_state<A6XX, HAS_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template void fd6_emit_3d_state<A7XX, HAS_TESS_GS>(struct fd_ringbuffer *ring, struct fd6_emit *emit);
template <chip CHIP>
void

View file

@ -88,6 +88,18 @@ enum fd6_state_id {
FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS,
};
/**
 * Pipeline type, i.e. whether this is a plain old VS+FS pipeline (which can
 * be high draw rate and should be a fast-path), or a pipeline that uses GS
 * and/or tess to amplify geometry.
 *
 * Used as a template parameter, so that code paths instantiated with
 * NO_TESS_GS can skip the GS/tess specific steps.
 *
 * TODO split GS and TESS?
 */
enum fd6_pipeline_type {
NO_TESS_GS, /* Only has VS+FS */
HAS_TESS_GS, /* Has tess and/or GS */
};
#define ENABLE_ALL \
(CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | \
CP_SET_DRAW_STATE__0_SYSMEM)
@ -333,7 +345,7 @@ fd6_gl2spacing(enum gl_tess_spacing spacing)
}
}
template <chip CHIP>
template <chip CHIP, fd6_pipeline_type PIPELINE>
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
struct fd6_emit *emit) assert_dt;

View file

@ -578,6 +578,9 @@ struct fd_context {
const union pipe_color_union *color, double depth,
unsigned stencil) dt;
/* called to update the draw_vbos hook after the set of bound shader stages changes, etc: */
void (*update_draw)(struct fd_context *ctx);
/* compute: */
void (*launch_grid)(struct fd_context *ctx,
const struct pipe_grid_info *info) dt;

View file

@ -36,11 +36,14 @@ static void
update_bound_stage(struct fd_context *ctx, enum pipe_shader_type shader,
                   bool bound) assert_dt
{
   /* Record whether 'shader' is bound in ctx->bound_shader_stages, and
    * invoke the optional ctx->update_draw hook if the mask changed.
    */
   const uint32_t prev_stages = ctx->bound_shader_stages;

   if (!bound)
      ctx->bound_shader_stages &= ~BIT(shader);
   else
      ctx->bound_shader_stages |= BIT(shader);

   /* Nothing more to do unless the set of bound stages actually changed: */
   if (prev_stages == ctx->bound_shader_stages)
      return;

   /* The hook is optional, so only call it if one is installed: */
   if (ctx->update_draw)
      ctx->update_draw(ctx);
}
static void