diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc index 61d19428d83..b3d2f460551 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc @@ -258,6 +258,7 @@ emit_user_consts(const struct ir3_shader_variant *v, fd6_emit_ubos(v, ring, constbuf); } +template struct fd_ringbuffer * fd6_build_user_consts(struct fd6_emit *emit) { @@ -267,21 +268,25 @@ fd6_build_user_consts(struct fd6_emit *emit) struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING); - /* TODO would be nice to templatize the variants (ie. HAS_GS and HAS_TESS) */ - emit_user_consts(emit->vs, constobj, &ctx->constbuf[PIPE_SHADER_VERTEX]); - if (emit->hs) { - emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]); - emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]); - } - if (emit->gs) { - emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]); + if (PIPELINE == HAS_TESS_GS) { + if (emit->hs) { + emit_user_consts(emit->hs, constobj, &ctx->constbuf[PIPE_SHADER_TESS_CTRL]); + emit_user_consts(emit->ds, constobj, &ctx->constbuf[PIPE_SHADER_TESS_EVAL]); + } + if (emit->gs) { + emit_user_consts(emit->gs, constobj, &ctx->constbuf[PIPE_SHADER_GEOMETRY]); + } } emit_user_consts(emit->fs, constobj, &ctx->constbuf[PIPE_SHADER_FRAGMENT]); return constobj; } +template struct fd_ringbuffer * fd6_build_user_consts(struct fd6_emit *emit); +template struct fd_ringbuffer * fd6_build_user_consts(struct fd6_emit *emit); + +template struct fd_ringbuffer * fd6_build_driver_params(struct fd6_emit *emit) { @@ -303,18 +308,20 @@ fd6_build_driver_params(struct fd6_emit *emit) emit->indirect, emit->draw, emit->draw_id); } - if (emit->gs && emit->gs->need_driver_params) { - ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, - emit->indirect, emit->draw, 0); - } + if (PIPELINE == 
HAS_TESS_GS) { + if (emit->gs && emit->gs->need_driver_params) { + ir3_emit_driver_params(emit->gs, dpconstobj, ctx, emit->info, + emit->indirect, emit->draw, 0); + } - if (emit->hs && emit->hs->need_driver_params) { - ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx); - } + if (emit->hs && emit->hs->need_driver_params) { + ir3_emit_hs_driver_params(emit->hs, dpconstobj, ctx); + } - if (emit->ds && emit->ds->need_driver_params) { - ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, - emit->indirect, emit->draw, 0); + if (emit->ds && emit->ds->need_driver_params) { + ir3_emit_driver_params(emit->ds, dpconstobj, ctx, emit->info, + emit->indirect, emit->draw, 0); + } } fd6_ctx->has_dp_state = true; @@ -322,6 +329,9 @@ fd6_build_driver_params(struct fd6_emit *emit) return dpconstobj; } +template struct fd_ringbuffer * fd6_build_driver_params(struct fd6_emit *emit); +template struct fd_ringbuffer * fd6_build_driver_params(struct fd6_emit *emit); + void fd6_emit_cs_driver_params(struct fd_context *ctx, struct fd_ringbuffer *ring, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.h b/src/gallium/drivers/freedreno/a6xx/fd6_const.h index 12f397110c1..8c277752884 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.h @@ -27,11 +27,13 @@ #include "fd6_emit.h" -BEGINC; - struct fd_ringbuffer *fd6_build_tess_consts(struct fd6_emit *emit) assert_dt; unsigned fd6_user_consts_cmdstream_size(struct ir3_shader_variant *v); + +template struct fd_ringbuffer *fd6_build_user_consts(struct fd6_emit *emit) assert_dt; + +template struct fd_ringbuffer * fd6_build_driver_params(struct fd6_emit *emit) assert_dt; @@ -50,6 +52,4 @@ void fd6_emit_link_map(struct fd_screen *screen, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) assert_dt; -ENDC; - #endif /* FD6_CONST_H */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc index 
3d76a70d43f..6cbd819d853 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc @@ -197,6 +197,7 @@ fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit) assert_dt } } +template static const struct fd6_program_state * get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info) assert_dt @@ -218,25 +219,27 @@ get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info) key.key.msaa = (ctx->framebuffer.samples > 1); key.key.rasterflat = ctx->rasterizer->flatshade; - if (info->mode == PIPE_PRIM_PATCHES) { - struct shader_info *gs_info = - ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.gs); + if (PIPELINE == HAS_TESS_GS) { + if (info->mode == PIPE_PRIM_PATCHES) { + struct shader_info *gs_info = + ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.gs); - key.hs = (struct ir3_shader_state *)ctx->prog.hs; - key.ds = (struct ir3_shader_state *)ctx->prog.ds; + key.hs = (struct ir3_shader_state *)ctx->prog.hs; + key.ds = (struct ir3_shader_state *)ctx->prog.ds; - struct shader_info *ds_info = ir3_get_shader_info(key.ds); - key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode); + struct shader_info *ds_info = ir3_get_shader_info(key.ds); + key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode); - struct shader_info *fs_info = ir3_get_shader_info(key.fs); - key.key.tcs_store_primid = - BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || - (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) || - (fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID))); - } + struct shader_info *fs_info = ir3_get_shader_info(key.fs); + key.key.tcs_store_primid = + BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || + (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) || + (fs_info && (fs_info->inputs_read & (1ull << 
VARYING_SLOT_PRIMITIVE_ID))); + } - if (key.gs) { - key.key.has_gs = true; + if (key.gs) { + key.key.has_gs = true; + } } ir3_fixup_shader_state(&ctx->base, &key.key); @@ -267,7 +270,7 @@ flush_streamout(struct fd_context *ctx, struct fd6_emit *emit) } } -template +template static void draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, unsigned drawid_offset, @@ -296,9 +299,13 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, if (!(ctx->prog.vs && ctx->prog.fs)) return; - if ((info->mode == PIPE_PRIM_PATCHES) || ctx->prog.gs) { - ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS); - } else if (!is_indirect(DRAW)) { + if (PIPELINE == HAS_TESS_GS) { + if ((info->mode == PIPE_PRIM_PATCHES) || ctx->prog.gs) { + ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS); + } + } + + if ((PIPELINE == NO_TESS_GS) && !is_indirect(DRAW)) { fd6_vsc_update_sizes(ctx->batch, info, &draws[0]); } @@ -308,7 +315,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, * Otherwise we can just use the previous prog state. 
*/ if (unlikely(ctx->gen_dirty & BIT(FD6_GROUP_PROG_KEY))) { - emit.prog = get_program_state(ctx, info); + emit.prog = get_program_state(ctx, info); } else { emit.prog = fd6_ctx->prog; } @@ -323,9 +330,11 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, emit.dirty_groups = ctx->gen_dirty; emit.vs = fd6_emit_get_prog(&emit)->vs; - emit.hs = fd6_emit_get_prog(&emit)->hs; - emit.ds = fd6_emit_get_prog(&emit)->ds; - emit.gs = fd6_emit_get_prog(&emit)->gs; + if (PIPELINE == HAS_TESS_GS) { + emit.hs = fd6_emit_get_prog(&emit)->hs; + emit.ds = fd6_emit_get_prog(&emit)->ds; + emit.gs = fd6_emit_get_prog(&emit)->gs; + } emit.fs = fd6_emit_get_prog(&emit)->fs; if (emit.prog->num_driver_params || fd6_ctx->has_dp_state) { @@ -364,7 +373,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, draw0.source_select = DI_SRC_SEL_AUTO_INDEX; } - if (info->mode == PIPE_PRIM_PATCHES) { + if ((PIPELINE == HAS_TESS_GS) && (info->mode == PIPE_PRIM_PATCHES)) { struct shader_info *ds_info = ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.ds); unsigned tessellation = ir3_tess_mode(ds_info->tess._primitive_mode); @@ -413,7 +422,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, } if (emit.dirty_groups) - fd6_emit_3d_state(ring, &emit); + fd6_emit_3d_state(ring, &emit); /* All known firmware versions do not wait for WFI's with CP_DRAW_AUTO. * Plus, for the common case where the counter buffer is written by @@ -489,7 +498,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, emit.state.num_groups = 0; emit.draw = &draws[i]; emit.draw_id = info->increment_draw_id ? 
i : 0; - fd6_emit_3d_state(ring, &emit); + fd6_emit_3d_state(ring, &emit); } assert(!index_offset); /* handled by util_draw_multi() */ @@ -509,7 +518,7 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, fd_context_all_clean(ctx); } -template +template static void fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, unsigned drawid_offset, @@ -522,30 +531,44 @@ fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, /* Non-indirect case is where we are more likely to see a high draw rate: */ if (likely(!indirect)) { if (info->index_size) { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, NULL, draws, num_draws, index_offset); } else { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, NULL, draws, num_draws, index_offset); } } else if (indirect->count_from_stream_output) { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, indirect, draws, num_draws, index_offset); } else if (indirect->indirect_draw_count && info->index_size) { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, indirect, draws, num_draws, index_offset); } else if (indirect->indirect_draw_count) { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, indirect, draws, num_draws, index_offset); } else if (info->index_size) { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, indirect, draws, num_draws, index_offset); } else { - draw_vbos( + draw_vbos( ctx, info, drawid_offset, indirect, draws, num_draws, index_offset); } } +template +static void +fd6_update_draw(struct fd_context *ctx) +{ + const uint32_t gs_tess_stages = BIT(MESA_SHADER_TESS_CTRL) | + BIT(MESA_SHADER_TESS_EVAL) | BIT(MESA_SHADER_GEOMETRY); + + if (ctx->bound_shader_stages & gs_tess_stages) { + ctx->draw_vbos = fd6_draw_vbos; + } else { + ctx->draw_vbos = fd6_draw_vbos; + } +} + static bool do_lrz_clear(struct fd_context *ctx, enum fd_buffer_mask buffers) { @@ -638,7 +661,8 @@ fd6_draw_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); ctx->clear = 
fd6_clear; - ctx->draw_vbos = fd6_draw_vbos; + ctx->update_draw = fd6_update_draw; + fd6_update_draw(ctx); } /* Teach the compiler about needed variants: */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index 7a83b14e1cc..4d356f9147d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -535,7 +535,7 @@ build_prim_mode(struct fd6_emit *emit, struct fd_context *ctx, bool gmem) return ring; } -template +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) { @@ -634,16 +634,18 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS); break; case FD6_GROUP_CONST: - state = fd6_build_user_consts(emit); + state = fd6_build_user_consts(emit); fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST); break; case FD6_GROUP_DRIVER_PARAMS: - state = fd6_build_driver_params(emit); + state = fd6_build_driver_params(emit); fd6_state_take_group(&emit->state, state, FD6_GROUP_DRIVER_PARAMS); break; case FD6_GROUP_PRIMITIVE_PARAMS: - state = fd6_build_tess_consts(emit); - fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS); + if (PIPELINE == HAS_TESS_GS) { + state = fd6_build_tess_consts(emit); + fd6_state_take_group(&emit->state, state, FD6_GROUP_PRIMITIVE_PARAMS); + } break; case FD6_GROUP_VS_TEX: state = tex_state(ctx, PIPE_SHADER_VERTEX); @@ -687,8 +689,10 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_state_emit(&emit->state, ring); } -template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); -template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); +template void 
fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit); template void diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 939f7bb3af2..d97af8c364f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -88,6 +88,18 @@ enum fd6_state_id { FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS, }; +/** + * Pipeline type, i.e. is it just plain old VS+FS (which can be high draw rate and + * should be a fast-path) or is it a pipeline that uses GS and/or tess to + * amplify geometry. + * + * TODO split GS and TESS? + */ +enum fd6_pipeline_type { + NO_TESS_GS, /* Only has VS+FS */ + HAS_TESS_GS, /* Has tess and/or GS */ +}; + #define ENABLE_ALL \ (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM | \ CP_SET_DRAW_STATE__0_SYSMEM) @@ -333,7 +345,7 @@ fd6_gl2spacing(enum gl_tess_spacing spacing) } } -template +template void fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt; diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 1c8f06c289c..c593cadac41 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -578,6 +578,9 @@ struct fd_context { const union pipe_color_union *color, double depth, unsigned stencil) dt; + /* called to update draw_vbos func after bound shader stages change, etc: */ + void (*update_draw)(struct fd_context *ctx); + /* compute: */ void (*launch_grid)(struct fd_context *ctx, const struct pipe_grid_info *info) dt; diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index feb3e56f548..2a93d197bf3 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -36,11 
+36,14 @@ static void update_bound_stage(struct fd_context *ctx, enum pipe_shader_type shader, bool bound) assert_dt { + uint32_t bound_shader_stages = ctx->bound_shader_stages; if (bound) { ctx->bound_shader_stages |= BIT(shader); } else { ctx->bound_shader_stages &= ~BIT(shader); } + if (ctx->update_draw && (bound_shader_stages != ctx->bound_shader_stages)) + ctx->update_draw(ctx); } static void