mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-29 05:10:23 +01:00
freedreno/a6xx: Move CS state to PROG state group
It is pretty easy to just cache the stateobj with the hwcso (since unlike 3d, there is only a single shader state) and re-emit it by pointer when it changes, now that the CS state doesn't depend on the grid info. This also moves immed consts into the PROG state, so they are only updated when the PROG state is dirty. And splits user consts and driver param consts, so they are only re-emitted when needed. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21127>
This commit is contained in:
parent
7ea98091c1
commit
dec49ec50a
7 changed files with 114 additions and 34 deletions
|
|
@ -100,32 +100,38 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
}
|
||||
|
||||
fd6_emit_shader(ctx, ring, v);
|
||||
fd6_emit_immediates(ctx->screen, v, ring);
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
||||
{
|
||||
struct ir3_shader_key key = {};
|
||||
struct ir3_shader_variant *v;
|
||||
struct fd6_compute_state *cs = ctx->compute;
|
||||
struct fd_ringbuffer *ring = ctx->batch->draw;
|
||||
unsigned nglobal = 0;
|
||||
|
||||
if (unlikely(!cs->v)) {
|
||||
struct ir3_shader_key key = {};
|
||||
|
||||
cs->v = ir3_shader_variant(ir3_get_shader(cs->hwcso), key, false, &ctx->debug);
|
||||
if (!cs->v)
|
||||
return;
|
||||
|
||||
cs->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
|
||||
cs_program_emit(ctx, cs->stateobj, cs->v);
|
||||
|
||||
cs->user_consts_cmdstream_size = fd6_user_consts_cmdstream_size(cs->v);
|
||||
}
|
||||
|
||||
trace_start_compute(&ctx->batch->trace, ring, !!info->indirect, info->work_dim,
|
||||
info->block[0], info->block[1], info->block[2],
|
||||
info->grid[0], info->grid[1], info->grid[2]);
|
||||
|
||||
v = ir3_shader_variant(ir3_get_shader(ctx->compute), key, false, &ctx->debug);
|
||||
if (!v)
|
||||
return;
|
||||
|
||||
if (ctx->batch->barrier)
|
||||
fd6_barrier_flush(ctx->batch);
|
||||
|
||||
if (ctx->dirty_shader[PIPE_SHADER_COMPUTE] & FD_DIRTY_SHADER_PROG)
|
||||
cs_program_emit(ctx, ring, v);
|
||||
|
||||
bool emit_instrlen_workaround =
|
||||
v->instrlen > ctx->screen->info->a6xx.instr_cache_size;
|
||||
cs->v->instrlen > ctx->screen->info->a6xx.instr_cache_size;
|
||||
|
||||
/* There appears to be a HW bug where in some rare circumstances it appears
|
||||
* to accidentally use the FS instrlen instead of the CS instrlen, which
|
||||
|
|
@ -143,12 +149,18 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
* See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19023
|
||||
*/
|
||||
if (emit_instrlen_workaround) {
|
||||
OUT_REG(ring, A6XX_SP_FS_INSTRLEN(v->instrlen));
|
||||
OUT_REG(ring, A6XX_SP_FS_INSTRLEN(cs->v->instrlen));
|
||||
fd6_event_write(ctx->batch, ring, LABEL, false);
|
||||
}
|
||||
|
||||
fd6_emit_cs_state(ctx, ring, v);
|
||||
fd6_emit_cs_consts(v, ring, ctx, info);
|
||||
if (ctx->gen_dirty)
|
||||
fd6_emit_cs_state(ctx, ring, cs);
|
||||
|
||||
if (ctx->gen_dirty & BIT(FD6_GROUP_CONST))
|
||||
fd6_emit_cs_user_consts(ctx, ring, cs);
|
||||
|
||||
if (cs->v->need_driver_params || info->input)
|
||||
fd6_emit_cs_driver_params(ctx, ring, cs, info);
|
||||
|
||||
u_foreach_bit (i, ctx->global_bindings.enabled_mask)
|
||||
nglobal++;
|
||||
|
|
@ -171,7 +183,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
|
||||
|
||||
uint32_t shared_size =
|
||||
MAX2(((int)(v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1);
|
||||
MAX2(((int)(cs->v->cs.req_local_mem + info->variable_shared_mem) - 1) / 1024, 1);
|
||||
OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
|
||||
OUT_RING(ring, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
|
||||
A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
|
||||
|
|
@ -230,11 +242,30 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info) in_dt
|
|||
fd_context_all_clean(ctx);
|
||||
}
|
||||
|
||||
static void *
|
||||
fd6_compute_state_create(struct pipe_context *pctx,
|
||||
const struct pipe_compute_state *cso)
|
||||
{
|
||||
struct fd6_compute_state *hwcso = calloc(1, sizeof(*hwcso));
|
||||
hwcso->hwcso = ir3_shader_compute_state_create(pctx, cso);
|
||||
return hwcso;
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_compute_state_delete(struct pipe_context *pctx, void *_hwcso)
|
||||
{
|
||||
struct fd6_compute_state *hwcso = _hwcso;
|
||||
ir3_shader_state_delete(pctx, hwcso->hwcso);
|
||||
if (hwcso->stateobj)
|
||||
fd_ringbuffer_del(hwcso->stateobj);
|
||||
free(hwcso);
|
||||
}
|
||||
|
||||
void
|
||||
fd6_compute_init(struct pipe_context *pctx) disable_thread_safety_analysis
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
ctx->launch_grid = fd6_launch_grid;
|
||||
pctx->create_compute_state = ir3_shader_compute_state_create;
|
||||
pctx->delete_compute_state = ir3_shader_state_delete;
|
||||
pctx->create_compute_state = fd6_compute_state_create;
|
||||
pctx->delete_compute_state = fd6_compute_state_delete;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,13 @@
|
|||
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
/**
 * Per-compute-shader state for a6xx.
 *
 * Wraps the ir3 shader state together with data derived from it, so the
 * compute program state can be cached with the shader and re-emitted by
 * pointer instead of being rebuilt for every launch.
 */
struct fd6_compute_state {
   void *hwcso; /* ir3_shader_state */

   /* Shader variant; NULL until the first grid launch looks it up. */
   struct ir3_shader_variant *v;

   /* Cached program state, built when the variant is first resolved. */
   struct fd_ringbuffer *stateobj;

   /* Result of fd6_user_consts_cmdstream_size() for the variant. */
   uint32_t user_consts_cmdstream_size;
};
|
||||
|
||||
void fd6_compute_init(struct pipe_context *pctx);
|
||||
|
||||
#endif /* FD6_COMPUTE_H_ */
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_compute.h"
|
||||
#include "fd6_pack.h"
|
||||
|
||||
#define emit_const_user fd6_emit_const_user
|
||||
|
|
@ -334,12 +335,20 @@ fd6_build_driver_params(struct fd6_emit *emit)
|
|||
}
|
||||
|
||||
void
|
||||
fd6_emit_cs_consts(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_grid_info *info)
|
||||
fd6_emit_cs_driver_params(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs,
|
||||
const struct pipe_grid_info *info)
|
||||
{
|
||||
ir3_emit_cs_consts(v, ring, ctx, info);
|
||||
fd6_emit_ubos(v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
|
||||
ir3_emit_cs_driver_params(cs->v, ring, ctx, info);
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_cs_user_consts(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs)
|
||||
{
|
||||
emit_user_consts(cs->v, ring, &ctx->constbuf[PIPE_SHADER_COMPUTE]);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -33,9 +33,13 @@ struct fd_ringbuffer *fd6_build_user_consts(struct fd6_emit *emit) assert_dt;
|
|||
struct fd_ringbuffer *
|
||||
fd6_build_driver_params(struct fd6_emit *emit) assert_dt;
|
||||
|
||||
void fd6_emit_cs_consts(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_grid_info *info) assert_dt;
|
||||
void fd6_emit_cs_driver_params(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs,
|
||||
const struct pipe_grid_info *info) assert_dt;
|
||||
void fd6_emit_cs_user_consts(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd6_compute_state *cs) assert_dt;
|
||||
void fd6_emit_immediates(struct fd_screen *screen,
|
||||
const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring) assert_dt;
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@
|
|||
#include "fd6_blend.h"
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_context.h"
|
||||
#include "fd6_compute.h"
|
||||
#include "fd6_emit.h"
|
||||
#include "fd6_image.h"
|
||||
#include "fd6_pack.h"
|
||||
|
|
@ -674,14 +675,31 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
|
||||
void
|
||||
fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *cp)
|
||||
struct fd6_compute_state *cs)
|
||||
{
|
||||
struct fd6_state state = {};
|
||||
|
||||
u_foreach_bit (b, ctx->gen_dirty) {
|
||||
/* We want CP_SET_DRAW_STATE to execute immediately, otherwise we need to
|
||||
* emit consts as draw state groups (which otherwise has no benefit outside
|
||||
* of GMEM 3d using viz stream from binning pass).
|
||||
*
|
||||
* In particular, the PROG state group sets up the configuration for the
|
||||
* const state, so it must execute before we start loading consts, rather
|
||||
* than be deferred until CP_EXEC_CS.
|
||||
*/
|
||||
OUT_PKT7(ring, CP_SET_MODE, 1);
|
||||
OUT_RING(ring, 1);
|
||||
|
||||
uint32_t gen_dirty = ctx->gen_dirty &
|
||||
(BIT(FD6_GROUP_PROG) | BIT(FD6_GROUP_CS_TEX) | BIT(FD6_GROUP_CS_BINDLESS));
|
||||
|
||||
u_foreach_bit (b, gen_dirty) {
|
||||
enum fd6_state_id group = b;
|
||||
|
||||
switch (group) {
|
||||
case FD6_GROUP_PROG:
|
||||
fd6_state_add_group(&state, cs->stateobj, FD6_GROUP_PROG);
|
||||
break;
|
||||
case FD6_GROUP_CS_TEX:
|
||||
fd6_state_take_group(
|
||||
&state,
|
||||
|
|
|
|||
|
|
@ -325,8 +325,9 @@ fd6_gl2spacing(enum gl_tess_spacing spacing)
|
|||
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
|
||||
struct fd6_emit *emit) assert_dt;
|
||||
|
||||
struct fd6_compute_state;
|
||||
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *cp) assert_dt;
|
||||
struct fd6_compute_state *cs) assert_dt;
|
||||
|
||||
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);
|
||||
|
||||
|
|
|
|||
|
|
@ -585,15 +585,12 @@ ir3_emit_fs_consts(const struct ir3_shader_variant *v,
|
|||
emit_common_consts(v, ring, ctx, PIPE_SHADER_FRAGMENT);
|
||||
}
|
||||
|
||||
/* emit compute-shader consts: */
|
||||
static inline void
|
||||
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_grid_info *info) assert_dt
|
||||
ir3_emit_cs_driver_params(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_grid_info *info)
|
||||
assert_dt
|
||||
{
|
||||
assert(gl_shader_stage_is_compute(v->type));
|
||||
|
||||
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
|
||||
emit_kernel_params(ctx, v, ring, info);
|
||||
|
||||
/* a3xx/a4xx can inject these directly */
|
||||
|
|
@ -651,3 +648,16 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* emit compute-shader consts: */
|
||||
static inline void
|
||||
ir3_emit_cs_consts(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
const struct pipe_grid_info *info) assert_dt
|
||||
{
|
||||
assert(gl_shader_stage_is_compute(v->type));
|
||||
|
||||
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
|
||||
|
||||
ir3_emit_cs_driver_params(v, ring, ctx, info);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue