From c4167e8911bdffb8d95bc00ee9eb0cc39a5fe400 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 14 Apr 2022 16:12:01 -0400 Subject: [PATCH] panfrost: Rework dirty tracking for Valhall Midgard aggregates a large amount of state into its renderer state descriptor. Our current dirty tracking reflects this, with a single RENDERER dirty flag. That won't work well on Valhall, which splits out orthogonal state into independent descriptors (a blend descriptor, a depth/stencil descriptor, and so on). To prepare for Valhall support, this patch moves the driver to finer dirty tracking. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 86 ++++++++++++-------- src/gallium/drivers/panfrost/pan_context.c | 34 ++++---- src/gallium/drivers/panfrost/pan_context.h | 7 +- src/gallium/drivers/panfrost/pan_helpers.c | 3 +- 4 files changed, 75 insertions(+), 55 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 3cc8371387f..65fdfdf7427 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -2895,13 +2895,29 @@ panfrost_is_implicit_prim_restart(const struct pipe_draw_info *info) return info->primitive_restart && implicit; } +/* On Bifrost and older, the Renderer State Descriptor aggregates many pieces of + * 3D state. In particular, it groups the fragment shader descriptor with + * depth/stencil, blend, polygon offset, and multisampling state. These pieces + * of state are dirty tracked independently for the benefit of newer GPUs that + * separate the descriptors. FRAGMENT_RSD_DIRTY_MASK contains the list of 3D + * dirty flags that trigger re-emits of the fragment RSD. + * + * Obscurely, occlusion queries are included. Occlusion query state is nominally + * specified in the draw call descriptor, but must be considered when determining + * early-Z state which is part of the RSD. 
+ */ +#define FRAGMENT_RSD_DIRTY_MASK ( \ + PAN_DIRTY_ZS | PAN_DIRTY_BLEND | PAN_DIRTY_MSAA | \ + PAN_DIRTY_RASTERIZER | PAN_DIRTY_OQ) + static inline void -panfrost_update_state_tex(struct panfrost_batch *batch, - enum pipe_shader_type st) +panfrost_update_shader_state(struct panfrost_batch *batch, + enum pipe_shader_type st) { struct panfrost_context *ctx = batch->ctx; struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, st); + bool frag = (st == PIPE_SHADER_FRAGMENT); unsigned dirty_3d = ctx->dirty; unsigned dirty = ctx->dirty_shader[st]; @@ -2915,50 +2931,48 @@ panfrost_update_state_tex(struct panfrost_batch *batch, panfrost_emit_sampler_descriptors(batch, st); } + /* On Bifrost and older, the fragment shader descriptor is fused + * together with the renderer state; the combined renderer state + * descriptor is emitted below. Otherwise, the shader descriptor is + * standalone and is emitted here. + */ + if ((dirty & PAN_DIRTY_STAGE_SHADER) && !((PAN_ARCH <= 7) && frag)) { + batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st); + } + if ((dirty & ss->dirty_shader) || (dirty_3d & ss->dirty_3d)) { batch->uniform_buffers[st] = panfrost_emit_const_buf(batch, st, &batch->push_uniforms[st]); } + +#if PAN_ARCH <= 7 + /* On Bifrost and older, if the fragment shader changes OR any renderer + * state specified with the fragment shader, the whole renderer state + * descriptor is dirtied and must be re-emitted. 
+ */ + if (frag && ((dirty & PAN_DIRTY_STAGE_SHADER) || + (dirty_3d & FRAGMENT_RSD_DIRTY_MASK))) { + + batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); + } + + if (frag && (dirty & PAN_DIRTY_STAGE_IMAGE)) { + batch->attribs[st] = panfrost_emit_image_attribs(batch, + &batch->attrib_bufs[st], st); + } +#endif } static inline void panfrost_update_state_3d(struct panfrost_batch *batch) { - unsigned dirty = batch->ctx->dirty; + struct panfrost_context *ctx = batch->ctx; + unsigned dirty = ctx->dirty; if (dirty & PAN_DIRTY_TLS_SIZE) panfrost_batch_adjust_stack_size(batch); } -static void -panfrost_update_state_vs(struct panfrost_batch *batch) -{ - enum pipe_shader_type st = PIPE_SHADER_VERTEX; - unsigned dirty = batch->ctx->dirty_shader[st]; - - if (dirty & PAN_DIRTY_STAGE_RENDERER) - batch->rsd[st] = panfrost_emit_compute_shader_meta(batch, st); - - panfrost_update_state_tex(batch, st); -} - -static void -panfrost_update_state_fs(struct panfrost_batch *batch) -{ - enum pipe_shader_type st = PIPE_SHADER_FRAGMENT; - unsigned dirty = batch->ctx->dirty_shader[st]; - - if (dirty & PAN_DIRTY_STAGE_RENDERER) - batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); - - if (dirty & PAN_DIRTY_STAGE_IMAGE) { - batch->attribs[st] = panfrost_emit_image_attribs(batch, - &batch->attrib_bufs[st], st); - } - - panfrost_update_state_tex(batch, st); -} - #if PAN_ARCH >= 6 static mali_ptr panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count) @@ -3207,8 +3221,8 @@ panfrost_direct_draw(struct panfrost_batch *batch, attribs = panfrost_emit_vertex_data(batch, &attrib_bufs); panfrost_update_state_3d(batch); - panfrost_update_state_vs(batch); - panfrost_update_state_fs(batch); + panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX); + panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT); panfrost_clean_state_3d(ctx); /* Fire off the draw itself */ @@ -3303,8 +3317,8 @@ panfrost_indirect_draw(struct panfrost_batch *batch, 
ctx->base_instance_sysval_ptr = 0; panfrost_update_state_3d(batch); - panfrost_update_state_vs(batch); - panfrost_update_state_fs(batch); + panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX); + panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT); panfrost_clean_state_3d(ctx); bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 7d3c050306b..9bdb220429a 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -136,7 +136,7 @@ panfrost_bind_blend_state(struct pipe_context *pipe, void *cso) { struct panfrost_context *ctx = pan_context(pipe); ctx->blend = cso; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_BLEND; } static void @@ -144,7 +144,7 @@ panfrost_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *blend_color) { struct panfrost_context *ctx = pan_context(pipe); - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_BLEND; if (blend_color) ctx->blend_color = *blend_color; @@ -223,12 +223,11 @@ panfrost_bind_rasterizer_state( struct panfrost_context *ctx = pan_context(pctx); ctx->rasterizer = hwcso; - /* We can assume the renderer state descriptor is always dirty, the - * dependencies are too intricate to bother tracking in detail. However - * we could probably diff the renderers for viewport dirty tracking, - * that just cares about the scissor enable and the depth clips. */ - ctx->dirty |= PAN_DIRTY_SCISSOR; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + /* We can assume rasterizer is always dirty, the dependencies are + * too intricate to bother tracking in detail. However we could + * probably diff the renderers for viewport dirty tracking, that + * just cares about the scissor enable and the depth clips. 
*/ + ctx->dirty |= PAN_DIRTY_SCISSOR | PAN_DIRTY_RASTERIZER; } static void @@ -287,6 +286,7 @@ panfrost_bind_vertex_elements_state( { struct panfrost_context *ctx = pan_context(pctx); ctx->vertex = hwcso; + ctx->dirty |= PAN_DIRTY_VERTEX; } static void * @@ -448,7 +448,7 @@ panfrost_bind_shader_state( ctx->shader[type] = hwcso; ctx->dirty |= PAN_DIRTY_TLS_SIZE; - ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER; if (!hwcso) return; @@ -581,6 +581,8 @@ panfrost_set_vertex_buffers( util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers, unbind_num_trailing_slots, take_ownership); + + ctx->dirty |= PAN_DIRTY_VERTEX; } static void @@ -612,7 +614,7 @@ panfrost_set_stencil_ref( { struct panfrost_context *ctx = pan_context(pctx); ctx->stencil_ref = ref; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_ZS; } static void @@ -717,7 +719,7 @@ panfrost_bind_depth_stencil_state(struct pipe_context *pipe, { struct panfrost_context *ctx = pan_context(pipe); ctx->depth_stencil = cso; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_ZS; } static void @@ -726,7 +728,7 @@ panfrost_set_sample_mask(struct pipe_context *pipe, { struct panfrost_context *ctx = pan_context(pipe); ctx->sample_mask = sample_mask; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_MSAA; } static void @@ -735,7 +737,7 @@ panfrost_set_min_samples(struct pipe_context *pipe, { struct panfrost_context *ctx = pan_context(pipe); ctx->min_samples = min_samples; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_MSAA; } static void @@ -788,7 +790,7 @@ panfrost_set_active_query_state(struct pipe_context *pipe, { struct panfrost_context *ctx = pan_context(pipe); ctx->active_queries = enable; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= 
PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_OQ; } static void @@ -873,7 +875,7 @@ panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) query->msaa = (ctx->pipe_framebuffer.samples > 1); ctx->occlusion_query = query; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_OQ; break; } @@ -906,7 +908,7 @@ panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: ctx->occlusion_query = NULL; - ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= PAN_DIRTY_STAGE_RENDERER; + ctx->dirty |= PAN_DIRTY_OQ; break; case PIPE_QUERY_PRIMITIVES_GENERATED: query->end = ctx->prims_generated; diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index ac28f53da45..12644ab0cb9 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -68,10 +68,15 @@ enum pan_dirty_3d { PAN_DIRTY_PARAMS = BITFIELD_BIT(3), PAN_DIRTY_DRAWID = BITFIELD_BIT(4), PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5), + PAN_DIRTY_ZS = BITFIELD_BIT(6), + PAN_DIRTY_BLEND = BITFIELD_BIT(7), + PAN_DIRTY_MSAA = BITFIELD_BIT(8), + PAN_DIRTY_OQ = BITFIELD_BIT(9), + PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10), }; enum pan_dirty_shader { - PAN_DIRTY_STAGE_RENDERER = BITFIELD_BIT(0), + PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0), PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1), PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2), PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3), diff --git a/src/gallium/drivers/panfrost/pan_helpers.c b/src/gallium/drivers/panfrost/pan_helpers.c index b492e0fe76c..e9a02125109 100644 --- a/src/gallium/drivers/panfrost/pan_helpers.c +++ b/src/gallium/drivers/panfrost/pan_helpers.c @@ -28,8 +28,7 @@ void panfrost_analyze_sysvals(struct panfrost_shader_state *ss) { unsigned dirty = 0; - unsigned dirty_shader = - PAN_DIRTY_STAGE_RENDERER | PAN_DIRTY_STAGE_CONST; + unsigned dirty_shader = 
PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST; for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {