diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c index 6d5c8969742..7827307f6af 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c @@ -167,27 +167,23 @@ fd6_blend_state_create(struct pipe_context *pctx, so->use_dual_src_blend = cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0); + STATIC_ASSERT((4 * PIPE_MAX_COLOR_BUFS) == (8 * sizeof(so->all_mrt_write_mask))); + so->all_mrt_write_mask = 0; + unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0; for (unsigned i = 0; i <= nr; i++) { const struct pipe_rt_blend_state *rt = &cso->rt[i]; so->reads_dest |= rt->blend_enable; - /* From the PoV of LRZ, having masked color channels is - * the same as having blend enabled, in that the draw will - * care about the fragments from an earlier draw. - * - * NOTE we actually don't care about masked color channels - * that don't actually exist in the render target, but we - * don't know the render target format here to determine - * that. It is probably not worth worrying about, but if - * we find a game/benchmark that goes out of it's way to - * mask off non-existent channels, we should fixup the - * pipe_blend_state to give us more info. 
- */ - if (rt->blend_enable || (rt->colormask != 0xf)) { - so->reads_dest = true; - } + /* Shift as uint32_t: MRT7 lands in bits 28..31, which an int-promoted colormask would overflow. */ + so->all_mrt_write_mask |= (uint32_t)rt->colormask << (4 * i); } + /* Without independent blend only rt[0] is consulted above, but its + * colormask applies to every MRT, so replicate that nibble to all slots: + */ + if (!cso->independent_blend_enable) + so->all_mrt_write_mask = (so->all_mrt_write_mask & 0xf) * 0x11111111u; + util_dynarray_init(&so->variants, so); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h index a610bd59a44..2744791df16 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h @@ -51,6 +51,7 @@ struct fd6_blend_stateobj { struct fd_context *ctx; bool reads_dest; + uint32_t all_mrt_write_mask; struct util_dynarray variants; }; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 72c43fa2f0d..111e0d965dc 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -433,6 +433,14 @@ compute_lrz_state(struct fd6_emit *emit) assert_dt lrz.write = false; } + /* Unwritten channels *that actually exist* are a form of blending + * reading the dest from the PoV of LRZ, but the valid dst channels + * aren't known when the blend CSO is constructed, so we need to + * handle that here. + */ + if (ctx->all_mrt_channel_mask & ~blend->all_mrt_write_mask) + lrz.write = false; + /* if we change depthfunc direction, bail out on using LRZ. 
The * LRZ buffer encodes a min/max depth value per block, but if * we switch from GT/GE <-> LT/LE, those values cannot be diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index d9b0da5ef24..2a3623ab3ff 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -399,8 +399,11 @@ struct fd_context { struct pipe_stencil_ref stencil_ref dt; unsigned sample_mask dt; unsigned min_samples dt; + /* local context fb state, for when ctx->batch is null: */ struct pipe_framebuffer_state framebuffer dt; + uint32_t all_mrt_channel_mask dt; + struct pipe_poly_stipple stipple dt; struct pipe_viewport_state viewport[PIPE_MAX_VIEWPORTS] dt; struct pipe_scissor_state viewport_scissor[PIPE_MAX_VIEWPORTS] dt; diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index a23870b797e..65215e2fcf1 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -289,6 +289,25 @@ fd_set_framebuffer_state(struct pipe_context *pctx, util_copy_framebuffer_state(cso, framebuffer); + STATIC_ASSERT((4 * PIPE_MAX_COLOR_BUFS) == (8 * sizeof(ctx->all_mrt_channel_mask))); + ctx->all_mrt_channel_mask = 0; + + /* Generate a bitmask of all valid channels for all MRTs. Blend + * state with unwritten channels essentially acts as blend enabled, + * which disables LRZ write. But only if the cbuf *has* the masked + * channels, which is not known at the time the blend state is + * created. + */ + for (unsigned i = 0; i < framebuffer->nr_cbufs; i++) { + if (!framebuffer->cbufs[i]) + continue; + + enum pipe_format format = framebuffer->cbufs[i]->format; + unsigned nr = util_format_get_nr_components(format); + + ctx->all_mrt_channel_mask |= BITFIELD_MASK(nr) << (4 * i); + } + cso->samples = util_framebuffer_get_num_samples(cso); if (ctx->screen->reorder) {