freedreno: Add draw cost estimation

Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9535>
2026-05-08 17:48:10 +02:00 · 2021-03-09 14:21:16 -08:00 · 2021-03-09 14:21:16 -08:00 · 4f3c16ec05
commit 4f3c16ec05
parent a1d80c6d96
5 changed files with 59 additions and 0 deletions
--- a/src/gallium/drivers/freedreno/freedreno_batch.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch.h
@ -125,6 +125,33 @@ struct fd_batch {
 	 */
 	const struct fd_gmem_stateobj *gmem_state;

+	/* A calculated "draw cost" value for the batch, which tries to
+	 * estimate the bandwidth-per-sample of all the draws according
+	 * to:
+	 *
+	 *    foreach_draw (...) {
+	 *      cost += num_mrt;
+	 *      foreach_mrt_with_blend_enabled (...)
+	 *        cost++;
+	 *      if (depth_test_enabled)
+	 *        cost++;
+	 *      if (depth_write_enabled)
+	 *        cost++;
+	 *    }
+	 *
+	 * The idea is that each sample-passed minimally does one write
+	 * per MRT.  If blend is enabled, the hw will additionally do
+	 * a framebuffer read per sample-passed (for each MRT with blend
+	 * enabled).  If depth-test is enabled, the hw will additionally
+	 * do a depth buffer read.  If depth-write is enabled, the hw will
+	 * additionally do a depth buffer write.
+	 *
+	 * This does ignore depth buffer traffic for samples which do not
+	 * pass due to depth-test failure, and some other details.  But it is
+	 * just intended to be a rough estimate that is easy to calculate.
+	 */
+	unsigned cost;
+
 	unsigned num_draws;      /* number of draws in current batch */
 	unsigned num_vertices;   /* number of vertices in current batch */

--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@ -312,6 +312,11 @@ struct fd_context {
 	/* Context sequence #, used for batch-cache key: */
 	uint16_t seqno;

+	/* Cost per draw, used in conjunction with samples-passed history to
+	 * estimate whether GMEM or bypass is the better option.
+	 */
+	uint8_t draw_cost;
+
 	/* Are we in process of shadowing a resource? Used to detect recursion
 	 * in transfer_map, and skip unneeded synchronization.
 	 */
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@ -365,6 +365,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
 		util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
 		util_format_short_name(pipe_surface_format(pfb->zsbuf)));

+	batch->cost += ctx->draw_cost;
+
 	for (unsigned i = 0; i < num_draws; i++) {
 		if (ctx->draw_vbo(ctx, info, indirect, &draws[i], index_offset))
 			batch->needs_flush = true;
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@ -43,6 +43,22 @@
 * go in here.
 */

+/* Recompute ctx->draw_cost: one unit per color buffer, one more per
+ * color buffer with blending on, one each for depth test and write.
+ */
+static void
+update_draw_cost(struct fd_context *ctx)
+	assert_dt
+{
+	const unsigned nr_cbufs = ctx->framebuffer.nr_cbufs;
+	unsigned cost = nr_cbufs;
+	for (unsigned rt = 0; rt < nr_cbufs; rt++)
+		cost += fd_blend_enabled(ctx, rt) ? 1 : 0;
+	cost += fd_depth_enabled(ctx) ? 1 : 0;
+	cost += fd_depth_write_enabled(ctx) ? 1 : 0;
+	ctx->draw_cost = cost;
+}
+
 static void
 fd_set_blend_color(struct pipe_context *pctx,
 		const struct pipe_blend_color *blend_color)
@ -292,6 +308,7 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
 	ctx->disabled_scissor.maxy = cso->height;

 	fd_context_dirty(ctx, FD_DIRTY_SCISSOR);
+	update_draw_cost(ctx);
 }

 static void
@ -418,6 +435,7 @@ fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
 	fd_context_dirty(ctx, FD_DIRTY_BLEND);
 	if (old_is_dual != new_is_dual)
 		fd_context_dirty(ctx, FD_DIRTY_BLEND_DUAL);
+	update_draw_cost(ctx);
 }

 static void
@ -470,6 +488,7 @@ fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
 	struct fd_context *ctx = fd_context(pctx);
 	ctx->zsa = hwcso;
 	fd_context_dirty(ctx, FD_DIRTY_ZSA);
+	update_draw_cost(ctx);
 }

 static void
--- a/src/gallium/drivers/freedreno/freedreno_state.h
+++ b/src/gallium/drivers/freedreno/freedreno_state.h
@ -48,6 +48,12 @@ static inline bool fd_stencil_enabled(struct fd_context *ctx)
 	return ctx->zsa && ctx->zsa->stencil[0].enabled;
 }

+static inline bool fd_blend_enabled(struct fd_context *ctx, unsigned n)
+	assert_dt
+{
+	return ctx->blend ? ctx->blend->rt[n].blend_enable : false; /* no blend bound => off */
+}
+
 static inline bool fd_depth_clamp_enabled(struct fd_context *ctx)
 	assert_dt
 {