r600g: add cs memory usage accounting and limit it v3 (backport for mesa 9.0)

We are now seeing cs that can go over the vram+gtt size. To avoid failing, flush early any cs that goes over 70% (gtt+vram) usage. 70% is used to allow for some fragmentation. The idea is to compute a gross estimate of the memory requirement of each draw call. After each draw call, memory will be precisely accounted. So the uncertainty is only on the current draw call. In practice this gave a very good estimate (+/- 10% of the target memory limit). v2: Remove leftovers from testing version, remove useless NULL checking. Improve commit message. v3: Add comment to code on memory accounting precision. This version is a backport for mesa 9.0. Signed-off-by: Jerome Glisse <jglisse@redhat.com> Reviewed-by: Marek Olšák <maraeo@gmail.com>
2026-05-08 22:08:26 +02:00 · 2013-01-30 15:02:32 -05:00 · 2013-01-30 15:02:32 -05:00 · 78222e6363
commit 78222e6363
parent 1e8de8437a
7 changed files with 81 additions and 1 deletions
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@ -1721,6 +1721,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 		res = (struct r600_resource*)surf->base.texture;
 		rtex = (struct r600_texture*)res;

+		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
+
 		if (!surf->color_initialized) {
 			evergreen_init_color_surface(rctx, surf);
 		}
@ -1787,6 +1789,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 		surf = (struct r600_surface*)state->zsbuf;
 		res = (struct r600_resource*)surf->base.texture;

+		r600_context_add_resource_size(ctx, state->zsbuf->texture);
+
 		if (!surf->depth_initialized) {
 			evergreen_init_depth_surface(rctx, surf);
 		}
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@ -635,6 +635,16 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 {
 	struct r600_atom *state;

+	if (!ctx->ws->cs_memory_below_limit(ctx->cs, ctx->vram, ctx->gtt)) {
+		ctx->gtt = 0;
+		ctx->vram = 0;
+		r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
+		return;
+	}
+	/* all will be accounted once relocations are emitted */
+	ctx->gtt = 0;
+	ctx->vram = 0;
+
 	/* The number of dwords we already used in the CS so far. */
 	num_dw += ctx->cs->cdw;

@ -953,6 +963,8 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)

 	ctx->pm4_dirty_cdwords = 0;
 	ctx->flags = 0;
+	ctx->gtt = 0;
+	ctx->vram = 0;

 	/* Begin a new CS. */
 	r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@ -371,6 +371,10 @@ struct r600_context {

 	unsigned default_ps_gprs, default_vs_gprs;

+	/* current unaccounted memory usage */
+	uint64_t			vram;
+	uint64_t			gtt;
+
 	/* States based on r600_atom. */
 	struct list_head		dirty_states;
 	struct r600_command_buffer	start_cs_cmd; /* invariant state mostly */
@ -886,4 +890,28 @@ static INLINE uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_
 	return rscreen->ws->buffer_get_virtual_address(rresource->cs_buf);
 }

+static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_resource *res = (struct r600_resource *)r;
+	uint64_t size;
+
+	/* Nothing to account when no resource is given. */
+	if (!r)
+		return;
+
+	/*
+	 * Gross per-draw estimate: the buffer size is added to the pending
+	 * counter of every domain flagged in the resource's domains mask.
+	 * After each draw call, memory will be precisely accounted, so only
+	 * the current draw call is uncertain. In practice this gave a very
+	 * good estimate (+/- 10% of the target memory limit).
+	 */
+	size = res->buf->size;
+	if (res->domains & RADEON_DOMAIN_GTT)
+		rctx->gtt += size;
+	if (res->domains & RADEON_DOMAIN_VRAM)
+		rctx->vram += size;
+}
+
 #endif
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@ -1615,6 +1615,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 		res = (struct r600_resource*)surf->base.texture;
 		rtex = (struct r600_texture*)res;

+		r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
+
 		if (!surf->color_initialized || force_cmask_fmask) {
 			r600_init_color_surface(rctx, surf, force_cmask_fmask);
 			if (force_cmask_fmask) {
@ -1673,6 +1675,8 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
 		surf = (struct r600_surface*)state->zsbuf;
 		res = (struct r600_resource*)surf->base.texture;

+		r600_context_add_resource_size(ctx, state->zsbuf->texture);
+
 		if (!surf->depth_initialized) {
 			r600_init_depth_surface(rctx, surf);
 		}
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@ -504,7 +504,8 @@ void r600_set_index_buffer(struct pipe_context *ctx,

 	if (ib) {
 		pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer);
-	        memcpy(&rctx->index_buffer, ib, sizeof(*ib));
+		memcpy(&rctx->index_buffer, ib, sizeof(*ib));
+		r600_context_add_resource_size(ctx, ib->buffer);
 	} else {
 		pipe_resource_reference(&rctx->index_buffer.buffer, NULL);
 	}
@ -549,6 +550,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
 				vb[i].buffer_offset = input[i].buffer_offset;
 				pipe_resource_reference(&vb[i].buffer, input[i].buffer);
 				new_buffer_mask |= 1 << i;
+				r600_context_add_resource_size(ctx, input[i].buffer);
 			} else {
 				pipe_resource_reference(&vb[i].buffer, NULL);
 				disable_mask |= 1 << i;
@ -648,6 +650,7 @@ void r600_set_sampler_views(struct pipe_context *pipe,

 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], views[i]);
 			new_mask |= 1 << i;
+			r600_context_add_resource_size(pipe, views[i]->texture);
 		} else {
 			pipe_sampler_view_reference((struct pipe_sampler_view **)&dst->views.views[i], NULL);
 			disable_mask |= 1 << i;
@ -822,6 +825,8 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state)
 	rctx->ps_shader = (struct r600_pipe_shader_selector *)state;
 	r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate);

+	r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->ps_shader->current->bo);
+
 	if (rctx->chip_class <= R700) {
 		bool multiwrite = rctx->ps_shader->current->shader.fs_write_all;

@ -848,6 +853,8 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state)
 	if (state) {
 		r600_context_pipe_state_set(rctx, &rctx->vs_shader->current->rstate);

+		r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
+
 		if (rctx->chip_class < EVERGREEN && rctx->ps_shader)
 			r600_adjust_gprs(rctx);
 	}
@ -957,10 +964,13 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
 		} else {
 			u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer);
 		}
+		/* account it in gtt */
+		rctx->gtt += input->buffer_size;
 	} else {
 		/* Setup the hw buffer. */
 		cb->buffer_offset = input->buffer_offset;
 		pipe_resource_reference(&cb->buffer, input->buffer);
+		r600_context_add_resource_size(ctx, input->buffer);
 	}

 	state->enabled_mask |= 1 << index;
@ -1023,6 +1033,7 @@ void r600_set_so_targets(struct pipe_context *ctx,
 	/* Set the new targets. */
 	for (i = 0; i < num_targets; i++) {
 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+		r600_context_add_resource_size(ctx, targets[i]->buffer);
 	}
 	for (; i < rctx->num_so_targets; i++) {
 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->so_targets[i], NULL);
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@ -366,6 +366,16 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
    return status;
 }

+static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
+{
+    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    /* used_* counters plus the pending sizes must stay strictly below 70% of each pool. */
+    boolean gart_ok = (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7;
+    boolean vram_ok = (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
+
+    return gart_ok && vram_ok;
+}
+
 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
 {
@ -549,6 +559,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
+    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@ -308,6 +308,16 @@ struct radeon_winsys {
     */
    boolean (*cs_validate)(struct radeon_winsys_cs *cs);

+    /**
+     * Return TRUE if the memory used by the relocs added so far, plus the
+     * pending \p vram and \p gtt sizes, is below the VRAM and GTT limits.
+     *
+     * \param cs        The command stream whose memory usage is checked.
+     * \param vram      VRAM size pending to be used, not yet accounted in cs.
+     * \param gtt       GTT size pending to be used, not yet accounted in cs.
+     */
+    boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
+
    /**
     * Write a relocated dword to a command buffer.
     *