zink: rework oom flushing

tracking memory usage on the context is bad: if an app consistently uses a
large amount of memory across multiple batches, it triggers the oom flush
path on every flush and forces fencing

instead, add an explicit flag for oom flushing and another for stalling, and
set them as needed; set_framebuffer_state becomes an additional flush point,
since it's guaranteed not to split a renderpass (a standalone sketch of this
flow follows the diffstat below)

also proactively prune pending batch states if there's a lot of them

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11965>
Mike Blumenkrantz 2021-05-15 10:20:43 -04:00 committed by Marge Bot
parent 0dc77c8aa5
commit d8905446d6
5 changed files with 38 additions and 9 deletions
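
For orientation, here is a small standalone sketch of the flag-and-flush scheme
described in the message above. It is illustrative only: every type, field, and
function in it (struct demo_ctx, check_oom_flush, maybe_oom_flush, the threshold
value) is a simplified stand-in, not the mesa/zink code changed in the hunks below.

/* Standalone illustration only: all names here are stand-ins. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_ctx {
   uint64_t clamp_video_mem;     /* threshold, e.g. ~80% of video memory */
   uint64_t batch_resource_size; /* accumulated per batch, not per context */
   bool oom_flush;
   bool oom_stall;
};

/* called whenever the current batch picks up a resource reference */
static void check_oom_flush(struct demo_ctx *ctx, uint64_t res_size)
{
   ctx->batch_resource_size += res_size;
   if (ctx->batch_resource_size >= ctx->clamp_video_mem) {
      ctx->oom_flush = true; /* flush at the next safe point */
      ctx->oom_stall = true; /* and wait on the fence when that flush happens */
   }
}

/* called at a point that cannot split a renderpass,
 * e.g. a framebuffer state change or an explicit flush */
static void maybe_oom_flush(struct demo_ctx *ctx)
{
   if (!ctx->oom_flush)
      return;
   printf("flushing batch%s\n", ctx->oom_stall ? " and stalling on its fence" : "");
   ctx->oom_flush = false;
   ctx->oom_stall = false;
   ctx->batch_resource_size = 0; /* the next batch starts empty */
}

int main(void)
{
   struct demo_ctx ctx = { .clamp_video_mem = 100 };
   check_oom_flush(&ctx, 60);
   maybe_oom_flush(&ctx); /* below the threshold: nothing happens */
   check_oom_flush(&ctx, 50);
   maybe_oom_flush(&ctx); /* threshold crossed: one flush + stall, flags reset */
   return 0;
}

The key difference from the previous scheme is visible in the flush_batch hunk
below: instead of comparing a context-wide running total against half of video
memory on every flush, the decision is latched once per batch and then consumed
(and cleared) at the next flush point.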

@@ -89,7 +89,6 @@ zink_reset_batch_state(struct zink_context *ctx, struct zink_batch_state *bs)
    pipe_resource_reference(&bs->flush_res, NULL);
-   ctx->resource_size -= bs->resource_size;
    bs->resource_size = 0;
    /* only reset submitted here so that tc fence desync can pick up the 'completed' flag
@@ -551,8 +550,22 @@ zink_end_batch(struct zink_context *ctx, struct zink_batch *batch)
    struct zink_screen *screen = zink_screen(ctx->base.screen);
-   ctx->resource_size += batch->state->resource_size;
    ctx->last_fence = &batch->state->fence;
+   if (ctx->oom_flush || _mesa_hash_table_num_entries(&ctx->batch_states) > 10) {
+      simple_mtx_lock(&ctx->batch_mtx);
+      hash_table_foreach(&ctx->batch_states, he) {
+         struct zink_fence *fence = he->data;
+         struct zink_batch_state *bs = he->data;
+         if (zink_check_batch_completion(ctx, fence->batch_id, true)) {
+            zink_reset_batch_state(ctx, he->data);
+            _mesa_hash_table_remove(&ctx->batch_states, he);
+            util_dynarray_append(&ctx->free_batch_states, struct zink_batch_state *, bs);
+         }
+      }
+      simple_mtx_unlock(&ctx->batch_mtx);
+      if (_mesa_hash_table_num_entries(&ctx->batch_states) > 50)
+         ctx->oom_flush = true;
+   }
    if (screen->device_lost)
       return;
@@ -604,6 +617,16 @@ batch_ptr_add_usage(struct zink_batch *batch, struct set *s, void *ptr)
    return !found;
 }
+ALWAYS_INLINE static void
+check_oom_flush(struct zink_context *ctx, const struct zink_batch *batch)
+{
+   const VkDeviceSize resource_size = batch->state->resource_size;
+   if (resource_size >= zink_screen(ctx->base.screen)->clamp_video_mem) {
+      ctx->oom_flush = true;
+      ctx->oom_stall = true;
+   }
+}
 void
 zink_batch_reference_resource(struct zink_batch *batch, struct zink_resource *res)
 {
@@ -611,6 +634,7 @@ zink_batch_reference_resource(struct zink_batch *batch, struct zink_resource *res)
       return;
    pipe_reference(NULL, &res->obj->reference);
    batch->state->resource_size += res->obj->size;
+   check_oom_flush(batch->state->ctx, batch);
    batch->has_work = true;
 }
@@ -620,6 +644,7 @@ zink_batch_reference_resource_move(struct zink_batch *batch, struct zink_resource *res)
    if (!batch_ptr_add_usage(batch, batch->state->resources, res->obj))
       return;
    batch->state->resource_size += res->obj->size;
+   check_oom_flush(batch->state->ctx, batch);
    batch->has_work = true;
 }

@@ -1907,12 +1907,10 @@ flush_batch(struct zink_context *ctx, bool sync)
       zink_select_draw_vbo(ctx);
       zink_select_launch_grid(ctx);
-      if (ctx->resource_size >= zink_screen(ctx->base.screen)->total_video_mem / 2 ||
-          _mesa_hash_table_num_entries(&ctx->batch_states) > 100) {
-         sync_flush(ctx, zink_batch_state(ctx->last_fence));
-         zink_vkfence_wait(zink_screen(ctx->base.screen), ctx->last_fence, PIPE_TIMEOUT_INFINITE);
-         zink_batch_reset_all(ctx);
-      }
+      if (ctx->oom_stall)
+         zink_fence_wait(&ctx->base);
+      ctx->oom_flush = false;
+      ctx->oom_stall = false;
    }
 }
@@ -2050,6 +2048,9 @@ zink_set_framebuffer_state(struct pipe_context *pctx,
    /* need to ensure we start a new rp on next draw */
    zink_batch_no_rp(ctx);
+   /* this is an ideal time to oom flush since it won't split a renderpass */
+   if (ctx->oom_flush)
+      flush_batch(ctx, false);
 }
 static void

@@ -174,7 +174,8 @@ struct zink_context {
    struct zink_fence *last_fence; //the last command buffer submitted
    struct hash_table batch_states; //submitted batch states
    struct util_dynarray free_batch_states; //unused batch states
-   VkDeviceSize resource_size; //the accumulated size of resources in submitted buffers
+   bool oom_flush;
+   bool oom_stall;
    struct zink_batch batch;
    unsigned shader_has_inlinable_uniforms_mask;

@@ -1904,6 +1904,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config)
    screen->driconf.inline_uniforms = debug_get_bool_option("ZINK_INLINE_UNIFORMS", false);
    screen->total_video_mem = get_video_mem(screen);
+   screen->clamp_video_mem = screen->total_video_mem * 0.8;
    if (!os_get_total_physical_memory(&screen->total_mem))
       goto fail;
    if (screen->info.have_KHR_timeline_semaphore)

@@ -98,6 +98,7 @@ struct zink_screen {
    unsigned mem_cache_count;
    uint64_t total_video_mem;
+   uint64_t clamp_video_mem;
    uint64_t total_mem;
    VkInstance instance;