From fcf58e75d00a32db772d4107946a5fc8b304f196 Mon Sep 17 00:00:00 2001
From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Date: Fri, 4 Mar 2022 13:17:06 -0500
Subject: [PATCH] lavapipe: heap-allocate rendering_state struct

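struct rendering_state has grown to roughly 28k, which is far too big to
keep on the stack. Allocate it together with the lvp_device instead and
reuse it through queue->state on every lvp_execute_cmds() call.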

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15233>
---
 src/gallium/frontends/lavapipe/lvp_device.c  |  5 +-
 src/gallium/frontends/lavapipe/lvp_execute.c | 50 +++++++++++---------
 src/gallium/frontends/lavapipe/lvp_private.h |  4 +-
 3 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c
index 6bfdaf6dacd..79974bcd5e7 100644
--- a/src/gallium/frontends/lavapipe/lvp_device.c
+++ b/src/gallium/frontends/lavapipe/lvp_device.c
@@ -1460,12 +1460,15 @@ VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateDevice(
 
    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
 
+   size_t state_size = lvp_get_rendering_state_size();
    device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
-                       sizeof(*device), 8,
+                       sizeof(*device) + state_size, 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
    if (!device)
       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
+   device->queue.state = device + 1;
+
    struct vk_device_dispatch_table dispatch_table;
    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
       &lvp_device_entrypoints, true);
diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index efaadea1a3a..815f1eb0270 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -4140,45 +4140,51 @@ VkResult lvp_execute_cmds(struct lvp_device *device,
                           struct lvp_queue *queue,
                           struct lvp_cmd_buffer *cmd_buffer)
 {
-   struct rendering_state state;
-   memset(&state, 0, sizeof(state));
-   state.pctx = queue->ctx;
-   state.cso = queue->cso;
-   state.blend_dirty = true;
-   state.dsa_dirty = true;
-   state.rs_dirty = true;
-   state.vp_dirty = true;
+   struct rendering_state *state = queue->state;
+   memset(state, 0, sizeof(*state));
+   state->pctx = queue->ctx;
+   state->cso = queue->cso;
+   state->blend_dirty = true;
+   state->dsa_dirty = true;
+   state->rs_dirty = true;
+   state->vp_dirty = true;
    for (enum pipe_shader_type s = PIPE_SHADER_VERTEX; s < PIPE_SHADER_TYPES; s++) {
       for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++)
-         state.cso_ss_ptr[s][i] = &state.ss[s][i];
+         state->cso_ss_ptr[s][i] = &state->ss[s][i];
    }
    /* create a gallium context */
-   lvp_execute_cmd_buffer(cmd_buffer, &state);
+   lvp_execute_cmd_buffer(cmd_buffer, state);
 
-   state.start_vb = -1;
-   state.num_vb = 0;
+   state->start_vb = -1;
+   state->num_vb = 0;
    cso_unbind_context(queue->cso);
    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
-      if (state.so_targets[i]) {
-         state.pctx->stream_output_target_destroy(state.pctx, state.so_targets[i]);
+      if (state->so_targets[i]) {
+         state->pctx->stream_output_target_destroy(state->pctx, state->so_targets[i]);
       }
    }
 
    for (enum pipe_shader_type s = PIPE_SHADER_VERTEX; s < PIPE_SHADER_TYPES; s++) {
       for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-         if (state.sv[s][i])
-            pipe_sampler_view_reference(&state.sv[s][i], NULL);
+         if (state->sv[s][i])
+            pipe_sampler_view_reference(&state->sv[s][i], NULL);
       }
    }
 
    for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-      if (state.cso_ss_ptr[PIPE_SHADER_COMPUTE][i])
-         state.pctx->delete_sampler_state(state.pctx, state.ss_cso[PIPE_SHADER_COMPUTE][i]);
+      if (state->cso_ss_ptr[PIPE_SHADER_COMPUTE][i])
+         state->pctx->delete_sampler_state(state->pctx, state->ss_cso[PIPE_SHADER_COMPUTE][i]);
    }
 
-   free(state.imageless_views);
-   free(state.pending_clear_aspects);
-   free(state.cleared_views);
-   free(state.attachments);
+   free(state->imageless_views);
+   free(state->pending_clear_aspects);
+   free(state->cleared_views);
+   free(state->attachments);
    return VK_SUCCESS;
 }
+
+size_t
+lvp_get_rendering_state_size(void)
+{
+   return sizeof(struct rendering_state);
+}
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index a57a4941ed3..db9c7c7decf 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -169,6 +169,7 @@ struct lvp_queue {
    uint64_t last_fence_timeline;
    struct pipe_fence_handle *last_fence;
    volatile int count;
+   void *state;
 };
 
 struct lvp_semaphore_wait {
@@ -650,7 +651,8 @@ struct lvp_cmd_push_descriptor_set {
 VkResult lvp_execute_cmds(struct lvp_device *device,
                           struct lvp_queue *queue,
                           struct lvp_cmd_buffer *cmd_buffer);
-
+size_t
+lvp_get_rendering_state_size(void);
 struct lvp_image *lvp_swapchain_get_image(VkSwapchainKHR swapchain,
 					  uint32_t index);