From 69bd1c0c40236f7429b2c97c4d2b9b0aba40d338 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 12 Oct 2022 14:26:39 +0200
Subject: [PATCH] radv: restore uploading shaders individually instead of
 consecutively

The shaders were uploaded consecutively to satisfy an RGP constraint,
but that was more of a workaround. This upload path doesn't work well
with graphics pipeline libraries and was the main blocker for GPL
caching.

This commit breaks capturing shaders with RGP when the offset between
shaders is too big. The next commit will fix that by using shader
relocations.

Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/vulkan/radv_cmd_buffer.c     |  13 +--
 src/amd/vulkan/radv_pipeline.c       | 114 ---------------------------
 src/amd/vulkan/radv_pipeline_cache.c |  42 +---------
 src/amd/vulkan/radv_private.h        |  15 ----
 src/amd/vulkan/radv_rmv.c            |  16 +++-
 src/amd/vulkan/radv_shader.c         |  21 ++++-
 src/amd/vulkan/radv_shader.h         |   6 +-
 7 files changed, 38 insertions(+), 189 deletions(-)
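For context, the two upload strategies can be contrasted with a minimal sketch. The types and helpers below (arena_t, shader_t, align_up) are simplified stand-ins for illustration only, not RADV's real API; the driver's actual allocator is the shader-arena code in radv_shader.c.

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical stand-ins for the driver's types. */
    typedef struct {
       uint8_t *cpu_ptr;   /* CPU mapping of the GPU buffer */
       uint64_t gpu_va;    /* GPU virtual address of the buffer */
       uint32_t offset;    /* next free offset (bump allocation) */
    } arena_t;

    typedef struct {
       const void *code;
       uint32_t code_size;
       uint64_t va;        /* address the GPU fetches this shader from */
    } shader_t;

    static uint32_t align_up(uint32_t v, uint32_t a) { return (v + a - 1) & ~(a - 1); }

    /* Old scheme: one slab per pipeline, shaders packed back to back, so
     * the offsets between a pipeline's shaders stay small (what RGP
     * relied on). */
    static void upload_consecutively(arena_t *arena, shader_t **shaders, unsigned count)
    {
       uint32_t total = 0;
       for (unsigned i = 0; i < count; i++)
          total += align_up(shaders[i]->code_size, 256);

       uint32_t off = arena->offset;   /* a single allocation for everything */
       arena->offset += total;

       for (unsigned i = 0; i < count; i++) {
          memcpy(arena->cpu_ptr + off, shaders[i]->code, shaders[i]->code_size);
          shaders[i]->va = arena->gpu_va + off;
          off += align_up(shaders[i]->code_size, 256);
       }
    }

    /* New scheme: each shader owns its allocation, so a shader compiled
     * for a pipeline library is already uploaded and can be shared or
     * cached without any per-pipeline slab bookkeeping. */
    static void upload_individually(arena_t *arena, shader_t *shader)
    {
       uint32_t off = arena->offset;
       arena->offset += align_up(shader->code_size, 256);
       memcpy(arena->cpu_ptr + off, shader->code, shader->code_size);
       shader->va = arena->gpu_va + off;
    }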
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 9c37d1e05cb..498a773db64 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1898,22 +1898,16 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
       }
    }

-   if (pipeline->base.slab_bo)
-      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo);
-
-   /* With graphics pipeline library, binaries are uploaded from a library and they hold a pointer
-    * to the slab BO.
-    */
    for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
       struct radv_shader *shader = pipeline->base.shaders[s];

-      if (!shader || !shader->bo)
+      if (!shader)
          continue;

       radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, shader->bo);
    }

-   if (pipeline->base.gs_copy_shader && pipeline->base.gs_copy_shader->bo) {
+   if (pipeline->base.gs_copy_shader) {
       radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.gs_copy_shader->bo);
    }

@@ -6109,7 +6103,8 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer,
    cmd_buffer->compute_scratch_waves_wanted =
       MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves);

-   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo);
+   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+                      pipeline->base.shaders[MESA_SHADER_COMPUTE]->bo);

    if (unlikely(cmd_buffer->device->trace_bo))
       radv_save_pipeline(cmd_buffer, &pipeline->base);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 400fc21dc70..90fc7c971ef 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -122,37 +122,6 @@ radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
    return !!pipeline->gs_copy_shader;
 }

-static struct radv_pipeline_slab *
-radv_pipeline_slab_create(struct radv_device *device, struct radv_pipeline *pipeline,
-                          uint32_t code_size)
-{
-   struct radv_pipeline_slab *slab;
-
-   slab = calloc(1, sizeof(*slab));
-   if (!slab)
-      return NULL;
-
-   slab->ref_count = 1;
-
-   slab->alloc = radv_alloc_shader_memory(device, code_size, pipeline);
-   if (!slab->alloc) {
-      free(slab);
-      return NULL;
-   }
-
-   return slab;
-}
-
-void
-radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab)
-{
-   if (!p_atomic_dec_zero(&slab->ref_count))
-      return;
-
-   radv_free_shader_memory(device, slab->alloc);
-   free(slab);
-}
-
 void
 radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                       const VkAllocationCallbacks *allocator)
@@ -189,9 +158,6 @@ radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline
       vk_free(&device->vk.alloc, gfx_pipeline_lib->base.state_data);
    }

-   if (pipeline->slab)
-      radv_pipeline_slab_destroy(device, pipeline->slab);
-
    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
       if (pipeline->shaders[i])
          radv_shader_unref(device, pipeline->shaders[i]);
@@ -1021,23 +987,12 @@ radv_graphics_pipeline_import_lib(struct radv_graphics_pipeline *pipeline,
          continue;

       pipeline->base.shaders[s] = radv_shader_ref(lib->base.base.shaders[s]);
-
-      /* Hold a pointer to the slab BO to indicate the shader is already uploaded. */
-      pipeline->base.shaders[s]->bo = lib->base.base.slab_bo;
    }

    /* Import the GS copy shader if present. */
    if (lib->base.base.gs_copy_shader) {
       assert(!pipeline->base.gs_copy_shader);
       pipeline->base.gs_copy_shader = radv_shader_ref(lib->base.base.gs_copy_shader);
-
-      /* Hold a pointer to the slab BO to indicate the shader is already uploaded. */
-      pipeline->base.gs_copy_shader->bo = lib->base.base.slab_bo;
-   }
-
-   /* Refcount the slab BO to make sure it's not freed when the library is destroyed. */
-   if (lib->base.base.slab) {
-      p_atomic_inc(&lib->base.base.slab->ref_count);
    }

    /* Import the PS epilog if present. */
@@ -2824,69 +2779,6 @@ non_uniform_access_callback(const nir_src *src, void *_)
    return nir_chase_binding(*src).success ? 0x2 : 0x3;
 }

-
-VkResult
-radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
-                    struct radv_shader_binary **binaries, struct radv_shader_binary *gs_copy_binary)
-{
-   uint32_t code_size = 0;
-
-   /* Compute the total code size. */
-   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
-      struct radv_shader *shader = pipeline->shaders[i];
-      if (!shader)
-         continue;
-
-      if (shader->bo)
-         continue;
-
-      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
-   }
-
-   if (pipeline->gs_copy_shader && !pipeline->gs_copy_shader->bo) {
-      code_size += align(pipeline->gs_copy_shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
-   }
-
-   /* Allocate memory for all shader binaries. */
-   pipeline->slab = radv_pipeline_slab_create(device, pipeline, code_size);
-   if (!pipeline->slab)
-      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
-
-   pipeline->slab_bo = pipeline->slab->alloc->arena->bo;
-
-   /* Upload shader binaries. */
-   uint64_t slab_va = radv_buffer_get_va(pipeline->slab_bo);
-   uint32_t slab_offset = pipeline->slab->alloc->offset;
-   char *slab_ptr = pipeline->slab->alloc->arena->ptr;
-
-   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
-      struct radv_shader *shader = pipeline->shaders[i];
-      if (!shader)
-         continue;
-
-      if (shader->bo)
-         continue;
-
-      shader->va = slab_va + slab_offset;
-
-      void *dest_ptr = slab_ptr + slab_offset;
-      if (!radv_shader_binary_upload(device, binaries[i], shader, dest_ptr))
-         return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-      slab_offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
-   }
-
-   if (pipeline->gs_copy_shader && !pipeline->gs_copy_shader->bo) {
-      pipeline->gs_copy_shader->va = slab_va + slab_offset;
-
-      void *dest_ptr = slab_ptr + slab_offset;
-      if (!radv_shader_binary_upload(device, gs_copy_binary, pipeline->gs_copy_shader, dest_ptr))
-         return VK_ERROR_OUT_OF_HOST_MEMORY;
-   }
-
-   return VK_SUCCESS;
-}
-
 static nir_ssa_def *
 radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust,
                                nir_ssa_def *alpha)
@@ -3738,9 +3630,6 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
       }
    }

-   /* Upload shader binaries. */
-   radv_upload_shaders(device, &pipeline->base, binaries, gs_copy_binary);
-
    if (!skip_shaders_cache) {
       if (pipeline->base.gs_copy_shader) {
          assert(!binaries[MESA_SHADER_COMPUTE] && !pipeline->base.shaders[MESA_SHADER_COMPUTE]);
@@ -5587,9 +5476,6 @@ radv_compute_pipeline_compile(struct radv_compute_pipeline *pipeline,
       }
    }

-   /* Upload compute shader binary. */
-   radv_upload_shaders(device, &pipeline->base, binaries, NULL);
-
    if (!keep_executable_info) {
       radv_pipeline_cache_insert_shaders(device, cache, hash, &pipeline->base, binaries,
                                          stack_sizes ? *stack_sizes : NULL,
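With the slab gone, radv_emit_graphics_pipeline() simply records every present shader's BO. Shaders allocated from the same arena share one BO, and the winsys buffer list tolerates repeated adds of the same BO, so the per-stage calls stay cheap. A sketch of that dedup idea, with a hypothetical buffer_list_t standing in for the winsys state (not RADV's real structure):

    #include <stdbool.h>

    typedef struct bo bo_t;            /* opaque GPU buffer object */

    typedef struct {
       bo_t *bos[64];
       unsigned count;
    } buffer_list_t;

    /* Record a BO for the kernel submission, ignoring duplicates, in the
     * spirit of radv_cs_add_buffer(): calling it once per shader is fine
     * even when many shaders live in the same arena BO. */
    static bool buffer_list_add(buffer_list_t *list, bo_t *bo)
    {
       for (unsigned i = 0; i < list->count; i++)
          if (list->bos[i] == bo)
             return true;              /* already tracked */
       if (list->count == 64)
          return false;                /* sketch: fixed capacity */
       list->bos[list->count++] = bo;
       return true;
    }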
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index 747e03aecd9..b116ee5f3a9 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -40,7 +40,6 @@ struct cache_entry {
    uint32_t binary_sizes[MESA_VULKAN_SHADER_STAGES];
    uint32_t num_stack_sizes;
    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
-   struct radv_pipeline_slab *slab;
    char code[0];
 };

@@ -105,8 +104,6 @@ radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
            if (cache->hash_table[i]->shaders[j])
               radv_shader_unref(cache->device, cache->hash_table[i]->shaders[j]);
         }
-        if (cache->hash_table[i]->slab)
-           radv_pipeline_slab_destroy(cache->device, cache->hash_table[i]->slab);
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   mtx_destroy(&cache->mutex);
@@ -323,7 +320,6 @@ radv_create_shaders_from_pipeline_cache(
    uint32_t *num_stack_sizes, bool *found_in_application_cache)
 {
    struct cache_entry *entry;
-   VkResult result;

    if (!cache) {
       cache = device->mem_cache;
@@ -373,9 +369,6 @@ radv_create_shaders_from_pipeline_cache(
       }
    }

-   struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
-   struct radv_shader_binary *gs_copy_binary = NULL;
-   bool needs_upload = false;
    char *p = entry->code;
    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
       if (!entry->shaders[i] && entry->binary_sizes[i]) {
@@ -385,8 +378,7 @@ radv_create_shaders_from_pipeline_cache(
         entry->shaders[i] = radv_shader_create(device, binary, false, true, NULL);

-        needs_upload = true;
-        binaries[i] = binary;
+        free(binary);
      } else if (entry->binary_sizes[i]) {
         p += entry->binary_sizes[i];
      }
@@ -399,27 +391,6 @@ radv_create_shaders_from_pipeline_cache(
       /* For the GS copy shader, RADV uses the compute shader slot to avoid a new cache entry. */
       pipeline->gs_copy_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
       pipeline->shaders[MESA_SHADER_COMPUTE] = NULL;
-      gs_copy_binary = binaries[MESA_SHADER_COMPUTE];
-   }
-
-   if (needs_upload) {
-      result = radv_upload_shaders(device, pipeline, binaries, gs_copy_binary);
-
-      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
-         if (pipeline->shaders[i])
-            free(binaries[i]);
-      }
-      free(gs_copy_binary);
-
-      if (result != VK_SUCCESS) {
-         radv_pipeline_cache_unlock(cache);
-         return false;
-      }
-
-      entry->slab = pipeline->slab;
-   } else {
-      pipeline->slab = entry->slab;
-      pipeline->slab_bo = pipeline->slab->alloc->arena->bo;
    }

    if (num_stack_sizes) {
@@ -440,7 +411,6 @@ radv_create_shaders_from_pipeline_cache(
       for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
          if (entry->shaders[i])
             radv_shader_ref(entry->shaders[i]);
-      p_atomic_inc(&entry->slab->ref_count);
    }

    assert((uintptr_t)p <= (uintptr_t)entry + entry_size(entry));
@@ -471,11 +441,6 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
         radv_shader_ref(pipeline->shaders[i]);
      }

-     radv_pipeline_slab_destroy(cache->device, pipeline->slab);
-
-     pipeline->slab = entry->slab;
-     p_atomic_inc(&pipeline->slab->ref_count);
-
      radv_pipeline_cache_unlock(cache);
      return;
   }
@@ -558,9 +523,6 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipel
         radv_shader_ref(pipeline->shaders[i]);
      }

-   entry->slab = pipeline->slab;
-   p_atomic_inc(&pipeline->slab->ref_count);
-
    radv_pipeline_cache_add_entry(cache, entry);

    radv_pipeline_cache_unlock(cache);
@@ -602,7 +564,6 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, si
         memcpy(dest_entry, entry, size_of_entry);
         for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
            dest_entry->shaders[i] = NULL;
-        dest_entry->slab = NULL;
         radv_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size_of_entry;
@@ -700,7 +661,6 @@ radv_GetPipelineCacheData(VkDevice _device, VkPipelineCache _cache, size_t *pDat
      memcpy(p, entry, size_of_entry);
      for (int j = 0; j < MESA_VULKAN_SHADER_STAGES; ++j)
         ((struct cache_entry *)p)->shaders[j] = NULL;
-     ((struct cache_entry *)p)->slab = NULL;
      p = (char *)p + size_of_entry;
   }
   *pDataSize = (char *)p - (char *)pData;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index c7626bf4e78..442ff6594e8 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -420,10 +420,6 @@ void radv_pipeline_cache_insert_shaders(
    struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
    const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);

-VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
-                             struct radv_shader_binary **binaries,
-                             struct radv_shader_binary *gs_copy_binary);
-
 enum radv_blit_ds_layout {
    RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
    RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
@@ -2071,14 +2067,6 @@ struct radv_pipeline_shader_stack_size {
    uint32_t non_recursive_size;
 };

-struct radv_pipeline_slab {
-   uint32_t ref_count;
-
-   union radv_shader_arena_block *alloc;
-};
-
-void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab);
-
 enum radv_depth_clamp_mode {
    RADV_DEPTH_CLAMP_MODE_VIEWPORT = 0,      /* Clamp to the viewport min/max depth bounds */
    RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE = 1,   /* Clamp between 0.0f and 1.0f */
@@ -2091,9 +2079,6 @@ struct radv_pipeline {

    struct radv_device *device;

-   struct radv_pipeline_slab *slab;
-   struct radeon_winsys_bo *slab_bo;
-
    bool is_internal;
    bool need_indirect_descriptor_sets;

    struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
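The header changes encode the new ownership model: GPU code memory hangs off the refcounted radv_shader (the new alloc field in radv_shader.h below) instead of a separately refcounted radv_pipeline_slab. A minimal sketch of that lifetime, with simplified stand-in types (arena_block_t and the helpers are illustrative, not the driver's code):

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct { uint32_t offset, size; } arena_block_t;   /* stand-in */

    typedef struct {
       uint32_t ref_count;
       arena_block_t *alloc;   /* GPU code memory owned by this shader */
    } shader_t;

    static void free_shader_memory(arena_block_t *b) { free(b); } /* stand-in */

    static shader_t *shader_ref(shader_t *s)
    {
       __atomic_add_fetch(&s->ref_count, 1, __ATOMIC_RELAXED);
       return s;
    }

    /* Pipelines, pipeline libraries and cache entries all just take
     * references; the GPU memory is released exactly once, when the last
     * reference is dropped, mirroring radv_shader_destroy() in the
     * radv_shader.c hunk below. */
    static void shader_unref(shader_t *s)
    {
       if (__atomic_sub_fetch(&s->ref_count, 1, __ATOMIC_ACQ_REL) == 0) {
          free_shader_memory(s->alloc);
          free(s);
       }
    }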
diff --git a/src/amd/vulkan/radv_rmv.c b/src/amd/vulkan/radv_rmv.c
index f68024f40f6..c500df9f940 100644
--- a/src/amd/vulkan/radv_rmv.c
+++ b/src/amd/vulkan/radv_rmv.c
@@ -846,8 +846,15 @@ radv_rmv_log_graphics_pipeline_create(struct radv_device *device, VkPipelineCrea
    vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
                      &create_token);

-   log_resource_bind_locked(device, (uint64_t)_pipeline, pipeline->slab_bo,
-                            pipeline->slab->alloc->offset, pipeline->slab->alloc->size);
+   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
+      struct radv_shader *shader = pipeline->shaders[s];
+
+      if (!shader)
+         continue;
+
+      log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset,
+                               shader->alloc->size);
+   }
    simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
 }

@@ -874,8 +881,9 @@ radv_rmv_log_compute_pipeline_create(struct radv_device *device, VkPipelineCreat
    vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE,
                      &create_token);

-   log_resource_bind_locked(device, (uint64_t)_pipeline, pipeline->slab_bo,
-                            pipeline->slab->alloc->offset, pipeline->slab->alloc->size);
+   struct radv_shader *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+   log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset,
+                            shader->alloc->size);
    simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
 }

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 29bbe045d3d..09091dc0453 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -2032,10 +2032,21 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader *shad
 }
 #endif

-bool
+static bool
 radv_shader_binary_upload(struct radv_device *device, const struct radv_shader_binary *binary,
-                          struct radv_shader *shader, void *dest_ptr)
+                          struct radv_shader *shader)
 {
+   void *dest_ptr;
+
+   shader->alloc = radv_alloc_shader_memory(device, shader->code_size, shader);
+   if (!shader->alloc)
+      return false;
+
+   shader->bo = shader->alloc->arena->bo;
+   shader->va = radv_buffer_get_va(shader->bo) + shader->alloc->offset;
+
+   dest_ptr = shader->alloc->arena->ptr + shader->alloc->offset;
+
    if (binary->type == RADV_BINARY_TYPE_RTLD) {
 #if !defined(USE_LIBELF)
       return false;
@@ -2185,6 +2196,10 @@ radv_shader_create(struct radv_device *device, const struct radv_shader_binary *
             memcpy(shader->statistics, bin->data, bin->stats_size);
          }
       }
+
+   if (!radv_shader_binary_upload(device, binary, shader))
+      return NULL;
+
    return shader;
 }

@@ -2656,6 +2671,8 @@ radv_shader_destroy(struct radv_device *device, struct radv_shader *shader)
 {
    assert(shader->ref_count == 0);

+   radv_free_shader_memory(device, shader->alloc);
+
    free(shader->spirv);
    free(shader->nir_string);
    free(shader->disasm_string);
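Taken together, the radv_shader.c changes make radv_shader_create() return a fully uploaded, self-owning shader. A hedged usage sketch (a fragment, not standalone code; the boolean arguments mirror the cache path above and their exact meanings are elided here):

    /* The binary is transient: its code is copied into the shader's own
     * arena block (shader->alloc / shader->bo / shader->va) during create. */
    struct radv_shader *shader = radv_shader_create(device, binary, false, true, NULL);
    free(binary);
    if (!shader)
       return VK_ERROR_OUT_OF_DEVICE_MEMORY;   /* allocation or upload failed */

    /* At submit time each shader tracks its own BO; see the
     * radv_cmd_buffer.c hunks above. */
    radv_cs_add_buffer(device->ws, cmd_buffer->cs, shader->bo);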
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index bd8e6f4854f..fd490d5c944 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -485,7 +485,8 @@ union radv_shader_arena_block {
 struct radv_shader {
    uint32_t ref_count;

-   struct radeon_winsys_bo *bo; /* Not NULL if imported from a lib */
+   struct radeon_winsys_bo *bo;
+   union radv_shader_arena_block *alloc;
    uint64_t va;

    struct ac_shader_config config;
@@ -575,9 +576,6 @@ struct radv_shader *radv_shader_nir_to_asm(
    int shader_count, const struct radv_pipeline_key *key,
    bool keep_shader_info, bool keep_statistic_info,
    struct radv_shader_binary **binary_out);

-bool radv_shader_binary_upload(struct radv_device *device, const struct radv_shader_binary *binary,
-                               struct radv_shader *shader, void *dest_ptr);
-
 void radv_shader_part_binary_upload(const struct radv_shader_part_binary *binary, void *dest_ptr);

 union radv_shader_arena_block *radv_alloc_shader_memory(struct radv_device *device, uint32_t size,