v3dv: refactor events

This replaces our current implementation, which is 100% CPU based,
with an implementation that uses compute shaders for the GPU-side
event functions. The benefit of this approach is that we no longer
need to stall on the CPU to handle GPU-side event commands.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19313>
Author: Iago Toral Quiroga
Date:   2022-10-19 09:48:19 +02:00
Parent: 8cd50ef071
Commit: ecb01d53fd
4 changed files with 578 additions and 149 deletions
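For context, these are the application-facing entry points whose backend this commit changes: vkCmdSetEvent2 and vkCmdWaitEvents2. A minimal hypothetical usage sketch (the command buffer, event and dependency info are placeholders, not part of the commit):

#include <vulkan/vulkan.h>

static void record_event_sync(VkCommandBuffer cmd, VkEvent event)
{
   VkDependencyInfo dep = { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO };

   /* Before this commit: recorded a CPU job that the queue thread executed
    * with an atomic store. Now: records a 1x1x1 compute dispatch that
    * writes the event's state byte in a device BO. */
   vkCmdSetEvent2(cmd, event, &dep);

   /* Before: the queue thread polled event state with usleep(). Now: a
    * compute dispatch spins in a shader until the state byte reads 1. */
   vkCmdWaitEvents2(cmd, 1, &event, &dep);
}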


@@ -163,14 +163,6 @@ job_destroy_gpu_csd_resources(struct v3dv_job *job)
v3dv_bo_free(job->device, job->csd.shared_memory);
}
static void
job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
assert(job->cmd_buffer);
vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
}
void
v3dv_job_destroy(struct v3dv_job *job)
{
@@ -191,9 +183,6 @@ v3dv_job_destroy(struct v3dv_job *job)
case V3DV_JOB_TYPE_GPU_CSD:
job_destroy_gpu_csd_resources(job);
break;
case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
job_destroy_cpu_wait_events_resources(job);
break;
default:
break;
}
@@ -3757,6 +3746,74 @@ v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
static void
cmd_buffer_emit_set_event(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_event *event,
uint8_t value)
{
assert(value == 0 || value == 1);
struct v3dv_device *device = cmd_buffer->device;
VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
v3dv_CmdBindPipeline(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.set_event_pipeline);
v3dv_CmdBindDescriptorSets(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.pipeline_layout,
0, 1, &device->events.descriptor_set, 0, NULL);
assert(event->index < device->events.desc_count);
uint32_t offset = event->index;
v3dv_CmdPushConstants(commandBuffer,
device->events.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT,
0, 4, &offset);
v3dv_CmdPushConstants(commandBuffer,
device->events.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT,
4, 1, &value);
v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
}
static void
cmd_buffer_emit_wait_event(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_event *event)
{
struct v3dv_device *device = cmd_buffer->device;
VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
v3dv_CmdBindPipeline(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.wait_event_pipeline);
v3dv_CmdBindDescriptorSets(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.pipeline_layout,
0, 1, &device->events.descriptor_set, 0, NULL);
assert(event->index < device->events.desc_count);
uint32_t offset = event->index;
v3dv_CmdPushConstants(commandBuffer,
device->events.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT,
0, 4, &offset);
v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
VkEvent _event,
@@ -3771,16 +3828,8 @@ v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_SET_EVENT,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
job->cpu.event_set.event = event;
job->cpu.event_set.state = 1;
list_addtail(&job->list_link, &cmd_buffer->jobs);
v3dv_CmdPipelineBarrier2(commandBuffer, pDependencyInfo);
cmd_buffer_emit_set_event(cmd_buffer, event, 1);
}
VKAPI_ATTR void VKAPI_CALL
@@ -3797,16 +3846,7 @@ v3dv_CmdResetEvent2(VkCommandBuffer commandBuffer,
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_SET_EVENT,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
job->cpu.event_set.event = event;
job->cpu.event_set.state = 0;
list_addtail(&job->list_link, &cmd_buffer->jobs);
cmd_buffer_emit_set_event(cmd_buffer, event, 0);
}
VKAPI_ATTR void VKAPI_CALL
@@ -3816,43 +3856,10 @@ v3dv_CmdWaitEvents2(VkCommandBuffer commandBuffer,
const VkDependencyInfo *pDependencyInfos)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
assert(eventCount > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount;
job->cpu.event_wait.events =
vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!job->cpu.event_wait.events) {
v3dv_flag_oom(cmd_buffer, NULL);
return;
}
for (uint32_t i = 0; i < eventCount; i++) {
struct v3dv_event *event = v3dv_event_from_handle(pEvents[i]);
cmd_buffer_emit_wait_event(cmd_buffer, event);
}
job->cpu.event_wait.event_count = eventCount;
for (uint32_t i = 0; i < eventCount; i++)
job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
/* vkCmdWaitEvents can be recorded inside a render pass, so we might have
* an active job.
*
* If we are inside a render pass, because vkCmd(Re)SetEvent can't happen
* inside a render pass, it is safe to move the wait job so it happens right
* before the current job we are currently recording for the subpass, if any
* (it would actually be safe to move it all the way back to right before
* the start of the render pass).
*
* If we are outside a render pass then we should not have any on-going job
* and we are free to just add the wait job without restrictions.
*/
assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
VKAPI_ATTR void VKAPI_CALL


@@ -42,6 +42,7 @@
#include "common/v3d_debug.h"
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "drm-uapi/v3d_drm.h"
#include "format/u_format.h"
@@ -2022,6 +2023,12 @@ destroy_device_meta(struct v3dv_device *device)
v3dv_meta_texel_buffer_copy_finish(device);
}
static VkResult
device_allocate_event_resources(struct v3dv_device *device);
static void
device_free_event_resources(struct v3dv_device *device);
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
@@ -2101,6 +2108,18 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
util_dynarray_init(&device->device_address_bo_list,
device->device_address_mem_ctx);
mtx_init(&device->events.lock, mtx_plain);
if (!device->events.bo) {
result = device_allocate_event_resources(device);
if (result != VK_SUCCESS)
goto fail;
}
if (list_is_empty(&device->events.free_list)) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
*pDevice = v3dv_device_to_handle(device);
return VK_SUCCESS;
@@ -2122,6 +2141,10 @@ v3dv_DestroyDevice(VkDevice _device,
device->vk.dispatch_table.DeviceWaitIdle(_device);
queue_finish(&device->queue);
device_free_event_resources(device);
mtx_destroy(&device->events.lock);
destroy_device_meta(device);
v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
@@ -2895,6 +2918,411 @@ v3dv_GetMemoryFdKHR(VkDevice _device,
return VK_SUCCESS;
}
static nir_shader *
get_set_event_cs(void)
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"set event cs");
b.shader->info.workgroup_size[0] = 1;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
nir_ssa_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *value =
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_store_ssbo(&b, value, buf, offset,
.access = 0, .write_mask = 0x1, .align_mul = 4);
return b.shader;
}
static nir_shader *
get_wait_event_cs(void)
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"wait event cs");
b.shader->info.workgroup_size[0] = 1;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
nir_ssa_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_loop *loop = nir_push_loop(&b);
nir_ssa_def *load =
nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
nir_ssa_def *value = nir_i2i32(&b, load);
nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
nir_jump(&b, nir_jump_break);
nir_pop_if(&b, if_stmt);
nir_pop_loop(&b, loop);
return b.shader;
}
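Expressed as plain C over the mapped events BO, the two shaders built above reduce to the following sketch (illustrative semantics only, with hypothetical helper names; the real shaders run on the GPU against the SSBO bound at set 0, binding 0):

#include <stdint.h>

/* Semantics of get_set_event_cs: store one byte at the push-constant offset. */
static void set_event_cs_semantics(uint8_t *events, uint32_t offset, uint8_t value)
{
   events[offset] = value;
}

/* Semantics of get_wait_event_cs: spin until the event byte reads 1. */
static void wait_event_cs_semantics(const volatile uint8_t *events, uint32_t offset)
{
   while (events[offset] != 1)
      ; /* the NIR loop breaks on nir_ieq_imm(value, 1) */
}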
static VkResult
create_compute_pipeline_from_nir(struct v3dv_device *device,
nir_shader *nir,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
VkPipelineShaderStageCreateInfo cs_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_to_handle(&cs_m),
.pName = "main",
};
VkComputePipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = cs_stage,
.layout = pipeline_layout,
};
VkResult result =
v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
1, &info, &device->vk.alloc, pipeline);
return result;
}
static bool
device_create_event_pipelines(struct v3dv_device *device)
{
VkResult result;
if (!device->events.descriptor_set_layout) {
/* Pipeline layout:
* - 1 storage buffer for the BO with the events state.
* - 2 push constants:
* 0B: offset of the event in the buffer (4 bytes).
* 4B: value for the event (1 byte), only used with the set_event_pipeline.
*/
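/* For illustration, the host-side packing this layout assumes (a sketch
* mirroring the v3dv_CmdPushConstants calls in cmd_buffer_emit_set_event,
* not additional driver code):
*
*    uint32_t offset = event->index;
*    v3dv_CmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 4, &offset);
*    uint8_t value = 1;
*    v3dv_CmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 4, 1, &value);
*/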
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
};
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = &descriptor_set_layout_binding,
};
result =
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
&descriptor_set_layout_info,
&device->vk.alloc,
&device->events.descriptor_set_layout);
if (result != VK_SUCCESS)
return false;
}
if (!device->events.pipeline_layout) {
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->events.descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges =
&(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 5 },
};
result =
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
&pipeline_layout_info,
&device->vk.alloc,
&device->events.pipeline_layout);
if (result != VK_SUCCESS)
return false;
}
VkPipeline pipeline;
if (!device->events.set_event_pipeline) {
nir_shader *set_event_cs_nir = get_set_event_cs();
result = create_compute_pipeline_from_nir(device,
set_event_cs_nir,
device->events.pipeline_layout,
&pipeline);
ralloc_free(set_event_cs_nir);
if (result != VK_SUCCESS)
return false;
device->events.set_event_pipeline = pipeline;
}
if (!device->events.wait_event_pipeline) {
nir_shader *wait_event_cs_nir = get_wait_event_cs();
result = create_compute_pipeline_from_nir(device,
wait_event_cs_nir,
device->events.pipeline_layout,
&pipeline);
ralloc_free(wait_event_cs_nir);
if (result != VK_SUCCESS)
return false;
device->events.wait_event_pipeline = pipeline;
}
return true;
}
static void
device_destroy_event_pipelines(struct v3dv_device *device)
{
VkDevice _device = v3dv_device_to_handle(device);
v3dv_DestroyPipeline(_device, device->events.set_event_pipeline,
&device->vk.alloc);
device->events.set_event_pipeline = VK_NULL_HANDLE;
v3dv_DestroyPipeline(_device, device->events.wait_event_pipeline,
&device->vk.alloc);
device->events.wait_event_pipeline = VK_NULL_HANDLE;
v3dv_DestroyPipelineLayout(_device, device->events.pipeline_layout,
&device->vk.alloc);
device->events.pipeline_layout = VK_NULL_HANDLE;
v3dv_DestroyDescriptorSetLayout(_device,
device->events.descriptor_set_layout,
&device->vk.alloc);
device->events.descriptor_set_layout = VK_NULL_HANDLE;
}
static VkResult
device_allocate_event_resources(struct v3dv_device *device)
{
VkResult result = VK_SUCCESS;
VkDevice _device = v3dv_device_to_handle(device);
/* BO with event states. Make sure we always align to a page size (4096)
* to ensure we use all the memory the kernel will allocate for the BO.
*/
const uint32_t bo_size = 4096;
struct v3dv_bo *bo = v3dv_bo_alloc(device, bo_size, "events", true);
if (!bo) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
device->events.bo = bo;
if (!v3dv_bo_map(device, bo, bo_size)) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
/* List of free event state slots in the BO, 1 byte per slot */
device->events.desc_count = bo_size;
device->events.desc =
vk_alloc2(&device->vk.alloc, NULL,
device->events.desc_count * sizeof(struct v3dv_event_desc), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->events.desc) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
list_inithead(&device->events.free_list);
for (uint32_t i = 0; i < device->events.desc_count; i++) {
device->events.desc[i].index = i;
list_addtail(&device->events.desc[i].link, &device->events.free_list);
}
/* Vulkan buffer for the event state BO */
VkBufferCreateInfo buf_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = bo->size,
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
};
result = v3dv_CreateBuffer(_device, &buf_info, NULL,
&device->events.buffer);
if (result != VK_SUCCESS)
goto fail;
struct v3dv_device_memory *mem =
vk_object_zalloc(&device->vk, NULL, sizeof(*mem),
VK_OBJECT_TYPE_DEVICE_MEMORY);
if (!mem) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
mem->bo = bo;
mem->type = &device->pdevice->memory.memoryTypes[0];
device->events.mem = v3dv_device_memory_to_handle(mem);
VkBindBufferMemoryInfo bind_info = {
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
.buffer = device->events.buffer,
.memory = device->events.mem,
.memoryOffset = 0,
};
bind_buffer_memory(&bind_info);
/* Pipelines */
if (!device_create_event_pipelines(device)) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
/* Descriptor pool & set to access the buffer */
VkDescriptorPoolSize pool_size = {
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
};
VkDescriptorPoolCreateInfo pool_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = 1,
.poolSizeCount = 1,
.pPoolSizes = &pool_size,
};
result =
v3dv_CreateDescriptorPool(_device, &pool_info, NULL,
&device->events.descriptor_pool);
if (result != VK_SUCCESS)
goto fail;
VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = device->events.descriptor_pool,
.descriptorSetCount = 1,
.pSetLayouts = &device->events.descriptor_set_layout,
};
result = v3dv_AllocateDescriptorSets(_device, &alloc_info,
&device->events.descriptor_set);
if (result != VK_SUCCESS)
goto fail;
VkDescriptorBufferInfo desc_buf_info = {
.buffer = device->events.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
VkWriteDescriptorSet write = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = device->events.descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &desc_buf_info,
};
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
return VK_SUCCESS;
fail:
device_free_event_resources(device);
return result;
}
static void
device_free_event_resources(struct v3dv_device *device)
{
if (device->events.bo) {
v3dv_bo_free(device, device->events.bo);
device->events.bo = NULL;
}
if (device->events.desc) {
vk_free2(&device->vk.alloc, NULL, device->events.desc);
device->events.desc = NULL;
}
vk_object_free(&device->vk, NULL,
v3dv_device_memory_from_handle(device->events.mem));
device->events.mem = VK_NULL_HANDLE;
v3dv_DestroyBuffer(v3dv_device_to_handle(device),
device->events.buffer, NULL);
device->events.buffer = VK_NULL_HANDLE;
v3dv_FreeDescriptorSets(v3dv_device_to_handle(device),
device->events.descriptor_pool,
1, &device->events.descriptor_set);
device->events.descriptor_set = VK_NULL_HANDLE;
v3dv_DestroyDescriptorPool(v3dv_device_to_handle(device),
device->events.descriptor_pool,
NULL);
device->events.descriptor_pool = VK_NULL_HANDLE;
device_destroy_event_pipelines(device);
}
static struct v3dv_event_desc *
device_allocate_event_descriptor(struct v3dv_device *device)
{
mtx_lock(&device->events.lock);
if (list_is_empty(&device->events.free_list)) {
mtx_unlock(&device->events.lock);
return NULL;
}
struct v3dv_event_desc *desc =
list_first_entry(&device->events.free_list, struct v3dv_event_desc, link);
list_del(&desc->link);
mtx_unlock(&device->events.lock);
return desc;
}
static void
device_free_event_descriptor(struct v3dv_device *device, uint32_t index)
{
mtx_lock(&device->events.lock);
assert(index < device->events.desc_count);
list_addtail(&device->events.desc[index].link, &device->events.free_list);
mtx_unlock(&device->events.lock);
}
static void
device_event_set_value(struct v3dv_device *device,
struct v3dv_event *event,
uint8_t value)
{
assert(value == 0 || value == 1);
uint8_t *data = (uint8_t *) device->events.bo->map;
data[event->index] = value;
}
static uint8_t
device_event_get_value(struct v3dv_device *device, struct v3dv_event *event)
{
uint8_t *data = (uint8_t *) device->events.bo->map;
return data[event->index];
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateEvent(VkDevice _device,
const VkEventCreateInfo *pCreateInfo,
@@ -2902,17 +3330,29 @@ v3dv_CreateEvent(VkDevice _device,
VkEvent *pEvent)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
VkResult result = VK_SUCCESS;
struct v3dv_event *event =
vk_object_zalloc(&device->vk, pAllocator, sizeof(*event),
VK_OBJECT_TYPE_EVENT);
if (!event)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!event) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
/* Events are created in the unsignaled state */
event->state = false;
struct v3dv_event_desc *desc = device_allocate_event_descriptor(device);
if (!desc) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
event->index = desc->index;
device_event_set_value(device, event, 0);
*pEvent = v3dv_event_to_handle(event);
return VK_SUCCESS;
fail:
if (event)
vk_object_free(&device->vk, pAllocator, event);
return result;
}
VKAPI_ATTR void VKAPI_CALL
@@ -2926,29 +3366,33 @@ v3dv_DestroyEvent(VkDevice _device,
if (!event)
return;
device_free_event_descriptor(device, event->index);
vk_object_free(&device->vk, pAllocator, event);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetEventStatus(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_event, event, _event);
return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET;
return device_event_get_value(device, event) ? VK_EVENT_SET : VK_EVENT_RESET;
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_SetEvent(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_event, event, _event);
p_atomic_set(&event->state, 1);
device_event_set_value(device, event, 1);
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetEvent(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_event, event, _event);
p_atomic_set(&event->state, 0);
device_event_set_value(device, event, 0);
return VK_SUCCESS;
}


@@ -454,6 +454,15 @@ struct v3dv_pipeline_cache {
bool externally_synchronized;
};
/* This is used to implement a list of free events in the BO we use to
* hold event states. The index here is used to calculate the offset
* within that BO.
*/
struct v3dv_event_desc {
struct list_head link;
uint32_t index;
};
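Since the state BO stores one byte per event, an index maps directly to a byte offset. A hypothetical helper, just to make the mapping explicit:

static inline uint32_t
v3dv_event_state_offset(const struct v3dv_event_desc *desc)
{
   /* Hypothetical: one byte per event slot, so the offset equals the index. */
   return desc->index;
}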
struct v3dv_device {
struct vk_device vk;
@@ -509,6 +518,44 @@ struct v3dv_device {
uint32_t bo_size;
uint32_t bo_count;
/* Event handling resources.
*
* Our implementation of events uses a BO to store event state (signaled vs
* reset) and dispatches compute shaders to handle GPU event functions
* (signal, reset, wait). This struct holds all the resources required
* by the implementation.
*/
struct {
mtx_t lock;
/* BO for the event states: signaled (1) or reset (0) */
struct v3dv_bo *bo;
/* Events can be created and destroyed. Since we have a dedicated BO for
* all events we use, we need to keep track of the free slots within that
* BO. For that we use a free list where we link together available event
* slots in the form of "descriptors", each holding an index that is
* effectively the slot's byte offset into the BO.
*/
uint32_t desc_count;
struct v3dv_event_desc *desc;
struct list_head free_list;
/* Vulkan resources to access the event BO from shaders. We have a
* pipeline that sets the state of an event and another that waits on
* a single event. Both pipelines require access to the event state BO,
* for which we need to allocate a single descriptor set.
*/
VkBuffer buffer;
VkDeviceMemory mem;
VkDescriptorSetLayout descriptor_set_layout;
VkPipelineLayout pipeline_layout;
VkDescriptorPool descriptor_pool;
VkDescriptorSet descriptor_set;
VkPipeline set_event_pipeline;
VkPipeline wait_event_pipeline;
} events;
struct v3dv_pipeline_cache default_pipeline_cache;
/* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
@@ -968,8 +1015,6 @@ enum v3dv_job_type {
V3DV_JOB_TYPE_CPU_RESET_QUERIES,
V3DV_JOB_TYPE_CPU_END_QUERY,
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
V3DV_JOB_TYPE_CPU_SET_EVENT,
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
@@ -1009,17 +1054,6 @@ struct v3dv_submit_sync_info {
struct vk_sync_signal *signals;
};
struct v3dv_event_set_cpu_job_info {
struct v3dv_event *event;
int state;
};
struct v3dv_event_wait_cpu_job_info {
/* List of events to wait on */
uint32_t event_count;
struct v3dv_event **events;
};
struct v3dv_copy_buffer_to_image_cpu_job_info {
struct v3dv_image *image;
struct v3dv_buffer *buffer;
@@ -1186,8 +1220,6 @@ struct v3dv_job {
struct v3dv_reset_query_cpu_job_info query_reset;
struct v3dv_end_query_cpu_job_info query_end;
struct v3dv_copy_query_results_cpu_job_info query_copy_results;
struct v3dv_event_set_cpu_job_info event_set;
struct v3dv_event_wait_cpu_job_info event_wait;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
@@ -1643,7 +1675,11 @@ bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state
struct v3dv_event {
struct vk_object_base base;
int state;
/* Each event gets a different index, which we use to compute the offset
* in the BO we use to track their state (signaled vs reset).
*/
uint32_t index;
};
struct v3dv_shader_variant {


@@ -289,60 +289,6 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
return VK_SUCCESS;
}
static VkResult
handle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
struct v3dv_submit_sync_info *sync_info)
{
/* From the Vulkan 1.0 spec:
*
* "When vkCmdSetEvent is submitted to a queue, it defines an execution
* dependency on commands that were submitted before it, and defines an
* event signal operation which sets the event to the signaled state.
* The first synchronization scope includes every command previously
* submitted to the same queue, including those in the same command
* buffer and batch".
*
* So we should wait for all prior work to be completed before signaling
* the event, this includes all active CPU wait threads spawned for any
* command buffer submitted *before* this.
*/
VkResult result = queue_wait_idle(queue, sync_info);
if (result != VK_SUCCESS)
return result;
struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
p_atomic_set(&info->event->state, info->state);
return VK_SUCCESS;
}
static bool
check_wait_events_complete(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
for (uint32_t i = 0; i < info->event_count; i++) {
if (!p_atomic_read(&info->events[i]->state))
return false;
}
return true;
}
static VkResult
handle_wait_events_cpu_job(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
/* Wait for events to be signaled */
const useconds_t wait_interval_ms = 1;
while (!check_wait_events_complete(job))
usleep(wait_interval_ms * 1000);
return VK_SUCCESS;
}
static VkResult
handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
struct v3dv_job *job,
@@ -1014,10 +960,6 @@ queue_handle_job(struct v3dv_queue *queue,
return handle_end_query_cpu_job(job, counter_pass_idx);
case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
return handle_copy_query_results_cpu_job(job);
case V3DV_JOB_TYPE_CPU_SET_EVENT:
return handle_set_event_cpu_job(queue, job, sync_info);
case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
return handle_wait_events_cpu_job(job);
case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: