From ecb01d53fdb413121ce2a8f36ad67e51898eec00 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Wed, 19 Oct 2022 09:48:19 +0200
Subject: [PATCH] v3dv: refactor events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This replaces our current implementation, which is 100% CPU based, with
an implementation that uses compute shaders for the GPU-side event
functions. The benefit of this solution is that we no longer need to
stall on the CPU to handle GPU-side event commands.
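
In short, each event owns one byte of state in a device BO (1 = signaled,
0 = reset), and set/reset/wait become single-invocation compute dispatches
that write or poll that byte. As a rough host-side C model of what the two
shaders do (illustrative only, these names are not driver code; the 4096
slot count matches the BO allocated in device_allocate_event_resources):

    #include <assert.h>
    #include <stdint.h>

    static uint8_t event_states[4096]; /* one byte of state per event slot */

    /* set_event_pipeline: store 0 (reset) or 1 (signaled) at the event slot */
    static void model_set_event(uint32_t index, uint8_t value)
    {
       assert(value == 0 || value == 1);
       event_states[index] = value;
    }

    /* wait_event_pipeline: keep polling until the event slot reads 1 */
    static void model_wait_event(uint32_t index)
    {
       while (event_states[index] != 1)
          ;
    }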

Reviewed-by: Alejandro Piñeiro
Part-of: 
---
 src/broadcom/vulkan/v3dv_cmd_buffer.c | 141 ++++----
 src/broadcom/vulkan/v3dv_device.c     | 460 +++++++++++++++++++++++++-
 src/broadcom/vulkan/v3dv_private.h    |  68 +++-
 src/broadcom/vulkan/v3dv_queue.c      |  58 ----
 4 files changed, 578 insertions(+), 149 deletions(-)

diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index d688e36a33c..1cf2573b1c1 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -163,14 +163,6 @@ job_destroy_gpu_csd_resources(struct v3dv_job *job)
    v3dv_bo_free(job->device, job->csd.shared_memory);
 }
 
-static void
-job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
-{
-   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
-   assert(job->cmd_buffer);
-   vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
-}
-
 void
 v3dv_job_destroy(struct v3dv_job *job)
 {
@@ -191,9 +183,6 @@ v3dv_job_destroy(struct v3dv_job *job)
    case V3DV_JOB_TYPE_GPU_CSD:
       job_destroy_gpu_csd_resources(job);
       break;
-   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
-      job_destroy_cpu_wait_events_resources(job);
-      break;
    default:
       break;
    }
@@ -3757,6 +3746,74 @@ v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
    list_addtail(&job->list_link, &cmd_buffer->jobs);
 }
 
+static void
+cmd_buffer_emit_set_event(struct v3dv_cmd_buffer *cmd_buffer,
+                          struct v3dv_event *event,
+                          uint8_t value)
+{
+   assert(value == 0 || value == 1);
+
+   struct v3dv_device *device = cmd_buffer->device;
+   VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
+
+   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
+
+   v3dv_CmdBindPipeline(commandBuffer,
+                        VK_PIPELINE_BIND_POINT_COMPUTE,
+                        device->events.set_event_pipeline);
+
+   v3dv_CmdBindDescriptorSets(commandBuffer,
+                              VK_PIPELINE_BIND_POINT_COMPUTE,
+                              device->events.pipeline_layout,
+                              0, 1, &device->events.descriptor_set, 0, NULL);
+
+   assert(event->index < device->events.desc_count);
+   uint32_t offset = event->index;
+   v3dv_CmdPushConstants(commandBuffer,
+                         device->events.pipeline_layout,
+                         VK_SHADER_STAGE_COMPUTE_BIT,
+                         0, 4, &offset);
+
+   v3dv_CmdPushConstants(commandBuffer,
+                         device->events.pipeline_layout,
+                         VK_SHADER_STAGE_COMPUTE_BIT,
+                         4, 1, &value);
+
+   v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
+
+   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
+}
+
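+/* Emits a single-invocation compute dispatch of the wait-event pipeline;
+ * its shader loops reading the event's byte in the event state BO and only
+ * terminates once it reads 1, so the wait happens entirely on the GPU.
+ */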
+static void
+cmd_buffer_emit_wait_event(struct v3dv_cmd_buffer *cmd_buffer,
+                           struct v3dv_event *event)
+{
+   struct v3dv_device *device = cmd_buffer->device;
+   VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
+
+   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
+
+   v3dv_CmdBindPipeline(commandBuffer,
+                        VK_PIPELINE_BIND_POINT_COMPUTE,
+                        device->events.wait_event_pipeline);
+
+   v3dv_CmdBindDescriptorSets(commandBuffer,
+                              VK_PIPELINE_BIND_POINT_COMPUTE,
+                              device->events.pipeline_layout,
+                              0, 1, &device->events.descriptor_set, 0, NULL);
+
+   assert(event->index < device->events.desc_count);
+   uint32_t offset = event->index;
+   v3dv_CmdPushConstants(commandBuffer,
+                         device->events.pipeline_layout,
+                         VK_SHADER_STAGE_COMPUTE_BIT,
+                         0, 4, &offset);
+
+   v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
+
+   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
                   VkEvent _event,
@@ -3771,16 +3828,8 @@ v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
    assert(cmd_buffer->state.pass == NULL);
    assert(cmd_buffer->state.job == NULL);
 
-   struct v3dv_job *job =
-      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
-                                     V3DV_JOB_TYPE_CPU_SET_EVENT,
-                                     cmd_buffer, -1);
-   v3dv_return_if_oom(cmd_buffer, NULL);
-
-   job->cpu.event_set.event = event;
-   job->cpu.event_set.state = 1;
-
-   list_addtail(&job->list_link, &cmd_buffer->jobs);
+   v3dv_CmdPipelineBarrier2(commandBuffer, pDependencyInfo);
+   cmd_buffer_emit_set_event(cmd_buffer, event, 1);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -3797,16 +3846,7 @@ v3dv_CmdResetEvent2(VkCommandBuffer commandBuffer,
    assert(cmd_buffer->state.pass == NULL);
    assert(cmd_buffer->state.job == NULL);
 
-   struct v3dv_job *job =
-      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
-                                     V3DV_JOB_TYPE_CPU_SET_EVENT,
-                                     cmd_buffer, -1);
-   v3dv_return_if_oom(cmd_buffer, NULL);
-
-   job->cpu.event_set.event = event;
-   job->cpu.event_set.state = 0;
-
-   list_addtail(&job->list_link, &cmd_buffer->jobs);
+   cmd_buffer_emit_set_event(cmd_buffer, event, 0);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -3816,43 +3856,10 @@ v3dv_CmdWaitEvents2(VkCommandBuffer commandBuffer,
                     const VkDependencyInfo *pDependencyInfos)
 {
    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
-
-   assert(eventCount > 0);
-
-   struct v3dv_job *job =
-      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
-                                     V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
-                                     cmd_buffer, -1);
-   v3dv_return_if_oom(cmd_buffer, NULL);
-
-   const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount;
-
-   job->cpu.event_wait.events =
-      vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8,
-               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
-   if (!job->cpu.event_wait.events) {
-      v3dv_flag_oom(cmd_buffer, NULL);
-      return;
+   for (uint32_t i = 0; i < eventCount; i++) {
+      struct v3dv_event *event = v3dv_event_from_handle(pEvents[i]);
+      cmd_buffer_emit_wait_event(cmd_buffer, event);
    }
-   job->cpu.event_wait.event_count = eventCount;
-
-   for (uint32_t i = 0; i < eventCount; i++)
-      job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
-
-   /* vkCmdWaitEvents can be recorded inside a render pass, so we might have
-    * an active job.
-    *
-    * If we are inside a render pass, because we vkCmd(Re)SetEvent can't happen
-    * inside a render pass, it is safe to move the wait job so it happens right
-    * before the current job we are currently recording for the subpass, if any
-    * (it would actually be safe to move it all the way back to right before
-    * the start of the render pass).
-    *
-    * If we are outside a render pass then we should not have any on-going job
-    * and we are free to just add the wait job without restrictions.
-    */
-   assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
-   list_addtail(&job->list_link, &cmd_buffer->jobs);
 }
 
 VKAPI_ATTR void VKAPI_CALL
diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c
index 25bdc4aa984..b3097202f2d 100644
--- a/src/broadcom/vulkan/v3dv_device.c
+++ b/src/broadcom/vulkan/v3dv_device.c
@@ -42,6 +42,7 @@
 
 #include "common/v3d_debug.h"
 #include "compiler/v3d_compiler.h"
+#include "compiler/nir/nir_builder.h"
 #include "drm-uapi/v3d_drm.h"
 #include "format/u_format.h"
 
@@ -2022,6 +2023,12 @@ destroy_device_meta(struct v3dv_device *device)
    v3dv_meta_texel_buffer_copy_finish(device);
 }
 
+static VkResult
+device_allocate_event_resources(struct v3dv_device *device);
+
+static void
+device_free_event_resources(struct v3dv_device *device);
+
 VKAPI_ATTR VkResult VKAPI_CALL
 v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
                   const VkDeviceCreateInfo *pCreateInfo,
@@ -2101,6 +2108,18 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
    util_dynarray_init(&device->device_address_bo_list,
                       device->device_address_mem_ctx);
 
+   mtx_init(&device->events.lock, mtx_plain);
+   if (!device->events.bo) {
+      result = device_allocate_event_resources(device);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+   if (list_is_empty(&device->events.free_list)) {
+      result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      goto fail;
+   }
+
    *pDevice = v3dv_device_to_handle(device);
 
    return VK_SUCCESS;
@@ -2122,6 +2141,10 @@ v3dv_DestroyDevice(VkDevice _device,
    device->vk.dispatch_table.DeviceWaitIdle(_device);
    queue_finish(&device->queue);
 
+   device_free_event_resources(device);
+   mtx_destroy(&device->events.lock);
+
    destroy_device_meta(device);
    v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
 
@@ -2895,6 +2918,411 @@ v3dv_GetMemoryFdKHR(VkDevice _device,
    return VK_SUCCESS;
 }
 
+static nir_shader *
+get_set_event_cs()
+{
+   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
+   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
+                                                  "set event cs");
+
+   b.shader->info.workgroup_size[0] = 1;
+   b.shader->info.workgroup_size[1] = 1;
+   b.shader->info.workgroup_size[2] = 1;
+
+   nir_ssa_def *buf =
+      nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
+                                .desc_set = 0,
+                                .binding = 0,
+                                .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
+   nir_ssa_def *offset =
+      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
+
+   nir_ssa_def *value =
+      nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
+
+   nir_store_ssbo(&b, value, buf, offset,
+                  .access = 0, .write_mask = 0x1, .align_mul = 4);
+
+   return b.shader;
+}
+
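+/* Shader for the wait-event pipeline: loads the event's byte from the
+ * state buffer in a loop and breaks out once it reads 1 (signaled).
+ */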
+static nir_shader *
+get_wait_event_cs()
+{
+   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
+   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
+                                                  "wait event cs");
+
+   b.shader->info.workgroup_size[0] = 1;
+   b.shader->info.workgroup_size[1] = 1;
+   b.shader->info.workgroup_size[2] = 1;
+
+   nir_ssa_def *buf =
+      nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
+                                .desc_set = 0,
+                                .binding = 0,
+                                .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+
+   nir_ssa_def *offset =
+      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
+
+   nir_loop *loop = nir_push_loop(&b);
+   nir_ssa_def *load =
+      nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
+   nir_ssa_def *value = nir_i2i32(&b, load);
+
+   nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
+   nir_jump(&b, nir_jump_break);
+   nir_pop_if(&b, if_stmt);
+   nir_pop_loop(&b, loop);
+
+   return b.shader;
+}
+
+static VkResult
+create_compute_pipeline_from_nir(struct v3dv_device *device,
+                                 nir_shader *nir,
+                                 VkPipelineLayout pipeline_layout,
+                                 VkPipeline *pipeline)
+{
+   struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
+
+   VkPipelineShaderStageCreateInfo cs_stage = {
+      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+      .module = vk_shader_module_to_handle(&cs_m),
+      .pName = "main",
+   };
+
+   VkComputePipelineCreateInfo info = {
+      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+      .stage = cs_stage,
+      .layout = pipeline_layout,
+   };
+
+   VkResult result =
+      v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
+                                  1, &info, &device->vk.alloc, pipeline);
+
+   return result;
+}
+
+static bool
+device_create_event_pipelines(struct v3dv_device *device)
+{
+   VkResult result;
+
+   if (!device->events.descriptor_set_layout) {
+      /* Pipeline layout:
+       * - 1 storage buffer for the BO with the events state.
+       * - 2 push constants:
+       *   0B: offset of the event in the buffer (4 bytes).
+       *   4B: value for the event (1 byte), only used with the set_event_pipeline.
+       */
+      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
+         .binding = 0,
+         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+         .descriptorCount = 1,
+         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+      };
+
+      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
+         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+         .bindingCount = 1,
+         .pBindings = &descriptor_set_layout_binding,
+      };
+
+      result =
+         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
+                                        &descriptor_set_layout_info,
+                                        &device->vk.alloc,
+                                        &device->events.descriptor_set_layout);
+
+      if (result != VK_SUCCESS)
+         return false;
+   }
+
+   if (!device->events.pipeline_layout) {
+      VkPipelineLayoutCreateInfo pipeline_layout_info = {
+         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+         .setLayoutCount = 1,
+         .pSetLayouts = &device->events.descriptor_set_layout,
+         .pushConstantRangeCount = 1,
+         .pPushConstantRanges =
+            &(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 5 },
+      };
+
+      result =
+         v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
+                                   &pipeline_layout_info,
+                                   &device->vk.alloc,
+                                   &device->events.pipeline_layout);
+
+      if (result != VK_SUCCESS)
+         return false;
+   }
+
+   VkPipeline pipeline;
+
+   if (!device->events.set_event_pipeline) {
+      nir_shader *set_event_cs_nir = get_set_event_cs();
+      result = create_compute_pipeline_from_nir(device,
+                                                set_event_cs_nir,
+                                                device->events.pipeline_layout,
+                                                &pipeline);
+      ralloc_free(set_event_cs_nir);
+      if (result != VK_SUCCESS)
+         return false;
+
+      device->events.set_event_pipeline = pipeline;
+   }
+
+   if (!device->events.wait_event_pipeline) {
+      nir_shader *wait_event_cs_nir = get_wait_event_cs();
+      result = create_compute_pipeline_from_nir(device,
+                                                wait_event_cs_nir,
+                                                device->events.pipeline_layout,
+                                                &pipeline);
+      ralloc_free(wait_event_cs_nir);
+      if (result != VK_SUCCESS)
+         return false;
+
+      device->events.wait_event_pipeline = pipeline;
+   }
+
+   return true;
+}
+
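+/* Called from both device teardown and the allocation fail path; the
+ * Destroy* entry points accept VK_NULL_HANDLE, so partially-initialized
+ * state is fine here.
+ */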
+static void
+device_destroy_event_pipelines(struct v3dv_device *device)
+{
+   VkDevice _device = v3dv_device_to_handle(device);
+
+   v3dv_DestroyPipeline(_device, device->events.set_event_pipeline,
+                        &device->vk.alloc);
+   device->events.set_event_pipeline = VK_NULL_HANDLE;
+
+   v3dv_DestroyPipeline(_device, device->events.wait_event_pipeline,
+                        &device->vk.alloc);
+   device->events.wait_event_pipeline = VK_NULL_HANDLE;
+
+   v3dv_DestroyPipelineLayout(_device, device->events.pipeline_layout,
+                              &device->vk.alloc);
+   device->events.pipeline_layout = VK_NULL_HANDLE;
+
+   v3dv_DestroyDescriptorSetLayout(_device,
+                                   device->events.descriptor_set_layout,
+                                   &device->vk.alloc);
+   device->events.descriptor_set_layout = VK_NULL_HANDLE;
+}
+
+static VkResult
+device_allocate_event_resources(struct v3dv_device *device)
+{
+   VkResult result = VK_SUCCESS;
+   VkDevice _device = v3dv_device_to_handle(device);
+
+   /* BO with event states. Make sure we always align to a page size (4096)
+    * to ensure we use all the memory the kernel will allocate for the BO.
+    */
+   const uint32_t bo_size = 4096;
+   struct v3dv_bo *bo = v3dv_bo_alloc(device, bo_size, "events", true);
+   if (!bo) {
+      result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      goto fail;
+   }
+
+   device->events.bo = bo;
+
+   if (!v3dv_bo_map(device, bo, bo_size)) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail;
+   }
+
+   /* List of free event state slots in the BO, 1 byte per slot */
+   device->events.desc_count = bo_size;
+   device->events.desc =
+      vk_alloc2(&device->vk.alloc, NULL,
+                device->events.desc_count * sizeof(struct v3dv_event_desc), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!device->events.desc) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail;
+   }
+
+   list_inithead(&device->events.free_list);
+   for (int i = 0; i < device->events.desc_count; i++) {
+      device->events.desc[i].index = i;
+      list_addtail(&device->events.desc[i].link, &device->events.free_list);
+   }
+
+   /* Vulkan buffer for the event state BO */
+   VkBufferCreateInfo buf_info = {
+      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+      .size = bo->size,
+      .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+   };
+   result = v3dv_CreateBuffer(_device, &buf_info, NULL,
+                              &device->events.buffer);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   struct v3dv_device_memory *mem =
+      vk_object_zalloc(&device->vk, NULL, sizeof(*mem),
+                       VK_OBJECT_TYPE_DEVICE_MEMORY);
+   if (!mem) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto fail;
+   }
+
+   mem->bo = bo;
+   mem->type = &device->pdevice->memory.memoryTypes[0];
+
+   device->events.mem = v3dv_device_memory_to_handle(mem);
+   VkBindBufferMemoryInfo bind_info = {
+      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+      .buffer = device->events.buffer,
+      .memory = device->events.mem,
+      .memoryOffset = 0,
+   };
+   bind_buffer_memory(&bind_info);
+
+   /* Pipelines */
+   if (!device_create_event_pipelines(device)) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail;
+   }
+
+   /* Descriptor pool & set to access the buffer */
+   VkDescriptorPoolSize pool_size = {
+      .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+      .descriptorCount = 1,
+   };
+   VkDescriptorPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+      .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+      .maxSets = 1,
+      .poolSizeCount = 1,
+      .pPoolSizes = &pool_size,
+   };
+   result =
+      v3dv_CreateDescriptorPool(_device, &pool_info, NULL,
+                                &device->events.descriptor_pool);
+
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   VkDescriptorSetAllocateInfo alloc_info = {
+      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+      .descriptorPool = device->events.descriptor_pool,
+      .descriptorSetCount = 1,
+      .pSetLayouts = &device->events.descriptor_set_layout,
+   };
+   result = v3dv_AllocateDescriptorSets(_device, &alloc_info,
+                                        &device->events.descriptor_set);
+   if (result != VK_SUCCESS)
+      goto fail;
+
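+   /* Point the descriptor set at the event state buffer so both event
+    * pipelines can access it.
+    */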
+   VkDescriptorBufferInfo desc_buf_info = {
+      .buffer = device->events.buffer,
+      .offset = 0,
+      .range = VK_WHOLE_SIZE,
+   };
+
+   VkWriteDescriptorSet write = {
+      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+      .dstSet = device->events.descriptor_set,
+      .dstBinding = 0,
+      .dstArrayElement = 0,
+      .descriptorCount = 1,
+      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+      .pBufferInfo = &desc_buf_info,
+   };
+   v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
+
+   return VK_SUCCESS;
+
+fail:
+   device_free_event_resources(device);
+   return result;
+}
+
+static void
+device_free_event_resources(struct v3dv_device *device)
+{
+   if (device->events.bo) {
+      v3dv_bo_free(device, device->events.bo);
+      device->events.bo = NULL;
+   }
+
+   if (device->events.desc) {
+      vk_free2(&device->vk.alloc, NULL, device->events.desc);
+      device->events.desc = NULL;
+   }
+
+   if (device->events.mem) {
+      vk_object_free(&device->vk, NULL,
+                     v3dv_device_memory_from_handle(device->events.mem));
+      device->events.mem = VK_NULL_HANDLE;
+   }
+
+   v3dv_DestroyBuffer(v3dv_device_to_handle(device),
+                      device->events.buffer, NULL);
+   device->events.buffer = VK_NULL_HANDLE;
+
+   if (device->events.descriptor_pool) {
+      v3dv_FreeDescriptorSets(v3dv_device_to_handle(device),
+                              device->events.descriptor_pool,
+                              1, &device->events.descriptor_set);
+      device->events.descriptor_set = VK_NULL_HANDLE;
+
+      v3dv_DestroyDescriptorPool(v3dv_device_to_handle(device),
+                                 device->events.descriptor_pool,
+                                 NULL);
+      device->events.descriptor_pool = VK_NULL_HANDLE;
+   }
+
+   device_destroy_event_pipelines(device);
+}
+
+static struct v3dv_event_desc *
+device_allocate_event_descriptor(struct v3dv_device *device)
+{
+   mtx_lock(&device->events.lock);
+   if (list_is_empty(&device->events.free_list)) {
+      mtx_unlock(&device->events.lock);
+      return NULL;
+   }
+
+   struct v3dv_event_desc *desc =
+      list_first_entry(&device->events.free_list, struct v3dv_event_desc, link);
+   list_del(&desc->link);
+   mtx_unlock(&device->events.lock);
+
+   return desc;
+}
+
+static void
+device_free_event_descriptor(struct v3dv_device *device, uint32_t index)
+{
+   mtx_lock(&device->events.lock);
+   assert(index < device->events.desc_count);
+   list_addtail(&device->events.desc[index].link, &device->events.free_list);
+   mtx_unlock(&device->events.lock);
+}
+
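+/* Host-side access to event state: the event BO is CPU-mapped, so the
+ * vkSetEvent/vkResetEvent/vkGetEventStatus host commands below can read
+ * and write event state directly through the mapping.
+ */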
+static void
+device_event_set_value(struct v3dv_device *device,
+                       struct v3dv_event *event,
+                       uint8_t value)
+{
+   assert(value == 0 || value == 1);
+   uint8_t *data = (uint8_t *) device->events.bo->map;
+   data[event->index] = value;
+}
+
+static uint8_t
+device_event_get_value(struct v3dv_device *device, struct v3dv_event *event)
+{
+   uint8_t *data = (uint8_t *) device->events.bo->map;
+   return data[event->index];
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 v3dv_CreateEvent(VkDevice _device,
                  const VkEventCreateInfo *pCreateInfo,
@@ -2902,17 +3330,29 @@ v3dv_CreateEvent(VkDevice _device,
                  VkEvent *pEvent)
 {
    V3DV_FROM_HANDLE(v3dv_device, device, _device);
+   VkResult result = VK_SUCCESS;
+
    struct v3dv_event *event =
       vk_object_zalloc(&device->vk, pAllocator, sizeof(*event),
                        VK_OBJECT_TYPE_EVENT);
-   if (!event)
-      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   if (!event) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail;
+   }
 
-   /* Events are created in the unsignaled state */
-   event->state = false;
+   struct v3dv_event_desc *desc = device_allocate_event_descriptor(device);
+   if (!desc) {
+      result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      goto fail;
+   }
+
+   event->index = desc->index;
+   device_event_set_value(device, event, 0);
 
    *pEvent = v3dv_event_to_handle(event);
 
    return VK_SUCCESS;
+
+fail:
+   if (event)
+      vk_object_free(&device->vk, pAllocator, event);
+   return result;
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -2926,29 +3366,33 @@ v3dv_DestroyEvent(VkDevice _device,
    if (!event)
       return;
 
+   device_free_event_descriptor(device, event->index);
    vk_object_free(&device->vk, pAllocator, event);
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL
 v3dv_GetEventStatus(VkDevice _device, VkEvent _event)
 {
+   V3DV_FROM_HANDLE(v3dv_device, device, _device);
    V3DV_FROM_HANDLE(v3dv_event, event, _event);
-   return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET;
+   return device_event_get_value(device, event) ? VK_EVENT_SET : VK_EVENT_RESET;
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL
 v3dv_SetEvent(VkDevice _device, VkEvent _event)
 {
+   V3DV_FROM_HANDLE(v3dv_device, device, _device);
    V3DV_FROM_HANDLE(v3dv_event, event, _event);
-   p_atomic_set(&event->state, 1);
+   device_event_set_value(device, event, 1);
    return VK_SUCCESS;
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL
 v3dv_ResetEvent(VkDevice _device, VkEvent _event)
 {
+   V3DV_FROM_HANDLE(v3dv_device, device, _device);
    V3DV_FROM_HANDLE(v3dv_event, event, _event);
-   p_atomic_set(&event->state, 0);
+   device_event_set_value(device, event, 0);
    return VK_SUCCESS;
 }
 
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 5e34d4a471b..8086133e08c 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -454,6 +454,15 @@ struct v3dv_pipeline_cache {
    bool externally_synchronized;
 };
 
+/* This is used to implement a list of free events in the BO we use to
+ * hold event states. The index here is used to calculate the offset
+ * within that BO.
+ */
+struct v3dv_event_desc {
+   struct list_head link;
+   uint32_t index;
+};
+
 struct v3dv_device {
    struct vk_device vk;
 
@@ -509,6 +518,44 @@ struct v3dv_device {
    uint32_t bo_size;
    uint32_t bo_count;
 
+   /* Event handling resources.
+    *
+    * Our implementation of events uses a BO to store event state (signaled vs
+    * reset) and dispatches compute shaders to handle GPU event functions
+    * (signal, reset, wait). This struct holds all the resources required
+    * by the implementation.
+    */
+   struct {
+      mtx_t lock;
+
+      /* BO for the event states: signaled (1) or reset (0) */
+      struct v3dv_bo *bo;
+
+      /* Events can be created and destroyed. Since we have a dedicated BO for
+       * all events we use, we need to keep track of the free slots within that
+       * BO. For that we use a free list where we link together available event
+       * slots in the form of "descriptors" that include an index (which is
+       * basically an offset into the BO that is available).
+       */
+      uint32_t desc_count;
+      struct v3dv_event_desc *desc;
+      struct list_head free_list;
+
+      /* Vulkan resources to access the event BO from shaders. We have a
+       * pipeline that sets the state of an event and another that waits on
+       * a single event. Both pipelines require access to the event state BO,
+       * for which we need to allocate a single descriptor set.
+       */
+      VkBuffer buffer;
+      VkDeviceMemory mem;
+      VkDescriptorSetLayout descriptor_set_layout;
+      VkPipelineLayout pipeline_layout;
+      VkDescriptorPool descriptor_pool;
+      VkDescriptorSet descriptor_set;
+      VkPipeline set_event_pipeline;
+      VkPipeline wait_event_pipeline;
+   } events;
+
    struct v3dv_pipeline_cache default_pipeline_cache;
 
    /* GL_SHADER_STATE_RECORD needs to speficy default attribute values. The
@@ -968,8 +1015,6 @@ enum v3dv_job_type {
    V3DV_JOB_TYPE_CPU_RESET_QUERIES,
    V3DV_JOB_TYPE_CPU_END_QUERY,
    V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
-   V3DV_JOB_TYPE_CPU_SET_EVENT,
-   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
    V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
    V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
    V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
@@ -1009,17 +1054,6 @@ struct v3dv_submit_sync_info {
    struct vk_sync_signal *signals;
 };
 
-struct v3dv_event_set_cpu_job_info {
-   struct v3dv_event *event;
-   int state;
-};
-
-struct v3dv_event_wait_cpu_job_info {
-   /* List of events to wait on */
-   uint32_t event_count;
-   struct v3dv_event **events;
-};
-
 struct v3dv_copy_buffer_to_image_cpu_job_info {
    struct v3dv_image *image;
    struct v3dv_buffer *buffer;
@@ -1186,8 +1220,6 @@ struct v3dv_job {
       struct v3dv_reset_query_cpu_job_info query_reset;
       struct v3dv_end_query_cpu_job_info query_end;
       struct v3dv_copy_query_results_cpu_job_info query_copy_results;
-      struct v3dv_event_set_cpu_job_info event_set;
-      struct v3dv_event_wait_cpu_job_info event_wait;
       struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
       struct v3dv_csd_indirect_cpu_job_info csd_indirect;
       struct v3dv_timestamp_query_cpu_job_info query_timestamp;
@@ -1643,7 +1675,11 @@ bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state
 
 struct v3dv_event {
    struct vk_object_base base;
-   int state;
+
+   /* Each event gets a different index, which we use to compute the offset
+    * in the BO we use to track its state (signaled vs reset).
+    */
+   uint32_t index;
 };
 
 struct v3dv_shader_variant {
diff --git a/src/broadcom/vulkan/v3dv_queue.c b/src/broadcom/vulkan/v3dv_queue.c
index 6e2829b99be..1b2bee4b5d4 100644
--- a/src/broadcom/vulkan/v3dv_queue.c
+++ b/src/broadcom/vulkan/v3dv_queue.c
@@ -289,60 +289,6 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
    return VK_SUCCESS;
 }
 
-static VkResult
-handle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
-                         struct v3dv_submit_sync_info *sync_info)
-{
-   /* From the Vulkan 1.0 spec:
-    *
-    *    "When vkCmdSetEvent is submitted to a queue, it defines an execution
-    *     dependency on commands that were submitted before it, and defines an
-    *     event signal operation which sets the event to the signaled state.
-    *     The first synchronization scope includes every command previously
-    *     submitted to the same queue, including those in the same command
-    *     buffer and batch".
-    *
-    * So we should wait for all prior work to be completed before signaling
-    * the event, this includes all active CPU wait threads spawned for any
-    * command buffer submitted *before* this.
-    */
-
-   VkResult result = queue_wait_idle(queue, sync_info);
-   if (result != VK_SUCCESS)
-      return result;
-
-   struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
-   p_atomic_set(&info->event->state, info->state);
-
-   return VK_SUCCESS;
-}
-
-static bool
-check_wait_events_complete(struct v3dv_job *job)
-{
-   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
-
-   struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
-   for (uint32_t i = 0; i < info->event_count; i++) {
-      if (!p_atomic_read(&info->events[i]->state))
-         return false;
-   }
-   return true;
-}
-
-static VkResult
-handle_wait_events_cpu_job(struct v3dv_job *job)
-{
-   assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
-
-   /* Wait for events to be signaled */
-   const useconds_t wait_interval_ms = 1;
-   while (!check_wait_events_complete(job))
-      usleep(wait_interval_ms * 1000);
-
-   return VK_SUCCESS;
-}
-
 static VkResult
 handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
                                     struct v3dv_job *job,
@@ -1014,10 +960,6 @@ queue_handle_job(struct v3dv_queue *queue,
       return handle_end_query_cpu_job(job, counter_pass_idx);
    case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
       return handle_copy_query_results_cpu_job(job);
-   case V3DV_JOB_TYPE_CPU_SET_EVENT:
-      return handle_set_event_cpu_job(queue, job, sync_info);
-   case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
-      return handle_wait_events_cpu_job(job);
    case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
       return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
    case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: