v3dv: refactor events

This replaces our current implementation, which is 100% CPU based,
with an implementation that uses compute shaders for the GPU-side
event functions. The benefit of this approach is that we no longer
need to stall on the CPU to handle GPU-side event commands.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19313>
Author: Iago Toral Quiroga
Date:   2022-10-19 09:48:19 +02:00
Parent: 8cd50ef071
Commit: ecb01d53fd
4 changed files with 578 additions and 149 deletions
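For context, these are the application-facing entry points whose backend this commit changes: vkCmdSetEvent2 and vkCmdWaitEvents2. A minimal hypothetical usage sketch (the command buffer, event and dependency info are placeholders, not part of the commit):

#include <vulkan/vulkan.h>

static void record_event_sync(VkCommandBuffer cmd, VkEvent event)
{
   VkDependencyInfo dep = { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO };

   /* Before this commit: recorded a CPU job that the queue thread executed
    * with an atomic store. Now: records a 1x1x1 compute dispatch that
    * writes the event's state byte in a device BO. */
   vkCmdSetEvent2(cmd, event, &dep);

   /* Before: the queue thread polled event state with usleep(). Now: a
    * compute dispatch spins in a shader until the state byte reads 1. */
   vkCmdWaitEvents2(cmd, 1, &event, &dep);
}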


@@ -163,14 +163,6 @@ job_destroy_gpu_csd_resources(struct v3dv_job *job)
v3dv_bo_free(job->device, job->csd.shared_memory);
}
static void
job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
assert(job->cmd_buffer);
vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
}
void
v3dv_job_destroy(struct v3dv_job *job)
{
@@ -191,9 +183,6 @@ v3dv_job_destroy(struct v3dv_job *job)
case V3DV_JOB_TYPE_GPU_CSD:
job_destroy_gpu_csd_resources(job);
break;
case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
job_destroy_cpu_wait_events_resources(job);
break;
default:
break;
}
@@ -3757,6 +3746,74 @@ v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
static void
cmd_buffer_emit_set_event(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_event *event,
uint8_t value)
{
assert(value == 0 || value == 1);
struct v3dv_device *device = cmd_buffer->device;
VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
v3dv_CmdBindPipeline(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.set_event_pipeline);
v3dv_CmdBindDescriptorSets(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.pipeline_layout,
0, 1, &device->events.descriptor_set, 0, NULL);
assert(event->index < device->events.desc_count);
uint32_t offset = event->index;
v3dv_CmdPushConstants(commandBuffer,
device->events.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT,
0, 4, &offset);
v3dv_CmdPushConstants(commandBuffer,
device->events.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT,
4, 1, &value);
v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
}
static void
cmd_buffer_emit_wait_event(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_event *event)
{
struct v3dv_device *device = cmd_buffer->device;
VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
v3dv_CmdBindPipeline(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.wait_event_pipeline);
v3dv_CmdBindDescriptorSets(commandBuffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->events.pipeline_layout,
0, 1, &device->events.descriptor_set, 0, NULL);
assert(event->index < device->events.desc_count);
uint32_t offset = event->index;
v3dv_CmdPushConstants(commandBuffer,
device->events.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT,
0, 4, &offset);
v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
VkEvent _event,
@@ -3771,16 +3828,8 @@ v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_SET_EVENT,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
job->cpu.event_set.event = event;
job->cpu.event_set.state = 1;
list_addtail(&job->list_link, &cmd_buffer->jobs);
v3dv_CmdPipelineBarrier2(commandBuffer, pDependencyInfo);
cmd_buffer_emit_set_event(cmd_buffer, event, 1);
}
VKAPI_ATTR void VKAPI_CALL
@@ -3797,16 +3846,7 @@ v3dv_CmdResetEvent2(VkCommandBuffer commandBuffer,
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_SET_EVENT,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
job->cpu.event_set.event = event;
job->cpu.event_set.state = 0;
list_addtail(&job->list_link, &cmd_buffer->jobs);
cmd_buffer_emit_set_event(cmd_buffer, event, 0);
}
VKAPI_ATTR void VKAPI_CALL
@@ -3816,43 +3856,10 @@ v3dv_CmdWaitEvents2(VkCommandBuffer commandBuffer,
const VkDependencyInfo *pDependencyInfos)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
assert(eventCount > 0);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount;
job->cpu.event_wait.events =
vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!job->cpu.event_wait.events) {
v3dv_flag_oom(cmd_buffer, NULL);
return;
}
for (uint32_t i = 0; i < eventCount; i++) {
struct v3dv_event *event = v3dv_event_from_handle(pEvents[i]);
cmd_buffer_emit_wait_event(cmd_buffer, event);
}
job->cpu.event_wait.event_count = eventCount;
for (uint32_t i = 0; i < eventCount; i++)
job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
/* vkCmdWaitEvents can be recorded inside a render pass, so we might have
* an active job.
*
* If we are inside a render pass, because vkCmd(Re)SetEvent can't happen
* inside a render pass, it is safe to move the wait job so it happens right
* before the current job we are currently recording for the subpass, if any
* (it would actually be safe to move it all the way back to right before
* the start of the render pass).
*
* If we are outside a render pass then we should not have any on-going job
* and we are free to just add the wait job without restrictions.
*/
assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
VKAPI_ATTR void VKAPI_CALL


@@ -42,6 +42,7 @@
#include "common/v3d_debug.h"
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "drm-uapi/v3d_drm.h"
#include "format/u_format.h"
@@ -2022,6 +2023,12 @@ destroy_device_meta(struct v3dv_device *device)
v3dv_meta_texel_buffer_copy_finish(device);
}
static VkResult
device_allocate_event_resources(struct v3dv_device *device);
static void
device_free_event_resources(struct v3dv_device *device);
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
@@ -2101,6 +2108,18 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
util_dynarray_init(&device->device_address_bo_list,
device->device_address_mem_ctx);
mtx_init(&device->events.lock, mtx_plain);
if (!device->events.bo) {
result = device_allocate_event_resources(device);
if (result != VK_SUCCESS)
goto fail;
}
if (list_is_empty(&device->events.free_list)) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
*pDevice = v3dv_device_to_handle(device);
return VK_SUCCESS;
@@ -2122,6 +2141,10 @@ v3dv_DestroyDevice(VkDevice _device,
device->vk.dispatch_table.DeviceWaitIdle(_device);
queue_finish(&device->queue);
device_free_event_resources(device);
mtx_destroy(&device->events.lock);
destroy_device_meta(device);
v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
@@ -2895,6 +2918,411 @@ v3dv_GetMemoryFdKHR(VkDevice _device,
return VK_SUCCESS;
}
static nir_shader *
get_set_event_cs(void)
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"set event cs");
b.shader->info.workgroup_size[0] = 1;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
nir_ssa_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *value =
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_store_ssbo(&b, value, buf, offset,
.access = 0, .write_mask = 0x1, .align_mul = 4);
return b.shader;
}
static nir_shader *
get_wait_event_cs(void)
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"wait event cs");
b.shader->info.workgroup_size[0] = 1;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
nir_ssa_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_loop *loop = nir_push_loop(&b);
nir_ssa_def *load =
nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
nir_ssa_def *value = nir_i2i32(&b, load);
nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
nir_jump(&b, nir_jump_break);
nir_pop_if(&b, if_stmt);
nir_pop_loop(&b, loop);
return b.shader;
}
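Expressed as plain C over the mapped events BO, the two shaders built above reduce to the following sketch (illustrative semantics only, with hypothetical helper names; the real shaders run on the GPU against the SSBO bound at set 0, binding 0):

#include <stdint.h>

/* Semantics of get_set_event_cs: store one byte at the push-constant offset. */
static void set_event_cs_semantics(uint8_t *events, uint32_t offset, uint8_t value)
{
   events[offset] = value;
}

/* Semantics of get_wait_event_cs: spin until the event byte reads 1. */
static void wait_event_cs_semantics(const volatile uint8_t *events, uint32_t offset)
{
   while (events[offset] != 1)
      ; /* the NIR loop breaks on nir_ieq_imm(value, 1) */
}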
static VkResult
create_compute_pipeline_from_nir(struct v3dv_device *device,
nir_shader *nir,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
VkPipelineShaderStageCreateInfo cs_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_to_handle(&cs_m),
.pName = "main",
};
VkComputePipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = cs_stage,
.layout = pipeline_layout,
};
VkResult result =
v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
1, &info, &device->vk.alloc, pipeline);
return result;
}
static bool
device_create_event_pipelines(struct v3dv_device *device)
{
VkResult result;
if (!device->events.descriptor_set_layout) {
/* Pipeline layout:
* - 1 storage buffer for the BO with the events state.
* - 2 push constants:
* 0B: offset of the event in the buffer (4 bytes).
* 4B: value for the event (1 byte), only used with the set_event_pipeline.
*/
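/* For illustration, the host-side packing this layout assumes (a sketch
* mirroring the v3dv_CmdPushConstants calls in cmd_buffer_emit_set_event,
* not additional driver code):
*
*    uint32_t offset = event->index;
*    v3dv_CmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 4, &offset);
*    uint8_t value = 1;
*    v3dv_CmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 4, 1, &value);
*/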
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
};
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = &descriptor_set_layout_binding,
};
result =
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
&descriptor_set_layout_info,
&device->vk.alloc,
&device->events.descriptor_set_layout);
if (result != VK_SUCCESS)
return false;
}
if (!device->events.pipeline_layout) {
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->events.descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges =
&(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 5 },
};
result =
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
&pipeline_layout_info,
&device->vk.alloc,
&device->events.pipeline_layout);
if (result != VK_SUCCESS)
return false;
}
VkPipeline pipeline;
if (!device->events.set_event_pipeline) {
nir_shader *set_event_cs_nir = get_set_event_cs();
result = create_compute_pipeline_from_nir(device,
set_event_cs_nir,
device->events.pipeline_layout,
&pipeline);
ralloc_free(set_event_cs_nir);
if (result != VK_SUCCESS)
return false;
device->events.set_event_pipeline = pipeline;
}
if (!device->events.wait_event_pipeline) {
nir_shader *wait_event_cs_nir = get_wait_event_cs();
result = create_compute_pipeline_from_nir(device,
wait_event_cs_nir,
device->events.pipeline_layout,
&pipeline);
ralloc_free(wait_event_cs_nir);
if (result != VK_SUCCESS)
return false;
device->events.wait_event_pipeline = pipeline;
}
return true;
}
static void
device_destroy_event_pipelines(struct v3dv_device *device)
{
VkDevice _device = v3dv_device_to_handle(device);
v3dv_DestroyPipeline(_device, device->events.set_event_pipeline,
&device->vk.alloc);
device->events.set_event_pipeline = VK_NULL_HANDLE;
v3dv_DestroyPipeline(_device, device->events.wait_event_pipeline,
&device->vk.alloc);
device->events.wait_event_pipeline = VK_NULL_HANDLE;
v3dv_DestroyPipelineLayout(_device, device->events.pipeline_layout,
&device->vk.alloc);
device->events.pipeline_layout = VK_NULL_HANDLE;
v3dv_DestroyDescriptorSetLayout(_device,
device->events.descriptor_set_layout,
&device->vk.alloc);
device->events.descriptor_set_layout = VK_NULL_HANDLE;
}
static VkResult
device_allocate_event_resources(struct v3dv_device *device)
{
VkResult result = VK_SUCCESS;
VkDevice _device = v3dv_device_to_handle(device);
/* BO with event states. Make sure we always align to a page size (4096)
* to ensure we use all the memory the kernel will allocate for the BO.
*/
const uint32_t bo_size = 4096;
struct v3dv_bo *bo = v3dv_bo_alloc(device, bo_size, "events", true);
if (!bo) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
device->events.bo = bo;
if (!v3dv_bo_map(device, bo, bo_size)) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
/* List of free event state slots in the BO, 1 byte per slot */
device->events.desc_count = bo_size;
device->events.desc =
vk_alloc2(&device->vk.alloc, NULL,
device->events.desc_count * sizeof(struct v3dv_event_desc), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->events.desc) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
list_inithead(&device->events.free_list);
for (uint32_t i = 0; i < device->events.desc_count; i++) {
device->events.desc[i].index = i;
list_addtail(&device->events.desc[i].link, &device->events.free_list);
}
/* Vulkan buffer for the event state BO */
VkBufferCreateInfo buf_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = bo->size,
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
};
result = v3dv_CreateBuffer(_device, &buf_info, NULL,
&device->events.buffer);
if (result != VK_SUCCESS)
goto fail;
struct v3dv_device_memory *mem =
vk_object_zalloc(&device->vk, NULL, sizeof(*mem),
VK_OBJECT_TYPE_DEVICE_MEMORY);
if (!mem) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
mem->bo = bo;
mem->type = &device->pdevice->memory.memoryTypes[0];
device->events.mem = v3dv_device_memory_to_handle(mem);
VkBindBufferMemoryInfo bind_info = {
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
.buffer = device->events.buffer,
.memory = device->events.mem,
.memoryOffset = 0,
};
bind_buffer_memory(&bind_info);
/* Pipelines */
if (!device_create_event_pipelines(device)) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
/* Descriptor pool & set to access the buffer */
VkDescriptorPoolSize pool_size = {
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
};
VkDescriptorPoolCreateInfo pool_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
.maxSets = 1,
.poolSizeCount = 1,
.pPoolSizes = &pool_size,
};
result =
v3dv_CreateDescriptorPool(_device, &pool_info, NULL,
&device->events.descriptor_pool);
if (result != VK_SUCCESS)
goto fail;
VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = device->events.descriptor_pool,
.descriptorSetCount = 1,
.pSetLayouts = &device->events.descriptor_set_layout,
};
result = v3dv_AllocateDescriptorSets(_device, &alloc_info,
&device->events.descriptor_set);
if (result != VK_SUCCESS)
goto fail;
VkDescriptorBufferInfo desc_buf_info = {
.buffer = device->events.buffer,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
VkWriteDescriptorSet write = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = device->events.descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = &desc_buf_info,
};
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
return VK_SUCCESS;
fail:
device_free_event_resources(device);
return result;
}
static void
device_free_event_resources(struct v3dv_device *device)
{
if (device->events.bo) {
v3dv_bo_free(device, device->events.bo);
device->events.bo = NULL;
}
if (device->events.desc) {
vk_free2(&device->vk.alloc, NULL, device->events.desc);
device->events.desc = NULL;
}
vk_object_free(&device->vk, NULL,
v3dv_device_memory_from_handle(device->events.mem));
device->events.mem = VK_NULL_HANDLE;
v3dv_DestroyBuffer(v3dv_device_to_handle(device),
device->events.buffer, NULL);
device->events.buffer = VK_NULL_HANDLE;
v3dv_FreeDescriptorSets(v3dv_device_to_handle(device),
device->events.descriptor_pool,
1, &device->events.descriptor_set);
device->events.descriptor_set = VK_NULL_HANDLE;
v3dv_DestroyDescriptorPool(v3dv_device_to_handle(device),
device->events.descriptor_pool,
NULL);
device->events.descriptor_pool = VK_NULL_HANDLE;
device_destroy_event_pipelines(device);
}
static struct v3dv_event_desc *
device_allocate_event_descriptor(struct v3dv_device *device)
{
mtx_lock(&device->events.lock);
if (list_is_empty(&device->events.free_list)) {
mtx_unlock(&device->events.lock);
return NULL;
}
struct v3dv_event_desc *desc =
list_first_entry(&device->events.free_list, struct v3dv_event_desc, link);
list_del(&desc->link);
mtx_unlock(&device->events.lock);
return desc;
}
static void
device_free_event_descriptor(struct v3dv_device *device, uint32_t index)
{
mtx_lock(&device->events.lock);
assert(index < device->events.desc_count);
list_addtail(&device->events.desc[index].link, &device->events.free_list);
mtx_unlock(&device->events.lock);
}
static void
device_event_set_value(struct v3dv_device *device,
struct v3dv_event *event,
uint8_t value)
{
assert(value == 0 || value == 1);
uint8_t *data = (uint8_t *) device->events.bo->map;
data[event->index] = value;
}
static uint8_t
device_event_get_value(struct v3dv_device *device, struct v3dv_event *event)
{
uint8_t *data = (uint8_t *) device->events.bo->map;
return data[event->index];
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateEvent(VkDevice _device,
const VkEventCreateInfo *pCreateInfo,
@@ -2902,17 +3330,29 @@ v3dv_CreateEvent(VkDevice _device,
VkEvent *pEvent)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
VkResult result = VK_SUCCESS;
struct v3dv_event *event =
vk_object_zalloc(&device->vk, pAllocator, sizeof(*event),
VK_OBJECT_TYPE_EVENT);
if (!event)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!event) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
/* Events are created in the unsignaled state */
event->state = false;
struct v3dv_event_desc *desc = device_allocate_event_descriptor(device);
if (!desc) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
event->index = desc->index;
device_event_set_value(device, event, 0);
*pEvent = v3dv_event_to_handle(event);
return VK_SUCCESS;
fail:
if (event)
vk_object_free(&device->vk, pAllocator, event);
return result;
}
VKAPI_ATTR void VKAPI_CALL
@@ -2926,29 +3366,33 @@ v3dv_DestroyEvent(VkDevice _device,
if (!event)
return;
device_free_event_descriptor(device, event->index);
vk_object_free(&device->vk, pAllocator, event);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetEventStatus(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_event, event, _event);
return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET;
return device_event_get_value(device, event) ? VK_EVENT_SET : VK_EVENT_RESET;
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_SetEvent(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_event, event, _event);
p_atomic_set(&event->state, 1);
device_event_set_value(device, event, 1);
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetEvent(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_event, event, _event);
p_atomic_set(&event->state, 0);
device_event_set_value(device, event, 0);
return VK_SUCCESS;
}


@@ -454,6 +454,15 @@ struct v3dv_pipeline_cache {
bool externally_synchronized;
};
/* This is used to implement a list of free events in the BO we use to
* hold event states. The index here is used to calculate the offset
* within that BO.
*/
struct v3dv_event_desc {
struct list_head link;
uint32_t index;
};
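Since the state BO stores one byte per event, an index maps directly to a byte offset. A hypothetical helper, just to make the mapping explicit:

static inline uint32_t
v3dv_event_state_offset(const struct v3dv_event_desc *desc)
{
   /* Hypothetical: one byte per event slot, so the offset equals the index. */
   return desc->index;
}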
struct v3dv_device {
struct vk_device vk;
@@ -509,6 +518,44 @@ struct v3dv_device {
uint32_t bo_size;
uint32_t bo_count;
/* Event handling resources.
*
* Our implementation of events uses a BO to store event state (signaled vs
* reset) and dispatches compute shaders to handle GPU event functions
* (signal, reset, wait). This struct holds all the resources required
* by the implementation.
*/
struct {
mtx_t lock;
/* BO for the event states: signaled (1) or reset (0) */
struct v3dv_bo *bo;
/* Events can be created and destroyed. Since we have a dedicated BO for
* all events we use, we need to keep track of the free slots within that
* BO. For that we use a free list where we link together available event
* slots in the form of "descriptors", each holding an index that is
* effectively the slot's byte offset into the BO.
*/
uint32_t desc_count;
struct v3dv_event_desc *desc;
struct list_head free_list;
/* Vulkan resources to access the event BO from shaders. We have a
* pipeline that sets the state of an event and another that waits on
* a single event. Both pipelines require access to the event state BO,
* for which we need to allocate a single descriptor set.
*/
VkBuffer buffer;
VkDeviceMemory mem;
VkDescriptorSetLayout descriptor_set_layout;
VkPipelineLayout pipeline_layout;
VkDescriptorPool descriptor_pool;
VkDescriptorSet descriptor_set;
VkPipeline set_event_pipeline;
VkPipeline wait_event_pipeline;
} events;
struct v3dv_pipeline_cache default_pipeline_cache;
/* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
@@ -968,8 +1015,6 @@ enum v3dv_job_type {
V3DV_JOB_TYPE_CPU_RESET_QUERIES,
V3DV_JOB_TYPE_CPU_END_QUERY,
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
V3DV_JOB_TYPE_CPU_SET_EVENT,
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
@@ -1009,17 +1054,6 @@ struct v3dv_submit_sync_info {
struct vk_sync_signal *signals;
};
struct v3dv_event_set_cpu_job_info {
struct v3dv_event *event;
int state;
};
struct v3dv_event_wait_cpu_job_info {
/* List of events to wait on */
uint32_t event_count;
struct v3dv_event **events;
};
struct v3dv_copy_buffer_to_image_cpu_job_info {
struct v3dv_image *image;
struct v3dv_buffer *buffer;
@@ -1186,8 +1220,6 @@ struct v3dv_job {
struct v3dv_reset_query_cpu_job_info query_reset;
struct v3dv_end_query_cpu_job_info query_end;
struct v3dv_copy_query_results_cpu_job_info query_copy_results;
struct v3dv_event_set_cpu_job_info event_set;
struct v3dv_event_wait_cpu_job_info event_wait;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
@@ -1643,7 +1675,11 @@ bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state
struct v3dv_event {
struct vk_object_base base;
int state;
/* Each event gets a different index, which we use to compute the offset
* in the BO we use to track their state (signaled vs reset).
*/
uint32_t index;
};
struct v3dv_shader_variant {


@@ -289,60 +289,6 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
return VK_SUCCESS;
}
static VkResult
handle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
struct v3dv_submit_sync_info *sync_info)
{
/* From the Vulkan 1.0 spec:
*
* "When vkCmdSetEvent is submitted to a queue, it defines an execution
* dependency on commands that were submitted before it, and defines an
* event signal operation which sets the event to the signaled state.
* The first synchronization scope includes every command previously
* submitted to the same queue, including those in the same command
* buffer and batch".
*
* So we should wait for all prior work to be completed before signaling
* the event, this includes all active CPU wait threads spawned for any
* command buffer submitted *before* this.
*/
VkResult result = queue_wait_idle(queue, sync_info);
if (result != VK_SUCCESS)
return result;
struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
p_atomic_set(&info->event->state, info->state);
return VK_SUCCESS;
}
static bool
check_wait_events_complete(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
for (uint32_t i = 0; i < info->event_count; i++) {
if (!p_atomic_read(&info->events[i]->state))
return false;
}
return true;
}
static VkResult
handle_wait_events_cpu_job(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
/* Wait for events to be signaled */
const useconds_t wait_interval_ms = 1;
while (!check_wait_events_complete(job))
usleep(wait_interval_ms * 1000);
return VK_SUCCESS;
}
static VkResult
handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
struct v3dv_job *job,
@@ -1014,10 +960,6 @@ queue_handle_job(struct v3dv_queue *queue,
return handle_end_query_cpu_job(job, counter_pass_idx);
case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
return handle_copy_query_results_cpu_job(job);
case V3DV_JOB_TYPE_CPU_SET_EVENT:
return handle_set_event_cpu_job(queue, job, sync_info);
case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
return handle_wait_events_cpu_job(job);
case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT: