mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 04:20:08 +01:00
v3dv: refactor events
This replaces our current implementation, which is 100% CPU based, with an implementation that uses compute shaders for the GPU-side event functions. The benefit of this solution is that we no longer need to stall on the CPU when we need to handle GPU-side event commands. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19313>
This commit is contained in:
parent
8cd50ef071
commit
ecb01d53fd
4 changed files with 578 additions and 149 deletions
|
|
@ -163,14 +163,6 @@ job_destroy_gpu_csd_resources(struct v3dv_job *job)
|
|||
v3dv_bo_free(job->device, job->csd.shared_memory);
|
||||
}
|
||||
|
||||
static void
|
||||
job_destroy_cpu_wait_events_resources(struct v3dv_job *job)
|
||||
{
|
||||
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
|
||||
assert(job->cmd_buffer);
|
||||
vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events);
|
||||
}
|
||||
|
||||
void
|
||||
v3dv_job_destroy(struct v3dv_job *job)
|
||||
{
|
||||
|
|
@ -191,9 +183,6 @@ v3dv_job_destroy(struct v3dv_job *job)
|
|||
case V3DV_JOB_TYPE_GPU_CSD:
|
||||
job_destroy_gpu_csd_resources(job);
|
||||
break;
|
||||
case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
|
||||
job_destroy_cpu_wait_events_resources(job);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -3757,6 +3746,74 @@ v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
|
|||
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_buffer_emit_set_event(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_event *event,
|
||||
uint8_t value)
|
||||
{
|
||||
assert(value == 0 || value == 1);
|
||||
|
||||
struct v3dv_device *device = cmd_buffer->device;
|
||||
VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
||||
|
||||
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
||||
|
||||
v3dv_CmdBindPipeline(commandBuffer,
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
device->events.set_event_pipeline);
|
||||
|
||||
v3dv_CmdBindDescriptorSets(commandBuffer,
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
device->events.pipeline_layout,
|
||||
0, 1, &device->events.descriptor_set, 0, NULL);
|
||||
|
||||
assert(event->index < device->events.desc_count);
|
||||
uint32_t offset = event->index;
|
||||
v3dv_CmdPushConstants(commandBuffer,
|
||||
device->events.pipeline_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, 4, &offset);
|
||||
|
||||
v3dv_CmdPushConstants(commandBuffer,
|
||||
device->events.pipeline_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
4, 1, &value);
|
||||
|
||||
v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
|
||||
|
||||
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
|
||||
}
|
||||
|
||||
static void
|
||||
cmd_buffer_emit_wait_event(struct v3dv_cmd_buffer *cmd_buffer,
|
||||
struct v3dv_event *event)
|
||||
{
|
||||
struct v3dv_device *device = cmd_buffer->device;
|
||||
VkCommandBuffer commandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
|
||||
|
||||
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
|
||||
|
||||
v3dv_CmdBindPipeline(commandBuffer,
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
device->events.wait_event_pipeline);
|
||||
|
||||
v3dv_CmdBindDescriptorSets(commandBuffer,
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
device->events.pipeline_layout,
|
||||
0, 1, &device->events.descriptor_set, 0, NULL);
|
||||
|
||||
assert(event->index < device->events.desc_count);
|
||||
uint32_t offset = event->index;
|
||||
v3dv_CmdPushConstants(commandBuffer,
|
||||
device->events.pipeline_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, 4, &offset);
|
||||
|
||||
v3dv_CmdDispatch(commandBuffer, 1, 1, 1);
|
||||
|
||||
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, 0, false);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
|
||||
VkEvent _event,
|
||||
|
|
@ -3771,16 +3828,8 @@ v3dv_CmdSetEvent2(VkCommandBuffer commandBuffer,
|
|||
assert(cmd_buffer->state.pass == NULL);
|
||||
assert(cmd_buffer->state.job == NULL);
|
||||
|
||||
struct v3dv_job *job =
|
||||
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
||||
V3DV_JOB_TYPE_CPU_SET_EVENT,
|
||||
cmd_buffer, -1);
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
job->cpu.event_set.event = event;
|
||||
job->cpu.event_set.state = 1;
|
||||
|
||||
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
||||
v3dv_CmdPipelineBarrier2(commandBuffer, pDependencyInfo);
|
||||
cmd_buffer_emit_set_event(cmd_buffer, event, 1);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -3797,16 +3846,7 @@ v3dv_CmdResetEvent2(VkCommandBuffer commandBuffer,
|
|||
assert(cmd_buffer->state.pass == NULL);
|
||||
assert(cmd_buffer->state.job == NULL);
|
||||
|
||||
struct v3dv_job *job =
|
||||
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
||||
V3DV_JOB_TYPE_CPU_SET_EVENT,
|
||||
cmd_buffer, -1);
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
job->cpu.event_set.event = event;
|
||||
job->cpu.event_set.state = 0;
|
||||
|
||||
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
||||
cmd_buffer_emit_set_event(cmd_buffer, event, 0);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -3816,43 +3856,10 @@ v3dv_CmdWaitEvents2(VkCommandBuffer commandBuffer,
|
|||
const VkDependencyInfo *pDependencyInfos)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
assert(eventCount > 0);
|
||||
|
||||
struct v3dv_job *job =
|
||||
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
|
||||
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
|
||||
cmd_buffer, -1);
|
||||
v3dv_return_if_oom(cmd_buffer, NULL);
|
||||
|
||||
const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount;
|
||||
|
||||
job->cpu.event_wait.events =
|
||||
vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!job->cpu.event_wait.events) {
|
||||
v3dv_flag_oom(cmd_buffer, NULL);
|
||||
return;
|
||||
for (uint32_t i = 0; i < eventCount; i++) {
|
||||
struct v3dv_event *event = v3dv_event_from_handle(pEvents[i]);;
|
||||
cmd_buffer_emit_wait_event(cmd_buffer, event);
|
||||
}
|
||||
job->cpu.event_wait.event_count = eventCount;
|
||||
|
||||
for (uint32_t i = 0; i < eventCount; i++)
|
||||
job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]);
|
||||
|
||||
/* vkCmdWaitEvents can be recorded inside a render pass, so we might have
|
||||
* an active job.
|
||||
*
|
||||
* If we are inside a render pass, because we vkCmd(Re)SetEvent can't happen
|
||||
* inside a render pass, it is safe to move the wait job so it happens right
|
||||
* before the current job we are currently recording for the subpass, if any
|
||||
* (it would actually be safe to move it all the way back to right before
|
||||
* the start of the render pass).
|
||||
*
|
||||
* If we are outside a render pass then we should not have any on-going job
|
||||
* and we are free to just add the wait job without restrictions.
|
||||
*/
|
||||
assert(cmd_buffer->state.pass || !cmd_buffer->state.job);
|
||||
list_addtail(&job->list_link, &cmd_buffer->jobs);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@
|
|||
#include "common/v3d_debug.h"
|
||||
|
||||
#include "compiler/v3d_compiler.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
#include "drm-uapi/v3d_drm.h"
|
||||
#include "format/u_format.h"
|
||||
|
|
@ -2022,6 +2023,12 @@ destroy_device_meta(struct v3dv_device *device)
|
|||
v3dv_meta_texel_buffer_copy_finish(device);
|
||||
}
|
||||
|
||||
static bool
|
||||
device_allocate_event_resources(struct v3dv_device *device);
|
||||
|
||||
static void
|
||||
device_free_event_resources(struct v3dv_device *device);
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
||||
const VkDeviceCreateInfo *pCreateInfo,
|
||||
|
|
@ -2101,6 +2108,18 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
util_dynarray_init(&device->device_address_bo_list,
|
||||
device->device_address_mem_ctx);
|
||||
|
||||
mtx_init(&device->events.lock, mtx_plain);
|
||||
if (!device->events.bo) {
|
||||
result = device_allocate_event_resources(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (list_is_empty(&device->events.free_list)) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
*pDevice = v3dv_device_to_handle(device);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -2122,6 +2141,10 @@ v3dv_DestroyDevice(VkDevice _device,
|
|||
|
||||
device->vk.dispatch_table.DeviceWaitIdle(_device);
|
||||
queue_finish(&device->queue);
|
||||
|
||||
device_free_event_resources(device);
|
||||
mtx_destroy(&device->events.lock);
|
||||
|
||||
destroy_device_meta(device);
|
||||
v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
|
||||
|
||||
|
|
@ -2895,6 +2918,411 @@ v3dv_GetMemoryFdKHR(VkDevice _device,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
get_set_event_cs()
|
||||
{
|
||||
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
|
||||
"set event cs");
|
||||
|
||||
b.shader->info.workgroup_size[0] = 1;
|
||||
b.shader->info.workgroup_size[1] = 1;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
|
||||
nir_ssa_def *buf =
|
||||
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
|
||||
.desc_set = 0,
|
||||
.binding = 0,
|
||||
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
||||
|
||||
nir_ssa_def *offset =
|
||||
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
|
||||
|
||||
nir_ssa_def *value =
|
||||
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
|
||||
|
||||
nir_store_ssbo(&b, value, buf, offset,
|
||||
.access = 0, .write_mask = 0x1, .align_mul = 4);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
get_wait_event_cs()
|
||||
{
|
||||
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
|
||||
"wait event cs");
|
||||
|
||||
b.shader->info.workgroup_size[0] = 1;
|
||||
b.shader->info.workgroup_size[1] = 1;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
|
||||
nir_ssa_def *buf =
|
||||
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
|
||||
.desc_set = 0,
|
||||
.binding = 0,
|
||||
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
|
||||
|
||||
nir_ssa_def *offset =
|
||||
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
|
||||
|
||||
nir_loop *loop = nir_push_loop(&b);
|
||||
nir_ssa_def *load =
|
||||
nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
|
||||
nir_ssa_def *value = nir_i2i32(&b, load);
|
||||
|
||||
nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
|
||||
nir_jump(&b, nir_jump_break);
|
||||
nir_pop_if(&b, if_stmt);
|
||||
nir_pop_loop(&b, loop);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
create_compute_pipeline_from_nir(struct v3dv_device *device,
|
||||
nir_shader *nir,
|
||||
VkPipelineLayout pipeline_layout,
|
||||
VkPipeline *pipeline)
|
||||
{
|
||||
struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
|
||||
|
||||
VkPipelineShaderStageCreateInfo set_event_cs_stage = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = vk_shader_module_to_handle(&cs_m),
|
||||
.pName = "main",
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage = set_event_cs_stage,
|
||||
.layout = pipeline_layout,
|
||||
};
|
||||
|
||||
VkResult result =
|
||||
v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
|
||||
1, &info, &device->vk.alloc, pipeline);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool
|
||||
device_create_event_pipelines(struct v3dv_device *device)
|
||||
{
|
||||
VkResult result;
|
||||
|
||||
if (!device->events.descriptor_set_layout) {
|
||||
/* Pipeline layout:
|
||||
* - 1 storage buffer for the BO with the events state.
|
||||
* - 2 push constants:
|
||||
* 0B: offset of the event in the buffer (4 bytes).
|
||||
* 4B: value for the event (1 byte), only used with the set_event_pipeline.
|
||||
*/
|
||||
VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.bindingCount = 1,
|
||||
.pBindings = &descriptor_set_layout_binding,
|
||||
};
|
||||
|
||||
result =
|
||||
v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
|
||||
&descriptor_set_layout_info,
|
||||
&device->vk.alloc,
|
||||
&device->events.descriptor_set_layout);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!device->events.pipeline_layout) {
|
||||
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &device->events.descriptor_set_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges =
|
||||
&(VkPushConstantRange) { VK_SHADER_STAGE_COMPUTE_BIT, 0, 5 },
|
||||
};
|
||||
|
||||
result =
|
||||
v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
|
||||
&pipeline_layout_info,
|
||||
&device->vk.alloc,
|
||||
&device->events.pipeline_layout);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
}
|
||||
|
||||
VkPipeline pipeline;
|
||||
|
||||
if (!device->events.set_event_pipeline) {
|
||||
nir_shader *set_event_cs_nir = get_set_event_cs();
|
||||
result = create_compute_pipeline_from_nir(device,
|
||||
set_event_cs_nir,
|
||||
device->events.pipeline_layout,
|
||||
&pipeline);
|
||||
ralloc_free(set_event_cs_nir);
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
|
||||
device->events.set_event_pipeline = pipeline;
|
||||
}
|
||||
|
||||
if (!device->events.wait_event_pipeline) {
|
||||
nir_shader *wait_event_cs_nir = get_wait_event_cs();
|
||||
result = create_compute_pipeline_from_nir(device,
|
||||
wait_event_cs_nir,
|
||||
device->events.pipeline_layout,
|
||||
&pipeline);
|
||||
ralloc_free(wait_event_cs_nir);
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
|
||||
device->events.wait_event_pipeline = pipeline;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
device_destroy_event_pipelines(struct v3dv_device *device)
|
||||
{
|
||||
VkDevice _device = v3dv_device_to_handle(device);
|
||||
|
||||
v3dv_DestroyPipeline(_device, device->events.set_event_pipeline,
|
||||
&device->vk.alloc);
|
||||
device->events.set_event_pipeline = VK_NULL_HANDLE;
|
||||
|
||||
v3dv_DestroyPipeline(_device, device->events.wait_event_pipeline,
|
||||
&device->vk.alloc);
|
||||
device->events.wait_event_pipeline = VK_NULL_HANDLE;
|
||||
|
||||
v3dv_DestroyPipelineLayout(_device, device->events.pipeline_layout,
|
||||
&device->vk.alloc);
|
||||
device->events.pipeline_layout = VK_NULL_HANDLE;
|
||||
|
||||
v3dv_DestroyDescriptorSetLayout(_device,
|
||||
device->events.descriptor_set_layout,
|
||||
&device->vk.alloc);
|
||||
device->events.descriptor_set_layout = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static bool
|
||||
device_allocate_event_resources(struct v3dv_device *device)
|
||||
{
|
||||
VkResult result = VK_SUCCESS;
|
||||
VkDevice _device = v3dv_device_to_handle(device);
|
||||
|
||||
/* BO with event states. Make sure we always align to a page size (4096)
|
||||
* to ensure we use all the memory the kernel will allocate for the BO.
|
||||
*/
|
||||
const uint32_t bo_size = 4096;
|
||||
struct v3dv_bo *bo = v3dv_bo_alloc(device, bo_size, "events", true);
|
||||
if (!bo) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
device->events.bo = bo;
|
||||
|
||||
if (!v3dv_bo_map(device, bo, bo_size)) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* List of free event state slots in the BO, 1 byte per slot */
|
||||
device->events.desc_count = bo_size;
|
||||
device->events.desc =
|
||||
vk_alloc2(&device->vk.alloc, NULL,
|
||||
device->events.desc_count * sizeof(struct v3dv_event_desc), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
if (!device->events.desc) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
list_inithead(&device->events.free_list);
|
||||
for (int i = 0; i < device->events.desc_count; i++) {
|
||||
device->events.desc[i].index = i;
|
||||
list_addtail(&device->events.desc[i].link, &device->events.free_list);
|
||||
}
|
||||
|
||||
/* Vulkan buffer for the event state BO */
|
||||
VkBufferCreateInfo buf_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.size = bo->size,
|
||||
.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||
};
|
||||
result = v3dv_CreateBuffer(_device, &buf_info, NULL,
|
||||
&device->events.buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
struct v3dv_device_memory *mem =
|
||||
vk_object_zalloc(&device->vk, NULL, sizeof(*mem),
|
||||
VK_OBJECT_TYPE_DEVICE_MEMORY);
|
||||
if (!mem) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
mem->bo = bo;
|
||||
mem->type = &device->pdevice->memory.memoryTypes[0];
|
||||
|
||||
device->events.mem = v3dv_device_memory_to_handle(mem);
|
||||
VkBindBufferMemoryInfo bind_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
|
||||
.buffer = device->events.buffer,
|
||||
.memory = device->events.mem,
|
||||
.memoryOffset = 0,
|
||||
};
|
||||
bind_buffer_memory(&bind_info);
|
||||
|
||||
/* Pipelines */
|
||||
if (!device_create_event_pipelines(device))
|
||||
goto fail;
|
||||
|
||||
/* Descriptor pool & set to access the buffer */
|
||||
VkDescriptorPoolSize pool_size = {
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
};
|
||||
VkDescriptorPoolCreateInfo pool_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
.maxSets = 1,
|
||||
.poolSizeCount = 1,
|
||||
.pPoolSizes = &pool_size,
|
||||
};
|
||||
result =
|
||||
v3dv_CreateDescriptorPool(_device, &pool_info, NULL,
|
||||
&device->events.descriptor_pool);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
VkDescriptorSetAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.descriptorPool = device->events.descriptor_pool,
|
||||
.descriptorSetCount = 1,
|
||||
.pSetLayouts = &device->events.descriptor_set_layout,
|
||||
};
|
||||
result = v3dv_AllocateDescriptorSets(_device, &alloc_info,
|
||||
&device->events.descriptor_set);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
VkDescriptorBufferInfo desc_buf_info = {
|
||||
.buffer = device->events.buffer,
|
||||
.offset = 0,
|
||||
.range = VK_WHOLE_SIZE,
|
||||
};
|
||||
|
||||
VkWriteDescriptorSet write = {
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.dstSet = device->events.descriptor_set,
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.pBufferInfo = &desc_buf_info,
|
||||
};
|
||||
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
device_free_event_resources(device);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
device_free_event_resources(struct v3dv_device *device)
|
||||
{
|
||||
if (device->events.bo) {
|
||||
v3dv_bo_free(device, device->events.bo);
|
||||
device->events.bo = NULL;
|
||||
}
|
||||
|
||||
if (device->events.desc) {
|
||||
vk_free2(&device->vk.alloc, NULL, device->events.desc);
|
||||
device->events.desc = NULL;
|
||||
}
|
||||
|
||||
vk_object_free(&device->vk, NULL,
|
||||
v3dv_device_memory_from_handle(device->events.mem));
|
||||
device->events.mem = VK_NULL_HANDLE;
|
||||
|
||||
v3dv_DestroyBuffer(v3dv_device_to_handle(device),
|
||||
device->events.buffer, NULL);
|
||||
device->events.buffer = VK_NULL_HANDLE;
|
||||
|
||||
v3dv_FreeDescriptorSets(v3dv_device_to_handle(device),
|
||||
device->events.descriptor_pool,
|
||||
1, &device->events.descriptor_set);
|
||||
device->events.descriptor_set = VK_NULL_HANDLE;
|
||||
|
||||
v3dv_DestroyDescriptorPool(v3dv_device_to_handle(device),
|
||||
device->events.descriptor_pool,
|
||||
NULL);
|
||||
device->events.descriptor_pool = VK_NULL_HANDLE;
|
||||
|
||||
device_destroy_event_pipelines(device);
|
||||
}
|
||||
|
||||
static struct v3dv_event_desc *
|
||||
device_allocate_event_descriptor(struct v3dv_device *device)
|
||||
{
|
||||
mtx_lock(&device->events.lock);
|
||||
if (list_is_empty(&device->events.free_list)) {
|
||||
mtx_unlock(&device->events.lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct v3dv_event_desc *desc =
|
||||
list_first_entry(&device->events.free_list, struct v3dv_event_desc, link);
|
||||
list_del(&desc->link);
|
||||
mtx_unlock(&device->events.lock);
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
static void
|
||||
device_free_event_descriptor(struct v3dv_device *device, uint32_t index)
|
||||
{
|
||||
mtx_lock(&device->events.lock);
|
||||
assert(index < device->events.desc_count);
|
||||
list_addtail(&device->events.desc[index].link, &device->events.free_list);
|
||||
mtx_unlock(&device->events.lock);
|
||||
}
|
||||
|
||||
static void
|
||||
device_event_set_value(struct v3dv_device *device,
|
||||
struct v3dv_event *event,
|
||||
uint8_t value)
|
||||
{
|
||||
assert(value == 0 || value == 1);
|
||||
uint8_t *data = (uint8_t *) device->events.bo->map;
|
||||
data[event->index] = value;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
device_event_get_value(struct v3dv_device *device, struct v3dv_event *event)
|
||||
{
|
||||
uint8_t *data = (uint8_t *) device->events.bo->map;
|
||||
return data[event->index];
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
v3dv_CreateEvent(VkDevice _device,
|
||||
const VkEventCreateInfo *pCreateInfo,
|
||||
|
|
@ -2902,17 +3330,29 @@ v3dv_CreateEvent(VkDevice _device,
|
|||
VkEvent *pEvent)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
struct v3dv_event *event =
|
||||
vk_object_zalloc(&device->vk, pAllocator, sizeof(*event),
|
||||
VK_OBJECT_TYPE_EVENT);
|
||||
if (!event)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
if (!event) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Events are created in the unsignaled state */
|
||||
event->state = false;
|
||||
struct v3dv_event_desc *desc = device_allocate_event_descriptor(device);
|
||||
if (!desc) {
|
||||
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
event->index = desc->index;
|
||||
device_event_set_value(device, event, 0);
|
||||
*pEvent = v3dv_event_to_handle(event);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
return result;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
@ -2926,29 +3366,33 @@ v3dv_DestroyEvent(VkDevice _device,
|
|||
if (!event)
|
||||
return;
|
||||
|
||||
device_free_event_descriptor(device, event->index);
|
||||
vk_object_free(&device->vk, pAllocator, event);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
v3dv_GetEventStatus(VkDevice _device, VkEvent _event)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
V3DV_FROM_HANDLE(v3dv_event, event, _event);
|
||||
return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET;
|
||||
return device_event_get_value(device, event) ? VK_EVENT_SET : VK_EVENT_RESET;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
v3dv_SetEvent(VkDevice _device, VkEvent _event)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
V3DV_FROM_HANDLE(v3dv_event, event, _event);
|
||||
p_atomic_set(&event->state, 1);
|
||||
device_event_set_value(device, event, 1);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
v3dv_ResetEvent(VkDevice _device, VkEvent _event)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
V3DV_FROM_HANDLE(v3dv_event, event, _event);
|
||||
p_atomic_set(&event->state, 0);
|
||||
device_event_set_value(device, event, 0);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -454,6 +454,15 @@ struct v3dv_pipeline_cache {
|
|||
bool externally_synchronized;
|
||||
};
|
||||
|
||||
/* This is used to implement a list of free events in the BO we use
|
||||
* to hold event states. The index here is used to calculate the offset
|
||||
* within that BO.
|
||||
*/
|
||||
struct v3dv_event_desc {
|
||||
struct list_head link;
|
||||
uint32_t index;
|
||||
};
|
||||
|
||||
struct v3dv_device {
|
||||
struct vk_device vk;
|
||||
|
||||
|
|
@ -509,6 +518,44 @@ struct v3dv_device {
|
|||
uint32_t bo_size;
|
||||
uint32_t bo_count;
|
||||
|
||||
/* Event handling resources.
|
||||
*
|
||||
* Our implementation of events uses a BO to store event state (signaled vs
|
||||
* reset) and dispatches compute shaders to handle GPU event functions
|
||||
* (signal, reset, wait). This struct holds all the resources required
|
||||
* by the implementation.
|
||||
*/
|
||||
struct {
|
||||
mtx_t lock;
|
||||
|
||||
/* BO for the event states: signaled (1) or reset (0) */
|
||||
struct v3dv_bo *bo;
|
||||
|
||||
/* Events can be created and destroyed. Since we have a dedicated BO for
|
||||
* all events we use, we need to keep track of the free slots within that
|
||||
* BO. For that we use a free list where we link together available event
|
||||
* slots in the form of "descriptors" that include an index (which is
|
||||
* basically an offset into the BO that is available).
|
||||
*/
|
||||
uint32_t desc_count;
|
||||
struct v3dv_event_desc *desc;
|
||||
struct list_head free_list;
|
||||
|
||||
/* Vulkan resources to access the event BO from shaders. We have a
|
||||
* pipeline that sets the state of an event and another that waits on
|
||||
* a single event. Both pipelines require access to the event state BO,
|
||||
* for which we need to allocate a single descriptor set.
|
||||
*/
|
||||
VkBuffer buffer;
|
||||
VkDeviceMemory mem;
|
||||
VkDescriptorSetLayout descriptor_set_layout;
|
||||
VkPipelineLayout pipeline_layout;
|
||||
VkDescriptorPool descriptor_pool;
|
||||
VkDescriptorSet descriptor_set;
|
||||
VkPipeline set_event_pipeline;
|
||||
VkPipeline wait_event_pipeline;
|
||||
} events;
|
||||
|
||||
struct v3dv_pipeline_cache default_pipeline_cache;
|
||||
|
||||
/* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
|
||||
|
|
@ -968,8 +1015,6 @@ enum v3dv_job_type {
|
|||
V3DV_JOB_TYPE_CPU_RESET_QUERIES,
|
||||
V3DV_JOB_TYPE_CPU_END_QUERY,
|
||||
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
|
||||
V3DV_JOB_TYPE_CPU_SET_EVENT,
|
||||
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
|
||||
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
|
||||
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
|
||||
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
|
||||
|
|
@ -1009,17 +1054,6 @@ struct v3dv_submit_sync_info {
|
|||
struct vk_sync_signal *signals;
|
||||
};
|
||||
|
||||
struct v3dv_event_set_cpu_job_info {
|
||||
struct v3dv_event *event;
|
||||
int state;
|
||||
};
|
||||
|
||||
struct v3dv_event_wait_cpu_job_info {
|
||||
/* List of events to wait on */
|
||||
uint32_t event_count;
|
||||
struct v3dv_event **events;
|
||||
};
|
||||
|
||||
struct v3dv_copy_buffer_to_image_cpu_job_info {
|
||||
struct v3dv_image *image;
|
||||
struct v3dv_buffer *buffer;
|
||||
|
|
@ -1186,8 +1220,6 @@ struct v3dv_job {
|
|||
struct v3dv_reset_query_cpu_job_info query_reset;
|
||||
struct v3dv_end_query_cpu_job_info query_end;
|
||||
struct v3dv_copy_query_results_cpu_job_info query_copy_results;
|
||||
struct v3dv_event_set_cpu_job_info event_set;
|
||||
struct v3dv_event_wait_cpu_job_info event_wait;
|
||||
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
|
||||
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
|
||||
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
|
||||
|
|
@ -1643,7 +1675,11 @@ bool v3dv_cmd_buffer_check_needs_store(const struct v3dv_cmd_buffer_state *state
|
|||
|
||||
struct v3dv_event {
|
||||
struct vk_object_base base;
|
||||
int state;
|
||||
|
||||
/* Each event gets a different index, which we use to compute the offset
|
||||
* in the BO we use to track their state (signaled vs reset).
|
||||
*/
|
||||
uint32_t index;
|
||||
};
|
||||
|
||||
struct v3dv_shader_variant {
|
||||
|
|
|
|||
|
|
@ -289,60 +289,6 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
handle_set_event_cpu_job(struct v3dv_queue *queue, struct v3dv_job *job,
|
||||
struct v3dv_submit_sync_info *sync_info)
|
||||
{
|
||||
/* From the Vulkan 1.0 spec:
|
||||
*
|
||||
* "When vkCmdSetEvent is submitted to a queue, it defines an execution
|
||||
* dependency on commands that were submitted before it, and defines an
|
||||
* event signal operation which sets the event to the signaled state.
|
||||
* The first synchronization scope includes every command previously
|
||||
* submitted to the same queue, including those in the same command
|
||||
* buffer and batch".
|
||||
*
|
||||
* So we should wait for all prior work to be completed before signaling
|
||||
* the event, this includes all active CPU wait threads spawned for any
|
||||
* command buffer submitted *before* this.
|
||||
*/
|
||||
|
||||
VkResult result = queue_wait_idle(queue, sync_info);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct v3dv_event_set_cpu_job_info *info = &job->cpu.event_set;
|
||||
p_atomic_set(&info->event->state, info->state);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static bool
|
||||
check_wait_events_complete(struct v3dv_job *job)
|
||||
{
|
||||
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
|
||||
|
||||
struct v3dv_event_wait_cpu_job_info *info = &job->cpu.event_wait;
|
||||
for (uint32_t i = 0; i < info->event_count; i++) {
|
||||
if (!p_atomic_read(&info->events[i]->state))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
handle_wait_events_cpu_job(struct v3dv_job *job)
|
||||
{
|
||||
assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS);
|
||||
|
||||
/* Wait for events to be signaled */
|
||||
const useconds_t wait_interval_ms = 1;
|
||||
while (!check_wait_events_complete(job))
|
||||
usleep(wait_interval_ms * 1000);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
handle_copy_buffer_to_image_cpu_job(struct v3dv_queue *queue,
|
||||
struct v3dv_job *job,
|
||||
|
|
@ -1014,10 +960,6 @@ queue_handle_job(struct v3dv_queue *queue,
|
|||
return handle_end_query_cpu_job(job, counter_pass_idx);
|
||||
case V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS:
|
||||
return handle_copy_query_results_cpu_job(job);
|
||||
case V3DV_JOB_TYPE_CPU_SET_EVENT:
|
||||
return handle_set_event_cpu_job(queue, job, sync_info);
|
||||
case V3DV_JOB_TYPE_CPU_WAIT_EVENTS:
|
||||
return handle_wait_events_cpu_job(job);
|
||||
case V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE:
|
||||
return handle_copy_buffer_to_image_cpu_job(queue, job, sync_info);
|
||||
case V3DV_JOB_TYPE_CPU_CSD_INDIRECT:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue