tu: Suballoc VkEvent BOs

There is no need to burn an entire PAGE_SIZE BO for each event.  In particular,
the pattern of allocating a BO and immediately mmap'ing it is expensive in a VM.
Suballocating cuts down the number of times we do this in
dEQP-VK.api.command_buffers.execute_large_primary from 10000 to 157,
avoiding problems with the test running up against the watchdog timeout.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33721>
This commit is contained in:
Rob Clark 2025-02-24 11:31:34 -08:00 committed by Marge Bot
parent c3cc756cf9
commit 513184fa44
6 changed files with 34 additions and 20 deletions

View file

@ -7619,7 +7619,7 @@ tu_write_event(struct tu_cmd_buffer *cmd, struct tu_event *event,
if (!(stageMask & ~top_of_pipe_flags)) {
tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
tu_cs_emit_qw(cs, event->bo->iova); /* ADDR_LO/HI */
tu_cs_emit_qw(cs, event->bo.iova); /* ADDR_LO/HI */
tu_cs_emit(cs, value);
} else {
/* Use a RB_DONE_TS event to wait for everything to complete. */
@ -7634,7 +7634,7 @@ tu_write_event(struct tu_cmd_buffer *cmd, struct tu_event *event,
.write_enabled = true).value);
}
tu_cs_emit_qw(cs, event->bo->iova);
tu_cs_emit_qw(cs, event->bo.iova);
tu_cs_emit(cs, value);
}
}

View file

@ -2476,6 +2476,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
mtx_init(&device->pipeline_mutex, mtx_plain);
mtx_init(&device->autotune_mutex, mtx_plain);
mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
mtx_init(&device->event_mutex, mtx_plain);
u_rwlock_init(&device->dma_bo_lock);
pthread_mutex_init(&device->submit_mutex, NULL);
@ -2592,6 +2593,10 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
"kgsl_profiling_suballoc");
}
tu_bo_suballocator_init(&device->event_suballoc, device,
getpagesize(), TU_BO_ALLOC_INTERNAL_RESOURCE,
"event_suballoc");
result = tu_bo_init_new(
device, NULL, &device->global_bo, global_size,
(enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
@ -2901,6 +2906,7 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
tu_bo_suballocator_finish(&device->pipeline_suballoc);
tu_bo_suballocator_finish(&device->autotune_suballoc);
tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
tu_bo_suballocator_finish(&device->event_suballoc);
tu_bo_finish(device, device->global_bo);

View file

@ -335,6 +335,11 @@ struct tu_device
struct tu_suballocator kgsl_profiling_suballoc;
mtx_t kgsl_profiling_mutex;
/* VkEvent BO suballocator. Synchronized by event_mutex.
*/
struct tu_suballocator event_suballoc;
mtx_t event_mutex;
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
#define TU_TESS_FACTOR_SIZE (8 * 1024)
#define TU_TESS_PARAM_SIZE (128 * 1024)

View file

@ -26,23 +26,18 @@ tu_CreateEvent(VkDevice _device,
if (!event)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
VkResult result = tu_bo_init_new(device, &event->base, &event->bo, 0x1000,
TU_BO_ALLOC_NO_FLAGS, "event");
mtx_lock(&device->event_mutex);
VkResult result = tu_suballoc_bo_alloc(&event->bo, &device->event_suballoc, 64, 64);
mtx_unlock(&device->event_mutex);
if (result != VK_SUCCESS)
goto fail_alloc;
result = tu_bo_map(device, event->bo, NULL);
if (result != VK_SUCCESS)
goto fail_map;
TU_RMV(event_create, device, pCreateInfo, event);
*pEvent = tu_event_to_handle(event);
return VK_SUCCESS;
fail_map:
tu_bo_finish(device, event->bo);
fail_alloc:
vk_object_free(&device->vk, pAllocator, event);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
@ -61,10 +56,19 @@ tu_DestroyEvent(VkDevice _device,
TU_RMV(resource_destroy, device, event);
tu_bo_finish(device, event->bo);
mtx_lock(&device->event_mutex);
tu_suballoc_bo_free(&device->event_suballoc, &event->bo);
mtx_unlock(&device->event_mutex);
vk_object_free(&device->vk, pAllocator, event);
}
/* Return the pointer that tu_suballoc_bo_map() yields for the event's
 * suballocated BO, cast to uint64_t *.  The host-side entry points
 * (tu_GetEventStatus, tu_SetEvent, tu_ResetEvent) read and write the
 * event's state value through this pointer.
 */
static uint64_t *
tu_event_map(tu_event *event)
{
return (uint64_t *)tu_suballoc_bo_map(&event->bo);
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetEventStatus(VkDevice _device, VkEvent _event)
{
@ -74,7 +78,7 @@ tu_GetEventStatus(VkDevice _device, VkEvent _event)
if (vk_device_is_lost(&device->vk))
return VK_ERROR_DEVICE_LOST;
if (*(uint64_t*) event->bo->map == 1)
if (*tu_event_map(event) == 1)
return VK_EVENT_SET;
return VK_EVENT_RESET;
}
@ -83,7 +87,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device, VkEvent _event)
{
VK_FROM_HANDLE(tu_event, event, _event);
*(uint64_t*) event->bo->map = 1;
*tu_event_map(event) = 1;
return VK_SUCCESS;
}
@ -92,7 +96,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
VK_FROM_HANDLE(tu_event, event, _event);
*(uint64_t*) event->bo->map = 0;
*tu_event_map(event) = 0;
return VK_SUCCESS;
}
@ -146,7 +150,7 @@ tu_CmdWaitEvents2(VkCommandBuffer commandBuffer,
tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
tu_cs_emit_qw(cs, event->bo->iova); /* POLL_ADDR_LO/HI */
tu_cs_emit_qw(cs, event->bo.iova); /* POLL_ADDR_LO/HI */
tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(1));
tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0u));
tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(20));

View file

@ -11,11 +11,12 @@
#define TU_EVENT_H
#include "tu_common.h"
#include "tu_suballoc.h"
struct tu_event
{
struct vk_object_base base;
struct tu_bo *bo;
struct tu_suballoc_bo bo;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(tu_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)

View file

@ -514,10 +514,8 @@ tu_rmv_log_event_create(struct tu_device *device,
vk_rmv_emit_token(&device->vk.memory_trace_data,
VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
if (event->bo) {
tu_rmv_emit_resource_bind_locked(device, token.resource_id,
event->bo->iova, event->bo->size);
}
tu_rmv_emit_resource_bind_locked(device, token.resource_id,
event->bo.iova, event->bo.size);
simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}