diff --git a/meson.build b/meson.build
index 393f3ac1b93..d1a5cc2cb4c 100644
--- a/meson.build
+++ b/meson.build
@@ -95,6 +95,7 @@ with_vulkan_overlay_layer = get_option('vulkan-layers').contains('overlay')
 with_vulkan_device_select_layer = get_option('vulkan-layers').contains('device-select')
 with_vulkan_screenshot_layer = get_option('vulkan-layers').contains('screenshot')
 with_vulkan_vram_report_limit_layer = get_option('vulkan-layers').contains('vram-report-limit')
+with_vulkan_anti_lag_layer = get_option('vulkan-layers').contains('anti-lag')
 with_tools = get_option('tools')
 if with_tools.contains('all')
   with_tools = [
diff --git a/meson.options b/meson.options
index c3c02c4c94f..cd0e56cc429 100644
--- a/meson.options
+++ b/meson.options
@@ -299,7 +299,7 @@ option(
   type : 'array',
   value : [],
   choices : [
-    'device-select', 'intel-nullhw', 'overlay', 'screenshot',
+    'device-select', 'intel-nullhw', 'overlay', 'screenshot', 'anti-lag',
     'vram-report-limit',
   ],
   description : 'List of vulkan layers to build'
diff --git a/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json b/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
new file mode 100644
index 00000000000..4e2ab794c9e
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
@@ -0,0 +1,26 @@
+{
+    "file_format_version": "1.2.1",
+    "layer": {
+        "name": "VK_LAYER_MESA_anti_lag",
+        "type": "GLOBAL",
+        "library_path": "libVkLayer_MESA_anti_lag.so",
+        "api_version": "1.4.303",
+        "implementation_version": "1",
+        "description": "Open-source implementation of the VK_AMD_anti_lag extension.",
+        "functions": {
+            "vkNegotiateLoaderLayerInterfaceVersion": "anti_lag_NegotiateLoaderLayerInterfaceVersion"
+        },
+        "device_extensions": [
+            {
+                "name": "VK_AMD_anti_lag",
+                "spec_version": "1",
+                "entrypoints": [
+                    "vkAntiLagUpdateAMD"
+                ]
+            }
+        ],
+        "disable_environment": {
+            "DISABLE_LAYER_MESA_ANTI_LAG": "1"
+        }
+    }
+}
\ No newline at end of file
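For context, this is roughly how an application opts in to the extension that this manifest advertises. A minimal sketch, not part of the patch; the helper name and queue setup are hypothetical:

#include <vulkan/vulkan_core.h>

/* Hypothetical helper: create a device with VK_AMD_anti_lag enabled. */
VkResult create_device_with_anti_lag(VkPhysicalDevice phys,
                                     const VkDeviceQueueCreateInfo *queue_info)
{
   VkPhysicalDeviceAntiLagFeaturesAMD anti_lag = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD,
      .antiLag = VK_TRUE,
   };
   const char *ext = VK_AMD_ANTI_LAG_EXTENSION_NAME;
   VkDeviceCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
      .pNext = &anti_lag, /* feature chain inspected by the layer's CreateDevice hook */
      .queueCreateInfoCount = 1,
      .pQueueCreateInfos = queue_info,
      .enabledExtensionCount = 1,
      .ppEnabledExtensionNames = &ext,
   };
   VkDevice dev;
   return vkCreateDevice(phys, &info, NULL, &dev);
}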
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.c b/src/vulkan/anti-lag-layer/anti_lag_layer.c
new file mode 100644
index 00000000000..6c21e074024
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "anti_lag_layer.h"
+#include <assert.h>
+#include "util/os_time.h"
+#include "util/simple_mtx.h"
+#include "vulkan/vulkan_core.h"
+#include "ringbuffer.h"
+#include "vk_alloc.h"
+#include "vk_util.h"
+
+static bool
+evaluate_frame(device_context *ctx, frame *frame, bool force_wait)
+{
+   if (frame->state != FRAME_PRESENT) {
+      /* This frame is not finished yet. */
+      assert(!force_wait);
+      return false;
+   }
+
+   int query_flags = VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT;
+   const uint32_t frame_idx = ringbuffer_index(ctx->frames, frame);
+
+   /* Before we commit to completing a frame, all submits on all queues must have completed. */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+      ringbuffer_lock(queue_ctx->queries);
+      uint64_t expected_signal_value = queue_ctx->semaphore_value - queue_ctx->queries.size +
+                                       queue_ctx->submissions_per_frame[frame_idx];
+      ringbuffer_unlock(queue_ctx->queries);
+
+      if (force_wait) {
+         /* Wait for the timeline semaphore of the frame to be signaled. */
+         struct VkSemaphoreWaitInfo wait_info = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+            .semaphoreCount = 1,
+            .pSemaphores = &queue_ctx->semaphore,
+            .pValues = &expected_signal_value,
+         };
+         ctx->vtable.WaitSemaphores(ctx->device, &wait_info, UINT64_MAX);
+      } else {
+         /* Return early if the last timeline semaphore of the frame has not been signaled yet. */
+         uint64_t signal_value;
+         ctx->vtable.GetSemaphoreCounterValue(ctx->device, queue_ctx->semaphore, &signal_value);
+         if (signal_value < expected_signal_value)
+            return false;
+      }
+   }
+
+   /* For each queue, retrieve timestamp query results. */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+
+      /* As we hold a global mtx and this is the only place where queries are freed,
+       * we don't need to lock the query ringbuffer here in order to read the first entry.
+       */
+      struct query *query = ringbuffer_first(queue_ctx->queries);
+      uint32_t query_idx = ringbuffer_index(queue_ctx->queries, query);
+      int num_timestamps =
+         MIN2(queue_ctx->submissions_per_frame[frame_idx], MAX_QUERIES - query_idx);
+
+      while (num_timestamps > 0) {
+         /* Retrieve timestamp results from this queue. */
+         ctx->vtable.GetQueryPoolResults(ctx->device, queue_ctx->queryPool, query_idx,
+                                         num_timestamps, num_timestamps * sizeof(struct query),
+                                         &query->begin_gpu_ts, sizeof(struct query), query_flags);
+
+         ringbuffer_lock(queue_ctx->queries);
+         for (unsigned j = 0; j < num_timestamps; j++) {
+
+            /* Calibrate device timestamps. */
+            query->begin_gpu_ts =
+               ctx->calibration.delta +
+               (uint64_t)(query->begin_gpu_ts * ctx->calibration.timestamp_period);
+            if (query->begin_gpu_ts > query->submit_cpu_ts)
+               frame->min_delay =
+                  MIN2(frame->min_delay, query->begin_gpu_ts - query->submit_cpu_ts);
+
+            /* Check if we can reset half of the query pool at once. */
+            uint32_t next_idx = ringbuffer_index(queue_ctx->queries, query) + 1;
+            const bool reset = next_idx == MAX_QUERIES || next_idx == MAX_QUERIES / 2;
+            if (reset) {
+               ringbuffer_unlock(queue_ctx->queries);
+               ctx->vtable.ResetQueryPool(ctx->device, queue_ctx->queryPool,
+                                          next_idx - MAX_QUERIES / 2, MAX_QUERIES / 2);
+               ringbuffer_lock(queue_ctx->queries);
+            }
+
+            /* Free query. */
+            ringbuffer_free(queue_ctx->queries, query);
+            queue_ctx->submissions_per_frame[frame_idx]--;
+
+            query = ringbuffer_first(queue_ctx->queries);
+         }
+
+         /* Ensure that the total number of queries across all frames is correct. */
+         ASSERTED uint32_t count = 0;
+         for (unsigned i = 0; i < MAX_FRAMES; i++)
+            count += queue_ctx->submissions_per_frame[i];
+         assert(count == queue_ctx->queries.size);
+
+         query_idx = ringbuffer_index(queue_ctx->queries, query);
+         num_timestamps =
+            MIN2(queue_ctx->submissions_per_frame[frame_idx], MAX_QUERIES - query_idx);
+
+         ringbuffer_unlock(queue_ctx->queries);
+      }
+   }
+
+   frame->min_delay++; /* wraps UINT64_MAX to 0 in case there were no submissions. */
+
+   return true;
+}
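Why the expected_signal_value above identifies the frame's last submission, written out once (my reading of the code, not a comment from the patch):

/* Per queue, under the query ringbuffer lock:
 *
 *   semaphore_value         counts every submission ever tagged on this queue,
 *   queries.size            counts tagged submissions not yet evaluated,
 *   submissions_per_frame   counts not-yet-evaluated submissions of THIS frame.
 *
 * Frames are evaluated oldest-first, so this frame's pending queries are the
 * oldest live ones. Its last submission therefore signalled the value:
 *
 *   expected = semaphore_value - queries.size + submissions_per_frame[frame]
 */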
+
+static bool
+calibrate_timestamps(device_context *ctx)
+{
+   uint64_t ts[2];
+   uint64_t deviation;
+
+   VkCalibratedTimestampInfoKHR info[2] = {
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
+         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
+      },
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
+         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR,
+      },
+   };
+
+   VkResult result = ctx->vtable.GetCalibratedTimestampsKHR(ctx->device, 2, info, ts, &deviation);
+   if (result == VK_SUCCESS) {
+      /* We take a moving average in order to reduce variance. */
+      int64_t new_delta = ts[0] - (int64_t)(ts[1] * ctx->calibration.timestamp_period);
+
+      if (ctx->calibration.delta == 0) {
+         ctx->calibration.delta = new_delta;
+      } else {
+         int64_t diff = new_delta - ctx->calibration.delta;
+         ctx->calibration.delta += diff / 8;
+      }
+
+      /* Take a new calibrated timestamp every second. */
+      ctx->calibration.recalibrate_when = ts[0] + 1000000000ull;
+   }
+
+   return result == VK_SUCCESS;
+}
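The `diff / 8` update above is an exponentially weighted moving average with smoothing factor 1/8. The same rule in isolation, as a sketch (function name is illustrative):

#include <stdint.h>

/* EWMA with alpha = 1/8: delta_new = (7/8) * delta_old + (1/8) * sample,
 * written incrementally exactly as calibrate_timestamps() does. */
static int64_t ewma_update(int64_t delta, int64_t sample)
{
   return delta + (sample - delta) / 8;
}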
+
+static void
+begin_next_frame(device_context *ctx)
+{
+   frame *next_frame;
+   if (ctx->active_frame) {
+      assert(ctx->active_frame->state == FRAME_SUBMIT);
+      ctx->active_frame->state = FRAME_PRESENT;
+      next_frame = ringbuffer_next(ctx->frames, ctx->active_frame);
+   } else {
+      next_frame = ringbuffer_last(ctx->frames);
+   }
+
+   /* If there is a frame ready, it becomes active. */
+   if (next_frame->state == FRAME_INPUT) {
+      next_frame->state = FRAME_SUBMIT;
+      ctx->active_frame = next_frame;
+   } else {
+      ctx->active_frame = NULL;
+   }
+}
+
+static void
+anti_lag_disable(device_context *ctx)
+{
+   ringbuffer_lock(ctx->frames);
+   while (ctx->frames.size) {
+      /* Pass force_wait=true, so that all pending timestamp queries get completed. */
+      begin_next_frame(ctx);
+      frame *frame = ringbuffer_first(ctx->frames);
+      evaluate_frame(ctx, frame, true);
+      frame->state = FRAME_INVALID;
+      ringbuffer_free(ctx->frames, frame);
+   }
+   assert(!ctx->active_frame);
+   ringbuffer_unlock(ctx->frames);
+}
+
+#define TARGET_DELAY 4000000ll /* 4 ms */
+/**
+ * Returns the amount of time that we want the next frame to be delayed.
+ *
+ * The algorithm used by this function is very simplistic and only aims
+ * to minimize the delay between calls to vkQueueSubmit or vkQueueSubmit2
+ * and the start of execution of the submission.
+ */
+static int64_t
+get_wait_time(device_context *ctx)
+{
+   /* Take the previously evaluated frame's delay as baseline. */
+   int64_t imposed_delay = ctx->base_delay;
+   int64_t adaptation = 0;
+
+   ringbuffer_lock(ctx->frames);
+   /* In case our ringbuffer is completely full and no frame is in PRESENT stage,
+    * just move the oldest frame to PRESENT stage, and force-wait.
+    */
+   bool force_wait = ctx->frames.size == MAX_FRAMES;
+   frame *next_frame = ringbuffer_first(ctx->frames);
+   if (force_wait && next_frame->state != FRAME_PRESENT)
+      begin_next_frame(ctx);
+
+   /* Also force-wait for the oldest frame if there are already two frames in PRESENT stage. */
+   force_wait |= ringbuffer_next(ctx->frames, next_frame)->state == FRAME_PRESENT;
+   ringbuffer_unlock(ctx->frames);
+
+   /* Take newly evaluated frames into consideration. */
+   while (evaluate_frame(ctx, next_frame, force_wait)) {
+
+      if (next_frame->min_delay < TARGET_DELAY / 2 && ctx->adaptation <= 0) {
+         /* If there is no delay between submission and GPU start, halve the base delay and
+          * set the delay for this frame to zero, in order to account for sudden changes.
+          */
+         ctx->base_delay = ctx->base_delay / 2;
+         adaptation = -ctx->base_delay;
+      } else {
+         /* We use a kind of exponentially weighted moving average here, in
+          * order to determine a base delay. We use a smoothing factor of roughly
+          * 3%, but don't discount the previous value. This helps keep the delay
+          * slightly below the 4 ms target, most of the time.
+          */
+         int64_t diff = (int64_t)next_frame->min_delay - TARGET_DELAY;
+         ctx->base_delay = MAX2(0, ctx->base_delay + diff / 32); /* corresponds to ~3 % */
+
+         /* As the base delay gets adjusted rather slowly, we additionally use half of the
+          * diff as adaptation delay to account for sudden changes. A quarter of the adaptation
+          * is then subtracted for the next frame, so that we can avoid overcompensation.
+          */
+         adaptation = diff / 2 - ctx->adaptation / 4;
+      }
+
+      /* We only need space for one frame. */
+      force_wait = false;
+
+      ringbuffer_lock(ctx->frames);
+      next_frame->state = FRAME_INVALID;
+      ringbuffer_free(ctx->frames, next_frame);
+      next_frame = ringbuffer_first(ctx->frames);
+      ringbuffer_unlock(ctx->frames);
+   }
+   imposed_delay = ctx->base_delay + adaptation;
+   ctx->adaptation = adaptation;
+
+   if (imposed_delay > 100000000) {
+      /* This corresponds to <10 FPS. Something might have gone wrong. */
+      calibrate_timestamps(ctx);
+      ctx->base_delay = ctx->adaptation = imposed_delay = 0;
+   }
+
+   return MAX2(0, imposed_delay);
+}
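One controller step with made-up numbers, to make the update rule concrete (values are illustrative, not from the patch):

/* Assume base_delay = 2 ms, previous adaptation = 1 ms, and a frame that
 * measured min_delay = 12 ms against the 4 ms target:
 *
 *   diff       = 12 ms - 4 ms            = 8 ms
 *   base_delay = 2 ms + 8 ms / 32        = 2.25 ms
 *   adaptation = 8 ms / 2 - 1 ms / 4     = 3.75 ms
 *   imposed    = base_delay + adaptation = 6 ms
 *
 * So the next frame is delayed by 6 ms, pulling CPU submission closer to
 * the point where the GPU can actually start executing it. */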
+
+static void
+reset_frame(frame *frame)
+{
+   assert(frame->state == FRAME_INVALID);
+   frame->frame_idx = 0;
+   frame->frame_start_time = 0;
+   frame->min_delay = UINT64_MAX;
+   frame->state = FRAME_INPUT;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+anti_lag_AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD *pData)
+{
+   if (pData == NULL)
+      return;
+
+   device_context *ctx = get_device_context(device);
+   if (pData->mode == VK_ANTI_LAG_MODE_OFF_AMD) {
+      /* The application requests to disable anti-lag. */
+      simple_mtx_lock(&ctx->mtx);
+      anti_lag_disable(ctx);
+      simple_mtx_unlock(&ctx->mtx);
+      return;
+   }
+
+   uint64_t frame_idx = 0;
+   int64_t now = os_time_get_nano();
+   int64_t imposed_delay = 0;
+   int64_t last_frame_begin = 0;
+
+   if (pData->pPresentationInfo) {
+      /* The same frameIndex value should be used with VK_ANTI_LAG_STAGE_INPUT_AMD before
+       * the frame begins and with VK_ANTI_LAG_STAGE_PRESENT_AMD when the frame ends.
+       */
+      frame_idx = pData->pPresentationInfo->frameIndex;
+
+      /* This marks the end of the current frame. */
+      if (pData->pPresentationInfo->stage == VK_ANTI_LAG_STAGE_PRESENT_AMD) {
+         /* If there is already a new frame pending, any submission that happens afterwards
+          * gets associated with the new frame.
+          */
+         ringbuffer_lock(ctx->frames);
+         /* Check that the currently active frame is indeed the frame we are ending now. */
+         while (ctx->active_frame && ctx->active_frame->frame_idx <= frame_idx) {
+            begin_next_frame(ctx);
+         }
+         ringbuffer_unlock(ctx->frames);
+         return;
+      }
+   }
+
+   /* Lock this function, in order to avoid race conditions on frame allocation. */
+   simple_mtx_lock(&ctx->mtx);
+
+   /* VK_ANTI_LAG_STAGE_INPUT_AMD: This marks the beginning of a new frame.
+    * Evaluate previous frames in order to determine the wait time.
+    */
+   imposed_delay = get_wait_time(ctx);
+   int64_t next_deadline = now + imposed_delay;
+
+   /* Ensure maxFPS adherence. */
+   if (pData->maxFPS) {
+      int64_t frametime_period = 1000000000u / pData->maxFPS;
+      last_frame_begin = ringbuffer_last(ctx->frames)->frame_start_time;
+      next_deadline = MAX2(next_deadline, last_frame_begin + frametime_period);
+   }
+
+   /* Recalibrate every now and then. */
+   if (next_deadline > ctx->calibration.recalibrate_when)
+      calibrate_timestamps(ctx);
+
+   /* Sleep until the deadline is met. */
+   os_time_nanosleep_until(next_deadline);
+
+   /* Initialize new frame. */
+   ringbuffer_lock(ctx->frames);
+   frame *new_frame = ringbuffer_alloc(ctx->frames);
+   reset_frame(new_frame);
+   new_frame->frame_start_time = next_deadline;
+   new_frame->imposed_delay = imposed_delay;
+   new_frame->frame_idx = frame_idx;
+
+   /* Immediately set the frame active if there is no other frame already active. */
+   if (!ctx->active_frame)
+      begin_next_frame(ctx);
+
+   ringbuffer_unlock(ctx->frames);
+   simple_mtx_unlock(&ctx->mtx);
+}
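For reference, the calling pattern this entry point expects from an application looks roughly like this. A frame-loop sketch with hypothetical handles; vkAntiLagUpdateAMD has to be fetched through vkGetDeviceProcAddr:

#include <vulkan/vulkan_core.h>

/* Sketch of an application frame using VK_AMD_anti_lag. */
void app_frame(VkDevice dev, PFN_vkAntiLagUpdateAMD pfnAntiLagUpdateAMD, uint64_t frame_idx)
{
   VkAntiLagPresentationInfoAMD present_info = {
      .sType = VK_STRUCTURE_TYPE_ANTI_LAG_PRESENTATION_INFO_AMD,
      .stage = VK_ANTI_LAG_STAGE_INPUT_AMD,
      .frameIndex = frame_idx,
   };
   VkAntiLagDataAMD data = {
      .sType = VK_STRUCTURE_TYPE_ANTI_LAG_DATA_AMD,
      .mode = VK_ANTI_LAG_MODE_ON_AMD,
      .maxFPS = 0, /* no frame-rate cap */
      .pPresentationInfo = &present_info,
   };
   pfnAntiLagUpdateAMD(dev, &data); /* may sleep: this is where the layer imposes the delay */
   /* ...sample input, record and submit work... */
   present_info.stage = VK_ANTI_LAG_STAGE_PRESENT_AMD;
   pfnAntiLagUpdateAMD(dev, &data); /* marks the end of the frame, before vkQueuePresentKHR */
   /* ...vkQueuePresentKHR(...)... */
}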
+
+static queue_context *
+get_queue_context(device_context *ctx, VkQueue queue)
+{
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      if (ctx->queues[i].queue == queue)
+         return &ctx->queues[i];
+   }
+
+   return NULL;
+}
+
+static struct query *
+allocate_query(device_context *ctx, queue_context *queue_ctx)
+{
+   if (!ctx->active_frame)
+      return NULL;
+
+   /* Allow for a single frame to use at most half of the query pool. */
+   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+   if (queue_ctx->submissions_per_frame[frame_idx] > MAX_QUERIES / 2)
+      return NULL;
+
+   /* Check that the next query index has been reset properly:
+    *
+    * We use some double-buffering here in order to reduce the number of
+    * vkResetQueryPool calls.
+    * Return NULL if the next query-index allocation crosses into the half
+    * which still contains active queries.
+    */
+   if (queue_ctx->queries.size > MAX_QUERIES / 2) {
+      struct query *last_query = ringbuffer_last(queue_ctx->queries);
+      uint32_t next_idx = ringbuffer_index(queue_ctx->queries, last_query) + 1;
+      if (next_idx == MAX_QUERIES || next_idx == MAX_QUERIES / 2)
+         return NULL;
+   }
+
+   return ringbuffer_alloc(queue_ctx->queries);
+}
+
+static bool
+get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer *cmdbuffer)
+{
+   uint64_t now = os_time_get_nano();
+
+   /* Begin critical section. */
+   ringbuffer_lock(ctx->frames);
+   ringbuffer_lock(queue_ctx->queries);
+   struct query *query = allocate_query(ctx, queue_ctx);
+   if (query == NULL) {
+      ringbuffer_unlock(queue_ctx->queries);
+      ringbuffer_unlock(ctx->frames);
+      return false;
+   }
+
+   query->submit_cpu_ts = now;
+
+   /* Assign commandBuffer for timestamp. */
+   *cmdbuffer = query->cmdbuffer;
+
+   /* Increment timeline semaphore count. */
+   queue_ctx->semaphore_value++;
+
+   /* Add new submission entry for the current frame. */
+   assert(ctx->active_frame->state == FRAME_SUBMIT);
+   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+   queue_ctx->submissions_per_frame[frame_idx]++;
+
+   ringbuffer_unlock(queue_ctx->queries);
+   ringbuffer_unlock(ctx->frames);
+   return true;
+}
+
+static VkResult
+queue_submit2(device_context *ctx, VkQueue queue, uint32_t submitCount,
+              const VkSubmitInfo2 *pSubmits, VkFence fence, PFN_vkQueueSubmit2 queueSubmit2)
+{
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (!ctx->active_frame || !queue_ctx)
+      return queueSubmit2(queue, submitCount, pSubmits, fence);
+
+   int first = -1;
+   VkCommandBuffer timestamp_cmdbuffer;
+   /* Check if any submission contains commandbuffers. */
+   for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].commandBufferInfoCount) {
+         first = i;
+         break;
+      }
+   }
+
+   /* Get timestamp commandbuffer. */
+   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+      return queueSubmit2(queue, submitCount, pSubmits, fence);
+
+   VkSubmitInfo2 *submits;
+   VkCommandBufferSubmitInfo *cmdbuffers;
+   VkSemaphoreSubmitInfo *semaphores;
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &submits, VkSubmitInfo2, submitCount);
+   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBufferSubmitInfo,
+                     pSubmits[first].commandBufferInfoCount + 1);
+   vk_multialloc_add(&ma, &semaphores, VkSemaphoreSubmitInfo,
+                     pSubmits[first].signalSemaphoreInfoCount + 1);
+   void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buf)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   memcpy(submits, pSubmits, sizeof(VkSubmitInfo2) * submitCount);
+   VkSubmitInfo2 *submit_info = &submits[first];
+
+   /* Add commandbuffer to submission. */
+   cmdbuffers[0] = (VkCommandBufferSubmitInfo){
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+      .commandBuffer = timestamp_cmdbuffer,
+   };
+   memcpy(&cmdbuffers[1], submit_info->pCommandBufferInfos,
+          sizeof(VkCommandBufferSubmitInfo) * submit_info->commandBufferInfoCount);
+   submit_info->pCommandBufferInfos = cmdbuffers;
+   submit_info->commandBufferInfoCount++;
+
+   /* Add timeline semaphore to submission. */
+   memcpy(semaphores, submit_info->pSignalSemaphoreInfos,
+          sizeof(VkSemaphoreSubmitInfo) * submit_info->signalSemaphoreInfoCount);
+   semaphores[submit_info->signalSemaphoreInfoCount] = (VkSemaphoreSubmitInfo){
+      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+      .semaphore = queue_ctx->semaphore,
+      .value = queue_ctx->semaphore_value,
+      .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+   };
+   submit_info->pSignalSemaphoreInfos = semaphores;
+   submit_info->signalSemaphoreInfoCount++;
+
+   /* Submit with added timestamp query commandbuffer. */
+   VkResult res = queueSubmit2(queue, submitCount, submits, fence);
+   vk_free(&ctx->alloc, submits);
+   return res;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                         VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   return queue_submit2(ctx, queue, submitCount, pSubmits, fence, ctx->vtable.QueueSubmit2KHR);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                      VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   return queue_submit2(ctx, queue, submitCount, pSubmits, fence, ctx->vtable.QueueSubmit2);
+}
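The shape of a wrapped submission, summarized as a comment diagram (my illustration of what queue_submit2() builds, not part of the patch):

/*   original:  cmdbuffers [cb0, cb1]               signals [app_sem]
 *   wrapped:   cmdbuffers [timestamp_cb, cb0, cb1] signals [app_sem, layer_timeline]
 *
 * The pre-recorded timestamp command buffer is prepended, so its
 * vkCmdWriteTimestamp marks when the GPU actually starts working on this
 * submission; the appended timeline-semaphore signal tells the layer when
 * the submission has fully completed. */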
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
+                     VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (!ctx->active_frame || !queue_ctx)
+      return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+
+   int first = -1;
+   VkCommandBuffer timestamp_cmdbuffer;
+   /* Check if any submission contains commandbuffers. */
+   for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].commandBufferCount) {
+         first = i;
+         break;
+      }
+   }
+
+   /* Get timestamp commandbuffer. */
+   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+      return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+
+   VkSubmitInfo *submits;
+   VkCommandBuffer *cmdbuffers;
+   VkSemaphore *semaphores;
+   VkTimelineSemaphoreSubmitInfo *semaphore_info;
+   uint64_t *semaphore_values;
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &submits, VkSubmitInfo, submitCount);
+   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBuffer, pSubmits[first].commandBufferCount + 1);
+   vk_multialloc_add(&ma, &semaphores, VkSemaphore, pSubmits[first].signalSemaphoreCount + 1);
+   vk_multialloc_add(&ma, &semaphore_info, VkTimelineSemaphoreSubmitInfo, 1);
+   vk_multialloc_add(&ma, &semaphore_values, uint64_t, pSubmits[first].signalSemaphoreCount + 1);
+   void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buf)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   memcpy(submits, pSubmits, sizeof(VkSubmitInfo) * submitCount);
+   VkSubmitInfo *submit_info = &submits[first];
+
+   /* Add commandbuffer to submission. */
+   cmdbuffers[0] = timestamp_cmdbuffer;
+   memcpy(&cmdbuffers[1], submit_info->pCommandBuffers,
+          sizeof(VkCommandBuffer) * submit_info->commandBufferCount);
+   submit_info->pCommandBuffers = cmdbuffers;
+   submit_info->commandBufferCount++;
+
+   /* Add timeline semaphore to submission. */
+   const VkTimelineSemaphoreSubmitInfo *tlssi =
+      vk_find_struct_const(pSubmits[first].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+   semaphores[0] = queue_ctx->semaphore;
+   memcpy(&semaphores[1], submit_info->pSignalSemaphores,
+          sizeof(VkSemaphore) * submit_info->signalSemaphoreCount);
+   submit_info->pSignalSemaphores = semaphores;
+   submit_info->signalSemaphoreCount++;
+   semaphore_values[0] = queue_ctx->semaphore_value;
+   if (tlssi) {
+      *semaphore_info = *tlssi; /* save original values */
+      memcpy(&semaphore_values[1], tlssi->pSignalSemaphoreValues,
+             sizeof(uint64_t) * tlssi->signalSemaphoreValueCount);
+      ((VkTimelineSemaphoreSubmitInfo *)tlssi)->pSignalSemaphoreValues = semaphore_values;
+      ((VkTimelineSemaphoreSubmitInfo *)tlssi)->signalSemaphoreValueCount =
+         submit_info->signalSemaphoreCount;
+   } else {
+      *semaphore_info = (VkTimelineSemaphoreSubmitInfo){
+         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+         .pNext = submit_info->pNext,
+         .signalSemaphoreValueCount = submit_info->signalSemaphoreCount,
+         .pSignalSemaphoreValues = semaphore_values,
+      };
+      submit_info->pNext = semaphore_info;
+   }
+
+   /* Submit with added timestamp query commandbuffer. */
+   VkResult res = ctx->vtable.QueueSubmit(queue, submitCount, submits, fence);
+   if (tlssi)
+      *(VkTimelineSemaphoreSubmitInfo *)tlssi = *semaphore_info; /* restore */
+   vk_free(&ctx->alloc, buf);
+   return res;
+}
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.h b/src/vulkan/anti-lag-layer/anti_lag_layer.h
new file mode 100644
index 00000000000..31abb0f9aee
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef ANTI_LAG_LAYER_H
+#define ANTI_LAG_LAYER_H
+
+#include "util/simple_mtx.h"
+#include "vulkan/vk_layer.h"
+#include "vulkan/vulkan_core.h"
+#include "ringbuffer.h"
+
+#define MAX_FRAMES 8
+#define MAX_QUERIES 256
+
+enum frame_state {
+   FRAME_INVALID = 0,
+   FRAME_INPUT,   /* Frame is in input stage. */
+   FRAME_SUBMIT,  /* All current queueSubmit calls are associated with this frame. */
+   FRAME_PRESENT, /* Frame is in present stage and latencies can be evaluated.
*/ +}; + +typedef struct frame { + uint64_t frame_idx; + uint64_t frame_start_time; + uint64_t min_delay; + uint64_t imposed_delay; + enum frame_state state; +} frame; + +struct query { + uint64_t begin_gpu_ts; + uint64_t submit_cpu_ts; + VkCommandBuffer cmdbuffer; +}; + +typedef struct queue_context { + VkQueue queue; + uint32_t queue_family_idx; + VkCommandPool cmdPool; + VkQueryPool queryPool; + VkSemaphore semaphore; + uint64_t semaphore_value; + uint8_t submissions_per_frame[MAX_FRAMES]; + RINGBUFFER_DECLARE(queries, struct query, MAX_QUERIES); +} queue_context; + +typedef struct device_context { + + struct DeviceDispatchTable { +#define DECLARE_HOOK(fn) PFN_vk##fn fn + DECLARE_HOOK(GetDeviceProcAddr); + DECLARE_HOOK(SetDeviceLoaderData); + DECLARE_HOOK(DestroyDevice); + DECLARE_HOOK(QueueSubmit); + DECLARE_HOOK(QueueSubmit2); + DECLARE_HOOK(QueueSubmit2KHR); + DECLARE_HOOK(GetDeviceQueue); + DECLARE_HOOK(CreateCommandPool); + DECLARE_HOOK(DestroyCommandPool); + DECLARE_HOOK(CreateQueryPool); + DECLARE_HOOK(ResetQueryPool); + DECLARE_HOOK(DestroyQueryPool); + DECLARE_HOOK(GetQueryPoolResults); + DECLARE_HOOK(AllocateCommandBuffers); + DECLARE_HOOK(FreeCommandBuffers); + DECLARE_HOOK(BeginCommandBuffer); + DECLARE_HOOK(EndCommandBuffer); + DECLARE_HOOK(GetCalibratedTimestampsKHR); + DECLARE_HOOK(CmdWriteTimestamp); + DECLARE_HOOK(CreateSemaphore); + DECLARE_HOOK(DestroySemaphore); + DECLARE_HOOK(GetSemaphoreCounterValue); + DECLARE_HOOK(WaitSemaphores); +#undef DECLARE_HOOK + } vtable; + + VkDevice device; + VkAllocationCallbacks alloc; + simple_mtx_t mtx; + + struct { + int64_t delta; + uint64_t recalibrate_when; + float timestamp_period; + } calibration; + + RINGBUFFER_DECLARE(frames, frame, MAX_FRAMES); + frame *active_frame; + int64_t base_delay; + int64_t adaptation; + + unsigned num_queues; + queue_context queues[]; +} device_context; + +device_context *get_device_context(const void *object); + +void anti_lag_AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD *pData); +VkResult anti_lag_QueueSubmit2KHR(VkQueue queue, uint32_t submitCount, + const VkSubmitInfo2 *pSubmits, VkFence fence); +VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, + VkFence fence); +VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, + VkFence fence); + +VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct); + +#endif /* ANTI_LAG_LAYER_H */ diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c new file mode 100644 index 00000000000..d2ca4a7dd44 --- /dev/null +++ b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c @@ -0,0 +1,899 @@ +/* + * Copyright © 2025 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#include "util/simple_mtx.h" +#include "vulkan/vk_layer.h" +#include "vulkan/vulkan_core.h" +#include "anti_lag_layer.h" +#include "vk_alloc.h" +#include "vk_util.h" + +static uintptr_t +object_to_key(const void *object) +{ + return (uintptr_t)*(uintptr_t *)object; +} + +typedef struct instance_data { + struct InstanceDispatchTable { +#define DECLARE_HOOK(fn) PFN_vk##fn fn + DECLARE_HOOK(GetInstanceProcAddr); + DECLARE_HOOK(CreateInstance); + DECLARE_HOOK(DestroyInstance); + DECLARE_HOOK(CreateDevice); + DECLARE_HOOK(EnumerateDeviceExtensionProperties); + DECLARE_HOOK(GetPhysicalDeviceFeatures2KHR); + DECLARE_HOOK(GetPhysicalDeviceFeatures2); + 
DECLARE_HOOK(GetPhysicalDeviceProperties); + DECLARE_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsEXT); + DECLARE_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsKHR); + DECLARE_HOOK(GetPhysicalDeviceQueueFamilyProperties); +#undef DECLARE_HOOK + } vtable; + + VkInstance instance; + uint32_t apiVersion; + VkAllocationCallbacks alloc; + struct instance_data *next; +} instance_data; + +static void +init_instance_vtable(instance_data *ctx, PFN_vkGetInstanceProcAddr gpa) +{ + ctx->vtable.GetInstanceProcAddr = gpa; +#define INIT_HOOK(fn) ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->instance, "vk" #fn) + INIT_HOOK(CreateInstance); + INIT_HOOK(DestroyInstance); + INIT_HOOK(CreateDevice); + INIT_HOOK(EnumerateDeviceExtensionProperties); + INIT_HOOK(GetPhysicalDeviceFeatures2KHR); + INIT_HOOK(GetPhysicalDeviceFeatures2); + INIT_HOOK(GetPhysicalDeviceProperties); + INIT_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsEXT); + INIT_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsKHR); + INIT_HOOK(GetPhysicalDeviceQueueFamilyProperties); +#undef INIT_HOOK +} + +static simple_mtx_t instance_mtx = SIMPLE_MTX_INITIALIZER; +static instance_data *instance_list = NULL; + +static void +add_instance(instance_data *instance) +{ + simple_mtx_lock(&instance_mtx); + instance_data **ptr = &instance_list; + while (*ptr != NULL) + ptr = &(*ptr)->next; + *ptr = instance; + simple_mtx_unlock(&instance_mtx); +} + +static instance_data * +remove_instance(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&instance_mtx); + instance_data **ptr = &instance_list; + while (*ptr && key != object_to_key((*ptr)->instance)) + ptr = &(*ptr)->next; + + instance_data *ctx = *ptr; + *ptr = ctx ? ctx->next : NULL; + simple_mtx_unlock(&instance_mtx); + return ctx; +} + +static instance_data * +get_instance_data(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&instance_mtx); + instance_data *ctx = instance_list; + while (ctx && key != object_to_key(ctx->instance)) + ctx = ctx->next; + simple_mtx_unlock(&instance_mtx); + return ctx; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkInstance *pInstance) +{ + VkLayerInstanceCreateInfo *chain_info = (VkLayerInstanceCreateInfo *)(pCreateInfo->pNext); + while (chain_info && !(chain_info->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && + chain_info->function == VK_LAYER_LINK_INFO)) { + chain_info = (VkLayerInstanceCreateInfo *)(chain_info->pNext); + } + + assert(chain_info && chain_info->u.pLayerInfo); + PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = + chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkCreateInstance fpCreateInstance = + (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance"); + if (fpCreateInstance == NULL) + return VK_ERROR_INITIALIZATION_FAILED; + + /* Advance the link info for the next element on the chain. */ + chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; + + /* Create Instance. */ + VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance); + if (result != VK_SUCCESS) + return result; + + /* Create Instance context. */ + const VkAllocationCallbacks *alloc = pAllocator ? 
pAllocator : vk_default_allocator(); + void *buf = vk_alloc(alloc, sizeof(instance_data), alignof(instance_data), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!buf) { + PFN_vkDestroyInstance fpDestroyInstance = + (PFN_vkDestroyInstance)fpGetInstanceProcAddr(*pInstance, "vkDestroyInstance"); + fpDestroyInstance(*pInstance, alloc); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + instance_data *ctx = (instance_data *)buf; + ctx->apiVersion = pCreateInfo->pApplicationInfo && pCreateInfo->pApplicationInfo->apiVersion + ? pCreateInfo->pApplicationInfo->apiVersion + : VK_API_VERSION_1_0; + ctx->instance = *pInstance; + ctx->alloc = *alloc; + ctx->next = NULL; + init_instance_vtable(ctx, fpGetInstanceProcAddr); + add_instance(ctx); + + return VK_SUCCESS; +} + +static VKAPI_ATTR void VKAPI_CALL +anti_lag_DestroyInstance(VkInstance instance, const VkAllocationCallbacks *pAllocator) +{ + instance_data *ctx = remove_instance(instance); + if (ctx) { + ctx->vtable.DestroyInstance(instance, pAllocator); + vk_free(&ctx->alloc, ctx); + } +} + +typedef struct device_data { + VkDevice device; + PFN_vkGetDeviceProcAddr GetDeviceProcAddr; + device_context *ctx; /* NULL if anti-lag ext is not enabled. */ + struct device_data *next; +} device_data; + +static void +init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDeviceLoaderData sld, + bool calibrated_timestamps_khr, bool host_query_reset_ext, + bool timeline_semaphore_khr) +{ + ctx->vtable.GetDeviceProcAddr = gpa; + ctx->vtable.SetDeviceLoaderData = sld; +#define INIT_HOOK(fn) ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->device, "vk" #fn) +#define INIT_HOOK_ALIAS(fn, alias, cond) \ + ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->device, cond ? "vk" #alias : "vk" #fn) + INIT_HOOK(DestroyDevice); + INIT_HOOK(QueueSubmit); + INIT_HOOK(QueueSubmit2); + INIT_HOOK(QueueSubmit2KHR); + INIT_HOOK(GetDeviceQueue); + INIT_HOOK(CreateCommandPool); + INIT_HOOK(DestroyCommandPool); + INIT_HOOK(CreateQueryPool); + INIT_HOOK_ALIAS(ResetQueryPool, ResetQueryPoolEXT, host_query_reset_ext); + INIT_HOOK(DestroyQueryPool); + INIT_HOOK(GetQueryPoolResults); + INIT_HOOK(AllocateCommandBuffers); + INIT_HOOK(FreeCommandBuffers); + INIT_HOOK(BeginCommandBuffer); + INIT_HOOK(EndCommandBuffer); + INIT_HOOK_ALIAS(GetCalibratedTimestampsKHR, GetCalibratedTimestampsEXT, !calibrated_timestamps_khr); + INIT_HOOK(CmdWriteTimestamp); + INIT_HOOK(CreateSemaphore); + INIT_HOOK(DestroySemaphore); + INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr); + INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr); +#undef INIT_HOOK +#undef INIT_HOOK_ALIAS +} + +static simple_mtx_t device_mtx = SIMPLE_MTX_INITIALIZER; +static device_data *device_list = NULL; + +static void +add_device(device_data *device) +{ + simple_mtx_lock(&device_mtx); + device_data **ptr = &device_list; + while (*ptr != NULL) + ptr = &(*ptr)->next; + *ptr = device; + simple_mtx_unlock(&device_mtx); +} + +static device_data * +remove_device(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&device_mtx); + device_data **ptr = &device_list; + while (*ptr && key != object_to_key((*ptr)->device)) + ptr = &(*ptr)->next; + + device_data *ctx = *ptr; + *ptr = ctx ? 
ctx->next : NULL; + simple_mtx_unlock(&device_mtx); + return ctx; +} + +static device_data * +get_device_data(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&device_mtx); + device_data *ctx = device_list; + while (ctx && key != object_to_key(ctx->device)) + ctx = ctx->next; + simple_mtx_unlock(&device_mtx); + return ctx; +} + +device_context * +get_device_context(const void *object) +{ + device_data *data = get_device_data(object); + assert(data && data->ctx); + return data->ctx; +} + +static VkLayerDeviceCreateInfo * +get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, VkLayerFunction func) +{ + vk_foreach_struct_const (item, pCreateInfo->pNext) { + if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && + ((VkLayerDeviceCreateInfo *)item)->function == func) + return (VkLayerDeviceCreateInfo *)item; + } + return NULL; +} + +static bool +should_enable_layer(instance_data *ctx, VkPhysicalDevice physicalDevice, + VkPhysicalDeviceAntiLagFeaturesAMD ext_feature) +{ + /* The extension is not requested by the application. */ + if (!ext_feature.antiLag) + return false; + + /* Ensure that the underlying implementation does not expose VK_AMD_anti_lag itself. */ + ext_feature.antiLag = false; + VkPhysicalDeviceFeatures2 features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &ext_feature, + }; + + if (ctx->vtable.GetPhysicalDeviceFeatures2KHR) { + ctx->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, &features); + return !ext_feature.antiLag; + } + + if (ctx->vtable.GetPhysicalDeviceFeatures2) { + ctx->vtable.GetPhysicalDeviceFeatures2(physicalDevice, &features); + return !ext_feature.antiLag; + } + + return false; +} + +static bool +check_calibrated_timestamps(instance_data *data, VkPhysicalDevice physicalDevice, bool *has_khr) +{ + VkResult res; + uint32_t count = 0; + res = data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, NULL, &count, NULL); + VkExtensionProperties *extensions = + vk_alloc(&data->alloc, count * sizeof(VkExtensionProperties), alignof(VkExtensionProperties), + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!extensions) + return false; + + res |= data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, NULL, &count, extensions); + + *has_khr = false; + bool has_ext = false; + if (res == VK_SUCCESS) { + for (unsigned i = 0; i < count; i++) { + if (strcmp(extensions[i].extensionName, VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + *has_khr = true; + if (strcmp(extensions[i].extensionName, VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + has_ext = true; + } + } + + vk_free(&data->alloc, extensions); + return *has_khr || has_ext; +} + +/* Initialize per-queue context: + * + * This includes creating one CommandPool and one QueryPool per Queue as well as + * recording one CommandBuffer per timestamp query. 
+ */ +static VkResult +init_queue_context(device_context *ctx, queue_context *queue_ctx) +{ +#define CHECK_RESULT(res, label) \ + if (res != VK_SUCCESS) { \ + goto label; \ + } + + VkResult result; + + /* Create command pool */ + struct VkCommandPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .queueFamilyIndex = queue_ctx->queue_family_idx, + }; + result = + ctx->vtable.CreateCommandPool(ctx->device, &pool_info, &ctx->alloc, &queue_ctx->cmdPool); + CHECK_RESULT(result, fail_cmdpool) + + /* Create query pool */ + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_TIMESTAMP, + .queryCount = MAX_QUERIES, + }; + result = ctx->vtable.CreateQueryPool(ctx->device, &query_pool_info, &ctx->alloc, + &queue_ctx->queryPool); + CHECK_RESULT(result, fail_querypool) + ctx->vtable.ResetQueryPool(ctx->device, queue_ctx->queryPool, 0, MAX_QUERIES); + ringbuffer_init(queue_ctx->queries); + + /* Create timeline semaphore */ + VkSemaphoreTypeCreateInfo timelineCreateInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .pNext = NULL, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + VkSemaphoreCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &timelineCreateInfo, + .flags = 0, + }; + result = + ctx->vtable.CreateSemaphore(ctx->device, &createInfo, &ctx->alloc, &queue_ctx->semaphore); + CHECK_RESULT(result, fail_semaphore); + + for (unsigned j = 0; j < MAX_QUERIES; j++) { + struct query *query = &queue_ctx->queries.data[j]; + + /* Allocate commandBuffer for timestamp. */ + VkCommandBufferAllocateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = queue_ctx->cmdPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + result = ctx->vtable.AllocateCommandBuffers(ctx->device, &buffer_info, &query->cmdbuffer); + CHECK_RESULT(result, fail) + result = ctx->vtable.SetDeviceLoaderData(ctx->device, query->cmdbuffer); + CHECK_RESULT(result, fail) + + /* Record commandbuffer. 
*/ + VkCommandBufferBeginInfo beginInfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }; + + result = ctx->vtable.BeginCommandBuffer(query->cmdbuffer, &beginInfo); + CHECK_RESULT(result, fail) + ctx->vtable.CmdWriteTimestamp(query->cmdbuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + queue_ctx->queryPool, j); + result = ctx->vtable.EndCommandBuffer(query->cmdbuffer); + CHECK_RESULT(result, fail) + } + +#undef CHECK_RESULT + return result; + +fail: + ctx->vtable.DestroySemaphore(ctx->device, queue_ctx->semaphore, &ctx->alloc); +fail_semaphore: + ctx->vtable.DestroyQueryPool(ctx->device, queue_ctx->queryPool, &ctx->alloc); +fail_querypool: + ctx->vtable.DestroyCommandPool(ctx->device, queue_ctx->cmdPool, &ctx->alloc); +fail_cmdpool: + for (queue_context *qctx = ctx->queues; qctx != queue_ctx; qctx++) { + ctx->vtable.DestroyQueryPool(ctx->device, qctx->queryPool, &ctx->alloc); + ctx->vtable.DestroyCommandPool(ctx->device, qctx->cmdPool, &ctx->alloc); + } + + return result; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) +{ + instance_data *instance_ctx = get_instance_data(physicalDevice); + VkLayerDeviceCreateInfo *chain_info = get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; + PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = + chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkCreateDevice fpCreateDevice = + (PFN_vkCreateDevice)fpGetInstanceProcAddr(instance_ctx->instance, "vkCreateDevice"); + if (fpCreateDevice == NULL) + return VK_ERROR_INITIALIZATION_FAILED; + + /* Advance the link info for the next element on the chain. */ + chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; + + const VkAllocationCallbacks *alloc = pAllocator ? pAllocator : &instance_ctx->alloc; + device_data *data; + VkResult result; + + /* Only allocate a context and add to dispatch if the extension is enabled. */ + const VkPhysicalDeviceAntiLagFeaturesAMD *ext_features = + vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + bool enable = ext_features && should_enable_layer(instance_ctx, physicalDevice, *ext_features); + if (enable) { + /* Count queues with sufficient timestamp valid bits. */ + // TODO: make it work with less than 64 valid bits + unsigned num_queue_families = 0; + unsigned num_queues = 0; + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) + num_queue_families = + MAX2(num_queue_families, pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex + 1); + VkQueueFamilyProperties *queue_family_props = + vk_alloc(alloc, num_queue_families * sizeof(VkQueueFamilyProperties), + alignof(VkQueueFamilyProperties), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!queue_family_props) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + instance_ctx->vtable.GetPhysicalDeviceQueueFamilyProperties( + physicalDevice, &num_queue_families, queue_family_props); + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { + uint32_t queue_family_idx = pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex; + if (queue_family_props[queue_family_idx].timestampValidBits == 64 && + (queue_family_props[queue_family_idx].queueFlags & + (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) { + num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount; + } + } + + /* Allocate the context. 
*/ + device_context *ctx; + queue_context *queues; + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &data, device_data, 1); + vk_multialloc_add(&ma, &ctx, struct device_context, 1); + vk_multialloc_add(&ma, &queues, queue_context, num_queues); + void *buf = vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!buf) { + vk_free(alloc, queue_family_props); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + VkPhysicalDeviceProperties properties; + instance_ctx->vtable.GetPhysicalDeviceProperties(physicalDevice, &properties); + + /* Ensure that calibrated timestamps and host query reset extensions are enabled. */ + bool has_calibrated_timestamps = false; + bool has_calibrated_timestamps_khr = false; + bool has_vk12 = instance_ctx->apiVersion >= VK_API_VERSION_1_2 && + properties.apiVersion >= VK_API_VERSION_1_2; + bool has_host_query_reset = has_vk12; + bool has_host_query_reset_ext = false; + bool has_timeline_semaphore = has_vk12; + bool has_timeline_semaphore_khr = false; + for (unsigned i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + has_calibrated_timestamps = has_calibrated_timestamps_khr = true; + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + has_calibrated_timestamps = true; + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME) == 0) + has_host_query_reset = has_host_query_reset_ext = true; + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME) == 0) + has_timeline_semaphore = has_timeline_semaphore_khr = true; + } + + /* Add missing extensions. */ + VkDeviceCreateInfo create_info = *pCreateInfo; + const char **ext_names = NULL; + uint32_t num_extra_extensions = + !has_calibrated_timestamps + !has_host_query_reset + !has_timeline_semaphore; + if (num_extra_extensions) { + ext_names = vk_alloc( + alloc, (pCreateInfo->enabledExtensionCount + num_extra_extensions) * sizeof(char *), + alignof(char *), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!ext_names) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + memcpy(ext_names, pCreateInfo->ppEnabledExtensionNames, + sizeof(char *) * pCreateInfo->enabledExtensionCount); + + if (!has_timeline_semaphore) { + has_timeline_semaphore_khr = true; + ext_names[create_info.enabledExtensionCount++] = + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME; + } + if (!has_host_query_reset) { + has_host_query_reset_ext = true; + ext_names[create_info.enabledExtensionCount++] = VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME; + } + if (!has_calibrated_timestamps) { + check_calibrated_timestamps(instance_ctx, physicalDevice, + &has_calibrated_timestamps_khr); + ext_names[create_info.enabledExtensionCount++] = + has_calibrated_timestamps_khr ? VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME + : VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME; + } + create_info.ppEnabledExtensionNames = ext_names; + } + + /* Ensure that hostQueryReset feature is enabled. 
*/
+      const VkPhysicalDeviceVulkan12Features *vk12 =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
+      const VkPhysicalDeviceHostQueryResetFeatures *query_reset =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES);
+      const VkPhysicalDeviceTimelineSemaphoreFeatures *timeline_semaphore =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
+      uint32_t prev_hostQueryReset;
+      uint32_t prev_timelineSemaphore;
+      if (vk12) {
+         prev_hostQueryReset = vk12->hostQueryReset;
+         prev_timelineSemaphore = vk12->timelineSemaphore;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->hostQueryReset = VK_TRUE;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->timelineSemaphore = VK_TRUE;
+      } else {
+         if (query_reset) {
+            prev_hostQueryReset = query_reset->hostQueryReset;
+            ((VkPhysicalDeviceHostQueryResetFeatures *)query_reset)->hostQueryReset = VK_TRUE;
+         } else {
+            VkPhysicalDeviceHostQueryResetFeatures *feat =
+               alloca(sizeof(VkPhysicalDeviceHostQueryResetFeatures));
+            *feat = (VkPhysicalDeviceHostQueryResetFeatures){
+               .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
+               .pNext = (void *)create_info.pNext,
+               .hostQueryReset = VK_TRUE,
+            };
+            create_info.pNext = feat;
+         }
+         if (timeline_semaphore) {
+            prev_timelineSemaphore = timeline_semaphore->timelineSemaphore;
+            ((VkPhysicalDeviceTimelineSemaphoreFeatures *)timeline_semaphore)->timelineSemaphore =
+               VK_TRUE;
+         } else {
+            VkPhysicalDeviceTimelineSemaphoreFeatures *feat =
+               alloca(sizeof(VkPhysicalDeviceTimelineSemaphoreFeatures));
+            *feat = (VkPhysicalDeviceTimelineSemaphoreFeatures){
+               .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
+               .pNext = (void *)create_info.pNext,
+               .timelineSemaphore = VK_TRUE,
+            };
+            create_info.pNext = feat;
+         }
+      }
+
+      /* Create Device. */
+      result = fpCreateDevice(physicalDevice, &create_info, pAllocator, pDevice);
+
+      if (vk12) {
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->hostQueryReset = prev_hostQueryReset;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->timelineSemaphore = prev_timelineSemaphore;
+      } else {
+         if (query_reset)
+            ((VkPhysicalDeviceHostQueryResetFeatures *)query_reset)->hostQueryReset =
+               prev_hostQueryReset;
+         if (timeline_semaphore)
+            ((VkPhysicalDeviceTimelineSemaphoreFeatures *)timeline_semaphore)->timelineSemaphore =
+               prev_timelineSemaphore;
+      }
+      if (ext_names)
+         vk_free(alloc, ext_names);
+
+      if (result != VK_SUCCESS)
+         goto fail;
+
+      /* Initialize Context. */
+      data->ctx = ctx;
+      ctx->device = *pDevice;
+      chain_info = get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK);
+      PFN_vkSetDeviceLoaderData fpSetDeviceLoaderData =
+         (PFN_vkSetDeviceLoaderData)chain_info->u.pfnSetDeviceLoaderData;
+      init_device_vtable(ctx, fpGetDeviceProcAddr, fpSetDeviceLoaderData,
+                         has_calibrated_timestamps_khr, has_host_query_reset_ext,
+                         has_timeline_semaphore_khr);
+      simple_mtx_init(&ctx->mtx, mtx_plain);
+      ctx->num_queues = num_queues;
+      ctx->alloc = *alloc;
+      ctx->calibration.timestamp_period = properties.limits.timestampPeriod;
+      ringbuffer_init(ctx->frames);
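The queue filter that follows relies on two per-family properties. The same check as a standalone sketch (the helper name is illustrative, not part of the patch):

#include <stdbool.h>
#include <vulkan/vulkan_core.h>

/* True if a queue family can carry the layer's timestamp queries:
 * full 64-bit timestamps and GRAPHICS or COMPUTE capability. */
static bool family_usable(const VkQueueFamilyProperties *props)
{
   return props->timestampValidBits == 64 &&
          (props->queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) != 0;
}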
+
+      /* Initialize Queue contexts. */
+      unsigned idx = 0;
+      for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+         /* Skip queue families without sufficient timestamp valid bits.
+          * Also skip queue families which cannot do GRAPHICS or COMPUTE, since they
+          * are always heavily async in nature (DMA transfers and sparse, for example).
+          * Video is also irrelevant here since it should never be a critical path
+          * in a game that wants anti-lag. */
+         uint32_t queue_family_idx = pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex;
+         if (queue_family_props[queue_family_idx].timestampValidBits != 64 ||
+             !(queue_family_props[queue_family_idx].queueFlags &
+               (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
+            continue;
+
+         for (unsigned j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
+            VkQueue queue;
+            ctx->vtable.GetDeviceQueue(*pDevice, queue_family_idx, j, &queue);
+            ctx->queues[idx].queue = queue;
+            ctx->queues[idx].queue_family_idx = queue_family_idx;
+            result = init_queue_context(ctx, &ctx->queues[idx]);
+            idx++;
+            if (result != VK_SUCCESS)
+               goto fail;
+         }
+      }
+      assert(idx == num_queues);
+fail:
+      vk_free(alloc, queue_family_props);
+   } else {
+      data = (device_data *)vk_alloc(alloc, sizeof(device_data), alignof(device_data),
+                                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      if (!data)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+      result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
+      data->ctx = NULL;
+   }
+
+   if (result == VK_SUCCESS) {
+      data->device = *pDevice;
+      data->GetDeviceProcAddr = fpGetDeviceProcAddr;
+      data->next = NULL;
+      add_device(data);
+   } else {
+      vk_free(alloc, data);
+   }
+
+   return result;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+anti_lag_DestroyDevice(VkDevice pDevice, const VkAllocationCallbacks *pAllocator)
+{
+   device_data *data = remove_device(pDevice);
+   assert(data && data->ctx);
+   device_context *ctx = data->ctx;
+
+   /* Destroy per-queue context.
+    * The application must ensure that no work is active on the device.
+    */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+      ctx->vtable.DestroyQueryPool(ctx->device, queue_ctx->queryPool, &ctx->alloc);
+      ctx->vtable.DestroyCommandPool(ctx->device, queue_ctx->cmdPool, &ctx->alloc);
+      ctx->vtable.DestroySemaphore(ctx->device, queue_ctx->semaphore, &ctx->alloc);
+   }
+
+   ctx->vtable.DestroyDevice(pDevice, pAllocator);
+   vk_free(&ctx->alloc, data);
+}
+
+static bool
+is_anti_lag_supported(VkPhysicalDevice physicalDevice)
+{
+   instance_data *data = get_instance_data(physicalDevice);
+   VkPhysicalDeviceProperties properties;
+   data->vtable.GetPhysicalDeviceProperties(physicalDevice, &properties);
+   if (properties.limits.timestampPeriod == 0.0 || !properties.limits.timestampComputeAndGraphics)
+      return false;
+
+   /* Check whether calibrated timestamps are supported. */
+   bool has_khr;
+   if (!check_calibrated_timestamps(data, physicalDevice, &has_khr))
+      return false;
+
+   /* Check whether timeline semaphores and host query reset are supported.
*/ + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + .timelineSemaphore = VK_FALSE, + }; + VkPhysicalDeviceHostQueryResetFeatures query_reset = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, + .pNext = &timeline_semaphore, + .hostQueryReset = VK_FALSE, + }; + VkPhysicalDeviceFeatures2 features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &query_reset, + }; + if (data->vtable.GetPhysicalDeviceFeatures2KHR) + data->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, &features); + else if (data->vtable.GetPhysicalDeviceFeatures2) + data->vtable.GetPhysicalDeviceFeatures2(physicalDevice, &features); + if (!timeline_semaphore.timelineSemaphore || !query_reset.hostQueryReset) + return false; + + /* Check that DEVICE and CLOCK_MONOTONIC time domains are available. */ + VkResult res; + uint32_t count = 0; + PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsKHR ctd = + has_khr ? data->vtable.GetPhysicalDeviceCalibrateableTimeDomainsKHR + : data->vtable.GetPhysicalDeviceCalibrateableTimeDomainsEXT; + res = ctd(physicalDevice, &count, NULL); + VkTimeDomainKHR *time_domains = alloca(count * sizeof(VkTimeDomainKHR)); + res |= ctd(physicalDevice, &count, time_domains); + if (res != VK_SUCCESS) + return false; + + bool has_device_domain = false; + bool has_host_domain = false; + for (unsigned i = 0; i < count; i++) { + has_device_domain |= time_domains[i] == VK_TIME_DOMAIN_DEVICE_KHR; + has_host_domain |= time_domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; + } + + return has_device_domain && has_host_domain; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + instance_data *instance_data = get_instance_data(physicalDevice); + + if (pLayerName && strcmp(pLayerName, "VK_LAYER_MESA_anti_lag") == 0) { + if (!is_anti_lag_supported(physicalDevice)) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties, pPropertyCount); + vk_outarray_append_typed(VkExtensionProperties, &out, prop) + { + *prop = + (VkExtensionProperties){VK_AMD_ANTI_LAG_EXTENSION_NAME, VK_AMD_ANTI_LAG_SPEC_VERSION}; + } + return vk_outarray_status(&out); + } + + return instance_data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, pLayerName, + pPropertyCount, pProperties); +} + +static VKAPI_ATTR void VKAPI_CALL +anti_lag_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + instance_data *ctx = get_instance_data(physicalDevice); + ctx->vtable.GetPhysicalDeviceFeatures2(physicalDevice, pFeatures); + VkPhysicalDeviceAntiLagFeaturesAMD *anti_lag_features = + vk_find_struct(pFeatures->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + + if (anti_lag_features) { + anti_lag_features->antiLag |= is_anti_lag_supported(physicalDevice); + } +} + +static VKAPI_ATTR void VKAPI_CALL +anti_lag_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + instance_data *ctx = get_instance_data(physicalDevice); + ctx->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, pFeatures); + VkPhysicalDeviceAntiLagFeaturesAMD *anti_lag_features = + vk_find_struct(pFeatures->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + + if (anti_lag_features) { + anti_lag_features->antiLag |= 
is_anti_lag_supported(physicalDevice); + } +} + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetInstanceProcAddr(VkInstance instance, const char *pName); + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetDeviceProcAddr(VkDevice device, const char *pName); + +#define ADD_HOOK(fn) {"vk" #fn, (PFN_vkVoidFunction)anti_lag_##fn} +static const struct { + const char *name; + PFN_vkVoidFunction ptr; +} instance_funcptr_map[] = { + ADD_HOOK(GetInstanceProcAddr), + ADD_HOOK(CreateInstance), + ADD_HOOK(DestroyInstance), + ADD_HOOK(EnumerateDeviceExtensionProperties), + ADD_HOOK(CreateDevice), + ADD_HOOK(GetPhysicalDeviceFeatures2), + ADD_HOOK(GetPhysicalDeviceFeatures2KHR), +}; + +static const struct { + const char *name; + PFN_vkVoidFunction ptr; +} device_funcptr_map[] = { + ADD_HOOK(GetDeviceProcAddr), + ADD_HOOK(DestroyDevice), + ADD_HOOK(AntiLagUpdateAMD), + ADD_HOOK(QueueSubmit), + ADD_HOOK(QueueSubmit2), + ADD_HOOK(QueueSubmit2KHR), +}; +#undef ADD_HOOK + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetInstanceProcAddr(VkInstance instance, const char *pName) +{ + if (!pName) + return NULL; + + PFN_vkVoidFunction result = NULL; + if (instance) { + instance_data *ctx = get_instance_data(instance); + if (ctx) + result = ctx->vtable.GetInstanceProcAddr(instance, pName); + } + + /* Only hook instance functions which are exposed by the underlying impl. + * Ignore instance parameter for vkCreateInstance and vkCreateDevice. + */ + if (result || strcmp(pName, "vkCreateInstance") == 0 || strcmp(pName, "vkCreateDevice") == 0) { + for (uint32_t i = 0; i < ARRAY_SIZE(instance_funcptr_map); i++) { + if (strcmp(pName, instance_funcptr_map[i].name) == 0) + return instance_funcptr_map[i].ptr; + } + } + + return result; +} + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetDeviceProcAddr(VkDevice device, const char *pName) +{ + if (!pName || !device) + return NULL; + + device_data *data = get_device_data(device); + PFN_vkVoidFunction result = data->GetDeviceProcAddr(device, pName); + + /* Only hook device functions if the Layer extension is enabled. 
*/ + if (data->ctx && (result || strcmp(pName, "vkAntiLagUpdateAMD") == 0)) { + for (uint32_t i = 0; i < ARRAY_SIZE(device_funcptr_map); i++) { + if (strcmp(pName, device_funcptr_map[i].name) == 0) + return device_funcptr_map[i].ptr; + } + } + + return result; +} + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct) +{ + assert(pVersionStruct != NULL); + assert(pVersionStruct->sType == LAYER_NEGOTIATE_INTERFACE_STRUCT); + + if (pVersionStruct->loaderLayerInterfaceVersion >= 2) { + pVersionStruct->loaderLayerInterfaceVersion = 2; + pVersionStruct->pfnGetInstanceProcAddr = anti_lag_GetInstanceProcAddr; + pVersionStruct->pfnGetDeviceProcAddr = anti_lag_GetDeviceProcAddr; + pVersionStruct->pfnGetPhysicalDeviceProcAddr = NULL; + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anti-lag-layer/meson.build b/src/vulkan/anti-lag-layer/meson.build new file mode 100644 index 00000000000..264c55c8e75 --- /dev/null +++ b/src/vulkan/anti-lag-layer/meson.build @@ -0,0 +1,26 @@ +# Copyright © 2025 Valve Corporation +# SPDX-License-Identifier: MIT + +vklayer_files = files( + 'anti_lag_layer.c', + 'anti_lag_layer_interface.c', +) + +shared_library( + 'VkLayer_MESA_anti_lag', + vklayer_files, + c_args : [no_override_init_args], + gnu_symbol_visibility : 'hidden', + dependencies : [ + idep_vulkan_util, idep_mesautil, + ], + include_directories : [inc_include, inc_util, inc_src], + link_args : cc.get_supported_link_arguments(['-Wl,-Bsymbolic-functions', '-Wl,-z,relro']), + install : true +) + +install_data( + files('VkLayer_MESA_anti_lag.json'), + install_dir : join_paths(get_option('datadir'), 'vulkan', 'implicit_layer.d'), + install_tag : 'runtime', +) diff --git a/src/vulkan/anti-lag-layer/ringbuffer.h b/src/vulkan/anti-lag-layer/ringbuffer.h new file mode 100644 index 00000000000..1747b7e720f --- /dev/null +++ b/src/vulkan/anti-lag-layer/ringbuffer.h @@ -0,0 +1,58 @@ +/* + * Copyright © 2025 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#ifndef RINGBUFFER_H +#define RINGBUFFER_H + +#include "util/macros.h" + +#define RINGBUFFER_DECLARE(name, type, N) \ + struct { \ + type data[N]; \ + uint32_t head; \ + uint32_t tail; \ + uint32_t size; \ + simple_mtx_t mtx; \ + } name + +#define ringbuffer_init(buffer) \ + (buffer.head = buffer.tail = buffer.size = 0, simple_mtx_init(&buffer.mtx, mtx_plain)) + +#define ringbuffer_lock(buffer) simple_mtx_lock(&buffer.mtx) +#define ringbuffer_unlock(buffer) simple_mtx_unlock(&buffer.mtx) + +static inline uint32_t +__ringbuffer_add_wrap(uint32_t *val, uint32_t *size, uint32_t N) +{ + uint32_t prev = *val; + *val = (*val + 1) % N; + *size = *size + 1; + assert(*size <= N); + return prev; +} + +#define ringbuffer_alloc(buffer) \ + (buffer.size == ARRAY_SIZE(buffer.data) \ + ? 
NULL \ + : &buffer.data[__ringbuffer_add_wrap(&buffer.head, &buffer.size, ARRAY_SIZE(buffer.data))]) + +#define ringbuffer_free(buffer, elem) \ + assert(elem == NULL || elem == &buffer.data[buffer.tail]); \ + buffer.size--; \ + assert(buffer.size < ARRAY_SIZE(buffer.data)); \ + buffer.tail = (buffer.tail + 1) % ARRAY_SIZE(buffer.data) + +#define ringbuffer_first(buffer) (&buffer.data[buffer.tail]) + +#define ringbuffer_last(buffer) \ + (&buffer.data[(buffer.head + ARRAY_SIZE(buffer.data) - 1) % ARRAY_SIZE(buffer.data)]) + +#define ringbuffer_index(buffer, elem) (elem - buffer.data) + +#define ringbuffer_next(buffer, elem) \ + (&buffer.data[(ringbuffer_index(buffer, elem) + 1) % ARRAY_SIZE(buffer.data)]) + +#endif /* RINGBUFFER_H */ diff --git a/src/vulkan/meson.build b/src/vulkan/meson.build index 3225b5f4a9d..cf62ecc6ae7 100644 --- a/src/vulkan/meson.build +++ b/src/vulkan/meson.build @@ -98,3 +98,6 @@ endif if with_vulkan_vram_report_limit_layer subdir('vram-report-limit-layer') endif +if with_vulkan_anti_lag_layer + subdir('anti-lag-layer') +endif
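
A quick sketch of how the ringbuffer macros above compose, mirroring how queue_context declares its query ring. A toy under the assumption that util/simple_mtx.h and util/macros.h are on the include path; not part of the patch:

#include <assert.h>
#include "util/simple_mtx.h"
#include "ringbuffer.h"

struct item {
   int payload;
};

/* RINGBUFFER_DECLARE defines an anonymous struct variable named 'ring'. */
static RINGBUFFER_DECLARE(ring, struct item, 4);

static void example(void)
{
   ringbuffer_init(ring);
   ringbuffer_lock(ring);
   struct item *it = ringbuffer_alloc(ring); /* NULL once all 4 slots are live */
   if (it) {
      it->payload = 42;
      assert(ringbuffer_index(ring, it) == 0);
      ringbuffer_free(ring, it); /* frees the oldest entry (FIFO), i.e. 'it' here */
   }
   ringbuffer_unlock(ring);
}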