nvk: Add an nvk_mem_stream struct

This is based on nvk_upload_heap but with a different interface and
minor changes to deal with those differences.

Reviewed-by: Mel Henning <mhenning@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35356>
Faith Ekstrand 2025-06-04 16:13:45 -04:00 committed by Marge Bot
parent a35a12dd38
commit e206962409
3 changed files with 336 additions and 0 deletions
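As an illustration of the new interface (not part of the commit itself), here is a minimal sketch of how a caller might stage CPU-written data through a stream, using only the functions declared in nvk_mem_stream.h below; the stage_data helper, its parameters, and the abbreviated error handling are assumptions:

/* Hypothetical caller (sketch only): stage some CPU-written data through
 * the stream and hand it to the GPU.
 */
#include "nvk_mem_stream.h"

#include <string.h>

static VkResult
stage_data(struct nvk_device *dev, struct nvkmd_ctx *ctx,
           struct nvk_mem_stream *stream,
           const void *data, uint32_t size_B, uint64_t *gpu_addr_out)
{
   uint64_t addr;
   void *map;

   /* Reserve size_B bytes (4-byte aligned, and at most
    * NVK_MEM_STREAM_MAX_ALLOC_SIZE); addr is the GPU VA and map is the
    * CPU mapping of the same memory.
    */
   VkResult result = nvk_mem_stream_alloc(dev, stream, size_B, 4,
                                          &addr, &map);
   if (result != VK_SUCCESS)
      return result;

   /* The CPU may only write between the alloc and the flush */
   memcpy(map, data, size_B);

   /* A real caller would record GPU work on ctx that reads addr here.
    * After the flush, the allocation belongs to the GPU and is recycled
    * automatically once the stream's time point signals.
    */
   result = nvk_mem_stream_flush(dev, stream, ctx, NULL);
   if (result != VK_SUCCESS)
      return result;

   *gpu_addr_out = addr;
   return VK_SUCCESS;
}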

src/nouveau/vulkan/meson.build

@@ -47,6 +47,8 @@ nvk_files = files(
'nvk_instance.h',
'nvk_mem_arena.c',
'nvk_mem_arena.h',
'nvk_mem_stream.c',
'nvk_mem_stream.h',
'nvk_mme.c',
'nvk_mme.h',
'nvk_nir_lower_descriptors.c',

src/nouveau/vulkan/nvk_mem_stream.c

@@ -0,0 +1,258 @@
/*
* Copyright © 2025 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#include "nvk_mem_stream.h"
#include "nvk_device.h"
#include "vk_sync.h"
struct nvk_mem_stream_chunk {
struct nvkmd_mem *mem;
/** Link in nvk_mem_stream::recycle */
struct list_head link;
/** Time point at which this memory will be idle */
uint64_t idle_time_point;
};
static VkResult
nvk_mem_stream_chunk_create(struct nvk_device *dev,
struct nvk_mem_stream_chunk **chunk_out)
{
struct nvk_mem_stream_chunk *chunk;
VkResult result;
chunk = vk_zalloc(&dev->vk.alloc, sizeof(*chunk), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (chunk == NULL)
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
NVK_MEM_STREAM_MAX_ALLOC_SIZE, 0,
NVKMD_MEM_GART, NVKMD_MEM_MAP_WR,
&chunk->mem);
if (result != VK_SUCCESS) {
vk_free(&dev->vk.alloc, chunk);
return result;
}
*chunk_out = chunk;
return VK_SUCCESS;
}
static void
nvk_mem_stream_chunk_destroy(struct nvk_device *dev,
struct nvk_mem_stream_chunk *chunk)
{
nvkmd_mem_unref(chunk->mem);
vk_free(&dev->vk.alloc, chunk);
}
VkResult
nvk_mem_stream_init(struct nvk_device *dev,
struct nvk_mem_stream *stream)
{
const struct nvk_physical_device *pdev = nvk_device_physical(dev);
VkResult result;
const struct vk_sync_type *sync_type = pdev->nvkmd->sync_types[0];
assert(sync_type->features & VK_SYNC_FEATURE_TIMELINE);
result = vk_sync_create(&dev->vk, sync_type, VK_SYNC_IS_TIMELINE,
0, &stream->sync);
if (result != VK_SUCCESS)
return result;
stream->next_time_point = 1;
stream->time_point_passed = 0;
stream->needs_flush = false;
stream->chunk = NULL;
stream->chunk_alloc_B = NVK_MEM_STREAM_MAX_ALLOC_SIZE;
list_inithead(&stream->recycle);
return VK_SUCCESS;
}
void
nvk_mem_stream_finish(struct nvk_device *dev,
struct nvk_mem_stream *stream)
{
list_for_each_entry_safe(struct nvk_mem_stream_chunk, chunk,
&stream->recycle, link)
nvk_mem_stream_chunk_destroy(dev, chunk);
if (stream->chunk != NULL)
nvk_mem_stream_chunk_destroy(dev, stream->chunk);
vk_sync_destroy(&dev->vk, stream->sync);
}
static VkResult
nvk_mem_stream_get_chunk(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvk_mem_stream_chunk **chunk_out)
{
if (!list_is_empty(&stream->recycle)) {
/* Check to see if something on the recycle list is ready */
struct nvk_mem_stream_chunk *chunk =
list_first_entry(&stream->recycle, struct nvk_mem_stream_chunk, link);
if (stream->time_point_passed >= chunk->idle_time_point) {
list_del(&chunk->link);
*chunk_out = chunk;
return VK_SUCCESS;
}
/* Try again with a fresh time point. Fetching a new time_point_passed
* after the first check may avoid extra ioctls if things get really hot.
*/
VkResult result = vk_sync_get_value(&dev->vk, stream->sync,
&stream->time_point_passed);
if (result != VK_SUCCESS)
return result;
if (stream->time_point_passed >= chunk->idle_time_point) {
list_del(&chunk->link);
*chunk_out = chunk;
return VK_SUCCESS;
}
}
return nvk_mem_stream_chunk_create(dev, chunk_out);
}
VkResult
nvk_mem_stream_alloc(struct nvk_device *dev,
struct nvk_mem_stream *stream,
uint32_t size_B, uint32_t align_B,
uint64_t *addr_out, void **map_out)
{
assert(size_B <= NVK_MEM_STREAM_MAX_ALLOC_SIZE);
assert(align_B <= NVK_MEM_STREAM_MAX_ALLOC_SIZE);
if (size_B == 0) {
*addr_out = 0;
*map_out = NULL;
return VK_SUCCESS;
}
stream->chunk_alloc_B = align(stream->chunk_alloc_B, align_B);
assert(stream->chunk_alloc_B <= NVK_MEM_STREAM_MAX_ALLOC_SIZE);
if (stream->chunk_alloc_B + size_B > NVK_MEM_STREAM_MAX_ALLOC_SIZE) {
if (stream->chunk != NULL) {
list_addtail(&stream->chunk->link, &stream->recycle);
stream->chunk = NULL;
}
/* On the off chance that nvk_mem_stream_get_chunk() fails, reset to MAX
* here so that we hit the re-alloc path on the next attempt.
*/
assert(stream->chunk == NULL);
stream->chunk_alloc_B = NVK_MEM_STREAM_MAX_ALLOC_SIZE;
VkResult result = nvk_mem_stream_get_chunk(dev, stream, &stream->chunk);
if (result != VK_SUCCESS) {
assert(stream->chunk == NULL);
return result;
}
stream->chunk_alloc_B = 0;
}
/* Mark the chunk as not being idle until next_time_point */
assert(stream->chunk->idle_time_point <= stream->next_time_point);
stream->chunk->idle_time_point = stream->next_time_point;
/* The stream needs to be flushed */
stream->needs_flush = true;
assert(stream->chunk_alloc_B + size_B <= NVK_MEM_STREAM_MAX_ALLOC_SIZE);
*addr_out = stream->chunk->mem->va->addr + stream->chunk_alloc_B;
*map_out = stream->chunk->mem->map + stream->chunk_alloc_B;
stream->chunk_alloc_B += size_B;
return VK_SUCCESS;
}
VkResult
nvk_mem_stream_flush(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvkmd_ctx *ctx,
uint64_t *time_point_out)
{
if (!stream->needs_flush) {
if (time_point_out != NULL)
*time_point_out = stream->next_time_point - 1;
return VK_SUCCESS;
}
if (stream->next_time_point == UINT64_MAX)
abort();
const struct vk_sync_signal signal = {
.sync = stream->sync,
.stage_mask = ~0,
.signal_value = stream->next_time_point,
};
VkResult result = nvkmd_ctx_signal(ctx, &dev->vk.base, 1, &signal);
if (result != VK_SUCCESS)
return result;
if (time_point_out != NULL)
*time_point_out = stream->next_time_point;
stream->needs_flush = false;
stream->next_time_point++;
return VK_SUCCESS;
}
VkResult
nvk_mem_stream_push(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvkmd_ctx *ctx,
const uint32_t *push_data,
uint32_t push_dw_count,
uint64_t *time_point_out)
{
VkResult result;
uint64_t push_addr;
void *push_map;
result = nvk_mem_stream_alloc(dev, stream, push_dw_count * 4, 4,
&push_addr, &push_map);
if (result != VK_SUCCESS)
return result;
memcpy(push_map, push_data, push_dw_count * 4);
const struct nvkmd_ctx_exec exec = {
.addr = push_addr,
.size_B = push_dw_count * 4,
};
result = nvkmd_ctx_exec(ctx, &dev->vk.base, 1, &exec);
if (result != VK_SUCCESS)
return result;
return nvk_mem_stream_flush(dev, stream, ctx, time_point_out);
}
VkResult
nvk_mem_stream_sync(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvkmd_ctx *ctx)
{
uint64_t time_point;
VkResult result = nvk_mem_stream_flush(dev, stream, ctx, &time_point);
if (result != VK_SUCCESS)
return result;
return vk_sync_wait(&dev->vk, stream->sync, time_point,
VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
}
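As a usage note (not part of this file): combined with the time point returned by the flush, any context or thread that knows the value can wait on stream->sync directly. A minimal sketch, assuming the caller has already built its pushbuffer DWORDs and that the same headers as nvk_mem_stream.c above are included; waiting immediately like this makes it roughly equivalent to nvk_mem_stream_sync():

/* Hypothetical example (sketch): submit a small pushbuffer through the
 * stream and wait for it with the returned timeline value.
 */
static VkResult
push_and_wait(struct nvk_device *dev, struct nvkmd_ctx *ctx,
              struct nvk_mem_stream *stream,
              const uint32_t *push_dws, uint32_t dw_count)
{
   uint64_t time_point;

   /* alloc + memcpy + exec + flush, as implemented above */
   VkResult result = nvk_mem_stream_push(dev, stream, ctx, push_dws,
                                         dw_count, &time_point);
   if (result != VK_SUCCESS)
      return result;

   /* Anyone who knows time_point can wait on stream->sync */
   return vk_sync_wait(&dev->vk, stream->sync, time_point,
                       VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
}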

src/nouveau/vulkan/nvk_mem_stream.h

@@ -0,0 +1,76 @@
/*
* Copyright © 2025 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#ifndef NVK_MEM_STREAM_H
#define NVK_MEM_STREAM_H 1
#include "nvk_private.h"
#include "util/list.h"
struct nvk_device;
struct nvk_mem_stream_chunk;
struct nvkmd_ctx;
struct vk_sync;
struct vk_sync_wait;
#define NVK_MEM_STREAM_MAX_ALLOC_SIZE (64*1024)
/** A streaming memory allocator */
struct nvk_mem_stream {
struct vk_sync *sync;
uint64_t next_time_point;
uint64_t time_point_passed;
bool needs_flush;
struct nvk_mem_stream_chunk *chunk;
uint32_t chunk_alloc_B;
/* list of nvk_mem_stream_chunk */
struct list_head recycle;
};
VkResult nvk_mem_stream_init(struct nvk_device *dev,
struct nvk_mem_stream *stream);
/* The caller must ensure the stream is not in use. This can be done by
* calling nvk_mem_stream_sync().
*/
void nvk_mem_stream_finish(struct nvk_device *dev,
struct nvk_mem_stream *stream);
VkResult nvk_mem_stream_alloc(struct nvk_device *dev,
struct nvk_mem_stream *stream,
uint32_t size_B, uint32_t align_B,
uint64_t *addr_out, void **map_out);
/** Flushes the stream.
*
* Any memory allocated by nvk_mem_stream_alloc() prior to this call is now
* owned by the GPU and may no longer be accessed on the CPU. The memory will
* be automatically recycled once the GPU is done with it.
*
* If non-NULL, time_point_out will be populated with a time point which some
* other context can use to wait on this stream with stream->sync. If the
* flush fails, time_point_out is not written.
*/
VkResult nvk_mem_stream_flush(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvkmd_ctx *ctx,
uint64_t *time_point_out);
/* An alloc, memcpy(), push, and flush, all wrapped up into one */
VkResult nvk_mem_stream_push(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvkmd_ctx *ctx,
const uint32_t *push_data,
uint32_t push_dw_count,
uint64_t *time_point_out);
VkResult nvk_mem_stream_sync(struct nvk_device *dev,
struct nvk_mem_stream *stream,
struct nvkmd_ctx *ctx);
#endif /* NVK_MEM_STREAM_H */
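Finally, a hedged sketch of the teardown ordering that the comment on nvk_mem_stream_finish() asks for; the destroy_stream helper and its error handling are assumptions:

/* Sketch: drain the stream before freeing it, since nvk_mem_stream_finish()
 * requires that the stream no longer be in use.
 */
static void
destroy_stream(struct nvk_device *dev, struct nvkmd_ctx *ctx,
               struct nvk_mem_stream *stream)
{
   /* Flush and wait for everything previously allocated from the stream */
   if (nvk_mem_stream_sync(dev, stream, ctx) != VK_SUCCESS) {
      /* Real code would need a fallback here; the sketch just proceeds */
   }
   nvk_mem_stream_finish(dev, stream);
}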