radv/spm: use a staging buffer for faster reads on dGPUS

This allows us to move the SPM buffer to VRAM, where it most likely
needs to reside for correct operation on dGPUs.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39195>
This commit is contained in:
Samuel Pitoiset 2026-01-07 15:45:38 +01:00 committed by Marge Bot
parent 6863a90486
commit 5bcca4a832
3 changed files with 34 additions and 10 deletions

View file

@ -254,6 +254,8 @@ struct radv_device {
uint64_t spm_buffer_va;
VkBuffer spm_buffer;
VkDeviceMemory spm_memory;
VkBuffer spm_staging_buffer;
VkDeviceMemory spm_staging_memory;
/* Radeon Raytracing Analyzer trace. */
struct radv_rra_trace_data rra_trace;

View file

@ -20,16 +20,18 @@ static bool
radv_spm_init_bo(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
VkDeviceMemory memory;
VkBuffer buffer;
VkDeviceMemory memory, staging_memory;
VkBuffer buffer, staging_buffer;
VkResult result;
uint64_t va;
void *ptr;
/* Allocate the SPM buffer. */
const uint32_t memory_type_index =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
/* Allocate the SPM buffer (it must be in VRAM). */
const uint32_t memory_type_index = radv_find_memory_index(
pdev,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
(device->rgp_use_staging_buffer ? 0
: VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT));
result = radv_sqtt_allocate_buffer(radv_device_to_handle(device), device->spm.buffer_size, memory_type_index,
&buffer, &memory);
@ -43,9 +45,21 @@ radv_spm_init_bo(struct radv_device *device)
va = vk_common_GetBufferDeviceAddress(radv_device_to_handle(device), &addr_info);
/* Allocate a staging buffer in GTT. */
if (device->rgp_use_staging_buffer) {
const uint32_t staging_memory_type_index =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
result = radv_sqtt_allocate_buffer(radv_device_to_handle(device), device->spm.buffer_size,
staging_memory_type_index, &staging_buffer, &staging_memory);
if (result != VK_SUCCESS)
return false;
}
VkMemoryMapInfo mem_map_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_MAP_INFO,
.memory = memory,
.memory = device->rgp_use_staging_buffer ? staging_memory : memory,
.size = VK_WHOLE_SIZE,
};
@ -55,6 +69,8 @@ radv_spm_init_bo(struct radv_device *device)
device->spm_buffer = buffer;
device->spm_memory = memory;
device->spm_staging_buffer = staging_buffer;
device->spm_staging_memory = staging_memory;
device->spm_buffer_va = va;
device->spm.bo = &device->spm_buffer;
device->spm.ptr = ptr;
@ -65,16 +81,20 @@ radv_spm_init_bo(struct radv_device *device)
/* Tear down the SPM buffer(s) created by radv_spm_init_bo().
 *
 * Only the host-visible allocation was mapped at init time: when a staging
 * buffer is in use (dGPU path), that is the staging memory; otherwise it is
 * the SPM buffer memory itself. Unmap whichever one was mapped, then destroy
 * the SPM buffer and, if present, the staging buffer.
 *
 * NOTE(review): the diff rendering had stripped the +/- markers, leaving the
 * pre- and post-change lines interleaved; this is the reconstructed
 * post-change version of the function.
 */
static void
radv_spm_finish_bo(struct radv_device *device)
{
   /* Pick the memory object that actually carries the host mapping. */
   VkDeviceMemory memory = device->rgp_use_staging_buffer ? device->spm_staging_memory : device->spm_memory;
   if (memory) {
      VkMemoryUnmapInfo unmap_info = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_UNMAP_INFO,
         .memory = memory,
      };
      radv_UnmapMemory2(radv_device_to_handle(device), &unmap_info);
   }

   radv_sqtt_destroy_buffer(radv_device_to_handle(device), device->spm_buffer, device->spm_memory);

   /* The staging buffer only exists on the dGPU path. */
   if (device->rgp_use_staging_buffer)
      radv_sqtt_destroy_buffer(radv_device_to_handle(device), device->spm_staging_buffer, device->spm_staging_memory);
}
static bool

View file

@ -753,9 +753,11 @@ radv_end_sqtt(struct radv_queue *queue)
/* Restore previous state by re-enabling clock gating. */
ac_emit_cp_inhibit_clockgating(cs->b, pdev->info.gfx_level, false);
/* Copy to the staging buffer for faster reads on dGPUs. */
/* Copy to the staging buffers for faster reads on dGPUs. */
if (device->rgp_use_staging_buffer) {
radv_sqtt_copy_buffer(cmdbuf, device->sqtt_buffer, device->sqtt_staging_buffer, device->sqtt_size);
if (device->spm.bo)
radv_sqtt_copy_buffer(cmdbuf, device->spm_buffer, device->spm_staging_buffer, device->spm.buffer_size);
}
result = radv_EndCommandBuffer(cmdbuf);