hk: accelerate buffer copies with CL

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>
This commit is contained in:
Alyssa Rosenzweig 2025-01-15 12:42:32 -05:00 committed by Marge Bot
parent be18473d33
commit debdb26167
3 changed files with 59 additions and 16 deletions

23
src/asahi/libagx/copy.cl Normal file
View file

@ -0,0 +1,23 @@
/*
* Copyright 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler/libcl/libcl.h"
KERNEL(32)
libagx_fill(global uint32_t *address, uint32_t value)
{
address[cl_global_id.x] = value;
}
KERNEL(32)
libagx_copy_uint4(global uint4 *dest, global uint4 *src)
{
dest[cl_global_id.x] = src[cl_global_id.x];
}
KERNEL(16)
libagx_copy_uchar(global uint8_t *dest, global uint8_t *src)
{
dest[cl_global_id.x] = src[cl_global_id.x];
}

View file

@ -3,6 +3,7 @@
libagx_shader_files = files(
'compression.cl',
'copy.cl',
'draws.cl',
'geometry.cl',
'query.cl',

View file

@ -17,6 +17,8 @@
#include "hk_physical_device.h"
#include "layout.h"
#include "libagx_dgc.h"
#include "libagx_shaders.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_format_convert.h"
@ -1372,17 +1374,33 @@ hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd,
hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
}
static void
hk_cmd_copy(struct hk_cmd_buffer *cmd, uint64_t dst, uint64_t src, size_t size)
{
if (size / 16) {
libagx_copy_uint4(cmd, agx_1d(size / 16), AGX_BARRIER_ALL, dst, src);
}
if (size % 16) {
libagx_copy_uchar(cmd, agx_1d(size % 16), AGX_BARRIER_ALL,
dst + (size & ~15), src + (size & ~15));
}
}
VKAPI_ATTR void VKAPI_CALL
hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
const VkCopyBufferInfo2 *pCopyBufferInfo)
hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *info)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
struct hk_meta_save save;
hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
vk_meta_copy_buffer(&cmd->vk, &dev->meta, pCopyBufferInfo);
hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
for (unsigned i = 0; i < info->regionCount; i++) {
const VkBufferCopy2 *region = &info->pRegions[i];
uint64_t src = vk_meta_buffer_address(&dev->vk, info->srcBuffer,
region->srcOffset, region->size);
uint64_t dst = vk_meta_buffer_address(&dev->vk, info->dstBuffer,
region->dstOffset, region->size);
hk_cmd_copy(cmd, dst, src, region->size);
}
}
static bool
@ -1473,13 +1491,14 @@ hk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(hk_buffer, buffer, dstBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
struct hk_meta_save save;
hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
vk_meta_fill_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
data);
hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
size_t range = vk_buffer_range(&buffer->vk, dstOffset, dstRange);
uint64_t addr =
vk_meta_buffer_address(&dev->vk, dstBuffer, dstOffset, dstRange);
libagx_fill(cmd, agx_1d(range / 4), AGX_BARRIER_ALL, addr, data);
}
VKAPI_ATTR void VKAPI_CALL
@ -1488,13 +1507,13 @@ hk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
const void *pData)
{
VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
VK_FROM_HANDLE(hk_buffer, buffer, dstBuffer);
struct hk_device *dev = hk_cmd_buffer_device(cmd);
size_t range = vk_buffer_range(&buffer->vk, dstOffset, dstRange);
struct hk_meta_save save;
hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
vk_meta_update_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
pData);
hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
hk_cmd_copy(cmd,
vk_meta_buffer_address(&dev->vk, dstBuffer, dstOffset, dstRange),
hk_pool_upload(cmd, pData, range, 4), range);
}
VKAPI_ATTR void VKAPI_CALL