From debdb26167d45ea8663fc36c68942bef55055b18 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 15 Jan 2025 12:42:32 -0500 Subject: [PATCH] hk: accelerate buffer copies with CL Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/libagx/copy.cl | 23 +++++++++++++++ src/asahi/libagx/meson.build | 1 + src/asahi/vulkan/hk_cmd_meta.c | 51 +++++++++++++++++++++++----------- 3 files changed, 59 insertions(+), 16 deletions(-) create mode 100644 src/asahi/libagx/copy.cl diff --git a/src/asahi/libagx/copy.cl b/src/asahi/libagx/copy.cl new file mode 100644 index 00000000000..57d27e3f2f2 --- /dev/null +++ b/src/asahi/libagx/copy.cl @@ -0,0 +1,23 @@ +/* + * Copyright 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ +#include "compiler/libcl/libcl.h" + +KERNEL(32) +libagx_fill(global uint32_t *address, uint32_t value) +{ /* Fill kernel: each invocation stores `value` into the uint32_t element selected by its global x index. */ + address[cl_global_id.x] = value; +} + +KERNEL(32) +libagx_copy_uint4(global uint4 *dest, global uint4 *src) +{ /* Wide copy kernel: each invocation copies one uint4 element, selected by its global x index. */ + dest[cl_global_id.x] = src[cl_global_id.x]; +} + +KERNEL(16) +libagx_copy_uchar(global uint8_t *dest, global uint8_t *src) +{ /* Byte copy kernel: each invocation copies one uint8_t element, selected by its global x index. */ + dest[cl_global_id.x] = src[cl_global_id.x]; +} diff --git a/src/asahi/libagx/meson.build b/src/asahi/libagx/meson.build index 449c4816ed2..61bb7328a37 100644 --- a/src/asahi/libagx/meson.build +++ b/src/asahi/libagx/meson.build @@ -3,6 +3,7 @@ libagx_shader_files = files( 'compression.cl', + 'copy.cl', 'draws.cl', 'geometry.cl', 'query.cl', diff --git a/src/asahi/vulkan/hk_cmd_meta.c b/src/asahi/vulkan/hk_cmd_meta.c index fc9dcea081f..8cc2adc4cc3 100644 --- a/src/asahi/vulkan/hk_cmd_meta.c +++ b/src/asahi/vulkan/hk_cmd_meta.c @@ -17,6 +17,8 @@ #include "hk_physical_device.h" #include "layout.h" +#include "libagx_dgc.h" +#include "libagx_shaders.h" #include "nir_builder.h" #include "nir_builder_opcodes.h" #include "nir_format_convert.h" @@ -1372,17 +1374,33 @@ hk_meta_resolve_rendering(struct hk_cmd_buffer *cmd, hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); } +static void 
+hk_cmd_copy(struct hk_cmd_buffer *cmd, uint64_t dst, uint64_t src, size_t size) +{ /* Copy `size` bytes from address `src` to address `dst`: the whole 16-byte chunks first, then the leftover bytes. */ + if (size / 16) { /* Bulk part: size / 16 uint4 elements via libagx_copy_uint4. NOTE(review): nothing here checks that dst/src are 16-byte aligned - confirm the uint4 kernel tolerates unaligned addresses. */ + libagx_copy_uint4(cmd, agx_1d(size / 16), AGX_BARRIER_ALL, dst, src); + } + + if (size % 16) { /* Tail part: the remaining size % 16 bytes via libagx_copy_uchar, starting at byte offset size & ~15 (size rounded down to a multiple of 16). */ + libagx_copy_uchar(cmd, agx_1d(size % 16), AGX_BARRIER_ALL, + dst + (size & ~15), src + (size & ~15)); + } +} + VKAPI_ATTR void VKAPI_CALL -hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2 *pCopyBufferInfo) +hk_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *info) { VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer); struct hk_device *dev = hk_cmd_buffer_device(cmd); - struct hk_meta_save save; - hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); - vk_meta_copy_buffer(&cmd->vk, &dev->meta, pCopyBufferInfo); - hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); + for (unsigned i = 0; i < info->regionCount; i++) { /* One hk_cmd_copy per region, using the source and destination addresses obtained from vk_meta_buffer_address for that region's offsets and size. */ + const VkBufferCopy2 *region = &info->pRegions[i]; + uint64_t src = vk_meta_buffer_address(&dev->vk, info->srcBuffer, + region->srcOffset, region->size); + uint64_t dst = vk_meta_buffer_address(&dev->vk, info->dstBuffer, + region->dstOffset, region->size); + hk_cmd_copy(cmd, dst, src, region->size); + } } static bool @@ -1473,13 +1491,14 @@ hk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data) { VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(hk_buffer, buffer, dstBuffer); struct hk_device *dev = hk_cmd_buffer_device(cmd); - struct hk_meta_save save; - hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); - vk_meta_fill_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange, - data); - hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); + size_t range = vk_buffer_range(&buffer->vk, dstOffset, dstRange); /* Byte range to fill; the dispatch below is sized as range / 4 because libagx_fill writes uint32_t elements, so integer division drops any range % 4 remainder. */ + uint64_t addr = + vk_meta_buffer_address(&dev->vk, dstBuffer, dstOffset, dstRange); + + libagx_fill(cmd, agx_1d(range / 4), AGX_BARRIER_ALL, addr, data); } VKAPI_ATTR void VKAPI_CALL @@ 
-1488,13 +1507,13 @@ hk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, const void *pData) { VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(hk_buffer, buffer, dstBuffer); struct hk_device *dev = hk_cmd_buffer_device(cmd); + size_t range = vk_buffer_range(&buffer->vk, dstOffset, dstRange); /* Byte count used both for the upload of pData and for the copy below. */ - struct hk_meta_save save; - hk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); - vk_meta_update_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange, - pData); - hk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); + hk_cmd_copy(cmd, /* Destination is the buffer address; source is whatever hk_pool_upload returns for pData (presumably a staged copy of the host data, with 4 as its alignment - TODO confirm against hk_pool_upload). */ + vk_meta_buffer_address(&dev->vk, dstBuffer, dstOffset, dstRange), + hk_pool_upload(cmd, pData, range, 4), range); } VKAPI_ATTR void VKAPI_CALL