From 1d5c2f56d8613ffc0591ac15056461e49c7f281e Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Fri, 2 Aug 2024 12:23:27 -0400
Subject: [PATCH] hk: implement drawIndirectCount

Split the bodies of CmdDrawIndirect and CmdDrawIndexedIndirect into
*_inner helpers that take a 64-bit GPU address instead of a VkBuffer
plus offset, so the count variants can reuse them.

CmdDrawIndirectCount and CmdDrawIndexedIndirectCount now dispatch the
agx_nir_predicate_indirect meta kernel over maxDrawCount items. The
kernel reads the application's indirect buffer and the count buffer and
writes a patched, tightly packed indirect buffer (4 dwords per draw, 5
when indexed) into a pool allocation. maxDrawCount ordinary indirect
draws are then issued from that patched buffer.

Advertise KHR_draw_indirect_count and the drawIndirectCount feature, and
lower maxDrawIndirectCount from UINT32_MAX to UINT16_MAX.

The per-draw address in the *_inner helpers is computed with a 64-bit
multiply: stride and draw_id are both 32-bit, so "stride * draw_id"
would otherwise wrap before being added to the 64-bit base address.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/vulkan/hk_cmd_draw.c        | 100 ++++++++++++++++++++++----
 src/asahi/vulkan/hk_physical_device.c |   6 +-
 2 files changed, 91 insertions(+), 15 deletions(-)

diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c
index 16baff50eeb..f03f07a354c 100644
--- a/src/asahi/vulkan/hk_cmd_draw.c
+++ b/src/asahi/vulkan/hk_cmd_draw.c
@@ -30,6 +30,7 @@
 
 #include "asahi/genxml/agx_pack.h"
 #include "asahi/lib/libagx_shaders.h"
+#include "asahi/lib/shaders/draws.h"
 #include "asahi/lib/shaders/geometry.h"
 #include "shaders/query.h"
 #include "shaders/tessellator.h"
@@ -3511,12 +3512,11 @@ hk_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount,
    }
 }
 
-VKAPI_ATTR void VKAPI_CALL
-hk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
-                   VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
+static void
+hk_draw_indirect_inner(VkCommandBuffer commandBuffer, uint64_t base,
+                       uint32_t drawCount, uint32_t stride)
 {
    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
-   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
 
    /* From the Vulkan 1.3.238 spec:
     *
@@ -3535,18 +3535,26 @@ hk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
    }
 
    for (unsigned draw_id = 0; draw_id < drawCount; ++draw_id) {
-      uint64_t addr = hk_buffer_address(buffer, offset) + stride * draw_id;
+      uint64_t addr = base + (uint64_t)stride * draw_id;
       hk_draw(cmd, draw_id, hk_draw_indirect(addr));
    }
 }
 
 VKAPI_ATTR void VKAPI_CALL
-hk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
-                          VkDeviceSize offset, uint32_t drawCount,
-                          uint32_t stride)
+hk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+                   VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
+{
+   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
+
+   hk_draw_indirect_inner(commandBuffer, hk_buffer_address(buffer, offset),
+                          drawCount, stride);
+}
+
+static void
+hk_draw_indexed_indirect_inner(VkCommandBuffer commandBuffer, uint64_t buffer,
+                               uint32_t drawCount, uint32_t stride)
 {
    VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
-   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
 
    /* From the Vulkan 1.3.238 spec:
     *
@@ -3566,7 +3574,7 @@ hk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
    }
 
    for (unsigned draw_id = 0; draw_id < drawCount; ++draw_id) {
-      uint64_t addr = hk_buffer_address(buffer, offset) + stride * draw_id;
+      uint64_t addr = buffer + (uint64_t)stride * draw_id;
 
       hk_draw(
          cmd, draw_id,
@@ -3574,13 +3582,80 @@ hk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
    }
 }
 
+VKAPI_ATTR void VKAPI_CALL
+hk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+                          VkDeviceSize offset, uint32_t drawCount,
+                          uint32_t stride)
+{
+   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
+
+   hk_draw_indexed_indirect_inner(
+      commandBuffer, hk_buffer_address(buffer, offset), drawCount, stride);
+}
+
+/*
+ * To implement drawIndirectCount generically, we dispatch a compute kernel to
+ * patch the indirect buffer and then we dispatch the predicated maxDrawCount
+ * indirect draws.
+ */
+static void
+hk_draw_indirect_count(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+                       VkDeviceSize offset, VkBuffer countBuffer,
+                       VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+                       uint32_t stride, bool indexed)
+{
+   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
+   VK_FROM_HANDLE(hk_buffer, buffer, _buffer);
+   VK_FROM_HANDLE(hk_buffer, count_buffer, countBuffer);
+
+   struct hk_device *dev = hk_cmd_buffer_device(cmd);
+   struct agx_predicate_indirect_key key = {.indexed = indexed};
+   struct hk_shader *s =
+      hk_meta_kernel(dev, agx_nir_predicate_indirect, &key, sizeof(key));
+
+   perf_debug(dev, "Draw indirect count");
+
+   struct hk_cs *cs =
+      hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.pre_gfx, true);
+   if (!cs)
+      return;
+
+   hk_ensure_cs_has_space(cmd, cs, 0x2000 /* TODO */);
+
+   assert((stride % 4) == 0 && "aligned");
+
+   size_t out_stride = sizeof(uint32_t) * (indexed ? 5 : 4);
+   uint64_t patched = hk_pool_alloc(cmd, out_stride * maxDrawCount, 4).gpu;
+
+   struct libagx_predicate_indirect_push push = {
+      .in = hk_buffer_address(buffer, offset),
+      .out = patched,
+      .draw_count = hk_buffer_address(count_buffer, countBufferOffset),
+      .stride_el = stride / 4,
+   };
+
+   uint64_t push_ = hk_pool_upload(cmd, &push, sizeof(push), 8);
+   uint32_t usc = hk_upload_usc_words_kernel(cmd, s, &push_, sizeof(push_));
+
+   hk_dispatch_with_usc(dev, cs, s, usc, hk_grid(maxDrawCount, 1, 1),
+                        hk_grid(1, 1, 1));
+
+   if (indexed) {
+      hk_draw_indexed_indirect_inner(commandBuffer, patched, maxDrawCount,
+                                     out_stride);
+   } else {
+      hk_draw_indirect_inner(commandBuffer, patched, maxDrawCount, out_stride);
+   }
+}
+
 VKAPI_ATTR void VKAPI_CALL
 hk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                         VkDeviceSize offset, VkBuffer countBuffer,
                         VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                         uint32_t stride)
 {
-   unreachable("TODO");
+   hk_draw_indirect_count(commandBuffer, _buffer, offset, countBuffer,
+                          countBufferOffset, maxDrawCount, stride, false);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -3589,7 +3664,8 @@ hk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                VkDeviceSize countBufferOffset,
                                uint32_t maxDrawCount, uint32_t stride)
 {
-   unreachable("TODO");
+   hk_draw_indirect_count(commandBuffer, _buffer, offset, countBuffer,
+                          countBufferOffset, maxDrawCount, stride, true);
 }
 
 VKAPI_ATTR void VKAPI_CALL
diff --git a/src/asahi/vulkan/hk_physical_device.c b/src/asahi/vulkan/hk_physical_device.c
index 304cc7c938d..7b9c52effed 100644
--- a/src/asahi/vulkan/hk_physical_device.c
+++ b/src/asahi/vulkan/hk_physical_device.c
@@ -60,7 +60,7 @@ hk_get_device_extensions(const struct hk_instance *instance,
       .KHR_depth_stencil_resolve = true,
       .KHR_descriptor_update_template = true,
       .KHR_device_group = true,
-      .KHR_draw_indirect_count = false,
+      .KHR_draw_indirect_count = true,
       .KHR_driver_properties = true,
       .KHR_dynamic_rendering = true,
       // TODO
@@ -289,7 +289,7 @@ hk_get_device_features(
 
       /* Vulkan 1.2 */
       .samplerMirrorClampToEdge = true,
-      .drawIndirectCount = false,
+      .drawIndirectCount = true,
       .storageBuffer8BitAccess = true,
       .uniformAndStorageBuffer8BitAccess = true,
       .storagePushConstant8 = true,
@@ -666,7 +666,7 @@ hk_get_device_properties(const struct agx_device *dev,
       .subTexelPrecisionBits = 8,
       .mipmapPrecisionBits = 8,
       .maxDrawIndexedIndexValue = UINT32_MAX,
-      .maxDrawIndirectCount = UINT32_MAX,
+      .maxDrawIndirectCount = UINT16_MAX,
       .maxSamplerLodBias = 15,
       .maxSamplerAnisotropy = 16,
       .maxViewports = HK_MAX_VIEWPORTS,