# Patch summary: enable VK_KHR_draw_indirect_count in the kosmickrisp ("kk") driver.
#
# What the hunks below do:
#  - docs/features.txt: adds "kk" to the VK_KHR_draw_indirect_count driver list.
#  - compiler/nir_to_msl.c: removes the nir_lower_compute_system_values pass from
#    msl_preprocess_nir().
#  - kosmicomp.c (optimize) and vulkan/kk_shader.c (kk_preprocess_nir): run that
#    pass themselves right after msl_preprocess_nir(), with the same options the
#    removed code used (.has_base_global_invocation_id = 0,
#    .has_base_workgroup_id = true).
#  - libkk/kk_draws.cl (new) + libkk/meson.build: adds the kernel
#    libkk_predicate_indirect. Each invocation handles draw slot cl_global_id.x:
#    it copies 5 words (indexed) or 4 words (non-indexed) from `in`, where slots
#    are stride_el words apart, to the packed `out` array, and writes zeros
#    instead when the slot index is >= *draw_count.
#  - vulkan/kk_cmd_draw.c: implements kk_CmdDrawIndirectCount and
#    kk_CmdDrawIndexedIndirectCount. Both allocate maxDrawCount * (4 or 5) *
#    sizeof(uint32_t) bytes with kk_cmd_allocate_buffer, dispatch
#    libkk_predicate_indirect over a {maxDrawCount, 1, 1} grid, then call kk_draw
#    maxDrawCount times on the patched buffer, setting root.draw.draw_id = i and
#    root_dirty = true before each call and resetting draw_id to 0 afterwards.
#  - vulkan/kk_physical_device.c: sets KHR_draw_indirect_count and
#    drawIndirectCount to true and changes maxDrawIndirectCount from UINT32_MAX
#    to UINT16_MAX.
#
# Review notes (not applied here; changing hunk payload would invalidate the
# @@ line counts):
#  - NOTE(review): `patched` is dereferenced (patched->gpu, patched->map) without
#    a NULL check in both new entry points -- confirm kk_cmd_allocate_buffer
#    cannot fail, or return early when it does.
#  - NOTE(review): maxDrawCount == 0 leads to a zero-byte allocation and a
#    {0, 1, 1} grid -- confirm both are tolerated, or return early.
#  - NOTE(review): the draws read the buffer written by the compute dispatch; no
#    explicit barrier is visible in this patch. Presumably the `true` argument to
#    libkk_predicate_indirect or kk_draw itself orders them -- TODO confirm.
#  - NOTE(review): the CPU loop always records maxDrawCount draws (disabled ones
#    are zeroed, not skipped), so cost scales with maxDrawCount; presumably this
#    is why maxDrawIndirectCount drops to UINT16_MAX -- confirm.
#  - NOTE(review): the two new entry points differ only in words per draw and the
#    index-buffer fields of kk_draw_data; candidate for a shared static helper.
#  - NOTE(review): both callers visible here re-add the moved NIR pass with
#    identical options, so any behavior change must come from a caller of
#    msl_preprocess_nir() that this patch does not show -- verify.
#  - NOTE(review): the "Vulkan 1.2 -- all DONE" summary line in features.txt is
#    left unchanged; check whether kk now belongs there.
#
diff --git a/docs/features.txt b/docs/features.txt index 89fbd74fd8c..1fcc520ac9d 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -469,7 +469,7 @@ Vulkan 1.2 -- all DONE: anv, hk, nvk, panvk/v10+, pvr, tu, vn VK_KHR_buffer_device_address DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_KHR_create_renderpass2 DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_KHR_depth_stencil_resolve DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) - VK_KHR_draw_indirect_count DONE (anv, dzn, hasvk, lvp, nvk, panvk/v10+, radv, tu, vn) + VK_KHR_draw_indirect_count DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk/v10+, radv, tu, vn) VK_KHR_driver_properties DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_KHR_image_format_list DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_KHR_imageless_framebuffer DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index 03e621a7a85..5ac2c28d999 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -1994,12 +1994,6 @@ msl_preprocess_nir(struct nir_shader *nir) nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, UINT32_MAX); - nir_lower_compute_system_values_options csv_options = { - .has_base_global_invocation_id = 0, - .has_base_workgroup_id = true, - }; - NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options); - msl_nir_lower_subgroups(nir); } diff --git a/src/kosmickrisp/kosmicomp.c b/src/kosmickrisp/kosmicomp.c index 227fe8df7bb..b7293fc1fc9 100644 --- a/src/kosmickrisp/kosmicomp.c +++ b/src/kosmickrisp/kosmicomp.c @@ -70,6 +70,12 @@ optimize(nir_shader *nir) { msl_preprocess_nir(nir); + nir_lower_compute_system_values_options csv_options = { + .has_base_global_invocation_id = 0, + .has_base_workgroup_id = true, + }; + NIR_PASS(_, nir, 
nir_lower_compute_system_values, &csv_options); + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, nir_address_format_32bit_offset); NIR_PASS(_, nir, nir_lower_explicit_io, diff --git a/src/kosmickrisp/libkk/kk_draws.cl b/src/kosmickrisp/libkk/kk_draws.cl new file mode 100644 index 00000000000..2a97774415c --- /dev/null +++ b/src/kosmickrisp/libkk/kk_draws.cl @@ -0,0 +1,28 @@ +/* + * Copyright 2026 LunarG, Inc. + * Copyright 2026 Google LLC + * Copyright 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ +#include "compiler/libcl/libcl_vk.h" + +/* + * To implement drawIndirectCount generically, we dispatch a kernel to + * clone-and-patch the indirect buffer, predicating out draws as appropriate. + */ +KERNEL(32) +libkk_predicate_indirect(global uint32_t *out, constant uint32_t *in, + constant uint32_t *draw_count, uint32_t stride_el, + uint indexed__2) +{ + uint draw = cl_global_id.x; + uint words = indexed__2 ? 5 : 4; + bool enabled = draw < *draw_count; + out += draw * words; + in += draw * stride_el; + + /* Copy enabled draws, zero predicated draws. */ + for (uint i = 0; i < words; ++i) { + out[i] = enabled ? 
in[i] : 0; + } +} diff --git a/src/kosmickrisp/libkk/meson.build b/src/kosmickrisp/libkk/meson.build index 7bbd55b2dc7..16b7f9e6113 100644 --- a/src/kosmickrisp/libkk/meson.build +++ b/src/kosmickrisp/libkk/meson.build @@ -3,6 +3,7 @@ # SPDX-License-Identifier: MIT libkk_shader_files = files( + 'kk_draws.cl', 'kk_triangle_fan.cl', 'kk_query.cl', ) diff --git a/src/kosmickrisp/vulkan/kk_cmd_draw.c b/src/kosmickrisp/vulkan/kk_cmd_draw.c index 0920a4703aa..a1683de63a6 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_draw.c +++ b/src/kosmickrisp/vulkan/kk_cmd_draw.c @@ -1093,15 +1093,6 @@ kk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, cmd->state.gfx.descriptors.root.draw.draw_id = 0; } -VKAPI_ATTR void VKAPI_CALL -kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, - VkDeviceSize offset, VkBuffer countBuffer, - VkDeviceSize countBufferOffset, uint32_t maxDrawCount, - uint32_t stride) -{ - /* TODO_KOSMICKRISP */ -} - VKAPI_ATTR void VKAPI_CALL kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, uint32_t drawCount, @@ -1142,11 +1133,106 @@ kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, cmd->state.gfx.descriptors.root.draw.draw_id = 0; } +VKAPI_ATTR void VKAPI_CALL +kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, VkBuffer countBuffer, + VkDeviceSize countBufferOffset, uint32_t maxDrawCount, + uint32_t stride) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + VK_FROM_HANDLE(kk_buffer, count_buffer, countBuffer); + + assert((stride % 4) == 0 && "aligned"); + + const struct vk_dynamic_graphics_state *dyn = + &cmd->vk.dynamic_graphics_state; + + size_t out_stride = sizeof(uint32_t) * 4; + struct kk_bo *patched = + kk_cmd_allocate_buffer(cmd, out_stride * maxDrawCount, 4); + uint64_t in = vk_buffer_address(&buffer->vk, offset); + uint64_t count_addr = 
vk_buffer_address(&count_buffer->vk, countBufferOffset); + + struct mtl_size grid = {maxDrawCount, 1u, 1u}; + libkk_predicate_indirect(cmd, grid, true, patched->gpu, in, count_addr, + stride / 4, false); + + for (unsigned i = 0; i < maxDrawCount; ++i) { + /* TODO_KOSMICKRISP + * Move this to a separate buffer from the root so we don't have to upload + * it every single loop. Pass it to the kk_draw call as a parameter that + * will later be uploaded. + */ + cmd->state.gfx.descriptors.root_dirty = true; + cmd->state.gfx.descriptors.root.draw.draw_id = i; + + struct kk_draw_data data = { + .indirect_buffer = patched->map, + .indirect_buffer_offset = out_stride * i, + .prim = vk_topology_to_mesa(dyn->ia.primitive_topology), + .indirect = true, + }; + + kk_draw(cmd, data); + } + /* TODO_KOSMICKRISP Remove once above is done */ + cmd->state.gfx.descriptors.root_dirty = true; + cmd->state.gfx.descriptors.root.draw.draw_id = 0; +} + VKAPI_ATTR void VKAPI_CALL kk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride) { - /* TODO_KOSMICKRISP */ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + VK_FROM_HANDLE(kk_buffer, count_buffer, countBuffer); + + assert((stride % 4) == 0 && "aligned"); + + const struct vk_dynamic_graphics_state *dyn = + &cmd->vk.dynamic_graphics_state; + + size_t out_stride = sizeof(uint32_t) * 5; + struct kk_bo *patched = + kk_cmd_allocate_buffer(cmd, out_stride * maxDrawCount, 4); + uint64_t in = vk_buffer_address(&buffer->vk, offset); + uint64_t count_addr = + vk_buffer_address(&count_buffer->vk, countBufferOffset); + + struct mtl_size grid = {maxDrawCount, 1u, 1u}; + libkk_predicate_indirect(cmd, grid, true, patched->gpu, in, count_addr, + stride / 4, true); + + for (unsigned i = 0; i < maxDrawCount; ++i) { + /* TODO_KOSMICKRISP + * Move this to a separate buffer 
from the root so we don't have to upload + * it every single loop. Pass it to the kk_draw call as a parameter that + * will later be uploaded. + */ + cmd->state.gfx.descriptors.root_dirty = true; + cmd->state.gfx.descriptors.root.draw.draw_id = i; + + struct kk_draw_data data = { + .indirect_buffer = patched->map, + .index_buffer = cmd->state.gfx.index.handle, + .indirect_buffer_offset = out_stride * i, + .index_buffer_offset = cmd->state.gfx.index.offset, + .index_buffer_range_B = + cmd->state.gfx.index.size - cmd->state.gfx.index.offset, + .prim = vk_topology_to_mesa(dyn->ia.primitive_topology), + .index_size = cmd->state.gfx.index.bytes_per_index, + .indirect = true, + .indexed = true, + }; + + kk_draw(cmd, data); + } + /* TODO_KOSMICKRISP Remove once above is done */ + cmd->state.gfx.descriptors.root_dirty = true; + cmd->state.gfx.descriptors.root.draw.draw_id = 0; } diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index 0aea569427e..c47b5b706a8 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -66,7 +66,7 @@ kk_get_device_extensions(const struct kk_instance *instance, .KHR_buffer_device_address = true, /* Required in Vulkan 1.3 */ .KHR_create_renderpass2 = true, .KHR_depth_stencil_resolve = true, - .KHR_draw_indirect_count = false, + .KHR_draw_indirect_count = true, .KHR_driver_properties = true, .KHR_image_format_list = true, .KHR_imageless_framebuffer = true, @@ -218,6 +218,7 @@ kk_get_device_features( .descriptorBindingUpdateUnusedWhilePending = true, .descriptorBindingVariableDescriptorCount = true, .descriptorIndexing = true, + .drawIndirectCount = true, .hostQueryReset = true, .imagelessFramebuffer = true, .multiDrawIndirect = true, @@ -409,7 +410,7 @@ kk_get_device_properties(const struct kk_physical_device *pdev, .subTexelPrecisionBits = 8, .mipmapPrecisionBits = 8, .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = 
UINT32_MAX, + .maxDrawIndirectCount = UINT16_MAX, .maxSamplerLodBias = 15, .maxSamplerAnisotropy = 16, .maxViewports = KK_MAX_VIEWPORTS, diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c index c337ee7f1c7..f34c2cc7d11 100644 --- a/src/kosmickrisp/vulkan/kk_shader.c +++ b/src/kosmickrisp/vulkan/kk_shader.c @@ -80,6 +80,14 @@ kk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir, nir_shader_get_entrypoint(nir), nir_var_shader_out); msl_preprocess_nir(nir); + + /* Cannot be part of msl_preprocess_nir since clc does not expose + * has_base_workgroup_id */ + nir_lower_compute_system_values_options csv_options = { + .has_base_global_invocation_id = 0, + .has_base_workgroup_id = true, + }; + NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options); } struct kk_vs_key {