kk: Implement VK_KHR_draw_indirect_count as HK does

Acked-by: Arcady Goldmints-Orlov <arcady@lunarg.com>
Signed-off-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40703>
This commit is contained in:
Aitor Camacho 2026-03-25 18:44:53 +09:00 committed by Marge Bot
parent b6535654b9
commit 2b280305af
8 changed files with 143 additions and 19 deletions

View file

@ -469,7 +469,7 @@ Vulkan 1.2 -- all DONE: anv, hk, nvk, panvk/v10+, pvr, tu, vn
VK_KHR_buffer_device_address DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_KHR_create_renderpass2 DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_KHR_depth_stencil_resolve DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_KHR_draw_indirect_count DONE (anv, dzn, hasvk, lvp, nvk, panvk/v10+, radv, tu, vn)
VK_KHR_draw_indirect_count DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk/v10+, radv, tu, vn)
VK_KHR_driver_properties DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_KHR_image_format_list DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_KHR_imageless_framebuffer DONE (anv, dzn, hasvk, kk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)

View file

@ -1994,12 +1994,6 @@ msl_preprocess_nir(struct nir_shader *nir)
nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
UINT32_MAX);
nir_lower_compute_system_values_options csv_options = {
.has_base_global_invocation_id = 0,
.has_base_workgroup_id = true,
};
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
msl_nir_lower_subgroups(nir);
}

View file

@ -70,6 +70,12 @@ optimize(nir_shader *nir)
{
msl_preprocess_nir(nir);
nir_lower_compute_system_values_options csv_options = {
.has_base_global_invocation_id = 0,
.has_base_workgroup_id = true,
};
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
nir_address_format_32bit_offset);
NIR_PASS(_, nir, nir_lower_explicit_io,

View file

@ -0,0 +1,28 @@
/*
* Copyright 2026 LunarG, Inc.
* Copyright 2026 Google LLC
* Copyright 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "compiler/libcl/libcl_vk.h"
/*
 * Generic drawIndirectCount support: a kernel clones the application's
 * indirect buffer into a tightly packed copy, predicating out the draws that
 * fall past the count.
 *
 * Each invocation handles the single draw selected by cl_global_id.x:
 *   out         packed destination, 4 words per draw (5 when indexed__2 is set)
 *   in          source records, spaced stride_el 32-bit words apart
 *   draw_count  points at the number of draws that stay enabled
 *   indexed__2  non-zero selects the 5-word record layout
 */
KERNEL(32)
libkk_predicate_indirect(global uint32_t *out, constant uint32_t *in,
                         constant uint32_t *draw_count, uint32_t stride_el,
                         uint indexed__2)
{
   const uint draw_idx = cl_global_id.x;
   const uint record_words = indexed__2 ? 5 : 4;

   global uint32_t *dst = out + draw_idx * record_words;
   constant uint32_t *src = in + draw_idx * stride_el;

   if (draw_idx < *draw_count) {
      /* Enabled draw: copy the record through unchanged. */
      for (uint w = 0; w < record_words; ++w)
         dst[w] = src[w];
   } else {
      /* Predicated draw: the source record is never read, only zeros are
       * written.
       */
      for (uint w = 0; w < record_words; ++w)
         dst[w] = 0;
   }
}

View file

@ -3,6 +3,7 @@
# SPDX-License-Identifier: MIT
libkk_shader_files = files(
'kk_draws.cl',
'kk_triangle_fan.cl',
'kk_query.cl',
)

View file

@ -1093,15 +1093,6 @@ kk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
cmd->state.gfx.descriptors.root.draw.draw_id = 0;
}
/* Unimplemented placeholder for vkCmdDrawIndirectCount: the body holds only a
 * TODO marker, so a call records nothing and reads none of its arguments.
 */
VKAPI_ATTR void VKAPI_CALL
kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
VkDeviceSize offset, VkBuffer countBuffer,
VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride)
{
/* TODO_KOSMICKRISP */
}
VKAPI_ATTR void VKAPI_CALL
kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
VkDeviceSize offset, uint32_t drawCount,
@ -1142,11 +1133,106 @@ kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
cmd->state.gfx.descriptors.root.draw.draw_id = 0;
}
/*
 * vkCmdDrawIndirectCount entry point (non-indexed draws).
 *
 * The application's draw records are not consumed directly. Instead
 * libkk_predicate_indirect is dispatched over maxDrawCount invocations to
 * write a tightly packed, 4-words-per-draw copy into a buffer allocated from
 * the command buffer; records at or past the count stored in countBuffer are
 * zero-filled by that kernel. maxDrawCount indirect draws are then recorded,
 * each reading its own record from the packed copy.
 *
 * offset and countBufferOffset are byte offsets into _buffer and countBuffer.
 * stride is the byte distance between source records; it is passed to the
 * kernel in 32-bit words, hence the multiple-of-4 assertion.
 */
VKAPI_ATTR void VKAPI_CALL
kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                        VkDeviceSize offset, VkBuffer countBuffer,
                        VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                        uint32_t stride)
{
   VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
   VK_FROM_HANDLE(kk_buffer, count_buffer, countBuffer);

   assert((stride % 4) == 0 && "aligned");

   /* With no draws there is nothing to patch or record. Bail out before
    * asking for a zero-byte allocation and dispatching an empty grid.
    */
   if (maxDrawCount == 0)
      return;

   const struct vk_dynamic_graphics_state *dyn =
      &cmd->vk.dynamic_graphics_state;

   /* Size of one packed output record: 4 words for a non-indexed draw. */
   const size_t out_stride = sizeof(uint32_t) * 4;
   struct kk_bo *patched =
      kk_cmd_allocate_buffer(cmd, out_stride * maxDrawCount, 4);

   /* NOTE(review): assumes kk_cmd_allocate_buffer signals failure by
    * returning NULL (and records the error itself) - confirm against its
    * definition. Without this check a failed allocation is dereferenced.
    */
   if (!patched)
      return;

   uint64_t in = vk_buffer_address(&buffer->vk, offset);
   uint64_t count_addr =
      vk_buffer_address(&count_buffer->vk, countBufferOffset);

   /* One kernel invocation per potential draw. */
   struct mtl_size grid = {maxDrawCount, 1u, 1u};
   libkk_predicate_indirect(cmd, grid, true, patched->gpu, in, count_addr,
                            stride / 4, false);

   for (unsigned i = 0; i < maxDrawCount; ++i) {
      /* TODO_KOSMICKRISP
       * Move this to a separate buffer from the root so we don't have to
       * upload it every single loop. Pass it to the kk_draw call as a
       * parameter that will later be uploaded.
       */
      cmd->state.gfx.descriptors.root_dirty = true;
      cmd->state.gfx.descriptors.root.draw.draw_id = i;

      struct kk_draw_data data = {
         .indirect_buffer = patched->map,
         .indirect_buffer_offset = out_stride * i,
         .prim = vk_topology_to_mesa(dyn->ia.primitive_topology),
         .indirect = true,
      };
      kk_draw(cmd, data);
   }

   /* TODO_KOSMICKRISP Remove once above is done */
   cmd->state.gfx.descriptors.root_dirty = true;
   cmd->state.gfx.descriptors.root.draw.draw_id = 0;
}
/*
 * vkCmdDrawIndexedIndirectCount entry point (indexed draws).
 *
 * The application's draw records are not consumed directly. Instead
 * libkk_predicate_indirect is dispatched over maxDrawCount invocations to
 * write a tightly packed, 5-words-per-draw copy into a buffer allocated from
 * the command buffer; records at or past the count stored in countBuffer are
 * zero-filled by that kernel. maxDrawCount indexed indirect draws are then
 * recorded against the currently bound index buffer state, each reading its
 * own record from the packed copy.
 *
 * offset and countBufferOffset are byte offsets into _buffer and countBuffer.
 * stride is the byte distance between source records; it is passed to the
 * kernel in 32-bit words, hence the multiple-of-4 assertion.
 */
VKAPI_ATTR void VKAPI_CALL
kk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                               VkDeviceSize offset, VkBuffer countBuffer,
                               VkDeviceSize countBufferOffset,
                               uint32_t maxDrawCount, uint32_t stride)
{
   VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
   VK_FROM_HANDLE(kk_buffer, count_buffer, countBuffer);

   assert((stride % 4) == 0 && "aligned");

   /* With no draws there is nothing to patch or record. Bail out before
    * asking for a zero-byte allocation and dispatching an empty grid.
    */
   if (maxDrawCount == 0)
      return;

   const struct vk_dynamic_graphics_state *dyn =
      &cmd->vk.dynamic_graphics_state;

   /* Size of one packed output record: 5 words for an indexed draw. */
   const size_t out_stride = sizeof(uint32_t) * 5;
   struct kk_bo *patched =
      kk_cmd_allocate_buffer(cmd, out_stride * maxDrawCount, 4);

   /* NOTE(review): assumes kk_cmd_allocate_buffer signals failure by
    * returning NULL (and records the error itself) - confirm against its
    * definition. Without this check a failed allocation is dereferenced.
    */
   if (!patched)
      return;

   uint64_t in = vk_buffer_address(&buffer->vk, offset);
   uint64_t count_addr =
      vk_buffer_address(&count_buffer->vk, countBufferOffset);

   /* One kernel invocation per potential draw. */
   struct mtl_size grid = {maxDrawCount, 1u, 1u};
   libkk_predicate_indirect(cmd, grid, true, patched->gpu, in, count_addr,
                            stride / 4, true);

   for (unsigned i = 0; i < maxDrawCount; ++i) {
      /* TODO_KOSMICKRISP
       * Move this to a separate buffer from the root so we don't have to
       * upload it every single loop. Pass it to the kk_draw call as a
       * parameter that will later be uploaded.
       */
      cmd->state.gfx.descriptors.root_dirty = true;
      cmd->state.gfx.descriptors.root.draw.draw_id = i;

      struct kk_draw_data data = {
         .indirect_buffer = patched->map,
         .index_buffer = cmd->state.gfx.index.handle,
         .indirect_buffer_offset = out_stride * i,
         .index_buffer_offset = cmd->state.gfx.index.offset,
         .index_buffer_range_B =
            cmd->state.gfx.index.size - cmd->state.gfx.index.offset,
         .prim = vk_topology_to_mesa(dyn->ia.primitive_topology),
         .index_size = cmd->state.gfx.index.bytes_per_index,
         .indirect = true,
         .indexed = true,
      };
      kk_draw(cmd, data);
   }

   /* TODO_KOSMICKRISP Remove once above is done */
   cmd->state.gfx.descriptors.root_dirty = true;
   cmd->state.gfx.descriptors.root.draw.draw_id = 0;
}

View file

@ -66,7 +66,7 @@ kk_get_device_extensions(const struct kk_instance *instance,
.KHR_buffer_device_address = true, /* Required in Vulkan 1.3 */
.KHR_create_renderpass2 = true,
.KHR_depth_stencil_resolve = true,
.KHR_draw_indirect_count = false,
.KHR_draw_indirect_count = true,
.KHR_driver_properties = true,
.KHR_image_format_list = true,
.KHR_imageless_framebuffer = true,
@ -218,6 +218,7 @@ kk_get_device_features(
.descriptorBindingUpdateUnusedWhilePending = true,
.descriptorBindingVariableDescriptorCount = true,
.descriptorIndexing = true,
.drawIndirectCount = true,
.hostQueryReset = true,
.imagelessFramebuffer = true,
.multiDrawIndirect = true,
@ -409,7 +410,7 @@ kk_get_device_properties(const struct kk_physical_device *pdev,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
.maxDrawIndexedIndexValue = UINT32_MAX,
.maxDrawIndirectCount = UINT32_MAX,
.maxDrawIndirectCount = UINT16_MAX,
.maxSamplerLodBias = 15,
.maxSamplerAnisotropy = 16,
.maxViewports = KK_MAX_VIEWPORTS,

View file

@ -80,6 +80,14 @@ kk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir,
nir_shader_get_entrypoint(nir), nir_var_shader_out);
msl_preprocess_nir(nir);
/* Cannot be part of msl_preprocess_nir since clc does not expose
* has_base_workgroup_id */
nir_lower_compute_system_values_options csv_options = {
.has_base_global_invocation_id = 0,
.has_base_workgroup_id = true,
};
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
}
struct kk_vs_key {