diff --git a/docs/features.txt b/docs/features.txt index c6806a3ce72..6753222c43d 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -611,7 +611,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_buffer_device_address DONE (anv, dzn/sm6.6+, hasvk, hk, kk, nvk, panvk, radv, vn) VK_EXT_calibrated_timestamps DONE (anv, hasvk, hk, kk, nvk, panvk/v10+, lvp, radv, vn, tu/a750+) VK_EXT_color_write_enable DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) - VK_EXT_conditional_rendering DONE (anv, hasvk, lvp, nvk, panvk/v10+, radv, tu, vn) + VK_EXT_conditional_rendering DONE (anv, hasvk, kk, lvp, nvk, panvk/v10+, radv, tu, vn) VK_EXT_conservative_rasterization DONE (anv, nvk, panvk/v11+, radv, vn, tu/a7xx+) VK_EXT_custom_border_color DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_EXT_custom_resolve DONE (radv, tu) diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.c b/src/kosmickrisp/vulkan/kk_cmd_buffer.c index b92d252025e..582ad8a27c7 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.c +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.c @@ -550,3 +550,25 @@ kk_CmdPushDescriptorSetWithTemplate2KHR( push_set, set_layout, template, pPushDescriptorSetWithTemplateInfo->pData); } + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBeginConditionalRendering2EXT( + VkCommandBuffer commandBuffer, + const VkConditionalRenderingBeginInfo2EXT *pConditionalRenderingBegin) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + cmd->state.cond_render.address = + pConditionalRenderingBegin->addressRange.address; + cmd->state.cond_render.inverted = pConditionalRenderingBegin->flags & + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; + cmd->state.cond_render.enabled = true; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + cmd->state.cond_render.enabled = false; +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.h 
b/src/kosmickrisp/vulkan/kk_cmd_buffer.h index 48981208fd0..ea670b66fc8 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.h +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.h @@ -38,7 +38,6 @@ struct kk_root_descriptor_table { uint32_t attrib_clamps[KK_MAX_ATTRIBS]; float blend_constant[4]; float clip_z_coeff; - uint32_t draw_id; } draw; struct { uint32_t base_group[3]; @@ -160,6 +159,12 @@ struct kk_compute_state { struct kk_descriptor_state descriptors; }; +struct kk_conditional_rendering_state { + uint64_t address; + bool inverted; + bool enabled; +}; + struct kk_encoder; struct kk_uploader { @@ -180,6 +185,7 @@ struct kk_cmd_buffer { struct { struct kk_graphics_state gfx; struct kk_compute_state cs; + struct kk_conditional_rendering_state cond_render; struct kk_shader *shaders[MESA_SHADER_STAGES]; /* Only tracks graphics shaders since compute is always bound for now. */ uint32_t dirty_shaders; diff --git a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c index 871f1563e25..95a935815c9 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c +++ b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c @@ -18,18 +18,6 @@ #include "vk_common_entrypoints.h" -VKAPI_ATTR void VKAPI_CALL -kk_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, - uint32_t groupCountY, uint32_t groupCountZ) -{ - /* Metal validation dislikes empty disptaches */ - if (groupCountX * groupCountY * groupCountZ == 0) - return; - - kk_CmdDispatchBase(commandBuffer, 0, 0, 0, groupCountX, groupCountY, - groupCountZ); -} - static void kk_flush_compute_state(struct kk_cmd_buffer *cmd) { @@ -51,12 +39,36 @@ kk_flush_compute_state(struct kk_cmd_buffer *cmd) enc, cmd->state.shaders[MESA_SHADER_COMPUTE]->pipeline.cs); } +static void +kk_predicate_compute(struct kk_cmd_buffer *cmd, uint64_t indirect_addr_out, + uint64_t indirect_addr_in) +{ + uint64_t cond_addr = cmd->state.cond_render.address; + + /* TODO_KOSMICKRISP: This can be accomplished more efficiently using device + * 
generated commands, constructing an indirect command buffer on the GPU + * which only contains the commands to run if the condition is true. For the + * time being, we apply predicates by zeroing out disabled indirect data */ + struct mtl_size grid = {1u, 1u, 1u}; + if (cmd->state.cond_render.inverted) { + libkk_predicate_indirect_eq_zero(cmd, grid, false, indirect_addr_out, + indirect_addr_in, cond_addr, 3u, 3u); + } else { + libkk_predicate_indirect_neq_zero(cmd, grid, false, indirect_addr_out, + indirect_addr_in, cond_addr, 3u, 3u); + } +} + VKAPI_ATTR void VKAPI_CALL kk_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) { + /* Metal validation dislikes empty dispatches */ + if (groupCountX * groupCountY * groupCountZ == 0) + return; + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); struct kk_descriptor_state *desc = &cmd->state.cs.descriptors; @@ -67,17 +79,39 @@ kk_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX, desc->root.cs.base_group[1] = baseGroupY; desc->root.cs.base_group[2] = baseGroupZ; - kk_flush_compute_state(cmd); - struct kk_shader *cs = cmd->state.shaders[MESA_SHADER_COMPUTE]; struct mtl_size local_size = cs->info.cs.local_size; - struct mtl_size grid_size = { - .x = groupCountX * local_size.x, - .y = groupCountY * local_size.y, - .z = groupCountZ * local_size.z, - }; + mtl_compute_encoder *enc = kk_compute_encoder(cmd); - mtl_dispatch_threads(enc, grid_size, local_size); + if (cmd->state.cond_render.enabled) { + /* Convert to indirect for predication */ + VkDispatchIndirectCommand indirect = { + .x = groupCountX, + .y = groupCountY, + .z = groupCountZ, + }; + struct kk_ptr patched = + kk_pool_upload(cmd, &indirect, sizeof(indirect), 4u); + if (unlikely(patched.gpu == 0)) { + return; + } + + kk_predicate_compute(cmd, patched.gpu, patched.gpu); + + /* Flush compute state after predication dispatch */ +
kk_flush_compute_state(cmd); + mtl_dispatch_threadgroups_with_indirect_buffer( + enc, patched.buffer, patched.offset, local_size); + } else { + struct mtl_size grid_size = { + .x = groupCountX * local_size.x, + .y = groupCountY * local_size.y, + .z = groupCountZ * local_size.z, + }; + + kk_flush_compute_state(cmd); + mtl_dispatch_threads(enc, grid_size, local_size); + } } VKAPI_ATTR void VKAPI_CALL @@ -95,10 +129,27 @@ kk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, desc->root.cs.base_group[1] = 0; desc->root.cs.base_group[2] = 0; - kk_flush_compute_state(cmd); - struct kk_shader *cs = cmd->state.shaders[MESA_SHADER_COMPUTE]; + struct mtl_size local_size = cs->info.cs.local_size; + mtl_compute_encoder *enc = kk_compute_encoder(cmd); - mtl_dispatch_threadgroups_with_indirect_buffer( - enc, buffer->mtl_handle, offset, cs->info.cs.local_size); + if (cmd->state.cond_render.enabled) { + struct kk_ptr patched = + kk_pool_alloc(cmd, sizeof(VkDispatchIndirectCommand), 4u); + if (unlikely(patched.gpu == 0)) { + return; + } + + kk_predicate_compute(cmd, patched.gpu, + vk_buffer_address(&buffer->vk, offset)); + + /* Flush compute state after predication dispatch */ + kk_flush_compute_state(cmd); + mtl_dispatch_threadgroups_with_indirect_buffer( + enc, patched.buffer, patched.offset, local_size); + } else { + kk_flush_compute_state(cmd); + mtl_dispatch_threadgroups_with_indirect_buffer(enc, buffer->mtl_handle, + offset, local_size); + } } diff --git a/src/kosmickrisp/vulkan/kk_cmd_draw.c b/src/kosmickrisp/vulkan/kk_cmd_draw.c index 2c27c7bc088..71ce14b5c89 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_draw.c +++ b/src/kosmickrisp/vulkan/kk_cmd_draw.c @@ -970,6 +970,17 @@ kk_convert_to_indirect_draw(struct kk_cmd_buffer *cmd, struct kk_draw_data data) return data; } +static struct kk_draw_predicate +kk_cond_render_predicate(struct kk_cmd_buffer *cmd) +{ + struct kk_draw_predicate predicate = { + .gpu_addr = cmd->state.cond_render.address, + .op = 
cmd->state.cond_render.inverted ? KK_PREDICATE_EQ_ZERO + : KK_PREDICATE_NEQ_ZERO, + }; + return predicate; +} + static struct kk_draw_data kk_predicate_draws(struct kk_cmd_buffer *cmd, struct kk_draw_data data) { @@ -993,6 +1004,10 @@ kk_predicate_draws(struct kk_cmd_buffer *cmd, struct kk_draw_data data) uint32_t out_stride_el = out_stride / sizeof(uint32_t); uint32_t in_stride_el = data.indirect_draws.stride / sizeof(uint32_t); + /* TODO_KOSMICKRISP: This can be accomplished more efficiently using device + * generated commands, constructing an indirect command buffer on the GPU + * which only contains the commands to run if the condition is true. For the + * time being, we apply predicates by zeroing out disabled indirect data */ struct mtl_size grid = {data.draw_count, 1u, 1u}; for (uint32_t i = 0; i < data.predicate_count; i++) { uint64_t addr = data.predicates[i].gpu_addr; @@ -1309,9 +1324,12 @@ kk_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, .firstVertex = firstVertex, .firstInstance = firstInstance, }; + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_data data = { .draws = &draw, + .predicates = &cr_predicate, .draw_count = 1, + .predicate_count = cmd->state.cond_render.enabled ? 1u : 0u, .prim = vk_topology_to_mesa(dyn->ia.primitive_topology), .shader_data.upload_mask = build_per_draw_upload_mask(cmd), }; @@ -1357,9 +1375,12 @@ kk_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, } if (draw_idx > 0) { + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_data data = { .draws = draws, + .predicates = &cr_predicate, .draw_count = draw_idx, + .predicate_count = cmd->state.cond_render.enabled ? 
1u : 0u, .prim = vk_topology_to_mesa(dyn->ia.primitive_topology), .shader_data.upload_mask = build_per_draw_upload_mask(cmd), }; @@ -1391,9 +1412,12 @@ kk_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, .vertexOffset = vertexOffset, .firstInstance = firstInstance, }; + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_data data = { .indexed_draws = &draw, + .predicates = &cr_predicate, .draw_count = 1, + .predicate_count = cmd->state.cond_render.enabled ? 1u : 0u, .index_buffer = cmd->state.gfx.index.handle, .index_buffer_offset = cmd->state.gfx.index.offset, .index_buffer_range_B = cmd->state.gfx.index.range, @@ -1447,9 +1471,12 @@ kk_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, } if (draw_idx > 0) { + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_data data = { .indexed_draws = draws, + .predicates = &cr_predicate, .draw_count = draw_idx, + .predicate_count = cmd->state.cond_render.enabled ? 1u : 0u, .index_buffer = cmd->state.gfx.index.handle, .index_buffer_offset = cmd->state.gfx.index.offset, .index_buffer_range_B = cmd->state.gfx.index.range, @@ -1479,11 +1506,14 @@ kk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_data data = { .indirect_draws.buffer = buffer->mtl_handle, .indirect_draws.offset = offset, .indirect_draws.stride = stride, + .predicates = &cr_predicate, .draw_count = drawCount, + .predicate_count = cmd->state.cond_render.enabled ? 
1u : 0u, .indirect = true, .prim = vk_topology_to_mesa(dyn->ia.primitive_topology), .shader_data.upload_mask = build_per_draw_upload_mask(cmd), @@ -1506,11 +1536,14 @@ kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_data data = { .indirect_draws.buffer = buffer->mtl_handle, .indirect_draws.offset = offset, .indirect_draws.stride = stride, + .predicates = &cr_predicate, .draw_count = drawCount, + .predicate_count = cmd->state.cond_render.enabled ? 1u : 0u, .index_buffer = cmd->state.gfx.index.handle, .index_buffer_offset = cmd->state.gfx.index.offset, .index_buffer_range_B = cmd->state.gfx.index.range, @@ -1541,17 +1574,22 @@ kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_predicate count_predicate = { .gpu_addr = vk_buffer_address(&count_buffer->vk, countBufferOffset), .op = KK_PREDICATE_GT_DRAW_ID, }; + struct kk_draw_predicate predicates[2] = { + count_predicate, + cr_predicate, + }; struct kk_draw_data data = { .indirect_draws.buffer = buffer->mtl_handle, .indirect_draws.offset = offset, .indirect_draws.stride = stride, - .predicates = &count_predicate, + .predicates = predicates, .draw_count = maxDrawCount, - .predicate_count = 1, + .predicate_count = cmd->state.cond_render.enabled ? 
2u : 1u, .indirect = true, .prim = vk_topology_to_mesa(dyn->ia.primitive_topology), .shader_data.upload_mask = build_per_draw_upload_mask(cmd), @@ -1576,17 +1614,22 @@ kk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, const struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + struct kk_draw_predicate cr_predicate = kk_cond_render_predicate(cmd); struct kk_draw_predicate count_predicate = { .gpu_addr = vk_buffer_address(&count_buffer->vk, countBufferOffset), .op = KK_PREDICATE_GT_DRAW_ID, }; + struct kk_draw_predicate predicates[2] = { + count_predicate, + cr_predicate, + }; struct kk_draw_data data = { .indirect_draws.buffer = buffer->mtl_handle, .indirect_draws.offset = offset, .indirect_draws.stride = stride, - .predicates = &count_predicate, + .predicates = predicates, .draw_count = maxDrawCount, - .predicate_count = 1, + .predicate_count = cmd->state.cond_render.enabled ? 2u : 1u, .index_buffer = cmd->state.gfx.index.handle, .index_buffer_offset = cmd->state.gfx.index.offset, .index_buffer_range_B = cmd->state.gfx.index.range, diff --git a/src/kosmickrisp/vulkan/kk_cmd_meta.c b/src/kosmickrisp/vulkan/kk_cmd_meta.c index 5fa253b707a..2066d0ae022 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_meta.c +++ b/src/kosmickrisp/vulkan/kk_cmd_meta.c @@ -80,6 +80,7 @@ struct kk_meta_save { } gfx; }; } pipeline; + struct kk_conditional_rendering_state cond_render; struct kk_descriptor_set *desc0; struct kk_push_descriptor_set *push_desc0; mtl_buffer *vb0_handle; @@ -119,6 +120,9 @@ kk_meta_begin(struct kk_cmd_buffer *cmd, struct kk_meta_save *save, cmd->state.shaders[MESA_SHADER_COMPUTE]; } + save->cond_render = cmd->state.cond_render; + cmd->state.cond_render.enabled = false; + save->vb0_handle = cmd->state.gfx.vb.handles[0]; save->vb0 = cmd->state.gfx.vb.addr_range[0]; @@ -183,6 +187,8 @@ kk_meta_end(struct kk_cmd_buffer *cmd, struct kk_meta_save *save, kk_cmd_bind_compute_shader(cmd, save->shaders[MESA_SHADER_COMPUTE]); } 
+ cmd->state.cond_render = save->cond_render; + memcpy(desc->root.push, save->push, sizeof(save->push)); } @@ -278,8 +284,12 @@ kk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, uint32_t layer_ids[KK_MAX_MULTIVIEW_VIEW_COUNT] = {}; mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids, 1u); + /* Preserve conditional rendering state for clearing attachments */ + struct kk_conditional_rendering_state cond_render = cmd->state.cond_render; + struct kk_meta_save save; kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); + cmd->state.cond_render = cond_render; vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info, attachmentCount, pAttachments, rectCount, pRects); kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index de6008bf281..aa8b711395f 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -157,6 +157,7 @@ kk_get_device_extensions(const struct kk_instance *instance, .EXT_attachment_feedback_loop_layout = true, .EXT_attachment_feedback_loop_dynamic_state = true, .EXT_calibrated_timestamps = true, + .EXT_conditional_rendering = true, .EXT_depth_clip_control = true, .EXT_extended_dynamic_state3 = true, .EXT_external_memory_metal = true, @@ -377,6 +378,10 @@ kk_get_device_features( /* VK_EXT_attachment_feedback_loop_dynamic_state */ .attachmentFeedbackLoopDynamicState = true, + /* VK_EXT_conditional_rendering */ + .conditionalRendering = true, + .inheritedConditionalRendering = true, + /* VK_EXT_depth_clip_control */ .depthClipControl = true,