From 68048759f0ae2e685eced36e83a33d8f69e35925 Mon Sep 17 00:00:00 2001 From: Aitor Camacho Date: Sat, 18 Apr 2026 12:53:15 +0900 Subject: [PATCH] kk: Implement tessellation Same approach as HK for tessellation. It also handles instance_id lowering. instance_id_includes_base_index is not taken into account in multiple other passes that use instance id. These passes expect instance id to actually be instance id. This change adds a pass to work around this. Signed-off-by: Aitor Camacho Part-of: --- src/kosmickrisp/bridge/mtl_encoder.h | 3 + src/kosmickrisp/bridge/mtl_encoder.m | 10 + src/kosmickrisp/bridge/mtl_types.h | 6 + src/kosmickrisp/bridge/stubs/mtl_encoder.c | 6 + src/kosmickrisp/bridge/vk_to_mtl_map.c | 13 + src/kosmickrisp/bridge/vk_to_mtl_map.h | 2 + .../compiler/msl_nir_lower_common.c | 22 + src/kosmickrisp/compiler/nir_to_msl.h | 2 +- src/kosmickrisp/libkk/kk_geometry.cl | 43 ++ src/kosmickrisp/libkk/kk_tessellation.cl | 93 +++ src/kosmickrisp/libkk/kk_tessellator.cl | 31 + src/kosmickrisp/libkk/kk_tessellator.h | 20 + src/kosmickrisp/libkk/meson.build | 3 + src/kosmickrisp/vulkan/kk_cmd_buffer.h | 13 +- src/kosmickrisp/vulkan/kk_cmd_draw.c | 600 ++++++++++++++---- .../vulkan/kk_nir_lower_descriptors.c | 36 +- src/kosmickrisp/vulkan/kk_nir_lower_vbo.c | 26 +- src/kosmickrisp/vulkan/kk_physical_device.c | 1 + src/kosmickrisp/vulkan/kk_shader.c | 10 +- src/kosmickrisp/vulkan/kk_shader.h | 7 +- 20 files changed, 797 insertions(+), 150 deletions(-) create mode 100644 src/kosmickrisp/libkk/kk_geometry.cl create mode 100644 src/kosmickrisp/libkk/kk_tessellation.cl create mode 100644 src/kosmickrisp/libkk/kk_tessellator.cl create mode 100644 src/kosmickrisp/libkk/kk_tessellator.h diff --git a/src/kosmickrisp/bridge/mtl_encoder.h b/src/kosmickrisp/bridge/mtl_encoder.h index 21dc2223d78..beed5d5fbc5 100644 --- a/src/kosmickrisp/bridge/mtl_encoder.h +++ b/src/kosmickrisp/bridge/mtl_encoder.h @@ -61,6 +61,9 @@ void mtl_dispatch_threadgroups_with_indirect_buffer( 
mtl_compute_encoder *encoder, mtl_buffer *buffer, uint32_t offset, struct mtl_size local_size); +void mtl_memory_barrier_with_scope(mtl_compute_encoder *encoder, + enum mtl_barrier_scope scope); + /* MTLRenderEncoder */ mtl_render_encoder *mtl_new_render_command_encoder_with_descriptor( mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor); diff --git a/src/kosmickrisp/bridge/mtl_encoder.m b/src/kosmickrisp/bridge/mtl_encoder.m index 1561e7d66c4..dd5037ba84f 100644 --- a/src/kosmickrisp/bridge/mtl_encoder.m +++ b/src/kosmickrisp/bridge/mtl_encoder.m @@ -234,6 +234,16 @@ mtl_dispatch_threadgroups_with_indirect_buffer(mtl_compute_encoder *encoder, } } +void +mtl_memory_barrier_with_scope(mtl_compute_encoder *encoder, + enum mtl_barrier_scope scope) +{ + @autoreleasepool { + id enc = (id)encoder; + [enc memoryBarrierWithScope:(MTLBarrierScope)scope]; + } +} + /* MTLRenderEncoder */ /* Encoder commands */ diff --git a/src/kosmickrisp/bridge/mtl_types.h b/src/kosmickrisp/bridge/mtl_types.h index aa4eedf7760..ee76cfdcc16 100644 --- a/src/kosmickrisp/bridge/mtl_types.h +++ b/src/kosmickrisp/bridge/mtl_types.h @@ -220,6 +220,12 @@ enum mtl_depth_clip_mode { MTL_DEPTH_CLIP_MODE_CLAMP = 1, }; +enum mtl_barrier_scope { + MTL_BARRIER_SCOPE_BUFFERS = 1 << 0, + MTL_BARRIER_SCOPE_TEXTURES = 1 << 1, + MTL_BARRIER_SCOPE_RENDER_TARGETS = 1 << 2, +}; + /** STRUCTURES */ struct mtl_range { size_t offset; diff --git a/src/kosmickrisp/bridge/stubs/mtl_encoder.c b/src/kosmickrisp/bridge/stubs/mtl_encoder.c index cbf6e7ea2d0..931733ebbbf 100644 --- a/src/kosmickrisp/bridge/stubs/mtl_encoder.c +++ b/src/kosmickrisp/bridge/stubs/mtl_encoder.c @@ -59,6 +59,12 @@ mtl_copy_from_texture_to_texture(mtl_blit_encoder *blit_enc_handle, { } +void +mtl_memory_barrier_with_scope(mtl_compute_encoder *encoder, + enum mtl_barrier_scope scope) +{ +} + /* MTLComputeEncoder */ mtl_compute_encoder * mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer) diff --git 
a/src/kosmickrisp/bridge/vk_to_mtl_map.c b/src/kosmickrisp/bridge/vk_to_mtl_map.c index 1299ee76f84..068ea721ef0 100644 --- a/src/kosmickrisp/bridge/vk_to_mtl_map.c +++ b/src/kosmickrisp/bridge/vk_to_mtl_map.c @@ -250,3 +250,16 @@ index_size_in_bytes_to_mtl_index_type(unsigned bytes) UNREACHABLE("Unsupported byte size for index"); } } + +unsigned +mtl_index_type_to_size_B(enum mtl_index_type type) +{ + switch (type) { + case MTL_INDEX_TYPE_UINT16: + return 2u; + case MTL_INDEX_TYPE_UINT32: + return 4u; + default: + UNREACHABLE("Unhandled index type"); + } +} diff --git a/src/kosmickrisp/bridge/vk_to_mtl_map.h b/src/kosmickrisp/bridge/vk_to_mtl_map.h index 151f64bc671..d80e790cf1a 100644 --- a/src/kosmickrisp/bridge/vk_to_mtl_map.h +++ b/src/kosmickrisp/bridge/vk_to_mtl_map.h @@ -76,4 +76,6 @@ enum mtl_cull_mode vk_front_face_to_mtl_cull_mode(enum VkCullModeFlagBits mode); enum mtl_index_type index_size_in_bytes_to_mtl_index_type(unsigned bytes); +unsigned mtl_index_type_to_size_B(enum mtl_index_type type); + #endif /* KK_MTL_TO_VK_MAP_H */ diff --git a/src/kosmickrisp/compiler/msl_nir_lower_common.c b/src/kosmickrisp/compiler/msl_nir_lower_common.c index 2829c338872..a756ac947d6 100644 --- a/src/kosmickrisp/compiler/msl_nir_lower_common.c +++ b/src/kosmickrisp/compiler/msl_nir_lower_common.c @@ -574,3 +574,25 @@ msl_nir_lower_clip_cull_distance(nir_shader *nir, unsigned num_cull_distances) else NIR_PASS(_, nir, msl_nir_lower_clip_cull_distance_vs); } + +static bool +lower_instance_id(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) +{ + if (intr->intrinsic != nir_intrinsic_load_instance_id) + return false; + + b->cursor = nir_after_instr(&intr->instr); + nir_def *base_instance = nir_load_base_instance(b); + nir_def *instance_id = nir_isub(b, &intr->def, base_instance); + nir_def_rewrite_uses_after(&intr->def, instance_id); + BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); + + return true; +} + +bool 
+msl_nir_lower_instance_id(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, lower_instance_id, + nir_metadata_control_flow, NULL); +} diff --git a/src/kosmickrisp/compiler/nir_to_msl.h b/src/kosmickrisp/compiler/nir_to_msl.h index c2a6d6647a0..2d5f19c0a79 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.h +++ b/src/kosmickrisp/compiler/nir_to_msl.h @@ -79,6 +79,7 @@ bool msl_nir_fake_guard_for_discards(struct nir_shader *nir); bool msl_nir_lower_sample_shading(nir_shader *nir); void msl_nir_lower_clip_cull_distance(nir_shader *nir, unsigned num_cull_distances); +bool msl_nir_lower_instance_id(nir_shader *nir); bool msl_gather_uses_per_draw_data(nir_shader *nir); @@ -94,7 +95,6 @@ static const nir_shader_compiler_options kk_nir_options = { .lower_insert_byte = true, .lower_fmod = true, .discard_is_demote = true, - .instance_id_includes_base_index = true, .lower_device_index_to_zero = true, .lower_pack_64_2x32_split = true, .lower_unpack_64_2x32_split = true, diff --git a/src/kosmickrisp/libkk/kk_geometry.cl b/src/kosmickrisp/libkk/kk_geometry.cl new file mode 100644 index 00000000000..c54e0059e0e --- /dev/null +++ b/src/kosmickrisp/libkk/kk_geometry.cl @@ -0,0 +1,43 @@ +/* + * Copyright 2026 LunarG, Inc. 
+ * Copyright 2026 Google LLC + * Copyright 2023 Alyssa Rosenzweig + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "compiler/libcl/libcl_vk.h" +#include "poly/geometry.h" +#include "poly/tessellator.h" + +KERNEL(1) +libkk_prefix_sum_tess(global struct poly_tess_params *p) +{ + if (cl_local_id.x != 0) + return; + + /* The last element of an inclusive prefix sum is the total sum */ + uint total = 0; + + if (p->nr_patches > 0) { + for (uint32_t i = 0u; i < p->nr_patches; ++i) { + total += p->counts[i]; + p->counts[i] = total; + } + } + + /* Allocate 4-byte indices */ + uint32_t elsize_B = sizeof(uint32_t); + uint32_t size_B = total * elsize_B; + uint alloc_B = poly_heap_alloc_offs(p->heap, size_B); + p->index_buffer = (global uint32_t *)(((uintptr_t)p->heap->base) + alloc_B); + + /* ...and now we can generate the API indexed draw */ + global uint32_t *desc = p->out_draws; + + desc[0] = total; /* count */ + desc[1] = 1; /* instance_count */ + desc[2] = alloc_B / elsize_B; /* start */ + desc[3] = 0; /* index_bias */ + desc[4] = 0; /* start_instance */ +} diff --git a/src/kosmickrisp/libkk/kk_tessellation.cl b/src/kosmickrisp/libkk/kk_tessellation.cl new file mode 100644 index 00000000000..bb2d06e1695 --- /dev/null +++ b/src/kosmickrisp/libkk/kk_tessellation.cl @@ -0,0 +1,93 @@ +/* + * Copyright 2026 LunarG, Inc. 
+ * Copyright 2026 Google LLC + * Copyright 2023 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "poly/geometry.h" +#include "poly/tessellator.h" + +KERNEL(1) +libkk_tess_setup_indirect( + global struct poly_tess_params *p, + global uint32_t *grids /* output: VS then TCS then tess */, + global struct poly_vertex_params *vp /* output */, global uint32_t *indirect, + uint64_t in_index_buffer, uint32_t in_index_buffer_range_el, + uint32_t in_index_size_B, uint64_t vertex_outputs /* bitfield */, + + /* Tess control invocation counter if active, else zero */ + global uint32_t *tcs_statistic) +{ + uint count = indirect[0], instance_count = indirect[1]; + unsigned in_patches = count / p->input_patch_size; + + /* TCS invocation counter increments once per-patch */ + if (tcs_statistic) { + *tcs_statistic += in_patches; + } + + size_t draw_stride = 5 * sizeof(uint32_t); + unsigned unrolled_patches = in_patches * instance_count; + + uint32_t alloc = 0; + uint32_t tcs_out_offs = alloc; + alloc += unrolled_patches * p->tcs_stride_el * 4; + + uint32_t patch_coord_offs = alloc; + alloc += unrolled_patches * 4; + + uint32_t count_offs = alloc; + alloc += unrolled_patches * sizeof(uint32_t); + + uint vb_offs = alloc; + uint vb_size = poly_tcs_in_size(count * instance_count, vertex_outputs); + alloc += vb_size; + + /* Allocate all patch calculations in one go */ + global uchar *blob = poly_heap_alloc(p->heap, alloc); + + p->tcs_buffer = (global float *)(blob + tcs_out_offs); + p->patches_per_instance = in_patches; + p->coord_allocs = (global uint *)(blob + patch_coord_offs); + p->nr_patches = unrolled_patches; + + vp->output_buffer = (uintptr_t)(blob + vb_offs); + vp->outputs = vertex_outputs; + p->counts = (global uint32_t *)(blob + count_offs); + + if (vp) { + vp->verts_per_instance = count; + } + + /* If indexing is enabled, the third word is the offset into the index buffer + * in elements. Apply that offset now that we have it. 
For a hardware + * indirect draw, the hardware would do this for us, but for software input + * assembly we need to do it ourselves. + * + * XXX: Deduplicate? + */ + if (in_index_size_B) { + /* TODO_KOSMICKRISP Use poly_index_buffer and implement + * load_ro_sink_address_poly */ + vp->index_buffer = in_index_buffer + (indirect[2] * in_index_size_B); + + vp->index_buffer_range_el = + poly_index_buffer_range_el(in_index_buffer_range_el, indirect[2]); + } + + /* VS grid size */ + grids[0] = count; + grids[1] = instance_count; + grids[2] = 1; + + /* TCS grid size */ + grids[3] = in_patches * p->output_patch_size; + grids[4] = instance_count; + grids[5] = 1; + + /* Tess grid size */ + grids[6] = unrolled_patches; + grids[7] = 1; + grids[8] = 1; +} diff --git a/src/kosmickrisp/libkk/kk_tessellator.cl b/src/kosmickrisp/libkk/kk_tessellator.cl new file mode 100644 index 00000000000..fc6e8dad59a --- /dev/null +++ b/src/kosmickrisp/libkk/kk_tessellator.cl @@ -0,0 +1,31 @@ +/* + * Copyright 2026 LunarG, Inc. + * Copyright 2026 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "poly/cl/tessellator.h" + +KERNEL(1) +libkk_tess_isoline(constant struct poly_tess_params *p, + enum poly_tess_mode tess_mode) +{ + uint patch = cl_global_id.x; + poly_tess_isoline_process(p, patch, tess_mode); +} + +KERNEL(1) +libkk_tess_tri(constant struct poly_tess_params *p, + enum poly_tess_mode tess_mode) +{ + uint patch = cl_global_id.x; + poly_tess_tri_process(p, patch, tess_mode); +} + +KERNEL(1) +libkk_tess_quad(constant struct poly_tess_params *p, + enum poly_tess_mode tess_mode) +{ + uint patch = cl_global_id.x; + poly_tess_quad_process(p, patch, tess_mode); +} diff --git a/src/kosmickrisp/libkk/kk_tessellator.h b/src/kosmickrisp/libkk/kk_tessellator.h new file mode 100644 index 00000000000..5d687f40a3a --- /dev/null +++ b/src/kosmickrisp/libkk/kk_tessellator.h @@ -0,0 +1,20 @@ +/* + * Copyright 2026 LunarG, Inc. 
+ * Copyright 2026 Google LLC + * Copyright 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "poly/tessellator.h" + +#define libkk_tessellate(context, grid, barrier, prim, mode, state) \ + if (prim == TESS_PRIMITIVE_QUADS) { \ + libkk_tess_quad(context, grid, barrier, state, mode); \ + } else if (prim == TESS_PRIMITIVE_TRIANGLES) { \ + libkk_tess_tri(context, grid, barrier, state, mode); \ + } else { \ + assert(prim == TESS_PRIMITIVE_ISOLINES); \ + libkk_tess_isoline(context, grid, barrier, state, mode); \ + } diff --git a/src/kosmickrisp/libkk/meson.build b/src/kosmickrisp/libkk/meson.build index fe4d3c4d907..04e17525255 100644 --- a/src/kosmickrisp/libkk/meson.build +++ b/src/kosmickrisp/libkk/meson.build @@ -4,7 +4,10 @@ libkk_shader_files = files( 'kk_draws.cl', + 'kk_geometry.cl', 'kk_query.cl', + 'kk_tessellation.cl', + 'kk_tessellator.cl', ) libkk_spv = custom_target( diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.h b/src/kosmickrisp/vulkan/kk_cmd_buffer.h index de7f1783ea0..c14acbc87ae 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.h +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.h @@ -48,8 +48,9 @@ struct kk_root_descriptor_table { float blend_constant[4]; float clip_z_coeff; - uint32_t base_vertex; uint32_t index_size; + uint64_t base_vertex_addr; + uint64_t base_instance_addr; } draw; struct { uint32_t base_group[3]; @@ -158,6 +159,16 @@ struct kk_graphics_state { mtl_buffer *handles[KK_MAX_VBUFS]; } vb; + /* Tessellation state */ + struct { + /* Grid buffer for when the draw is indirect */ + struct kk_ptr indirect_ptr; + mtl_buffer *out_draws_buffer; + uint64_t out_draws_offset; + struct kk_tess_info info; + enum mesa_prim prim; + } tess; + /* Needed by vk_command_buffer::dynamic_graphics_state */ struct vk_vertex_input_state _dynamic_vi; struct vk_sample_locations_state _dynamic_sl; diff --git a/src/kosmickrisp/vulkan/kk_cmd_draw.c b/src/kosmickrisp/vulkan/kk_cmd_draw.c index 558beb48bf4..50b357d5ddf 100644 --- 
a/src/kosmickrisp/vulkan/kk_cmd_draw.c +++ b/src/kosmickrisp/vulkan/kk_cmd_draw.c @@ -19,7 +19,10 @@ #include "kosmickrisp/bridge/mtl_bridge.h" #include "kosmickrisp/bridge/vk_to_mtl_map.h" +#include "kosmickrisp/libkk/kk_tessellator.h" + #include "poly/geometry.h" +#include "poly/tessellator.h" #include "vulkan/runtime/vk_render_pass.h" #include "vulkan/util/vk_format.h" @@ -782,6 +785,255 @@ kk_flush_pipeline(struct kk_cmd_buffer *cmd) if (gfx->depth_stencil_state) mtl_set_depth_stencil_state(enc, gfx->depth_stencil_state); } + + /* Merge tess info before GS construction since that depends on + * gfx->tess.prim + */ + if ((IS_SHADER_DIRTY(TESS_CTRL) || IS_SHADER_DIRTY(TESS_EVAL)) && + cmd->state.shaders[MESA_SHADER_TESS_CTRL]) { + struct kk_shader *tesc = cmd->state.shaders[MESA_SHADER_TESS_CTRL]; + struct kk_shader *tese = cmd->state.shaders[MESA_SHADER_TESS_EVAL]; + + gfx->tess.info = + kk_tess_info_merge(tese->info.tess.info, tesc->info.tess.info); + + /* Determine primitive based on the merged state */ + if (gfx->tess.info.points) { + gfx->tess.prim = MESA_PRIM_POINTS; + } else if (gfx->tess.info.mode == TESS_PRIMITIVE_ISOLINES) { + gfx->tess.prim = MESA_PRIM_LINES; + } else { + gfx->tess.prim = MESA_PRIM_TRIANGLES; + } + } +} + +static void +kk_init_heap(const void *data) +{ + struct kk_cmd_buffer *cmd = (struct kk_cmd_buffer *)data; + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + size_t size = 128 * 1024 * 1024; + kk_alloc_bo(dev, &dev->vk.base, size, 0, &dev->heap); + + struct poly_heap *map = (struct poly_heap *)dev->heap->cpu; + + /* TODO_KOSMICKRISP Self-contained until we have rodata at the device. 
*/ + *map = (struct poly_heap){ + .base = dev->heap->gpu + sizeof(struct poly_heap), + .size = size - sizeof(struct poly_heap), + }; +} + +static uint64_t +kk_heap(struct kk_cmd_buffer *cmd) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + util_call_once_data(&dev->heap_init_once, kk_init_heap, cmd); + + /* We need to free all allocations after each command buffer execution */ + if (!cmd->uses_heap) { + uint64_t addr = dev->heap->gpu; + + /* Zeroing the allocated index frees everything */ + kk_cmd_write(cmd, (struct libkk_imm_write){ + addr + offsetof(struct poly_heap, bottom), 0}); + + cmd->uses_heap = true; + } + + return dev->heap->gpu; +} + +enum kk_predicate_op : uint16_t { + /* value > draw_id */ + KK_PREDICATE_GT_DRAW_ID, + /* value == 0 */ + KK_PREDICATE_EQ_ZERO, + /* value != 0 */ + KK_PREDICATE_NEQ_ZERO, +}; + +struct kk_draw_command { + enum mesa_prim prim; + /* Mask of stages that need per-draw data uploaded */ + uint32_t upload_mask; + mtl_buffer *index_buffer; + uint64_t index_buffer_offset; + uint64_t index_buffer_range_B; + uint64_t index_buffer_size_B; + uint32_t restart_index; + uint8_t index_buffer_el_size_B; + bool indirect; + bool indexed; + bool restart; + uint32_t predicate_count; + enum kk_predicate_op predicate_op[2]; + uint32_t draw_count; + uint32_t pad_; + uint64_t predicate_addr[2]; + + union { + struct { + mtl_buffer *buffer; + uint64_t offset; + uint32_t stride; + } indirect_command; + /* These arrays will be >1 when draw_count is >1 as this struct is + * dynamically allocated. 
*/ + VkDrawIndirectCommand draws[1]; + VkDrawIndexedIndirectCommand indexed_draws[1]; + }; +}; +static_assert(sizeof(struct kk_draw_command) == 104u, "Packed struct"); + +struct kk_draw_data { + /* For non-indirect, 0 is vertex/index count, 1 instance count and 2 first + * instance */ + struct kk_grid grid; + struct { + mtl_buffer *buffer; + uint64_t offset; + uint64_t range; + enum mtl_index_type type; + } index; + uint32_t vertex_offset; + enum mtl_primitive_type primitive_type; +}; + +static uint64_t +kk_upload_vertex_params(struct kk_cmd_buffer *cmd, struct kk_draw_data data) +{ + struct kk_descriptor_state *desc = &cmd->state.gfx.descriptors; + + const uint32_t wg_size[3] = {1, 1, 1}; + + struct poly_vertex_params params; + poly_vertex_params_init(&params, 0, wg_size); + + /* XXX: We should deduplicate this logic */ + bool indirect = kk_grid_is_indirect(data.grid); + + if (!indirect) + poly_vertex_params_set_draw(&params, data.grid.size.x, data.grid.size.y); + + if (data.index.buffer) { + params.index_buffer = + mtl_buffer_get_gpu_address(data.index.buffer) + data.index.offset; + + params.index_buffer_range_el = + data.index.range / mtl_index_type_to_size_B(data.index.type); + } + + struct kk_shader *vs = cmd->state.shaders[MESA_SHADER_VERTEX]; + params.outputs = vs->info.vs.outputs_written; + + if (!indirect) { + uint32_t verts = data.grid.size.x, instances = data.grid.size.y; + unsigned vb_size = + poly_tcs_in_size(verts * instances, vs->info.vs.outputs_written); + + /* Allocate if there are any outputs, or use the null sink to trap + * reads if there aren't. Those reads are undefined but should not + * fault. 
Affects: + * + * dEQP-VK.pipeline.monolithic.no_position.explicit_declarations.basic.single_view.v0_g1 + */ + if (vb_size) + params.output_buffer = kk_pool_alloc(cmd, vb_size, 4).gpu; + else + params.output_buffer = 0u; + } + + desc->root.draw.vertex_outputs = params.outputs; + + return kk_pool_upload(cmd, &params, sizeof(params), 8).gpu; +} + +static void +kk_upload_tess_params(struct kk_cmd_buffer *cmd, struct poly_tess_params *out, + struct kk_draw_data draw) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + struct kk_graphics_state *gfx = &cmd->state.gfx; + struct kk_shader *tcs = cmd->state.shaders[MESA_SHADER_TESS_CTRL]; + + enum poly_tess_partitioning partitioning = + gfx->tess.info.spacing == TESS_SPACING_EQUAL + ? POLY_TESS_PARTITIONING_INTEGER + : gfx->tess.info.spacing == TESS_SPACING_FRACTIONAL_ODD + ? POLY_TESS_PARTITIONING_FRACTIONAL_ODD + : POLY_TESS_PARTITIONING_FRACTIONAL_EVEN; + + struct poly_tess_params args = { + .heap = kk_heap(cmd), + .tcs_stride_el = tcs->info.tess.tcs_output_stride / 4, + .statistic = 0u, + .input_patch_size = dyn->ts.patch_control_points, + .output_patch_size = tcs->info.tess.tcs_output_patch_size, + .tcs_patch_constants = tcs->info.tess.tcs_nr_patch_outputs, + .tcs_per_vertex_outputs = tcs->info.tess.tcs_per_vertex_outputs, + .partitioning = partitioning, + .points_mode = gfx->tess.info.points, + .isolines = gfx->tess.info.mode == TESS_PRIMITIVE_ISOLINES, + }; + + if (!args.points_mode && gfx->tess.info.mode != TESS_PRIMITIVE_ISOLINES) { + args.ccw = gfx->tess.info.ccw; + args.ccw ^= + dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; + } + + uint32_t draw_stride_el = 5; + size_t draw_stride_B = draw_stride_el * sizeof(uint32_t); + + /* heap is allocated by kk_heap */ + /* TODO_KOSMICKRISP Self-contained until we have rodata at the device. 
*/ + args.patch_coord_buffer = dev->heap->gpu + sizeof(struct poly_heap); + + if (!kk_grid_is_indirect(draw.grid)) { + unsigned in_patches = draw.grid.size.x / args.input_patch_size; + unsigned unrolled_patches = in_patches * draw.grid.size.y; + + uint32_t alloc = 0; + uint32_t tcs_out_offs = alloc; + alloc += unrolled_patches * args.tcs_stride_el * sizeof(uint32_t); + + uint32_t patch_coord_offs = alloc; + alloc += unrolled_patches * sizeof(uint32_t); + + uint32_t count_offs = alloc; + alloc += unrolled_patches * sizeof(uint32_t); + + /* Single API draw */ + uint32_t draw_offs = alloc; + alloc += draw_stride_B; + + struct kk_ptr ptr = kk_pool_alloc(cmd, alloc, 4); + gfx->tess.out_draws_buffer = ptr.buffer; + gfx->tess.out_draws_offset = ptr.offset + draw_offs; + uint64_t addr = ptr.gpu; + args.tcs_buffer = addr + tcs_out_offs; + args.patches_per_instance = in_patches; + args.coord_allocs = addr + patch_coord_offs; + args.nr_patches = unrolled_patches; + args.out_draws = addr + draw_offs; + args.counts = addr + count_offs; + } else { + /* Allocate 3x indirect global+local grids for VS/TCS/tess */ + uint32_t grid_stride = sizeof(uint32_t) * 3; + gfx->tess.indirect_ptr = kk_pool_alloc(cmd, grid_stride * 3, 4); + + struct kk_ptr ptr = kk_pool_alloc(cmd, draw_stride_B, 4); + gfx->tess.out_draws_buffer = ptr.buffer; + gfx->tess.out_draws_offset = ptr.offset; + args.out_draws = ptr.gpu; + } + + memcpy(out, &args, sizeof(args)); } static void @@ -835,10 +1087,17 @@ kk_flush_dynamic_state(struct kk_cmd_buffer *cmd) desc->root_dirty = true; } - if (IS_DIRTY(RS_FRONT_FACE)) { - mtl_set_front_face_winding( - enc, vk_front_face_to_mtl_winding( - cmd->vk.dynamic_graphics_state.rs.front_face)); + if (IS_DIRTY(RS_FRONT_FACE) || IS_DIRTY(TS_DOMAIN_ORIGIN) || + IS_SHADER_DIRTY(TESS_CTRL) || IS_SHADER_DIRTY(TESS_EVAL)) { + bool front_face_ccw = dyn->rs.front_face != VK_FRONT_FACE_CLOCKWISE; + if (cmd->state.shaders[MESA_SHADER_TESS_EVAL]) { + front_face_ccw ^= gfx->tess.info.ccw; + 
front_face_ccw ^= + dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT; + } + mtl_set_front_face_winding(enc, front_face_ccw + ? MTL_WINDING_COUNTER_CLOCKWISE + : MTL_WINDING_CLOCKWISE); } if (IS_DIRTY(RS_DEPTH_BIAS_FACTORS) || IS_DIRTY(RS_DEPTH_BIAS_ENABLE)) { @@ -928,87 +1187,6 @@ kk_flush_gfx_state(struct kk_cmd_buffer *cmd) #undef IS_SHADER_DIRTY #undef IS_DIRTY -enum kk_predicate_op : uint16_t { - /* value > draw_id */ - KK_PREDICATE_GT_DRAW_ID, - /* value == 0 */ - KK_PREDICATE_EQ_ZERO, - /* value != 0 */ - KK_PREDICATE_NEQ_ZERO, -}; - -struct kk_draw_command { - enum mesa_prim prim; - /* Mask of stages that need per-draw data uploaded */ - uint32_t upload_mask; - mtl_buffer *index_buffer; - uint64_t index_buffer_offset; - uint64_t index_buffer_range_B; - uint64_t index_buffer_size_B; - uint32_t restart_index; - uint8_t index_buffer_el_size_B; - bool indirect; - bool indexed; - bool restart; - uint32_t predicate_count; - enum kk_predicate_op predicate_op[2]; - uint32_t draw_count; - uint32_t pad_; - uint64_t predicate_addr[2]; - - union { - struct { - mtl_buffer *buffer; - uint64_t offset; - uint32_t stride; - } indirect_command; - /* These arrays will be >1 when draw_count is >1 as this struct is - * dynamically allocated. */ - VkDrawIndirectCommand draws[1]; - VkDrawIndexedIndirectCommand indexed_draws[1]; - }; -}; -static_assert(sizeof(struct kk_draw_command) == 104u, "Packed struct"); - -static void -kk_init_heap(const void *data) -{ - struct kk_cmd_buffer *cmd = (struct kk_cmd_buffer *)data; - struct kk_device *dev = kk_cmd_buffer_device(cmd); - - size_t size = 128 * 1024 * 1024; - kk_alloc_bo(dev, &dev->vk.base, size, 0, &dev->heap); - - struct poly_heap *map = (struct poly_heap *)dev->heap->cpu; - - /* TODO_KOSMICKRISP Self-contained until we have rodata at the device. 
*/ - *map = (struct poly_heap){ - .base = dev->heap->gpu + sizeof(struct poly_heap), - .size = size - sizeof(struct poly_heap), - }; -} - -static uint64_t -kk_heap(struct kk_cmd_buffer *cmd) -{ - struct kk_device *dev = kk_cmd_buffer_device(cmd); - - util_call_once_data(&dev->heap_init_once, kk_init_heap, cmd); - - /* We need to free all allocations after each command buffer execution */ - if (!cmd->uses_heap) { - uint64_t addr = dev->heap->gpu; - - /* Zeroing the allocated index frees everything */ - kk_cmd_write(cmd, (struct libkk_imm_write){ - addr + offsetof(struct poly_heap, bottom), 0}); - - cmd->uses_heap = true; - } - - return dev->heap->gpu; -} - /* Returns true if the draw was successfully converted. */ static bool kk_convert_to_indirect_draw(struct kk_cmd_buffer *cmd, @@ -1193,19 +1371,6 @@ build_per_draw_upload_mask(struct kk_cmd_buffer *cmd) return mask; } -struct kk_draw_data { - /* For non-indirect, 0 is vertex/index count, 1 instance count and 2 first - * instance */ - struct kk_grid grid; - struct { - mtl_buffer *buffer; - uint64_t offset; - enum mtl_index_type type; - } index; - uint32_t vertex_offset; - enum mtl_primitive_type primitive_type; -}; - static void kk_dispatch_draw(mtl_render_encoder *enc, struct kk_draw_data data) { @@ -1225,9 +1390,12 @@ kk_dispatch_draw(mtl_render_encoder *enc, struct kk_draw_data data) data.index.offset, data.grid.size.y, data.vertex_offset, data.grid.size.z); } else { - mtl_draw_primitives(enc, data.primitive_type, data.vertex_offset, - data.grid.size.x, data.grid.size.y, - data.grid.size.z); + /* Avoid Metal validation error. Empty draws from tessellation will + * have values set to 0. 
*/ + if (data.grid.size.x != 0 && data.grid.size.y != 0) + mtl_draw_primitives(enc, data.primitive_type, data.vertex_offset, + data.grid.size.x, data.grid.size.y, + data.grid.size.z); } } } @@ -1357,6 +1525,128 @@ kk_upload_per_draw_data(struct kk_cmd_buffer *cmd, uint32_t upload_mask, } } +static void +kk_dispatch_compute(mtl_compute_encoder *enc, struct kk_grid grid, + struct mtl_size local_size) +{ + if (grid.mode == KK_GRID_DIRECT) + mtl_dispatch_threads(enc, grid.size, local_size); + else + mtl_dispatch_threadgroups_with_indirect_buffer(enc, grid.indirect, + grid.offset, local_size); +} + +static struct kk_draw_data +kk_launch_tess(struct kk_cmd_buffer *cmd, struct kk_draw_data draw, + uint32_t draw_id) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + struct kk_graphics_state *gfx = &cmd->state.gfx; + struct kk_grid grid_vs, grid_tcs, grid_tess; + + struct kk_shader *vs = cmd->state.shaders[MESA_SHADER_VERTEX]; + struct kk_shader *tcs = cmd->state.shaders[MESA_SHADER_TESS_CTRL]; + + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + uint32_t input_patch_size = dyn->ts.patch_control_points; + uint64_t state = gfx->descriptors.root.draw.tess_params; + struct kk_tess_info info = gfx->tess.info; + + /* Setup grids */ + if (kk_grid_is_indirect(draw.grid)) { + struct libkk_tess_setup_indirect_args args = { + .p = state, + .grids = gfx->tess.indirect_ptr.gpu, + .indirect = + mtl_buffer_get_gpu_address(draw.grid.indirect) + draw.grid.offset, + .vp = gfx->descriptors.root.draw.vertex_params, + .vertex_outputs = vs->info.vs.outputs_written, + .tcs_statistic = 0, + }; + + if (draw.index.buffer) { + args.in_index_buffer = + mtl_buffer_get_gpu_address(draw.index.buffer) + draw.index.offset; + args.in_index_size_B = mtl_index_type_to_size_B(draw.index.type); + args.in_index_buffer_range_el = + draw.index.range / args.in_index_size_B; + } + + libkk_tess_setup_indirect_struct(cmd, kk_grid_1d(1), true, args); + + uint32_t grid_stride = 
sizeof(uint32_t) * 3; + grid_vs = + kk_grid_indirect(gfx->tess.indirect_ptr.buffer, + gfx->tess.indirect_ptr.offset + 0u * grid_stride); + grid_tcs = + kk_grid_indirect(gfx->tess.indirect_ptr.buffer, + gfx->tess.indirect_ptr.offset + 1u * grid_stride); + grid_tess = + kk_grid_indirect(gfx->tess.indirect_ptr.buffer, + gfx->tess.indirect_ptr.offset + 2u * grid_stride); + } else { + uint32_t patches = draw.grid.size.x / input_patch_size; + grid_vs = grid_tcs = kk_grid_2d(draw.grid.size.x, draw.grid.size.y); + + grid_tcs.size.x = patches * tcs->info.tess.tcs_output_patch_size; + grid_tess = kk_grid_1d(patches * draw.grid.size.y); + } + + /* First launch the VS and TCS */ + + mtl_compute_encoder *enc = kk_encoder_pre_gfx_encoder(cmd); + { + mtl_compute_pipeline_state *pipeline = vs->pipeline.gfx.pre_render[0]; + struct mtl_size local_size = {64, 1, 1}; + mtl_compute_set_pipeline_state(enc, pipeline); + mtl_compute_set_buffer(enc, gfx->descriptors.root.root_buffer.buffer, + gfx->descriptors.root.root_buffer.offset, 0u); + + struct kk_per_draw_data shader_data = {.draw_id = draw_id}; + + struct kk_ptr shader_data_gpu = + kk_pool_upload(cmd, &shader_data, sizeof(shader_data), 8u); + mtl_compute_set_buffer(enc, shader_data_gpu.buffer, + shader_data_gpu.offset, 2); + kk_dispatch_compute(enc, grid_vs, local_size); + /* TODO_KOSMICKRISP Maybe too big of a barrier? We could definitely just + * barrier the buffers we know we modify. */ + mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); + } + { + mtl_compute_pipeline_state *pipeline = vs->pipeline.gfx.pre_render[1]; + struct mtl_size local_size = {tcs->info.tess.tcs_output_patch_size, 1, 1}; + /* Avoid Metal validation error by trying to launch empty compute. Return + * empty data. We set restart to true to avoid unroll. 
*/ + if (grid_tcs.mode == KK_GRID_DIRECT && grid_tcs.size.x == 0u) + return (struct kk_draw_data){.grid = kk_grid_1d(0u)}; + mtl_compute_set_pipeline_state(enc, pipeline); + kk_dispatch_compute(enc, grid_tcs, local_size); + mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); + } + + /* First generate counts, then prefix sum them, and then tessellate. */ + libkk_tessellate(cmd, grid_tess, true, info.mode, POLY_TESS_MODE_COUNT, + state); + mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); + + libkk_prefix_sum_tess(cmd, kk_grid_1d(1u), true, state); + mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); + + libkk_tessellate(cmd, grid_tess, true, info.mode, POLY_TESS_MODE_WITH_COUNTS, + state); + mtl_memory_barrier_with_scope(enc, MTL_BARRIER_SCOPE_BUFFERS); + + draw.grid = + kk_grid_indirect(gfx->tess.out_draws_buffer, gfx->tess.out_draws_offset); + + draw.index.buffer = dev->heap->map; + draw.index.offset = sizeof(struct poly_heap); + draw.index.type = MTL_INDEX_TYPE_UINT32; + draw.primitive_type = mesa_prim_to_mtl_primitive_type(gfx->tess.prim); + return draw; +} + /* When the current draw contains stages not present in Metal such as * tessellation, this step will launch required emulation when needed and build * the per draw data required to launch the Metal draw. */ @@ -1364,30 +1654,99 @@ static struct kk_draw_data build_draw_data(struct kk_cmd_buffer *cmd, struct kk_draw_command *data, uint32_t draw_id) { + bool tess = cmd->state.shaders[MESA_SHADER_TESS_EVAL]; struct kk_draw_data draw = { .index.buffer = data->index_buffer, .index.offset = data->index_buffer_offset, .index.type = data->indexed ? index_size_in_bytes_to_mtl_index_type( data->index_buffer_el_size_B) : 0u, - .primitive_type = mesa_prim_to_mtl_primitive_type(data->prim), + .index.range = data->index_buffer_range_B, + .primitive_type = tess ? 
0u : mesa_prim_to_mtl_primitive_type(data->prim), }; + uint64_t first_vertex_gpu = 0u; + uint64_t base_instance_gpu = 0u; if (data->indirect) { - draw.grid = kk_grid_indirect(data->indirect_command.buffer, - data->indirect_command.offset + - draw_id * data->indirect_command.stride); + uint64_t indirect_offset = data->indirect_command.offset + + draw_id * data->indirect_command.stride; + draw.grid = + kk_grid_indirect(data->indirect_command.buffer, indirect_offset); + + if (tess) { + uint64_t first_vertex_offset = + data->indexed ? offsetof(VkDrawIndexedIndirectCommand, vertexOffset) + : offsetof(VkDrawIndirectCommand, firstVertex); + uint64_t base_instance_offset = + data->indexed + ? offsetof(VkDrawIndexedIndirectCommand, firstInstance) + : offsetof(VkDrawIndirectCommand, firstInstance); + first_vertex_gpu = + mtl_buffer_get_gpu_address(data->indirect_command.buffer) + + indirect_offset + first_vertex_offset; + base_instance_gpu = + mtl_buffer_get_gpu_address(data->indirect_command.buffer) + + indirect_offset + base_instance_offset; + } } else if (data->indexed) { - VkDrawIndexedIndirectCommand cmd = data->indexed_draws[draw_id]; - draw.grid = - kk_grid_3d(cmd.indexCount, cmd.instanceCount, cmd.firstInstance); - draw.vertex_offset = cmd.vertexOffset; - draw.index.offset += cmd.firstIndex * data->index_buffer_el_size_B; + VkDrawIndexedIndirectCommand draw_cmd = data->indexed_draws[draw_id]; + draw.grid = kk_grid_3d(draw_cmd.indexCount, draw_cmd.instanceCount, + draw_cmd.firstInstance); + draw.vertex_offset = draw_cmd.vertexOffset; + draw.index.offset += draw_cmd.firstIndex * data->index_buffer_el_size_B; + + if (tess) { + first_vertex_gpu = kk_pool_upload(cmd, &draw_cmd.vertexOffset, + sizeof(draw_cmd.vertexOffset), 4u) + .gpu; + base_instance_gpu = kk_pool_upload(cmd, &draw_cmd.firstInstance, + sizeof(draw_cmd.firstInstance), 4u) + .gpu; + } } else { - VkDrawIndirectCommand cmd = data->draws[draw_id]; - draw.grid = - kk_grid_3d(cmd.vertexCount, cmd.instanceCount, 
cmd.firstInstance); - draw.vertex_offset = cmd.firstVertex; + VkDrawIndirectCommand draw_cmd = data->draws[draw_id]; + draw.grid = kk_grid_3d(draw_cmd.vertexCount, draw_cmd.instanceCount, + draw_cmd.firstInstance); + draw.vertex_offset = draw_cmd.firstVertex; + + if (tess) { + first_vertex_gpu = kk_pool_upload(cmd, &draw_cmd.firstVertex, + sizeof(draw_cmd.firstVertex), 4u) + .gpu; + base_instance_gpu = kk_pool_upload(cmd, &draw_cmd.firstInstance, + sizeof(draw_cmd.firstInstance), 4u) + .gpu; + } + } + + /* Emulate tessellation. */ + if (tess) { + struct kk_ptr tess_args = {}; + struct kk_graphics_state *gfx = &cmd->state.gfx; + struct kk_descriptor_state *desc = &gfx->descriptors; + if (cmd->state.shaders[MESA_SHADER_TESS_EVAL]) { + gfx->descriptors.root.draw.index_size = data->index_buffer_el_size_B; + gfx->descriptors.root.draw.base_vertex_addr = first_vertex_gpu; + gfx->descriptors.root.draw.base_instance_addr = base_instance_gpu; + desc->root.draw.vertex_params = kk_upload_vertex_params(cmd, draw); + tess_args = kk_pool_alloc(cmd, sizeof(struct poly_tess_params), 4); + gfx->descriptors.root.draw.tess_params = tess_args.gpu; + gfx->descriptors.root_dirty = true; + } + + if (desc->root_dirty) { + kk_upload_descriptor_root(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); + struct kk_ptr root_buffer = desc->root.root_buffer; + mtl_set_vertex_buffer(kk_render_encoder(cmd), root_buffer.buffer, + root_buffer.offset, 0); + mtl_set_fragment_buffer(kk_render_encoder(cmd), root_buffer.buffer, + root_buffer.offset, 0); + if (tess_args.gpu) { + kk_upload_tess_params(cmd, tess_args.cpu, draw); + } + } + + draw = kk_launch_tess(cmd, draw, draw_id); } return draw; @@ -1406,11 +1765,14 @@ kk_draw(struct kk_cmd_buffer *cmd, struct kk_draw_command *data) if (data->predicate_count > 0 && !kk_predicate_draws(cmd, data)) return; - /* Unroll geometry. Skip draw if we fail. 
*/ - bool requires_unroll = data->prim == MESA_PRIM_TRIANGLE_FAN || - requires_index_promotion(data) || - requires_unroll_restart(cmd, data) || - requires_index_robustness(cmd, data); + bool tess = cmd->state.shaders[MESA_SHADER_TESS_EVAL]; + + /* Unroll geometry. Skip draw if we fail. No need to unroll if tessellation + * is present since it also handles unrolling. */ + bool requires_unroll = !tess && (data->prim == MESA_PRIM_TRIANGLE_FAN || + requires_index_promotion(data) || + requires_unroll_restart(cmd, data) || + requires_index_robustness(cmd, data)); if (requires_unroll && !kk_unroll_geometry(cmd, data)) return; diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c b/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c index eb376ded9f0..db348fb3469 100644 --- a/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c +++ b/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c @@ -816,9 +816,39 @@ lower_poly(struct nir_builder *b, nir_intrinsic_instr *intrin, void *data) case nir_intrinsic_load_index_size_poly: return lower_sysval_to_root_table(b, intrin, draw.index_size); case nir_intrinsic_load_first_vertex: - if (*(bool *)data) - return lower_sysval_to_root_table(b, intrin, draw.base_vertex); - FALLTHROUGH; + /* Lower only compute shaders */ + if (*(bool *)data) { + uint32_t root_table_offset = + kk_root_descriptor_offset(draw.base_vertex_addr); + b->cursor = nir_instr_remove(&intrin->instr); + assert((root_table_offset & 3) == 0 && "aligned"); + + nir_def *addr = load_root(b, intrin->def.num_components, 64u, + nir_imm_int(b, root_table_offset), 4); + + nir_def *val = nir_load_global(b, 1u, intrin->def.bit_size, addr); + + nir_def_rewrite_uses(&intrin->def, val); + return true; + } + return false; + case nir_intrinsic_load_base_instance: + /* Lower only compute shaders */ + if (*(bool *)data) { + uint32_t root_table_offset = + kk_root_descriptor_offset(draw.base_instance_addr); + b->cursor = nir_instr_remove(&intrin->instr); + assert((root_table_offset & 3) == 
0 && "aligned"); + + nir_def *addr = load_root(b, intrin->def.num_components, 64u, + nir_imm_int(b, root_table_offset), 4); + + nir_def *val = nir_load_global(b, 1u, intrin->def.bit_size, addr); + + nir_def_rewrite_uses(&intrin->def, val); + return true; + } + return false; default: return false; } diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c index 5fbffc376cf..6d56e5669ce 100644 --- a/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c +++ b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c @@ -17,9 +17,6 @@ struct ctx { struct kk_attribute *attribs; - bool requires_vertex_id; - bool requires_instance_id; - bool requires_base_instance; bool requires_robustness2; }; @@ -165,22 +162,19 @@ pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_def *el; if (attrib.instanced) { if (attrib.divisor > 0) { - /* Metal's instance_id has base_instance included */ - nir_def *instance_id = - nir_isub(b, nir_load_instance_id(b), nir_load_base_instance(b)); - el = nir_udiv_imm(b, instance_id, attrib.divisor); - ctx->requires_instance_id = true; + el = nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor); + BITSET_SET(b->shader->info.system_values_read, + SYSTEM_VALUE_INSTANCE_ID); } else el = nir_imm_int(b, 0); el = nir_iadd(b, el, nir_load_base_instance(b)); - ctx->requires_base_instance = true; BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); } else { el = nir_load_vertex_id(b); - ctx->requires_vertex_id = true; + BITSET_SET(b->shader->info.system_values_read, SYSTEM_VALUE_VERTEX_ID); } /* Load the pointer of the buffer from the argument buffer */ @@ -283,14 +277,6 @@ kk_nir_lower_vbo(nir_shader *nir, struct kk_attribute *attribs, .attribs = attribs, .requires_robustness2 = robustness2, }; - bool progress = - nir_shader_intrinsics_pass(nir, pass, nir_metadata_control_flow, &ctx); - - if (ctx.requires_instance_id) - BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); - if 
(ctx.requires_base_instance) - BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); - if (ctx.requires_vertex_id) - BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID); - return progress; + return nir_shader_intrinsics_pass(nir, pass, nir_metadata_control_flow, + &ctx); } diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c index 93d04d665ef..b54c9e07b07 100644 --- a/src/kosmickrisp/vulkan/kk_physical_device.c +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -227,6 +227,7 @@ kk_get_device_features( .shaderStorageImageReadWithoutFormat = true, .shaderStorageImageWriteWithoutFormat = true, .shaderUniformBufferArrayDynamicIndexing = true, + .tessellationShader = true, .textureCompressionASTC_LDR = true, .textureCompressionBC = true, .textureCompressionETC2 = true, diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c index 6d94ec8a2e0..b6498a1fbfa 100644 --- a/src/kosmickrisp/vulkan/kk_shader.c +++ b/src/kosmickrisp/vulkan/kk_shader.c @@ -660,10 +660,10 @@ gather_shader_info(struct kk_shader *shader, nir_shader *nir, { shader->info.stage = nir->info.stage; shader->info.uses_per_draw_data = msl_gather_uses_per_draw_data(nir); + shader->info.num_cull_distances = nir->info.cull_distance_array_size; if (nir->info.stage == MESA_SHADER_VERTEX) { nir_shader_intrinsics_pass(nir, gather_vs_inputs, nir_metadata_all, &shader->info.vs.attribs_read); - shader->info.vs.num_cull_distances = nir->info.cull_distance_array_size; shader->info.vs.outputs_written = nir->info.outputs_written; } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE @@ -761,7 +761,7 @@ kk_compile_shader(struct kk_device *dev, nir_shader *nir, gather_shader_info(shader, nir, state); unsigned num_cull_distances = - prev_stage ? prev_stage->info.vs.num_cull_distances : 0; + prev_stage ? 
prev_stage->info.num_cull_distances : 0; msl_nir_lower_clip_cull_distance(nir, num_cull_distances); /* When using poly to emulate tessellation, vertex and tess control shaders @@ -778,7 +778,11 @@ kk_compile_shader(struct kk_device *dev, nir_shader *nir, memset(&nir->info.cs, 0, sizeof(nir->info.cs)); nir->xfb_info = NULL; NIR_PASS(_, nir, poly_nir_lower_sw_vs); - } + } else + /* Metal's instance_id contains base_instance. When the emulation path + * is taken, since we launch compute, they correctly get translated. + * For the non-emulated path we need to subtract base_instance... */ + NIR_PASS(_, nir, msl_nir_lower_instance_id); } else if (stage == MESA_SHADER_TESS_CTRL) { NIR_PASS(_, nir, poly_nir_lower_tcs); diff --git a/src/kosmickrisp/vulkan/kk_shader.h b/src/kosmickrisp/vulkan/kk_shader.h index 3a57b0864de..a45bd6cd7d3 100644 --- a/src/kosmickrisp/vulkan/kk_shader.h +++ b/src/kosmickrisp/vulkan/kk_shader.h @@ -48,6 +48,10 @@ kk_tess_info_merge(struct kk_tess_info a, struct kk_tess_info b) struct kk_shader_info { mesa_shader_stage stage; bool uses_per_draw_data; + + /* Required for fragment shader cull distance discards. */ + uint8_t num_cull_distances; + union { /* Vertex shader is the pipeline, store all relevant data here. */ struct { @@ -62,9 +66,6 @@ struct kk_shader_info { uint32_t sample_count; uint64_t outputs_written; - /* Required for fragment shader cull distance discards. */ - uint8_t num_cull_distances; - /* Data needed for serialization. */ enum mtl_primitive_topology_class topology; enum mtl_pixel_format rt_formats[MAX_DRAW_BUFFERS];