diff --git a/src/kosmickrisp/clc/kk_clc.c b/src/kosmickrisp/clc/kk_clc.c new file mode 100644 index 00000000000..5025be656e3 --- /dev/null +++ b/src/kosmickrisp/clc/kk_clc.c @@ -0,0 +1,325 @@ +/* + * Copyright 2026 LunarG, Inc. + * Copyright 2026 Google LLC + * Copyright 2023 Alyssa Rosenzweig + * Copyright 2020 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "kosmickrisp/compiler/nir_to_msl.h" + +#include "compiler/glsl_types.h" +#include "compiler/spirv/nir_spirv.h" +#include "nir.h" +#include "nir_builder.h" +#include "nir_builder_opcodes.h" +#include "nir_intrinsics.h" +#include "nir_precompiled.h" +#include "shader_enums.h" + +#include +#include +#include +#include +#include +#include "util/macros.h" +#include + +static const struct spirv_to_nir_options spirv_options = { + .environment = NIR_SPIRV_OPENCL, + .shared_addr_format = nir_address_format_62bit_generic, + .global_addr_format = nir_address_format_62bit_generic, + .temp_addr_format = nir_address_format_62bit_generic, + .constant_addr_format = nir_address_format_64bit_global, + .create_library = true, + .printf = true, +}; + +/* Standard optimization loop */ +static void +optimize(nir_shader *nir) +{ + bool progress; + do { + progress = false; + + NIR_PASS(progress, nir, nir_split_var_copies); + NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + + NIR_PASS(progress, nir, nir_opt_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_lower_all_phis_to_scalar); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_cse); + + nir_opt_peephole_select_options peephole_select_options = { + .limit = 64, + .expensive_alu_ok = true, + }; + NIR_PASS(progress, nir, nir_opt_peephole_select, + &peephole_select_options); + NIR_PASS(progress, nir, nir_opt_phi_precision); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_lower_undef_to_zero); + + NIR_PASS(progress, nir, nir_opt_shrink_vectors, true); + NIR_PASS(progress, nir, nir_opt_loop_unroll); + + } while (progress); +} + +static nir_shader * +compile(void *memctx, const uint32_t *spirv, size_t spirv_size) +{ + const nir_shader_compiler_options *nir_options = &kk_nir_options; + + assert(spirv_size % 4 == 0); + nir_shader *nir = + spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL, + "library", &spirv_options, nir_options); + nir_validate_shader(nir, "after spirv_to_nir"); + ralloc_steal(memctx, nir); + + nir_fixup_is_exported(nir); + + NIR_PASS(_, nir, nir_lower_system_values); + NIR_PASS(_, nir, nir_lower_calls_to_builtins); + + nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true}; + NIR_PASS(_, nir, nir_lower_compute_system_values, &cs); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS(_, nir, nir_lower_returns); + NIR_PASS(_, nir, nir_inline_functions); + nir_remove_non_exported(nir); + NIR_PASS(_, nir, nir_opt_copy_prop); + NIR_PASS(_, nir, nir_opt_deref); + + /* We can't deal with constant data, get rid of it */ + nir_lower_constant_to_temp(nir); + + /* We can go ahead and lower the rest of the constant initializers. We do + * this here so that nir_remove_dead_variables and split_per_member_structs + * below see the corresponding stores. + */ + NIR_PASS(_, nir, nir_lower_variable_initializers, ~0); + + /* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B + * aligned and so it can just read/write them as vec4s. This results in a + * LOT of vec4->vec3 casts on loads and stores. One solution to this + * problem is to get rid of all vec3 variables. + */ + NIR_PASS(_, nir, nir_lower_vec3_to_vec4, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global | nir_var_mem_constant); + + /* We assign explicit types early so that the optimizer can take advantage + * of that information and hopefully get rid of some of our memcpys. + */ + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, + nir_var_uniform | nir_var_shader_temp | nir_var_function_temp | + nir_var_mem_shared | nir_var_mem_global, + glsl_get_cl_type_size_align); + + optimize(nir); + + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL); + + /* Lower again, this time after dead-variables to get more compact variable + * layouts. + */ + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global | nir_var_mem_constant, + glsl_get_cl_type_size_align); + assert(nir->constant_data_size == 0); + + NIR_PASS(_, nir, nir_lower_memcpy); + + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant, + nir_address_format_64bit_global); + + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform, + nir_address_format_32bit_offset_as_64bit); + + /* Note: we cannot lower explicit I/O here, because we need derefs in tact + * for function calls into the library to work. + */ + + NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL); + NIR_PASS(_, nir, nir_opt_if, 0); + NIR_PASS(_, nir, nir_opt_idiv_const, 16); + + msl_lower_textures(nir); + msl_lower_nir_late(nir); + + optimize(nir); + + return nir; +} + +static void +print_shader(FILE *fp, const char *name, const char *suffix, uint32_t variant, + const char *msl, nir_shader *nir) +{ + uint32_t msl_length = strlen(msl) + 1; + uint32_t workgroup_count_size = sizeof(nir->info.workgroup_size); + size_t sz_B = workgroup_count_size + msl_length; + size_t sz_el = DIV_ROUND_UP(sz_B, 4); + uint32_t *mem = calloc(sz_el, 4); + + memcpy(mem, nir->info.workgroup_size, workgroup_count_size); + memcpy((uint8_t *)mem + workgroup_count_size, msl, msl_length); + + nir_precomp_print_blob(fp, name, suffix, variant, mem, sz_B, true); + free(mem); +} + +static nir_def * +load_kernel_input(nir_builder *b, unsigned num_components, unsigned bit_size, + unsigned offset_B) +{ + nir_def *root = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0); + + /* We've bound the address of the root descriptor, index in. */ + nir_def *addr = nir_iadd(b, root, nir_imm_int64(b, offset_B)); + return nir_build_load_global_constant(b, num_components, bit_size, addr, + .align_mul = bit_size, + .access = ACCESS_CAN_SPECULATE); +} + +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +int +main(int argc, char **argv) +{ + if (argc != 4) { + fprintf(stderr, "Usage: %s [input spir-v] [output header] [output C]\n", + argv[0]); + return 1; + } + + const char *infile = argv[1]; + const char *outh_file = argv[2]; + const char *outc_file = argv[3]; + + void *mem_ctx = ralloc_context(NULL); + + int fd = open(infile, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open %s\n", infile); + ralloc_free(mem_ctx); + return 1; + } + + off_t spirv_len = lseek(fd, 0, SEEK_END); + const void *spirv_map = mmap(NULL, spirv_len, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (spirv_map == MAP_FAILED) { + fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n", errno, + strerror(errno)); + ralloc_free(mem_ctx); + return 1; + } + + FILE *fp_h = fopen(outh_file, "w"); + FILE *fp_c = fopen(outc_file, "w"); + glsl_type_singleton_init_or_ref(); + + nir_precomp_print_header(fp_c, fp_h, "KosmicKrisp Contributors", + "libkk_shaders.h"); + + nir_shader *nir = compile(mem_ctx, spirv_map, spirv_len); + + /* load_preamble works at 16-bit granularity */ + struct nir_precomp_opts opt = {.arg_align_B = 2}; + + const char *target = "AppleSilicon"; + + nir_foreach_entrypoint(libfunc, nir) { + libfunc->pass_flags = 0; + unsigned nr_vars = nir_precomp_nr_variants(libfunc); + + nir_precomp_print_layout_struct(fp_h, &opt, libfunc); + + for (unsigned v = 0; v < nr_vars; ++v) { + nir_shader *s = nir_precompiled_build_variant( + libfunc, MESA_SHADER_COMPUTE, v, &kk_nir_options, &opt, + load_kernel_input); + + nir_link_shader_functions(s, nir); + NIR_PASS(_, s, nir_inline_functions); + nir_remove_non_entrypoints(s); + NIR_PASS(_, s, nir_opt_deref); + NIR_PASS(_, s, nir_lower_vars_to_ssa); + NIR_PASS(_, s, nir_remove_dead_derefs); + NIR_PASS(_, s, nir_remove_dead_variables, + nir_var_function_temp | nir_var_shader_temp, NULL); + NIR_PASS(_, s, nir_lower_vars_to_explicit_types, + nir_var_shader_temp | nir_var_function_temp, + glsl_get_cl_type_size_align); + + NIR_PASS(_, s, nir_lower_vars_to_explicit_types, nir_var_mem_shared, + glsl_get_cl_type_size_align); + + NIR_PASS(_, s, nir_lower_explicit_io, nir_var_mem_shared, + nir_address_format_62bit_generic); + + msl_preprocess_nir(s); + msl_optimize_nir(nir); + + NIR_PASS(_, s, nir_opt_deref); + NIR_PASS(_, s, nir_lower_vars_to_ssa); + + NIR_PASS(_, s, nir_lower_explicit_io, + nir_var_shader_temp | nir_var_function_temp | + nir_var_mem_shared | nir_var_mem_global, + nir_address_format_62bit_generic); + + /* nir_lower_explicit_io can create phis we need to get rid of */ + NIR_PASS(_, s, nir_convert_from_ssa, true, true); + NIR_PASS(_, s, nir_trivialize_registers); + + /* nir_lower_explicit_io will create unpack_64 we need to lower */ + NIR_PASS(_, s, nir_opt_algebraic); + + nir_shader_gather_info(s, nir_shader_get_entrypoint(s)); + struct nir_to_msl_options options = {}; + const char *msl = nir_to_msl(s, &options); + print_shader(fp_c, libfunc->name, target, v, msl, s); + ralloc_free((void *)msl); + + ralloc_free(s); + } + } + + nir_precomp_print_program_enum(fp_h, nir, "libkk"); + nir_precomp_print_dispatch_macros(fp_h, &opt, nir); + + /* For each target, generate a table mapping programs to binaries */ + + nir_precomp_print_extern_binary_map(fp_h, "libkk", target); + nir_precomp_print_binary_map(fp_c, nir, "libkk", target, NULL); + + glsl_type_singleton_decref(); + fclose(fp_c); + fclose(fp_h); + ralloc_free(mem_ctx); + return 0; +} diff --git a/src/kosmickrisp/clc/kk_precompiled_shader.h b/src/kosmickrisp/clc/kk_precompiled_shader.h new file mode 100644 index 00000000000..fa30779a351 --- /dev/null +++ b/src/kosmickrisp/clc/kk_precompiled_shader.h @@ -0,0 +1,23 @@ +/* + * Copyright 2026 LunarG, Inc. + * Copyright 2026 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_PRECOMPILED_SHADER_H +#define KK_PRECOMPILED_SHADER_H 1 + +#include "kosmickrisp/bridge/mtl_types.h" + +#include + +struct kk_precompiled_info { + uint16_t workgroup_size[3]; +}; + +struct kk_precompiled_shader { + struct kk_precompiled_info info; + mtl_compute_pipeline_state *pipeline; +}; + +#endif /* KK_PRECOMPILED_SHADER_H_ */ diff --git a/src/kosmickrisp/clc/meson.build b/src/kosmickrisp/clc/meson.build new file mode 100644 index 00000000000..7edd8c2ae68 --- /dev/null +++ b/src/kosmickrisp/clc/meson.build @@ -0,0 +1,13 @@ +# Copyright 2026 LunarG, Inc. +# Copyright 2026 Google LLC +# SPDX-License-Identifier: MIT + +prog_kk_clc = executable( + 'kk_clc', + ['kk_clc.c'], + link_with : [libmsl_compiler], + include_directories : [inc_include, inc_src], + c_args : [pre_args, no_override_init_args], + link_args : [ld_args_build_id], + dependencies : [idep_vtn, idep_nir, idep_mesautil], +) \ No newline at end of file diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index ad0a0241916..03e621a7a85 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -1986,11 +1986,13 @@ msl_preprocess_nir(struct nir_shader *nir) nir_var_function_temp | nir_var_shader_in | nir_var_shader_out); NIR_PASS(_, nir, nir_lower_alu_to_scalar, kk_scalarize_filter, NULL); + /* If we do 256 here MSL compiler crashes with + * dEQP-VK.graphicsfuzz.stable-binarysearch-tree-nested-if-and-conditional */ + NIR_PASS(_, nir, nir_lower_vars_to_scratch, 32, + glsl_get_natural_size_align_bytes, glsl_get_word_size_align_bytes); NIR_PASS(_, nir, nir_lower_indirect_derefs_to_if_else_trees, - nir_var_shader_in | nir_var_shader_out, UINT32_MAX); - NIR_PASS(_, nir, nir_lower_vars_to_scratch, 0, - glsl_get_natural_size_align_bytes, - glsl_get_natural_size_align_bytes); + nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, + UINT32_MAX); nir_lower_compute_system_values_options csv_options = { .has_base_global_invocation_id = 0, @@ -2043,7 +2045,7 @@ msl_optimize_nir(struct nir_shader *nir) NIR_PASS(_, nir, nir_lower_load_const_to_scalar); NIR_PASS(_, nir, msl_nir_lower_algebraic_late); NIR_PASS(_, nir, nir_convert_from_ssa, true, false); - nir_trivialize_registers(nir); + NIR_PASS(_, nir, nir_trivialize_registers); NIR_PASS(_, nir, nir_opt_copy_prop); return progress; diff --git a/src/kosmickrisp/compiler/nir_to_msl.h b/src/kosmickrisp/compiler/nir_to_msl.h index c7b11040f5e..b5542de5ffd 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.h +++ b/src/kosmickrisp/compiler/nir_to_msl.h @@ -72,3 +72,37 @@ bool msl_nir_vs_io_types(nir_shader *nir); bool msl_nir_fake_guard_for_discards(struct nir_shader *nir); bool msl_nir_lower_sample_shading(nir_shader *nir); void msl_lower_nir_late(nir_shader *nir); + +static const nir_shader_compiler_options kk_nir_options = { + .lower_fdph = true, + .has_fsub = true, + .has_isub = true, + .lower_extract_word = true, + .lower_extract_byte = true, + .lower_insert_word = true, + .lower_insert_byte = true, + .lower_fmod = true, + .discard_is_demote = true, + .instance_id_includes_base_index = true, + .lower_device_index_to_zero = true, + .lower_pack_64_2x32_split = true, + .lower_unpack_64_2x32_split = true, + .lower_pack_64_2x32 = true, + .lower_pack_half_2x16 = true, + .lower_pack_split = true, + .lower_unpack_half_2x16 = true, + .has_cs_global_id = true, + .lower_fquantize2f16 = true, + .lower_scmp = true, + .lower_ifind_msb = true, + .lower_ufind_msb = true, + .lower_find_lsb = true, + .has_uclz = true, + .lower_mul_2x32_64 = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + /* Metal does not support double. */ + .lower_doubles_options = (nir_lower_doubles_options)(~0), + .lower_int64_options = nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64, + .io_options = nir_io_mediump_is_32bit, +}; diff --git a/src/kosmickrisp/vulkan/cl/kk_query.cl b/src/kosmickrisp/libkk/kk_query.cl similarity index 98% rename from src/kosmickrisp/vulkan/cl/kk_query.cl rename to src/kosmickrisp/libkk/kk_query.cl index ef24ab8ab7b..3189b430eb5 100644 --- a/src/kosmickrisp/vulkan/cl/kk_query.cl +++ b/src/kosmickrisp/libkk/kk_query.cl @@ -10,13 +10,13 @@ #include "kk_query.h" -void +KERNEL(1) libkk_write_u64(global struct libkk_imm_write *write_array) { *write_array[cl_group_id.x].address = write_array[cl_group_id.x].value; } -void +KERNEL(1) libkk_copy_queries(global uint64_t *availability, global uint64_t *results, global uint16_t *oq_index, uint64_t dst_addr, uint64_t dst_stride, uint32_t first_query, diff --git a/src/kosmickrisp/vulkan/cl/kk_query.h b/src/kosmickrisp/libkk/kk_query.h similarity index 100% rename from src/kosmickrisp/vulkan/cl/kk_query.h rename to src/kosmickrisp/libkk/kk_query.h diff --git a/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl b/src/kosmickrisp/libkk/kk_triangle_fan.cl similarity index 99% rename from src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl rename to src/kosmickrisp/libkk/kk_triangle_fan.cl index bc2e250d072..7e53ed0f1ad 100644 --- a/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl +++ b/src/kosmickrisp/libkk/kk_triangle_fan.cl @@ -79,7 +79,7 @@ libkk_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims, * * Here we assume the first vertex is provoking, the Vulkan default. */ - uint offsets[6] = { + const uint offsets[6] = { 0, first ? 1 : (even ? -2 : 3), even_or_first ? 2 : 4, @@ -212,9 +212,8 @@ first_true_thread_in_workgroup(bool cond, local uint *scratch) return (first_group * 32) + off; } -// TODO_KOSMICKRISP // KERNEL(1024) -void +KERNEL(1) libkk_unroll_geometry_and_restart( constant uint8_t *index_buffer, global uint8_t *out_ptr, constant uint32_t *in_draw, global uint32_t *out_draw, diff --git a/src/kosmickrisp/libkk/meson.build b/src/kosmickrisp/libkk/meson.build new file mode 100644 index 00000000000..7bbd55b2dc7 --- /dev/null +++ b/src/kosmickrisp/libkk/meson.build @@ -0,0 +1,54 @@ +# Copyright 2026 LunarG, Inc. +# Copyright 2026 Google LLC +# SPDX-License-Identifier: MIT + +libkk_shader_files = files( + 'kk_triangle_fan.cl', + 'kk_query.cl', +) + +libkk_spv = custom_target( + input : libkk_shader_files, + output : 'libkk.spv', + command : [ + prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', + libkk_shader_files, '--', + '-I' + join_paths(meson.project_source_root(), 'include'), + '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'), + '-I' + join_paths(meson.current_source_dir(), '.'), + '-I' + join_paths(meson.current_source_dir(), '../../'), + '-I' + join_paths(meson.current_source_dir(), 'shaders'), + cl_args, + ], + depfile : 'libkk_shaders.h.d', +) + +libkk = custom_target( + input : libkk_spv, + output : ['libkk.cpp', 'libkk.h'], + command : [prog_vtn_bindgen2, libkk_spv, '@OUTPUT0@', '@OUTPUT1@'], +) + +idep_libkk = declare_dependency( + sources : [libkk], + include_directories : include_directories('.'), +) + +libkk_shaders = custom_target( + input : libkk_spv, + output : ['libkk_shaders.h', 'libkk_shaders.c'], + command : [prog_kk_clc, libkk_spv, '@OUTPUT0@', '@OUTPUT1@'] +) + +# TODO_KOSMICKRISP We can get rid of this library once we remove the generated entrypoints for the trampoline +libkk_shaders_lib = static_library( + 'libkk_shaders', + sources : [libkk_shaders], + dependencies : [idep_mesautil], +) + +idep_libkk_shaders_h = declare_dependency( + sources : [libkk_shaders[0]], + include_directories : include_directories('.'), + link_with : libkk_shaders_lib, +) diff --git a/src/kosmickrisp/meson.build b/src/kosmickrisp/meson.build index fe54329e132..2bc318325e4 100644 --- a/src/kosmickrisp/meson.build +++ b/src/kosmickrisp/meson.build @@ -5,6 +5,8 @@ subdir('bridge') subdir('compiler') subdir('util') +subdir('clc') +subdir('libkk') subdir('vulkan') executable( diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.c b/src/kosmickrisp/vulkan/kk_cmd_buffer.c index e15d3e5b20b..62893767f20 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.c +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.c @@ -421,6 +421,31 @@ kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd, desc->push_dirty = 0; } +void +kk_dispatch_precomp(struct kk_cmd_buffer *cmd, struct mtl_size grid, + bool pre_gfx, enum libkk_program idx, void *data, + size_t data_size) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + struct kk_precompiled_shader *prog = &dev->precompiled_cache.shaders[idx]; + + mtl_compute_encoder *encoder = + pre_gfx ? kk_encoder_pre_gfx_encoder(cmd) : kk_compute_encoder(cmd); + + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, data_size, 4u); + memcpy(bo->cpu, data, data_size); + + mtl_compute_set_buffer(encoder, bo->map, 0, 0); + mtl_compute_set_pipeline_state(encoder, prog->pipeline); + + struct mtl_size local_size = { + .x = prog->info.workgroup_size[0], + .y = prog->info.workgroup_size[1], + .z = prog->info.workgroup_size[2], + }; + mtl_dispatch_threads(encoder, grid, local_size); +} + void kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr, uint64_t value) diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.h b/src/kosmickrisp/vulkan/kk_cmd_buffer.h index ed81ac4013a..7a12e79f79b 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.h +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.h @@ -254,14 +254,13 @@ uint64_t kk_upload_descriptor_root(struct kk_cmd_buffer *cmd, void kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd, struct kk_descriptor_state *desc); +void kk_dispatch_precomp(struct kk_cmd_buffer *cmd, struct mtl_size grid, + bool pre_gfx, enum libkk_program idx, void *data, + size_t data_size); + +#define MESA_DISPATCH_PRECOMP kk_dispatch_precomp + void kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr, uint64_t value); -void kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd, - mtl_compute_encoder *encoder, - mtl_compute_pipeline_state *pipeline, - const void *push_data, size_t push_size, - uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ); - #endif diff --git a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c index 2a66ad47794..00558e60b85 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c +++ b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c @@ -18,44 +18,6 @@ #include "vk_common_entrypoints.h" -void -kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd, - mtl_compute_encoder *encoder, - mtl_compute_pipeline_state *pipeline, - const void *push_data, size_t push_size, - uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ) -{ - struct kk_root_descriptor_table *root = NULL; - struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(*root), 8u); - /* kk_cmd_allocate_buffer already sets the error, we can just exit */ - if (!bo) - return; - - root = bo->cpu; - assert(push_size <= sizeof(root->push)); - memcpy(root->push, push_data, push_size); - root->cs.base_group[0] = 1; /* TODO_KOSMICKRISP This is hard-coded because we - know this is the size we create them with */ - root->cs.base_group[1] = 1; - root->cs.base_group[2] = 1; - - mtl_compute_set_buffer(encoder, bo->map, 0, 0); - mtl_compute_set_pipeline_state(encoder, pipeline); - - struct mtl_size grid_size = { - .x = groupCountX, - .y = groupCountY, - .z = groupCountZ, - }; - struct mtl_size local_size = { - .x = 1, - .y = 1, - .z = 1, - }; - mtl_dispatch_threads(encoder, grid_size, local_size); -} - VKAPI_ATTR void VKAPI_CALL kk_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) diff --git a/src/kosmickrisp/vulkan/kk_device.h b/src/kosmickrisp/vulkan/kk_device.h index 77e84eb0b20..1553e0956d8 100644 --- a/src/kosmickrisp/vulkan/kk_device.h +++ b/src/kosmickrisp/vulkan/kk_device.h @@ -15,6 +15,9 @@ #include "kosmickrisp/bridge/mtl_types.h" +#include "kosmickrisp/clc/kk_precompiled_shader.h" +#include "libkk_shaders.h" + #include "util/u_dynarray.h" #include "vk_device.h" @@ -25,13 +28,6 @@ struct kk_bo; struct kk_physical_device; struct vk_pipeline_cache; -enum kk_device_lib_pipeline { - KK_LIB_IMM_WRITE = 0, - KK_LIB_COPY_QUERY, - KK_LIB_TRIANGLE_FAN, - KK_LIB_COUNT, -}; - struct kk_residency_set { simple_mtx_t mutex; mtl_residency_set *handle; @@ -75,6 +71,10 @@ struct kk_sampler_heap { struct hash_table *ht; }; +struct kk_precompiled_cache { + struct kk_precompiled_shader shaders[LIBKK_NUM_PROGRAMS]; +}; + struct kk_device { struct vk_device vk; @@ -91,7 +91,7 @@ struct kk_device { * recording as required by Metal. */ struct kk_residency_set residency_set; - mtl_compute_pipeline_state *lib_pipelines[KK_LIB_COUNT]; + struct kk_precompiled_cache precompiled_cache; struct kk_queue queue; @@ -103,14 +103,6 @@ struct kk_device { VK_DEFINE_HANDLE_CASTS(kk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) -static inline mtl_compute_pipeline_state * -kk_device_lib_pipeline(const struct kk_device *dev, - enum kk_device_lib_pipeline pipeline) -{ - assert(pipeline < KK_LIB_COUNT); - return dev->lib_pipelines[pipeline]; -} - static inline struct kk_physical_device * kk_device_physical(const struct kk_device *dev) { diff --git a/src/kosmickrisp/vulkan/kk_device_lib.c b/src/kosmickrisp/vulkan/kk_device_lib.c index 9d2afbf9e45..8de55346da4 100644 --- a/src/kosmickrisp/vulkan/kk_device_lib.c +++ b/src/kosmickrisp/vulkan/kk_device_lib.c @@ -8,184 +8,65 @@ #include "kk_shader.h" -#include "kkcl.h" +#include "kosmickrisp/clc/kk_precompiled_shader.h" + +#include "libkk_shaders.h" #include "kosmickrisp/bridge/mtl_bridge.h" #include "nir/nir.h" #include "nir/nir_builder.h" -static nir_def * -load_struct_var(nir_builder *b, nir_variable *var, uint32_t field) -{ - nir_deref_instr *deref = - nir_build_deref_struct(b, nir_build_deref_var(b, var), field); - return nir_load_deref(b, deref); -} - -static nir_shader * -create_imm_write_shader() -{ - nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, - "kk-meta-imm-write-u64"); - nir_builder *b = &build; - - struct glsl_struct_field push_fields[] = { - {.type = glsl_uint64_t_type(), .name = "buffer_address", .offset = 0}, - }; - const struct glsl_type *push_iface_type = glsl_interface_type( - push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, - false /* row_major */, "push"); - nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, - push_iface_type, "push"); - - b->shader->info.workgroup_size[0] = 1; - b->shader->info.workgroup_size[1] = 1; - b->shader->info.workgroup_size[2] = 1; - - libkk_write_u64(b, load_struct_var(b, push, 0)); - - return build.shader; -} - -static nir_shader * -create_copy_query_shader() -{ - nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, - "kk-meta-copy-queries"); - nir_builder *b = &build; - - struct glsl_struct_field push_fields[] = { - {.type = glsl_uint64_t_type(), .name = "availability", .offset = 0}, - {.type = glsl_uint64_t_type(), .name = "results", .offset = 8}, - {.type = glsl_uint64_t_type(), .name = "indices", .offset = 16}, - {.type = glsl_uint64_t_type(), .name = "dst_addr", .offset = 24}, - {.type = glsl_uint64_t_type(), .name = "dst_stride", .offset = 32}, - {.type = glsl_uint_type(), .name = "first_query", .offset = 40}, - {.type = glsl_uint_type(), .name = "flags", .offset = 44}, - {.type = glsl_uint16_t_type(), .name = "reports_per_query", .offset = 48}, - }; - /* TODO_KOSMICKRISP Don't use push constants and directly bind the buffer to - * the binding index. This requires compiler work first to remove the - * hard-coded buffer0 value. Same applies to other creation functions. - */ - const struct glsl_type *push_iface_type = glsl_interface_type( - push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, - false /* row_major */, "push"); - nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, - push_iface_type, "push"); - - b->shader->info.workgroup_size[0] = 1; - b->shader->info.workgroup_size[1] = 1; - b->shader->info.workgroup_size[2] = 1; - - libkk_copy_queries(b, load_struct_var(b, push, 0), - load_struct_var(b, push, 1), load_struct_var(b, push, 2), - load_struct_var(b, push, 3), load_struct_var(b, push, 4), - load_struct_var(b, push, 5), load_struct_var(b, push, 6), - load_struct_var(b, push, 7)); - - return build.shader; -} - -static nir_shader * -create_triangle_fan_shader() -{ - nir_builder build = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, NULL, "kk-device-unroll-geomtry-and-restart"); - nir_builder *b = &build; - - struct glsl_struct_field push_fields[] = { - {.type = glsl_uint64_t_type(), .name = "index_buffer", .offset = 0}, - {.type = glsl_uint64_t_type(), .name = "out_ptr", .offset = 8}, - {.type = glsl_uint64_t_type(), .name = "indirect_in", .offset = 16}, - {.type = glsl_uint64_t_type(), .name = "indirect_out", .offset = 24}, - {.type = glsl_uint_type(), .name = "restart_index", .offset = 32}, - {.type = glsl_uint_type(), .name = "index_buffer_size_el", .offset = 36}, - {.type = glsl_uint_type(), .name = "in_el_size_B,", .offset = 40}, - {.type = glsl_uint_type(), .name = "out_el_size_B,", .offset = 44}, - {.type = glsl_uint_type(), .name = "flatshade_first", .offset = 48}, - {.type = glsl_uint_type(), .name = "mode", .offset = 52}, - }; - const struct glsl_type *push_iface_type = glsl_interface_type( - push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, - false /* row_major */, "push"); - nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, - push_iface_type, "push"); - - b->shader->info.workgroup_size[0] = 1; - b->shader->info.workgroup_size[1] = 1; - b->shader->info.workgroup_size[2] = 1; - - libkk_unroll_geometry_and_restart( - b, load_struct_var(b, push, 0), load_struct_var(b, push, 1), - load_struct_var(b, push, 2), load_struct_var(b, push, 3), - load_struct_var(b, push, 4), load_struct_var(b, push, 5), - load_struct_var(b, push, 6), load_struct_var(b, push, 7), - load_struct_var(b, push, 8), load_struct_var(b, push, 9)); - - return build.shader; -} - -static struct { - enum kk_device_lib_pipeline ndx; - nir_shader *(*create_shader_fn)(); -} lib_shaders[KK_LIB_COUNT] = { - {KK_LIB_IMM_WRITE, create_imm_write_shader}, - {KK_LIB_COPY_QUERY, create_copy_query_shader}, - {KK_LIB_TRIANGLE_FAN, create_triangle_fan_shader}, -}; -static_assert(ARRAY_SIZE(lib_shaders) == KK_LIB_COUNT, - "Device lib shader count and created shader count mismatch"); - -VkResult -kk_device_init_lib(struct kk_device *dev) +static VkResult +build_precompiled_shaders(struct kk_device *dev) { VkResult result = VK_SUCCESS; uint32_t i = 0u; - for (; i < KK_LIB_COUNT; ++i) { - nir_shader *s = lib_shaders[i].create_shader_fn(); - if (!s) - goto fail; + for (; i < LIBKK_NUM_PROGRAMS; ++i) { + const uint32_t *bin = libkk_AppleSilicon[i]; + const struct kk_precompiled_info *info = (void *)bin; + const char *msl = (const char *)bin + sizeof(*info); - struct kk_shader *shader = NULL; - result = kk_compile_nir_shader(dev, s, &dev->vk.alloc, &shader); - if (result != VK_SUCCESS) - goto fail; - - mtl_library *library = mtl_new_library(dev->mtl_handle, shader->msl_code); + mtl_library *library = mtl_new_library(dev->mtl_handle, msl); if (library == NULL) goto fail; - uint32_t local_size_threads = shader->info.cs.local_size.x * - shader->info.cs.local_size.y * - shader->info.cs.local_size.z; + struct kk_precompiled_shader *shader = &dev->precompiled_cache.shaders[i]; + + /* TODO_KOSMICKRISP Do not hardcode the entrypoint */ mtl_function *function = - mtl_new_function_with_name(library, shader->entrypoint_name); - dev->lib_pipelines[i] = mtl_new_compute_pipeline_state( + mtl_new_function_with_name(library, "main_entrypoint"); + uint32_t local_size_threads = info->workgroup_size[0] * + info->workgroup_size[1] * + info->workgroup_size[2]; + shader->pipeline = mtl_new_compute_pipeline_state( dev->mtl_handle, function, local_size_threads); mtl_release(function); mtl_release(library); - /* We no longer need the shader. Although it may be useful to keep it - * alive for the info maybe? */ - shader->vk.ops->destroy(&dev->vk, &shader->vk, &dev->vk.alloc); - - if (!dev->lib_pipelines[i]) + if (!shader->pipeline) goto fail; + + memcpy(&shader->info, info, sizeof(*info)); } return result; fail: for (uint32_t j = 0u; j < i; ++j) - mtl_release(dev->lib_pipelines[j]); + mtl_release(dev->precompiled_cache.shaders[j].pipeline); return vk_error(dev, result); } +VkResult +kk_device_init_lib(struct kk_device *dev) +{ + return build_precompiled_shaders(dev); +} + void kk_device_finish_lib(struct kk_device *dev) { - for (uint32_t i = 0; i < KK_LIB_COUNT; ++i) - mtl_release(dev->lib_pipelines[i]); + for (uint32_t i = 0; i < LIBKK_NUM_PROGRAMS; ++i) + mtl_release(dev->precompiled_cache.shaders[i].pipeline); } diff --git a/src/kosmickrisp/vulkan/kk_encoder.c b/src/kosmickrisp/vulkan/kk_encoder.c index 2790e33ec80..35774bd85c9 100644 --- a/src/kosmickrisp/vulkan/kk_encoder.c +++ b/src/kosmickrisp/vulkan/kk_encoder.c @@ -13,7 +13,8 @@ #include "kosmickrisp/bridge/mtl_bridge.h" #include "kosmickrisp/bridge/vk_to_mtl_map.h" -#include "cl/kk_query.h" +#include "kosmickrisp/libkk/kk_query.h" +#include "libkk_shaders.h" static void kk_encoder_start_internal(struct kk_encoder_internal *encoder, @@ -134,8 +135,6 @@ upload_queue_writes(struct kk_cmd_buffer *cmd) enc->copy_query_pool_result_infos.size == 0u) return; - struct kk_device *dev = kk_cmd_buffer_device(cmd); - mtl_compute_encoder *compute = kk_compute_encoder(cmd); uint32_t count = util_dynarray_num_elements(&enc->imm_writes, uint64_t) / 2u; if (count != 0) { struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, enc->imm_writes.size, 8u); @@ -143,27 +142,23 @@ upload_queue_writes(struct kk_cmd_buffer *cmd) if (!bo) return; memcpy(bo->cpu, enc->imm_writes.data, enc->imm_writes.size); - struct kk_imm_write_push push_data = { - .buffer_address = bo->gpu, - .count = count, - }; - kk_cmd_dispatch_pipeline(cmd, compute, - kk_device_lib_pipeline(dev, KK_LIB_IMM_WRITE), - &push_data, sizeof(push_data), count, 1, 1); + struct mtl_size grid = {count, 1, 1}; + libkk_write_u64(cmd, grid, false, bo->gpu); enc->imm_writes.size = 0u; } count = util_dynarray_num_elements(&enc->copy_query_pool_result_infos, - struct kk_copy_query_pool_results_info); + struct libkk_copy_queries_args); if (count != 0u) { for (uint32_t i = 0u; i < count; ++i) { struct kk_copy_query_pool_results_info *push_data = util_dynarray_element(&enc->copy_query_pool_result_infos, struct kk_copy_query_pool_results_info, i); - kk_cmd_dispatch_pipeline( - cmd, compute, kk_device_lib_pipeline(dev, KK_LIB_COPY_QUERY), - push_data, sizeof(*push_data), push_data->query_count, 1, 1); + struct mtl_size grid = {push_data->query_count, 1, 1}; + const struct libkk_copy_queries_args *data = + (const struct libkk_copy_queries_args *)push_data; + libkk_copy_queries_struct(cmd, grid, false, *data); } enc->copy_query_pool_result_infos.size = 0u; } @@ -330,7 +325,7 @@ kk_blit_encoder(struct kk_cmd_buffer *cmd) return (mtl_blit_encoder *)encoder->encoder; } -static mtl_compute_encoder * +mtl_compute_encoder * kk_encoder_pre_gfx_encoder(struct kk_cmd_buffer *cmd) { struct kk_encoder *encoder = cmd->encoder; @@ -352,26 +347,12 @@ kk_encoder_pre_gfx_encoder(struct kk_cmd_buffer *cmd) return encoder->pre_gfx.encoder; } -struct kk_triangle_fan_info { - uint64_t index_buffer; - uint64_t out_ptr; - uint64_t in_draw; - uint64_t out_draw; - uint32_t restart_index; - uint32_t index_buffer_size_el; - uint32_t in_el_size_B; - uint32_t out_el_size_B; - uint32_t flatshade_first; - uint32_t mode; -}; - static void -kk_encoder_render_triangle_fan_common(struct kk_cmd_buffer *cmd, - struct kk_triangle_fan_info *info, - mtl_buffer *indirect, mtl_buffer *index, - uint32_t index_count, - uint32_t in_el_size_B, - uint32_t out_el_size_B) +kk_encoder_render_triangle_fan_common( + struct kk_cmd_buffer *cmd, + struct libkk_unroll_geometry_and_restart_args *info, mtl_buffer *indirect, + mtl_buffer *index, uint32_t index_count, uint32_t in_el_size_B, + uint32_t out_el_size_B) { uint32_t index_buffer_size_B = index_count * out_el_size_B; uint32_t buffer_size_B = @@ -387,12 +368,9 @@ kk_encoder_render_triangle_fan_common(struct kk_cmd_buffer *cmd, info->in_el_size_B = in_el_size_B; info->out_el_size_B = out_el_size_B; info->flatshade_first = true; - mtl_compute_encoder *encoder = kk_encoder_pre_gfx_encoder(cmd); - struct kk_device *dev = kk_cmd_buffer_device(cmd); - kk_cmd_dispatch_pipeline(cmd, encoder, - kk_device_lib_pipeline(dev, KK_LIB_TRIANGLE_FAN), - info, sizeof(*info), 1u, 1u, 1u); + struct mtl_size grid = {1, 1, 1}; + libkk_unroll_geometry_and_restart_struct(cmd, grid, true, *info); enum mtl_index_type index_type = index_size_in_bytes_to_mtl_index_type(out_el_size_B); @@ -411,7 +389,7 @@ kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd, u_decomposed_prims_for_vertices(mode, cmd->state.gfx.vb.max_vertices) * mesa_vertices_per_prim(mode); uint32_t el_size_B = decomposed_index_count < UINT16_MAX ? 2u : 4u; - struct kk_triangle_fan_info info = { + struct libkk_unroll_geometry_and_restart_args info = { .in_draw = mtl_buffer_get_gpu_address(indirect) + offset, .restart_index = UINT32_MAX, /* No restart */ .mode = mode, @@ -437,7 +415,7 @@ kk_encoder_render_triangle_fan_indexed_indirect(struct kk_cmd_buffer *cmd, u_decomposed_prims_for_vertices(mode, max_index_count) * mesa_vertices_per_prim(mode); - struct kk_triangle_fan_info info = { + struct libkk_unroll_geometry_and_restart_args info = { .index_buffer = mtl_buffer_get_gpu_address(cmd->state.gfx.index.handle) + cmd->state.gfx.index.offset, .in_draw = mtl_buffer_get_gpu_address(indirect) + offset, diff --git a/src/kosmickrisp/vulkan/kk_encoder.h b/src/kosmickrisp/vulkan/kk_encoder.h index eb820ba604f..8708141b4b7 100644 --- a/src/kosmickrisp/vulkan/kk_encoder.h +++ b/src/kosmickrisp/vulkan/kk_encoder.h @@ -101,6 +101,8 @@ mtl_compute_encoder *kk_compute_encoder(struct kk_cmd_buffer *cmd); mtl_blit_encoder *kk_blit_encoder(struct kk_cmd_buffer *cmd); +mtl_compute_encoder *kk_encoder_pre_gfx_encoder(struct kk_cmd_buffer *cmd); + void upload_queue_writes(struct kk_cmd_buffer *cmd); void kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd, diff --git a/src/kosmickrisp/vulkan/kk_query_pool.c b/src/kosmickrisp/vulkan/kk_query_pool.c index 987b9299893..11b2bcf4bdd 100644 --- a/src/kosmickrisp/vulkan/kk_query_pool.c +++ b/src/kosmickrisp/vulkan/kk_query_pool.c @@ -17,7 +17,6 @@ #include "kk_entrypoints.h" #include "kk_physical_device.h" #include "kk_query_table.h" -#include "kkcl.h" struct kk_query_report { uint64_t value; diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c index a6413f90934..c337ee7f1c7 100644 --- a/src/kosmickrisp/vulkan/kk_shader.c +++ b/src/kosmickrisp/vulkan/kk_shader.c @@ -33,41 +33,7 @@ static const nir_shader_compiler_options * kk_get_nir_options(struct vk_physical_device *vk_pdev, mesa_shader_stage stage, UNUSED const struct vk_pipeline_robustness_state *rs) { - static nir_shader_compiler_options options = { - .lower_fdph = true, - .has_fsub = true, - .has_isub = true, - .lower_extract_word = true, - .lower_extract_byte = true, - .lower_insert_word = true, - .lower_insert_byte = true, - .lower_fmod = true, - .discard_is_demote = true, - .instance_id_includes_base_index = true, - .lower_device_index_to_zero = true, - .lower_pack_64_2x32_split = true, - .lower_unpack_64_2x32_split = true, - .lower_pack_64_2x32 = true, - .lower_pack_half_2x16 = true, - .lower_pack_split = true, - .lower_unpack_half_2x16 = true, - .has_cs_global_id = true, - .lower_fquantize2f16 = true, - .lower_scmp = true, - .lower_ifind_msb = true, - .lower_ufind_msb = true, - .lower_find_lsb = true, - .has_uclz = true, - .lower_mul_2x32_64 = true, - .lower_uadd_carry = true, - .lower_usub_borrow = true, - /* Metal does not support double. */ - .lower_doubles_options = (nir_lower_doubles_options)(~0), - .lower_int64_options = - nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64, - .io_options = nir_io_mediump_is_32bit, - }; - return &options; + return &kk_nir_options; } /* TODO_KOSMICKRISP Once we support robustness2, update these values. */ diff --git a/src/kosmickrisp/vulkan/meson.build b/src/kosmickrisp/vulkan/meson.build index 41bf66d3eae..7ba3b3f5bc5 100644 --- a/src/kosmickrisp/vulkan/meson.build +++ b/src/kosmickrisp/vulkan/meson.build @@ -18,6 +18,7 @@ kk_device_dispatch_table_dependencies_list = [ idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_wsi_headers, + idep_libkk_shaders_h, ] libkk_device_dispatch_table = static_library( @@ -100,11 +101,6 @@ kk_files = files( 'kk_wsi.c', ) -kkcl_files = files( - 'cl/kk_query.cl', - 'cl/kk_triangle_fan.cl', -) - kk_entrypoints = custom_target( 'kk_entrypoints', input : [vk_entrypoints_gen, vk_api_xml], @@ -119,25 +115,6 @@ kk_entrypoints = custom_target( relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root()) -kkcl_spv = custom_target( - input : kkcl_files, - output : 'kkcl.spv', - command : [ - prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', kkcl_files, '--', - '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'), - '-I' + join_paths(meson.current_source_dir(), '.'), - '-I' + join_paths(meson.project_source_root(), 'src'), - cl_args, - ], - depfile : 'libkk_shaders.h.d', -) - -kkcl = custom_target( - input : kkcl_spv, - output : ['kkcl.cpp', 'kkcl.h'], - command : [prog_vtn_bindgen2, '@INPUT@', '@OUTPUT@'], -) - kk_deps = [ idep_nir, idep_mesautil, @@ -148,7 +125,9 @@ kk_deps = [ kk_device_dispatch_table_dependencies, idep_kk_device_dispatch_table, idep_msl_to_nir, - idep_mtl_bridge + idep_mtl_bridge, + idep_libkk, + idep_libkk_shaders_h, ] libkk = static_library( @@ -156,7 +135,6 @@ libkk = static_library( [ kk_files, kk_entrypoints, - kkcl, sha1_h, ], include_directories : [