From e08da4e9286811786b2e6b162092acd2edbaa1b2 Mon Sep 17 00:00:00 2001 From: Aitor Camacho Date: Sun, 8 Mar 2026 14:16:26 +0900 Subject: [PATCH] kk: Add clc in a similar fashion to other drivers like HK OpenCL shaders will now be compiled offline and translated to MSL offline at build time. Then added to a static table as binary data, to later be compiled once when devices are initialized. This allows for easier integration of OpenCL shaders for things such as tessellation since we now don't have to manually add these shaders to the precompiled device library in code. Signed-off-by: Aitor Camacho Part-of: --- src/kosmickrisp/clc/kk_clc.c | 325 ++++++++++++++++++ src/kosmickrisp/clc/kk_precompiled_shader.h | 23 ++ src/kosmickrisp/clc/meson.build | 13 + src/kosmickrisp/compiler/nir_to_msl.c | 12 +- src/kosmickrisp/compiler/nir_to_msl.h | 34 ++ .../{vulkan/cl => libkk}/kk_query.cl | 4 +- .../{vulkan/cl => libkk}/kk_query.h | 0 .../{vulkan/cl => libkk}/kk_triangle_fan.cl | 5 +- src/kosmickrisp/libkk/meson.build | 54 +++ src/kosmickrisp/meson.build | 2 + src/kosmickrisp/vulkan/kk_cmd_buffer.c | 25 ++ src/kosmickrisp/vulkan/kk_cmd_buffer.h | 13 +- src/kosmickrisp/vulkan/kk_cmd_dispatch.c | 38 -- src/kosmickrisp/vulkan/kk_device.h | 24 +- src/kosmickrisp/vulkan/kk_device_lib.c | 179 ++-------- src/kosmickrisp/vulkan/kk_encoder.c | 60 +--- src/kosmickrisp/vulkan/kk_encoder.h | 2 + src/kosmickrisp/vulkan/kk_query_pool.c | 1 - src/kosmickrisp/vulkan/kk_shader.c | 36 +- src/kosmickrisp/vulkan/meson.build | 30 +- 20 files changed, 557 insertions(+), 323 deletions(-) create mode 100644 src/kosmickrisp/clc/kk_clc.c create mode 100644 src/kosmickrisp/clc/kk_precompiled_shader.h create mode 100644 src/kosmickrisp/clc/meson.build rename src/kosmickrisp/{vulkan/cl => libkk}/kk_query.cl (98%) rename src/kosmickrisp/{vulkan/cl => libkk}/kk_query.h (100%) rename src/kosmickrisp/{vulkan/cl => libkk}/kk_triangle_fan.cl (99%) create mode 100644 src/kosmickrisp/libkk/meson.build diff --git a/src/kosmickrisp/clc/kk_clc.c b/src/kosmickrisp/clc/kk_clc.c new file mode 100644 index 00000000000..5025be656e3 --- /dev/null +++ b/src/kosmickrisp/clc/kk_clc.c @@ -0,0 +1,325 @@ +/* + * Copyright 2026 LunarG, Inc. + * Copyright 2026 Google LLC + * Copyright 2023 Alyssa Rosenzweig + * Copyright 2020 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "kosmickrisp/compiler/nir_to_msl.h" + +#include "compiler/glsl_types.h" +#include "compiler/spirv/nir_spirv.h" +#include "nir.h" +#include "nir_builder.h" +#include "nir_builder_opcodes.h" +#include "nir_intrinsics.h" +#include "nir_precompiled.h" +#include "shader_enums.h" + +#include +#include +#include +#include +#include +#include "util/macros.h" +#include + +static const struct spirv_to_nir_options spirv_options = { + .environment = NIR_SPIRV_OPENCL, + .shared_addr_format = nir_address_format_62bit_generic, + .global_addr_format = nir_address_format_62bit_generic, + .temp_addr_format = nir_address_format_62bit_generic, + .constant_addr_format = nir_address_format_64bit_global, + .create_library = true, + .printf = true, +}; + +/* Standard optimization loop */ +static void +optimize(nir_shader *nir) +{ + bool progress; + do { + progress = false; + + NIR_PASS(progress, nir, nir_split_var_copies); + NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + + NIR_PASS(progress, nir, nir_opt_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_lower_all_phis_to_scalar); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_cse); + + nir_opt_peephole_select_options peephole_select_options = { + .limit = 64, + .expensive_alu_ok = true, + }; + NIR_PASS(progress, nir, nir_opt_peephole_select, + &peephole_select_options); + NIR_PASS(progress, nir, nir_opt_phi_precision); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_lower_undef_to_zero); + + NIR_PASS(progress, nir, nir_opt_shrink_vectors, true); + NIR_PASS(progress, nir, nir_opt_loop_unroll); + + } while (progress); +} + +static nir_shader * +compile(void *memctx, const uint32_t *spirv, size_t spirv_size) +{ + const nir_shader_compiler_options *nir_options = &kk_nir_options; + + assert(spirv_size % 4 == 0); + nir_shader *nir = + spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL, + "library", &spirv_options, nir_options); + nir_validate_shader(nir, "after spirv_to_nir"); + ralloc_steal(memctx, nir); + + nir_fixup_is_exported(nir); + + NIR_PASS(_, nir, nir_lower_system_values); + NIR_PASS(_, nir, nir_lower_calls_to_builtins); + + nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true}; + NIR_PASS(_, nir, nir_lower_compute_system_values, &cs); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS(_, nir, nir_lower_returns); + NIR_PASS(_, nir, nir_inline_functions); + nir_remove_non_exported(nir); + NIR_PASS(_, nir, nir_opt_copy_prop); + NIR_PASS(_, nir, nir_opt_deref); + + /* We can't deal with constant data, get rid of it */ + nir_lower_constant_to_temp(nir); + + /* We can go ahead and lower the rest of the constant initializers. We do + * this here so that nir_remove_dead_variables and split_per_member_structs + * below see the corresponding stores. + */ + NIR_PASS(_, nir, nir_lower_variable_initializers, ~0); + + /* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B + * aligned and so it can just read/write them as vec4s. This results in a + * LOT of vec4->vec3 casts on loads and stores. One solution to this + * problem is to get rid of all vec3 variables. + */ + NIR_PASS(_, nir, nir_lower_vec3_to_vec4, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global | nir_var_mem_constant); + + /* We assign explicit types early so that the optimizer can take advantage + * of that information and hopefully get rid of some of our memcpys. + */ + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, + nir_var_uniform | nir_var_shader_temp | nir_var_function_temp | + nir_var_mem_shared | nir_var_mem_global, + glsl_get_cl_type_size_align); + + optimize(nir); + + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL); + + /* Lower again, this time after dead-variables to get more compact variable + * layouts. + */ + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global | nir_var_mem_constant, + glsl_get_cl_type_size_align); + assert(nir->constant_data_size == 0); + + NIR_PASS(_, nir, nir_lower_memcpy); + + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant, + nir_address_format_64bit_global); + + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform, + nir_address_format_32bit_offset_as_64bit); + + /* Note: we cannot lower explicit I/O here, because we need derefs in tact + * for function calls into the library to work. + */ + + NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL); + NIR_PASS(_, nir, nir_opt_if, 0); + NIR_PASS(_, nir, nir_opt_idiv_const, 16); + + msl_lower_textures(nir); + msl_lower_nir_late(nir); + + optimize(nir); + + return nir; +} + +static void +print_shader(FILE *fp, const char *name, const char *suffix, uint32_t variant, + const char *msl, nir_shader *nir) +{ + uint32_t msl_length = strlen(msl) + 1; + uint32_t workgroup_count_size = sizeof(nir->info.workgroup_size); + size_t sz_B = workgroup_count_size + msl_length; + size_t sz_el = DIV_ROUND_UP(sz_B, 4); + uint32_t *mem = calloc(sz_el, 4); + + memcpy(mem, nir->info.workgroup_size, workgroup_count_size); + memcpy((uint8_t *)mem + workgroup_count_size, msl, msl_length); + + nir_precomp_print_blob(fp, name, suffix, variant, mem, sz_B, true); + free(mem); +} + +static nir_def * +load_kernel_input(nir_builder *b, unsigned num_components, unsigned bit_size, + unsigned offset_B) +{ + nir_def *root = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0); + + /* We've bound the address of the root descriptor, index in. */ + nir_def *addr = nir_iadd(b, root, nir_imm_int64(b, offset_B)); + return nir_build_load_global_constant(b, num_components, bit_size, addr, + .align_mul = bit_size, + .access = ACCESS_CAN_SPECULATE); +} + +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +int +main(int argc, char **argv) +{ + if (argc != 4) { + fprintf(stderr, "Usage: %s [input spir-v] [output header] [output C]\n", + argv[0]); + return 1; + } + + const char *infile = argv[1]; + const char *outh_file = argv[2]; + const char *outc_file = argv[3]; + + void *mem_ctx = ralloc_context(NULL); + + int fd = open(infile, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open %s\n", infile); + ralloc_free(mem_ctx); + return 1; + } + + off_t spirv_len = lseek(fd, 0, SEEK_END); + const void *spirv_map = mmap(NULL, spirv_len, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (spirv_map == MAP_FAILED) { + fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n", errno, + strerror(errno)); + ralloc_free(mem_ctx); + return 1; + } + + FILE *fp_h = fopen(outh_file, "w"); + FILE *fp_c = fopen(outc_file, "w"); + glsl_type_singleton_init_or_ref(); + + nir_precomp_print_header(fp_c, fp_h, "KosmicKrisp Contributors", + "libkk_shaders.h"); + + nir_shader *nir = compile(mem_ctx, spirv_map, spirv_len); + + /* load_preamble works at 16-bit granularity */ + struct nir_precomp_opts opt = {.arg_align_B = 2}; + + const char *target = "AppleSilicon"; + + nir_foreach_entrypoint(libfunc, nir) { + libfunc->pass_flags = 0; + unsigned nr_vars = nir_precomp_nr_variants(libfunc); + + nir_precomp_print_layout_struct(fp_h, &opt, libfunc); + + for (unsigned v = 0; v < nr_vars; ++v) { + nir_shader *s = nir_precompiled_build_variant( + libfunc, MESA_SHADER_COMPUTE, v, &kk_nir_options, &opt, + load_kernel_input); + + nir_link_shader_functions(s, nir); + NIR_PASS(_, s, nir_inline_functions); + nir_remove_non_entrypoints(s); + NIR_PASS(_, s, nir_opt_deref); + NIR_PASS(_, s, nir_lower_vars_to_ssa); + NIR_PASS(_, s, nir_remove_dead_derefs); + NIR_PASS(_, s, nir_remove_dead_variables, + nir_var_function_temp | nir_var_shader_temp, NULL); + NIR_PASS(_, s, nir_lower_vars_to_explicit_types, + nir_var_shader_temp | nir_var_function_temp, + glsl_get_cl_type_size_align); + + NIR_PASS(_, s, nir_lower_vars_to_explicit_types, nir_var_mem_shared, + glsl_get_cl_type_size_align); + + NIR_PASS(_, s, nir_lower_explicit_io, nir_var_mem_shared, + nir_address_format_62bit_generic); + + msl_preprocess_nir(s); + msl_optimize_nir(nir); + + NIR_PASS(_, s, nir_opt_deref); + NIR_PASS(_, s, nir_lower_vars_to_ssa); + + NIR_PASS(_, s, nir_lower_explicit_io, + nir_var_shader_temp | nir_var_function_temp | + nir_var_mem_shared | nir_var_mem_global, + nir_address_format_62bit_generic); + + /* nir_lower_explicit_io can create phis we need to get rid of */ + NIR_PASS(_, s, nir_convert_from_ssa, true, true); + NIR_PASS(_, s, nir_trivialize_registers); + + /* nir_lower_explicit_io will create unpack_64 we need to lower */ + NIR_PASS(_, s, nir_opt_algebraic); + + nir_shader_gather_info(s, nir_shader_get_entrypoint(s)); + struct nir_to_msl_options options = {}; + const char *msl = nir_to_msl(s, &options); + print_shader(fp_c, libfunc->name, target, v, msl, s); + ralloc_free((void *)msl); + + ralloc_free(s); + } + } + + nir_precomp_print_program_enum(fp_h, nir, "libkk"); + nir_precomp_print_dispatch_macros(fp_h, &opt, nir); + + /* For each target, generate a table mapping programs to binaries */ + + nir_precomp_print_extern_binary_map(fp_h, "libkk", target); + nir_precomp_print_binary_map(fp_c, nir, "libkk", target, NULL); + + glsl_type_singleton_decref(); + fclose(fp_c); + fclose(fp_h); + ralloc_free(mem_ctx); + return 0; +} diff --git a/src/kosmickrisp/clc/kk_precompiled_shader.h b/src/kosmickrisp/clc/kk_precompiled_shader.h new file mode 100644 index 00000000000..fa30779a351 --- /dev/null +++ b/src/kosmickrisp/clc/kk_precompiled_shader.h @@ -0,0 +1,23 @@ +/* + * Copyright 2026 LunarG, Inc. + * Copyright 2026 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_PRECOMPILED_SHADER_H +#define KK_PRECOMPILED_SHADER_H 1 + +#include "kosmickrisp/bridge/mtl_types.h" + +#include + +struct kk_precompiled_info { + uint16_t workgroup_size[3]; +}; + +struct kk_precompiled_shader { + struct kk_precompiled_info info; + mtl_compute_pipeline_state *pipeline; +}; + +#endif /* KK_PRECOMPILED_SHADER_H_ */ diff --git a/src/kosmickrisp/clc/meson.build b/src/kosmickrisp/clc/meson.build new file mode 100644 index 00000000000..7edd8c2ae68 --- /dev/null +++ b/src/kosmickrisp/clc/meson.build @@ -0,0 +1,13 @@ +# Copyright 2026 LunarG, Inc. +# Copyright 2026 Google LLC +# SPDX-License-Identifier: MIT + +prog_kk_clc = executable( + 'kk_clc', + ['kk_clc.c'], + link_with : [libmsl_compiler], + include_directories : [inc_include, inc_src], + c_args : [pre_args, no_override_init_args], + link_args : [ld_args_build_id], + dependencies : [idep_vtn, idep_nir, idep_mesautil], +) \ No newline at end of file diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c index ad0a0241916..03e621a7a85 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.c +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -1986,11 +1986,13 @@ msl_preprocess_nir(struct nir_shader *nir) nir_var_function_temp | nir_var_shader_in | nir_var_shader_out); NIR_PASS(_, nir, nir_lower_alu_to_scalar, kk_scalarize_filter, NULL); + /* If we do 256 here MSL compiler crashes with + * dEQP-VK.graphicsfuzz.stable-binarysearch-tree-nested-if-and-conditional */ + NIR_PASS(_, nir, nir_lower_vars_to_scratch, 32, + glsl_get_natural_size_align_bytes, glsl_get_word_size_align_bytes); NIR_PASS(_, nir, nir_lower_indirect_derefs_to_if_else_trees, - nir_var_shader_in | nir_var_shader_out, UINT32_MAX); - NIR_PASS(_, nir, nir_lower_vars_to_scratch, 0, - glsl_get_natural_size_align_bytes, - glsl_get_natural_size_align_bytes); + nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, + UINT32_MAX); nir_lower_compute_system_values_options csv_options = { .has_base_global_invocation_id = 0, @@ -2043,7 +2045,7 @@ msl_optimize_nir(struct nir_shader *nir) NIR_PASS(_, nir, nir_lower_load_const_to_scalar); NIR_PASS(_, nir, msl_nir_lower_algebraic_late); NIR_PASS(_, nir, nir_convert_from_ssa, true, false); - nir_trivialize_registers(nir); + NIR_PASS(_, nir, nir_trivialize_registers); NIR_PASS(_, nir, nir_opt_copy_prop); return progress; diff --git a/src/kosmickrisp/compiler/nir_to_msl.h b/src/kosmickrisp/compiler/nir_to_msl.h index c7b11040f5e..b5542de5ffd 100644 --- a/src/kosmickrisp/compiler/nir_to_msl.h +++ b/src/kosmickrisp/compiler/nir_to_msl.h @@ -72,3 +72,37 @@ bool msl_nir_vs_io_types(nir_shader *nir); bool msl_nir_fake_guard_for_discards(struct nir_shader *nir); bool msl_nir_lower_sample_shading(nir_shader *nir); void msl_lower_nir_late(nir_shader *nir); + +static const nir_shader_compiler_options kk_nir_options = { + .lower_fdph = true, + .has_fsub = true, + .has_isub = true, + .lower_extract_word = true, + .lower_extract_byte = true, + .lower_insert_word = true, + .lower_insert_byte = true, + .lower_fmod = true, + .discard_is_demote = true, + .instance_id_includes_base_index = true, + .lower_device_index_to_zero = true, + .lower_pack_64_2x32_split = true, + .lower_unpack_64_2x32_split = true, + .lower_pack_64_2x32 = true, + .lower_pack_half_2x16 = true, + .lower_pack_split = true, + .lower_unpack_half_2x16 = true, + .has_cs_global_id = true, + .lower_fquantize2f16 = true, + .lower_scmp = true, + .lower_ifind_msb = true, + .lower_ufind_msb = true, + .lower_find_lsb = true, + .has_uclz = true, + .lower_mul_2x32_64 = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + /* Metal does not support double. */ + .lower_doubles_options = (nir_lower_doubles_options)(~0), + .lower_int64_options = nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64, + .io_options = nir_io_mediump_is_32bit, +}; diff --git a/src/kosmickrisp/vulkan/cl/kk_query.cl b/src/kosmickrisp/libkk/kk_query.cl similarity index 98% rename from src/kosmickrisp/vulkan/cl/kk_query.cl rename to src/kosmickrisp/libkk/kk_query.cl index ef24ab8ab7b..3189b430eb5 100644 --- a/src/kosmickrisp/vulkan/cl/kk_query.cl +++ b/src/kosmickrisp/libkk/kk_query.cl @@ -10,13 +10,13 @@ #include "kk_query.h" -void +KERNEL(1) libkk_write_u64(global struct libkk_imm_write *write_array) { *write_array[cl_group_id.x].address = write_array[cl_group_id.x].value; } -void +KERNEL(1) libkk_copy_queries(global uint64_t *availability, global uint64_t *results, global uint16_t *oq_index, uint64_t dst_addr, uint64_t dst_stride, uint32_t first_query, diff --git a/src/kosmickrisp/vulkan/cl/kk_query.h b/src/kosmickrisp/libkk/kk_query.h similarity index 100% rename from src/kosmickrisp/vulkan/cl/kk_query.h rename to src/kosmickrisp/libkk/kk_query.h diff --git a/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl b/src/kosmickrisp/libkk/kk_triangle_fan.cl similarity index 99% rename from src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl rename to src/kosmickrisp/libkk/kk_triangle_fan.cl index bc2e250d072..7e53ed0f1ad 100644 --- a/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl +++ b/src/kosmickrisp/libkk/kk_triangle_fan.cl @@ -79,7 +79,7 @@ libkk_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims, * * Here we assume the first vertex is provoking, the Vulkan default. */ - uint offsets[6] = { + const uint offsets[6] = { 0, first ? 1 : (even ? -2 : 3), even_or_first ? 2 : 4, @@ -212,9 +212,8 @@ first_true_thread_in_workgroup(bool cond, local uint *scratch) return (first_group * 32) + off; } -// TODO_KOSMICKRISP // KERNEL(1024) -void +KERNEL(1) libkk_unroll_geometry_and_restart( constant uint8_t *index_buffer, global uint8_t *out_ptr, constant uint32_t *in_draw, global uint32_t *out_draw, diff --git a/src/kosmickrisp/libkk/meson.build b/src/kosmickrisp/libkk/meson.build new file mode 100644 index 00000000000..7bbd55b2dc7 --- /dev/null +++ b/src/kosmickrisp/libkk/meson.build @@ -0,0 +1,54 @@ +# Copyright 2026 LunarG, Inc. +# Copyright 2026 Google LLC +# SPDX-License-Identifier: MIT + +libkk_shader_files = files( + 'kk_triangle_fan.cl', + 'kk_query.cl', +) + +libkk_spv = custom_target( + input : libkk_shader_files, + output : 'libkk.spv', + command : [ + prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', + libkk_shader_files, '--', + '-I' + join_paths(meson.project_source_root(), 'include'), + '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'), + '-I' + join_paths(meson.current_source_dir(), '.'), + '-I' + join_paths(meson.current_source_dir(), '../../'), + '-I' + join_paths(meson.current_source_dir(), 'shaders'), + cl_args, + ], + depfile : 'libkk_shaders.h.d', +) + +libkk = custom_target( + input : libkk_spv, + output : ['libkk.cpp', 'libkk.h'], + command : [prog_vtn_bindgen2, libkk_spv, '@OUTPUT0@', '@OUTPUT1@'], +) + +idep_libkk = declare_dependency( + sources : [libkk], + include_directories : include_directories('.'), +) + +libkk_shaders = custom_target( + input : libkk_spv, + output : ['libkk_shaders.h', 'libkk_shaders.c'], + command : [prog_kk_clc, libkk_spv, '@OUTPUT0@', '@OUTPUT1@'] +) + +# TODO_KOSMICKRISP We can get rid of this library once we remove the generated entrypoints for the trampoline +libkk_shaders_lib = static_library( + 'libkk_shaders', + sources : [libkk_shaders], + dependencies : [idep_mesautil], +) + +idep_libkk_shaders_h = declare_dependency( + sources : [libkk_shaders[0]], + include_directories : include_directories('.'), + link_with : libkk_shaders_lib, +) diff --git a/src/kosmickrisp/meson.build b/src/kosmickrisp/meson.build index fe54329e132..2bc318325e4 100644 --- a/src/kosmickrisp/meson.build +++ b/src/kosmickrisp/meson.build @@ -5,6 +5,8 @@ subdir('bridge') subdir('compiler') subdir('util') +subdir('clc') +subdir('libkk') subdir('vulkan') executable( diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.c b/src/kosmickrisp/vulkan/kk_cmd_buffer.c index e15d3e5b20b..62893767f20 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.c +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.c @@ -421,6 +421,31 @@ kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd, desc->push_dirty = 0; } +void +kk_dispatch_precomp(struct kk_cmd_buffer *cmd, struct mtl_size grid, + bool pre_gfx, enum libkk_program idx, void *data, + size_t data_size) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + struct kk_precompiled_shader *prog = &dev->precompiled_cache.shaders[idx]; + + mtl_compute_encoder *encoder = + pre_gfx ? kk_encoder_pre_gfx_encoder(cmd) : kk_compute_encoder(cmd); + + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, data_size, 4u); + memcpy(bo->cpu, data, data_size); + + mtl_compute_set_buffer(encoder, bo->map, 0, 0); + mtl_compute_set_pipeline_state(encoder, prog->pipeline); + + struct mtl_size local_size = { + .x = prog->info.workgroup_size[0], + .y = prog->info.workgroup_size[1], + .z = prog->info.workgroup_size[2], + }; + mtl_dispatch_threads(encoder, grid, local_size); +} + void kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr, uint64_t value) diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.h b/src/kosmickrisp/vulkan/kk_cmd_buffer.h index ed81ac4013a..7a12e79f79b 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_buffer.h +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.h @@ -254,14 +254,13 @@ uint64_t kk_upload_descriptor_root(struct kk_cmd_buffer *cmd, void kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd, struct kk_descriptor_state *desc); +void kk_dispatch_precomp(struct kk_cmd_buffer *cmd, struct mtl_size grid, + bool pre_gfx, enum libkk_program idx, void *data, + size_t data_size); + +#define MESA_DISPATCH_PRECOMP kk_dispatch_precomp + void kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr, uint64_t value); -void kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd, - mtl_compute_encoder *encoder, - mtl_compute_pipeline_state *pipeline, - const void *push_data, size_t push_size, - uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ); - #endif diff --git a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c index 2a66ad47794..00558e60b85 100644 --- a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c +++ b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c @@ -18,44 +18,6 @@ #include "vk_common_entrypoints.h" -void -kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd, - mtl_compute_encoder *encoder, - mtl_compute_pipeline_state *pipeline, - const void *push_data, size_t push_size, - uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ) -{ - struct kk_root_descriptor_table *root = NULL; - struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(*root), 8u); - /* kk_cmd_allocate_buffer already sets the error, we can just exit */ - if (!bo) - return; - - root = bo->cpu; - assert(push_size <= sizeof(root->push)); - memcpy(root->push, push_data, push_size); - root->cs.base_group[0] = 1; /* TODO_KOSMICKRISP This is hard-coded because we - know this is the size we create them with */ - root->cs.base_group[1] = 1; - root->cs.base_group[2] = 1; - - mtl_compute_set_buffer(encoder, bo->map, 0, 0); - mtl_compute_set_pipeline_state(encoder, pipeline); - - struct mtl_size grid_size = { - .x = groupCountX, - .y = groupCountY, - .z = groupCountZ, - }; - struct mtl_size local_size = { - .x = 1, - .y = 1, - .z = 1, - }; - mtl_dispatch_threads(encoder, grid_size, local_size); -} - VKAPI_ATTR void VKAPI_CALL kk_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) diff --git a/src/kosmickrisp/vulkan/kk_device.h b/src/kosmickrisp/vulkan/kk_device.h index 77e84eb0b20..1553e0956d8 100644 --- a/src/kosmickrisp/vulkan/kk_device.h +++ b/src/kosmickrisp/vulkan/kk_device.h @@ -15,6 +15,9 @@ #include "kosmickrisp/bridge/mtl_types.h" +#include "kosmickrisp/clc/kk_precompiled_shader.h" +#include "libkk_shaders.h" + #include "util/u_dynarray.h" #include "vk_device.h" @@ -25,13 +28,6 @@ struct kk_bo; struct kk_physical_device; struct vk_pipeline_cache; -enum kk_device_lib_pipeline { - KK_LIB_IMM_WRITE = 0, - KK_LIB_COPY_QUERY, - KK_LIB_TRIANGLE_FAN, - KK_LIB_COUNT, -}; - struct kk_residency_set { simple_mtx_t mutex; mtl_residency_set *handle; @@ -75,6 +71,10 @@ struct kk_sampler_heap { struct hash_table *ht; }; +struct kk_precompiled_cache { + struct kk_precompiled_shader shaders[LIBKK_NUM_PROGRAMS]; +}; + struct kk_device { struct vk_device vk; @@ -91,7 +91,7 @@ struct kk_device { * recording as required by Metal. */ struct kk_residency_set residency_set; - mtl_compute_pipeline_state *lib_pipelines[KK_LIB_COUNT]; + struct kk_precompiled_cache precompiled_cache; struct kk_queue queue; @@ -103,14 +103,6 @@ struct kk_device { VK_DEFINE_HANDLE_CASTS(kk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) -static inline mtl_compute_pipeline_state * -kk_device_lib_pipeline(const struct kk_device *dev, - enum kk_device_lib_pipeline pipeline) -{ - assert(pipeline < KK_LIB_COUNT); - return dev->lib_pipelines[pipeline]; -} - static inline struct kk_physical_device * kk_device_physical(const struct kk_device *dev) { diff --git a/src/kosmickrisp/vulkan/kk_device_lib.c b/src/kosmickrisp/vulkan/kk_device_lib.c index 9d2afbf9e45..8de55346da4 100644 --- a/src/kosmickrisp/vulkan/kk_device_lib.c +++ b/src/kosmickrisp/vulkan/kk_device_lib.c @@ -8,184 +8,65 @@ #include "kk_shader.h" -#include "kkcl.h" +#include "kosmickrisp/clc/kk_precompiled_shader.h" + +#include "libkk_shaders.h" #include "kosmickrisp/bridge/mtl_bridge.h" #include "nir/nir.h" #include "nir/nir_builder.h" -static nir_def * -load_struct_var(nir_builder *b, nir_variable *var, uint32_t field) -{ - nir_deref_instr *deref = - nir_build_deref_struct(b, nir_build_deref_var(b, var), field); - return nir_load_deref(b, deref); -} - -static nir_shader * -create_imm_write_shader() -{ - nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, - "kk-meta-imm-write-u64"); - nir_builder *b = &build; - - struct glsl_struct_field push_fields[] = { - {.type = glsl_uint64_t_type(), .name = "buffer_address", .offset = 0}, - }; - const struct glsl_type *push_iface_type = glsl_interface_type( - push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, - false /* row_major */, "push"); - nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, - push_iface_type, "push"); - - b->shader->info.workgroup_size[0] = 1; - b->shader->info.workgroup_size[1] = 1; - b->shader->info.workgroup_size[2] = 1; - - libkk_write_u64(b, load_struct_var(b, push, 0)); - - return build.shader; -} - -static nir_shader * -create_copy_query_shader() -{ - nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, - "kk-meta-copy-queries"); - nir_builder *b = &build; - - struct glsl_struct_field push_fields[] = { - {.type = glsl_uint64_t_type(), .name = "availability", .offset = 0}, - {.type = glsl_uint64_t_type(), .name = "results", .offset = 8}, - {.type = glsl_uint64_t_type(), .name = "indices", .offset = 16}, - {.type = glsl_uint64_t_type(), .name = "dst_addr", .offset = 24}, - {.type = glsl_uint64_t_type(), .name = "dst_stride", .offset = 32}, - {.type = glsl_uint_type(), .name = "first_query", .offset = 40}, - {.type = glsl_uint_type(), .name = "flags", .offset = 44}, - {.type = glsl_uint16_t_type(), .name = "reports_per_query", .offset = 48}, - }; - /* TODO_KOSMICKRISP Don't use push constants and directly bind the buffer to - * the binding index. This requires compiler work first to remove the - * hard-coded buffer0 value. Same applies to other creation functions. - */ - const struct glsl_type *push_iface_type = glsl_interface_type( - push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, - false /* row_major */, "push"); - nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, - push_iface_type, "push"); - - b->shader->info.workgroup_size[0] = 1; - b->shader->info.workgroup_size[1] = 1; - b->shader->info.workgroup_size[2] = 1; - - libkk_copy_queries(b, load_struct_var(b, push, 0), - load_struct_var(b, push, 1), load_struct_var(b, push, 2), - load_struct_var(b, push, 3), load_struct_var(b, push, 4), - load_struct_var(b, push, 5), load_struct_var(b, push, 6), - load_struct_var(b, push, 7)); - - return build.shader; -} - -static nir_shader * -create_triangle_fan_shader() -{ - nir_builder build = nir_builder_init_simple_shader( - MESA_SHADER_COMPUTE, NULL, "kk-device-unroll-geomtry-and-restart"); - nir_builder *b = &build; - - struct glsl_struct_field push_fields[] = { - {.type = glsl_uint64_t_type(), .name = "index_buffer", .offset = 0}, - {.type = glsl_uint64_t_type(), .name = "out_ptr", .offset = 8}, - {.type = glsl_uint64_t_type(), .name = "indirect_in", .offset = 16}, - {.type = glsl_uint64_t_type(), .name = "indirect_out", .offset = 24}, - {.type = glsl_uint_type(), .name = "restart_index", .offset = 32}, - {.type = glsl_uint_type(), .name = "index_buffer_size_el", .offset = 36}, - {.type = glsl_uint_type(), .name = "in_el_size_B,", .offset = 40}, - {.type = glsl_uint_type(), .name = "out_el_size_B,", .offset = 44}, - {.type = glsl_uint_type(), .name = "flatshade_first", .offset = 48}, - {.type = glsl_uint_type(), .name = "mode", .offset = 52}, - }; - const struct glsl_type *push_iface_type = glsl_interface_type( - push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, - false /* row_major */, "push"); - nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, - push_iface_type, "push"); - - b->shader->info.workgroup_size[0] = 1; - b->shader->info.workgroup_size[1] = 1; - b->shader->info.workgroup_size[2] = 1; - - libkk_unroll_geometry_and_restart( - b, load_struct_var(b, push, 0), load_struct_var(b, push, 1), - load_struct_var(b, push, 2), load_struct_var(b, push, 3), - load_struct_var(b, push, 4), load_struct_var(b, push, 5), - load_struct_var(b, push, 6), load_struct_var(b, push, 7), - load_struct_var(b, push, 8), load_struct_var(b, push, 9)); - - return build.shader; -} - -static struct { - enum kk_device_lib_pipeline ndx; - nir_shader *(*create_shader_fn)(); -} lib_shaders[KK_LIB_COUNT] = { - {KK_LIB_IMM_WRITE, create_imm_write_shader}, - {KK_LIB_COPY_QUERY, create_copy_query_shader}, - {KK_LIB_TRIANGLE_FAN, create_triangle_fan_shader}, -}; -static_assert(ARRAY_SIZE(lib_shaders) == KK_LIB_COUNT, - "Device lib shader count and created shader count mismatch"); - -VkResult -kk_device_init_lib(struct kk_device *dev) +static VkResult +build_precompiled_shaders(struct kk_device *dev) { VkResult result = VK_SUCCESS; uint32_t i = 0u; - for (; i < KK_LIB_COUNT; ++i) { - nir_shader *s = lib_shaders[i].create_shader_fn(); - if (!s) - goto fail; + for (; i < LIBKK_NUM_PROGRAMS; ++i) { + const uint32_t *bin = libkk_AppleSilicon[i]; + const struct kk_precompiled_info *info = (void *)bin; + const char *msl = (const char *)bin + sizeof(*info); - struct kk_shader *shader = NULL; - result = kk_compile_nir_shader(dev, s, &dev->vk.alloc, &shader); - if (result != VK_SUCCESS) - goto fail; - - mtl_library *library = mtl_new_library(dev->mtl_handle, shader->msl_code); + mtl_library *library = mtl_new_library(dev->mtl_handle, msl); if (library == NULL) goto fail; - uint32_t local_size_threads = shader->info.cs.local_size.x * - shader->info.cs.local_size.y * - shader->info.cs.local_size.z; + struct kk_precompiled_shader *shader = &dev->precompiled_cache.shaders[i]; + + /* TODO_KOSMICKRISP Do not hardcode the entrypoint */ mtl_function *function = - mtl_new_function_with_name(library, shader->entrypoint_name); - dev->lib_pipelines[i] = mtl_new_compute_pipeline_state( + mtl_new_function_with_name(library, "main_entrypoint"); + uint32_t local_size_threads = info->workgroup_size[0] * + info->workgroup_size[1] * + info->workgroup_size[2]; + shader->pipeline = mtl_new_compute_pipeline_state( dev->mtl_handle, function, local_size_threads); mtl_release(function); mtl_release(library); - /* We no longer need the shader. Although it may be useful to keep it - * alive for the info maybe? */ - shader->vk.ops->destroy(&dev->vk, &shader->vk, &dev->vk.alloc); - - if (!dev->lib_pipelines[i]) + if (!shader->pipeline) goto fail; + + memcpy(&shader->info, info, sizeof(*info)); } return result; fail: for (uint32_t j = 0u; j < i; ++j) - mtl_release(dev->lib_pipelines[j]); + mtl_release(dev->precompiled_cache.shaders[j].pipeline); return vk_error(dev, result); } +VkResult +kk_device_init_lib(struct kk_device *dev) +{ + return build_precompiled_shaders(dev); +} + void kk_device_finish_lib(struct kk_device *dev) { - for (uint32_t i = 0; i < KK_LIB_COUNT; ++i) - mtl_release(dev->lib_pipelines[i]); + for (uint32_t i = 0; i < LIBKK_NUM_PROGRAMS; ++i) + mtl_release(dev->precompiled_cache.shaders[i].pipeline); } diff --git a/src/kosmickrisp/vulkan/kk_encoder.c b/src/kosmickrisp/vulkan/kk_encoder.c index 2790e33ec80..35774bd85c9 100644 --- a/src/kosmickrisp/vulkan/kk_encoder.c +++ b/src/kosmickrisp/vulkan/kk_encoder.c @@ -13,7 +13,8 @@ #include "kosmickrisp/bridge/mtl_bridge.h" #include "kosmickrisp/bridge/vk_to_mtl_map.h" -#include "cl/kk_query.h" +#include "kosmickrisp/libkk/kk_query.h" +#include "libkk_shaders.h" static void kk_encoder_start_internal(struct kk_encoder_internal *encoder, @@ -134,8 +135,6 @@ upload_queue_writes(struct kk_cmd_buffer *cmd) enc->copy_query_pool_result_infos.size == 0u) return; - struct kk_device *dev = kk_cmd_buffer_device(cmd); - mtl_compute_encoder *compute = kk_compute_encoder(cmd); uint32_t count = util_dynarray_num_elements(&enc->imm_writes, uint64_t) / 2u; if (count != 0) { struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, enc->imm_writes.size, 8u); @@ -143,27 +142,23 @@ upload_queue_writes(struct kk_cmd_buffer *cmd) if (!bo) return; memcpy(bo->cpu, enc->imm_writes.data, enc->imm_writes.size); - struct kk_imm_write_push push_data = { - .buffer_address = bo->gpu, - .count = count, - }; - kk_cmd_dispatch_pipeline(cmd, compute, - kk_device_lib_pipeline(dev, KK_LIB_IMM_WRITE), - &push_data, sizeof(push_data), count, 1, 1); + struct mtl_size grid = {count, 1, 1}; + libkk_write_u64(cmd, grid, false, bo->gpu); enc->imm_writes.size = 0u; } count = util_dynarray_num_elements(&enc->copy_query_pool_result_infos, - struct kk_copy_query_pool_results_info); + struct libkk_copy_queries_args); if (count != 0u) { for (uint32_t i = 0u; i < count; ++i) { struct kk_copy_query_pool_results_info *push_data = util_dynarray_element(&enc->copy_query_pool_result_infos, struct kk_copy_query_pool_results_info, i); - kk_cmd_dispatch_pipeline( - cmd, compute, kk_device_lib_pipeline(dev, KK_LIB_COPY_QUERY), - push_data, sizeof(*push_data), push_data->query_count, 1, 1); + struct mtl_size grid = {push_data->query_count, 1, 1}; + const struct libkk_copy_queries_args *data = + (const struct libkk_copy_queries_args *)push_data; + libkk_copy_queries_struct(cmd, grid, false, *data); } enc->copy_query_pool_result_infos.size = 0u; } @@ -330,7 +325,7 @@ kk_blit_encoder(struct kk_cmd_buffer *cmd) return (mtl_blit_encoder *)encoder->encoder; } -static mtl_compute_encoder * +mtl_compute_encoder * kk_encoder_pre_gfx_encoder(struct kk_cmd_buffer *cmd) { struct kk_encoder *encoder = cmd->encoder; @@ -352,26 +347,12 @@ kk_encoder_pre_gfx_encoder(struct kk_cmd_buffer *cmd) return encoder->pre_gfx.encoder; } -struct kk_triangle_fan_info { - uint64_t index_buffer; - uint64_t out_ptr; - uint64_t in_draw; - uint64_t out_draw; - uint32_t restart_index; - uint32_t index_buffer_size_el; - uint32_t in_el_size_B; - uint32_t out_el_size_B; - uint32_t flatshade_first; - uint32_t mode; -}; - static void -kk_encoder_render_triangle_fan_common(struct kk_cmd_buffer *cmd, - struct kk_triangle_fan_info *info, - mtl_buffer *indirect, mtl_buffer *index, - uint32_t index_count, - uint32_t in_el_size_B, - uint32_t out_el_size_B) +kk_encoder_render_triangle_fan_common( + struct kk_cmd_buffer *cmd, + struct libkk_unroll_geometry_and_restart_args *info, mtl_buffer *indirect, + mtl_buffer *index, uint32_t index_count, uint32_t in_el_size_B, + uint32_t out_el_size_B) { uint32_t index_buffer_size_B = index_count * out_el_size_B; uint32_t buffer_size_B = @@ -387,12 +368,9 @@ kk_encoder_render_triangle_fan_common(struct kk_cmd_buffer *cmd, info->in_el_size_B = in_el_size_B; info->out_el_size_B = out_el_size_B; info->flatshade_first = true; - mtl_compute_encoder *encoder = kk_encoder_pre_gfx_encoder(cmd); - struct kk_device *dev = kk_cmd_buffer_device(cmd); - kk_cmd_dispatch_pipeline(cmd, encoder, - kk_device_lib_pipeline(dev, KK_LIB_TRIANGLE_FAN), - info, sizeof(*info), 1u, 1u, 1u); + struct mtl_size grid = {1, 1, 1}; + libkk_unroll_geometry_and_restart_struct(cmd, grid, true, *info); enum mtl_index_type index_type = index_size_in_bytes_to_mtl_index_type(out_el_size_B); @@ -411,7 +389,7 @@ kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd, u_decomposed_prims_for_vertices(mode, cmd->state.gfx.vb.max_vertices) * mesa_vertices_per_prim(mode); uint32_t el_size_B = decomposed_index_count < UINT16_MAX ? 2u : 4u; - struct kk_triangle_fan_info info = { + struct libkk_unroll_geometry_and_restart_args info = { .in_draw = mtl_buffer_get_gpu_address(indirect) + offset, .restart_index = UINT32_MAX, /* No restart */ .mode = mode, @@ -437,7 +415,7 @@ kk_encoder_render_triangle_fan_indexed_indirect(struct kk_cmd_buffer *cmd, u_decomposed_prims_for_vertices(mode, max_index_count) * mesa_vertices_per_prim(mode); - struct kk_triangle_fan_info info = { + struct libkk_unroll_geometry_and_restart_args info = { .index_buffer = mtl_buffer_get_gpu_address(cmd->state.gfx.index.handle) + cmd->state.gfx.index.offset, .in_draw = mtl_buffer_get_gpu_address(indirect) + offset, diff --git a/src/kosmickrisp/vulkan/kk_encoder.h b/src/kosmickrisp/vulkan/kk_encoder.h index eb820ba604f..8708141b4b7 100644 --- a/src/kosmickrisp/vulkan/kk_encoder.h +++ b/src/kosmickrisp/vulkan/kk_encoder.h @@ -101,6 +101,8 @@ mtl_compute_encoder *kk_compute_encoder(struct kk_cmd_buffer *cmd); mtl_blit_encoder *kk_blit_encoder(struct kk_cmd_buffer *cmd); +mtl_compute_encoder *kk_encoder_pre_gfx_encoder(struct kk_cmd_buffer *cmd); + void upload_queue_writes(struct kk_cmd_buffer *cmd); void kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd, diff --git a/src/kosmickrisp/vulkan/kk_query_pool.c b/src/kosmickrisp/vulkan/kk_query_pool.c index 987b9299893..11b2bcf4bdd 100644 --- a/src/kosmickrisp/vulkan/kk_query_pool.c +++ b/src/kosmickrisp/vulkan/kk_query_pool.c @@ -17,7 +17,6 @@ #include "kk_entrypoints.h" #include "kk_physical_device.h" #include "kk_query_table.h" -#include "kkcl.h" struct kk_query_report { uint64_t value; diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c index a6413f90934..c337ee7f1c7 100644 --- a/src/kosmickrisp/vulkan/kk_shader.c +++ b/src/kosmickrisp/vulkan/kk_shader.c @@ -33,41 +33,7 @@ static const nir_shader_compiler_options * kk_get_nir_options(struct vk_physical_device *vk_pdev, mesa_shader_stage stage, UNUSED const struct vk_pipeline_robustness_state *rs) { - static nir_shader_compiler_options options = { - .lower_fdph = true, - .has_fsub = true, - .has_isub = true, - .lower_extract_word = true, - .lower_extract_byte = true, - .lower_insert_word = true, - .lower_insert_byte = true, - .lower_fmod = true, - .discard_is_demote = true, - .instance_id_includes_base_index = true, - .lower_device_index_to_zero = true, - .lower_pack_64_2x32_split = true, - .lower_unpack_64_2x32_split = true, - .lower_pack_64_2x32 = true, - .lower_pack_half_2x16 = true, - .lower_pack_split = true, - .lower_unpack_half_2x16 = true, - .has_cs_global_id = true, - .lower_fquantize2f16 = true, - .lower_scmp = true, - .lower_ifind_msb = true, - .lower_ufind_msb = true, - .lower_find_lsb = true, - .has_uclz = true, - .lower_mul_2x32_64 = true, - .lower_uadd_carry = true, - .lower_usub_borrow = true, - /* Metal does not support double. */ - .lower_doubles_options = (nir_lower_doubles_options)(~0), - .lower_int64_options = - nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64, - .io_options = nir_io_mediump_is_32bit, - }; - return &options; + return &kk_nir_options; } /* TODO_KOSMICKRISP Once we support robustness2, update these values. */ diff --git a/src/kosmickrisp/vulkan/meson.build b/src/kosmickrisp/vulkan/meson.build index 41bf66d3eae..7ba3b3f5bc5 100644 --- a/src/kosmickrisp/vulkan/meson.build +++ b/src/kosmickrisp/vulkan/meson.build @@ -18,6 +18,7 @@ kk_device_dispatch_table_dependencies_list = [ idep_vulkan_util, idep_vulkan_wsi, idep_vulkan_wsi_headers, + idep_libkk_shaders_h, ] libkk_device_dispatch_table = static_library( @@ -100,11 +101,6 @@ kk_files = files( 'kk_wsi.c', ) -kkcl_files = files( - 'cl/kk_query.cl', - 'cl/kk_triangle_fan.cl', -) - kk_entrypoints = custom_target( 'kk_entrypoints', input : [vk_entrypoints_gen, vk_api_xml], @@ -119,25 +115,6 @@ kk_entrypoints = custom_target( relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root()) -kkcl_spv = custom_target( - input : kkcl_files, - output : 'kkcl.spv', - command : [ - prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', kkcl_files, '--', - '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'), - '-I' + join_paths(meson.current_source_dir(), '.'), - '-I' + join_paths(meson.project_source_root(), 'src'), - cl_args, - ], - depfile : 'libkk_shaders.h.d', -) - -kkcl = custom_target( - input : kkcl_spv, - output : ['kkcl.cpp', 'kkcl.h'], - command : [prog_vtn_bindgen2, '@INPUT@', '@OUTPUT@'], -) - kk_deps = [ idep_nir, idep_mesautil, @@ -148,7 +125,9 @@ kk_deps = [ kk_device_dispatch_table_dependencies, idep_kk_device_dispatch_table, idep_msl_to_nir, - idep_mtl_bridge + idep_mtl_bridge, + idep_libkk, + idep_libkk_shaders_h, ] libkk = static_library( @@ -156,7 +135,6 @@ libkk = static_library( [ kk_files, kk_entrypoints, - kkcl, sha1_h, ], include_directories : [