panfrost: Add base of OpenCL C infrastructure

This allows compiling CL shaders into a single SPIR-V library per arch, NIR call bindings for each functions and precompilled binaries for each entrypoints. We are only going to support Bifrost and Valhall for this. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32720>
2025-12-20 13:50:11 +01:00 · 2024-12-11 19:12:10 +01:00 · 2024-12-11 19:12:10 +01:00 · 20970bcd96
commit 20970bcd96
parent 410e5a36ec
10 changed files with 691 additions and 7 deletions
--- a/docs/drivers/panfrost.rst
+++ b/docs/drivers/panfrost.rst
@ -47,13 +47,18 @@ controllers paired with Mali GPUs. If your board with a Panfrost supported GPU
 has a display controller with mainline Linux support not supported by kmsro,
 it's easy to add support, see the commit ``cff7de4bb597e9`` as an example.

-LLVM is *not* required by Panfrost's compilers. LLVM support in Mesa can
-safely be disabled for most OpenGL ES users with Panfrost.
-
 Build like ``meson . build/ -Dvulkan-drivers=panfrost
-Dgallium-drivers=panfrost -Dllvm=disabled`` for a build directory
+-Dgallium-drivers=panfrost`` for a build directory
 ``build``.

+LLVM is required by Panfrost's compilers at build time.
+
+In case of cross compilation without LLVM,
+you can build and install the required tools on the host (with LLVM installed) with
+``meson . build-host/ -Dvulkan-drivers=panfrost -Dgallium-drivers=panfrost
+-Dmesa-clc=enabled -Dinstall-mesa-clc=true -Dprecomp-compiler=enabled -Dinstall-precomp-compiler=true``
+and then use ``-Dmesa-clc=system -Dprecomp-compiler=system`` on the cross compile side.
+
 For general information on building Mesa, read :doc:`the install documentation
 <../install>`.

--- a/meson.build
+++ b/meson.build
@ -830,8 +830,11 @@ else
  with_drivers_clc = false
 endif

-with_driver_using_cl = with_gallium_iris or with_intel_vk or \
-                       with_gallium_asahi or with_asahi_vk
+with_driver_using_cl = [
+  with_gallium_iris, with_intel_vk,
+  with_gallium_asahi, with_asahi_vk,
+  with_gallium_panfrost, with_panfrost_vk,
+].contains(true)

 if get_option('mesa-clc') == 'system'
  prog_mesa_clc = find_program('mesa_clc', native : true)
--- a/src/panfrost/clc/meson.build
+++ b/src/panfrost/clc/meson.build
@ -0,0 +1,23 @@
+# Copyright 2017 Intel Corporation
+# Copyright 2024 Collabora
+# SPDX-License-Identifier: MIT
+
+
+if get_option('precomp-compiler') == 'system'
+  prog_panfrost_compile = find_program('panfrost_compile', native : true)
+else
+  prog_panfrost_compile = executable(
+    'panfrost_compile',
+    ['panfrost_compile.c'],
+    link_with : [libpanfrost_bifrost],
+    include_directories : [inc_include, inc_src],
+    c_args : [c_msvc_compat_args, no_override_init_args],
+    link_args : [ld_args_build_id],
+    dependencies : [idep_vtn, idep_nir, idep_mesautil, libpanfrost_dep],
+    # If we can run host binaries directly, just build panfrost_compile for the host.
+    # Most commonly this happens when doing a cross compile from an x86_64 build
+    # machine to an x86 host
+    native : not meson.can_run_host_binaries(),
+    install : get_option('install-precomp-compiler'),
+  )
+endif
--- a/src/panfrost/clc/panfrost_compile.c
+++ b/src/panfrost/clc/panfrost_compile.c
@ -0,0 +1,482 @@
+/*
+ * Copyright 2024 Collabora Ltd
+ * Copyright 2023 Alyssa Rosenzweig
+ * Copyright 2020 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/glsl_types.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "panfrost/compiler/bifrost_compile.h"
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_builder_opcodes.h"
+#include "nir_intrinsics.h"
+#include "nir_precompiled.h"
+#include "pan_shader.h"
+#include "shader_enums.h"
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "panfrost/util/pan_ir.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include <sys/mman.h>
+
+static const struct spirv_to_nir_options spirv_options = {
+   .environment = NIR_SPIRV_OPENCL,
+   .shared_addr_format = nir_address_format_62bit_generic,
+   .global_addr_format = nir_address_format_62bit_generic,
+   .temp_addr_format = nir_address_format_62bit_generic,
+   .constant_addr_format = nir_address_format_64bit_global,
+   .create_library = true,
+};
+
+static const nir_shader_compiler_options *
+get_compiler_options(unsigned arch)
+{
+   if (arch >= 9)
+      return &bifrost_nir_options_v9;
+
+   return &bifrost_nir_options_v6;
+}
+
+/* Standard optimization loop */
+static void
+optimize(nir_shader *nir)
+{
+   bool progress;
+   do {
+      progress = false;
+
+      NIR_PASS(progress, nir, nir_split_var_copies);
+      NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
+      NIR_PASS(progress, nir, nir_lower_var_copies);
+      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_remove_phis);
+      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      NIR_PASS(progress, nir, nir_opt_dead_cf);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
+      NIR_PASS(progress, nir, nir_opt_phi_precision);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+
+      NIR_PASS(progress, nir, nir_opt_deref);
+      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+      NIR_PASS(progress, nir, nir_opt_undef);
+      NIR_PASS(progress, nir, nir_lower_undef_to_zero);
+
+      NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
+      NIR_PASS(progress, nir, nir_opt_loop_unroll);
+
+   } while (progress);
+}
+
+static nir_shader *
+compile(void *memctx, const uint32_t *spirv, size_t spirv_size, unsigned arch)
+{
+   const nir_shader_compiler_options *nir_options = get_compiler_options(arch);
+
+   nir_shader *nir =
+      spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
+                   "library", &spirv_options, nir_options);
+   nir_validate_shader(nir, "after spirv_to_nir");
+   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
+   ralloc_steal(memctx, nir);
+
+   nir_fixup_is_exported(nir);
+
+   NIR_PASS(_, nir, nir_lower_system_values);
+   NIR_PASS(_, nir, nir_lower_calls_to_builtins);
+
+   nir_lower_compute_system_values_options cs = {.global_id_is_32bit = true};
+   NIR_PASS(_, nir, nir_lower_compute_system_values, &cs);
+
+   /* We have to lower away local constant initializers right before we
+    * inline functions.  That way they get properly initialized at the top
+    * of the function and not at the top of its caller.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_lower_returns);
+   NIR_PASS(_, nir, nir_inline_functions);
+   nir_remove_non_exported(nir);
+   NIR_PASS(_, nir, nir_copy_prop);
+   NIR_PASS(_, nir, nir_opt_deref);
+
+   /* We can't deal with constant data, get rid of it */
+   nir_lower_constant_to_temp(nir);
+
+   /* We can go ahead and lower the rest of the constant initializers.  We do
+    * this here so that nir_remove_dead_variables and split_per_member_structs
+    * below see the corresponding stores.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, ~0);
+
+   /* LLVM loves take advantage of the fact that vec3s in OpenCL are 16B
+    * aligned and so it can just read/write them as vec4s.  This results in a
+    * LOT of vec4->vec3 casts on loads and stores.  One solution to this
+    * problem is to get rid of all vec3 variables.
+    */
+   NIR_PASS(_, nir, nir_lower_vec3_to_vec4,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+               nir_var_mem_global | nir_var_mem_constant);
+
+   /* We assign explicit types early so that the optimizer can take advantage
+    * of that information and hopefully get rid of some of our memcpys.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_uniform | nir_var_shader_temp | nir_var_function_temp |
+               nir_var_mem_shared | nir_var_mem_global,
+            glsl_get_cl_type_size_align);
+
+   optimize(nir);
+
+   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_all, NULL);
+
+   /* Lower again, this time after dead-variables to get more compact variable
+    * layouts.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+               nir_var_mem_global | nir_var_mem_constant,
+            glsl_get_cl_type_size_align);
+   assert(nir->constant_data_size == 0);
+
+   NIR_PASS(_, nir, nir_lower_memcpy);
+
+   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_constant,
+            nir_address_format_64bit_global);
+
+   NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_uniform,
+            nir_address_format_32bit_offset_as_64bit);
+
+   NIR_PASS(_, nir, nir_lower_convert_alu_types, NULL);
+   NIR_PASS(_, nir, nir_opt_if, 0);
+   NIR_PASS(_, nir, nir_opt_idiv_const, 16);
+
+   /* Lower explicit IO here to ensure that we will not clash with different
+    * address formats inside shaders */
+   NIR_PASS(_, nir, nir_opt_deref);
+   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+   NIR_PASS(_, nir, nir_lower_explicit_io,
+            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
+               nir_var_mem_global,
+            nir_address_format_62bit_generic);
+
+   optimize(nir);
+
+   return nir;
+}
+
+static nir_def *
+load_sysval_from_push_const(nir_builder *b, unsigned offset, unsigned bit_size,
+                            unsigned num_comps)
+{
+   return nir_load_push_constant(
+      b, num_comps, bit_size,
+      nir_imm_int(b, BIFROST_PRECOMPILED_KERNEL_SYSVALS_OFFSET + offset));
+}
+
+static bool
+lower_sysvals(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_data)
+{
+   const nir_shader *shader = b->shader;
+
+   unsigned num_comps = intr->def.num_components;
+   unsigned bit_size = intr->def.bit_size;
+   nir_def *val = NULL;
+   b->cursor = nir_before_instr(&intr->instr);
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_base_workgroup_id:
+      /* The base is always 0 */
+      val = nir_imm_zero(b, num_comps, bit_size);
+      break;
+   case nir_intrinsic_load_workgroup_size:
+      /* We are never expecting the local size to be variable */
+      assert(!shader->info.workgroup_size_variable);
+      val = nir_vec3(b, nir_imm_int(b, shader->info.workgroup_size[0]),
+                     nir_imm_int(b, shader->info.workgroup_size[1]),
+                     nir_imm_int(b, shader->info.workgroup_size[2]));
+      break;
+
+   case nir_intrinsic_load_num_workgroups:
+      val = load_sysval_from_push_const(
+         b, offsetof(struct bifrost_precompiled_kernel_sysvals, num_workgroups),
+         bit_size, num_comps);
+      break;
+
+   default:
+      return false;
+   }
+
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_def_replace(&intr->def, val);
+   return true;
+}
+
+static void
+print_shader(FILE *fp, const char *name, const char *suffix, uint32_t variant,
+             nir_shader *nir, struct pan_shader_info *shader_info,
+             struct util_dynarray *binary)
+{
+   struct bifrost_precompiled_kernel_info info =
+      bifrost_precompiled_pack_kernel_info(nir, shader_info, binary);
+   size_t sz_B = sizeof(info) + binary->size;
+   size_t sz_el = DIV_ROUND_UP(sz_B, 4);
+   uint32_t *mem = calloc(sz_el, 4);
+
+   memcpy(mem, &info, sizeof(info));
+   memcpy((uint8_t *)mem + sizeof(info), binary->data, binary->size);
+
+   nir_precomp_print_blob(fp, name, suffix, variant, mem, sz_B, true);
+   free(mem);
+}
+
+static nir_def *
+load_kernel_input(nir_builder *b, unsigned num_components, unsigned bit_size,
+                  unsigned offset_B)
+{
+   return nir_load_push_constant(
+      b, num_components, bit_size,
+      nir_imm_int(b, BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET + offset_B));
+}
+
+/* Always assume default as we generate per gen already */
+static const char *
+remap_variant(nir_function *func, unsigned variant, const char *target)
+{
+   return "default";
+}
+
+void pan_shader_compile_v6(nir_shader *nir,
+                           struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info);
+
+void pan_shader_compile_v7(nir_shader *nir,
+                           struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info);
+
+void pan_shader_compile_v9(nir_shader *nir,
+                           struct panfrost_compile_inputs *inputs,
+                           struct util_dynarray *binary,
+                           struct pan_shader_info *info);
+
+void pan_shader_compile_v10(nir_shader *nir,
+                            struct panfrost_compile_inputs *inputs,
+                            struct util_dynarray *binary,
+                            struct pan_shader_info *info);
+
+static void
+shader_compile(int arch, nir_shader *nir,
+               struct panfrost_compile_inputs *inputs,
+               struct util_dynarray *binary, struct pan_shader_info *info)
+{
+   switch (arch) {
+   case 6:
+      pan_shader_compile_v6(nir, inputs, binary, info);
+      break;
+   case 7:
+      pan_shader_compile_v7(nir, inputs, binary, info);
+      break;
+   case 9:
+      pan_shader_compile_v9(nir, inputs, binary, info);
+      break;
+   case 10:
+      pan_shader_compile_v10(nir, inputs, binary, info);
+      break;
+   default:
+      unreachable("Unknown arch!");
+   }
+}
+
+int
+main(int argc, const char **argv)
+{
+   if (argc != 6) {
+      fprintf(
+         stderr,
+         "Usage: %s [library name] [arch] [input spir-v] [output header] [output C]\n",
+         argv[0]);
+      return 1;
+   }
+
+   const char *library_name = argv[1];
+   const char *target_arch_str = argv[2];
+   const char *input_spirv_path = argv[3];
+   const char *output_h_path = argv[4];
+   const char *output_c_path = argv[5];
+
+   int target_arch = atoi(target_arch_str);
+
+   if (target_arch < 4 || target_arch > 10) {
+      fprintf(stderr, "Unsupported target arch %d\n", target_arch);
+      return 1;
+   }
+
+   void *mem_ctx = ralloc_context(NULL);
+   if (mem_ctx == NULL) {
+      fprintf(stderr, "mem_ctx allocation failed\n");
+      goto err_out;
+   }
+
+   int fd = open(input_spirv_path, O_RDONLY);
+   if (fd < 0) {
+      fprintf(stderr, "Failed to open %s\n", input_spirv_path);
+      goto input_spirv_open_failed;
+   }
+
+   off_t spirv_len = lseek(fd, 0, SEEK_END);
+   const void *spirv_map = mmap(NULL, spirv_len, PROT_READ, MAP_PRIVATE, fd, 0);
+   close(fd);
+
+   if (spirv_map == MAP_FAILED) {
+      fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n", errno,
+              strerror(errno));
+      goto input_spirv_open_failed;
+   }
+
+   FILE *fp_h = fopen(output_h_path, "w");
+   if (fp_h == NULL) {
+      fprintf(stderr, "Failed to open %s for writting\n", output_h_path);
+      goto input_spirv_open_failed;
+   }
+
+   FILE *fp_c = fopen(output_c_path, "w");
+   if (fp_c == NULL) {
+      fprintf(stderr, "Failed to open %s for writting\n", output_c_path);
+      goto fp_c_open_failed;
+   }
+
+   glsl_type_singleton_init_or_ref();
+
+   /* POSIX basename can modify the content of the path */
+   char *tmp_out_h_path = strdup(output_h_path);
+   const char *output_h_file_name = basename(tmp_out_h_path);
+   nir_precomp_print_header(fp_c, fp_h, "Collabora Ltd", output_h_file_name);
+   free(tmp_out_h_path);
+
+   nir_shader *nir = compile(mem_ctx, spirv_map, spirv_len, target_arch);
+
+   /* load_preamble works at 32-bit granularity */
+   struct nir_precomp_opts opt = {.arg_align_B = 4};
+
+   nir_foreach_entrypoint(libfunc, nir) {
+      if (target_arch < 6) {
+         fprintf(
+            stderr,
+            "ERROR: Attempting to compile entrypoint %s on Midgard, this is unsupported!\n",
+            libfunc->name);
+         goto invalid_precomp;
+      }
+
+      unsigned nr_vars = nir_precomp_nr_variants(libfunc);
+
+      nir_precomp_print_layout_struct(fp_h, &opt, libfunc);
+
+      for (unsigned v = 0; v < nr_vars; ++v) {
+         nir_shader *s = nir_precompiled_build_variant(
+            libfunc, v, get_compiler_options(target_arch), &opt,
+            load_kernel_input);
+
+         /* Because we do nir_lower_explicit_io on temp variable early on, we
+          * lose the scratch_size when we build the shader variant so we need
+          * to readjust it here. */
+         s->scratch_size = MAX2(s->scratch_size, nir->scratch_size);
+
+         struct panfrost_compile_inputs inputs = {
+            .gpu_id = target_arch << 12,
+            .no_ubo_to_push = true,
+         };
+
+         nir_link_shader_functions(s, nir);
+         NIR_PASS(_, s, nir_inline_functions);
+         nir_remove_non_entrypoints(s);
+         NIR_PASS(_, s, nir_opt_deref);
+         NIR_PASS(_, s, nir_lower_vars_to_ssa);
+         NIR_PASS(_, s, nir_remove_dead_derefs);
+         NIR_PASS(_, s, nir_remove_dead_variables,
+                  nir_var_function_temp | nir_var_shader_temp, NULL);
+         NIR_PASS(_, s, nir_lower_vars_to_explicit_types,
+                  nir_var_shader_temp | nir_var_function_temp,
+                  glsl_get_cl_type_size_align);
+
+         NIR_PASS(_, s, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+                  glsl_get_cl_type_size_align);
+
+         /* Unroll loops before lowering indirects */
+         bool progress = false;
+         do {
+            progress = false;
+            NIR_PASS(progress, s, nir_opt_loop);
+         } while (progress);
+
+         pan_shader_preprocess(s, inputs.gpu_id);
+
+         NIR_PASS(_, s, nir_opt_deref);
+         NIR_PASS(_, s, nir_lower_vars_to_ssa);
+         NIR_PASS(_, s, nir_lower_explicit_io,
+                  nir_var_shader_temp | nir_var_function_temp |
+                     nir_var_mem_shared | nir_var_mem_global,
+                  nir_address_format_62bit_generic);
+
+         NIR_PASS(_, s, nir_shader_intrinsics_pass, lower_sysvals,
+                  nir_metadata_control_flow, NULL);
+
+         nir_shader *clone = nir_shader_clone(NULL, s);
+
+         struct util_dynarray shader_binary;
+         struct pan_shader_info shader_info = {0};
+         util_dynarray_init(&shader_binary, NULL);
+         shader_compile(target_arch, clone, &inputs, &shader_binary,
+                        &shader_info);
+
+         assert(shader_info.push.count * 4 <=
+                   BIFROST_PRECOMPILED_KERNEL_ARGS_SIZE &&
+                "Too many kernel arguments!");
+
+         print_shader(fp_c, libfunc->name, "default", v, s, &shader_info,
+                      &shader_binary);
+         util_dynarray_fini(&shader_binary);
+         ralloc_free(clone);
+
+         ralloc_free(s);
+      }
+   }
+
+   nir_precomp_print_program_enum(fp_h, nir, library_name);
+   nir_precomp_print_dispatch_macros(fp_h, &opt, nir);
+
+   char target_name[12];
+   snprintf(target_name, sizeof(target_name), "default_v%d", target_arch);
+   nir_precomp_print_extern_binary_map(fp_h, library_name, target_name);
+   nir_precomp_print_binary_map(fp_c, nir, library_name, target_name,
+                                remap_variant);
+
+   glsl_type_singleton_decref();
+   fclose(fp_c);
+   fclose(fp_h);
+   ralloc_free(mem_ctx);
+
+   return 0;
+
+invalid_precomp:
+   glsl_type_singleton_decref();
+fp_c_open_failed:
+   fclose(fp_h);
+input_spirv_open_failed:
+   ralloc_free(mem_ctx);
+err_out:
+   return 1;
+}
--- a/src/panfrost/compiler/bifrost_compile.h
+++ b/src/panfrost/compiler/bifrost_compile.h
@ -24,10 +24,61 @@
 #ifndef __BIFROST_PUBLIC_H_
 #define __BIFROST_PUBLIC_H_

+#include <string.h>
 #include "compiler/nir/nir.h"
 #include "panfrost/util/pan_ir.h"
 #include "util/u_dynarray.h"

+struct bifrost_precompiled_kernel_sysvals {
+   struct {
+      unsigned x, y, z;
+   } num_workgroups;
+} __attribute__((aligned(8)));
+;
+
+#define BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE                                \
+   sizeof(struct bifrost_precompiled_kernel_sysvals)
+#define BIFROST_PRECOMPILED_KERNEL_SYSVALS_OFFSET (0)
+#define BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET                                 \
+   (BIFROST_PRECOMPILED_KERNEL_SYSVALS_OFFSET +                                \
+    BIFROST_PRECOMPILED_KERNEL_SYSVALS_SIZE)
+#define BIFROST_PRECOMPILED_KERNEL_ARGS_SIZE                                   \
+   (512 - BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET)
+
+struct bifrost_precompiled_kernel_info {
+   struct pan_shader_info info;
+   unsigned local_size_x;
+   unsigned local_size_y;
+   unsigned local_size_z;
+   unsigned binary_size;
+};
+
+static inline struct bifrost_precompiled_kernel_info
+bifrost_precompiled_pack_kernel_info(nir_shader *nir,
+                                     struct pan_shader_info *info,
+                                     struct util_dynarray *binary)
+{
+   return (struct bifrost_precompiled_kernel_info){
+      .info = *info,
+      .local_size_x = nir->info.workgroup_size[0],
+      .local_size_y = nir->info.workgroup_size[1],
+      .local_size_z = nir->info.workgroup_size[2],
+      .binary_size = binary->size,
+   };
+}
+
+static inline void
+bifrost_precompiled_kernel_prepare_push_uniforms(
+   void *dst, const void *user_data, size_t user_data_size,
+   const struct bifrost_precompiled_kernel_sysvals *sysvals)
+{
+   assert(user_data_size <= BIFROST_PRECOMPILED_KERNEL_ARGS_SIZE);
+
+   memcpy(dst, sysvals, sizeof(*sysvals));
+   memcpy(((uint8_t *)dst + BIFROST_PRECOMPILED_KERNEL_ARGS_OFFSET), user_data,
+          user_data_size);
+}
+
 void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);

 void bifrost_compile_shader_nir(nir_shader *nir,
--- a/src/panfrost/libpan/libpan.h
+++ b/src/panfrost/libpan/libpan.h
@ -0,0 +1,33 @@
+/*
+ * Copyright 2024 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef LIBPAN_H
+#define LIBPAN_H
+
+#ifndef __OPENCL_VERSION__
+#ifndef PAN_ARCH
+#error "PAN_ARCH needs to be defined for this header to work!"
+#endif
+
+/* We now include binding definition */
+#if (PAN_ARCH == 4)
+#include "libpan_v5.h"
+#elif (PAN_ARCH == 5)
+#include "libpan_v5.h"
+#elif (PAN_ARCH == 6)
+#include "libpan_v6.h"
+#elif (PAN_ARCH == 7)
+#include "libpan_v7.h"
+#elif (PAN_ARCH == 9)
+#include "libpan_v9.h"
+#elif (PAN_ARCH == 10)
+#include "libpan_v10.h"
+#else
+#error "Unsupported architecture for libpan"
+#endif
+
+#endif /* __OPENCL_VERSION__ */
+
+#endif /* LIBPAN_H */
--- a/src/panfrost/libpan/libpan_shaders.h
+++ b/src/panfrost/libpan/libpan_shaders.h
@ -0,0 +1,29 @@
+/*
+ * Copyright 2024 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef LIBPAN_SHADERS_H
+#define LIBPAN_SHADERS_H
+
+#ifndef PAN_ARCH
+#error "PAN_ARCH needs to be defined for this header to work!"
+#endif
+
+#if (PAN_ARCH == 4)
+#include "libpan_shaders_v4.h"
+#elif (PAN_ARCH == 5)
+#include "libpan_shaders_v5.h"
+#elif (PAN_ARCH == 6)
+#include "libpan_shaders_v6.h"
+#elif (PAN_ARCH == 7)
+#include "libpan_shaders_v7.h"
+#elif (PAN_ARCH == 9)
+#include "libpan_shaders_v9.h"
+#elif (PAN_ARCH == 10)
+#include "libpan_shaders_v10.h"
+#else
+#error "Unsupported architecture for libpan"
+#endif
+
+#endif
--- a/src/panfrost/libpan/meson.build
+++ b/src/panfrost/libpan/meson.build
@ -0,0 +1,56 @@
+# Copyright © 2024 Collabora
+# SPDX-License-Identifier: MIT
+
+
+libpan_shader_files = files(
+  'placeholder.cl',
+)
+
+# We need to set -fmacro-prefix-map properly for reproducability.
+fs = import('fs')
+relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root()) + '/'
+
+idep_libpan_per_arch = {}
+
+foreach ver : ['4', '5', '6', '7', '9', '10']
+  libpan_spv = custom_target(
+    'libpan_v' + ver + '.spv',
+    input : libpan_shader_files,
+    output : 'libpan_v' + ver + '.spv',
+    command : [
+        prog_mesa_clc, '-o', '@OUTPUT@',  '--depfile', '@DEPFILE@',
+        libpan_shader_files, '--',
+        '-DPAN_ARCH=@0@'.format(ver),
+        '-I' + join_paths(meson.current_source_dir(), '.'),
+        '-I' + join_paths(meson.current_source_dir(), '../../'),
+        '-I' + join_paths(meson.current_source_dir(), '../lib/'),
+        '-I' + join_paths(meson.current_build_dir(), '../lib/'),
+        '-fmacro-prefix-map=@0@='.format(relative_dir),
+        ],
+    env: ['MESA_SHADER_CACHE_DISABLE=true'],
+    depends : [pan_packers],
+    depfile : 'libpan_shaders_v' + ver + '.h.d',
+  )
+
+  libpan_nir_cur_ver = custom_target(
+    'libpan_v' + ver,
+    input : libpan_spv,
+    output : ['libpan_v' + ver + '.cpp', 'libpan_v' + ver + '.h'],
+    command : [prog_vtn_bindgen2, libpan_spv, '@OUTPUT0@', '@OUTPUT1@'],
+  )
+
+  libpan_shaders_cur_ver = custom_target(
+    'libpan_shaders_v' + ver,
+    input : libpan_spv,
+    output : ['libpan_shaders_v' + ver + '.h', 'libpan_shaders_v' + ver + '.c'],
+    command : [prog_panfrost_compile, 'libpan_shaders', ver, '@INPUT0@', '@OUTPUT0@', '@OUTPUT1@'],
+    env: ['MESA_SHADER_CACHE_DISABLE=true'],
+  )
+
+  idep_libpan_cur_ver = declare_dependency(
+    sources: [libpan_shaders_cur_ver, libpan_nir_cur_ver],
+    include_directories : include_directories('.'),
+  )
+  idep_libpan_per_arch += {ver: idep_libpan_cur_ver}
+endforeach
+
--- a/src/panfrost/libpan/placeholder.cl
+++ b/src/panfrost/libpan/placeholder.cl
--- a/src/panfrost/meson.build
+++ b/src/panfrost/meson.build
@ -3,7 +3,7 @@
 # SPDX-License-Identifier: MIT

 inc_panfrost = include_directories([
-   '.', 'shared', 'midgard', 'compiler', 'lib'
+   '.', 'shared', 'midgard', 'compiler', 'lib', 'libpan',
 ])

 compile_args_panfrost = [
@ -17,6 +17,8 @@ subdir('compiler')

 if with_gallium_panfrost or with_panfrost_vk
   subdir('lib')
+   subdir('clc')
+   subdir('libpan')
   subdir('perf')
 endif