From 3c0a55d93fade26947200ced70039d2cfc3ce34b Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi-Radulescu Date: Fri, 24 Apr 2026 12:27:16 +0300 Subject: [PATCH 1/6] teflon: Add support for custom nodes If a custom operation is claimed by teflon delegate, mark it as such to prevent other delegates (like XNNPack) from trying to handle it, which could result in an "Encountered unresolved op" error. Signed-off-by: Ioana Ciocoi-Radulescu --- src/gallium/frontends/teflon/tfl_device.c | 16 +++++++++++++++- src/gallium/include/pipe/p_state.h | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c index 012dfacd74f..15069ffa38a 100644 --- a/src/gallium/frontends/teflon/tfl_device.c +++ b/src/gallium/frontends/teflon/tfl_device.c @@ -304,6 +304,11 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi operation->type = PIPE_ML_OPERATION_TYPE_QUANTIZE; break; } + case kTfLiteBuiltinCustom: { + operation->type = PIPE_ML_OPERATION_TYPE_CUSTOM; + operation->custom.name = node_registration->custom_name ? 
strdup(node_registration->custom_name) : NULL; + break; + } default: return false; } @@ -536,6 +541,9 @@ dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_op case PIPE_ML_OPERATION_TYPE_LEAKY_RELU: teflon_debug("%-15s ", "LEAKY_RELU"); break; + case PIPE_ML_OPERATION_TYPE_CUSTOM: + teflon_debug("%-15s ", "CUSTOM"); + break; } char *input_buf = ralloc_strdup(NULL, ""); @@ -775,6 +783,8 @@ tflite_builtin_op_name(TfLiteBuiltinOperator op) return "TRANSPOSE"; case kTfLiteBuiltinLeakyRelu: return "LEAKY_RELU"; + case kTfLiteBuiltinCustom: + return "CUSTOM"; default: return "unknown"; } @@ -929,8 +939,12 @@ PrepareDelegate(TfLiteContext *tf_context, TfLiteDelegate *tf_delegate) } teflon_debug("\n"); - if (supported) + if (supported) { supported_nodes->data[node_count++] = node_index; + // If custom node is claimed by teflon delegate, tell other delegates to skip it + if (registration->builtin_code == kTfLiteBuiltinCustom) + registration->builtin_code = kTfLiteBuiltinDelegate; + } } supported_nodes->size = node_count; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index cbce129eea2..45f625d110c 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -1072,6 +1072,7 @@ enum pipe_ml_operation_type { PIPE_ML_OPERATION_TYPE_QUANTIZE, PIPE_ML_OPERATION_TYPE_MAXIMUM, PIPE_ML_OPERATION_TYPE_MINIMUM, + PIPE_ML_OPERATION_TYPE_CUSTOM, }; enum pipe_ml_pooling_type { @@ -1276,6 +1277,9 @@ struct pipe_ml_operation struct { float alpha; } leakyrelu; + struct { + char *name; + } custom; }; }; From 33b75f298cf9a1cf54f7f292904bb988770ed9bd Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi-Radulescu Date: Fri, 24 Apr 2026 12:38:01 +0300 Subject: [PATCH 2/6] teflon: Add a name field to pipe_tensor Signed-off-by: Ioana Ciocoi-Radulescu --- src/gallium/frontends/teflon/tfl_device.c | 2 ++ src/gallium/include/pipe/p_state.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git 
a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c index 15069ffa38a..ff48137b197 100644 --- a/src/gallium/frontends/teflon/tfl_device.c +++ b/src/gallium/frontends/teflon/tfl_device.c @@ -408,6 +408,8 @@ fill_tensor(struct teflon_delegate *delegate, TfLiteContext *tf_context, struct tensor->dims[out_dim] = 1; } + tensor->name = strdup(tf_tensor.name); + if (tf_tensor.quantization.type == kTfLiteAffineQuantization) { const TfLiteAffineQuantization *quant = (const TfLiteAffineQuantization *)tf_tensor.quantization.params; tensor->scale = quant->scale->data[0]; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 45f625d110c..2305802d0d2 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -1039,6 +1039,10 @@ struct pipe_tensor { * Zero-points used to quantize this tensor, per-axis quantization. */ int *zero_points; + /** + * Tensor name + */ + const char *name; /** * Whether the tensor contains data in INT8 or UINT8 format. */ From 37cb2a514f46d01a9fd88b9871862cbaedbf77cc Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi-Radulescu Date: Tue, 5 May 2026 23:55:35 +0300 Subject: [PATCH 3/6] teflon/tests: Add tests for offline-compiled models Some NPUs can only run custom operators so they require an offline model compilation step that translates from standard to custom ops. Add a new type of Teflon test that takes as input an existing model (for CPU execution) and a compiled version of same model (for NPU execution). Test expects the compiled models to be located in ${TEFLON_TEST_DATA}/compiled_models. Use a slightly larger tolerance value for these tests, since model compilation usually comes with some loss of precision. 
Signed-off-by: Ioana Ciocoi-Radulescu --- src/gallium/targets/teflon/test_teflon.cpp | 133 ++++++++++++++++++++- 1 file changed, 127 insertions(+), 6 deletions(-) diff --git a/src/gallium/targets/teflon/test_teflon.cpp b/src/gallium/targets/teflon/test_teflon.cpp index 86f85b59fc5..46f45c09cb5 100644 --- a/src/gallium/targets/teflon/test_teflon.cpp +++ b/src/gallium/targets/teflon/test_teflon.cpp @@ -25,8 +25,10 @@ #define TEST_ADD 1 #define TEST_FULLY_CONNECTED 1 #define TEST_MODELS 1 +#define TEST_COMPILED_MODELS 1 #define TOLERANCE 8 +#define COMPILED_MODELS_TOLERANCE 12 std::vector is_signed{false}; /* TODO: Support INT8? */ std::vector padding_same{false, true}; @@ -41,7 +43,7 @@ std::vector fc_channels{23, 46, 128, 256, 512}; std::vector fc_size{128, 1280, 25088, 62720}; static void -test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance) +run_and_validate(TfLiteModel *cpu_model, TfLiteModel *npu_model, std::string cache_dir, unsigned tolerance) { void **input = NULL; size_t num_inputs; @@ -51,11 +53,8 @@ test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance size_t num_outputs; void **npu_output; - TfLiteModel *model = TfLiteModelCreate(buf, buf_size); - assert(model); - - run_model(model, EXECUTOR_CPU, &input, &num_inputs, &cpu_output, &output_sizes, &output_types, &num_outputs, cache_dir); - run_model(model, EXECUTOR_NPU, &input, &num_inputs, &npu_output, &output_sizes, &output_types, &num_outputs, cache_dir); + run_model(cpu_model, EXECUTOR_CPU, &input, &num_inputs, &cpu_output, &output_sizes, &output_types, &num_outputs, cache_dir); + run_model(npu_model, EXECUTOR_NPU, &input, &num_inputs, &npu_output, &output_sizes, &output_types, &num_outputs, cache_dir); const char *dump_output = os_get_option("TEFLON_DUMP_OUTPUT"); if (dump_output && atoi(dump_output) == 1) { @@ -221,6 +220,15 @@ test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance free(output_sizes); free(output_types); 
+} + +static void +test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance) +{ + TfLiteModel *model = TfLiteModelCreate(buf, buf_size); + assert(model); + + run_and_validate(model, model, cache_dir, tolerance); TfLiteModelDelete(model); } @@ -249,6 +257,57 @@ test_model_file(std::string file_name, unsigned tolerance, bool use_cache) close(model_fd); } +static void +test_compiled_model(void *buf, size_t buf_size, void *cbuf, size_t cbuf_size, + std::string cache_dir, unsigned tolerance) +{ + TfLiteModel *cpu_model = TfLiteModelCreate(buf, buf_size); + TfLiteModel *npu_model = TfLiteModelCreate(cbuf, cbuf_size); + + assert(cpu_model); + assert(npu_model); + + run_and_validate(cpu_model, npu_model, cache_dir, tolerance); + + TfLiteModelDelete(cpu_model); + TfLiteModelDelete(npu_model); +} + +static void +test_compiled_model_file(std::string file_name, std::string compiled_file_name, + unsigned tolerance, bool use_cache) +{ + std::ostringstream cache_dir; + + if (use_cache) { + auto path = std::filesystem::path(file_name); + cache_dir << "/var/cache/teflon_tests/"; + cache_dir << path.parent_path().filename().string(); + cache_dir << "_"; + cache_dir << path.stem().string(); + } + + srand(4); + + struct stat sb, csb; + int model_fd = open(file_name.c_str(), O_RDONLY); + int cmodel_fd = open(compiled_file_name.c_str(), O_RDONLY); + + fstat(model_fd, &sb); + fstat(cmodel_fd, &csb); + + void *model_data = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, model_fd, 0); + void *cmodel_data = mmap(0, csb.st_size, PROT_READ, MAP_PRIVATE, cmodel_fd, 0); + + test_compiled_model(model_data, sb.st_size, cmodel_data, csb.st_size, + cache_dir.str(), tolerance); + + munmap(model_data, sb.st_size); + munmap(cmodel_data, csb.st_size); + close(model_fd); + close(cmodel_fd); +} + void test_conv(int input_size, int weight_size, int input_channels, int output_channels, int stride, bool padding_same, bool is_signed, bool depthwise, int seed) @@ -620,6 +679,66 @@ 
INSTANTIATE_TEST_SUITE_P( #endif +#if TEST_COMPILED_MODELS + +class CompiledModels : public testing::TestWithParam {}; + +TEST_P(CompiledModels, Op) +{ + std::ostringstream file_path, cfile_path; + auto test_name = GetParam(); + test_name.replace(test_name.find("_"), 1, "/"); + assert(os_get_option("TEFLON_TEST_DATA")); + file_path << os_get_option("TEFLON_TEST_DATA") << "/models/" << test_name << ".tflite"; + cfile_path << os_get_option("TEFLON_TEST_DATA") << "/compiled_models/" << test_name << ".tflite"; + + test_compiled_model_file(file_path.str(), cfile_path.str(), COMPILED_MODELS_TOLERANCE, true); +} + +std::vector +get_compiled_model_files(void) +{ + assert(os_get_option("TEFLON_TEST_DATA")); + std::stringstream dir, cdir; + dir << os_get_option("TEFLON_TEST_DATA") << "/models"; + cdir << os_get_option("TEFLON_TEST_DATA") << "/compiled_models"; + + std::vector paths; + std::filesystem::recursive_directory_iterator b(dir.str()); + for (auto const &f : b) { + if (f.path().extension() != ".tflite") + continue; + + std::filesystem::path relative_path = std::filesystem::relative(f.path(), dir.str()); + std::filesystem::path compiled_path = std::filesystem::path(cdir.str()) / relative_path; + if (!std::filesystem::exists(compiled_path)) + continue; + + std::stringstream path; + path << f.path().parent_path().filename().string(); + path << "_" << f.path().stem().string(); + paths.push_back(path.str()); + } + + std::sort(paths.begin(), paths.end()); + + return paths; +} + +static inline std::string +CompiledModelsTestCaseName( + const testing::TestParamInfo &info) +{ + return info.param; +} + +INSTANTIATE_TEST_SUITE_P( + , CompiledModels, + ::testing::ValuesIn(get_compiled_model_files()), + CompiledModelsTestCaseName); + +#endif + int main(int argc, char **argv) { @@ -656,6 +775,8 @@ main(int argc, char **argv) return 0; } else if (argc > 1 && !strcmp(argv[1], "run_model")) { test_model_file(std::string(argv[2]), TOLERANCE, false); + } else if (argc > 1 && 
!strcmp(argv[1], "run_compiled_model")) { + test_compiled_model_file(std::string(argv[2]), std::string(argv[3]), COMPILED_MODELS_TOLERANCE, false); } else { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); From 291d1d44b169bb99189faabe7a274bd29c3c6e0a Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi-Radulescu Date: Fri, 24 Apr 2026 13:36:36 +0300 Subject: [PATCH 4/6] Add new Gallium driver for NXP's Neutron NPU Add source code for the Neutron Gallium driver as well as some helper scripts for model compiling. The compiler can only be run on x86 for now. Signed-off-by: Ioana Ciocoi-Radulescu --- .clang-format-include | 1 + include/drm-uapi/neutron_accel.h | 130 ++++++ meson.build | 3 +- meson.options | 2 +- src/gallium/drivers/neutron/.clang-format | 2 + .../neutron/ci/neutron-imx95-fails.txt | 102 +++++ .../neutron/ci/neutron-imx95-flakes.txt | 0 .../neutron/ci/neutron-imx95-skips.txt | 227 +++++++++++ src/gallium/drivers/neutron/compile_model.py | 58 +++ src/gallium/drivers/neutron/meson.build | 20 + .../drivers/neutron/neutron_compiler.py | 219 ++++++++++ src/gallium/drivers/neutron/neutron_device.c | 257 ++++++++++++ src/gallium/drivers/neutron/neutron_device.h | 90 +++++ src/gallium/drivers/neutron/neutron_ml.c | 382 ++++++++++++++++++ src/gallium/drivers/neutron/neutron_ml.h | 73 ++++ src/gallium/meson.build | 6 + src/gallium/targets/dri/meson.build | 2 +- src/gallium/winsys/neutron/drm/meson.build | 13 + .../winsys/neutron/drm/neutron_drm_public.h | 17 + .../winsys/neutron/drm/neutron_drm_winsys.c | 20 + 20 files changed, 1621 insertions(+), 3 deletions(-) create mode 100644 include/drm-uapi/neutron_accel.h create mode 100644 src/gallium/drivers/neutron/.clang-format create mode 100644 src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt create mode 100644 src/gallium/drivers/neutron/ci/neutron-imx95-flakes.txt create mode 100644 src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt create mode 100644 src/gallium/drivers/neutron/compile_model.py 
create mode 100644 src/gallium/drivers/neutron/meson.build create mode 100644 src/gallium/drivers/neutron/neutron_compiler.py create mode 100644 src/gallium/drivers/neutron/neutron_device.c create mode 100644 src/gallium/drivers/neutron/neutron_device.h create mode 100644 src/gallium/drivers/neutron/neutron_ml.c create mode 100644 src/gallium/drivers/neutron/neutron_ml.h create mode 100644 src/gallium/winsys/neutron/drm/meson.build create mode 100644 src/gallium/winsys/neutron/drm/neutron_drm_public.h create mode 100644 src/gallium/winsys/neutron/drm/neutron_drm_winsys.c diff --git a/.clang-format-include b/.clang-format-include index ba52553fdc9..b446191541a 100644 --- a/.clang-format-include +++ b/.clang-format-include @@ -3,6 +3,7 @@ src/gallium/drivers/ethosu/**/* src/gallium/drivers/i915 +src/gallium/drivers/neutron/**/* src/gallium/drivers/r300/compiler/* src/gallium/drivers/rocket/**/* src/gallium/targets/teflon/**/* diff --git a/include/drm-uapi/neutron_accel.h b/include/drm-uapi/neutron_accel.h new file mode 100644 index 00000000000..a9e5682709d --- /dev/null +++ b/include/drm-uapi/neutron_accel.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright 2025-2026 NXP */ + +#ifndef __NEUTRON_ACCEL_H__ +#define __NEUTRON_ACCEL_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * enum drm_neutron_ioctl - Neutron IOCTL IDs + * + * @DRM_NEUTRON_CREATE_BO: Create a buffer object + * @DRM_NEUTRON_SYNC_BO: Sync (parts of) the buffer object memory + * @DRM_NEUTRON_SUBMIT_JOB: Submit a job to the device + */ +enum drm_neutron_ioctl { + DRM_NEUTRON_CREATE_BO = 0, + DRM_NEUTRON_SYNC_BO, + DRM_NEUTRON_SUBMIT_JOB, +}; + +/** + * struct drm_neutron_create_bo - Create a buffer object and return buffer + * info to user + * + * @size: Size in bytes of requested buffer. 
May be updated by driver + * if allocated size different than requested + * @handle: Returned handle for the new buffer object + * @pad: MBZ + * @map_offset: Returned offset for mmap() calls + */ +struct drm_neutron_create_bo { + __u64 size; + __u32 handle; + __u32 pad; + __u64 map_offset; +}; + +/** + * enum drm_neutron_sync_dir - Direction of buffer object synchronization + * + * @DRM_NEUTRON_SYNC_TO_DEVICE: Sync from CPU to device + * @DRM_NEUTRON_SYNC_FROM_DEVICE: Sync from device to CPU + */ +enum drm_neutron_sync_dir { + DRM_NEUTRON_SYNC_TO_DEVICE = 0, + DRM_NEUTRON_SYNC_FROM_DEVICE, +}; + +/** + * struct drm_neutron_sync_bo - Sync buffer object memory + * + * @handle: Handle of buffer object to sync + * @direction: Direction of sync, can be one of enum drm_neutron_sync_dir + * @size: Size of the memory to sync, in bytes + * @offset: Offset inside the buffer, in bytes + */ +struct drm_neutron_sync_bo { + __u32 handle; + __u32 direction; + __u64 size; + __u64 offset; +}; + +/** + * enum drm_neutron_job_type - Type of job to submit to Neutron device + * + * @DRM_NEUTRON_JOB_INFERENCE: Inference job + */ +enum drm_neutron_job_type { + DRM_NEUTRON_JOB_INFERENCE = 0, +}; + +/** + * struct drm_neutron_inference_job - Inference job descriptor + * + * @tensor_offset: Offset of tensor array inside job BO + * @microcode_offset: Microcode offset inside BO + * @tensor_count: Number of valid tensors + * @pad: MBZ + */ +struct drm_neutron_inference_job { + __u32 tensor_offset; + __u32 microcode_offset; + __u32 tensor_count; + __u32 pad[5]; +}; + +/** + * struct drm_neutron_submit_job - Submit a job to Neutron device + * + * @type: Job type, one of enum drm_neutron_job_type + * @bo_handle: BO handle for this job + * @inference: Inference job descriptor (when type is DRM_NEUTRON_JOB_INFERENCE) + * @reserved: Reserved for future job types + * @syncobj_handle: Handle of syncobj on which user waits for job completion + * @pad: MBZ + */ +struct drm_neutron_submit_job { + __u32 
type; + __u32 bo_handle; + union { + struct drm_neutron_inference_job inference; + __u32 reserved[8]; + }; + __u32 syncobj_handle; + __u32 pad; +}; + +#define DRM_IOCTL_NEUTRON_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_CREATE_BO, \ + struct drm_neutron_create_bo) + +#define DRM_IOCTL_NEUTRON_SYNC_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SYNC_BO, \ + struct drm_neutron_sync_bo) + +#define DRM_IOCTL_NEUTRON_SUBMIT_JOB \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SUBMIT_JOB, \ + struct drm_neutron_submit_job) + +#if defined(__cplusplus) +} +#endif + +#endif /* __NEUTRON_ACCEL_H__ */ diff --git a/meson.build b/meson.build index f1b561b6ad5..6e1ff9a3587 100644 --- a/meson.build +++ b/meson.build @@ -205,7 +205,7 @@ elif gallium_drivers.contains('all') gallium_drivers = [ 'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915', 'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris', - 'zink', 'd3d12', 'asahi', 'rocket', 'ethosu' + 'zink', 'd3d12', 'asahi', 'rocket', 'ethosu', 'neutron' ] endif @@ -234,6 +234,7 @@ with_gallium_d3d12 = gallium_drivers.contains('d3d12') with_gallium_asahi = gallium_drivers.contains('asahi') with_gallium_rocket = gallium_drivers.contains('rocket') with_gallium_ethosu = gallium_drivers.contains('ethosu') +with_gallium_neutron = gallium_drivers.contains('neutron') foreach gallium_driver : gallium_drivers pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper()) endforeach diff --git a/meson.options b/meson.options index 024b8340252..30bc0f0729d 100644 --- a/meson.options +++ b/meson.options @@ -87,7 +87,7 @@ option( choices : [ 'all', 'auto', 'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris', - 'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi', + 'lima', 'llvmpipe', 'neutron', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi', 'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink', ], description : 'List of 
gallium drivers to build. If this is set to auto ' + diff --git a/src/gallium/drivers/neutron/.clang-format b/src/gallium/drivers/neutron/.clang-format new file mode 100644 index 00000000000..34cd9d7d1d3 --- /dev/null +++ b/src/gallium/drivers/neutron/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: InheritParentConfig +DisableFormat: false diff --git a/src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt b/src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt new file mode 100644 index 00000000000..3c28d42358b --- /dev/null +++ b/src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt @@ -0,0 +1,102 @@ +CompiledModels.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail +CompiledModels.Op/inception_002,Fail +CompiledModels.Op/inception_003,Fail +CompiledModels.Op/inception_008,Fail +CompiledModels.Op/inception_013,Fail +CompiledModels.Op/inception_015,Fail +CompiledModels.Op/inception_016,Fail +CompiledModels.Op/inception_021,Fail +CompiledModels.Op/inception_024,Fail +CompiledModels.Op/inception_027,Fail +CompiledModels.Op/inception_030,Fail +CompiledModels.Op/inception_032,Fail +CompiledModels.Op/inception_033,Fail +CompiledModels.Op/inception_041,Fail +CompiledModels.Op/inception_055,Fail +CompiledModels.Op/inception_059,Fail +CompiledModels.Op/inception_063,Fail +CompiledModels.Op/inception_064,Fail +CompiledModels.Op/inception_071,Fail +CompiledModels.Op/inception_073,Fail +CompiledModels.Op/inception_075,Fail +CompiledModels.Op/inception_077,Fail +CompiledModels.Op/mobiledet_002,Fail +CompiledModels.Op/mobiledet_004,Fail +CompiledModels.Op/mobiledet_006,Fail +CompiledModels.Op/mobiledet_009,Fail +CompiledModels.Op/mobiledet_012,Fail +CompiledModels.Op/mobiledet_015,Fail +CompiledModels.Op/mobiledet_017,Fail +CompiledModels.Op/mobiledet_020,Fail +CompiledModels.Op/mobiledet_029,Fail +CompiledModels.Op/mobiledet_039,Fail +CompiledModels.Op/mobiledet_042,Fail +CompiledModels.Op/mobiledet_046,Fail +CompiledModels.Op/mobiledet_050,Fail 
+CompiledModels.Op/mobiledet_063,Fail +CompiledModels.Op/mobiledet_086,Fail +CompiledModels.Op/mobiledet_087,Fail +CompiledModels.Op/mobiledet_106,Fail +CompiledModels.Op/mobiledet_119,Fail +CompiledModels.Op/mobiledet_ssdlite_mobiledet_coco_qat_postprocess,Fail +CompiledModels.Op/mobilenetv1_001,Fail +CompiledModels.Op/mobilenetv1_002,Fail +CompiledModels.Op/mobilenetv1_003,Fail +CompiledModels.Op/mobilenetv1_005,Fail +CompiledModels.Op/mobilenetv1_006,Fail +CompiledModels.Op/mobilenetv1_008,Fail +CompiledModels.Op/mobilenetv1_009,Fail +CompiledModels.Op/mobilenetv1_012,Fail +CompiledModels.Op/mobilenetv1_016,Fail +CompiledModels.Op/mobilenetv1_018,Fail +CompiledModels.Op/mobilenetv1_020,Fail +CompiledModels.Op/mobilenetv1_025,Fail +CompiledModels.Op/mobilenetv1_026,Fail +CompiledModels.Op/mobilenetv1_mobilenet_v1_1_224_quant,Fail +CompiledModels.Op/mobilenetv2_001,Fail +CompiledModels.Op/mobilenetv2_003,Fail +CompiledModels.Op/mobilenetv2_006,Fail +CompiledModels.Op/mobilenetv2_007,Fail +CompiledModels.Op/mobilenetv2_010,Fail +CompiledModels.Op/mobilenetv2_014,Fail +CompiledModels.Op/mobilenetv2_018,Fail +CompiledModels.Op/mobilenetv2_025,Fail +CompiledModels.Op/mobilenetv2_029,Fail +CompiledModels.Op/mobilenetv2_040,Fail +CompiledModels.Op/mobilenetv2_044,Fail +CompiledModels.Op/mobilenetv2_059,Fail +CompiledModels.Op/mobilenetv2_061,Fail +CompiledModels.Op/movenetlightning_103,Fail +CompiledModels.Op/movenetlightning_104,Fail +CompiledModels.Op/movenetthunder_103,Fail +CompiledModels.Op/movenetthunder_104,Fail +CompiledModels.Op/ssdmobilenetv2_001,Fail +CompiledModels.Op/ssdmobilenetv2_003,Fail +CompiledModels.Op/ssdmobilenetv2_007,Fail +CompiledModels.Op/ssdmobilenetv2_040,Fail +CompiledModels.Op/ssdmobilenetv2_047,Fail +CompiledModels.Op/ssdmobilenetv2_052,Fail +CompiledModels.Op/ssdmobilenetv2_054,Fail +CompiledModels.Op/ssdmobilenetv2_065,Fail +CompiledModels.Op/ssdmobilenetv2_069,Fail +CompiledModels.Op/ssdmobilenetv2_072,Fail 
+CompiledModels.Op/ssdmobilenetv2_073,Fail +CompiledModels.Op/ssdmobilenetv2_077,Fail +CompiledModels.Op/ssdmobilenetv2_081,Fail +CompiledModels.Op/ssdmobilenetv2_089,Fail +CompiledModels.Op/ssdmobilenetv2_096,Fail +CompiledModels.Op/ssdmobilenetv2_102,Fail +CompiledModels.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail +CompiledModels.Op/yolox_001,Fail +CompiledModels.Op/yolox_004,Fail +CompiledModels.Op/yolox_027,Fail +CompiledModels.Op/yolox_042,Fail +CompiledModels.Op/yolox_077,Fail +CompiledModels.Op/yolox_086,Fail +CompiledModels.Op/yolox_102,Fail +CompiledModels.Op/yolox_112,Fail +CompiledModels.Op/yolox_122,Fail +CompiledModels.Op/yolox_132,Fail +CompiledModels.Op/yolox_147,Fail +CompiledModels.Op/yolox_yolox_nano_full_integer_quant,Fail + diff --git a/src/gallium/drivers/neutron/ci/neutron-imx95-flakes.txt b/src/gallium/drivers/neutron/ci/neutron-imx95-flakes.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt b/src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt new file mode 100644 index 00000000000..209c3091581 --- /dev/null +++ b/src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt @@ -0,0 +1,227 @@ +# Standard (un-compiled) tflite models are not supported on Neutron +Add.Op/.* +AddQuant.Op/.* +Conv2D.Op/.* +DepthwiseConv2D.Op/.* +FullyConnected.Op/.* + +Models.Op/.* + +# Compilation error due to duplicate/orphan input tensor +CompiledModels.Op/movenetlightning_117 +CompiledModels.Op/movenetlightning_120 +CompiledModels.Op/movenetthunder_117 +CompiledModels.Op/movenetthunder_120 + +# Conversion rate zero (operators or operator + tensor type/size combo +# not supported yet by Neutron compiler) +CompiledModels.Op/efficientdet_066 +CompiledModels.Op/efficientdet_101 +CompiledModels.Op/efficientdet_136 +CompiledModels.Op/efficientdet_251 +CompiledModels.Op/efficientdet_252 +CompiledModels.Op/efficientdet_253 +CompiledModels.Op/efficientdet_254 
+CompiledModels.Op/efficientdet_255 +CompiledModels.Op/efficientdet_256 +CompiledModels.Op/efficientdet_257 +CompiledModels.Op/efficientdet_258 +CompiledModels.Op/efficientdet_259 +CompiledModels.Op/efficientdet_260 +CompiledModels.Op/efficientdet_261 +CompiledModels.Op/efficientdet_262 +CompiledModels.Op/efficientdet_263 +CompiledModels.Op/efficientdet_264 +CompiledModels.Op/efficientdet_265 +CompiledModels.Op/inception_082 +CompiledModels.Op/micronetlarge_013 +CompiledModels.Op/mobiledet_097 +CompiledModels.Op/mobiledet_099 +CompiledModels.Op/mobiledet_101 +CompiledModels.Op/mobiledet_103 +CompiledModels.Op/mobiledet_105 +CompiledModels.Op/mobiledet_107 +CompiledModels.Op/mobiledet_109 +CompiledModels.Op/mobiledet_111 +CompiledModels.Op/mobiledet_113 +CompiledModels.Op/mobiledet_115 +CompiledModels.Op/mobiledet_117 +CompiledModels.Op/mobiledet_118 +CompiledModels.Op/mobiledet_122 +CompiledModels.Op/mobiledet_123 +CompiledModels.Op/mobilenetv1_030 +CompiledModels.Op/movenetlightning_000 +CompiledModels.Op/movenetlightning_001 +CompiledModels.Op/movenetlightning_002 +CompiledModels.Op/movenetlightning_003 +CompiledModels.Op/movenetlightning_071 +CompiledModels.Op/movenetlightning_075 +CompiledModels.Op/movenetlightning_079 +CompiledModels.Op/movenetlightning_087 +CompiledModels.Op/movenetlightning_088 +CompiledModels.Op/movenetlightning_089 +CompiledModels.Op/movenetlightning_090 +CompiledModels.Op/movenetlightning_091 +CompiledModels.Op/movenetlightning_092 +CompiledModels.Op/movenetlightning_093 +CompiledModels.Op/movenetlightning_094 +CompiledModels.Op/movenetlightning_095 +CompiledModels.Op/movenetlightning_096 +CompiledModels.Op/movenetlightning_097 +CompiledModels.Op/movenetlightning_098 +CompiledModels.Op/movenetlightning_099 +CompiledModels.Op/movenetlightning_105 +CompiledModels.Op/movenetlightning_112 +CompiledModels.Op/movenetlightning_113 +CompiledModels.Op/movenetlightning_114 +CompiledModels.Op/movenetlightning_115 
+CompiledModels.Op/movenetlightning_116 +CompiledModels.Op/movenetlightning_118 +CompiledModels.Op/movenetlightning_119 +CompiledModels.Op/movenetlightning_122 +CompiledModels.Op/movenetlightning_123 +CompiledModels.Op/movenetlightning_124 +CompiledModels.Op/movenetlightning_125 +CompiledModels.Op/movenetlightning_126 +CompiledModels.Op/movenetlightning_127 +CompiledModels.Op/movenetlightning_128 +CompiledModels.Op/movenetlightning_129 +CompiledModels.Op/movenetlightning_130 +CompiledModels.Op/movenetlightning_131 +CompiledModels.Op/movenetlightning_132 +CompiledModels.Op/movenetlightning_133 +CompiledModels.Op/movenetlightning_134 +CompiledModels.Op/movenetlightning_135 +CompiledModels.Op/movenetlightning_136 +CompiledModels.Op/movenetlightning_137 +CompiledModels.Op/movenetlightning_138 +CompiledModels.Op/movenetlightning_139 +CompiledModels.Op/movenetlightning_140 +CompiledModels.Op/movenetlightning_141 +CompiledModels.Op/movenetlightning_142 +CompiledModels.Op/movenetlightning_143 +CompiledModels.Op/movenetlightning_144 +CompiledModels.Op/movenetlightning_145 +CompiledModels.Op/movenetlightning_147 +CompiledModels.Op/movenetlightning_149 +CompiledModels.Op/movenetlightning_150 +CompiledModels.Op/movenetlightning_151 +CompiledModels.Op/movenetlightning_152 +CompiledModels.Op/movenetlightning_153 +CompiledModels.Op/movenetlightning_154 +CompiledModels.Op/movenetlightning_155 +CompiledModels.Op/movenetlightning_156 +CompiledModels.Op/movenetthunder_000 +CompiledModels.Op/movenetthunder_001 +CompiledModels.Op/movenetthunder_002 +CompiledModels.Op/movenetthunder_003 +CompiledModels.Op/movenetthunder_071 +CompiledModels.Op/movenetthunder_075 +CompiledModels.Op/movenetthunder_079 +CompiledModels.Op/movenetthunder_087 +CompiledModels.Op/movenetthunder_088 +CompiledModels.Op/movenetthunder_089 +CompiledModels.Op/movenetthunder_090 +CompiledModels.Op/movenetthunder_091 +CompiledModels.Op/movenetthunder_092 +CompiledModels.Op/movenetthunder_093 
+CompiledModels.Op/movenetthunder_094 +CompiledModels.Op/movenetthunder_095 +CompiledModels.Op/movenetthunder_096 +CompiledModels.Op/movenetthunder_097 +CompiledModels.Op/movenetthunder_098 +CompiledModels.Op/movenetthunder_099 +CompiledModels.Op/movenetthunder_105 +CompiledModels.Op/movenetthunder_112 +CompiledModels.Op/movenetthunder_113 +CompiledModels.Op/movenetthunder_114 +CompiledModels.Op/movenetthunder_115 +CompiledModels.Op/movenetthunder_116 +CompiledModels.Op/movenetthunder_118 +CompiledModels.Op/movenetthunder_119 +CompiledModels.Op/movenetthunder_122 +CompiledModels.Op/movenetthunder_123 +CompiledModels.Op/movenetthunder_124 +CompiledModels.Op/movenetthunder_125 +CompiledModels.Op/movenetthunder_126 +CompiledModels.Op/movenetthunder_127 +CompiledModels.Op/movenetthunder_128 +CompiledModels.Op/movenetthunder_129 +CompiledModels.Op/movenetthunder_130 +CompiledModels.Op/movenetthunder_131 +CompiledModels.Op/movenetthunder_132 +CompiledModels.Op/movenetthunder_133 +CompiledModels.Op/movenetthunder_134 +CompiledModels.Op/movenetthunder_135 +CompiledModels.Op/movenetthunder_136 +CompiledModels.Op/movenetthunder_137 +CompiledModels.Op/movenetthunder_138 +CompiledModels.Op/movenetthunder_139 +CompiledModels.Op/movenetthunder_140 +CompiledModels.Op/movenetthunder_141 +CompiledModels.Op/movenetthunder_142 +CompiledModels.Op/movenetthunder_143 +CompiledModels.Op/movenetthunder_144 +CompiledModels.Op/movenetthunder_145 +CompiledModels.Op/movenetthunder_147 +CompiledModels.Op/movenetthunder_149 +CompiledModels.Op/movenetthunder_150 +CompiledModels.Op/movenetthunder_151 +CompiledModels.Op/movenetthunder_152 +CompiledModels.Op/movenetthunder_153 +CompiledModels.Op/movenetthunder_154 +CompiledModels.Op/movenetthunder_155 +CompiledModels.Op/movenetthunder_156 +CompiledModels.Op/ssdmobilenetv2_049 +CompiledModels.Op/ssdmobilenetv2_051 +CompiledModels.Op/ssdmobilenetv2_067 +CompiledModels.Op/ssdmobilenetv2_068 +CompiledModels.Op/ssdmobilenetv2_070 
+CompiledModels.Op/ssdmobilenetv2_075 +CompiledModels.Op/ssdmobilenetv2_076 +CompiledModels.Op/ssdmobilenetv2_078 +CompiledModels.Op/ssdmobilenetv2_083 +CompiledModels.Op/ssdmobilenetv2_084 +CompiledModels.Op/ssdmobilenetv2_086 +CompiledModels.Op/ssdmobilenetv2_091 +CompiledModels.Op/ssdmobilenetv2_092 +CompiledModels.Op/ssdmobilenetv2_094 +CompiledModels.Op/ssdmobilenetv2_099 +CompiledModels.Op/ssdmobilenetv2_100 +CompiledModels.Op/ssdmobilenetv2_101 +CompiledModels.Op/ssdmobilenetv2_107 +CompiledModels.Op/ssdmobilenetv2_108 +CompiledModels.Op/ssdmobilenetv2_109 +CompiledModels.Op/yolox_000 +CompiledModels.Op/yolox_003 +CompiledModels.Op/yolox_012 +CompiledModels.Op/yolox_103 +CompiledModels.Op/yolox_104 +CompiledModels.Op/yolox_113 +CompiledModels.Op/yolox_114 +CompiledModels.Op/yolox_123 +CompiledModels.Op/yolox_124 +CompiledModels.Op/yolox_125 +CompiledModels.Op/yolox_126 +CompiledModels.Op/yolox_127 +CompiledModels.Op/yolox_128 +CompiledModels.Op/yolox_129 +CompiledModels.Op/yolox_130 +CompiledModels.Op/yolox_131 +CompiledModels.Op/yolox_133 +CompiledModels.Op/yolox_134 +CompiledModels.Op/yolox_135 +CompiledModels.Op/yolox_136 +CompiledModels.Op/yolox_137 +CompiledModels.Op/yolox_139 +CompiledModels.Op/yolox_140 +CompiledModels.Op/yolox_141 +CompiledModels.Op/yolox_142 +CompiledModels.Op/yolox_143 +CompiledModels.Op/yolox_145 +CompiledModels.Op/yolox_146 +CompiledModels.Op/yolox_148 +CompiledModels.Op/yolox_149 +CompiledModels.Op/yolox_151 +CompiledModels.Op/yolox_152 +CompiledModels.Op/yolox_153 diff --git a/src/gallium/drivers/neutron/compile_model.py b/src/gallium/drivers/neutron/compile_model.py new file mode 100644 index 00000000000..e6195a0ec2b --- /dev/null +++ b/src/gallium/drivers/neutron/compile_model.py @@ -0,0 +1,58 @@ +# Copyright 2026 NXP +# SPDX-License-Identifier: MIT + +""" +Compile a TFLite model for the Neutron NPU. 
+
+Usage: python compile_model.py <input.tflite> <output.tflite>
+
+Exit codes:
+ 0: Success
+ 1: Converter internal error
+ 2: Zero conversion rate (model unchanged)
+ 3: Invalid arguments or input file
+"""
+
+import sys
+import os
+from eiq_neutron_sdk import neutron_converter # from https://eiq.nxp.com/repository
+
+# Only one platform supported for now
+TARGET = "imx95"
+
+
+def main():
+    if len(sys.argv) != 3:
+        print("Usage: python compile_model.py <input.tflite> <output.tflite>")
+        sys.exit(3)
+
+    input_file = sys.argv[1]
+    output_file = sys.argv[2]
+
+    if not os.path.isfile(input_file):
+        print(f"Error: Input file '{input_file}' does not exist")
+        sys.exit(3)
+
+    if not input_file.endswith('.tflite'):
+        print("Error: Input file must be a .tflite model")
+        sys.exit(3)
+
+    with open(input_file, 'rb') as f:
+        model_data = f.read()
+
+    try:
+        cmodel_data = neutron_converter.convertModel(list(model_data), TARGET)
+    except Exception as e:
+        print(f"Error during model conversion: {e}")
+        sys.exit(1)
+
+    if bytes(cmodel_data) == model_data:
+        print(f"Warning: Conversion rate was zero for {input_file}, skipping output")
+        sys.exit(2)
+
+    with open(output_file, 'wb') as f:
+        f.write(bytes(cmodel_data))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/gallium/drivers/neutron/meson.build b/src/gallium/drivers/neutron/meson.build
new file mode 100644
index 00000000000..fd75b6f17f1
--- /dev/null
+++ b/src/gallium/drivers/neutron/meson.build
@@ -0,0 +1,20 @@
+# Copyright 2019 Google, Inc
+# SPDX-License-Identifier: MIT
+
+files_neutron = files(
+  'neutron_device.c',
+  'neutron_ml.c',
+)
+
+libneutron = static_library(
+  'neutron',
+  files_neutron,
+  include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
+  gnu_symbol_visibility : 'hidden',
+  dependencies : [idep_mesautil, dep_libdrm],
+)
+
+driver_neutron = declare_dependency(
+  compile_args : '-DGALLIUM_NEUTRON',
+  link_with : [libneutronwinsys, libneutron]
+)
diff --git a/src/gallium/drivers/neutron/neutron_compiler.py
b/src/gallium/drivers/neutron/neutron_compiler.py
new file mode 100644
index 00000000000..18dcdd39e29
--- /dev/null
+++ b/src/gallium/drivers/neutron/neutron_compiler.py
@@ -0,0 +1,219 @@
+# Copyright 2026 NXP
+# SPDX-License-Identifier: MIT
+
+"""
+Batch compiler of TFLite models for NXP's Neutron NPU
+
+Usage modes (mutually exclusive):
+neutron_compiler.py --in <model.tflite> --out <compiled.tflite>
+neutron_compiler.py --in-dir <models_dir> --out-dir <compiled_dir>
+neutron_compiler.py --teflon-test-models
+"""
+
+import sys
+import os
+import argparse
+import subprocess
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    mode_group = parser.add_mutually_exclusive_group(required=True)
+    mode_group.add_argument('--in',
+                            dest='input_file',
+                            action='store',
+                            help='Input TFLite model file')
+    mode_group.add_argument('--in-dir',
+                            dest='input_dir',
+                            action='store',
+                            help='Input directory containing TFLite models to be compiled')
+    mode_group.add_argument('--teflon-test-models',
+                            dest='teflon_test_models',
+                            action='store_true',
+                            help='Use ${TEFLON_TEST_DATA}/models as input directory and ${TEFLON_TEST_DATA}/compiled_models as output')
+
+    parser.add_argument('--out',
+                        dest='output_file',
+                        action='store',
+                        help='Output TFLite model file (required with --in)')
+    parser.add_argument('--out-dir',
+                        dest='output_dir',
+                        action='store',
+                        help='Output directory for compiled models (required with --in-dir). Output files will have the same name as input files')
+
+    parser.add_argument('--force',
+                        action='store_true',
+                        required=False,
+                        help='Overwrite output files if they exist. Ignored for single file mode')
+    parser.add_argument('--show-details',
+                        action='store_true',
+                        required=False,
+                        help='Print compilation result for each model. Ignored for single file mode')
+    parser.add_argument('--log-to-file',
+                        action='store_true',
+                        required=False,
+                        help='Generate log file for each compile attempt. 
Log is located next to output file')
+
+    args = parser.parse_args(argv)
+
+    if args.input_file:
+        if not args.output_file:
+            parser.error("--out is required when using --in")
+        if not os.path.isfile(args.input_file):
+            parser.error("--in must be an existing file")
+
+    if args.input_dir:
+        if not args.output_dir:
+            parser.error("--out-dir is required when using --in-dir")
+        if not os.path.isdir(args.input_dir):
+            parser.error("--in-dir must be an existing directory")
+
+    if args.teflon_test_models and not os.environ.get('TEFLON_TEST_DATA'):
+        parser.error("--teflon-test-models requires TEFLON_TEST_DATA to be set")
+
+    return args
+
+def compile(input_file, output_file):
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    worker_script = os.path.join(script_dir, "compile_model.py")
+
+    try:
+        result = subprocess.run(
+            [sys.executable, worker_script, input_file, output_file],
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+    except Exception as e:
+        print(f"Error compiling {input_file}: {e}", file=sys.stderr)
+        return None
+
+    return result
+
+def write_log(output_file, result):
+    model_name_we = os.path.splitext(os.path.basename(output_file))[0]
+    output_dir = os.path.dirname(output_file)
+    log_file = os.path.join(output_dir, f"{model_name_we}_compiler_output.log")
+
+    with open(log_file, 'w') as f:
+        if result.stdout:
+            f.write(result.stdout.strip())
+        if result.stderr:
+            f.write('\n')
+            f.write(result.stderr.strip())
+
+def print_result(input_file, result):
+    if result.returncode == 0:
+        status = "SUCCESS"
+    elif result.returncode == 1:
+        status = "COMPILER ERROR"
+    elif result.returncode == 2:
+        status = "ZERO CONVERSION RATE"
+    elif result.returncode == 3:
+        status = "SCRIPT ERROR"
+    else:
+        status = f"UNKNOWN({result.returncode})"
+
+    print(f" {input_file}: {status}")
+    if result.returncode != 0 and result.stderr:
+        print(result.stderr.strip())
+
+def print_stats(total, ok, error, zero, skip):
+    print("\n" + "=" * 50)
+    print(f"Total models processed: 
{total}") + print(f" Successful: {ok}") + print(f" Errors: {error}") + print(f" Zero conversion rate: {zero}") + print(f" Skipped: {skip}") + print("=" * 50) + +def find_tflite_files(directory): + tflite_files = [] + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.tflite'): + tflite_files.append(os.path.join(root, file)) + return sorted(tflite_files) + +def process_model(input_file, output_file, log_to_file, show_details): + output_dir = os.path.dirname(os.path.abspath(output_file)) + os.makedirs(output_dir, exist_ok=True) + + result = compile(input_file, output_file) + if not result: + return 3 + + if log_to_file: + write_log(output_file, result) + + if show_details: + print_result(input_file, result) + + return result.returncode + +def process_directory(input_dir, output_dir, force, log_to_file, show_details): + total = 0 + ok = 0 + errors = 0 + zero = 0 + skip = 0 + + tflite_files = find_tflite_files(input_dir) + for input_file in tflite_files: + total += 1 + + rel_path = os.path.relpath(input_file, input_dir) + output_file = os.path.join(output_dir, rel_path) + if os.path.exists(output_file) and not force: + if show_details: + print(f" {input_file}: SKIPPED") + skip += 1 + continue + + ret_code = process_model(input_file, output_file, log_to_file, show_details) + if ret_code == 0: + ok += 1 + elif ret_code == 2: + zero += 1 + else: + errors += 1 + + print_stats(total, ok, errors, zero, skip) + +def main(argv): + args = parse_args(argv) + + if args.input_file: + process_model( + args.input_file, + args.output_file, + args.log_to_file, + True) + elif args.input_dir: + process_directory( + args.input_dir, + args.output_dir, + args.force, + args.log_to_file, + args.show_details + ) + elif args.teflon_test_models: + teflon_test_data = os.environ.get('TEFLON_TEST_DATA') + input_dir = os.path.join(teflon_test_data, 'models') + output_dir = os.path.join(teflon_test_data, 'compiled_models') + + print(f"Compiling Teflon test models:") 
+ print(f" Input directory: {input_dir}") + print(f" Output directory: {output_dir}") + + process_directory( + input_dir, + output_dir, + args.force, + args.log_to_file, + args.show_details + ) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/src/gallium/drivers/neutron/neutron_device.c b/src/gallium/drivers/neutron/neutron_device.c new file mode 100644 index 00000000000..39a5e961725 --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_device.c @@ -0,0 +1,257 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#include "util/os_mman.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/u_transfer.h" + +#include + +#include "drm-uapi/neutron_accel.h" + +#include "neutron_device.h" +#include "neutron_ml.h" + +static const struct debug_named_value neutron_debug_options[] = { + {"dbg_msgs", NEUTRON_DBG_MSGS, "Print debug messages"}, + {"dump_bos", NEUTRON_DBG_DUMP_BOS, "Dump buffers for analysis"}, + {"zero_bos", NEUTRON_DBG_ZERO, "Zero buffers for debugging"}, + DEBUG_NAMED_VALUE_END}; + +DEBUG_GET_ONCE_FLAGS_OPTION(neutron_debug, "NEUTRON_DEBUG", + neutron_debug_options, 0) +int neutron_debug = 0; + +static void +neutron_screen_destroy(struct pipe_screen *pscreen) +{ + struct neutron_screen *screen = neutron_screen(pscreen); + + ralloc_free(screen); +} + +static void +neutron_context_destroy(struct pipe_context *pctx) +{ + struct neutron_context *ctx = neutron_context(pctx); + struct neutron_screen *screen = neutron_screen(pctx->screen); + + drmSyncobjDestroy(screen->fd, ctx->syncobj_handle); + ralloc_free(ctx); +} + +static void * +neutron_buffer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level, + unsigned usage, const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct neutron_screen *screen = neutron_screen(pctx->screen); + struct neutron_resource *rsc = neutron_resource(prsc); + struct drm_neutron_sync_bo arg = {0}; + int ret; + + assert(level == 0); + 
assert(prsc->target == PIPE_BUFFER); + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); + + struct pipe_transfer *transfer = rzalloc(NULL, struct pipe_transfer); + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + + pipe_resource_reference(&transfer->resource, prsc); + + if (transfer->usage & PIPE_MAP_READ) { + arg.handle = rsc->handle; + arg.direction = DRM_NEUTRON_SYNC_FROM_DEVICE; + arg.size = box->width; + arg.offset = box->x; + + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg); + if (ret < 0) + goto free_transfer; + } + + *out_transfer = transfer; + + return (uint8_t *)rsc->map + box->x; + +free_transfer: + pipe_resource_reference(&transfer->resource, NULL); + ralloc_free(transfer); + return NULL; +} + +static void +neutron_buffer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct neutron_resource *rsrc = neutron_resource(transfer->resource); + struct neutron_screen *screen = neutron_screen(pctx->screen); + struct drm_neutron_sync_bo arg = {0}; + int ret; + + if (transfer->usage & PIPE_MAP_WRITE) { + arg.handle = rsrc->handle; + arg.direction = DRM_NEUTRON_SYNC_TO_DEVICE; + arg.size = transfer->box.width; + arg.offset = transfer->box.x; + + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg); + assert(ret >= 0); + } + + pipe_resource_reference(&transfer->resource, NULL); + ralloc_free(transfer); +} + +static struct pipe_context * +neutron_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags) +{ + struct neutron_context *ctx = rzalloc(NULL, struct neutron_context); + struct neutron_screen *screen = neutron_screen(pscreen); + struct pipe_context *pctx = &ctx->base; + int ret; + + if (!ctx) + return NULL; + + ret = drmSyncobjCreate(screen->fd, 0, &ctx->syncobj_handle); + if (ret) + return NULL; + + pctx->screen = pscreen; + pctx->priv = priv; + + pctx->destroy = neutron_context_destroy; + + pctx->buffer_map = 
neutron_buffer_map; + pctx->buffer_unmap = neutron_buffer_unmap; + pctx->resource_copy_region = util_resource_copy_region; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->clear_buffer = u_default_clear_buffer; + + pctx->ml_subgraph_invoke = neutron_ml_subgraph_invoke; + pctx->ml_subgraph_read_output = neutron_ml_subgraph_read_outputs; + + return pctx; +} + +static struct pipe_resource * +neutron_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *template) +{ + struct neutron_screen *screen = neutron_screen(pscreen); + struct drm_neutron_create_bo arg = {0}; + struct neutron_resource *rsc; + int ret; + + assert(template->target == PIPE_BUFFER); + assert(template->height0 == 1); + assert(template->depth0 == 1); + assert(template->array_size == 1); + + rsc = rzalloc(NULL, struct neutron_resource); + if (!rsc) + return NULL; + + rsc->base = *template; + rsc->base.screen = pscreen; + rsc->base.nr_samples = template->nr_samples; + pipe_reference_init(&rsc->base.reference, 1); + + arg.size = template->width0; + + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_CREATE_BO, &arg); + if (ret < 0) + goto free_rsc; + + rsc->handle = arg.handle; + rsc->size = arg.size; + + rsc->map = os_mmap(NULL, rsc->size, PROT_READ | PROT_WRITE, MAP_SHARED, + screen->fd, arg.map_offset); + if (rsc->map == MAP_FAILED) + goto close_bo; + + return &rsc->base; + +close_bo : { + struct drm_gem_close close_arg = {.handle = rsc->handle}; + drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_arg); +} +free_rsc: + ralloc_free(rsc); + return NULL; +} + +static void +neutron_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct neutron_resource *rsc = neutron_resource(prsc); + struct neutron_screen *screen = neutron_screen(pscreen); + struct drm_gem_close arg = {0}; + int ret; + + os_munmap(rsc->map, rsc->size); + + arg.handle = rsc->handle; + + ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); + assert(ret >= 0); + + ralloc_free(rsc); 
+} + +static int +neutron_screen_get_fd(struct pipe_screen *pscreen) +{ + return neutron_screen(pscreen)->fd; +} + +static struct pipe_ml_device * +neutron_get_ml_device(struct pipe_screen *pscreen) +{ + return &neutron_screen(pscreen)->ml_device.base; +} + +struct pipe_screen * +neutron_screen_create(int fd, + const struct pipe_screen_config *config, + struct renderonly *ro) +{ + struct neutron_screen *neutron_screen; + struct pipe_screen *screen; + + neutron_screen = rzalloc(NULL, struct neutron_screen); + if (!neutron_screen) + return NULL; + + neutron_debug = debug_get_option_neutron_debug(); + + screen = &neutron_screen->pscreen; + + neutron_screen->fd = fd; + + screen->get_screen_fd = neutron_screen_get_fd; + screen->destroy = neutron_screen_destroy; + screen->context_create = neutron_context_create; + screen->resource_create = neutron_resource_create; + screen->resource_destroy = neutron_resource_destroy; + + neutron_screen->ml_device.base.ml_operation_supported = neutron_ml_operation_supported; + neutron_screen->ml_device.base.ml_subgraph_create = neutron_ml_subgraph_create; + neutron_screen->ml_device.base.ml_subgraph_destroy = neutron_ml_subgraph_destroy; + screen->get_ml_device = neutron_get_ml_device; + + return screen; +} diff --git a/src/gallium/drivers/neutron/neutron_device.h b/src/gallium/drivers/neutron/neutron_device.h new file mode 100644 index 00000000000..6ccf9da27bb --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_device.h @@ -0,0 +1,90 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "renderonly/renderonly.h" +#include "util/log.h" +#include "util/macros.h" + +#ifndef NEUTRON_DEVICE_H +#define NEUTRON_DEVICE_H + +enum neutron_dbg { + NEUTRON_DBG_MSGS = BITFIELD_BIT(0), + NEUTRON_DBG_DUMP_BOS = BITFIELD_BIT(1), + NEUTRON_DBG_ZERO = BITFIELD_BIT(2), +}; + +#define DBG_ENABLED(flag) unlikely(neutron_debug &(flag)) +extern 
int neutron_debug; + +#define DBG(fmt, ...) \ + do { \ + if (DBG_ENABLED(NEUTRON_DBG_MSGS)) \ + mesa_logd(fmt, ##__VA_ARGS__); \ + } while (0) + +struct neutron_ml_device { + struct pipe_ml_device base; + struct pipe_context *context; +}; + +static inline struct neutron_ml_device * +neutron_ml_device(struct pipe_ml_device *dev) +{ + return (struct neutron_ml_device *)dev; +} + +struct neutron_screen { + struct pipe_screen pscreen; + struct neutron_ml_device ml_device; + int fd; +}; + +static inline struct neutron_screen * +neutron_screen(struct pipe_screen *p) +{ + return (struct neutron_screen *)p; +} + +static inline struct neutron_screen * +neutron_ml_device_screen(struct pipe_ml_device *pdevice) +{ + struct neutron_ml_device *dev = neutron_ml_device(pdevice); + return container_of(dev, struct neutron_screen, ml_device); +} + +struct neutron_context { + struct pipe_context base; + uint32_t syncobj_handle; +}; + +static inline struct neutron_context * +neutron_context(struct pipe_context *pctx) +{ + return (struct neutron_context *)pctx; +} + +struct neutron_resource { + struct pipe_resource base; + uint32_t handle; + uint64_t size; + void *map; +}; + +static inline struct neutron_resource * +neutron_resource(struct pipe_resource *p) +{ + return (struct neutron_resource *)p; +} + +struct pipe_screen * +neutron_screen_create(int fd, + const struct pipe_screen_config *config, + struct renderonly *ro); + +#endif /* NEUTRON_DEVICE_H */ diff --git a/src/gallium/drivers/neutron/neutron_ml.c b/src/gallium/drivers/neutron/neutron_ml.c new file mode 100644 index 00000000000..29a0ac72f71 --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_ml.c @@ -0,0 +1,382 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#include "pipe/p_state.h" +#include "util/macros.h" +#include "util/u_dynarray.h" +#include "util/u_inlines.h" + +#include + +#include "drm-uapi/neutron_accel.h" +#include "neutron_ml.h" + +static const char * +neutron_tensor_name(enum 
neutron_tensor_type type) +{ + switch (type) { + case NEUTRON_WEIGHTS: + return "NeutronWeights"; + case NEUTRON_MICROCODE: + return "NeutronMicrocode"; + case NEUTRON_KERNELS: + return "NeutronKernels"; + case NEUTRON_SCRATCH: + return "NeutronScratch"; + case NEUTRON_PROFILE: + return "NeutronProfile"; + case NEUTRON_DEBUG: + return "NeutronDebug"; + case NEUTRON_DATA_INPUT: + return "Input"; + case NEUTRON_DATA_OUTPUT: + return "Output"; + default: + return "N/A"; + } +} + +static void +neutron_dump_buffer(const uint8_t *ptr, const char *name, + int id, int offset, unsigned size) +{ + char buffer[255]; + + snprintf(buffer, sizeof(buffer), "mesa-%s-%03u.bin", name, id); + + FILE *f = fopen(buffer, "wb"); + assert(f); + fwrite(ptr + offset, 1, size, f); + if (ferror(f)) { + DBG("Error in writing to file: %s\n", strerror(errno)); + } + fflush(f); + fclose(f); +} + +static void +neutron_dump_tensor(struct pipe_resource *bo, + struct neutron_tensor *ntensor, int id) +{ + uint8_t *buf = neutron_resource(bo)->map; + neutron_dump_buffer(buf, neutron_tensor_name(ntensor->type), id, + ntensor->offset, ntensor->size); +} + +bool +neutron_ml_operation_supported(struct pipe_ml_device *pdevice, + const struct pipe_ml_operation *operation) +{ + if (operation->type != PIPE_ML_OPERATION_TYPE_CUSTOM) + return false; + + if (!operation->custom.name || + strcmp(operation->custom.name, "NeutronGraph") != 0) + return false; + + if (operation->input_count <= NEUTRON_CUSTOM_INPUT_MAX) + return false; + + return true; +} + +static unsigned +get_tensor_size(const struct pipe_tensor *tensor) +{ + unsigned size = 1; + + for (unsigned i = 0; i < 4; i++) + size *= tensor->dims[i]; + + return size; +} + +static enum neutron_tensor_type +get_tensor_type(struct pipe_tensor *tensor, bool is_input) +{ + for (unsigned i = 0; i < NEUTRON_CUSTOM_MAX; i++) { + if (strcmp(tensor->name, neutron_tensor_name(i)) == 0) + return i; + } + + return is_input ? 
NEUTRON_DATA_INPUT : NEUTRON_DATA_OUTPUT; +} + +static bool +is_custom_input(struct neutron_tensor *ntensor) +{ + return ntensor->type < NEUTRON_CUSTOM_INPUT_MAX; +} + +static void +create_tensor(struct neutron_subgraph *subgraph, + struct pipe_tensor *tensor, bool is_input) +{ + struct neutron_tensor ntensor = {0}; + + ntensor.type = get_tensor_type(tensor, is_input); + ntensor.size = get_tensor_size(tensor); + ntensor.tensor = tensor; + + if (ntensor.type == NEUTRON_DATA_INPUT) + ntensor.id = subgraph->data_inputs++; + else if (ntensor.type == NEUTRON_DATA_OUTPUT) + ntensor.id = subgraph->data_outputs++; + + util_dynarray_append(&subgraph->tensors, ntensor); +} + +static unsigned +get_header_array_size(struct neutron_subgraph *subgraph) +{ + unsigned num_tensors = + util_dynarray_num_elements(&subgraph->tensors, struct neutron_tensor); + + return num_tensors + 4; +} + +static unsigned +get_buffer_size(struct neutron_subgraph *subgraph) +{ + unsigned size = 0; + + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) + size += NEUTRON_ALIGN(ntensor->size); + size += subgraph->header.size; + + return size; +} + +static int +get_header_index(struct neutron_subgraph *subgraph, + struct neutron_tensor *ntensor) +{ + switch (ntensor->type) { + case NEUTRON_DATA_INPUT: + return ntensor->id; + case NEUTRON_WEIGHTS: + return subgraph->data_inputs; + case NEUTRON_DATA_OUTPUT: + return subgraph->data_inputs + 1 + ntensor->id; + case NEUTRON_KERNELS: + return subgraph->data_inputs + subgraph->data_outputs + 1; + case NEUTRON_SCRATCH: + return subgraph->data_inputs + subgraph->data_outputs + 2; + case NEUTRON_PROFILE: + /* 4 entries reserved */ + return subgraph->data_inputs + subgraph->data_outputs + 7; + case NEUTRON_DEBUG: + return subgraph->data_inputs + subgraph->data_outputs + 8; + default: + return -1; + } +} + +struct pipe_ml_subgraph * +neutron_ml_subgraph_create(struct pipe_ml_device *pdevice, + const struct pipe_ml_operation *poperations, + 
unsigned count) +{ + struct neutron_screen *screen = neutron_ml_device_screen(pdevice); + struct neutron_ml_device *dev = neutron_ml_device(pdevice); + const struct pipe_ml_operation *operation = &poperations[0]; + struct neutron_subgraph *subgraph; + uint32_t *offset_array; + unsigned crt_offset = 0; + + if (count > 1) { + DBG("Expect a single NeutronGraph per partition!\n"); + return NULL; + } + + if (!dev->context) + dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0); + + subgraph = calloc(1, sizeof(*subgraph)); + subgraph->base.device = pdevice; + + subgraph->tensors = UTIL_DYNARRAY_INIT; + subgraph->header = UTIL_DYNARRAY_INIT; + + for (unsigned i = 0; i < operation->input_count; i++) + create_tensor(subgraph, operation->input_tensors[i], true); + for (unsigned i = 0; i < operation->output_count; i++) + create_tensor(subgraph, operation->output_tensors[i], false); + + if (!util_dynarray_resize(&subgraph->header, uint32_t, + get_header_array_size(subgraph))) + return NULL; + memset(util_dynarray_begin(&subgraph->header), 0, subgraph->header.size); + + subgraph->bo = pipe_buffer_create(dev->context->screen, 0, + PIPE_USAGE_DEFAULT, + get_buffer_size(subgraph)); + if (!subgraph->bo) + return NULL; + + if (DBG_ENABLED(NEUTRON_DBG_ZERO)) { + struct pipe_transfer *transfer; + struct neutron_resource *rsc = neutron_resource(subgraph->bo); + + uint8_t *buf = pipe_buffer_map(dev->context, subgraph->bo, + PIPE_MAP_WRITE, &transfer); + memset(buf, 0, pipe_buffer_size(subgraph->bo)); + pipe_buffer_unmap(dev->context, transfer); + } + + DBG("%s: Tensor list:\n", __func__); + DBG("%16s %3s %6s %8s %8s %8s\n", "TYPE", "ID", "INDEX", "OFFSET", "SIZE", "HDR_IDX"); + + offset_array = util_dynarray_begin(&subgraph->header); + crt_offset = NEUTRON_ALIGN(subgraph->header.size); + + /* Translate all tensors into buffer sections */ + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) { + unsigned header_index = get_header_index(subgraph, 
ntensor); + + /* Microcode offset passed via ioctl, all others stored in header */ + if (ntensor->type == NEUTRON_MICROCODE) + subgraph->microcode_offset = crt_offset; + else + offset_array[header_index] = crt_offset; + + ntensor->offset = crt_offset; + crt_offset += NEUTRON_ALIGN(ntensor->size); + + /* Custom inputs are written in the buffer during setup */ + if (is_custom_input(ntensor)) { + pipe_buffer_write(dev->context, subgraph->bo, ntensor->offset, + ntensor->size, ntensor->tensor->data); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) + neutron_dump_tensor(subgraph->bo, ntensor, 0); + } + + DBG("%16s %3d %6d %8d %8d %2d\n", neutron_tensor_name(ntensor->type), + ntensor->id, ntensor->tensor->index, ntensor->offset, ntensor->size, + header_index); + } + + pipe_buffer_write(dev->context, subgraph->bo, 0, subgraph->header.size, + util_dynarray_begin(&subgraph->header)); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) { + uint8_t *buf = neutron_resource(subgraph->bo)->map; + neutron_dump_buffer(buf, "header", 0, 0, subgraph->header.size); + } + + return &subgraph->base; +} + +static struct neutron_tensor * +get_tensor_by_id(struct neutron_subgraph *subgraph, + enum neutron_tensor_type type, unsigned index) +{ + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) { + if (ntensor->type == type && ntensor->tensor->index == index) + return ntensor; + } + return NULL; +} + +void +neutron_ml_subgraph_invoke(struct pipe_context *pcontext, + struct pipe_ml_subgraph *psubgraph, + unsigned inputs_count, unsigned input_idxs[], + void *inputs[], bool is_signed[]) +{ + struct neutron_screen *screen = neutron_screen(pcontext->screen); + struct neutron_context *context = neutron_context(pcontext); + struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph); + struct drm_neutron_submit_job submit = {0}; + int ret; + + for (unsigned i = 0; i < inputs_count; i++) { + struct neutron_tensor *ntensor = + get_tensor_by_id(subgraph, NEUTRON_DATA_INPUT, 
input_idxs[i]); + + DBG("Prepare input tensor: idx=%d, offset=%d, size=%d\n", + input_idxs[i], ntensor->offset, ntensor->size); + pipe_buffer_write(pcontext, subgraph->bo, ntensor->offset, + ntensor->size, inputs[i]); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) + neutron_dump_tensor(subgraph->bo, ntensor, i); + } + + /* Transfer ownership of output sections to device */ + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) { + if (ntensor->type == NEUTRON_DATA_OUTPUT) { + struct pipe_transfer *transfer; + pipe_buffer_map_range(pcontext, subgraph->bo, ntensor->offset, + ntensor->size, PIPE_MAP_WRITE, &transfer); + pipe_buffer_unmap(pcontext, transfer); + } + } + + submit.type = DRM_NEUTRON_JOB_INFERENCE; + submit.bo_handle = neutron_resource(subgraph->bo)->handle; + submit.syncobj_handle = context->syncobj_handle; + submit.inference.microcode_offset = subgraph->microcode_offset; + submit.inference.tensor_offset = 0; + submit.inference.tensor_count = + util_dynarray_num_elements(&subgraph->header, uint32_t); + + DBG("Submitting job\n"); + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &submit); + assert(ret == 0); +} + +void +neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext, + struct pipe_ml_subgraph *psubgraph, + unsigned outputs_count, + unsigned output_idxs[], void *outputsv[], + bool is_signed[]) +{ + struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph); + struct neutron_context *context = neutron_context(pcontext); + struct neutron_screen *screen = neutron_screen(pcontext->screen); + int ret; + + ret = drmSyncobjWait(screen->fd, &context->syncobj_handle, 1, INT64_MAX, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL); + assert(ret == 0); + DBG("Job finished\n"); + + for (unsigned i = 0; i < outputs_count; i++) { + struct neutron_tensor *ntensor = + get_tensor_by_id(subgraph, NEUTRON_DATA_OUTPUT, output_idxs[i]); + + DBG("Read output tensor: idx=%d, offset=%d, size=%d\n", + output_idxs[i], 
ntensor->offset, ntensor->size); + pipe_buffer_read(pcontext, subgraph->bo, ntensor->offset, + ntensor->size, outputsv[i]); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) + neutron_dump_tensor(subgraph->bo, ntensor, i); + } +} + +void +neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice, + struct pipe_ml_subgraph *psubgraph) +{ + struct pipe_context *pcontext = neutron_ml_device(pdevice)->context; + struct neutron_screen *screen = neutron_screen(pcontext->screen); + struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph); + struct drm_gem_close arg = {0}; + int ret; + + arg.handle = neutron_resource(subgraph->bo)->handle; + ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); + assert(ret >= 0); + + pipe_resource_reference(&subgraph->bo, NULL); + util_dynarray_fini(&subgraph->tensors); + util_dynarray_fini(&subgraph->header); + + free(subgraph); +} diff --git a/src/gallium/drivers/neutron/neutron_ml.h b/src/gallium/drivers/neutron/neutron_ml.h new file mode 100644 index 00000000000..c362230fcbd --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_ml.h @@ -0,0 +1,73 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#ifndef NEUTRON_ML_H +#define NEUTRON_ML_H + +#include +#include + +#include "neutron_device.h" + +#define NEUTRON_ALIGN(value) align(value, 64) + +enum neutron_tensor_type { + NEUTRON_WEIGHTS = 0, + NEUTRON_MICROCODE, + NEUTRON_KERNELS, + NEUTRON_CUSTOM_INPUT_MAX, + NEUTRON_SCRATCH = NEUTRON_CUSTOM_INPUT_MAX, + NEUTRON_PROFILE, + NEUTRON_DEBUG, + NEUTRON_CUSTOM_MAX, + NEUTRON_DATA_INPUT = NEUTRON_CUSTOM_MAX, + NEUTRON_DATA_OUTPUT, +}; + +struct neutron_tensor { + struct pipe_tensor *tensor; + enum neutron_tensor_type type; + unsigned size; + unsigned offset; + unsigned id; /* For NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT */ +}; + +struct neutron_subgraph { + struct pipe_ml_subgraph base; + struct pipe_resource *bo; + struct util_dynarray tensors; /* struct neutron_tensor */ + struct util_dynarray header; + 
unsigned microcode_offset; + unsigned data_inputs; /* number of non-custom input tensors */ + unsigned data_outputs; /* number of non-custom output tensors */ +}; + +bool +neutron_ml_operation_supported(struct pipe_ml_device *pdevice, + const struct pipe_ml_operation *operation); + +struct pipe_ml_subgraph * +neutron_ml_subgraph_create(struct pipe_ml_device *pdevice, + const struct pipe_ml_operation *poperations, + unsigned count); + +void +neutron_ml_subgraph_invoke(struct pipe_context *pcontext, + struct pipe_ml_subgraph *psubgraph, + unsigned inputs_count, unsigned input_idxs[], + void *inputs[], bool is_signed[]); + +void +neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext, + struct pipe_ml_subgraph *psubgraph, + unsigned outputs_count, + unsigned output_idxs[], void *outputs[], + bool is_signed[]); + +void +neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice, + struct pipe_ml_subgraph *psubgraph); + +#endif /* NEUTRON_ML_H */ diff --git a/src/gallium/meson.build b/src/gallium/meson.build index 67011f56b9f..419ffd86fa6 100644 --- a/src/gallium/meson.build +++ b/src/gallium/meson.build @@ -196,6 +196,12 @@ if with_gallium_ethosu else driver_ethosu = declare_dependency() endif +if with_gallium_neutron + subdir('winsys/neutron/drm') + subdir('drivers/neutron') +else + driver_neutron = declare_dependency() +endif if with_gallium_zink if not with_platform_windows subdir('winsys/zink/drm') diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build index c8ac8f7d510..c4a60ce9a8d 100644 --- a/src/gallium/targets/dri/meson.build +++ b/src/gallium/targets/dri/meson.build @@ -59,7 +59,7 @@ libgallium_dri = shared_library( driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv, driver_tegra, driver_i915, driver_svga, driver_virgl, driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12, - driver_asahi, driver_crocus, driver_rocket, driver_ethosu + driver_asahi, driver_crocus, driver_rocket, 
driver_ethosu, driver_neutron ], install : true, name_suffix : libname_suffix, diff --git a/src/gallium/winsys/neutron/drm/meson.build b/src/gallium/winsys/neutron/drm/meson.build new file mode 100644 index 00000000000..e9d917a5198 --- /dev/null +++ b/src/gallium/winsys/neutron/drm/meson.build @@ -0,0 +1,13 @@ +# Copyright 2017 Broadcom +# SPDX-License-Identifier: MIT + +libneutronwinsys = static_library( + 'neutronwinsys', + files('neutron_drm_winsys.c'), + include_directories : [ + inc_src, inc_include, + inc_gallium, inc_gallium_aux, inc_gallium_drivers, + ], + gnu_symbol_visibility : 'hidden', + dependencies: [dep_libdrm, idep_mesautil], +) diff --git a/src/gallium/winsys/neutron/drm/neutron_drm_public.h b/src/gallium/winsys/neutron/drm/neutron_drm_public.h new file mode 100644 index 00000000000..a4d969eb7ea --- /dev/null +++ b/src/gallium/winsys/neutron/drm/neutron_drm_public.h @@ -0,0 +1,17 @@ +/* + * Copyright 2014 Broadcom + * Copyright 2018 Alyssa Rosenzweig + * Copyright 2025 Tomeu Vizoso + * SPDX-License-Identifier: MIT + */ + +#ifndef __NEUTRON_DRM_PUBLIC_H__ +#define __NEUTRON_DRM_PUBLIC_H__ + +struct pipe_screen; +struct pipe_screen_config; + +struct pipe_screen * +neutron_drm_screen_create(int drmFD, const struct pipe_screen_config *config); + +#endif /* __NEUTRON_DRM_PUBLIC_H__ */ diff --git a/src/gallium/winsys/neutron/drm/neutron_drm_winsys.c b/src/gallium/winsys/neutron/drm/neutron_drm_winsys.c new file mode 100644 index 00000000000..debf9aa75c9 --- /dev/null +++ b/src/gallium/winsys/neutron/drm/neutron_drm_winsys.c @@ -0,0 +1,20 @@ +/* + * Copyright 2014 Broadcom + * Copyright 2018 Alyssa Rosenzweig + * Copyright 2025 Tomeu Vizoso + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#include "util/os_file.h" +#include "util/u_screen.h" + +#include "neutron/neutron_device.h" +#include "neutron_drm_public.h" + +struct pipe_screen * +neutron_drm_screen_create(int fd, const struct pipe_screen_config *config) +{ + return 
u_pipe_screen_lookup_or_create(os_dupfd_cloexec(fd), config, NULL, + neutron_screen_create); +} From 62a77801141e9a1ea0ba284f16a76fd073dd633c Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi-Radulescu Date: Fri, 24 Apr 2026 12:08:33 +0300 Subject: [PATCH 5/6] pipe-loader: Load the neutron accel driver Signed-off-by: Ioana Ciocoi-Radulescu --- src/gallium/auxiliary/pipe-loader/meson.build | 3 +++ .../auxiliary/pipe-loader/pipe_loader_drm.c | 4 ++++ .../auxiliary/target-helpers/drm_helper.h | 19 +++++++++++++++++++ .../target-helpers/drm_helper_public.h | 1 + 4 files changed, 27 insertions(+) diff --git a/src/gallium/auxiliary/pipe-loader/meson.build b/src/gallium/auxiliary/pipe-loader/meson.build index a4a13c002a6..1eb28f9f4da 100644 --- a/src/gallium/auxiliary/pipe-loader/meson.build +++ b/src/gallium/auxiliary/pipe-loader/meson.build @@ -48,6 +48,9 @@ endif if with_gallium_zink renderonly_drivers_c_args += '-DGALLIUM_ZINK' endif +if with_gallium_neutron + renderonly_drivers_c_args += '-DGALLIUM_NEUTRON' +endif libpipe_loader_static = static_library( 'pipe_loader_static', diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c index b97c526aedd..33062f5b408 100644 --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c @@ -86,6 +86,7 @@ static const struct drm_driver_descriptor *driver_descriptors[] = { &etnaviv_driver_descriptor, &rocket_driver_descriptor, &ethosu_driver_descriptor, + &neutron_driver_descriptor, &tegra_driver_descriptor, &lima_driver_descriptor, &zink_driver_descriptor, @@ -388,6 +389,9 @@ pipe_loader_get_compatible_render_capable_device_fds(int kms_only_fd, unsigned i #if defined GALLIUM_ETHOSU "ethosu", #endif +#if defined GALLIUM_NEUTRON + "neutron", +#endif #if defined GALLIUM_V3D "v3d", #endif diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h index 
65481b37ce3..0220fac99c5 100644 --- a/src/gallium/auxiliary/target-helpers/drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/drm_helper.h @@ -53,6 +53,7 @@ const struct drm_driver_descriptor descriptor_name = { \ #undef GALLIUM_ASAHI #undef GALLIUM_ROCKET #undef GALLIUM_ETHOSU +#undef GALLIUM_NEUTRON #endif #ifdef GALLIUM_I915 @@ -482,6 +483,24 @@ DRM_DRIVER_DESCRIPTOR(ethosu, NULL, 0) DRM_DRIVER_DESCRIPTOR_STUB(ethosu) #endif +#ifdef GALLIUM_NEUTRON +#include "neutron/drm/neutron_drm_public.h" + +static struct pipe_screen * +pipe_neutron_create_screen(int fd, const struct pipe_screen_config *config) +{ + struct pipe_screen *screen; + + screen = neutron_drm_screen_create(fd, config); + return screen ? debug_screen_wrap(screen) : NULL; +} + +DRM_DRIVER_DESCRIPTOR(neutron, NULL, 0) + +#else +DRM_DRIVER_DESCRIPTOR_STUB(neutron) +#endif + #ifdef GALLIUM_KMSRO #include "kmsro/drm/kmsro_drm_public.h" diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h index fe0e12280f6..193acd4ffdf 100644 --- a/src/gallium/auxiliary/target-helpers/drm_helper_public.h +++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h @@ -24,6 +24,7 @@ extern const struct drm_driver_descriptor etnaviv_driver_descriptor; extern const struct drm_driver_descriptor rknpu_driver_descriptor; extern const struct drm_driver_descriptor rocket_driver_descriptor; extern const struct drm_driver_descriptor ethosu_driver_descriptor; +extern const struct drm_driver_descriptor neutron_driver_descriptor; extern const struct drm_driver_descriptor tegra_driver_descriptor; extern const struct drm_driver_descriptor lima_driver_descriptor; extern const struct drm_driver_descriptor zink_driver_descriptor; From 3e2118eff84640244262a648e5ce0e48ceca9ba7 Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi-Radulescu Date: Fri, 24 Apr 2026 12:10:51 +0300 Subject: [PATCH 6/6] teflon: Link to the neutron driver Signed-off-by: Ioana Ciocoi-Radulescu 
--- src/gallium/frontends/teflon/tfl_device.c | 3 ++- src/gallium/targets/teflon/meson.build | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c index ff48137b197..6ad6f393276 100644 --- a/src/gallium/frontends/teflon/tfl_device.c +++ b/src/gallium/frontends/teflon/tfl_device.c @@ -1010,7 +1010,8 @@ find_accel_device() for (int i = 0; i < n; i++) { if (strstr("rocket", devs[i]->driver_name) || - strstr("ethosu", devs[i]->driver_name)) + strstr("ethosu", devs[i]->driver_name) || + strstr("neutron", devs[i]->driver_name)) device = devs[i]; else pipe_loader_release(&devs[i], 1); diff --git a/src/gallium/targets/teflon/meson.build b/src/gallium/targets/teflon/meson.build index 6a4d1aa7f84..413dfe9ee68 100644 --- a/src/gallium/targets/teflon/meson.build +++ b/src/gallium/targets/teflon/meson.build @@ -10,6 +10,7 @@ libteflon = shared_library( driver_etnaviv, driver_rocket, driver_ethosu, + driver_neutron, idep_nir, idep_mesautil, ],