Merge branch 'neutron' into 'main'

Draft: Add new Gallium driver for NXP's Neutron NPU

See merge request mesa/mesa!41401
This commit is contained in:
Ioana Ciocoi-Radulescu 2026-05-08 00:10:44 +00:00
commit 967de8aab8
28 changed files with 1803 additions and 11 deletions

View file

@ -3,6 +3,7 @@
src/gallium/drivers/ethosu/**/*
src/gallium/drivers/i915
src/gallium/drivers/neutron/**/*
src/gallium/drivers/r300/compiler/*
src/gallium/drivers/rocket/**/*
src/gallium/targets/teflon/**/*

View file

@ -0,0 +1,130 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/* Copyright 2025-2026 NXP */
#ifndef __NEUTRON_ACCEL_H__
#define __NEUTRON_ACCEL_H__
#include "drm.h"
#if defined(__cplusplus)
extern "C" {
#endif
/**
* enum drm_neutron_ioctl - Neutron IOCTL IDs
*
* @DRM_NEUTRON_CREATE_BO: Create a buffer object
* @DRM_NEUTRON_SYNC_BO: Sync (parts of) the buffer object memory
* @DRM_NEUTRON_SUBMIT_JOB: Submit a job to the device
*/
enum drm_neutron_ioctl {
DRM_NEUTRON_CREATE_BO = 0,
DRM_NEUTRON_SYNC_BO,
DRM_NEUTRON_SUBMIT_JOB,
};
/**
* struct drm_neutron_create_bo - Create a buffer object and return buffer
* info to user
*
* @size: Size in bytes of the requested buffer. May be updated by the driver
*        if the allocated size differs from the requested size
* @handle: Returned handle for the new buffer object
* @pad: MBZ
* @map_offset: Returned offset for mmap() calls
*/
struct drm_neutron_create_bo {
__u64 size;
__u32 handle;
__u32 pad;
__u64 map_offset;
};
/**
* enum drm_neutron_sync_dir - Direction of buffer object synchronization
*
* @DRM_NEUTRON_SYNC_TO_DEVICE: Sync from CPU to device
* @DRM_NEUTRON_SYNC_FROM_DEVICE: Sync from device to CPU
*/
enum drm_neutron_sync_dir {
DRM_NEUTRON_SYNC_TO_DEVICE = 0,
DRM_NEUTRON_SYNC_FROM_DEVICE,
};
/**
* struct drm_neutron_sync_bo - Sync buffer object memory
*
* @handle: Handle of buffer object to sync
* @direction: Direction of sync, can be one of enum drm_neutron_sync_dir
* @size: Size of the memory to sync, in bytes
* @offset: Offset inside the buffer, in bytes
*/
struct drm_neutron_sync_bo {
__u32 handle;
__u32 direction;
__u64 size;
__u64 offset;
};
/**
* enum drm_neutron_job_type - Type of job to submit to Neutron device
*
* @DRM_NEUTRON_JOB_INFERENCE: Inference job
*/
enum drm_neutron_job_type {
DRM_NEUTRON_JOB_INFERENCE = 0,
};
/**
* struct drm_neutron_inference_job - Inference job descriptor
*
* @tensor_offset: Offset of tensor array inside job BO
* @microcode_offset: Microcode offset inside BO
* @tensor_count: Number of valid tensors
* @pad: MBZ
*/
struct drm_neutron_inference_job {
__u32 tensor_offset;
__u32 microcode_offset;
__u32 tensor_count;
__u32 pad[5];
};
/**
* struct drm_neutron_submit_job - Submit a job to Neutron device
*
* @type: Job type, one of enum drm_neutron_job_type
* @bo_handle: BO handle for this job
* @inference: Inference job descriptor (when type is DRM_NEUTRON_JOB_INFERENCE)
* @reserved: Reserved for future job types
* @syncobj_handle: Handle of syncobj on which user waits for job completion
* @pad: MBZ
*/
struct drm_neutron_submit_job {
__u32 type;
__u32 bo_handle;
union {
struct drm_neutron_inference_job inference;
__u32 reserved[8];
};
__u32 syncobj_handle;
__u32 pad;
};
#define DRM_IOCTL_NEUTRON_CREATE_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_CREATE_BO, \
struct drm_neutron_create_bo)
#define DRM_IOCTL_NEUTRON_SYNC_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SYNC_BO, \
struct drm_neutron_sync_bo)
#define DRM_IOCTL_NEUTRON_SUBMIT_JOB \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SUBMIT_JOB, \
struct drm_neutron_submit_job)
#if defined(__cplusplus)
}
#endif
#endif /* __NEUTRON_ACCEL_H__ */
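For context, a minimal sketch of how userspace could drive this UAPI end to end: create the job BO, fill it through the mapping, sync it to the device, submit an inference job and wait on the syncobj. This is not part of the change; the device fd, in-buffer offsets and tensor count are assumptions passed in by the caller.

/* Hypothetical usage sketch of the UAPI above; error handling, buffer
 * layout and offsets are assumptions, not part of this merge request. */
#include <stdint.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include "neutron_accel.h"

static int
neutron_run_inference(int fd, uint64_t bo_size, uint32_t microcode_offset,
                      uint32_t tensor_count)
{
   struct drm_neutron_create_bo create = {.size = bo_size};
   if (drmIoctl(fd, DRM_IOCTL_NEUTRON_CREATE_BO, &create))
      return -1;

   void *map = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, create.map_offset);
   if (map == MAP_FAILED)
      return -1;

   /* ... write the offset header, microcode and input tensors here ... */

   struct drm_neutron_sync_bo sync = {
      .handle = create.handle,
      .direction = DRM_NEUTRON_SYNC_TO_DEVICE,
      .size = create.size,
      .offset = 0,
   };
   if (drmIoctl(fd, DRM_IOCTL_NEUTRON_SYNC_BO, &sync))
      return -1;

   uint32_t syncobj;
   if (drmSyncobjCreate(fd, 0, &syncobj))
      return -1;

   struct drm_neutron_submit_job job = {
      .type = DRM_NEUTRON_JOB_INFERENCE,
      .bo_handle = create.handle,
      .syncobj_handle = syncobj,
      .inference = {
         .tensor_offset = 0,
         .microcode_offset = microcode_offset,
         .tensor_count = tensor_count,
      },
   };
   if (drmIoctl(fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &job))
      return -1;

   /* Block until the driver signals the syncobj on job completion. */
   return drmSyncobjWait(fd, &syncobj, 1, INT64_MAX,
                         DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
}

The driver below (neutron_device.c and neutron_ml.c) implements this same sequence behind the Gallium ML hooks.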

View file

@ -205,7 +205,7 @@ elif gallium_drivers.contains('all')
gallium_drivers = [
'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915',
'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris',
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu'
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu', 'neutron'
]
endif
@ -234,6 +234,7 @@ with_gallium_d3d12 = gallium_drivers.contains('d3d12')
with_gallium_asahi = gallium_drivers.contains('asahi')
with_gallium_rocket = gallium_drivers.contains('rocket')
with_gallium_ethosu = gallium_drivers.contains('ethosu')
with_gallium_neutron = gallium_drivers.contains('neutron')
foreach gallium_driver : gallium_drivers
pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper())
endforeach

View file

@ -87,7 +87,7 @@ option(
choices : [
'all', 'auto',
'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris',
'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'lima', 'llvmpipe', 'neutron', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink',
],
description : 'List of gallium drivers to build. If this is set to auto ' +

View file

@ -48,6 +48,9 @@ endif
if with_gallium_zink
renderonly_drivers_c_args += '-DGALLIUM_ZINK'
endif
if with_gallium_neutron
renderonly_drivers_c_args += '-DGALLIUM_NEUTRON'
endif
libpipe_loader_static = static_library(
'pipe_loader_static',

View file

@ -86,6 +86,7 @@ static const struct drm_driver_descriptor *driver_descriptors[] = {
&etnaviv_driver_descriptor,
&rocket_driver_descriptor,
&ethosu_driver_descriptor,
&neutron_driver_descriptor,
&tegra_driver_descriptor,
&lima_driver_descriptor,
&zink_driver_descriptor,
@ -388,6 +389,9 @@ pipe_loader_get_compatible_render_capable_device_fds(int kms_only_fd, unsigned i
#if defined GALLIUM_ETHOSU
"ethosu",
#endif
#if defined GALLIUM_NEUTRON
"neutron",
#endif
#if defined GALLIUM_V3D
"v3d",
#endif

View file

@ -53,6 +53,7 @@ const struct drm_driver_descriptor descriptor_name = { \
#undef GALLIUM_ASAHI
#undef GALLIUM_ROCKET
#undef GALLIUM_ETHOSU
#undef GALLIUM_NEUTRON
#endif
#ifdef GALLIUM_I915
@ -482,6 +483,24 @@ DRM_DRIVER_DESCRIPTOR(ethosu, NULL, 0)
DRM_DRIVER_DESCRIPTOR_STUB(ethosu)
#endif
#ifdef GALLIUM_NEUTRON
#include "neutron/drm/neutron_drm_public.h"
static struct pipe_screen *
pipe_neutron_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
screen = neutron_drm_screen_create(fd, config);
return screen ? debug_screen_wrap(screen) : NULL;
}
DRM_DRIVER_DESCRIPTOR(neutron, NULL, 0)
#else
DRM_DRIVER_DESCRIPTOR_STUB(neutron)
#endif
#ifdef GALLIUM_KMSRO
#include "kmsro/drm/kmsro_drm_public.h"

View file

@ -24,6 +24,7 @@ extern const struct drm_driver_descriptor etnaviv_driver_descriptor;
extern const struct drm_driver_descriptor rknpu_driver_descriptor;
extern const struct drm_driver_descriptor rocket_driver_descriptor;
extern const struct drm_driver_descriptor ethosu_driver_descriptor;
extern const struct drm_driver_descriptor neutron_driver_descriptor;
extern const struct drm_driver_descriptor tegra_driver_descriptor;
extern const struct drm_driver_descriptor lima_driver_descriptor;
extern const struct drm_driver_descriptor zink_driver_descriptor;

View file

@ -0,0 +1,2 @@
BasedOnStyle: InheritParentConfig
DisableFormat: false

View file

@ -0,0 +1,102 @@
CompiledModels.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail
CompiledModels.Op/inception_002,Fail
CompiledModels.Op/inception_003,Fail
CompiledModels.Op/inception_008,Fail
CompiledModels.Op/inception_013,Fail
CompiledModels.Op/inception_015,Fail
CompiledModels.Op/inception_016,Fail
CompiledModels.Op/inception_021,Fail
CompiledModels.Op/inception_024,Fail
CompiledModels.Op/inception_027,Fail
CompiledModels.Op/inception_030,Fail
CompiledModels.Op/inception_032,Fail
CompiledModels.Op/inception_033,Fail
CompiledModels.Op/inception_041,Fail
CompiledModels.Op/inception_055,Fail
CompiledModels.Op/inception_059,Fail
CompiledModels.Op/inception_063,Fail
CompiledModels.Op/inception_064,Fail
CompiledModels.Op/inception_071,Fail
CompiledModels.Op/inception_073,Fail
CompiledModels.Op/inception_075,Fail
CompiledModels.Op/inception_077,Fail
CompiledModels.Op/mobiledet_002,Fail
CompiledModels.Op/mobiledet_004,Fail
CompiledModels.Op/mobiledet_006,Fail
CompiledModels.Op/mobiledet_009,Fail
CompiledModels.Op/mobiledet_012,Fail
CompiledModels.Op/mobiledet_015,Fail
CompiledModels.Op/mobiledet_017,Fail
CompiledModels.Op/mobiledet_020,Fail
CompiledModels.Op/mobiledet_029,Fail
CompiledModels.Op/mobiledet_039,Fail
CompiledModels.Op/mobiledet_042,Fail
CompiledModels.Op/mobiledet_046,Fail
CompiledModels.Op/mobiledet_050,Fail
CompiledModels.Op/mobiledet_063,Fail
CompiledModels.Op/mobiledet_086,Fail
CompiledModels.Op/mobiledet_087,Fail
CompiledModels.Op/mobiledet_106,Fail
CompiledModels.Op/mobiledet_119,Fail
CompiledModels.Op/mobiledet_ssdlite_mobiledet_coco_qat_postprocess,Fail
CompiledModels.Op/mobilenetv1_001,Fail
CompiledModels.Op/mobilenetv1_002,Fail
CompiledModels.Op/mobilenetv1_003,Fail
CompiledModels.Op/mobilenetv1_005,Fail
CompiledModels.Op/mobilenetv1_006,Fail
CompiledModels.Op/mobilenetv1_008,Fail
CompiledModels.Op/mobilenetv1_009,Fail
CompiledModels.Op/mobilenetv1_012,Fail
CompiledModels.Op/mobilenetv1_016,Fail
CompiledModels.Op/mobilenetv1_018,Fail
CompiledModels.Op/mobilenetv1_020,Fail
CompiledModels.Op/mobilenetv1_025,Fail
CompiledModels.Op/mobilenetv1_026,Fail
CompiledModels.Op/mobilenetv1_mobilenet_v1_1_224_quant,Fail
CompiledModels.Op/mobilenetv2_001,Fail
CompiledModels.Op/mobilenetv2_003,Fail
CompiledModels.Op/mobilenetv2_006,Fail
CompiledModels.Op/mobilenetv2_007,Fail
CompiledModels.Op/mobilenetv2_010,Fail
CompiledModels.Op/mobilenetv2_014,Fail
CompiledModels.Op/mobilenetv2_018,Fail
CompiledModels.Op/mobilenetv2_025,Fail
CompiledModels.Op/mobilenetv2_029,Fail
CompiledModels.Op/mobilenetv2_040,Fail
CompiledModels.Op/mobilenetv2_044,Fail
CompiledModels.Op/mobilenetv2_059,Fail
CompiledModels.Op/mobilenetv2_061,Fail
CompiledModels.Op/movenetlightning_103,Fail
CompiledModels.Op/movenetlightning_104,Fail
CompiledModels.Op/movenetthunder_103,Fail
CompiledModels.Op/movenetthunder_104,Fail
CompiledModels.Op/ssdmobilenetv2_001,Fail
CompiledModels.Op/ssdmobilenetv2_003,Fail
CompiledModels.Op/ssdmobilenetv2_007,Fail
CompiledModels.Op/ssdmobilenetv2_040,Fail
CompiledModels.Op/ssdmobilenetv2_047,Fail
CompiledModels.Op/ssdmobilenetv2_052,Fail
CompiledModels.Op/ssdmobilenetv2_054,Fail
CompiledModels.Op/ssdmobilenetv2_065,Fail
CompiledModels.Op/ssdmobilenetv2_069,Fail
CompiledModels.Op/ssdmobilenetv2_072,Fail
CompiledModels.Op/ssdmobilenetv2_073,Fail
CompiledModels.Op/ssdmobilenetv2_077,Fail
CompiledModels.Op/ssdmobilenetv2_081,Fail
CompiledModels.Op/ssdmobilenetv2_089,Fail
CompiledModels.Op/ssdmobilenetv2_096,Fail
CompiledModels.Op/ssdmobilenetv2_102,Fail
CompiledModels.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail
CompiledModels.Op/yolox_001,Fail
CompiledModels.Op/yolox_004,Fail
CompiledModels.Op/yolox_027,Fail
CompiledModels.Op/yolox_042,Fail
CompiledModels.Op/yolox_077,Fail
CompiledModels.Op/yolox_086,Fail
CompiledModels.Op/yolox_102,Fail
CompiledModels.Op/yolox_112,Fail
CompiledModels.Op/yolox_122,Fail
CompiledModels.Op/yolox_132,Fail
CompiledModels.Op/yolox_147,Fail
CompiledModels.Op/yolox_yolox_nano_full_integer_quant,Fail

View file

@ -0,0 +1,227 @@
# Standard (un-compiled) tflite models are not supported on Neutron
Add.Op/.*
AddQuant.Op/.*
Conv2D.Op/.*
DepthwiseConv2D.Op/.*
FullyConnected.Op/.*
Models.Op/.*
# Compilation error due to duplicate/orphan input tensor
CompiledModels.Op/movenetlightning_117
CompiledModels.Op/movenetlightning_120
CompiledModels.Op/movenetthunder_117
CompiledModels.Op/movenetthunder_120
# Conversion rate zero (operators or operator + tensor type/size combo
# not supported yet by Neutron compiler)
CompiledModels.Op/efficientdet_066
CompiledModels.Op/efficientdet_101
CompiledModels.Op/efficientdet_136
CompiledModels.Op/efficientdet_251
CompiledModels.Op/efficientdet_252
CompiledModels.Op/efficientdet_253
CompiledModels.Op/efficientdet_254
CompiledModels.Op/efficientdet_255
CompiledModels.Op/efficientdet_256
CompiledModels.Op/efficientdet_257
CompiledModels.Op/efficientdet_258
CompiledModels.Op/efficientdet_259
CompiledModels.Op/efficientdet_260
CompiledModels.Op/efficientdet_261
CompiledModels.Op/efficientdet_262
CompiledModels.Op/efficientdet_263
CompiledModels.Op/efficientdet_264
CompiledModels.Op/efficientdet_265
CompiledModels.Op/inception_082
CompiledModels.Op/micronetlarge_013
CompiledModels.Op/mobiledet_097
CompiledModels.Op/mobiledet_099
CompiledModels.Op/mobiledet_101
CompiledModels.Op/mobiledet_103
CompiledModels.Op/mobiledet_105
CompiledModels.Op/mobiledet_107
CompiledModels.Op/mobiledet_109
CompiledModels.Op/mobiledet_111
CompiledModels.Op/mobiledet_113
CompiledModels.Op/mobiledet_115
CompiledModels.Op/mobiledet_117
CompiledModels.Op/mobiledet_118
CompiledModels.Op/mobiledet_122
CompiledModels.Op/mobiledet_123
CompiledModels.Op/mobilenetv1_030
CompiledModels.Op/movenetlightning_000
CompiledModels.Op/movenetlightning_001
CompiledModels.Op/movenetlightning_002
CompiledModels.Op/movenetlightning_003
CompiledModels.Op/movenetlightning_071
CompiledModels.Op/movenetlightning_075
CompiledModels.Op/movenetlightning_079
CompiledModels.Op/movenetlightning_087
CompiledModels.Op/movenetlightning_088
CompiledModels.Op/movenetlightning_089
CompiledModels.Op/movenetlightning_090
CompiledModels.Op/movenetlightning_091
CompiledModels.Op/movenetlightning_092
CompiledModels.Op/movenetlightning_093
CompiledModels.Op/movenetlightning_094
CompiledModels.Op/movenetlightning_095
CompiledModels.Op/movenetlightning_096
CompiledModels.Op/movenetlightning_097
CompiledModels.Op/movenetlightning_098
CompiledModels.Op/movenetlightning_099
CompiledModels.Op/movenetlightning_105
CompiledModels.Op/movenetlightning_112
CompiledModels.Op/movenetlightning_113
CompiledModels.Op/movenetlightning_114
CompiledModels.Op/movenetlightning_115
CompiledModels.Op/movenetlightning_116
CompiledModels.Op/movenetlightning_118
CompiledModels.Op/movenetlightning_119
CompiledModels.Op/movenetlightning_122
CompiledModels.Op/movenetlightning_123
CompiledModels.Op/movenetlightning_124
CompiledModels.Op/movenetlightning_125
CompiledModels.Op/movenetlightning_126
CompiledModels.Op/movenetlightning_127
CompiledModels.Op/movenetlightning_128
CompiledModels.Op/movenetlightning_129
CompiledModels.Op/movenetlightning_130
CompiledModels.Op/movenetlightning_131
CompiledModels.Op/movenetlightning_132
CompiledModels.Op/movenetlightning_133
CompiledModels.Op/movenetlightning_134
CompiledModels.Op/movenetlightning_135
CompiledModels.Op/movenetlightning_136
CompiledModels.Op/movenetlightning_137
CompiledModels.Op/movenetlightning_138
CompiledModels.Op/movenetlightning_139
CompiledModels.Op/movenetlightning_140
CompiledModels.Op/movenetlightning_141
CompiledModels.Op/movenetlightning_142
CompiledModels.Op/movenetlightning_143
CompiledModels.Op/movenetlightning_144
CompiledModels.Op/movenetlightning_145
CompiledModels.Op/movenetlightning_147
CompiledModels.Op/movenetlightning_149
CompiledModels.Op/movenetlightning_150
CompiledModels.Op/movenetlightning_151
CompiledModels.Op/movenetlightning_152
CompiledModels.Op/movenetlightning_153
CompiledModels.Op/movenetlightning_154
CompiledModels.Op/movenetlightning_155
CompiledModels.Op/movenetlightning_156
CompiledModels.Op/movenetthunder_000
CompiledModels.Op/movenetthunder_001
CompiledModels.Op/movenetthunder_002
CompiledModels.Op/movenetthunder_003
CompiledModels.Op/movenetthunder_071
CompiledModels.Op/movenetthunder_075
CompiledModels.Op/movenetthunder_079
CompiledModels.Op/movenetthunder_087
CompiledModels.Op/movenetthunder_088
CompiledModels.Op/movenetthunder_089
CompiledModels.Op/movenetthunder_090
CompiledModels.Op/movenetthunder_091
CompiledModels.Op/movenetthunder_092
CompiledModels.Op/movenetthunder_093
CompiledModels.Op/movenetthunder_094
CompiledModels.Op/movenetthunder_095
CompiledModels.Op/movenetthunder_096
CompiledModels.Op/movenetthunder_097
CompiledModels.Op/movenetthunder_098
CompiledModels.Op/movenetthunder_099
CompiledModels.Op/movenetthunder_105
CompiledModels.Op/movenetthunder_112
CompiledModels.Op/movenetthunder_113
CompiledModels.Op/movenetthunder_114
CompiledModels.Op/movenetthunder_115
CompiledModels.Op/movenetthunder_116
CompiledModels.Op/movenetthunder_118
CompiledModels.Op/movenetthunder_119
CompiledModels.Op/movenetthunder_122
CompiledModels.Op/movenetthunder_123
CompiledModels.Op/movenetthunder_124
CompiledModels.Op/movenetthunder_125
CompiledModels.Op/movenetthunder_126
CompiledModels.Op/movenetthunder_127
CompiledModels.Op/movenetthunder_128
CompiledModels.Op/movenetthunder_129
CompiledModels.Op/movenetthunder_130
CompiledModels.Op/movenetthunder_131
CompiledModels.Op/movenetthunder_132
CompiledModels.Op/movenetthunder_133
CompiledModels.Op/movenetthunder_134
CompiledModels.Op/movenetthunder_135
CompiledModels.Op/movenetthunder_136
CompiledModels.Op/movenetthunder_137
CompiledModels.Op/movenetthunder_138
CompiledModels.Op/movenetthunder_139
CompiledModels.Op/movenetthunder_140
CompiledModels.Op/movenetthunder_141
CompiledModels.Op/movenetthunder_142
CompiledModels.Op/movenetthunder_143
CompiledModels.Op/movenetthunder_144
CompiledModels.Op/movenetthunder_145
CompiledModels.Op/movenetthunder_147
CompiledModels.Op/movenetthunder_149
CompiledModels.Op/movenetthunder_150
CompiledModels.Op/movenetthunder_151
CompiledModels.Op/movenetthunder_152
CompiledModels.Op/movenetthunder_153
CompiledModels.Op/movenetthunder_154
CompiledModels.Op/movenetthunder_155
CompiledModels.Op/movenetthunder_156
CompiledModels.Op/ssdmobilenetv2_049
CompiledModels.Op/ssdmobilenetv2_051
CompiledModels.Op/ssdmobilenetv2_067
CompiledModels.Op/ssdmobilenetv2_068
CompiledModels.Op/ssdmobilenetv2_070
CompiledModels.Op/ssdmobilenetv2_075
CompiledModels.Op/ssdmobilenetv2_076
CompiledModels.Op/ssdmobilenetv2_078
CompiledModels.Op/ssdmobilenetv2_083
CompiledModels.Op/ssdmobilenetv2_084
CompiledModels.Op/ssdmobilenetv2_086
CompiledModels.Op/ssdmobilenetv2_091
CompiledModels.Op/ssdmobilenetv2_092
CompiledModels.Op/ssdmobilenetv2_094
CompiledModels.Op/ssdmobilenetv2_099
CompiledModels.Op/ssdmobilenetv2_100
CompiledModels.Op/ssdmobilenetv2_101
CompiledModels.Op/ssdmobilenetv2_107
CompiledModels.Op/ssdmobilenetv2_108
CompiledModels.Op/ssdmobilenetv2_109
CompiledModels.Op/yolox_000
CompiledModels.Op/yolox_003
CompiledModels.Op/yolox_012
CompiledModels.Op/yolox_103
CompiledModels.Op/yolox_104
CompiledModels.Op/yolox_113
CompiledModels.Op/yolox_114
CompiledModels.Op/yolox_123
CompiledModels.Op/yolox_124
CompiledModels.Op/yolox_125
CompiledModels.Op/yolox_126
CompiledModels.Op/yolox_127
CompiledModels.Op/yolox_128
CompiledModels.Op/yolox_129
CompiledModels.Op/yolox_130
CompiledModels.Op/yolox_131
CompiledModels.Op/yolox_133
CompiledModels.Op/yolox_134
CompiledModels.Op/yolox_135
CompiledModels.Op/yolox_136
CompiledModels.Op/yolox_137
CompiledModels.Op/yolox_139
CompiledModels.Op/yolox_140
CompiledModels.Op/yolox_141
CompiledModels.Op/yolox_142
CompiledModels.Op/yolox_143
CompiledModels.Op/yolox_145
CompiledModels.Op/yolox_146
CompiledModels.Op/yolox_148
CompiledModels.Op/yolox_149
CompiledModels.Op/yolox_151
CompiledModels.Op/yolox_152
CompiledModels.Op/yolox_153

View file

@ -0,0 +1,58 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Compile a TFLite model for the Neutron NPU.
Usage: python compile_model.py <input.tflite> <output.tflite>
Exit codes:
0: Success
1: Converter internal error
2: Zero conversion rate (model unchanged)
3: Invalid arguments or input file
"""
import sys
import os
from eiq_neutron_sdk import neutron_converter # from https://eiq.nxp.com/repository
# Only one platform supported for now
TARGET = "imx95"
def main():
if len(sys.argv) != 3:
print("Usage: python compile_model.py <input.tflite> <output.tflite>")
sys.exit(3)
input_file = sys.argv[1]
output_file = sys.argv[2]
if not os.path.isfile(input_file):
print(f"Error: Input file '{input_file}' does not exist")
sys.exit(3)
if not input_file.endswith('.tflite'):
print("Error: Input file must be a .tflite model")
sys.exit(3)
with open(input_file, 'rb') as f:
model_data = f.read()
try:
cmodel_data = neutron_converter.convertModel(list(model_data), TARGET)
except Exception as e:
print(f"Error during model conversion: {e}")
sys.exit(1)
if bytes(cmodel_data) == model_data:
print(f"Warning: Conversion rate was zero for {input_file}, skipping output")
sys.exit(2)
with open(output_file, 'wb') as f:
f.write(bytes(cmodel_data))
if __name__ == "__main__":
main()

View file

@ -0,0 +1,20 @@
# Copyright 2019 Google, Inc
# SPDX-License-Identifier: MIT
files_neutron = files(
'neutron_device.c',
'neutron_ml.c',
)
libneutron = static_library(
'neutron',
files_neutron,
include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
gnu_symbol_visibility : 'hidden',
dependencies : [idep_mesautil, dep_libdrm],
)
driver_neutron = declare_dependency(
compile_args : '-DGALLIUM_NEUTRON',
link_with : [libneutronwinsys, libneutron]
)

View file

@ -0,0 +1,219 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Batch compiler of TFLite models for NXP's Neutron NPU
Usage modes (mutually exclusive):
neutron_compiler.py --in <input.tflite> --out <output.tflite>
neutron_compiler.py --in-dir <input_dir> --out-dir <output_dir>
neutron_compiler.py --teflon-test-models
"""
import sys
import os
import argparse
import subprocess
def parse_args(argv):
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
mode_group = parser.add_mutually_exclusive_group(required=True)
mode_group.add_argument('--in',
dest='input_file',
action='store',
help='Input TFLite model file')
mode_group.add_argument('--in-dir',
dest='input_dir',
action='store',
help='Input directory containing TFLite models to be compiled')
mode_group.add_argument('--teflon-test-models',
dest='teflon_test_models',
action='store_true',
help='Use ${TEFLON_TEST_DATA}/models as input directory and ${TEFLON_TEST_DATA}/compiled_models as output')
parser.add_argument('--out',
dest='output_file',
action='store',
help='Output TFLite model file (required with --in)')
parser.add_argument('--out-dir',
dest='output_dir',
action='store',
help='Output directory for compiled models (required with --in-dir). Output files will have the same name as input files')
parser.add_argument('--force',
action='store_true',
required=False,
help='Overwrite output files if they exist. Ignored for single file mode')
parser.add_argument('--show-details',
action='store_true',
required=False,
help='Print compilation result for each model. Ignored for single file mode')
parser.add_argument('--log-to-file',
action='store_true',
required=False,
help='Generate log file for each compile attempt. Log is located next to output file')
args = parser.parse_args(argv)
if args.input_file:
if not args.output_file:
parser.error("--out is required when using --in")
if not os.path.isfile(args.input_file):
parser.error("--in must be an existing file")
if args.input_dir:
if not args.output_dir:
parser.error("--out-dir is required when using --in-dir")
if not os.path.isdir(args.input_dir):
parser.error("--in-dir must be an existing directory")
if args.teflon_test_models and not os.environ.get('TEFLON_TEST_DATA'):
parser.error("--teflon-test-models requires TEFLON_TEST_DATA to be set")
return args
def compile(input_file, output_file):
script_dir = os.path.dirname(os.path.abspath(__file__))
worker_script = os.path.join(script_dir, "compile_model.py")
try:
result = subprocess.run(
[sys.executable, worker_script, input_file, output_file],
capture_output=True,
text=True,
timeout=60,
)
except Exception as e:
print(f"Error compiling {input_file}: {e}", file=sys.stderr)
return None
return result
def write_log(output_file, result):
model_name_we = os.path.splitext(os.path.basename(output_file))[0]
output_dir = os.path.dirname(output_file)
log_file = os.path.join(output_dir, f"{model_name_we}_compiler_output.log")
with open(log_file, 'w') as f:
if result.stdout:
f.write(result.stdout.strip())
if result.stderr:
f.write('\n')
f.write(result.stderr.strip())
def print_result(input_file, result):
if result.returncode == 0:
status = "SUCCESS"
elif result.returncode == 1:
status = "COMPILER ERROR"
elif result.returncode == 2:
status = "ZERO CONVERSION RATE"
elif result.returncode == 3:
status = "SCRIPT ERROR"
else:
status = f"UNKNOWN({returncode})"
print(f" {input_file}: {status}")
if result.returncode != 0 and result.stderr:
print(result.stderr.strip())
def print_stats(total, ok, error, zero, skip):
print("\n" + "=" * 50)
print(f"Total models processed: {total}")
print(f" Successful: {ok}")
print(f" Errors: {error}")
print(f" Zero conversion rate: {zero}")
print(f" Skipped: {skip}")
print("=" * 50)
def find_tflite_files(directory):
tflite_files = []
for root, _, files in os.walk(directory):
for file in files:
if file.endswith('.tflite'):
tflite_files.append(os.path.join(root, file))
return sorted(tflite_files)
def process_model(input_file, output_file, log_to_file, show_details):
output_dir = os.path.dirname(os.path.abspath(output_file))
os.makedirs(output_dir, exist_ok=True)
result = compile(input_file, output_file)
if not result:
return 3
if log_to_file:
write_log(output_file, result)
if show_details:
print_result(input_file, result)
return result.returncode
def process_directory(input_dir, output_dir, force, log_to_file, show_details):
total = 0
ok = 0
errors = 0
zero = 0
skip = 0
tflite_files = find_tflite_files(input_dir)
for input_file in tflite_files:
total += 1
rel_path = os.path.relpath(input_file, input_dir)
output_file = os.path.join(output_dir, rel_path)
if os.path.exists(output_file) and not force:
if show_details:
print(f" {input_file}: SKIPPED")
skip += 1
continue
ret_code = process_model(input_file, output_file, log_to_file, show_details)
if ret_code == 0:
ok += 1
elif ret_code == 2:
zero += 1
else:
errors += 1
print_stats(total, ok, errors, zero, skip)
def main(argv):
args = parse_args(argv)
if args.input_file:
process_model(
args.input_file,
args.output_file,
args.log_to_file,
True)
elif args.input_dir:
process_directory(
args.input_dir,
args.output_dir,
args.force,
args.log_to_file,
args.show_details
)
elif args.teflon_test_models:
teflon_test_data = os.environ.get('TEFLON_TEST_DATA')
input_dir = os.path.join(teflon_test_data, 'models')
output_dir = os.path.join(teflon_test_data, 'compiled_models')
print(f"Compiling Teflon test models:")
print(f" Input directory: {input_dir}")
print(f" Output directory: {output_dir}")
process_directory(
input_dir,
output_dir,
args.force,
args.log_to_file,
args.show_details
)
if __name__ == "__main__":
main(sys.argv[1:])

View file

@ -0,0 +1,257 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_mman.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_device.h"
#include "neutron_ml.h"
static const struct debug_named_value neutron_debug_options[] = {
{"dbg_msgs", NEUTRON_DBG_MSGS, "Print debug messages"},
{"dump_bos", NEUTRON_DBG_DUMP_BOS, "Dump buffers for analysis"},
{"zero_bos", NEUTRON_DBG_ZERO, "Zero buffers for debugging"},
DEBUG_NAMED_VALUE_END};
DEBUG_GET_ONCE_FLAGS_OPTION(neutron_debug, "NEUTRON_DEBUG",
neutron_debug_options, 0)
int neutron_debug = 0;
static void
neutron_screen_destroy(struct pipe_screen *pscreen)
{
struct neutron_screen *screen = neutron_screen(pscreen);
ralloc_free(screen);
}
static void
neutron_context_destroy(struct pipe_context *pctx)
{
struct neutron_context *ctx = neutron_context(pctx);
struct neutron_screen *screen = neutron_screen(pctx->screen);
drmSyncobjDestroy(screen->fd, ctx->syncobj_handle);
ralloc_free(ctx);
}
static void *
neutron_buffer_map(struct pipe_context *pctx,
struct pipe_resource *prsc, unsigned level,
unsigned usage, const struct pipe_box *box,
struct pipe_transfer **out_transfer)
{
struct neutron_screen *screen = neutron_screen(pctx->screen);
struct neutron_resource *rsc = neutron_resource(prsc);
struct drm_neutron_sync_bo arg = {0};
int ret;
assert(level == 0);
assert(prsc->target == PIPE_BUFFER);
assert(box->y == 0);
assert(box->z == 0);
assert(box->height == 1);
assert(box->depth == 1);
struct pipe_transfer *transfer = rzalloc(NULL, struct pipe_transfer);
transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
pipe_resource_reference(&transfer->resource, prsc);
if (transfer->usage & PIPE_MAP_READ) {
arg.handle = rsc->handle;
arg.direction = DRM_NEUTRON_SYNC_FROM_DEVICE;
arg.size = box->width;
arg.offset = box->x;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
if (ret < 0)
goto free_transfer;
}
*out_transfer = transfer;
return (uint8_t *)rsc->map + box->x;
free_transfer:
pipe_resource_reference(&transfer->resource, NULL);
ralloc_free(transfer);
return NULL;
}
static void
neutron_buffer_unmap(struct pipe_context *pctx,
struct pipe_transfer *transfer)
{
struct neutron_resource *rsrc = neutron_resource(transfer->resource);
struct neutron_screen *screen = neutron_screen(pctx->screen);
struct drm_neutron_sync_bo arg = {0};
int ret;
if (transfer->usage & PIPE_MAP_WRITE) {
arg.handle = rsrc->handle;
arg.direction = DRM_NEUTRON_SYNC_TO_DEVICE;
arg.size = transfer->box.width;
arg.offset = transfer->box.x;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
assert(ret >= 0);
}
pipe_resource_reference(&transfer->resource, NULL);
ralloc_free(transfer);
}
static struct pipe_context *
neutron_context_create(struct pipe_screen *pscreen,
void *priv, unsigned flags)
{
struct neutron_context *ctx = rzalloc(NULL, struct neutron_context);
struct neutron_screen *screen = neutron_screen(pscreen);
struct pipe_context *pctx = &ctx->base;
int ret;
if (!ctx)
return NULL;
ret = drmSyncobjCreate(screen->fd, 0, &ctx->syncobj_handle);
if (ret) {
ralloc_free(ctx);
return NULL;
}
pctx->screen = pscreen;
pctx->priv = priv;
pctx->destroy = neutron_context_destroy;
pctx->buffer_map = neutron_buffer_map;
pctx->buffer_unmap = neutron_buffer_unmap;
pctx->resource_copy_region = util_resource_copy_region;
pctx->buffer_subdata = u_default_buffer_subdata;
pctx->clear_buffer = u_default_clear_buffer;
pctx->ml_subgraph_invoke = neutron_ml_subgraph_invoke;
pctx->ml_subgraph_read_output = neutron_ml_subgraph_read_outputs;
return pctx;
}
static struct pipe_resource *
neutron_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *template)
{
struct neutron_screen *screen = neutron_screen(pscreen);
struct drm_neutron_create_bo arg = {0};
struct neutron_resource *rsc;
int ret;
assert(template->target == PIPE_BUFFER);
assert(template->height0 == 1);
assert(template->depth0 == 1);
assert(template->array_size == 1);
rsc = rzalloc(NULL, struct neutron_resource);
if (!rsc)
return NULL;
rsc->base = *template;
rsc->base.screen = pscreen;
rsc->base.nr_samples = template->nr_samples;
pipe_reference_init(&rsc->base.reference, 1);
arg.size = template->width0;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_CREATE_BO, &arg);
if (ret < 0)
goto free_rsc;
rsc->handle = arg.handle;
rsc->size = arg.size;
rsc->map = os_mmap(NULL, rsc->size, PROT_READ | PROT_WRITE, MAP_SHARED,
screen->fd, arg.map_offset);
if (rsc->map == MAP_FAILED)
goto close_bo;
return &rsc->base;
close_bo : {
struct drm_gem_close close_arg = {.handle = rsc->handle};
drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_arg);
}
free_rsc:
ralloc_free(rsc);
return NULL;
}
static void
neutron_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *prsc)
{
struct neutron_resource *rsc = neutron_resource(prsc);
struct neutron_screen *screen = neutron_screen(pscreen);
struct drm_gem_close arg = {0};
int ret;
os_munmap(rsc->map, rsc->size);
arg.handle = rsc->handle;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
ralloc_free(rsc);
}
static int
neutron_screen_get_fd(struct pipe_screen *pscreen)
{
return neutron_screen(pscreen)->fd;
}
static struct pipe_ml_device *
neutron_get_ml_device(struct pipe_screen *pscreen)
{
return &neutron_screen(pscreen)->ml_device.base;
}
struct pipe_screen *
neutron_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro)
{
struct neutron_screen *neutron_screen;
struct pipe_screen *screen;
neutron_screen = rzalloc(NULL, struct neutron_screen);
if (!neutron_screen)
return NULL;
neutron_debug = debug_get_option_neutron_debug();
screen = &neutron_screen->pscreen;
neutron_screen->fd = fd;
screen->get_screen_fd = neutron_screen_get_fd;
screen->destroy = neutron_screen_destroy;
screen->context_create = neutron_context_create;
screen->resource_create = neutron_resource_create;
screen->resource_destroy = neutron_resource_destroy;
neutron_screen->ml_device.base.ml_operation_supported = neutron_ml_operation_supported;
neutron_screen->ml_device.base.ml_subgraph_create = neutron_ml_subgraph_create;
neutron_screen->ml_device.base.ml_subgraph_destroy = neutron_ml_subgraph_destroy;
screen->get_ml_device = neutron_get_ml_device;
return screen;
}

View file

@ -0,0 +1,90 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "renderonly/renderonly.h"
#include "util/log.h"
#include "util/macros.h"
#ifndef NEUTRON_DEVICE_H
#define NEUTRON_DEVICE_H
enum neutron_dbg {
NEUTRON_DBG_MSGS = BITFIELD_BIT(0),
NEUTRON_DBG_DUMP_BOS = BITFIELD_BIT(1),
NEUTRON_DBG_ZERO = BITFIELD_BIT(2),
};
#define DBG_ENABLED(flag) unlikely(neutron_debug &(flag))
extern int neutron_debug;
#define DBG(fmt, ...) \
do { \
if (DBG_ENABLED(NEUTRON_DBG_MSGS)) \
mesa_logd(fmt, ##__VA_ARGS__); \
} while (0)
struct neutron_ml_device {
struct pipe_ml_device base;
struct pipe_context *context;
};
static inline struct neutron_ml_device *
neutron_ml_device(struct pipe_ml_device *dev)
{
return (struct neutron_ml_device *)dev;
}
struct neutron_screen {
struct pipe_screen pscreen;
struct neutron_ml_device ml_device;
int fd;
};
static inline struct neutron_screen *
neutron_screen(struct pipe_screen *p)
{
return (struct neutron_screen *)p;
}
static inline struct neutron_screen *
neutron_ml_device_screen(struct pipe_ml_device *pdevice)
{
struct neutron_ml_device *dev = neutron_ml_device(pdevice);
return container_of(dev, struct neutron_screen, ml_device);
}
struct neutron_context {
struct pipe_context base;
uint32_t syncobj_handle;
};
static inline struct neutron_context *
neutron_context(struct pipe_context *pctx)
{
return (struct neutron_context *)pctx;
}
struct neutron_resource {
struct pipe_resource base;
uint32_t handle;
uint64_t size;
void *map;
};
static inline struct neutron_resource *
neutron_resource(struct pipe_resource *p)
{
return (struct neutron_resource *)p;
}
struct pipe_screen *
neutron_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro);
#endif /* NEUTRON_DEVICE_H */

View file

@ -0,0 +1,382 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_state.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "util/u_inlines.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_ml.h"
static const char *
neutron_tensor_name(enum neutron_tensor_type type)
{
switch (type) {
case NEUTRON_WEIGHTS:
return "NeutronWeights";
case NEUTRON_MICROCODE:
return "NeutronMicrocode";
case NEUTRON_KERNELS:
return "NeutronKernels";
case NEUTRON_SCRATCH:
return "NeutronScratch";
case NEUTRON_PROFILE:
return "NeutronProfile";
case NEUTRON_DEBUG:
return "NeutronDebug";
case NEUTRON_DATA_INPUT:
return "Input";
case NEUTRON_DATA_OUTPUT:
return "Output";
default:
return "N/A";
}
}
static void
neutron_dump_buffer(const uint8_t *ptr, const char *name,
int id, int offset, unsigned size)
{
char buffer[255];
snprintf(buffer, sizeof(buffer), "mesa-%s-%03u.bin", name, id);
FILE *f = fopen(buffer, "wb");
assert(f);
fwrite(ptr + offset, 1, size, f);
if (ferror(f)) {
DBG("Error in writing to file: %s\n", strerror(errno));
}
fflush(f);
fclose(f);
}
static void
neutron_dump_tensor(struct pipe_resource *bo,
struct neutron_tensor *ntensor, int id)
{
uint8_t *buf = neutron_resource(bo)->map;
neutron_dump_buffer(buf, neutron_tensor_name(ntensor->type), id,
ntensor->offset, ntensor->size);
}
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *operation)
{
if (operation->type != PIPE_ML_OPERATION_TYPE_CUSTOM)
return false;
if (!operation->custom.name ||
strcmp(operation->custom.name, "NeutronGraph") != 0)
return false;
if (operation->input_count <= NEUTRON_CUSTOM_INPUT_MAX)
return false;
return true;
}
static unsigned
get_tensor_size(const struct pipe_tensor *tensor)
{
unsigned size = 1;
for (unsigned i = 0; i < 4; i++)
size *= tensor->dims[i];
return size;
}
static enum neutron_tensor_type
get_tensor_type(struct pipe_tensor *tensor, bool is_input)
{
for (unsigned i = 0; i < NEUTRON_CUSTOM_MAX; i++) {
if (strcmp(tensor->name, neutron_tensor_name(i)) == 0)
return i;
}
return is_input ? NEUTRON_DATA_INPUT : NEUTRON_DATA_OUTPUT;
}
static bool
is_custom_input(struct neutron_tensor *ntensor)
{
return ntensor->type < NEUTRON_CUSTOM_INPUT_MAX;
}
static void
create_tensor(struct neutron_subgraph *subgraph,
struct pipe_tensor *tensor, bool is_input)
{
struct neutron_tensor ntensor = {0};
ntensor.type = get_tensor_type(tensor, is_input);
ntensor.size = get_tensor_size(tensor);
ntensor.tensor = tensor;
if (ntensor.type == NEUTRON_DATA_INPUT)
ntensor.id = subgraph->data_inputs++;
else if (ntensor.type == NEUTRON_DATA_OUTPUT)
ntensor.id = subgraph->data_outputs++;
util_dynarray_append(&subgraph->tensors, ntensor);
}
static unsigned
get_header_array_size(struct neutron_subgraph *subgraph)
{
unsigned num_tensors =
util_dynarray_num_elements(&subgraph->tensors, struct neutron_tensor);
return num_tensors + 4;
}
static unsigned
get_buffer_size(struct neutron_subgraph *subgraph)
{
unsigned size = 0;
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor)
size += NEUTRON_ALIGN(ntensor->size);
size += subgraph->header.size;
return size;
}
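/*
 * Layout of the offset header (one 32-bit entry per tensor), as addressed
 * by get_header_index() below:
 *
 *   [0 .. data_inputs-1]                          data input tensors
 *   [data_inputs]                                 weights
 *   [data_inputs+1 .. data_inputs+data_outputs]   data output tensors
 *   [data_inputs+data_outputs+1]                  kernels
 *   [data_inputs+data_outputs+2]                  scratch
 *   [data_inputs+data_outputs+3 .. +6]            reserved
 *   [data_inputs+data_outputs+7]                  profile
 *   [data_inputs+data_outputs+8]                  debug
 *
 * The microcode offset is not stored in the header; it is passed to the
 * kernel through drm_neutron_inference_job.microcode_offset instead.
 */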
static int
get_header_index(struct neutron_subgraph *subgraph,
struct neutron_tensor *ntensor)
{
switch (ntensor->type) {
case NEUTRON_DATA_INPUT:
return ntensor->id;
case NEUTRON_WEIGHTS:
return subgraph->data_inputs;
case NEUTRON_DATA_OUTPUT:
return subgraph->data_inputs + 1 + ntensor->id;
case NEUTRON_KERNELS:
return subgraph->data_inputs + subgraph->data_outputs + 1;
case NEUTRON_SCRATCH:
return subgraph->data_inputs + subgraph->data_outputs + 2;
case NEUTRON_PROFILE:
/* 4 entries reserved */
return subgraph->data_inputs + subgraph->data_outputs + 7;
case NEUTRON_DEBUG:
return subgraph->data_inputs + subgraph->data_outputs + 8;
default:
return -1;
}
}
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count)
{
struct neutron_screen *screen = neutron_ml_device_screen(pdevice);
struct neutron_ml_device *dev = neutron_ml_device(pdevice);
const struct pipe_ml_operation *operation = &poperations[0];
struct neutron_subgraph *subgraph;
uint32_t *offset_array;
unsigned crt_offset = 0;
if (count > 1) {
DBG("Expect a single NeutronGraph per partition!\n");
return NULL;
}
if (!dev->context)
dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0);
subgraph = calloc(1, sizeof(*subgraph));
subgraph->base.device = pdevice;
subgraph->tensors = UTIL_DYNARRAY_INIT;
subgraph->header = UTIL_DYNARRAY_INIT;
for (unsigned i = 0; i < operation->input_count; i++)
create_tensor(subgraph, operation->input_tensors[i], true);
for (unsigned i = 0; i < operation->output_count; i++)
create_tensor(subgraph, operation->output_tensors[i], false);
if (!util_dynarray_resize(&subgraph->header, uint32_t,
get_header_array_size(subgraph)))
return NULL;
memset(util_dynarray_begin(&subgraph->header), 0, subgraph->header.size);
subgraph->bo = pipe_buffer_create(dev->context->screen, 0,
PIPE_USAGE_DEFAULT,
get_buffer_size(subgraph));
if (!subgraph->bo)
return NULL;
if (DBG_ENABLED(NEUTRON_DBG_ZERO)) {
struct pipe_transfer *transfer;
struct neutron_resource *rsc = neutron_resource(subgraph->bo);
uint8_t *buf = pipe_buffer_map(dev->context, subgraph->bo,
PIPE_MAP_WRITE, &transfer);
memset(buf, 0, pipe_buffer_size(subgraph->bo));
pipe_buffer_unmap(dev->context, transfer);
}
DBG("%s: Tensor list:\n", __func__);
DBG("%16s %3s %6s %8s %8s %8s\n", "TYPE", "ID", "INDEX", "OFFSET", "SIZE", "HDR_IDX");
offset_array = util_dynarray_begin(&subgraph->header);
crt_offset = NEUTRON_ALIGN(subgraph->header.size);
/* Translate all tensors into buffer sections */
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
unsigned header_index = get_header_index(subgraph, ntensor);
/* Microcode offset passed via ioctl, all others stored in header */
if (ntensor->type == NEUTRON_MICROCODE)
subgraph->microcode_offset = crt_offset;
else
offset_array[header_index] = crt_offset;
ntensor->offset = crt_offset;
crt_offset += NEUTRON_ALIGN(ntensor->size);
/* Custom inputs are written in the buffer during setup */
if (is_custom_input(ntensor)) {
pipe_buffer_write(dev->context, subgraph->bo, ntensor->offset,
ntensor->size, ntensor->tensor->data);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, 0);
}
DBG("%16s %3d %6d %8d %8d %2d\n", neutron_tensor_name(ntensor->type),
ntensor->id, ntensor->tensor->index, ntensor->offset, ntensor->size,
header_index);
}
pipe_buffer_write(dev->context, subgraph->bo, 0, subgraph->header.size,
util_dynarray_begin(&subgraph->header));
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) {
uint8_t *buf = neutron_resource(subgraph->bo)->map;
neutron_dump_buffer(buf, "header", 0, 0, subgraph->header.size);
}
return &subgraph->base;
}
static struct neutron_tensor *
get_tensor_by_id(struct neutron_subgraph *subgraph,
enum neutron_tensor_type type, unsigned index)
{
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
if (ntensor->type == type && ntensor->tensor->index == index)
return ntensor;
}
return NULL;
}
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned inputs_count, unsigned input_idxs[],
void *inputs[], bool is_signed[])
{
struct neutron_screen *screen = neutron_screen(pcontext->screen);
struct neutron_context *context = neutron_context(pcontext);
struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
struct drm_neutron_submit_job submit = {0};
int ret;
for (unsigned i = 0; i < inputs_count; i++) {
struct neutron_tensor *ntensor =
get_tensor_by_id(subgraph, NEUTRON_DATA_INPUT, input_idxs[i]);
DBG("Prepare input tensor: idx=%d, offset=%d, size=%d\n",
input_idxs[i], ntensor->offset, ntensor->size);
pipe_buffer_write(pcontext, subgraph->bo, ntensor->offset,
ntensor->size, inputs[i]);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, i);
}
/* Transfer ownership of output sections to device */
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
if (ntensor->type == NEUTRON_DATA_OUTPUT) {
struct pipe_transfer *transfer;
pipe_buffer_map_range(pcontext, subgraph->bo, ntensor->offset,
ntensor->size, PIPE_MAP_WRITE, &transfer);
pipe_buffer_unmap(pcontext, transfer);
}
}
submit.type = DRM_NEUTRON_JOB_INFERENCE;
submit.bo_handle = neutron_resource(subgraph->bo)->handle;
submit.syncobj_handle = context->syncobj_handle;
submit.inference.microcode_offset = subgraph->microcode_offset;
submit.inference.tensor_offset = 0;
submit.inference.tensor_count =
util_dynarray_num_elements(&subgraph->header, uint32_t);
DBG("Submitting job\n");
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &submit);
assert(ret == 0);
}
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned outputs_count,
unsigned output_idxs[], void *outputsv[],
bool is_signed[])
{
struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
struct neutron_context *context = neutron_context(pcontext);
struct neutron_screen *screen = neutron_screen(pcontext->screen);
int ret;
ret = drmSyncobjWait(screen->fd, &context->syncobj_handle, 1, INT64_MAX,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
assert(ret == 0);
DBG("Job finished\n");
for (unsigned i = 0; i < outputs_count; i++) {
struct neutron_tensor *ntensor =
get_tensor_by_id(subgraph, NEUTRON_DATA_OUTPUT, output_idxs[i]);
DBG("Read output tensor: idx=%d, offset=%d, size=%d\n",
output_idxs[i], ntensor->offset, ntensor->size);
pipe_buffer_read(pcontext, subgraph->bo, ntensor->offset,
ntensor->size, outputsv[i]);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, i);
}
}
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
struct pipe_ml_subgraph *psubgraph)
{
struct pipe_context *pcontext = neutron_ml_device(pdevice)->context;
struct neutron_screen *screen = neutron_screen(pcontext->screen);
struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
struct drm_gem_close arg = {0};
int ret;
arg.handle = neutron_resource(subgraph->bo)->handle;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
pipe_resource_reference(&subgraph->bo, NULL);
util_dynarray_fini(&subgraph->tensors);
util_dynarray_fini(&subgraph->header);
free(subgraph);
}

View file

@ -0,0 +1,73 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#ifndef NEUTRON_ML_H
#define NEUTRON_ML_H
#include <util/u_dynarray.h>
#include <util/u_math.h>
#include "neutron_device.h"
#define NEUTRON_ALIGN(value) align(value, 64)
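/*
 * Tensor categories attached to a NeutronGraph custom operation. Types
 * below NEUTRON_CUSTOM_MAX are recognized by their tensor name; of those,
 * the ones below NEUTRON_CUSTOM_INPUT_MAX carry data that is copied into
 * the job buffer at subgraph creation, while the rest only reserve space.
 * NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT are the regular runtime
 * tensors written before and read back after each inference.
 */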
enum neutron_tensor_type {
NEUTRON_WEIGHTS = 0,
NEUTRON_MICROCODE,
NEUTRON_KERNELS,
NEUTRON_CUSTOM_INPUT_MAX,
NEUTRON_SCRATCH = NEUTRON_CUSTOM_INPUT_MAX,
NEUTRON_PROFILE,
NEUTRON_DEBUG,
NEUTRON_CUSTOM_MAX,
NEUTRON_DATA_INPUT = NEUTRON_CUSTOM_MAX,
NEUTRON_DATA_OUTPUT,
};
struct neutron_tensor {
struct pipe_tensor *tensor;
enum neutron_tensor_type type;
unsigned size;
unsigned offset;
unsigned id; /* For NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT */
};
struct neutron_subgraph {
struct pipe_ml_subgraph base;
struct pipe_resource *bo;
struct util_dynarray tensors; /* struct neutron_tensor */
struct util_dynarray header;
unsigned microcode_offset;
unsigned data_inputs; /* number of non-custom input tensors */
unsigned data_outputs; /* number of non-custom output tensors */
};
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *operation);
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count);
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned inputs_count, unsigned input_idxs[],
void *inputs[], bool is_signed[]);
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned outputs_count,
unsigned output_idxs[], void *outputs[],
bool is_signed[]);
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
struct pipe_ml_subgraph *psubgraph);
#endif /* NEUTRON_ML_H */

View file

@ -304,6 +304,11 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi
operation->type = PIPE_ML_OPERATION_TYPE_QUANTIZE;
break;
}
case kTfLiteBuiltinCustom: {
operation->type = PIPE_ML_OPERATION_TYPE_CUSTOM;
operation->custom.name = node_registration->custom_name ? strdup(node_registration->custom_name) : NULL;
break;
}
default:
return false;
}
@ -403,6 +408,8 @@ fill_tensor(struct teflon_delegate *delegate, TfLiteContext *tf_context, struct
tensor->dims[out_dim] = 1;
}
tensor->name = strdup(tf_tensor.name);
if (tf_tensor.quantization.type == kTfLiteAffineQuantization) {
const TfLiteAffineQuantization *quant = (const TfLiteAffineQuantization *)tf_tensor.quantization.params;
tensor->scale = quant->scale->data[0];
@ -536,6 +543,9 @@ dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_op
case PIPE_ML_OPERATION_TYPE_LEAKY_RELU:
teflon_debug("%-15s ", "LEAKY_RELU");
break;
case PIPE_ML_OPERATION_TYPE_CUSTOM:
teflon_debug("%-15s ", "CUSTOM");
break;
}
char *input_buf = ralloc_strdup(NULL, "");
@ -775,6 +785,8 @@ tflite_builtin_op_name(TfLiteBuiltinOperator op)
return "TRANSPOSE";
case kTfLiteBuiltinLeakyRelu:
return "LEAKY_RELU";
case kTfLiteBuiltinCustom:
return "CUSTOM";
default:
return "unknown";
}
@ -929,8 +941,12 @@ PrepareDelegate(TfLiteContext *tf_context, TfLiteDelegate *tf_delegate)
}
teflon_debug("\n");
if (supported)
if (supported) {
supported_nodes->data[node_count++] = node_index;
// If a custom node is claimed by the teflon delegate, tell other delegates to skip it
if (registration->builtin_code == kTfLiteBuiltinCustom)
registration->builtin_code = kTfLiteBuiltinDelegate;
}
}
supported_nodes->size = node_count;
@ -994,7 +1010,8 @@ find_accel_device()
for (int i = 0; i < n; i++) {
if (strstr("rocket", devs[i]->driver_name) ||
strstr("ethosu", devs[i]->driver_name))
strstr("ethosu", devs[i]->driver_name) ||
strstr("neutron", devs[i]->driver_name))
device = devs[i];
else
pipe_loader_release(&devs[i], 1);

View file

@ -1039,6 +1039,10 @@ struct pipe_tensor {
* Zero-points used to quantize this tensor, per-axis quantization.
*/
int *zero_points;
/**
* Tensor name
*/
const char *name;
/**
* Whether the tensor contains data in INT8 or UINT8 format.
*/
@ -1072,6 +1076,7 @@ enum pipe_ml_operation_type {
PIPE_ML_OPERATION_TYPE_QUANTIZE,
PIPE_ML_OPERATION_TYPE_MAXIMUM,
PIPE_ML_OPERATION_TYPE_MINIMUM,
PIPE_ML_OPERATION_TYPE_CUSTOM,
};
enum pipe_ml_pooling_type {
@ -1276,6 +1281,9 @@ struct pipe_ml_operation
struct {
float alpha;
} leakyrelu;
struct {
char *name;
} custom;
};
};

View file

@ -196,6 +196,12 @@ if with_gallium_ethosu
else
driver_ethosu = declare_dependency()
endif
if with_gallium_neutron
subdir('winsys/neutron/drm')
subdir('drivers/neutron')
else
driver_neutron = declare_dependency()
endif
if with_gallium_zink
if not with_platform_windows
subdir('winsys/zink/drm')

View file

@ -59,7 +59,7 @@ libgallium_dri = shared_library(
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
driver_tegra, driver_i915, driver_svga, driver_virgl,
driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
driver_asahi, driver_crocus, driver_rocket, driver_ethosu
driver_asahi, driver_crocus, driver_rocket, driver_ethosu, driver_neutron
],
install : true,
name_suffix : libname_suffix,

View file

@ -10,6 +10,7 @@ libteflon = shared_library(
driver_etnaviv,
driver_rocket,
driver_ethosu,
driver_neutron,
idep_nir,
idep_mesautil,
],

View file

@ -25,8 +25,10 @@
#define TEST_ADD 1
#define TEST_FULLY_CONNECTED 1
#define TEST_MODELS 1
#define TEST_COMPILED_MODELS 1
#define TOLERANCE 8
#define COMPILED_MODELS_TOLERANCE 12
std::vector<bool> is_signed{false}; /* TODO: Support INT8? */
std::vector<bool> padding_same{false, true};
@ -41,7 +43,7 @@ std::vector<int> fc_channels{23, 46, 128, 256, 512};
std::vector<int> fc_size{128, 1280, 25088, 62720};
static void
test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance)
run_and_validate(TfLiteModel *cpu_model, TfLiteModel *npu_model, std::string cache_dir, unsigned tolerance)
{
void **input = NULL;
size_t num_inputs;
@ -51,11 +53,8 @@ test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance
size_t num_outputs;
void **npu_output;
TfLiteModel *model = TfLiteModelCreate(buf, buf_size);
assert(model);
run_model(model, EXECUTOR_CPU, &input, &num_inputs, &cpu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
run_model(model, EXECUTOR_NPU, &input, &num_inputs, &npu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
run_model(cpu_model, EXECUTOR_CPU, &input, &num_inputs, &cpu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
run_model(npu_model, EXECUTOR_NPU, &input, &num_inputs, &npu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
const char *dump_output = os_get_option("TEFLON_DUMP_OUTPUT");
if (dump_output && atoi(dump_output) == 1) {
@ -221,6 +220,15 @@ test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance
free(output_sizes);
free(output_types);
}
static void
test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance)
{
TfLiteModel *model = TfLiteModelCreate(buf, buf_size);
assert(model);
run_and_validate(model, model, cache_dir, tolerance);
TfLiteModelDelete(model);
}
@ -249,6 +257,57 @@ test_model_file(std::string file_name, unsigned tolerance, bool use_cache)
close(model_fd);
}
static void
test_compiled_model(void *buf, size_t buf_size, void *cbuf, size_t cbuf_size,
std::string cache_dir, unsigned tolerance)
{
TfLiteModel *cpu_model = TfLiteModelCreate(buf, buf_size);
TfLiteModel *npu_model = TfLiteModelCreate(cbuf, cbuf_size);
assert(cpu_model);
assert(npu_model);
run_and_validate(cpu_model, npu_model, cache_dir, tolerance);
TfLiteModelDelete(cpu_model);
TfLiteModelDelete(npu_model);
}
static void
test_compiled_model_file(std::string file_name, std::string compiled_file_name,
unsigned tolerance, bool use_cache)
{
std::ostringstream cache_dir;
if (use_cache) {
auto path = std::filesystem::path(file_name);
cache_dir << "/var/cache/teflon_tests/";
cache_dir << path.parent_path().filename().string();
cache_dir << "_";
cache_dir << path.stem().string();
}
srand(4);
struct stat sb, csb;
int model_fd = open(file_name.c_str(), O_RDONLY);
int cmodel_fd = open(compiled_file_name.c_str(), O_RDONLY);
fstat(model_fd, &sb);
fstat(cmodel_fd, &csb);
void *model_data = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, model_fd, 0);
void *cmodel_data = mmap(0, csb.st_size, PROT_READ, MAP_PRIVATE, cmodel_fd, 0);
test_compiled_model(model_data, sb.st_size, cmodel_data, csb.st_size,
cache_dir.str(), tolerance);
munmap(model_data, sb.st_size);
munmap(cmodel_data, csb.st_size);
close(model_fd);
close(cmodel_fd);
}
void
test_conv(int input_size, int weight_size, int input_channels, int output_channels,
int stride, bool padding_same, bool is_signed, bool depthwise, int seed)
@ -620,6 +679,66 @@ INSTANTIATE_TEST_SUITE_P(
#endif
#if TEST_COMPILED_MODELS
class CompiledModels : public testing::TestWithParam<std::string> {};
TEST_P(CompiledModels, Op)
{
std::ostringstream file_path, cfile_path;
auto test_name = GetParam();
test_name.replace(test_name.find("_"), 1, "/");
assert(os_get_option("TEFLON_TEST_DATA"));
file_path << os_get_option("TEFLON_TEST_DATA") << "/models/" << test_name << ".tflite";
cfile_path << os_get_option("TEFLON_TEST_DATA") << "/compiled_models/" << test_name << ".tflite";
test_compiled_model_file(file_path.str(), cfile_path.str(), COMPILED_MODELS_TOLERANCE, true);
}
std::vector<std::string>
get_compiled_model_files(void)
{
assert(os_get_option("TEFLON_TEST_DATA"));
std::stringstream dir, cdir;
dir << os_get_option("TEFLON_TEST_DATA") << "/models";
cdir << os_get_option("TEFLON_TEST_DATA") << "/compiled_models";
std::vector<std::string> paths;
std::filesystem::recursive_directory_iterator b(dir.str());
for (auto const &f : b) {
if (f.path().extension() != ".tflite")
continue;
std::filesystem::path relative_path = std::filesystem::relative(f.path(), dir.str());
std::filesystem::path compiled_path = std::filesystem::path(cdir.str()) / relative_path;
if (!std::filesystem::exists(compiled_path))
continue;
std::stringstream path;
path << f.path().parent_path().filename().string();
path << "_" << f.path().stem().string();
paths.push_back(path.str());
}
std::sort(paths.begin(), paths.end());
return paths;
}
static inline std::string
CompiledModelsTestCaseName(
const testing::TestParamInfo<std::string> &info)
{
return info.param;
}
INSTANTIATE_TEST_SUITE_P(
, CompiledModels,
::testing::ValuesIn(get_compiled_model_files()),
CompiledModelsTestCaseName);
#endif
int
main(int argc, char **argv)
{
@ -656,6 +775,8 @@ main(int argc, char **argv)
return 0;
} else if (argc > 1 && !strcmp(argv[1], "run_model")) {
test_model_file(std::string(argv[2]), TOLERANCE, false);
} else if (argc > 1 && !strcmp(argv[1], "run_compiled_model")) {
test_compiled_model_file(std::string(argv[2]), std::string(argv[3]), COMPILED_MODELS_TOLERANCE, false);
} else {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();

View file

@ -0,0 +1,13 @@
# Copyright 2017 Broadcom
# SPDX-License-Identifier: MIT
libneutronwinsys = static_library(
'neutronwinsys',
files('neutron_drm_winsys.c'),
include_directories : [
inc_src, inc_include,
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
],
gnu_symbol_visibility : 'hidden',
dependencies: [dep_libdrm, idep_mesautil],
)

View file

@ -0,0 +1,17 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* SPDX-License-Identifier: MIT
*/
#ifndef __NEUTRON_DRM_PUBLIC_H__
#define __NEUTRON_DRM_PUBLIC_H__
struct pipe_screen;
struct pipe_screen_config;
struct pipe_screen *
neutron_drm_screen_create(int drmFD, const struct pipe_screen_config *config);
#endif /* __NEUTRON_DRM_PUBLIC_H__ */

View file

@ -0,0 +1,20 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_file.h"
#include "util/u_screen.h"
#include "neutron/neutron_device.h"
#include "neutron_drm_public.h"
struct pipe_screen *
neutron_drm_screen_create(int fd, const struct pipe_screen_config *config)
{
return u_pipe_screen_lookup_or_create(os_dupfd_cloexec(fd), config, NULL,
neutron_screen_create);
}