Add new Gallium driver for NXP's Neutron NPU

Add source code for the Neutron Gallium driver as well as some
helper scripts for model compiling. The compiler can only be run
on x86 for now.

Signed-off-by: Ioana Ciocoi-Radulescu <ruxandra.radulescu@nxp.com>
This commit is contained in:
Ioana Ciocoi-Radulescu 2026-04-24 13:36:36 +03:00
parent 37cb2a514f
commit 291d1d44b1
20 changed files with 1621 additions and 3 deletions

View file

@ -3,6 +3,7 @@
src/gallium/drivers/ethosu/**/*
src/gallium/drivers/i915
src/gallium/drivers/neutron/**/*
src/gallium/drivers/r300/compiler/*
src/gallium/drivers/rocket/**/*
src/gallium/targets/teflon/**/*

View file

@ -0,0 +1,130 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/* Copyright 2025-2026 NXP */
/*
 * UAPI for the Neutron NPU DRM accel driver. Mirrors the kernel header;
 * keep struct layout and IOCTL numbering in sync with the kernel side.
 */
#ifndef __NEUTRON_ACCEL_H__
#define __NEUTRON_ACCEL_H__
#include "drm.h"
#if defined(__cplusplus)
extern "C" {
#endif
/**
 * enum drm_neutron_ioctl - Neutron IOCTL IDs
 *
 * @DRM_NEUTRON_CREATE_BO: Create a buffer object
 * @DRM_NEUTRON_SYNC_BO: Sync (parts of) the buffer object memory
 * @DRM_NEUTRON_SUBMIT_JOB: Submit a job to the device
 */
enum drm_neutron_ioctl {
DRM_NEUTRON_CREATE_BO = 0,
DRM_NEUTRON_SYNC_BO,
DRM_NEUTRON_SUBMIT_JOB,
};
/**
 * struct drm_neutron_create_bo - Create a buffer object and return buffer
 * info to user
 *
 * @size: Size in bytes of requested buffer. May be updated by driver
 * if allocated size different than requested
 * @handle: Returned handle for the new buffer object
 * @pad: MBZ
 * @map_offset: Returned offset for mmap() calls
 */
struct drm_neutron_create_bo {
__u64 size;
__u32 handle;
__u32 pad;
__u64 map_offset;
};
/**
 * enum drm_neutron_sync_dir - Direction of buffer object synchronization
 *
 * @DRM_NEUTRON_SYNC_TO_DEVICE: Sync from CPU to device
 * @DRM_NEUTRON_SYNC_FROM_DEVICE: Sync from device to CPU
 */
enum drm_neutron_sync_dir {
DRM_NEUTRON_SYNC_TO_DEVICE = 0,
DRM_NEUTRON_SYNC_FROM_DEVICE,
};
/**
 * struct drm_neutron_sync_bo - Sync buffer object memory
 *
 * @handle: Handle of buffer object to sync
 * @direction: Direction of sync, can be one of enum drm_neutron_sync_dir
 * @size: Size of the memory to sync, in bytes
 * @offset: Offset inside the buffer, in bytes
 */
struct drm_neutron_sync_bo {
__u32 handle;
__u32 direction;
__u64 size;
__u64 offset;
};
/**
 * enum drm_neutron_job_type - Type of job to submit to Neutron device
 *
 * @DRM_NEUTRON_JOB_INFERENCE: Inference job
 */
enum drm_neutron_job_type {
DRM_NEUTRON_JOB_INFERENCE = 0,
};
/**
 * struct drm_neutron_inference_job - Inference job descriptor
 *
 * @tensor_offset: Offset of tensor array inside job BO
 * @microcode_offset: Microcode offset inside BO
 * @tensor_count: Number of valid tensors
 * @pad: MBZ
 */
struct drm_neutron_inference_job {
__u32 tensor_offset;
__u32 microcode_offset;
__u32 tensor_count;
__u32 pad[5];
};
/**
 * struct drm_neutron_submit_job - Submit a job to Neutron device
 *
 * @type: Job type, one of enum drm_neutron_job_type
 * @bo_handle: BO handle for this job
 * @inference: Inference job descriptor (when type is DRM_NEUTRON_JOB_INFERENCE)
 * @reserved: Reserved for future job types
 * @syncobj_handle: Handle of syncobj on which user waits for job completion
 * @pad: MBZ
 */
struct drm_neutron_submit_job {
__u32 type;
__u32 bo_handle;
union {
struct drm_neutron_inference_job inference;
__u32 reserved[8];
};
__u32 syncobj_handle;
__u32 pad;
};
/* IOCTL request codes, offset from DRM_COMMAND_BASE per DRM convention. */
#define DRM_IOCTL_NEUTRON_CREATE_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_CREATE_BO, \
struct drm_neutron_create_bo)
#define DRM_IOCTL_NEUTRON_SYNC_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SYNC_BO, \
struct drm_neutron_sync_bo)
#define DRM_IOCTL_NEUTRON_SUBMIT_JOB \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SUBMIT_JOB, \
struct drm_neutron_submit_job)
#if defined(__cplusplus)
}
#endif
#endif /* __NEUTRON_ACCEL_H__ */

View file

@ -205,7 +205,7 @@ elif gallium_drivers.contains('all')
gallium_drivers = [
'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915',
'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris',
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu'
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu', 'neutron'
]
endif
@ -234,6 +234,7 @@ with_gallium_d3d12 = gallium_drivers.contains('d3d12')
with_gallium_asahi = gallium_drivers.contains('asahi')
with_gallium_rocket = gallium_drivers.contains('rocket')
with_gallium_ethosu = gallium_drivers.contains('ethosu')
with_gallium_neutron = gallium_drivers.contains('neutron')
foreach gallium_driver : gallium_drivers
pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper())
endforeach

View file

@ -87,7 +87,7 @@ option(
choices : [
'all', 'auto',
'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris',
'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'lima', 'llvmpipe', 'neutron', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink',
],
description : 'List of gallium drivers to build. If this is set to auto ' +

View file

@ -0,0 +1,2 @@
BasedOnStyle: InheritParentConfig
DisableFormat: false

View file

@ -0,0 +1,102 @@
CompiledModels.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail
CompiledModels.Op/inception_002,Fail
CompiledModels.Op/inception_003,Fail
CompiledModels.Op/inception_008,Fail
CompiledModels.Op/inception_013,Fail
CompiledModels.Op/inception_015,Fail
CompiledModels.Op/inception_016,Fail
CompiledModels.Op/inception_021,Fail
CompiledModels.Op/inception_024,Fail
CompiledModels.Op/inception_027,Fail
CompiledModels.Op/inception_030,Fail
CompiledModels.Op/inception_032,Fail
CompiledModels.Op/inception_033,Fail
CompiledModels.Op/inception_041,Fail
CompiledModels.Op/inception_055,Fail
CompiledModels.Op/inception_059,Fail
CompiledModels.Op/inception_063,Fail
CompiledModels.Op/inception_064,Fail
CompiledModels.Op/inception_071,Fail
CompiledModels.Op/inception_073,Fail
CompiledModels.Op/inception_075,Fail
CompiledModels.Op/inception_077,Fail
CompiledModels.Op/mobiledet_002,Fail
CompiledModels.Op/mobiledet_004,Fail
CompiledModels.Op/mobiledet_006,Fail
CompiledModels.Op/mobiledet_009,Fail
CompiledModels.Op/mobiledet_012,Fail
CompiledModels.Op/mobiledet_015,Fail
CompiledModels.Op/mobiledet_017,Fail
CompiledModels.Op/mobiledet_020,Fail
CompiledModels.Op/mobiledet_029,Fail
CompiledModels.Op/mobiledet_039,Fail
CompiledModels.Op/mobiledet_042,Fail
CompiledModels.Op/mobiledet_046,Fail
CompiledModels.Op/mobiledet_050,Fail
CompiledModels.Op/mobiledet_063,Fail
CompiledModels.Op/mobiledet_086,Fail
CompiledModels.Op/mobiledet_087,Fail
CompiledModels.Op/mobiledet_106,Fail
CompiledModels.Op/mobiledet_119,Fail
CompiledModels.Op/mobiledet_ssdlite_mobiledet_coco_qat_postprocess,Fail
CompiledModels.Op/mobilenetv1_001,Fail
CompiledModels.Op/mobilenetv1_002,Fail
CompiledModels.Op/mobilenetv1_003,Fail
CompiledModels.Op/mobilenetv1_005,Fail
CompiledModels.Op/mobilenetv1_006,Fail
CompiledModels.Op/mobilenetv1_008,Fail
CompiledModels.Op/mobilenetv1_009,Fail
CompiledModels.Op/mobilenetv1_012,Fail
CompiledModels.Op/mobilenetv1_016,Fail
CompiledModels.Op/mobilenetv1_018,Fail
CompiledModels.Op/mobilenetv1_020,Fail
CompiledModels.Op/mobilenetv1_025,Fail
CompiledModels.Op/mobilenetv1_026,Fail
CompiledModels.Op/mobilenetv1_mobilenet_v1_1_224_quant,Fail
CompiledModels.Op/mobilenetv2_001,Fail
CompiledModels.Op/mobilenetv2_003,Fail
CompiledModels.Op/mobilenetv2_006,Fail
CompiledModels.Op/mobilenetv2_007,Fail
CompiledModels.Op/mobilenetv2_010,Fail
CompiledModels.Op/mobilenetv2_014,Fail
CompiledModels.Op/mobilenetv2_018,Fail
CompiledModels.Op/mobilenetv2_025,Fail
CompiledModels.Op/mobilenetv2_029,Fail
CompiledModels.Op/mobilenetv2_040,Fail
CompiledModels.Op/mobilenetv2_044,Fail
CompiledModels.Op/mobilenetv2_059,Fail
CompiledModels.Op/mobilenetv2_061,Fail
CompiledModels.Op/movenetlightning_103,Fail
CompiledModels.Op/movenetlightning_104,Fail
CompiledModels.Op/movenetthunder_103,Fail
CompiledModels.Op/movenetthunder_104,Fail
CompiledModels.Op/ssdmobilenetv2_001,Fail
CompiledModels.Op/ssdmobilenetv2_003,Fail
CompiledModels.Op/ssdmobilenetv2_007,Fail
CompiledModels.Op/ssdmobilenetv2_040,Fail
CompiledModels.Op/ssdmobilenetv2_047,Fail
CompiledModels.Op/ssdmobilenetv2_052,Fail
CompiledModels.Op/ssdmobilenetv2_054,Fail
CompiledModels.Op/ssdmobilenetv2_065,Fail
CompiledModels.Op/ssdmobilenetv2_069,Fail
CompiledModels.Op/ssdmobilenetv2_072,Fail
CompiledModels.Op/ssdmobilenetv2_073,Fail
CompiledModels.Op/ssdmobilenetv2_077,Fail
CompiledModels.Op/ssdmobilenetv2_081,Fail
CompiledModels.Op/ssdmobilenetv2_089,Fail
CompiledModels.Op/ssdmobilenetv2_096,Fail
CompiledModels.Op/ssdmobilenetv2_102,Fail
CompiledModels.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail
CompiledModels.Op/yolox_001,Fail
CompiledModels.Op/yolox_004,Fail
CompiledModels.Op/yolox_027,Fail
CompiledModels.Op/yolox_042,Fail
CompiledModels.Op/yolox_077,Fail
CompiledModels.Op/yolox_086,Fail
CompiledModels.Op/yolox_102,Fail
CompiledModels.Op/yolox_112,Fail
CompiledModels.Op/yolox_122,Fail
CompiledModels.Op/yolox_132,Fail
CompiledModels.Op/yolox_147,Fail
CompiledModels.Op/yolox_yolox_nano_full_integer_quant,Fail

View file

@ -0,0 +1,227 @@
# Standard (un-compiled) tflite models are not supported on Neutron
Add.Op/.*
AddQuant.Op/.*
Conv2D.Op/.*
DepthwiseConv2D.Op/.*
FullyConnected.Op/.*
Models.Op/.*
# Compilation error due to duplicate/orphan input tensor
CompiledModels.Op/movenetlightning_117
CompiledModels.Op/movenetlightning_120
CompiledModels.Op/movenetthunder_117
CompiledModels.Op/movenetthunder_120
# Conversion rate zero (operators or operator + tensor type/size combo
# not supported yet by Neutron compiler)
CompiledModels.Op/efficientdet_066
CompiledModels.Op/efficientdet_101
CompiledModels.Op/efficientdet_136
CompiledModels.Op/efficientdet_251
CompiledModels.Op/efficientdet_252
CompiledModels.Op/efficientdet_253
CompiledModels.Op/efficientdet_254
CompiledModels.Op/efficientdet_255
CompiledModels.Op/efficientdet_256
CompiledModels.Op/efficientdet_257
CompiledModels.Op/efficientdet_258
CompiledModels.Op/efficientdet_259
CompiledModels.Op/efficientdet_260
CompiledModels.Op/efficientdet_261
CompiledModels.Op/efficientdet_262
CompiledModels.Op/efficientdet_263
CompiledModels.Op/efficientdet_264
CompiledModels.Op/efficientdet_265
CompiledModels.Op/inception_082
CompiledModels.Op/micronetlarge_013
CompiledModels.Op/mobiledet_097
CompiledModels.Op/mobiledet_099
CompiledModels.Op/mobiledet_101
CompiledModels.Op/mobiledet_103
CompiledModels.Op/mobiledet_105
CompiledModels.Op/mobiledet_107
CompiledModels.Op/mobiledet_109
CompiledModels.Op/mobiledet_111
CompiledModels.Op/mobiledet_113
CompiledModels.Op/mobiledet_115
CompiledModels.Op/mobiledet_117
CompiledModels.Op/mobiledet_118
CompiledModels.Op/mobiledet_122
CompiledModels.Op/mobiledet_123
CompiledModels.Op/mobilenetv1_030
CompiledModels.Op/movenetlightning_000
CompiledModels.Op/movenetlightning_001
CompiledModels.Op/movenetlightning_002
CompiledModels.Op/movenetlightning_003
CompiledModels.Op/movenetlightning_071
CompiledModels.Op/movenetlightning_075
CompiledModels.Op/movenetlightning_079
CompiledModels.Op/movenetlightning_087
CompiledModels.Op/movenetlightning_088
CompiledModels.Op/movenetlightning_089
CompiledModels.Op/movenetlightning_090
CompiledModels.Op/movenetlightning_091
CompiledModels.Op/movenetlightning_092
CompiledModels.Op/movenetlightning_093
CompiledModels.Op/movenetlightning_094
CompiledModels.Op/movenetlightning_095
CompiledModels.Op/movenetlightning_096
CompiledModels.Op/movenetlightning_097
CompiledModels.Op/movenetlightning_098
CompiledModels.Op/movenetlightning_099
CompiledModels.Op/movenetlightning_105
CompiledModels.Op/movenetlightning_112
CompiledModels.Op/movenetlightning_113
CompiledModels.Op/movenetlightning_114
CompiledModels.Op/movenetlightning_115
CompiledModels.Op/movenetlightning_116
CompiledModels.Op/movenetlightning_118
CompiledModels.Op/movenetlightning_119
CompiledModels.Op/movenetlightning_122
CompiledModels.Op/movenetlightning_123
CompiledModels.Op/movenetlightning_124
CompiledModels.Op/movenetlightning_125
CompiledModels.Op/movenetlightning_126
CompiledModels.Op/movenetlightning_127
CompiledModels.Op/movenetlightning_128
CompiledModels.Op/movenetlightning_129
CompiledModels.Op/movenetlightning_130
CompiledModels.Op/movenetlightning_131
CompiledModels.Op/movenetlightning_132
CompiledModels.Op/movenetlightning_133
CompiledModels.Op/movenetlightning_134
CompiledModels.Op/movenetlightning_135
CompiledModels.Op/movenetlightning_136
CompiledModels.Op/movenetlightning_137
CompiledModels.Op/movenetlightning_138
CompiledModels.Op/movenetlightning_139
CompiledModels.Op/movenetlightning_140
CompiledModels.Op/movenetlightning_141
CompiledModels.Op/movenetlightning_142
CompiledModels.Op/movenetlightning_143
CompiledModels.Op/movenetlightning_144
CompiledModels.Op/movenetlightning_145
CompiledModels.Op/movenetlightning_147
CompiledModels.Op/movenetlightning_149
CompiledModels.Op/movenetlightning_150
CompiledModels.Op/movenetlightning_151
CompiledModels.Op/movenetlightning_152
CompiledModels.Op/movenetlightning_153
CompiledModels.Op/movenetlightning_154
CompiledModels.Op/movenetlightning_155
CompiledModels.Op/movenetlightning_156
CompiledModels.Op/movenetthunder_000
CompiledModels.Op/movenetthunder_001
CompiledModels.Op/movenetthunder_002
CompiledModels.Op/movenetthunder_003
CompiledModels.Op/movenetthunder_071
CompiledModels.Op/movenetthunder_075
CompiledModels.Op/movenetthunder_079
CompiledModels.Op/movenetthunder_087
CompiledModels.Op/movenetthunder_088
CompiledModels.Op/movenetthunder_089
CompiledModels.Op/movenetthunder_090
CompiledModels.Op/movenetthunder_091
CompiledModels.Op/movenetthunder_092
CompiledModels.Op/movenetthunder_093
CompiledModels.Op/movenetthunder_094
CompiledModels.Op/movenetthunder_095
CompiledModels.Op/movenetthunder_096
CompiledModels.Op/movenetthunder_097
CompiledModels.Op/movenetthunder_098
CompiledModels.Op/movenetthunder_099
CompiledModels.Op/movenetthunder_105
CompiledModels.Op/movenetthunder_112
CompiledModels.Op/movenetthunder_113
CompiledModels.Op/movenetthunder_114
CompiledModels.Op/movenetthunder_115
CompiledModels.Op/movenetthunder_116
CompiledModels.Op/movenetthunder_118
CompiledModels.Op/movenetthunder_119
CompiledModels.Op/movenetthunder_122
CompiledModels.Op/movenetthunder_123
CompiledModels.Op/movenetthunder_124
CompiledModels.Op/movenetthunder_125
CompiledModels.Op/movenetthunder_126
CompiledModels.Op/movenetthunder_127
CompiledModels.Op/movenetthunder_128
CompiledModels.Op/movenetthunder_129
CompiledModels.Op/movenetthunder_130
CompiledModels.Op/movenetthunder_131
CompiledModels.Op/movenetthunder_132
CompiledModels.Op/movenetthunder_133
CompiledModels.Op/movenetthunder_134
CompiledModels.Op/movenetthunder_135
CompiledModels.Op/movenetthunder_136
CompiledModels.Op/movenetthunder_137
CompiledModels.Op/movenetthunder_138
CompiledModels.Op/movenetthunder_139
CompiledModels.Op/movenetthunder_140
CompiledModels.Op/movenetthunder_141
CompiledModels.Op/movenetthunder_142
CompiledModels.Op/movenetthunder_143
CompiledModels.Op/movenetthunder_144
CompiledModels.Op/movenetthunder_145
CompiledModels.Op/movenetthunder_147
CompiledModels.Op/movenetthunder_149
CompiledModels.Op/movenetthunder_150
CompiledModels.Op/movenetthunder_151
CompiledModels.Op/movenetthunder_152
CompiledModels.Op/movenetthunder_153
CompiledModels.Op/movenetthunder_154
CompiledModels.Op/movenetthunder_155
CompiledModels.Op/movenetthunder_156
CompiledModels.Op/ssdmobilenetv2_049
CompiledModels.Op/ssdmobilenetv2_051
CompiledModels.Op/ssdmobilenetv2_067
CompiledModels.Op/ssdmobilenetv2_068
CompiledModels.Op/ssdmobilenetv2_070
CompiledModels.Op/ssdmobilenetv2_075
CompiledModels.Op/ssdmobilenetv2_076
CompiledModels.Op/ssdmobilenetv2_078
CompiledModels.Op/ssdmobilenetv2_083
CompiledModels.Op/ssdmobilenetv2_084
CompiledModels.Op/ssdmobilenetv2_086
CompiledModels.Op/ssdmobilenetv2_091
CompiledModels.Op/ssdmobilenetv2_092
CompiledModels.Op/ssdmobilenetv2_094
CompiledModels.Op/ssdmobilenetv2_099
CompiledModels.Op/ssdmobilenetv2_100
CompiledModels.Op/ssdmobilenetv2_101
CompiledModels.Op/ssdmobilenetv2_107
CompiledModels.Op/ssdmobilenetv2_108
CompiledModels.Op/ssdmobilenetv2_109
CompiledModels.Op/yolox_000
CompiledModels.Op/yolox_003
CompiledModels.Op/yolox_012
CompiledModels.Op/yolox_103
CompiledModels.Op/yolox_104
CompiledModels.Op/yolox_113
CompiledModels.Op/yolox_114
CompiledModels.Op/yolox_123
CompiledModels.Op/yolox_124
CompiledModels.Op/yolox_125
CompiledModels.Op/yolox_126
CompiledModels.Op/yolox_127
CompiledModels.Op/yolox_128
CompiledModels.Op/yolox_129
CompiledModels.Op/yolox_130
CompiledModels.Op/yolox_131
CompiledModels.Op/yolox_133
CompiledModels.Op/yolox_134
CompiledModels.Op/yolox_135
CompiledModels.Op/yolox_136
CompiledModels.Op/yolox_137
CompiledModels.Op/yolox_139
CompiledModels.Op/yolox_140
CompiledModels.Op/yolox_141
CompiledModels.Op/yolox_142
CompiledModels.Op/yolox_143
CompiledModels.Op/yolox_145
CompiledModels.Op/yolox_146
CompiledModels.Op/yolox_148
CompiledModels.Op/yolox_149
CompiledModels.Op/yolox_151
CompiledModels.Op/yolox_152
CompiledModels.Op/yolox_153

View file

@ -0,0 +1,58 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Compile a TFLite model for the Neutron NPU.
Usage: python compile_model.py <input.tflite> <output.tflite>
Exit codes:
0: Success
1: Converter internal error
2: Zero conversion rate (model unchanged)
3: Invalid arguments or input file
"""
import sys
import os
from eiq_neutron_sdk import neutron_converter # from https://eiq.nxp.com/repository
# Neutron compiler target platform; only one supported for now.
TARGET = "imx95"
def main():
    """Validate CLI arguments, convert the model, and write the result.

    Exit codes: 0 success, 1 converter error, 2 zero conversion rate
    (output skipped), 3 invalid arguments or input file.
    """
    if len(sys.argv) != 3:
        print("Usage: python compile_model.py <input.tflite> <output.tflite>")
        sys.exit(3)

    src, dst = sys.argv[1], sys.argv[2]

    # Reject missing or non-TFLite inputs up front.
    if not os.path.isfile(src):
        print(f"Error: Input file '{src}' does not exist")
        sys.exit(3)
    if not src.endswith('.tflite'):
        print("Error: Input file must be a .tflite model")
        sys.exit(3)

    with open(src, 'rb') as f:
        model_data = f.read()

    try:
        cmodel_data = neutron_converter.convertModel(list(model_data), TARGET)
    except Exception as e:
        print(f"Error during model conversion: {e}")
        sys.exit(1)

    # An unchanged model means the converter found nothing to offload.
    if bytes(cmodel_data) == model_data:
        print(f"Warning: Conversion rate was zero for {src}, skipping output")
        sys.exit(2)

    with open(dst, 'wb') as f:
        f.write(bytes(cmodel_data))


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,20 @@
# Copyright 2019 Google, Inc
# SPDX-License-Identifier: MIT
# Sources for the Gallium Neutron NPU driver.
files_neutron = files(
'neutron_device.c',
'neutron_ml.c',
)
# Static library with the core driver; symbols hidden by default.
libneutron = static_library(
'neutron',
files_neutron,
include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
gnu_symbol_visibility : 'hidden',
dependencies : [idep_mesautil, dep_libdrm],
)
# NOTE(review): libneutronwinsys is expected to be defined in the winsys
# meson.build — confirm the winsys target exists before enabling this driver.
driver_neutron = declare_dependency(
compile_args : '-DGALLIUM_NEUTRON',
link_with : [libneutronwinsys, libneutron]
)

View file

@ -0,0 +1,219 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Batch compiler of TFLite models for NXP's Neutron NPU
Usage modes (mutually exclusive):
neutron_compiler.py --in <input.tflite> --out <output.tflite>
neutron_compiler.py --in-dir <input_dir> --out-dir <output_dir>
neutron_compiler.py --teflon-test-models
"""
import sys
import os
import argparse
import subprocess
def parse_args(argv):
    """Parse and cross-validate command line arguments.

    Args:
        argv: argument strings, excluding the program name.

    Returns:
        argparse.Namespace with the validated options.

    Exits (via parser.error) when mode-specific requirements are not met.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument('--in',
                            dest='input_file',
                            action='store',
                            help='Input TFLite model file')
    mode_group.add_argument('--in-dir',
                            dest='input_dir',
                            action='store',
                            help='Input directory containing TFLite models to be compiled')
    mode_group.add_argument('--teflon-test-models',
                            dest='teflon_test_models',
                            action='store_true',
                            help='Use ${TEFLON_TEST_DATA}/models as input directory and ${TEFLON_TEST_DATA}/compiled_models as output')
    parser.add_argument('--out',
                        dest='output_file',
                        action='store',
                        help='Output TFLite model file (required with --in)')
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        action='store',
                        help='Output directory for compiled models (required with --in-dir). Output files will have the same name as input files')
    parser.add_argument('--force',
                        action='store_true',
                        required=False,
                        help='Overwrite output files if they exist. Ignored for single file mode')
    parser.add_argument('--show-details',
                        action='store_true',
                        required=False,
                        help='Print compilation result for each model. Ignored for single file mode')
    parser.add_argument('--log-to-file',
                        action='store_true',
                        required=False,
                        help='Generate log file for each compile attempt. Log is located next to output file')
    # Bug fix: argv was previously ignored (parse_args() silently fell back
    # to sys.argv), making the function untestable and the parameter dead.
    args = parser.parse_args(argv)
    if args.input_file:
        if not args.output_file:
            parser.error("--out is required when using --in")
        if not os.path.isfile(args.input_file):
            parser.error("--in must be an existing file")
    if args.input_dir:
        if not args.output_dir:
            parser.error("--out-dir is required when using --in-dir")
        if not os.path.isdir(args.input_dir):
            parser.error("--in-dir must be an existing directory")
    if args.teflon_test_models and not os.environ.get('TEFLON_TEST_DATA'):
        parser.error("--teflon-test-models requires TEFLON_TEST_DATA to be set")
    return args
def compile(input_file, output_file):
    """Run compile_model.py on one model in a subprocess.

    Returns the CompletedProcess, or None if the subprocess could not
    be launched (e.g. timeout or OS error).
    """
    here = os.path.dirname(os.path.abspath(__file__))
    worker = os.path.join(here, "compile_model.py")
    cmd = [sys.executable, worker, input_file, output_file]
    try:
        # Capture output so callers can log or display it; 60s guards
        # against a hung converter.
        return subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except Exception as e:
        print(f"Error compiling {input_file}: {e}", file=sys.stderr)
        return None
def write_log(output_file, result):
    """Save the compiler's captured stdout/stderr next to the output model."""
    stem = os.path.splitext(os.path.basename(output_file))[0]
    log_path = os.path.join(os.path.dirname(output_file),
                            f"{stem}_compiler_output.log")
    chunks = []
    if result.stdout:
        chunks.append(result.stdout.strip())
    if result.stderr:
        # stderr goes on its own line after stdout (if any).
        chunks.append('\n')
        chunks.append(result.stderr.strip())
    with open(log_path, 'w') as f:
        f.write(''.join(chunks))
def print_result(input_file, result):
    """Print a one-line status for a compile attempt, plus stderr on failure.

    Exit-code mapping follows compile_model.py: 0 success, 1 converter
    error, 2 zero conversion rate, 3 script/argument error.
    """
    if result.returncode == 0:
        status = "SUCCESS"
    elif result.returncode == 1:
        status = "COMPILER ERROR"
    elif result.returncode == 2:
        status = "ZERO CONVERSION RATE"
    elif result.returncode == 3:
        status = "SCRIPT ERROR"
    else:
        # Bug fix: 'returncode' was referenced without the 'result.' prefix,
        # raising NameError for any unexpected exit code.
        status = f"UNKNOWN({result.returncode})"
    print(f"  {input_file}: {status}")
    if result.returncode != 0 and result.stderr:
        print(result.stderr.strip())
def print_stats(total, ok, error, zero, skip):
    """Print a banner-framed summary of batch compilation results."""
    banner = "=" * 50
    summary = (
        "\n" + banner + "\n"
        f"Total models processed: {total}\n"
        f"  Successful: {ok}\n"
        f"  Errors: {error}\n"
        f"  Zero conversion rate: {zero}\n"
        f"  Skipped: {skip}\n"
        + banner
    )
    print(summary)
def find_tflite_files(directory):
    """Recursively collect all .tflite files under directory, sorted."""
    found = [
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.endswith('.tflite')
    ]
    return sorted(found)
def process_model(input_file, output_file, log_to_file, show_details):
    """Compile one model, optionally logging and printing the outcome.

    Returns the worker script's exit code, or 3 when the compile
    subprocess could not be run at all.
    """
    os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
    result = compile(input_file, output_file)
    if result is None:
        return 3
    if log_to_file:
        write_log(output_file, result)
    if show_details:
        print_result(input_file, result)
    return result.returncode
def process_directory(input_dir, output_dir, force, log_to_file, show_details):
    """Compile every .tflite model under input_dir into output_dir.

    Mirrors the input directory layout under output_dir; existing outputs
    are skipped unless force is set. Prints a summary when done.
    """
    counts = {'total': 0, 'ok': 0, 'errors': 0, 'zero': 0, 'skip': 0}
    for input_file in find_tflite_files(input_dir):
        counts['total'] += 1
        rel = os.path.relpath(input_file, input_dir)
        output_file = os.path.join(output_dir, rel)
        if os.path.exists(output_file) and not force:
            if show_details:
                print(f"  {input_file}: SKIPPED")
            counts['skip'] += 1
            continue
        rc = process_model(input_file, output_file, log_to_file, show_details)
        if rc == 0:
            counts['ok'] += 1
        elif rc == 2:
            # Zero conversion rate: model unchanged, tracked separately.
            counts['zero'] += 1
        else:
            counts['errors'] += 1
    print_stats(counts['total'], counts['ok'], counts['errors'],
                counts['zero'], counts['skip'])
def main(argv):
    """Entry point: dispatch to single-file, directory, or Teflon mode."""
    args = parse_args(argv)
    if args.input_file:
        # Single-file mode always shows the result.
        process_model(args.input_file, args.output_file,
                      args.log_to_file, True)
        return
    if args.input_dir:
        process_directory(args.input_dir, args.output_dir, args.force,
                          args.log_to_file, args.show_details)
        return
    if args.teflon_test_models:
        data_root = os.environ.get('TEFLON_TEST_DATA')
        input_dir = os.path.join(data_root, 'models')
        output_dir = os.path.join(data_root, 'compiled_models')
        print(f"Compiling Teflon test models:")
        print(f"  Input directory: {input_dir}")
        print(f"  Output directory: {output_dir}")
        process_directory(input_dir, output_dir, args.force,
                          args.log_to_file, args.show_details)


if __name__ == "__main__":
    main(sys.argv[1:])

View file

@ -0,0 +1,257 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_mman.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_device.h"
#include "neutron_ml.h"
/* Flags accepted in the NEUTRON_DEBUG environment variable. */
static const struct debug_named_value neutron_debug_options[] = {
{"dbg_msgs", NEUTRON_DBG_MSGS, "Print debug messages"},
{"dump_bos", NEUTRON_DBG_DUMP_BOS, "Dump buffers for analysis"},
{"zero_bos", NEUTRON_DBG_ZERO, "Zero buffers for debugging"},
DEBUG_NAMED_VALUE_END};
DEBUG_GET_ONCE_FLAGS_OPTION(neutron_debug, "NEUTRON_DEBUG",
neutron_debug_options, 0)
/* Global debug bitmask, initialized once at screen creation. */
int neutron_debug = 0;
static void
neutron_screen_destroy(struct pipe_screen *pscreen)
{
   /* Screen-owned allocations are ralloc-parented to the screen,
    * so a single free tears everything down. */
   ralloc_free(neutron_screen(pscreen));
}
static void
neutron_context_destroy(struct pipe_context *pctx)
{
   struct neutron_context *nctx = neutron_context(pctx);
   struct neutron_screen *nscreen = neutron_screen(pctx->screen);

   /* Release the job-completion syncobj before freeing the context. */
   drmSyncobjDestroy(nscreen->fd, nctx->syncobj_handle);
   ralloc_free(nctx);
}
/* Map (part of) a buffer resource for CPU access.
 *
 * For reads, the BO contents are first synced from the device. The BO is
 * persistently mapped at creation, so this only returns an offset pointer.
 * Returns NULL on allocation or sync failure.
 */
static void *
neutron_buffer_map(struct pipe_context *pctx,
                   struct pipe_resource *prsc, unsigned level,
                   unsigned usage, const struct pipe_box *box,
                   struct pipe_transfer **out_transfer)
{
   struct neutron_screen *screen = neutron_screen(pctx->screen);
   struct neutron_resource *rsc = neutron_resource(prsc);
   struct drm_neutron_sync_bo arg = {0};
   struct pipe_transfer *transfer;
   int ret;

   /* Only 1D buffer resources at level 0 are supported. */
   assert(level == 0);
   assert(prsc->target == PIPE_BUFFER);
   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = rzalloc(NULL, struct pipe_transfer);
   if (!transfer)
      return NULL; /* Fix: original dereferenced a NULL transfer on OOM. */

   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   pipe_resource_reference(&transfer->resource, prsc);

   /* Sync device memory to the CPU before handing out a read mapping. */
   if (transfer->usage & PIPE_MAP_READ) {
      arg.handle = rsc->handle;
      arg.direction = DRM_NEUTRON_SYNC_FROM_DEVICE;
      arg.size = box->width;
      arg.offset = box->x;
      ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
      if (ret < 0)
         goto free_transfer;
   }

   *out_transfer = transfer;
   return (uint8_t *)rsc->map + box->x;

free_transfer:
   pipe_resource_reference(&transfer->resource, NULL);
   ralloc_free(transfer);
   return NULL;
}
static void
neutron_buffer_unmap(struct pipe_context *pctx,
struct pipe_transfer *transfer)
{
struct neutron_resource *rsrc = neutron_resource(transfer->resource);
struct neutron_screen *screen = neutron_screen(pctx->screen);
struct drm_neutron_sync_bo arg = {0};
int ret;
if (transfer->usage & PIPE_MAP_WRITE) {
arg.handle = rsrc->handle;
arg.direction = DRM_NEUTRON_SYNC_TO_DEVICE;
arg.size = transfer->box.width;
arg.offset = transfer->box.x;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
assert(ret >= 0);
}
pipe_resource_reference(&transfer->resource, NULL);
ralloc_free(transfer);
}
/* Create a context with a syncobj used to wait for job completion.
 * Returns NULL on allocation or syncobj-creation failure.
 */
static struct pipe_context *
neutron_context_create(struct pipe_screen *pscreen,
                       void *priv, unsigned flags)
{
   struct neutron_screen *screen = neutron_screen(pscreen);
   struct neutron_context *ctx;
   struct pipe_context *pctx;
   int ret;

   ctx = rzalloc(NULL, struct neutron_context);
   if (!ctx)
      return NULL;

   ret = drmSyncobjCreate(screen->fd, 0, &ctx->syncobj_handle);
   if (ret) {
      /* Fix: ctx was leaked on this error path. */
      ralloc_free(ctx);
      return NULL;
   }

   pctx = &ctx->base;
   pctx->screen = pscreen;
   pctx->priv = priv;
   pctx->destroy = neutron_context_destroy;
   pctx->buffer_map = neutron_buffer_map;
   pctx->buffer_unmap = neutron_buffer_unmap;
   pctx->resource_copy_region = util_resource_copy_region;
   pctx->buffer_subdata = u_default_buffer_subdata;
   pctx->clear_buffer = u_default_clear_buffer;
   pctx->ml_subgraph_invoke = neutron_ml_subgraph_invoke;
   pctx->ml_subgraph_read_output = neutron_ml_subgraph_read_outputs;

   return pctx;
}
/* Allocate a BO for a buffer resource and keep a persistent CPU mapping
 * for its lifetime. Returns NULL on allocation, ioctl or mmap failure. */
static struct pipe_resource *
neutron_resource_create(struct pipe_screen *pscreen,
                        const struct pipe_resource *template)
{
   struct neutron_screen *screen = neutron_screen(pscreen);
   struct drm_neutron_create_bo arg = {0};
   struct neutron_resource *rsc;
   int ret;

   /* Only simple 1D buffers are supported by this driver. */
   assert(template->target == PIPE_BUFFER);
   assert(template->height0 == 1);
   assert(template->depth0 == 1);
   assert(template->array_size == 1);

   rsc = rzalloc(NULL, struct neutron_resource);
   if (!rsc)
      return NULL;

   rsc->base = *template;
   rsc->base.screen = pscreen;
   rsc->base.nr_samples = template->nr_samples;
   pipe_reference_init(&rsc->base.reference, 1);

   /* The kernel may round the allocation up; use the returned size. */
   arg.size = template->width0;
   ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_CREATE_BO, &arg);
   if (ret < 0)
      goto err_free;

   rsc->handle = arg.handle;
   rsc->size = arg.size;

   rsc->map = os_mmap(NULL, rsc->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                      screen->fd, arg.map_offset);
   if (rsc->map != MAP_FAILED)
      return &rsc->base;

   /* mmap failed: close the BO, then fall through to free the resource. */
   {
      struct drm_gem_close close_arg = {.handle = rsc->handle};
      drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_arg);
   }
err_free:
   ralloc_free(rsc);
   return NULL;
}
static void
neutron_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *prsc)
{
struct neutron_resource *rsc = neutron_resource(prsc);
struct neutron_screen *screen = neutron_screen(pscreen);
struct drm_gem_close arg = {0};
int ret;
os_munmap(rsc->map, rsc->size);
arg.handle = rsc->handle;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
ralloc_free(rsc);
}
static int
neutron_screen_get_fd(struct pipe_screen *pscreen)
{
return neutron_screen(pscreen)->fd;
}
static struct pipe_ml_device *
neutron_get_ml_device(struct pipe_screen *pscreen)
{
return &neutron_screen(pscreen)->ml_device.base;
}
/* Create the Neutron screen for the given DRM fd and wire up the pipe
 * and ML entrypoints. config and ro are currently unused. */
struct pipe_screen *
neutron_screen_create(int fd,
                      const struct pipe_screen_config *config,
                      struct renderonly *ro)
{
   /* Renamed so the local does not shadow the neutron_screen() helper. */
   struct neutron_screen *nscreen;
   struct pipe_screen *pscreen;

   nscreen = rzalloc(NULL, struct neutron_screen);
   if (!nscreen)
      return NULL;

   neutron_debug = debug_get_option_neutron_debug();

   nscreen->fd = fd;

   pscreen = &nscreen->pscreen;
   pscreen->get_screen_fd = neutron_screen_get_fd;
   pscreen->destroy = neutron_screen_destroy;
   pscreen->context_create = neutron_context_create;
   pscreen->resource_create = neutron_resource_create;
   pscreen->resource_destroy = neutron_resource_destroy;
   pscreen->get_ml_device = neutron_get_ml_device;

   nscreen->ml_device.base.ml_operation_supported = neutron_ml_operation_supported;
   nscreen->ml_device.base.ml_subgraph_create = neutron_ml_subgraph_create;
   nscreen->ml_device.base.ml_subgraph_destroy = neutron_ml_subgraph_destroy;

   return pscreen;
}

View file

@ -0,0 +1,90 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "renderonly/renderonly.h"
#include "util/log.h"
#include "util/macros.h"
#ifndef NEUTRON_DEVICE_H
#define NEUTRON_DEVICE_H
enum neutron_dbg {
NEUTRON_DBG_MSGS = BITFIELD_BIT(0),
NEUTRON_DBG_DUMP_BOS = BITFIELD_BIT(1),
NEUTRON_DBG_ZERO = BITFIELD_BIT(2),
};
#define DBG_ENABLED(flag) unlikely(neutron_debug &(flag))
extern int neutron_debug;
#define DBG(fmt, ...) \
do { \
if (DBG_ENABLED(NEUTRON_DBG_MSGS)) \
mesa_logd(fmt, ##__VA_ARGS__); \
} while (0)
struct neutron_ml_device {
struct pipe_ml_device base;
struct pipe_context *context;
};
static inline struct neutron_ml_device *
neutron_ml_device(struct pipe_ml_device *dev)
{
return (struct neutron_ml_device *)dev;
}
/* Driver screen: owns the DRM fd and embeds the ML device. */
struct neutron_screen {
   struct pipe_screen pscreen;
   struct neutron_ml_device ml_device;
   int fd; /* DRM device file descriptor */
};

static inline struct neutron_screen *
neutron_screen(struct pipe_screen *p)
{
   return (struct neutron_screen *)p;
}

/* Recover the owning screen from an ML device pointer; relies on
 * ml_device being embedded (not pointed to) in neutron_screen. */
static inline struct neutron_screen *
neutron_ml_device_screen(struct pipe_ml_device *pdevice)
{
   struct neutron_ml_device *dev = neutron_ml_device(pdevice);
   return container_of(dev, struct neutron_screen, ml_device);
}
struct neutron_context {
   struct pipe_context base;
   /* DRM syncobj signaled by the kernel on job completion; waited on in
    * neutron_ml_subgraph_read_outputs(). */
   uint32_t syncobj_handle;
};

static inline struct neutron_context *
neutron_context(struct pipe_context *pctx)
{
   return (struct neutron_context *)pctx;
}

/* Buffer object: one GEM buffer plus its CPU mapping. */
struct neutron_resource {
   struct pipe_resource base;
   uint32_t handle; /* GEM handle */
   uint64_t size;   /* allocated size in bytes */
   /* CPU mapping; the ML code reads it directly, so it is assumed valid
    * for the BO's whole lifetime — TODO confirm in the resource code. */
   void *map;
};

static inline struct neutron_resource *
neutron_resource(struct pipe_resource *p)
{
   return (struct neutron_resource *)p;
}
struct pipe_screen *
neutron_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro);
#endif /* NEUTRON_DEVICE_H */

View file

@ -0,0 +1,382 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_state.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "util/u_inlines.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_ml.h"
/* Human-readable name for a tensor type.  The custom-tensor names are
 * also used as reserved identifiers matched in get_tensor_type(). */
static const char *
neutron_tensor_name(enum neutron_tensor_type type)
{
   static const char *const names[NEUTRON_DATA_OUTPUT + 1] = {
      [NEUTRON_WEIGHTS] = "NeutronWeights",
      [NEUTRON_MICROCODE] = "NeutronMicrocode",
      [NEUTRON_KERNELS] = "NeutronKernels",
      [NEUTRON_SCRATCH] = "NeutronScratch",
      [NEUTRON_PROFILE] = "NeutronProfile",
      [NEUTRON_DEBUG] = "NeutronDebug",
      [NEUTRON_DATA_INPUT] = "Input",
      [NEUTRON_DATA_OUTPUT] = "Output",
   };

   if ((unsigned)type > NEUTRON_DATA_OUTPUT || !names[type])
      return "N/A";

   return names[type];
}
/* Dump @size bytes at @ptr + @offset to a file named "mesa-<name>-<id>.bin"
 * in the current directory.  Best-effort debug aid: failures are logged
 * (when NEUTRON_DBG_MSGS is set) and otherwise ignored. */
static void
neutron_dump_buffer(const uint8_t *ptr, const char *name,
                    int id, int offset, unsigned size)
{
   char filename[255];
   FILE *f;

   /* %03d, not %03u: id is a signed int (always >= 0 in practice). */
   snprintf(filename, sizeof(filename), "mesa-%s-%03d.bin", name, id);

   /* Don't assert on fopen(): with NDEBUG that would dereference NULL
    * below; the CWD may simply not be writable. */
   f = fopen(filename, "wb");
   if (!f) {
      DBG("Error opening dump file %s: %s\n", filename, strerror(errno));
      return;
   }

   if (fwrite(ptr + offset, 1, size, f) != size)
      DBG("Error in writing to file: %s\n", strerror(errno));

   /* fclose() flushes; check it so short writes are not silently lost. */
   if (fclose(f) != 0)
      DBG("Error closing dump file %s: %s\n", filename, strerror(errno));
}
static void
neutron_dump_tensor(struct pipe_resource *bo,
struct neutron_tensor *ntensor, int id)
{
uint8_t *buf = neutron_resource(bo)->map;
neutron_dump_buffer(buf, neutron_tensor_name(ntensor->type), id,
ntensor->offset, ntensor->size);
}
/* The driver only handles pre-compiled "NeutronGraph" custom operations
 * that carry all compiler-generated custom inputs plus at least one data
 * input (input_count must exceed NEUTRON_CUSTOM_INPUT_MAX). */
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
                               const struct pipe_ml_operation *operation)
{
   return operation->type == PIPE_ML_OPERATION_TYPE_CUSTOM &&
          operation->custom.name != NULL &&
          !strcmp(operation->custom.name, "NeutronGraph") &&
          operation->input_count > NEUTRON_CUSTOM_INPUT_MAX;
}
/* Size in bytes of a tensor's data section.
 * NOTE(review): assumes exactly 4 dimensions and one byte per element
 * (quantized int8) — confirm against pipe_tensor semantics. */
static unsigned
get_tensor_size(const struct pipe_tensor *tensor)
{
   unsigned size = 1;
   for (unsigned i = 0; i < 4; i++)
      size *= tensor->dims[i];
   return size;
}
/* Map a pipe tensor to its Neutron type by matching its name against the
 * reserved custom-tensor names; unnamed or unrecognized tensors are
 * regular data inputs/outputs. */
static enum neutron_tensor_type
get_tensor_type(struct pipe_tensor *tensor, bool is_input)
{
   /* Guard against tensors with no name: strcmp(NULL, ...) is UB. */
   if (tensor->name) {
      for (unsigned i = 0; i < NEUTRON_CUSTOM_MAX; i++) {
         if (strcmp(tensor->name, neutron_tensor_name(i)) == 0)
            return i;
      }
   }
   return is_input ? NEUTRON_DATA_INPUT : NEUTRON_DATA_OUTPUT;
}
/* Custom inputs (weights, microcode, kernels) occupy the low range of
 * enum neutron_tensor_type, below NEUTRON_CUSTOM_INPUT_MAX. */
static bool
is_custom_input(struct neutron_tensor *tensor)
{
   return tensor->type < NEUTRON_CUSTOM_INPUT_MAX;
}
static void
create_tensor(struct neutron_subgraph *subgraph,
struct pipe_tensor *tensor, bool is_input)
{
struct neutron_tensor ntensor = {0};
ntensor.type = get_tensor_type(tensor, is_input);
ntensor.size = get_tensor_size(tensor);
ntensor.tensor = tensor;
if (ntensor.type == NEUTRON_DATA_INPUT)
ntensor.id = subgraph->data_inputs++;
else if (ntensor.type == NEUTRON_DATA_OUTPUT)
ntensor.id = subgraph->data_outputs++;
util_dynarray_append(&subgraph->tensors, ntensor);
}
/* Number of uint32_t entries in the subgraph's offsets header.
 * NOTE(review): the "+ 4" appears to account for the reserved entries
 * between the scratch and profile indices (see get_header_index()) —
 * confirm the exact header layout against the firmware/kernel ABI. */
static unsigned
get_header_array_size(struct neutron_subgraph *subgraph)
{
   unsigned num_tensors =
      util_dynarray_num_elements(&subgraph->tensors, struct neutron_tensor);
   return num_tensors + 4;
}
static unsigned
get_buffer_size(struct neutron_subgraph *subgraph)
{
unsigned size = 0;
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor)
size += NEUTRON_ALIGN(ntensor->size);
size += subgraph->header.size;
return size;
}
/* Index of a tensor's offset entry in the header array.
 *
 * Header layout (one uint32_t offset per entry, di = data_inputs,
 * do = data_outputs):
 *   [0 .. di-1]          data inputs
 *   [di]                 weights
 *   [di+1 .. di+do]      data outputs
 *   [di+do+1]            kernels
 *   [di+do+2]            scratch
 *   [di+do+3 .. di+do+6] reserved (4 entries)
 *   [di+do+7]            profile
 *   [di+do+8]            debug
 *
 * Returns -1 for tensors with no header entry (microcode, whose offset
 * is passed via the submit ioctl instead). */
static int
get_header_index(struct neutron_subgraph *subgraph,
                 struct neutron_tensor *ntensor)
{
   switch (ntensor->type) {
   case NEUTRON_DATA_INPUT:
      return ntensor->id;
   case NEUTRON_WEIGHTS:
      return subgraph->data_inputs;
   case NEUTRON_DATA_OUTPUT:
      return subgraph->data_inputs + 1 + ntensor->id;
   case NEUTRON_KERNELS:
      return subgraph->data_inputs + subgraph->data_outputs + 1;
   case NEUTRON_SCRATCH:
      return subgraph->data_inputs + subgraph->data_outputs + 2;
   case NEUTRON_PROFILE:
      /* 4 entries reserved */
      return subgraph->data_inputs + subgraph->data_outputs + 7;
   case NEUTRON_DEBUG:
      return subgraph->data_inputs + subgraph->data_outputs + 8;
   default:
      return -1;
   }
}
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count)
{
struct neutron_screen *screen = neutron_ml_device_screen(pdevice);
struct neutron_ml_device *dev = neutron_ml_device(pdevice);
const struct pipe_ml_operation *operation = &poperations[0];
struct neutron_subgraph *subgraph;
uint32_t *offset_array;
unsigned crt_offset = 0;
if (count > 1) {
DBG("Expect a single NeutronGraph per partition!\n");
return NULL;
}
if (!dev->context)
dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0);
subgraph = calloc(1, sizeof(*subgraph));
subgraph->base.device = pdevice;
subgraph->tensors = UTIL_DYNARRAY_INIT;
subgraph->header = UTIL_DYNARRAY_INIT;
for (unsigned i = 0; i < operation->input_count; i++)
create_tensor(subgraph, operation->input_tensors[i], true);
for (unsigned i = 0; i < operation->output_count; i++)
create_tensor(subgraph, operation->output_tensors[i], false);
if (!util_dynarray_resize(&subgraph->header, uint32_t,
get_header_array_size(subgraph)))
return NULL;
memset(util_dynarray_begin(&subgraph->header), 0, subgraph->header.size);
subgraph->bo = pipe_buffer_create(dev->context->screen, 0,
PIPE_USAGE_DEFAULT,
get_buffer_size(subgraph));
if (!subgraph->bo)
return NULL;
if (DBG_ENABLED(NEUTRON_DBG_ZERO)) {
struct pipe_transfer *transfer;
struct neutron_resource *rsc = neutron_resource(subgraph->bo);
uint8_t *buf = pipe_buffer_map(dev->context, subgraph->bo,
PIPE_MAP_WRITE, &transfer);
memset(buf, 0, pipe_buffer_size(subgraph->bo));
pipe_buffer_unmap(dev->context, transfer);
}
DBG("%s: Tensor list:\n", __func__);
DBG("%16s %3s %6s %8s %8s %8s\n", "TYPE", "ID", "INDEX", "OFFSET", "SIZE", "HDR_IDX");
offset_array = util_dynarray_begin(&subgraph->header);
crt_offset = NEUTRON_ALIGN(subgraph->header.size);
/* Translate all tensors into buffer sections */
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
unsigned header_index = get_header_index(subgraph, ntensor);
/* Microcode offset passed via ioctl, all others stored in header */
if (ntensor->type == NEUTRON_MICROCODE)
subgraph->microcode_offset = crt_offset;
else
offset_array[header_index] = crt_offset;
ntensor->offset = crt_offset;
crt_offset += NEUTRON_ALIGN(ntensor->size);
/* Custom inputs are written in the buffer during setup */
if (is_custom_input(ntensor)) {
pipe_buffer_write(dev->context, subgraph->bo, ntensor->offset,
ntensor->size, ntensor->tensor->data);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, 0);
}
DBG("%16s %3d %6d %8d %8d %2d\n", neutron_tensor_name(ntensor->type),
ntensor->id, ntensor->tensor->index, ntensor->offset, ntensor->size,
header_index);
}
pipe_buffer_write(dev->context, subgraph->bo, 0, subgraph->header.size,
util_dynarray_begin(&subgraph->header));
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) {
uint8_t *buf = neutron_resource(subgraph->bo)->map;
neutron_dump_buffer(buf, "header", 0, 0, subgraph->header.size);
}
return &subgraph->base;
}
/* Find the tracked tensor of the given type whose pipe tensor index
 * equals @index, or NULL if there is none. */
static struct neutron_tensor *
get_tensor_by_id(struct neutron_subgraph *subgraph,
                 enum neutron_tensor_type type, unsigned index)
{
   util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, t) {
      if (t->type != type)
         continue;
      if (t->tensor->index == index)
         return t;
   }

   return NULL;
}
/* Upload input data, hand output sections over to the device and submit
 * an inference job.  Completion is signaled on the context's syncobj and
 * waited on in neutron_ml_subgraph_read_outputs(). */
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
                           struct pipe_ml_subgraph *psubgraph,
                           unsigned inputs_count, unsigned input_idxs[],
                           void *inputs[], bool is_signed[])
{
   struct neutron_screen *screen = neutron_screen(pcontext->screen);
   struct neutron_context *context = neutron_context(pcontext);
   struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
   struct drm_neutron_submit_job submit = {0};
   int ret;

   for (unsigned i = 0; i < inputs_count; i++) {
      struct neutron_tensor *ntensor =
         get_tensor_by_id(subgraph, NEUTRON_DATA_INPUT, input_idxs[i]);

      /* Don't crash on an index the subgraph doesn't track. */
      if (!ntensor) {
         DBG("Unknown input tensor idx=%d, skipping\n", input_idxs[i]);
         continue;
      }

      DBG("Prepare input tensor: idx=%d, offset=%d, size=%d\n",
          input_idxs[i], ntensor->offset, ntensor->size);
      pipe_buffer_write(pcontext, subgraph->bo, ntensor->offset,
                        ntensor->size, inputs[i]);
      if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
         neutron_dump_tensor(subgraph->bo, ntensor, i);
   }

   /* Transfer ownership of output sections to device */
   util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
      if (ntensor->type == NEUTRON_DATA_OUTPUT) {
         struct pipe_transfer *transfer;

         pipe_buffer_map_range(pcontext, subgraph->bo, ntensor->offset,
                               ntensor->size, PIPE_MAP_WRITE, &transfer);
         pipe_buffer_unmap(pcontext, transfer);
      }
   }

   submit.type = DRM_NEUTRON_JOB_INFERENCE;
   submit.bo_handle = neutron_resource(subgraph->bo)->handle;
   submit.syncobj_handle = context->syncobj_handle;
   submit.inference.microcode_offset = subgraph->microcode_offset;
   submit.inference.tensor_offset = 0;
   submit.inference.tensor_count =
      util_dynarray_num_elements(&subgraph->header, uint32_t);

   DBG("Submitting job\n");
   ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &submit);
   /* Log the failure even in release builds, where assert() is a no-op. */
   if (ret != 0)
      DBG("Job submission failed: %s\n", strerror(errno));
   assert(ret == 0);
}
/* Wait for the previously submitted job to finish, then copy the output
 * tensor sections out of the subgraph BO into the caller's buffers. */
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
                                 struct pipe_ml_subgraph *psubgraph,
                                 unsigned outputs_count,
                                 unsigned output_idxs[], void *outputsv[],
                                 bool is_signed[])
{
   struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
   struct neutron_context *context = neutron_context(pcontext);
   struct neutron_screen *screen = neutron_screen(pcontext->screen);
   int ret;

   /* Block until the job from neutron_ml_subgraph_invoke() has signaled
    * the context's syncobj. */
   ret = drmSyncobjWait(screen->fd, &context->syncobj_handle, 1, INT64_MAX,
                        DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
   /* Log the failure even in release builds, where assert() is a no-op. */
   if (ret != 0)
      DBG("Syncobj wait failed: %d\n", ret);
   assert(ret == 0);
   DBG("Job finished\n");

   for (unsigned i = 0; i < outputs_count; i++) {
      struct neutron_tensor *ntensor =
         get_tensor_by_id(subgraph, NEUTRON_DATA_OUTPUT, output_idxs[i]);

      /* Skip indices the subgraph doesn't track instead of crashing. */
      if (!ntensor) {
         DBG("Unknown output tensor idx=%d, skipping\n", output_idxs[i]);
         continue;
      }

      DBG("Read output tensor: idx=%d, offset=%d, size=%d\n",
          output_idxs[i], ntensor->offset, ntensor->size);
      pipe_buffer_read(pcontext, subgraph->bo, ntensor->offset,
                       ntensor->size, outputsv[i]);
      if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
         neutron_dump_tensor(subgraph->bo, ntensor, i);
   }
}
/* Free a subgraph: close the underlying GEM handle, drop the BO
 * reference, and release all tracking storage. */
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
                            struct pipe_ml_subgraph *psubgraph)
{
   struct pipe_context *pcontext = neutron_ml_device(pdevice)->context;
   struct neutron_screen *screen = neutron_screen(pcontext->screen);
   struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
   struct drm_gem_close arg = {0};
   int ret;

   /* NOTE(review): the GEM handle is closed explicitly here AND the pipe
    * resource is unreferenced below — confirm resource_destroy does not
    * close the handle again (potential double close). */
   arg.handle = neutron_resource(subgraph->bo)->handle;
   ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
   assert(ret >= 0);
   pipe_resource_reference(&subgraph->bo, NULL);
   util_dynarray_fini(&subgraph->tensors);
   util_dynarray_fini(&subgraph->header);
   free(subgraph);
}

View file

@ -0,0 +1,73 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#ifndef NEUTRON_ML_H
#define NEUTRON_ML_H
#include <util/u_dynarray.h>
#include <util/u_math.h>
#include "neutron_device.h"
#define NEUTRON_ALIGN(value) align(value, 64)
/* Tensor roles within a Neutron subgraph buffer.
 *
 * The numeric values double as iteration and lookup indices, so the
 * ordering is part of the contract:
 *  - types below NEUTRON_CUSTOM_INPUT_MAX are compiler-generated inputs
 *    uploaded at subgraph creation (see is_custom_input());
 *  - types below NEUTRON_CUSTOM_MAX have reserved names matched by
 *    get_tensor_type();
 *  - DATA_INPUT/DATA_OUTPUT are the user-visible tensors. */
enum neutron_tensor_type {
   NEUTRON_WEIGHTS = 0,
   NEUTRON_MICROCODE,
   NEUTRON_KERNELS,
   NEUTRON_CUSTOM_INPUT_MAX,
   NEUTRON_SCRATCH = NEUTRON_CUSTOM_INPUT_MAX,
   NEUTRON_PROFILE,
   NEUTRON_DEBUG,
   NEUTRON_CUSTOM_MAX,
   NEUTRON_DATA_INPUT = NEUTRON_CUSTOM_MAX,
   NEUTRON_DATA_OUTPUT,
};
/* Per-tensor bookkeeping: position and size of the tensor's section in
 * the subgraph BO. */
struct neutron_tensor {
   /* Borrowed from the pipe_ml_operation — TODO confirm lifetime. */
   struct pipe_tensor *tensor;
   enum neutron_tensor_type type;
   unsigned size;   /* section size in bytes (unaligned) */
   unsigned offset; /* section offset within the subgraph BO */
   unsigned id; /* For NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT */
};

/* A compiled NeutronGraph: one BO holding an offsets header followed by
 * all tensor sections. */
struct neutron_subgraph {
   struct pipe_ml_subgraph base;
   struct pipe_resource *bo;
   struct util_dynarray tensors; /* struct neutron_tensor */
   struct util_dynarray header;  /* uint32_t offsets; see get_header_index() */
   unsigned microcode_offset; /* passed to the kernel via the submit ioctl */
   unsigned data_inputs; /* number of non-custom input tensors */
   unsigned data_outputs; /* number of non-custom output tensors */
};
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *operation);
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count);
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned inputs_count, unsigned input_idxs[],
void *inputs[], bool is_signed[]);
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned outputs_count,
unsigned output_idxs[], void *outputs[],
bool is_signed[]);
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
struct pipe_ml_subgraph *psubgraph);
#endif /* NEUTRON_ML_H */

View file

@ -196,6 +196,12 @@ if with_gallium_ethosu
else
driver_ethosu = declare_dependency()
endif
if with_gallium_neutron
subdir('winsys/neutron/drm')
subdir('drivers/neutron')
else
driver_neutron = declare_dependency()
endif
if with_gallium_zink
if not with_platform_windows
subdir('winsys/zink/drm')

View file

@ -59,7 +59,7 @@ libgallium_dri = shared_library(
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
driver_tegra, driver_i915, driver_svga, driver_virgl,
driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
driver_asahi, driver_crocus, driver_rocket, driver_ethosu
driver_asahi, driver_crocus, driver_rocket, driver_ethosu, driver_neutron
],
install : true,
name_suffix : libname_suffix,

View file

@ -0,0 +1,13 @@
# Copyright 2017 Broadcom
# SPDX-License-Identifier: MIT
libneutronwinsys = static_library(
'neutronwinsys',
files('neutron_drm_winsys.c'),
include_directories : [
inc_src, inc_include,
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
],
gnu_symbol_visibility : 'hidden',
dependencies: [dep_libdrm, idep_mesautil],
)

View file

@ -0,0 +1,17 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* SPDX-License-Identifier: MIT
*/
#ifndef __NEUTRON_DRM_PUBLIC_H__
#define __NEUTRON_DRM_PUBLIC_H__
struct pipe_screen;
struct pipe_screen_config;
struct pipe_screen *
neutron_drm_screen_create(int drmFD, const struct pipe_screen_config *config);
#endif /* __NEUTRON_DRM_PUBLIC_H__ */

View file

@ -0,0 +1,20 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_file.h"
#include "util/u_screen.h"
#include "neutron/neutron_device.h"
#include "neutron_drm_public.h"
/* Winsys entry point: create (or look up) the Neutron screen for the
 * given DRM fd.  The fd is duplicated so the caller keeps ownership of
 * its own descriptor. */
struct pipe_screen *
neutron_drm_screen_create(int fd, const struct pipe_screen_config *config)
{
   int screen_fd = os_dupfd_cloexec(fd);

   return u_pipe_screen_lookup_or_create(screen_fd, config, NULL,
                                         neutron_screen_create);
}