Merge branch 'neutron' into 'main'

Draft: Add new Gallium driver for NXP's Neutron NPU

See merge request mesa/mesa!41401
This commit is contained in:
Ioana Ciocoi-Radulescu 2026-05-08 00:10:44 +00:00
commit 967de8aab8
28 changed files with 1803 additions and 11 deletions

View file

@ -3,6 +3,7 @@
src/gallium/drivers/ethosu/**/*
src/gallium/drivers/i915
src/gallium/drivers/neutron/**/*
src/gallium/drivers/r300/compiler/*
src/gallium/drivers/rocket/**/*
src/gallium/targets/teflon/**/*

View file

@ -0,0 +1,130 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/* Copyright 2025-2026 NXP */
#ifndef __NEUTRON_ACCEL_H__
#define __NEUTRON_ACCEL_H__
#include "drm.h"
#if defined(__cplusplus)
extern "C" {
#endif
/**
* enum drm_neutron_ioctl - Neutron IOCTL IDs
*
* @DRM_NEUTRON_CREATE_BO: Create a buffer object
* @DRM_NEUTRON_SYNC_BO: Sync (parts of) the buffer object memory
* @DRM_NEUTRON_SUBMIT_JOB: Submit a job to the device
*/
enum drm_neutron_ioctl {
DRM_NEUTRON_CREATE_BO = 0,
DRM_NEUTRON_SYNC_BO,
DRM_NEUTRON_SUBMIT_JOB,
};
/**
* struct drm_neutron_create_bo - Create a buffer object and return buffer
* info to user
*
* @size: Size in bytes of the requested buffer. May be updated by the driver
*        if the allocated size differs from the requested size
* @handle: Returned handle for the new buffer object
* @pad: MBZ
* @map_offset: Returned offset for mmap() calls
*/
struct drm_neutron_create_bo {
__u64 size;
__u32 handle;
__u32 pad;
__u64 map_offset;
};
/**
* enum drm_neutron_sync_dir - Direction of buffer object synchronization
*
* @DRM_NEUTRON_SYNC_TO_DEVICE: Sync from CPU to device
* @DRM_NEUTRON_SYNC_FROM_DEVICE: Sync from device to CPU
*/
enum drm_neutron_sync_dir {
DRM_NEUTRON_SYNC_TO_DEVICE = 0,
DRM_NEUTRON_SYNC_FROM_DEVICE,
};
/**
* struct drm_neutron_sync_bo - Sync buffer object memory
*
* @handle: Handle of buffer object to sync
* @direction: Direction of sync, can be one of enum drm_neutron_sync_dir
* @size: Size of the memory to sync, in bytes
* @offset: Offset inside the buffer, in bytes
*/
struct drm_neutron_sync_bo {
__u32 handle;
__u32 direction;
__u64 size;
__u64 offset;
};
/**
* enum drm_neutron_job_type - Type of job to submit to Neutron device
*
* @DRM_NEUTRON_JOB_INFERENCE: Inference job
*/
enum drm_neutron_job_type {
DRM_NEUTRON_JOB_INFERENCE = 0,
};
/**
* struct drm_neutron_inference_job - Inference job descriptor
*
* @tensor_offset: Offset of tensor array inside job BO
* @microcode_offset: Microcode offset inside BO
* @tensor_count: Number of valid tensors
* @pad: MBZ
*/
struct drm_neutron_inference_job {
__u32 tensor_offset;
__u32 microcode_offset;
__u32 tensor_count;
__u32 pad[5];
};
/**
* struct drm_neutron_submit_job - Submit a job to Neutron device
*
* @type: Job type, one of enum drm_neutron_job_type
* @bo_handle: BO handle for this job
* @inference: Inference job descriptor (when type is DRM_NEUTRON_JOB_INFERENCE)
* @reserved: Reserved for future job types
* @syncobj_handle: Handle of syncobj on which user waits for job completion
* @pad: MBZ
*/
struct drm_neutron_submit_job {
__u32 type;
__u32 bo_handle;
union {
struct drm_neutron_inference_job inference;
__u32 reserved[8];
};
__u32 syncobj_handle;
__u32 pad;
};
#define DRM_IOCTL_NEUTRON_CREATE_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_CREATE_BO, \
struct drm_neutron_create_bo)
#define DRM_IOCTL_NEUTRON_SYNC_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SYNC_BO, \
struct drm_neutron_sync_bo)
#define DRM_IOCTL_NEUTRON_SUBMIT_JOB \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SUBMIT_JOB, \
struct drm_neutron_submit_job)
#if defined(__cplusplus)
}
#endif
#endif /* __NEUTRON_ACCEL_H__ */
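For context, a minimal sketch of how userspace could drive this UAPI end to end: create the job BO, fill it through the mapping, sync it to the device, submit an inference job and wait on the syncobj. This is not part of the change; the device fd, in-buffer offsets and tensor count are assumptions passed in by the caller.

/* Hypothetical usage sketch of the UAPI above; error handling, buffer
 * layout and offsets are assumptions, not part of this merge request. */
#include <stdint.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include "neutron_accel.h"

static int
neutron_run_inference(int fd, uint64_t bo_size, uint32_t microcode_offset,
                      uint32_t tensor_count)
{
   struct drm_neutron_create_bo create = {.size = bo_size};
   if (drmIoctl(fd, DRM_IOCTL_NEUTRON_CREATE_BO, &create))
      return -1;

   void *map = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, create.map_offset);
   if (map == MAP_FAILED)
      return -1;

   /* ... write the offset header, microcode and input tensors here ... */

   struct drm_neutron_sync_bo sync = {
      .handle = create.handle,
      .direction = DRM_NEUTRON_SYNC_TO_DEVICE,
      .size = create.size,
      .offset = 0,
   };
   if (drmIoctl(fd, DRM_IOCTL_NEUTRON_SYNC_BO, &sync))
      return -1;

   uint32_t syncobj;
   if (drmSyncobjCreate(fd, 0, &syncobj))
      return -1;

   struct drm_neutron_submit_job job = {
      .type = DRM_NEUTRON_JOB_INFERENCE,
      .bo_handle = create.handle,
      .syncobj_handle = syncobj,
      .inference = {
         .tensor_offset = 0,
         .microcode_offset = microcode_offset,
         .tensor_count = tensor_count,
      },
   };
   if (drmIoctl(fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &job))
      return -1;

   /* Block until the driver signals the syncobj on job completion. */
   return drmSyncobjWait(fd, &syncobj, 1, INT64_MAX,
                         DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
}

The driver below (neutron_device.c and neutron_ml.c) implements this same sequence behind the Gallium ML hooks.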

View file

@ -205,7 +205,7 @@ elif gallium_drivers.contains('all')
gallium_drivers = [
'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915',
'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris',
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu'
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu', 'neutron'
]
endif
@ -234,6 +234,7 @@ with_gallium_d3d12 = gallium_drivers.contains('d3d12')
with_gallium_asahi = gallium_drivers.contains('asahi')
with_gallium_rocket = gallium_drivers.contains('rocket')
with_gallium_ethosu = gallium_drivers.contains('ethosu')
with_gallium_neutron = gallium_drivers.contains('neutron')
foreach gallium_driver : gallium_drivers
pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper())
endforeach

View file

@ -87,7 +87,7 @@ option(
choices : [
'all', 'auto',
'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris',
'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'lima', 'llvmpipe', 'neutron', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink',
],
description : 'List of gallium drivers to build. If this is set to auto ' +

View file

@ -48,6 +48,9 @@ endif
if with_gallium_zink
renderonly_drivers_c_args += '-DGALLIUM_ZINK'
endif
if with_gallium_neutron
renderonly_drivers_c_args += '-DGALLIUM_NEUTRON'
endif
libpipe_loader_static = static_library(
'pipe_loader_static',

View file

@ -86,6 +86,7 @@ static const struct drm_driver_descriptor *driver_descriptors[] = {
&etnaviv_driver_descriptor,
&rocket_driver_descriptor,
&ethosu_driver_descriptor,
&neutron_driver_descriptor,
&tegra_driver_descriptor,
&lima_driver_descriptor,
&zink_driver_descriptor,
@ -388,6 +389,9 @@ pipe_loader_get_compatible_render_capable_device_fds(int kms_only_fd, unsigned i
#if defined GALLIUM_ETHOSU
"ethosu",
#endif
#if defined GALLIUM_NEUTRON
"neutron",
#endif
#if defined GALLIUM_V3D
"v3d",
#endif

View file

@ -53,6 +53,7 @@ const struct drm_driver_descriptor descriptor_name = { \
#undef GALLIUM_ASAHI
#undef GALLIUM_ROCKET
#undef GALLIUM_ETHOSU
#undef GALLIUM_NEUTRON
#endif
#ifdef GALLIUM_I915
@ -482,6 +483,24 @@ DRM_DRIVER_DESCRIPTOR(ethosu, NULL, 0)
DRM_DRIVER_DESCRIPTOR_STUB(ethosu)
#endif
#ifdef GALLIUM_NEUTRON
#include "neutron/drm/neutron_drm_public.h"
static struct pipe_screen *
pipe_neutron_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
screen = neutron_drm_screen_create(fd, config);
return screen ? debug_screen_wrap(screen) : NULL;
}
DRM_DRIVER_DESCRIPTOR(neutron, NULL, 0)
#else
DRM_DRIVER_DESCRIPTOR_STUB(neutron)
#endif
#ifdef GALLIUM_KMSRO
#include "kmsro/drm/kmsro_drm_public.h"

View file

@ -24,6 +24,7 @@ extern const struct drm_driver_descriptor etnaviv_driver_descriptor;
extern const struct drm_driver_descriptor rknpu_driver_descriptor;
extern const struct drm_driver_descriptor rocket_driver_descriptor;
extern const struct drm_driver_descriptor ethosu_driver_descriptor;
extern const struct drm_driver_descriptor neutron_driver_descriptor;
extern const struct drm_driver_descriptor tegra_driver_descriptor;
extern const struct drm_driver_descriptor lima_driver_descriptor;
extern const struct drm_driver_descriptor zink_driver_descriptor;

View file

@ -0,0 +1,2 @@
BasedOnStyle: InheritParentConfig
DisableFormat: false

View file

@ -0,0 +1,102 @@
CompiledModels.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail
CompiledModels.Op/inception_002,Fail
CompiledModels.Op/inception_003,Fail
CompiledModels.Op/inception_008,Fail
CompiledModels.Op/inception_013,Fail
CompiledModels.Op/inception_015,Fail
CompiledModels.Op/inception_016,Fail
CompiledModels.Op/inception_021,Fail
CompiledModels.Op/inception_024,Fail
CompiledModels.Op/inception_027,Fail
CompiledModels.Op/inception_030,Fail
CompiledModels.Op/inception_032,Fail
CompiledModels.Op/inception_033,Fail
CompiledModels.Op/inception_041,Fail
CompiledModels.Op/inception_055,Fail
CompiledModels.Op/inception_059,Fail
CompiledModels.Op/inception_063,Fail
CompiledModels.Op/inception_064,Fail
CompiledModels.Op/inception_071,Fail
CompiledModels.Op/inception_073,Fail
CompiledModels.Op/inception_075,Fail
CompiledModels.Op/inception_077,Fail
CompiledModels.Op/mobiledet_002,Fail
CompiledModels.Op/mobiledet_004,Fail
CompiledModels.Op/mobiledet_006,Fail
CompiledModels.Op/mobiledet_009,Fail
CompiledModels.Op/mobiledet_012,Fail
CompiledModels.Op/mobiledet_015,Fail
CompiledModels.Op/mobiledet_017,Fail
CompiledModels.Op/mobiledet_020,Fail
CompiledModels.Op/mobiledet_029,Fail
CompiledModels.Op/mobiledet_039,Fail
CompiledModels.Op/mobiledet_042,Fail
CompiledModels.Op/mobiledet_046,Fail
CompiledModels.Op/mobiledet_050,Fail
CompiledModels.Op/mobiledet_063,Fail
CompiledModels.Op/mobiledet_086,Fail
CompiledModels.Op/mobiledet_087,Fail
CompiledModels.Op/mobiledet_106,Fail
CompiledModels.Op/mobiledet_119,Fail
CompiledModels.Op/mobiledet_ssdlite_mobiledet_coco_qat_postprocess,Fail
CompiledModels.Op/mobilenetv1_001,Fail
CompiledModels.Op/mobilenetv1_002,Fail
CompiledModels.Op/mobilenetv1_003,Fail
CompiledModels.Op/mobilenetv1_005,Fail
CompiledModels.Op/mobilenetv1_006,Fail
CompiledModels.Op/mobilenetv1_008,Fail
CompiledModels.Op/mobilenetv1_009,Fail
CompiledModels.Op/mobilenetv1_012,Fail
CompiledModels.Op/mobilenetv1_016,Fail
CompiledModels.Op/mobilenetv1_018,Fail
CompiledModels.Op/mobilenetv1_020,Fail
CompiledModels.Op/mobilenetv1_025,Fail
CompiledModels.Op/mobilenetv1_026,Fail
CompiledModels.Op/mobilenetv1_mobilenet_v1_1_224_quant,Fail
CompiledModels.Op/mobilenetv2_001,Fail
CompiledModels.Op/mobilenetv2_003,Fail
CompiledModels.Op/mobilenetv2_006,Fail
CompiledModels.Op/mobilenetv2_007,Fail
CompiledModels.Op/mobilenetv2_010,Fail
CompiledModels.Op/mobilenetv2_014,Fail
CompiledModels.Op/mobilenetv2_018,Fail
CompiledModels.Op/mobilenetv2_025,Fail
CompiledModels.Op/mobilenetv2_029,Fail
CompiledModels.Op/mobilenetv2_040,Fail
CompiledModels.Op/mobilenetv2_044,Fail
CompiledModels.Op/mobilenetv2_059,Fail
CompiledModels.Op/mobilenetv2_061,Fail
CompiledModels.Op/movenetlightning_103,Fail
CompiledModels.Op/movenetlightning_104,Fail
CompiledModels.Op/movenetthunder_103,Fail
CompiledModels.Op/movenetthunder_104,Fail
CompiledModels.Op/ssdmobilenetv2_001,Fail
CompiledModels.Op/ssdmobilenetv2_003,Fail
CompiledModels.Op/ssdmobilenetv2_007,Fail
CompiledModels.Op/ssdmobilenetv2_040,Fail
CompiledModels.Op/ssdmobilenetv2_047,Fail
CompiledModels.Op/ssdmobilenetv2_052,Fail
CompiledModels.Op/ssdmobilenetv2_054,Fail
CompiledModels.Op/ssdmobilenetv2_065,Fail
CompiledModels.Op/ssdmobilenetv2_069,Fail
CompiledModels.Op/ssdmobilenetv2_072,Fail
CompiledModels.Op/ssdmobilenetv2_073,Fail
CompiledModels.Op/ssdmobilenetv2_077,Fail
CompiledModels.Op/ssdmobilenetv2_081,Fail
CompiledModels.Op/ssdmobilenetv2_089,Fail
CompiledModels.Op/ssdmobilenetv2_096,Fail
CompiledModels.Op/ssdmobilenetv2_102,Fail
CompiledModels.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail
CompiledModels.Op/yolox_001,Fail
CompiledModels.Op/yolox_004,Fail
CompiledModels.Op/yolox_027,Fail
CompiledModels.Op/yolox_042,Fail
CompiledModels.Op/yolox_077,Fail
CompiledModels.Op/yolox_086,Fail
CompiledModels.Op/yolox_102,Fail
CompiledModels.Op/yolox_112,Fail
CompiledModels.Op/yolox_122,Fail
CompiledModels.Op/yolox_132,Fail
CompiledModels.Op/yolox_147,Fail
CompiledModels.Op/yolox_yolox_nano_full_integer_quant,Fail

View file

@ -0,0 +1,227 @@
# Standard (un-compiled) tflite models are not supported on Neutron
Add.Op/.*
AddQuant.Op/.*
Conv2D.Op/.*
DepthwiseConv2D.Op/.*
FullyConnected.Op/.*
Models.Op/.*
# Compilation error due to duplicate/orphan input tensor
CompiledModels.Op/movenetlightning_117
CompiledModels.Op/movenetlightning_120
CompiledModels.Op/movenetthunder_117
CompiledModels.Op/movenetthunder_120
# Conversion rate zero (operators or operator + tensor type/size combo
# not supported yet by Neutron compiler)
CompiledModels.Op/efficientdet_066
CompiledModels.Op/efficientdet_101
CompiledModels.Op/efficientdet_136
CompiledModels.Op/efficientdet_251
CompiledModels.Op/efficientdet_252
CompiledModels.Op/efficientdet_253
CompiledModels.Op/efficientdet_254
CompiledModels.Op/efficientdet_255
CompiledModels.Op/efficientdet_256
CompiledModels.Op/efficientdet_257
CompiledModels.Op/efficientdet_258
CompiledModels.Op/efficientdet_259
CompiledModels.Op/efficientdet_260
CompiledModels.Op/efficientdet_261
CompiledModels.Op/efficientdet_262
CompiledModels.Op/efficientdet_263
CompiledModels.Op/efficientdet_264
CompiledModels.Op/efficientdet_265
CompiledModels.Op/inception_082
CompiledModels.Op/micronetlarge_013
CompiledModels.Op/mobiledet_097
CompiledModels.Op/mobiledet_099
CompiledModels.Op/mobiledet_101
CompiledModels.Op/mobiledet_103
CompiledModels.Op/mobiledet_105
CompiledModels.Op/mobiledet_107
CompiledModels.Op/mobiledet_109
CompiledModels.Op/mobiledet_111
CompiledModels.Op/mobiledet_113
CompiledModels.Op/mobiledet_115
CompiledModels.Op/mobiledet_117
CompiledModels.Op/mobiledet_118
CompiledModels.Op/mobiledet_122
CompiledModels.Op/mobiledet_123
CompiledModels.Op/mobilenetv1_030
CompiledModels.Op/movenetlightning_000
CompiledModels.Op/movenetlightning_001
CompiledModels.Op/movenetlightning_002
CompiledModels.Op/movenetlightning_003
CompiledModels.Op/movenetlightning_071
CompiledModels.Op/movenetlightning_075
CompiledModels.Op/movenetlightning_079
CompiledModels.Op/movenetlightning_087
CompiledModels.Op/movenetlightning_088
CompiledModels.Op/movenetlightning_089
CompiledModels.Op/movenetlightning_090
CompiledModels.Op/movenetlightning_091
CompiledModels.Op/movenetlightning_092
CompiledModels.Op/movenetlightning_093
CompiledModels.Op/movenetlightning_094
CompiledModels.Op/movenetlightning_095
CompiledModels.Op/movenetlightning_096
CompiledModels.Op/movenetlightning_097
CompiledModels.Op/movenetlightning_098
CompiledModels.Op/movenetlightning_099
CompiledModels.Op/movenetlightning_105
CompiledModels.Op/movenetlightning_112
CompiledModels.Op/movenetlightning_113
CompiledModels.Op/movenetlightning_114
CompiledModels.Op/movenetlightning_115
CompiledModels.Op/movenetlightning_116
CompiledModels.Op/movenetlightning_118
CompiledModels.Op/movenetlightning_119
CompiledModels.Op/movenetlightning_122
CompiledModels.Op/movenetlightning_123
CompiledModels.Op/movenetlightning_124
CompiledModels.Op/movenetlightning_125
CompiledModels.Op/movenetlightning_126
CompiledModels.Op/movenetlightning_127
CompiledModels.Op/movenetlightning_128
CompiledModels.Op/movenetlightning_129
CompiledModels.Op/movenetlightning_130
CompiledModels.Op/movenetlightning_131
CompiledModels.Op/movenetlightning_132
CompiledModels.Op/movenetlightning_133
CompiledModels.Op/movenetlightning_134
CompiledModels.Op/movenetlightning_135
CompiledModels.Op/movenetlightning_136
CompiledModels.Op/movenetlightning_137
CompiledModels.Op/movenetlightning_138
CompiledModels.Op/movenetlightning_139
CompiledModels.Op/movenetlightning_140
CompiledModels.Op/movenetlightning_141
CompiledModels.Op/movenetlightning_142
CompiledModels.Op/movenetlightning_143
CompiledModels.Op/movenetlightning_144
CompiledModels.Op/movenetlightning_145
CompiledModels.Op/movenetlightning_147
CompiledModels.Op/movenetlightning_149
CompiledModels.Op/movenetlightning_150
CompiledModels.Op/movenetlightning_151
CompiledModels.Op/movenetlightning_152
CompiledModels.Op/movenetlightning_153
CompiledModels.Op/movenetlightning_154
CompiledModels.Op/movenetlightning_155
CompiledModels.Op/movenetlightning_156
CompiledModels.Op/movenetthunder_000
CompiledModels.Op/movenetthunder_001
CompiledModels.Op/movenetthunder_002
CompiledModels.Op/movenetthunder_003
CompiledModels.Op/movenetthunder_071
CompiledModels.Op/movenetthunder_075
CompiledModels.Op/movenetthunder_079
CompiledModels.Op/movenetthunder_087
CompiledModels.Op/movenetthunder_088
CompiledModels.Op/movenetthunder_089
CompiledModels.Op/movenetthunder_090
CompiledModels.Op/movenetthunder_091
CompiledModels.Op/movenetthunder_092
CompiledModels.Op/movenetthunder_093
CompiledModels.Op/movenetthunder_094
CompiledModels.Op/movenetthunder_095
CompiledModels.Op/movenetthunder_096
CompiledModels.Op/movenetthunder_097
CompiledModels.Op/movenetthunder_098
CompiledModels.Op/movenetthunder_099
CompiledModels.Op/movenetthunder_105
CompiledModels.Op/movenetthunder_112
CompiledModels.Op/movenetthunder_113
CompiledModels.Op/movenetthunder_114
CompiledModels.Op/movenetthunder_115
CompiledModels.Op/movenetthunder_116
CompiledModels.Op/movenetthunder_118
CompiledModels.Op/movenetthunder_119
CompiledModels.Op/movenetthunder_122
CompiledModels.Op/movenetthunder_123
CompiledModels.Op/movenetthunder_124
CompiledModels.Op/movenetthunder_125
CompiledModels.Op/movenetthunder_126
CompiledModels.Op/movenetthunder_127
CompiledModels.Op/movenetthunder_128
CompiledModels.Op/movenetthunder_129
CompiledModels.Op/movenetthunder_130
CompiledModels.Op/movenetthunder_131
CompiledModels.Op/movenetthunder_132
CompiledModels.Op/movenetthunder_133
CompiledModels.Op/movenetthunder_134
CompiledModels.Op/movenetthunder_135
CompiledModels.Op/movenetthunder_136
CompiledModels.Op/movenetthunder_137
CompiledModels.Op/movenetthunder_138
CompiledModels.Op/movenetthunder_139
CompiledModels.Op/movenetthunder_140
CompiledModels.Op/movenetthunder_141
CompiledModels.Op/movenetthunder_142
CompiledModels.Op/movenetthunder_143
CompiledModels.Op/movenetthunder_144
CompiledModels.Op/movenetthunder_145
CompiledModels.Op/movenetthunder_147
CompiledModels.Op/movenetthunder_149
CompiledModels.Op/movenetthunder_150
CompiledModels.Op/movenetthunder_151
CompiledModels.Op/movenetthunder_152
CompiledModels.Op/movenetthunder_153
CompiledModels.Op/movenetthunder_154
CompiledModels.Op/movenetthunder_155
CompiledModels.Op/movenetthunder_156
CompiledModels.Op/ssdmobilenetv2_049
CompiledModels.Op/ssdmobilenetv2_051
CompiledModels.Op/ssdmobilenetv2_067
CompiledModels.Op/ssdmobilenetv2_068
CompiledModels.Op/ssdmobilenetv2_070
CompiledModels.Op/ssdmobilenetv2_075
CompiledModels.Op/ssdmobilenetv2_076
CompiledModels.Op/ssdmobilenetv2_078
CompiledModels.Op/ssdmobilenetv2_083
CompiledModels.Op/ssdmobilenetv2_084
CompiledModels.Op/ssdmobilenetv2_086
CompiledModels.Op/ssdmobilenetv2_091
CompiledModels.Op/ssdmobilenetv2_092
CompiledModels.Op/ssdmobilenetv2_094
CompiledModels.Op/ssdmobilenetv2_099
CompiledModels.Op/ssdmobilenetv2_100
CompiledModels.Op/ssdmobilenetv2_101
CompiledModels.Op/ssdmobilenetv2_107
CompiledModels.Op/ssdmobilenetv2_108
CompiledModels.Op/ssdmobilenetv2_109
CompiledModels.Op/yolox_000
CompiledModels.Op/yolox_003
CompiledModels.Op/yolox_012
CompiledModels.Op/yolox_103
CompiledModels.Op/yolox_104
CompiledModels.Op/yolox_113
CompiledModels.Op/yolox_114
CompiledModels.Op/yolox_123
CompiledModels.Op/yolox_124
CompiledModels.Op/yolox_125
CompiledModels.Op/yolox_126
CompiledModels.Op/yolox_127
CompiledModels.Op/yolox_128
CompiledModels.Op/yolox_129
CompiledModels.Op/yolox_130
CompiledModels.Op/yolox_131
CompiledModels.Op/yolox_133
CompiledModels.Op/yolox_134
CompiledModels.Op/yolox_135
CompiledModels.Op/yolox_136
CompiledModels.Op/yolox_137
CompiledModels.Op/yolox_139
CompiledModels.Op/yolox_140
CompiledModels.Op/yolox_141
CompiledModels.Op/yolox_142
CompiledModels.Op/yolox_143
CompiledModels.Op/yolox_145
CompiledModels.Op/yolox_146
CompiledModels.Op/yolox_148
CompiledModels.Op/yolox_149
CompiledModels.Op/yolox_151
CompiledModels.Op/yolox_152
CompiledModels.Op/yolox_153

View file

@ -0,0 +1,58 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Compile a TFLite model for the Neutron NPU.
Usage: python compile_model.py <input.tflite> <output.tflite>
Exit codes:
0: Success
1: Converter internal error
2: Zero conversion rate (model unchanged)
3: Invalid arguments or input file
"""
import sys
import os
from eiq_neutron_sdk import neutron_converter # from https://eiq.nxp.com/repository
# Only one platform supported for now
TARGET = "imx95"
def main():
if len(sys.argv) != 3:
print("Usage: python compile_model.py <input.tflite> <output.tflite>")
sys.exit(3)
input_file = sys.argv[1]
output_file = sys.argv[2]
if not os.path.isfile(input_file):
print(f"Error: Input file '{input_file}' does not exist")
sys.exit(3)
if not input_file.endswith('.tflite'):
print("Error: Input file must be a .tflite model")
sys.exit(3)
with open(input_file, 'rb') as f:
model_data = f.read()
try:
cmodel_data = neutron_converter.convertModel(list(model_data), TARGET)
except Exception as e:
print(f"Error during model conversion: {e}")
sys.exit(1)
if bytes(cmodel_data) == model_data:
print(f"Warning: Conversion rate was zero for {input_file}, skipping output")
sys.exit(2)
with open(output_file, 'wb') as f:
f.write(bytes(cmodel_data))
if __name__ == "__main__":
main()

View file

@ -0,0 +1,20 @@
# Copyright 2019 Google, Inc
# SPDX-License-Identifier: MIT
files_neutron = files(
'neutron_device.c',
'neutron_ml.c',
)
libneutron = static_library(
'neutron',
files_neutron,
include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
gnu_symbol_visibility : 'hidden',
dependencies : [idep_mesautil, dep_libdrm],
)
driver_neutron = declare_dependency(
compile_args : '-DGALLIUM_NEUTRON',
link_with : [libneutronwinsys, libneutron]
)

View file

@ -0,0 +1,219 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Batch compiler of TFLite models for NXP's Neutron NPU
Usage modes (mutually exclusive):
neutron_compiler.py --in <input.tflite> --out <output.tflite>
neutron_compiler.py --in-dir <input_dir> --out-dir <output_dir>
neutron_compiler.py --teflon-test-models
"""
import sys
import os
import argparse
import subprocess
def parse_args(argv):
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
mode_group = parser.add_mutually_exclusive_group(required=True)
mode_group.add_argument('--in',
dest='input_file',
action='store',
help='Input TFLite model file')
mode_group.add_argument('--in-dir',
dest='input_dir',
action='store',
help='Input directory containing TFLite models to be compiled')
mode_group.add_argument('--teflon-test-models',
dest='teflon_test_models',
action='store_true',
help='Use ${TEFLON_TEST_DATA}/models as input directory and ${TEFLON_TEST_DATA}/compiled_models as output')
parser.add_argument('--out',
dest='output_file',
action='store',
help='Output TFLite model file (required with --in)')
parser.add_argument('--out-dir',
dest='output_dir',
action='store',
help='Output directory for compiled models (required with --in-dir). Output files will have the same name as input files')
parser.add_argument('--force',
action='store_true',
required=False,
help='Overwrite output files if they exist. Ignored for single file mode')
parser.add_argument('--show-details',
action='store_true',
required=False,
help='Print compilation result for each model. Ignored for single file mode')
parser.add_argument('--log-to-file',
action='store_true',
required=False,
help='Generate log file for each compile attempt. Log is located next to output file')
args = parser.parse_args(argv)
if args.input_file:
if not args.output_file:
parser.error("--out is required when using --in")
if not os.path.isfile(args.input_file):
parser.error("--in must be an existing file")
if args.input_dir:
if not args.output_dir:
parser.error("--out-dir is required when using --in-dir")
if not os.path.isdir(args.input_dir):
parser.error("--in-dir must be an existing directory")
if args.teflon_test_models and not os.environ.get('TEFLON_TEST_DATA'):
parser.error("--teflon-test-models requires TEFLON_TEST_DATA to be set")
return args
def compile(input_file, output_file):
script_dir = os.path.dirname(os.path.abspath(__file__))
worker_script = os.path.join(script_dir, "compile_model.py")
try:
result = subprocess.run(
[sys.executable, worker_script, input_file, output_file],
capture_output=True,
text=True,
timeout=60,
)
except Exception as e:
print(f"Error compiling {input_file}: {e}", file=sys.stderr)
return None
return result
def write_log(output_file, result):
model_name_we = os.path.splitext(os.path.basename(output_file))[0]
output_dir = os.path.dirname(output_file)
log_file = os.path.join(output_dir, f"{model_name_we}_compiler_output.log")
with open(log_file, 'w') as f:
if result.stdout:
f.write(result.stdout.strip())
if result.stderr:
f.write('\n')
f.write(result.stderr.strip())
def print_result(input_file, result):
if result.returncode == 0:
status = "SUCCESS"
elif result.returncode == 1:
status = "COMPILER ERROR"
elif result.returncode == 2:
status = "ZERO CONVERSION RATE"
elif result.returncode == 3:
status = "SCRIPT ERROR"
else:
status = f"UNKNOWN({returncode})"
print(f" {input_file}: {status}")
if result.returncode != 0 and result.stderr:
print(result.stderr.strip())
def print_stats(total, ok, error, zero, skip):
print("\n" + "=" * 50)
print(f"Total models processed: {total}")
print(f" Successful: {ok}")
print(f" Errors: {error}")
print(f" Zero conversion rate: {zero}")
print(f" Skipped: {skip}")
print("=" * 50)
def find_tflite_files(directory):
tflite_files = []
for root, _, files in os.walk(directory):
for file in files:
if file.endswith('.tflite'):
tflite_files.append(os.path.join(root, file))
return sorted(tflite_files)
def process_model(input_file, output_file, log_to_file, show_details):
output_dir = os.path.dirname(os.path.abspath(output_file))
os.makedirs(output_dir, exist_ok=True)
result = compile(input_file, output_file)
if not result:
return 3
if log_to_file:
write_log(output_file, result)
if show_details:
print_result(input_file, result)
return result.returncode
def process_directory(input_dir, output_dir, force, log_to_file, show_details):
total = 0
ok = 0
errors = 0
zero = 0
skip = 0
tflite_files = find_tflite_files(input_dir)
for input_file in tflite_files:
total += 1
rel_path = os.path.relpath(input_file, input_dir)
output_file = os.path.join(output_dir, rel_path)
if os.path.exists(output_file) and not force:
if show_details:
print(f" {input_file}: SKIPPED")
skip += 1
continue
ret_code = process_model(input_file, output_file, log_to_file, show_details)
if ret_code == 0:
ok += 1
elif ret_code == 2:
zero += 1
else:
errors += 1
print_stats(total, ok, errors, zero, skip)
def main(argv):
args = parse_args(argv)
if args.input_file:
process_model(
args.input_file,
args.output_file,
args.log_to_file,
True)
elif args.input_dir:
process_directory(
args.input_dir,
args.output_dir,
args.force,
args.log_to_file,
args.show_details
)
elif args.teflon_test_models:
teflon_test_data = os.environ.get('TEFLON_TEST_DATA')
input_dir = os.path.join(teflon_test_data, 'models')
output_dir = os.path.join(teflon_test_data, 'compiled_models')
print(f"Compiling Teflon test models:")
print(f" Input directory: {input_dir}")
print(f" Output directory: {output_dir}")
process_directory(
input_dir,
output_dir,
args.force,
args.log_to_file,
args.show_details
)
if __name__ == "__main__":
main(sys.argv[1:])

View file

@ -0,0 +1,257 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_mman.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_device.h"
#include "neutron_ml.h"
static const struct debug_named_value neutron_debug_options[] = {
{"dbg_msgs", NEUTRON_DBG_MSGS, "Print debug messages"},
{"dump_bos", NEUTRON_DBG_DUMP_BOS, "Dump buffers for analysis"},
{"zero_bos", NEUTRON_DBG_ZERO, "Zero buffers for debugging"},
DEBUG_NAMED_VALUE_END};
DEBUG_GET_ONCE_FLAGS_OPTION(neutron_debug, "NEUTRON_DEBUG",
neutron_debug_options, 0)
int neutron_debug = 0;
static void
neutron_screen_destroy(struct pipe_screen *pscreen)
{
struct neutron_screen *screen = neutron_screen(pscreen);
ralloc_free(screen);
}
static void
neutron_context_destroy(struct pipe_context *pctx)
{
struct neutron_context *ctx = neutron_context(pctx);
struct neutron_screen *screen = neutron_screen(pctx->screen);
drmSyncobjDestroy(screen->fd, ctx->syncobj_handle);
ralloc_free(ctx);
}
static void *
neutron_buffer_map(struct pipe_context *pctx,
struct pipe_resource *prsc, unsigned level,
unsigned usage, const struct pipe_box *box,
struct pipe_transfer **out_transfer)
{
struct neutron_screen *screen = neutron_screen(pctx->screen);
struct neutron_resource *rsc = neutron_resource(prsc);
struct drm_neutron_sync_bo arg = {0};
int ret;
assert(level == 0);
assert(prsc->target == PIPE_BUFFER);
assert(box->y == 0);
assert(box->z == 0);
assert(box->height == 1);
assert(box->depth == 1);
struct pipe_transfer *transfer = rzalloc(NULL, struct pipe_transfer);
transfer->level = level;
transfer->usage = usage;
transfer->box = *box;
pipe_resource_reference(&transfer->resource, prsc);
if (transfer->usage & PIPE_MAP_READ) {
arg.handle = rsc->handle;
arg.direction = DRM_NEUTRON_SYNC_FROM_DEVICE;
arg.size = box->width;
arg.offset = box->x;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
if (ret < 0)
goto free_transfer;
}
*out_transfer = transfer;
return (uint8_t *)rsc->map + box->x;
free_transfer:
pipe_resource_reference(&transfer->resource, NULL);
ralloc_free(transfer);
return NULL;
}
static void
neutron_buffer_unmap(struct pipe_context *pctx,
struct pipe_transfer *transfer)
{
struct neutron_resource *rsrc = neutron_resource(transfer->resource);
struct neutron_screen *screen = neutron_screen(pctx->screen);
struct drm_neutron_sync_bo arg = {0};
int ret;
if (transfer->usage & PIPE_MAP_WRITE) {
arg.handle = rsrc->handle;
arg.direction = DRM_NEUTRON_SYNC_TO_DEVICE;
arg.size = transfer->box.width;
arg.offset = transfer->box.x;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
assert(ret >= 0);
}
pipe_resource_reference(&transfer->resource, NULL);
ralloc_free(transfer);
}
static struct pipe_context *
neutron_context_create(struct pipe_screen *pscreen,
void *priv, unsigned flags)
{
struct neutron_context *ctx = rzalloc(NULL, struct neutron_context);
struct neutron_screen *screen = neutron_screen(pscreen);
struct pipe_context *pctx = &ctx->base;
int ret;
if (!ctx)
return NULL;
ret = drmSyncobjCreate(screen->fd, 0, &ctx->syncobj_handle);
if (ret) {
ralloc_free(ctx);
return NULL;
}
pctx->screen = pscreen;
pctx->priv = priv;
pctx->destroy = neutron_context_destroy;
pctx->buffer_map = neutron_buffer_map;
pctx->buffer_unmap = neutron_buffer_unmap;
pctx->resource_copy_region = util_resource_copy_region;
pctx->buffer_subdata = u_default_buffer_subdata;
pctx->clear_buffer = u_default_clear_buffer;
pctx->ml_subgraph_invoke = neutron_ml_subgraph_invoke;
pctx->ml_subgraph_read_output = neutron_ml_subgraph_read_outputs;
return pctx;
}
static struct pipe_resource *
neutron_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *template)
{
struct neutron_screen *screen = neutron_screen(pscreen);
struct drm_neutron_create_bo arg = {0};
struct neutron_resource *rsc;
int ret;
assert(template->target == PIPE_BUFFER);
assert(template->height0 == 1);
assert(template->depth0 == 1);
assert(template->array_size == 1);
rsc = rzalloc(NULL, struct neutron_resource);
if (!rsc)
return NULL;
rsc->base = *template;
rsc->base.screen = pscreen;
rsc->base.nr_samples = template->nr_samples;
pipe_reference_init(&rsc->base.reference, 1);
arg.size = template->width0;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_CREATE_BO, &arg);
if (ret < 0)
goto free_rsc;
rsc->handle = arg.handle;
rsc->size = arg.size;
rsc->map = os_mmap(NULL, rsc->size, PROT_READ | PROT_WRITE, MAP_SHARED,
screen->fd, arg.map_offset);
if (rsc->map == MAP_FAILED)
goto close_bo;
return &rsc->base;
close_bo : {
struct drm_gem_close close_arg = {.handle = rsc->handle};
drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_arg);
}
free_rsc:
ralloc_free(rsc);
return NULL;
}
static void
neutron_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *prsc)
{
struct neutron_resource *rsc = neutron_resource(prsc);
struct neutron_screen *screen = neutron_screen(pscreen);
struct drm_gem_close arg = {0};
int ret;
os_munmap(rsc->map, rsc->size);
arg.handle = rsc->handle;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
ralloc_free(rsc);
}
static int
neutron_screen_get_fd(struct pipe_screen *pscreen)
{
return neutron_screen(pscreen)->fd;
}
static struct pipe_ml_device *
neutron_get_ml_device(struct pipe_screen *pscreen)
{
return &neutron_screen(pscreen)->ml_device.base;
}
struct pipe_screen *
neutron_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro)
{
struct neutron_screen *neutron_screen;
struct pipe_screen *screen;
neutron_screen = rzalloc(NULL, struct neutron_screen);
if (!neutron_screen)
return NULL;
neutron_debug = debug_get_option_neutron_debug();
screen = &neutron_screen->pscreen;
neutron_screen->fd = fd;
screen->get_screen_fd = neutron_screen_get_fd;
screen->destroy = neutron_screen_destroy;
screen->context_create = neutron_context_create;
screen->resource_create = neutron_resource_create;
screen->resource_destroy = neutron_resource_destroy;
neutron_screen->ml_device.base.ml_operation_supported = neutron_ml_operation_supported;
neutron_screen->ml_device.base.ml_subgraph_create = neutron_ml_subgraph_create;
neutron_screen->ml_device.base.ml_subgraph_destroy = neutron_ml_subgraph_destroy;
screen->get_ml_device = neutron_get_ml_device;
return screen;
}

View file

@ -0,0 +1,90 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "renderonly/renderonly.h"
#include "util/log.h"
#include "util/macros.h"
#ifndef NEUTRON_DEVICE_H
#define NEUTRON_DEVICE_H
enum neutron_dbg {
NEUTRON_DBG_MSGS = BITFIELD_BIT(0),
NEUTRON_DBG_DUMP_BOS = BITFIELD_BIT(1),
NEUTRON_DBG_ZERO = BITFIELD_BIT(2),
};
#define DBG_ENABLED(flag) unlikely(neutron_debug &(flag))
extern int neutron_debug;
#define DBG(fmt, ...) \
do { \
if (DBG_ENABLED(NEUTRON_DBG_MSGS)) \
mesa_logd(fmt, ##__VA_ARGS__); \
} while (0)
struct neutron_ml_device {
struct pipe_ml_device base;
struct pipe_context *context;
};
static inline struct neutron_ml_device *
neutron_ml_device(struct pipe_ml_device *dev)
{
return (struct neutron_ml_device *)dev;
}
struct neutron_screen {
struct pipe_screen pscreen;
struct neutron_ml_device ml_device;
int fd;
};
static inline struct neutron_screen *
neutron_screen(struct pipe_screen *p)
{
return (struct neutron_screen *)p;
}
static inline struct neutron_screen *
neutron_ml_device_screen(struct pipe_ml_device *pdevice)
{
struct neutron_ml_device *dev = neutron_ml_device(pdevice);
return container_of(dev, struct neutron_screen, ml_device);
}
struct neutron_context {
struct pipe_context base;
uint32_t syncobj_handle;
};
static inline struct neutron_context *
neutron_context(struct pipe_context *pctx)
{
return (struct neutron_context *)pctx;
}
struct neutron_resource {
struct pipe_resource base;
uint32_t handle;
uint64_t size;
void *map;
};
static inline struct neutron_resource *
neutron_resource(struct pipe_resource *p)
{
return (struct neutron_resource *)p;
}
struct pipe_screen *
neutron_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro);
#endif /* NEUTRON_DEVICE_H */

View file

@ -0,0 +1,382 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_state.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "util/u_inlines.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_ml.h"
static const char *
neutron_tensor_name(enum neutron_tensor_type type)
{
switch (type) {
case NEUTRON_WEIGHTS:
return "NeutronWeights";
case NEUTRON_MICROCODE:
return "NeutronMicrocode";
case NEUTRON_KERNELS:
return "NeutronKernels";
case NEUTRON_SCRATCH:
return "NeutronScratch";
case NEUTRON_PROFILE:
return "NeutronProfile";
case NEUTRON_DEBUG:
return "NeutronDebug";
case NEUTRON_DATA_INPUT:
return "Input";
case NEUTRON_DATA_OUTPUT:
return "Output";
default:
return "N/A";
}
}
static void
neutron_dump_buffer(const uint8_t *ptr, const char *name,
int id, int offset, unsigned size)
{
char buffer[255];
snprintf(buffer, sizeof(buffer), "mesa-%s-%03u.bin", name, id);
FILE *f = fopen(buffer, "wb");
assert(f);
fwrite(ptr + offset, 1, size, f);
if (ferror(f)) {
DBG("Error in writing to file: %s\n", strerror(errno));
}
fflush(f);
fclose(f);
}
static void
neutron_dump_tensor(struct pipe_resource *bo,
struct neutron_tensor *ntensor, int id)
{
uint8_t *buf = neutron_resource(bo)->map;
neutron_dump_buffer(buf, neutron_tensor_name(ntensor->type), id,
ntensor->offset, ntensor->size);
}
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *operation)
{
if (operation->type != PIPE_ML_OPERATION_TYPE_CUSTOM)
return false;
if (!operation->custom.name ||
strcmp(operation->custom.name, "NeutronGraph") != 0)
return false;
if (operation->input_count <= NEUTRON_CUSTOM_INPUT_MAX)
return false;
return true;
}
static unsigned
get_tensor_size(const struct pipe_tensor *tensor)
{
unsigned size = 1;
for (unsigned i = 0; i < 4; i++)
size *= tensor->dims[i];
return size;
}
static enum neutron_tensor_type
get_tensor_type(struct pipe_tensor *tensor, bool is_input)
{
for (unsigned i = 0; i < NEUTRON_CUSTOM_MAX; i++) {
if (strcmp(tensor->name, neutron_tensor_name(i)) == 0)
return i;
}
return is_input ? NEUTRON_DATA_INPUT : NEUTRON_DATA_OUTPUT;
}
static bool
is_custom_input(struct neutron_tensor *ntensor)
{
return ntensor->type < NEUTRON_CUSTOM_INPUT_MAX;
}
static void
create_tensor(struct neutron_subgraph *subgraph,
struct pipe_tensor *tensor, bool is_input)
{
struct neutron_tensor ntensor = {0};
ntensor.type = get_tensor_type(tensor, is_input);
ntensor.size = get_tensor_size(tensor);
ntensor.tensor = tensor;
if (ntensor.type == NEUTRON_DATA_INPUT)
ntensor.id = subgraph->data_inputs++;
else if (ntensor.type == NEUTRON_DATA_OUTPUT)
ntensor.id = subgraph->data_outputs++;
util_dynarray_append(&subgraph->tensors, ntensor);
}
static unsigned
get_header_array_size(struct neutron_subgraph *subgraph)
{
unsigned num_tensors =
util_dynarray_num_elements(&subgraph->tensors, struct neutron_tensor);
return num_tensors + 4;
}
static unsigned
get_buffer_size(struct neutron_subgraph *subgraph)
{
unsigned size = 0;
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor)
size += NEUTRON_ALIGN(ntensor->size);
size += subgraph->header.size;
return size;
}
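/*
 * Layout of the offset header (one 32-bit entry per tensor), as addressed
 * by get_header_index() below:
 *
 *   [0 .. data_inputs-1]                          data input tensors
 *   [data_inputs]                                 weights
 *   [data_inputs+1 .. data_inputs+data_outputs]   data output tensors
 *   [data_inputs+data_outputs+1]                  kernels
 *   [data_inputs+data_outputs+2]                  scratch
 *   [data_inputs+data_outputs+3 .. +6]            reserved
 *   [data_inputs+data_outputs+7]                  profile
 *   [data_inputs+data_outputs+8]                  debug
 *
 * The microcode offset is not stored in the header; it is passed to the
 * kernel through drm_neutron_inference_job.microcode_offset instead.
 */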
static int
get_header_index(struct neutron_subgraph *subgraph,
struct neutron_tensor *ntensor)
{
switch (ntensor->type) {
case NEUTRON_DATA_INPUT:
return ntensor->id;
case NEUTRON_WEIGHTS:
return subgraph->data_inputs;
case NEUTRON_DATA_OUTPUT:
return subgraph->data_inputs + 1 + ntensor->id;
case NEUTRON_KERNELS:
return subgraph->data_inputs + subgraph->data_outputs + 1;
case NEUTRON_SCRATCH:
return subgraph->data_inputs + subgraph->data_outputs + 2;
case NEUTRON_PROFILE:
/* 4 entries reserved */
return subgraph->data_inputs + subgraph->data_outputs + 7;
case NEUTRON_DEBUG:
return subgraph->data_inputs + subgraph->data_outputs + 8;
default:
return -1;
}
}
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count)
{
struct neutron_screen *screen = neutron_ml_device_screen(pdevice);
struct neutron_ml_device *dev = neutron_ml_device(pdevice);
const struct pipe_ml_operation *operation = &poperations[0];
struct neutron_subgraph *subgraph;
uint32_t *offset_array;
unsigned crt_offset = 0;
if (count > 1) {
DBG("Expect a single NeutronGraph per partition!\n");
return NULL;
}
if (!dev->context)
dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0);
subgraph = calloc(1, sizeof(*subgraph));
subgraph->base.device = pdevice;
subgraph->tensors = UTIL_DYNARRAY_INIT;
subgraph->header = UTIL_DYNARRAY_INIT;
for (unsigned i = 0; i < operation->input_count; i++)
create_tensor(subgraph, operation->input_tensors[i], true);
for (unsigned i = 0; i < operation->output_count; i++)
create_tensor(subgraph, operation->output_tensors[i], false);
if (!util_dynarray_resize(&subgraph->header, uint32_t,
get_header_array_size(subgraph)))
return NULL;
memset(util_dynarray_begin(&subgraph->header), 0, subgraph->header.size);
subgraph->bo = pipe_buffer_create(dev->context->screen, 0,
PIPE_USAGE_DEFAULT,
get_buffer_size(subgraph));
if (!subgraph->bo)
return NULL;
if (DBG_ENABLED(NEUTRON_DBG_ZERO)) {
struct pipe_transfer *transfer;
struct neutron_resource *rsc = neutron_resource(subgraph->bo);
uint8_t *buf = pipe_buffer_map(dev->context, subgraph->bo,
PIPE_MAP_WRITE, &transfer);
memset(buf, 0, pipe_buffer_size(subgraph->bo));
pipe_buffer_unmap(dev->context, transfer);
}
DBG("%s: Tensor list:\n", __func__);
DBG("%16s %3s %6s %8s %8s %8s\n", "TYPE", "ID", "INDEX", "OFFSET", "SIZE", "HDR_IDX");
offset_array = util_dynarray_begin(&subgraph->header);
crt_offset = NEUTRON_ALIGN(subgraph->header.size);
/* Translate all tensors into buffer sections */
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
unsigned header_index = get_header_index(subgraph, ntensor);
/* Microcode offset passed via ioctl, all others stored in header */
if (ntensor->type == NEUTRON_MICROCODE)
subgraph->microcode_offset = crt_offset;
else
offset_array[header_index] = crt_offset;
ntensor->offset = crt_offset;
crt_offset += NEUTRON_ALIGN(ntensor->size);
/* Custom inputs are written in the buffer during setup */
if (is_custom_input(ntensor)) {
pipe_buffer_write(dev->context, subgraph->bo, ntensor->offset,
ntensor->size, ntensor->tensor->data);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, 0);
}
DBG("%16s %3d %6d %8d %8d %2d\n", neutron_tensor_name(ntensor->type),
ntensor->id, ntensor->tensor->index, ntensor->offset, ntensor->size,
header_index);
}
pipe_buffer_write(dev->context, subgraph->bo, 0, subgraph->header.size,
util_dynarray_begin(&subgraph->header));
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) {
uint8_t *buf = neutron_resource(subgraph->bo)->map;
neutron_dump_buffer(buf, "header", 0, 0, subgraph->header.size);
}
return &subgraph->base;
}
static struct neutron_tensor *
get_tensor_by_id(struct neutron_subgraph *subgraph,
enum neutron_tensor_type type, unsigned index)
{
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
if (ntensor->type == type && ntensor->tensor->index == index)
return ntensor;
}
return NULL;
}
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned inputs_count, unsigned input_idxs[],
void *inputs[], bool is_signed[])
{
struct neutron_screen *screen = neutron_screen(pcontext->screen);
struct neutron_context *context = neutron_context(pcontext);
struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
struct drm_neutron_submit_job submit = {0};
int ret;
for (unsigned i = 0; i < inputs_count; i++) {
struct neutron_tensor *ntensor =
get_tensor_by_id(subgraph, NEUTRON_DATA_INPUT, input_idxs[i]);
DBG("Prepare input tensor: idx=%d, offset=%d, size=%d\n",
input_idxs[i], ntensor->offset, ntensor->size);
pipe_buffer_write(pcontext, subgraph->bo, ntensor->offset,
ntensor->size, inputs[i]);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, i);
}
/* Transfer ownership of output sections to device */
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
if (ntensor->type == NEUTRON_DATA_OUTPUT) {
struct pipe_transfer *transfer;
pipe_buffer_map_range(pcontext, subgraph->bo, ntensor->offset,
ntensor->size, PIPE_MAP_WRITE, &transfer);
pipe_buffer_unmap(pcontext, transfer);
}
}
submit.type = DRM_NEUTRON_JOB_INFERENCE;
submit.bo_handle = neutron_resource(subgraph->bo)->handle;
submit.syncobj_handle = context->syncobj_handle;
submit.inference.microcode_offset = subgraph->microcode_offset;
submit.inference.tensor_offset = 0;
submit.inference.tensor_count =
util_dynarray_num_elements(&subgraph->header, uint32_t);
DBG("Submitting job\n");
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &submit);
assert(ret == 0);
}
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned outputs_count,
unsigned output_idxs[], void *outputsv[],
bool is_signed[])
{
struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
struct neutron_context *context = neutron_context(pcontext);
struct neutron_screen *screen = neutron_screen(pcontext->screen);
int ret;
ret = drmSyncobjWait(screen->fd, &context->syncobj_handle, 1, INT64_MAX,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
assert(ret == 0);
DBG("Job finished\n");
for (unsigned i = 0; i < outputs_count; i++) {
struct neutron_tensor *ntensor =
get_tensor_by_id(subgraph, NEUTRON_DATA_OUTPUT, output_idxs[i]);
DBG("Read output tensor: idx=%d, offset=%d, size=%d\n",
output_idxs[i], ntensor->offset, ntensor->size);
pipe_buffer_read(pcontext, subgraph->bo, ntensor->offset,
ntensor->size, outputsv[i]);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, i);
}
}
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
struct pipe_ml_subgraph *psubgraph)
{
struct pipe_context *pcontext = neutron_ml_device(pdevice)->context;
struct neutron_screen *screen = neutron_screen(pcontext->screen);
struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
struct drm_gem_close arg = {0};
int ret;
arg.handle = neutron_resource(subgraph->bo)->handle;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
pipe_resource_reference(&subgraph->bo, NULL);
util_dynarray_fini(&subgraph->tensors);
util_dynarray_fini(&subgraph->header);
free(subgraph);
}

View file

@ -0,0 +1,73 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#ifndef NEUTRON_ML_H
#define NEUTRON_ML_H
#include <util/u_dynarray.h>
#include <util/u_math.h>
#include "neutron_device.h"
#define NEUTRON_ALIGN(value) align(value, 64)
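/*
 * Tensor categories attached to a NeutronGraph custom operation. Types
 * below NEUTRON_CUSTOM_MAX are recognized by their tensor name; of those,
 * the ones below NEUTRON_CUSTOM_INPUT_MAX carry data that is copied into
 * the job buffer at subgraph creation, while the rest only reserve space.
 * NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT are the regular runtime
 * tensors written before and read back after each inference.
 */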
enum neutron_tensor_type {
NEUTRON_WEIGHTS = 0,
NEUTRON_MICROCODE,
NEUTRON_KERNELS,
NEUTRON_CUSTOM_INPUT_MAX,
NEUTRON_SCRATCH = NEUTRON_CUSTOM_INPUT_MAX,
NEUTRON_PROFILE,
NEUTRON_DEBUG,
NEUTRON_CUSTOM_MAX,
NEUTRON_DATA_INPUT = NEUTRON_CUSTOM_MAX,
NEUTRON_DATA_OUTPUT,
};
struct neutron_tensor {
struct pipe_tensor *tensor;
enum neutron_tensor_type type;
unsigned size;
unsigned offset;
unsigned id; /* For NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT */
};
struct neutron_subgraph {
struct pipe_ml_subgraph base;
struct pipe_resource *bo;
struct util_dynarray tensors; /* struct neutron_tensor */
struct util_dynarray header;
unsigned microcode_offset;
unsigned data_inputs; /* number of non-custom input tensors */
unsigned data_outputs; /* number of non-custom output tensors */
};
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *operation);
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count);
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned inputs_count, unsigned input_idxs[],
void *inputs[], bool is_signed[]);
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned outputs_count,
unsigned output_idxs[], void *outputs[],
bool is_signed[]);
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
struct pipe_ml_subgraph *psubgraph);
#endif /* NEUTRON_ML_H */

View file

@ -304,6 +304,11 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi
operation->type = PIPE_ML_OPERATION_TYPE_QUANTIZE;
break;
}
case kTfLiteBuiltinCustom: {
operation->type = PIPE_ML_OPERATION_TYPE_CUSTOM;
operation->custom.name = node_registration->custom_name ? strdup(node_registration->custom_name) : NULL;
break;
}
default:
return false;
}
@ -403,6 +408,8 @@ fill_tensor(struct teflon_delegate *delegate, TfLiteContext *tf_context, struct
tensor->dims[out_dim] = 1;
}
tensor->name = strdup(tf_tensor.name);
if (tf_tensor.quantization.type == kTfLiteAffineQuantization) {
const TfLiteAffineQuantization *quant = (const TfLiteAffineQuantization *)tf_tensor.quantization.params;
tensor->scale = quant->scale->data[0];
@ -536,6 +543,9 @@ dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_op
case PIPE_ML_OPERATION_TYPE_LEAKY_RELU:
teflon_debug("%-15s ", "LEAKY_RELU");
break;
case PIPE_ML_OPERATION_TYPE_CUSTOM:
teflon_debug("%-15s ", "CUSTOM");
break;
}
char *input_buf = ralloc_strdup(NULL, "");
@ -775,6 +785,8 @@ tflite_builtin_op_name(TfLiteBuiltinOperator op)
return "TRANSPOSE";
case kTfLiteBuiltinLeakyRelu:
return "LEAKY_RELU";
case kTfLiteBuiltinCustom:
return "CUSTOM";
default:
return "unknown";
}
@ -929,8 +941,12 @@ PrepareDelegate(TfLiteContext *tf_context, TfLiteDelegate *tf_delegate)
}
teflon_debug("\n");
if (supported)
if (supported) {
supported_nodes->data[node_count++] = node_index;
// If a custom node is claimed by the teflon delegate, tell other delegates to skip it
if (registration->builtin_code == kTfLiteBuiltinCustom)
registration->builtin_code = kTfLiteBuiltinDelegate;
}
}
supported_nodes->size = node_count;
@ -994,7 +1010,8 @@ find_accel_device()
for (int i = 0; i < n; i++) {
if (strstr("rocket", devs[i]->driver_name) ||
strstr("ethosu", devs[i]->driver_name))
strstr("ethosu", devs[i]->driver_name) ||
strstr("neutron", devs[i]->driver_name))
device = devs[i];
else
pipe_loader_release(&devs[i], 1);

View file

@ -1039,6 +1039,10 @@ struct pipe_tensor {
* Zero-points used to quantize this tensor, per-axis quantization.
*/
int *zero_points;
/**
* Tensor name
*/
const char *name;
/**
* Whether the tensor contains data in INT8 or UINT8 format.
*/
@ -1072,6 +1076,7 @@ enum pipe_ml_operation_type {
PIPE_ML_OPERATION_TYPE_QUANTIZE,
PIPE_ML_OPERATION_TYPE_MAXIMUM,
PIPE_ML_OPERATION_TYPE_MINIMUM,
PIPE_ML_OPERATION_TYPE_CUSTOM,
};
enum pipe_ml_pooling_type {
@ -1276,6 +1281,9 @@ struct pipe_ml_operation
struct {
float alpha;
} leakyrelu;
struct {
char *name;
} custom;
};
};

View file

@ -196,6 +196,12 @@ if with_gallium_ethosu
else
driver_ethosu = declare_dependency()
endif
if with_gallium_neutron
subdir('winsys/neutron/drm')
subdir('drivers/neutron')
else
driver_neutron = declare_dependency()
endif
if with_gallium_zink
if not with_platform_windows
subdir('winsys/zink/drm')

View file

@ -59,7 +59,7 @@ libgallium_dri = shared_library(
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
driver_tegra, driver_i915, driver_svga, driver_virgl,
driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
driver_asahi, driver_crocus, driver_rocket, driver_ethosu
driver_asahi, driver_crocus, driver_rocket, driver_ethosu, driver_neutron
],
install : true,
name_suffix : libname_suffix,

View file

@ -10,6 +10,7 @@ libteflon = shared_library(
driver_etnaviv,
driver_rocket,
driver_ethosu,
driver_neutron,
idep_nir,
idep_mesautil,
],

View file

@ -25,8 +25,10 @@
#define TEST_ADD 1
#define TEST_FULLY_CONNECTED 1
#define TEST_MODELS 1
#define TEST_COMPILED_MODELS 1
#define TOLERANCE 8
#define COMPILED_MODELS_TOLERANCE 12
std::vector<bool> is_signed{false}; /* TODO: Support INT8? */
std::vector<bool> padding_same{false, true};
@ -41,7 +43,7 @@ std::vector<int> fc_channels{23, 46, 128, 256, 512};
std::vector<int> fc_size{128, 1280, 25088, 62720};
static void
test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance)
run_and_validate(TfLiteModel *cpu_model, TfLiteModel *npu_model, std::string cache_dir, unsigned tolerance)
{
void **input = NULL;
size_t num_inputs;
@ -51,11 +53,8 @@ test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance
size_t num_outputs;
void **npu_output;
TfLiteModel *model = TfLiteModelCreate(buf, buf_size);
assert(model);
run_model(model, EXECUTOR_CPU, &input, &num_inputs, &cpu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
run_model(model, EXECUTOR_NPU, &input, &num_inputs, &npu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
run_model(cpu_model, EXECUTOR_CPU, &input, &num_inputs, &cpu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
run_model(npu_model, EXECUTOR_NPU, &input, &num_inputs, &npu_output, &output_sizes, &output_types, &num_outputs, cache_dir);
const char *dump_output = os_get_option("TEFLON_DUMP_OUTPUT");
if (dump_output && atoi(dump_output) == 1) {
@ -221,6 +220,15 @@ test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance
free(output_sizes);
free(output_types);
}
static void
test_model(void *buf, size_t buf_size, std::string cache_dir, unsigned tolerance)
{
TfLiteModel *model = TfLiteModelCreate(buf, buf_size);
assert(model);
run_and_validate(model, model, cache_dir, tolerance);
TfLiteModelDelete(model);
}
@ -249,6 +257,57 @@ test_model_file(std::string file_name, unsigned tolerance, bool use_cache)
close(model_fd);
}
static void
test_compiled_model(void *buf, size_t buf_size, void *cbuf, size_t cbuf_size,
std::string cache_dir, unsigned tolerance)
{
TfLiteModel *cpu_model = TfLiteModelCreate(buf, buf_size);
TfLiteModel *npu_model = TfLiteModelCreate(cbuf, cbuf_size);
assert(cpu_model);
assert(npu_model);
run_and_validate(cpu_model, npu_model, cache_dir, tolerance);
TfLiteModelDelete(cpu_model);
TfLiteModelDelete(npu_model);
}
static void
test_compiled_model_file(std::string file_name, std::string compiled_file_name,
unsigned tolerance, bool use_cache)
{
std::ostringstream cache_dir;
if (use_cache) {
auto path = std::filesystem::path(file_name);
cache_dir << "/var/cache/teflon_tests/";
cache_dir << path.parent_path().filename().string();
cache_dir << "_";
cache_dir << path.stem().string();
}
srand(4);
struct stat sb, csb;
int model_fd = open(file_name.c_str(), O_RDONLY);
int cmodel_fd = open(compiled_file_name.c_str(), O_RDONLY);
fstat(model_fd, &sb);
fstat(cmodel_fd, &csb);
void *model_data = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, model_fd, 0);
void *cmodel_data = mmap(0, csb.st_size, PROT_READ, MAP_PRIVATE, cmodel_fd, 0);
test_compiled_model(model_data, sb.st_size, cmodel_data, csb.st_size,
cache_dir.str(), tolerance);
munmap(model_data, sb.st_size);
munmap(cmodel_data, csb.st_size);
close(model_fd);
close(cmodel_fd);
}
void
test_conv(int input_size, int weight_size, int input_channels, int output_channels,
int stride, bool padding_same, bool is_signed, bool depthwise, int seed)
@ -620,6 +679,66 @@ INSTANTIATE_TEST_SUITE_P(
#endif
#if TEST_COMPILED_MODELS
class CompiledModels : public testing::TestWithParam<std::string> {};
TEST_P(CompiledModels, Op)
{
std::ostringstream file_path, cfile_path;
auto test_name = GetParam();
test_name.replace(test_name.find("_"), 1, "/");
assert(os_get_option("TEFLON_TEST_DATA"));
file_path << os_get_option("TEFLON_TEST_DATA") << "/models/" << test_name << ".tflite";
cfile_path << os_get_option("TEFLON_TEST_DATA") << "/compiled_models/" << test_name << ".tflite";
test_compiled_model_file(file_path.str(), cfile_path.str(), COMPILED_MODELS_TOLERANCE, true);
}
std::vector<std::string>
get_compiled_model_files(void)
{
assert(os_get_option("TEFLON_TEST_DATA"));
std::stringstream dir, cdir;
dir << os_get_option("TEFLON_TEST_DATA") << "/models";
cdir << os_get_option("TEFLON_TEST_DATA") << "/compiled_models";
std::vector<std::string> paths;
std::filesystem::recursive_directory_iterator b(dir.str());
for (auto const &f : b) {
if (f.path().extension() != ".tflite")
continue;
std::filesystem::path relative_path = std::filesystem::relative(f.path(), dir.str());
std::filesystem::path compiled_path = std::filesystem::path(cdir.str()) / relative_path;
if (!std::filesystem::exists(compiled_path))
continue;
std::stringstream path;
path << f.path().parent_path().filename().string();
path << "_" << f.path().stem().string();
paths.push_back(path.str());
}
std::sort(paths.begin(), paths.end());
return paths;
}
static inline std::string
CompiledModelsTestCaseName(
const testing::TestParamInfo<std::string> &info)
{
return info.param;
}
INSTANTIATE_TEST_SUITE_P(
, CompiledModels,
::testing::ValuesIn(get_compiled_model_files()),
CompiledModelsTestCaseName);
#endif
int
main(int argc, char **argv)
{
@ -656,6 +775,8 @@ main(int argc, char **argv)
return 0;
} else if (argc > 1 && !strcmp(argv[1], "run_model")) {
test_model_file(std::string(argv[2]), TOLERANCE, false);
} else if (argc > 1 && !strcmp(argv[1], "run_compiled_model")) {
test_compiled_model_file(std::string(argv[2]), std::string(argv[3]), COMPILED_MODELS_TOLERANCE, false);
} else {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();

View file

@ -0,0 +1,13 @@
# Copyright 2017 Broadcom
# SPDX-License-Identifier: MIT
libneutronwinsys = static_library(
'neutronwinsys',
files('neutron_drm_winsys.c'),
include_directories : [
inc_src, inc_include,
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
],
gnu_symbol_visibility : 'hidden',
dependencies: [dep_libdrm, idep_mesautil],
)

View file

@ -0,0 +1,17 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* SPDX-License-Identifier: MIT
*/
#ifndef __NEUTRON_DRM_PUBLIC_H__
#define __NEUTRON_DRM_PUBLIC_H__
struct pipe_screen;
struct pipe_screen_config;
struct pipe_screen *
neutron_drm_screen_create(int drmFD, const struct pipe_screen_config *config);
#endif /* __NEUTRON_DRM_PUBLIC_H__ */

View file

@ -0,0 +1,20 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_file.h"
#include "util/u_screen.h"
#include "neutron/neutron_device.h"
#include "neutron_drm_public.h"
struct pipe_screen *
neutron_drm_screen_create(int fd, const struct pipe_screen_config *config)
{
return u_pipe_screen_lookup_or_create(os_dupfd_cloexec(fd), config, NULL,
neutron_screen_create);
}