Add new Gallium driver for NXP's Neutron NPU

Add source code for the Neutron Gallium driver as well as some
helper scripts for model compiling. The compiler can only be run
on x86 for now.

Signed-off-by: Ioana Ciocoi-Radulescu <ruxandra.radulescu@nxp.com>
This commit is contained in:
Ioana Ciocoi-Radulescu 2026-04-24 13:36:36 +03:00
parent 37cb2a514f
commit 291d1d44b1
20 changed files with 1621 additions and 3 deletions

View file

@ -3,6 +3,7 @@
src/gallium/drivers/ethosu/**/*
src/gallium/drivers/i915
src/gallium/drivers/neutron/**/*
src/gallium/drivers/r300/compiler/*
src/gallium/drivers/rocket/**/*
src/gallium/targets/teflon/**/*

View file

@ -0,0 +1,130 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/* Copyright 2025-2026 NXP */
/*
 * UAPI for the Neutron NPU DRM accel driver. Mirrors the kernel header;
 * keep struct layout and IOCTL numbering in sync with the kernel side.
 */
#ifndef __NEUTRON_ACCEL_H__
#define __NEUTRON_ACCEL_H__
#include "drm.h"
#if defined(__cplusplus)
extern "C" {
#endif
/**
 * enum drm_neutron_ioctl - Neutron IOCTL IDs
 *
 * @DRM_NEUTRON_CREATE_BO: Create a buffer object
 * @DRM_NEUTRON_SYNC_BO: Sync (parts of) the buffer object memory
 * @DRM_NEUTRON_SUBMIT_JOB: Submit a job to the device
 */
enum drm_neutron_ioctl {
DRM_NEUTRON_CREATE_BO = 0,
DRM_NEUTRON_SYNC_BO,
DRM_NEUTRON_SUBMIT_JOB,
};
/**
 * struct drm_neutron_create_bo - Create a buffer object and return buffer
 * info to user
 *
 * @size: Size in bytes of requested buffer. May be updated by driver
 * if allocated size different than requested
 * @handle: Returned handle for the new buffer object
 * @pad: MBZ
 * @map_offset: Returned offset for mmap() calls
 */
struct drm_neutron_create_bo {
__u64 size;
__u32 handle;
__u32 pad;
__u64 map_offset;
};
/**
 * enum drm_neutron_sync_dir - Direction of buffer object synchronization
 *
 * @DRM_NEUTRON_SYNC_TO_DEVICE: Sync from CPU to device
 * @DRM_NEUTRON_SYNC_FROM_DEVICE: Sync from device to CPU
 */
enum drm_neutron_sync_dir {
DRM_NEUTRON_SYNC_TO_DEVICE = 0,
DRM_NEUTRON_SYNC_FROM_DEVICE,
};
/**
 * struct drm_neutron_sync_bo - Sync buffer object memory
 *
 * @handle: Handle of buffer object to sync
 * @direction: Direction of sync, can be one of enum drm_neutron_sync_dir
 * @size: Size of the memory to sync, in bytes
 * @offset: Offset inside the buffer, in bytes
 */
struct drm_neutron_sync_bo {
__u32 handle;
__u32 direction;
__u64 size;
__u64 offset;
};
/**
 * enum drm_neutron_job_type - Type of job to submit to Neutron device
 *
 * @DRM_NEUTRON_JOB_INFERENCE: Inference job
 */
enum drm_neutron_job_type {
DRM_NEUTRON_JOB_INFERENCE = 0,
};
/**
 * struct drm_neutron_inference_job - Inference job descriptor
 *
 * @tensor_offset: Offset of tensor array inside job BO
 * @microcode_offset: Microcode offset inside BO
 * @tensor_count: Number of valid tensors
 * @pad: MBZ
 */
struct drm_neutron_inference_job {
__u32 tensor_offset;
__u32 microcode_offset;
__u32 tensor_count;
__u32 pad[5];
};
/**
 * struct drm_neutron_submit_job - Submit a job to Neutron device
 *
 * @type: Job type, one of enum drm_neutron_job_type
 * @bo_handle: BO handle for this job
 * @inference: Inference job descriptor (when type is DRM_NEUTRON_JOB_INFERENCE)
 * @reserved: Reserved for future job types
 * @syncobj_handle: Handle of syncobj on which user waits for job completion
 * @pad: MBZ
 */
struct drm_neutron_submit_job {
__u32 type;
__u32 bo_handle;
union {
struct drm_neutron_inference_job inference;
__u32 reserved[8];
};
__u32 syncobj_handle;
__u32 pad;
};
/* IOCTL request codes, offset from DRM_COMMAND_BASE per DRM convention. */
#define DRM_IOCTL_NEUTRON_CREATE_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_CREATE_BO, \
struct drm_neutron_create_bo)
#define DRM_IOCTL_NEUTRON_SYNC_BO \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SYNC_BO, \
struct drm_neutron_sync_bo)
#define DRM_IOCTL_NEUTRON_SUBMIT_JOB \
DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SUBMIT_JOB, \
struct drm_neutron_submit_job)
#if defined(__cplusplus)
}
#endif
#endif /* __NEUTRON_ACCEL_H__ */

View file

@ -205,7 +205,7 @@ elif gallium_drivers.contains('all')
gallium_drivers = [
'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915',
'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris',
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu'
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu', 'neutron'
]
endif
@ -234,6 +234,7 @@ with_gallium_d3d12 = gallium_drivers.contains('d3d12')
with_gallium_asahi = gallium_drivers.contains('asahi')
with_gallium_rocket = gallium_drivers.contains('rocket')
with_gallium_ethosu = gallium_drivers.contains('ethosu')
with_gallium_neutron = gallium_drivers.contains('neutron')
foreach gallium_driver : gallium_drivers
pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper())
endforeach

View file

@ -87,7 +87,7 @@ option(
choices : [
'all', 'auto',
'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris',
'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'lima', 'llvmpipe', 'neutron', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink',
],
description : 'List of gallium drivers to build. If this is set to auto ' +

View file

@ -0,0 +1,2 @@
BasedOnStyle: InheritParentConfig
DisableFormat: false

View file

@ -0,0 +1,102 @@
CompiledModels.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail
CompiledModels.Op/inception_002,Fail
CompiledModels.Op/inception_003,Fail
CompiledModels.Op/inception_008,Fail
CompiledModels.Op/inception_013,Fail
CompiledModels.Op/inception_015,Fail
CompiledModels.Op/inception_016,Fail
CompiledModels.Op/inception_021,Fail
CompiledModels.Op/inception_024,Fail
CompiledModels.Op/inception_027,Fail
CompiledModels.Op/inception_030,Fail
CompiledModels.Op/inception_032,Fail
CompiledModels.Op/inception_033,Fail
CompiledModels.Op/inception_041,Fail
CompiledModels.Op/inception_055,Fail
CompiledModels.Op/inception_059,Fail
CompiledModels.Op/inception_063,Fail
CompiledModels.Op/inception_064,Fail
CompiledModels.Op/inception_071,Fail
CompiledModels.Op/inception_073,Fail
CompiledModels.Op/inception_075,Fail
CompiledModels.Op/inception_077,Fail
CompiledModels.Op/mobiledet_002,Fail
CompiledModels.Op/mobiledet_004,Fail
CompiledModels.Op/mobiledet_006,Fail
CompiledModels.Op/mobiledet_009,Fail
CompiledModels.Op/mobiledet_012,Fail
CompiledModels.Op/mobiledet_015,Fail
CompiledModels.Op/mobiledet_017,Fail
CompiledModels.Op/mobiledet_020,Fail
CompiledModels.Op/mobiledet_029,Fail
CompiledModels.Op/mobiledet_039,Fail
CompiledModels.Op/mobiledet_042,Fail
CompiledModels.Op/mobiledet_046,Fail
CompiledModels.Op/mobiledet_050,Fail
CompiledModels.Op/mobiledet_063,Fail
CompiledModels.Op/mobiledet_086,Fail
CompiledModels.Op/mobiledet_087,Fail
CompiledModels.Op/mobiledet_106,Fail
CompiledModels.Op/mobiledet_119,Fail
CompiledModels.Op/mobiledet_ssdlite_mobiledet_coco_qat_postprocess,Fail
CompiledModels.Op/mobilenetv1_001,Fail
CompiledModels.Op/mobilenetv1_002,Fail
CompiledModels.Op/mobilenetv1_003,Fail
CompiledModels.Op/mobilenetv1_005,Fail
CompiledModels.Op/mobilenetv1_006,Fail
CompiledModels.Op/mobilenetv1_008,Fail
CompiledModels.Op/mobilenetv1_009,Fail
CompiledModels.Op/mobilenetv1_012,Fail
CompiledModels.Op/mobilenetv1_016,Fail
CompiledModels.Op/mobilenetv1_018,Fail
CompiledModels.Op/mobilenetv1_020,Fail
CompiledModels.Op/mobilenetv1_025,Fail
CompiledModels.Op/mobilenetv1_026,Fail
CompiledModels.Op/mobilenetv1_mobilenet_v1_1_224_quant,Fail
CompiledModels.Op/mobilenetv2_001,Fail
CompiledModels.Op/mobilenetv2_003,Fail
CompiledModels.Op/mobilenetv2_006,Fail
CompiledModels.Op/mobilenetv2_007,Fail
CompiledModels.Op/mobilenetv2_010,Fail
CompiledModels.Op/mobilenetv2_014,Fail
CompiledModels.Op/mobilenetv2_018,Fail
CompiledModels.Op/mobilenetv2_025,Fail
CompiledModels.Op/mobilenetv2_029,Fail
CompiledModels.Op/mobilenetv2_040,Fail
CompiledModels.Op/mobilenetv2_044,Fail
CompiledModels.Op/mobilenetv2_059,Fail
CompiledModels.Op/mobilenetv2_061,Fail
CompiledModels.Op/movenetlightning_103,Fail
CompiledModels.Op/movenetlightning_104,Fail
CompiledModels.Op/movenetthunder_103,Fail
CompiledModels.Op/movenetthunder_104,Fail
CompiledModels.Op/ssdmobilenetv2_001,Fail
CompiledModels.Op/ssdmobilenetv2_003,Fail
CompiledModels.Op/ssdmobilenetv2_007,Fail
CompiledModels.Op/ssdmobilenetv2_040,Fail
CompiledModels.Op/ssdmobilenetv2_047,Fail
CompiledModels.Op/ssdmobilenetv2_052,Fail
CompiledModels.Op/ssdmobilenetv2_054,Fail
CompiledModels.Op/ssdmobilenetv2_065,Fail
CompiledModels.Op/ssdmobilenetv2_069,Fail
CompiledModels.Op/ssdmobilenetv2_072,Fail
CompiledModels.Op/ssdmobilenetv2_073,Fail
CompiledModels.Op/ssdmobilenetv2_077,Fail
CompiledModels.Op/ssdmobilenetv2_081,Fail
CompiledModels.Op/ssdmobilenetv2_089,Fail
CompiledModels.Op/ssdmobilenetv2_096,Fail
CompiledModels.Op/ssdmobilenetv2_102,Fail
CompiledModels.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail
CompiledModels.Op/yolox_001,Fail
CompiledModels.Op/yolox_004,Fail
CompiledModels.Op/yolox_027,Fail
CompiledModels.Op/yolox_042,Fail
CompiledModels.Op/yolox_077,Fail
CompiledModels.Op/yolox_086,Fail
CompiledModels.Op/yolox_102,Fail
CompiledModels.Op/yolox_112,Fail
CompiledModels.Op/yolox_122,Fail
CompiledModels.Op/yolox_132,Fail
CompiledModels.Op/yolox_147,Fail
CompiledModels.Op/yolox_yolox_nano_full_integer_quant,Fail

View file

@ -0,0 +1,227 @@
# Standard (un-compiled) tflite models are not supported on Neutron
Add.Op/.*
AddQuant.Op/.*
Conv2D.Op/.*
DepthwiseConv2D.Op/.*
FullyConnected.Op/.*
Models.Op/.*
# Compilation error due to duplicate/orphan input tensor
CompiledModels.Op/movenetlightning_117
CompiledModels.Op/movenetlightning_120
CompiledModels.Op/movenetthunder_117
CompiledModels.Op/movenetthunder_120
# Conversion rate zero (operators or operator + tensor type/size combo
# not supported yet by Neutron compiler)
CompiledModels.Op/efficientdet_066
CompiledModels.Op/efficientdet_101
CompiledModels.Op/efficientdet_136
CompiledModels.Op/efficientdet_251
CompiledModels.Op/efficientdet_252
CompiledModels.Op/efficientdet_253
CompiledModels.Op/efficientdet_254
CompiledModels.Op/efficientdet_255
CompiledModels.Op/efficientdet_256
CompiledModels.Op/efficientdet_257
CompiledModels.Op/efficientdet_258
CompiledModels.Op/efficientdet_259
CompiledModels.Op/efficientdet_260
CompiledModels.Op/efficientdet_261
CompiledModels.Op/efficientdet_262
CompiledModels.Op/efficientdet_263
CompiledModels.Op/efficientdet_264
CompiledModels.Op/efficientdet_265
CompiledModels.Op/inception_082
CompiledModels.Op/micronetlarge_013
CompiledModels.Op/mobiledet_097
CompiledModels.Op/mobiledet_099
CompiledModels.Op/mobiledet_101
CompiledModels.Op/mobiledet_103
CompiledModels.Op/mobiledet_105
CompiledModels.Op/mobiledet_107
CompiledModels.Op/mobiledet_109
CompiledModels.Op/mobiledet_111
CompiledModels.Op/mobiledet_113
CompiledModels.Op/mobiledet_115
CompiledModels.Op/mobiledet_117
CompiledModels.Op/mobiledet_118
CompiledModels.Op/mobiledet_122
CompiledModels.Op/mobiledet_123
CompiledModels.Op/mobilenetv1_030
CompiledModels.Op/movenetlightning_000
CompiledModels.Op/movenetlightning_001
CompiledModels.Op/movenetlightning_002
CompiledModels.Op/movenetlightning_003
CompiledModels.Op/movenetlightning_071
CompiledModels.Op/movenetlightning_075
CompiledModels.Op/movenetlightning_079
CompiledModels.Op/movenetlightning_087
CompiledModels.Op/movenetlightning_088
CompiledModels.Op/movenetlightning_089
CompiledModels.Op/movenetlightning_090
CompiledModels.Op/movenetlightning_091
CompiledModels.Op/movenetlightning_092
CompiledModels.Op/movenetlightning_093
CompiledModels.Op/movenetlightning_094
CompiledModels.Op/movenetlightning_095
CompiledModels.Op/movenetlightning_096
CompiledModels.Op/movenetlightning_097
CompiledModels.Op/movenetlightning_098
CompiledModels.Op/movenetlightning_099
CompiledModels.Op/movenetlightning_105
CompiledModels.Op/movenetlightning_112
CompiledModels.Op/movenetlightning_113
CompiledModels.Op/movenetlightning_114
CompiledModels.Op/movenetlightning_115
CompiledModels.Op/movenetlightning_116
CompiledModels.Op/movenetlightning_118
CompiledModels.Op/movenetlightning_119
CompiledModels.Op/movenetlightning_122
CompiledModels.Op/movenetlightning_123
CompiledModels.Op/movenetlightning_124
CompiledModels.Op/movenetlightning_125
CompiledModels.Op/movenetlightning_126
CompiledModels.Op/movenetlightning_127
CompiledModels.Op/movenetlightning_128
CompiledModels.Op/movenetlightning_129
CompiledModels.Op/movenetlightning_130
CompiledModels.Op/movenetlightning_131
CompiledModels.Op/movenetlightning_132
CompiledModels.Op/movenetlightning_133
CompiledModels.Op/movenetlightning_134
CompiledModels.Op/movenetlightning_135
CompiledModels.Op/movenetlightning_136
CompiledModels.Op/movenetlightning_137
CompiledModels.Op/movenetlightning_138
CompiledModels.Op/movenetlightning_139
CompiledModels.Op/movenetlightning_140
CompiledModels.Op/movenetlightning_141
CompiledModels.Op/movenetlightning_142
CompiledModels.Op/movenetlightning_143
CompiledModels.Op/movenetlightning_144
CompiledModels.Op/movenetlightning_145
CompiledModels.Op/movenetlightning_147
CompiledModels.Op/movenetlightning_149
CompiledModels.Op/movenetlightning_150
CompiledModels.Op/movenetlightning_151
CompiledModels.Op/movenetlightning_152
CompiledModels.Op/movenetlightning_153
CompiledModels.Op/movenetlightning_154
CompiledModels.Op/movenetlightning_155
CompiledModels.Op/movenetlightning_156
CompiledModels.Op/movenetthunder_000
CompiledModels.Op/movenetthunder_001
CompiledModels.Op/movenetthunder_002
CompiledModels.Op/movenetthunder_003
CompiledModels.Op/movenetthunder_071
CompiledModels.Op/movenetthunder_075
CompiledModels.Op/movenetthunder_079
CompiledModels.Op/movenetthunder_087
CompiledModels.Op/movenetthunder_088
CompiledModels.Op/movenetthunder_089
CompiledModels.Op/movenetthunder_090
CompiledModels.Op/movenetthunder_091
CompiledModels.Op/movenetthunder_092
CompiledModels.Op/movenetthunder_093
CompiledModels.Op/movenetthunder_094
CompiledModels.Op/movenetthunder_095
CompiledModels.Op/movenetthunder_096
CompiledModels.Op/movenetthunder_097
CompiledModels.Op/movenetthunder_098
CompiledModels.Op/movenetthunder_099
CompiledModels.Op/movenetthunder_105
CompiledModels.Op/movenetthunder_112
CompiledModels.Op/movenetthunder_113
CompiledModels.Op/movenetthunder_114
CompiledModels.Op/movenetthunder_115
CompiledModels.Op/movenetthunder_116
CompiledModels.Op/movenetthunder_118
CompiledModels.Op/movenetthunder_119
CompiledModels.Op/movenetthunder_122
CompiledModels.Op/movenetthunder_123
CompiledModels.Op/movenetthunder_124
CompiledModels.Op/movenetthunder_125
CompiledModels.Op/movenetthunder_126
CompiledModels.Op/movenetthunder_127
CompiledModels.Op/movenetthunder_128
CompiledModels.Op/movenetthunder_129
CompiledModels.Op/movenetthunder_130
CompiledModels.Op/movenetthunder_131
CompiledModels.Op/movenetthunder_132
CompiledModels.Op/movenetthunder_133
CompiledModels.Op/movenetthunder_134
CompiledModels.Op/movenetthunder_135
CompiledModels.Op/movenetthunder_136
CompiledModels.Op/movenetthunder_137
CompiledModels.Op/movenetthunder_138
CompiledModels.Op/movenetthunder_139
CompiledModels.Op/movenetthunder_140
CompiledModels.Op/movenetthunder_141
CompiledModels.Op/movenetthunder_142
CompiledModels.Op/movenetthunder_143
CompiledModels.Op/movenetthunder_144
CompiledModels.Op/movenetthunder_145
CompiledModels.Op/movenetthunder_147
CompiledModels.Op/movenetthunder_149
CompiledModels.Op/movenetthunder_150
CompiledModels.Op/movenetthunder_151
CompiledModels.Op/movenetthunder_152
CompiledModels.Op/movenetthunder_153
CompiledModels.Op/movenetthunder_154
CompiledModels.Op/movenetthunder_155
CompiledModels.Op/movenetthunder_156
CompiledModels.Op/ssdmobilenetv2_049
CompiledModels.Op/ssdmobilenetv2_051
CompiledModels.Op/ssdmobilenetv2_067
CompiledModels.Op/ssdmobilenetv2_068
CompiledModels.Op/ssdmobilenetv2_070
CompiledModels.Op/ssdmobilenetv2_075
CompiledModels.Op/ssdmobilenetv2_076
CompiledModels.Op/ssdmobilenetv2_078
CompiledModels.Op/ssdmobilenetv2_083
CompiledModels.Op/ssdmobilenetv2_084
CompiledModels.Op/ssdmobilenetv2_086
CompiledModels.Op/ssdmobilenetv2_091
CompiledModels.Op/ssdmobilenetv2_092
CompiledModels.Op/ssdmobilenetv2_094
CompiledModels.Op/ssdmobilenetv2_099
CompiledModels.Op/ssdmobilenetv2_100
CompiledModels.Op/ssdmobilenetv2_101
CompiledModels.Op/ssdmobilenetv2_107
CompiledModels.Op/ssdmobilenetv2_108
CompiledModels.Op/ssdmobilenetv2_109
CompiledModels.Op/yolox_000
CompiledModels.Op/yolox_003
CompiledModels.Op/yolox_012
CompiledModels.Op/yolox_103
CompiledModels.Op/yolox_104
CompiledModels.Op/yolox_113
CompiledModels.Op/yolox_114
CompiledModels.Op/yolox_123
CompiledModels.Op/yolox_124
CompiledModels.Op/yolox_125
CompiledModels.Op/yolox_126
CompiledModels.Op/yolox_127
CompiledModels.Op/yolox_128
CompiledModels.Op/yolox_129
CompiledModels.Op/yolox_130
CompiledModels.Op/yolox_131
CompiledModels.Op/yolox_133
CompiledModels.Op/yolox_134
CompiledModels.Op/yolox_135
CompiledModels.Op/yolox_136
CompiledModels.Op/yolox_137
CompiledModels.Op/yolox_139
CompiledModels.Op/yolox_140
CompiledModels.Op/yolox_141
CompiledModels.Op/yolox_142
CompiledModels.Op/yolox_143
CompiledModels.Op/yolox_145
CompiledModels.Op/yolox_146
CompiledModels.Op/yolox_148
CompiledModels.Op/yolox_149
CompiledModels.Op/yolox_151
CompiledModels.Op/yolox_152
CompiledModels.Op/yolox_153

View file

@ -0,0 +1,58 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Compile a TFLite model for the Neutron NPU.
Usage: python compile_model.py <input.tflite> <output.tflite>
Exit codes:
0: Success
1: Converter internal error
2: Zero conversion rate (model unchanged)
3: Invalid arguments or input file
"""
import sys
import os
from eiq_neutron_sdk import neutron_converter # from https://eiq.nxp.com/repository
# Neutron compiler target platform; only one supported for now.
TARGET = "imx95"
def main():
    """Validate CLI arguments, convert the model, and write the result.

    Exit codes: 0 success, 1 converter error, 2 zero conversion rate
    (output skipped), 3 invalid arguments or input file.
    """
    if len(sys.argv) != 3:
        print("Usage: python compile_model.py <input.tflite> <output.tflite>")
        sys.exit(3)

    src, dst = sys.argv[1], sys.argv[2]

    # Reject missing or non-TFLite inputs up front.
    if not os.path.isfile(src):
        print(f"Error: Input file '{src}' does not exist")
        sys.exit(3)
    if not src.endswith('.tflite'):
        print("Error: Input file must be a .tflite model")
        sys.exit(3)

    with open(src, 'rb') as f:
        model_data = f.read()

    try:
        cmodel_data = neutron_converter.convertModel(list(model_data), TARGET)
    except Exception as e:
        print(f"Error during model conversion: {e}")
        sys.exit(1)

    # An unchanged model means the converter found nothing to offload.
    if bytes(cmodel_data) == model_data:
        print(f"Warning: Conversion rate was zero for {src}, skipping output")
        sys.exit(2)

    with open(dst, 'wb') as f:
        f.write(bytes(cmodel_data))


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,20 @@
# Copyright 2019 Google, Inc
# SPDX-License-Identifier: MIT
# Sources for the Gallium Neutron NPU driver.
files_neutron = files(
'neutron_device.c',
'neutron_ml.c',
)
# Static library with the core driver; symbols hidden by default.
libneutron = static_library(
'neutron',
files_neutron,
include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
gnu_symbol_visibility : 'hidden',
dependencies : [idep_mesautil, dep_libdrm],
)
# NOTE(review): libneutronwinsys is expected to be defined in the winsys
# meson.build — confirm the winsys target exists before enabling this driver.
driver_neutron = declare_dependency(
compile_args : '-DGALLIUM_NEUTRON',
link_with : [libneutronwinsys, libneutron]
)

View file

@ -0,0 +1,219 @@
# Copyright 2026 NXP
# SPDX-License-Identifier: MIT
"""
Batch compiler of TFLite models for NXP's Neutron NPU
Usage modes (mutually exclusive):
neutron_compiler.py --in <input.tflite> --out <output.tflite>
neutron_compiler.py --in-dir <input_dir> --out-dir <output_dir>
neutron_compiler.py --teflon-test-models
"""
import sys
import os
import argparse
import subprocess
def parse_args(argv):
    """Parse and cross-validate command line arguments.

    Args:
        argv: argument strings, excluding the program name.

    Returns:
        argparse.Namespace with the validated options.

    Exits (via parser.error) when mode-specific requirements are not met.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument('--in',
                            dest='input_file',
                            action='store',
                            help='Input TFLite model file')
    mode_group.add_argument('--in-dir',
                            dest='input_dir',
                            action='store',
                            help='Input directory containing TFLite models to be compiled')
    mode_group.add_argument('--teflon-test-models',
                            dest='teflon_test_models',
                            action='store_true',
                            help='Use ${TEFLON_TEST_DATA}/models as input directory and ${TEFLON_TEST_DATA}/compiled_models as output')
    parser.add_argument('--out',
                        dest='output_file',
                        action='store',
                        help='Output TFLite model file (required with --in)')
    parser.add_argument('--out-dir',
                        dest='output_dir',
                        action='store',
                        help='Output directory for compiled models (required with --in-dir). Output files will have the same name as input files')
    parser.add_argument('--force',
                        action='store_true',
                        required=False,
                        help='Overwrite output files if they exist. Ignored for single file mode')
    parser.add_argument('--show-details',
                        action='store_true',
                        required=False,
                        help='Print compilation result for each model. Ignored for single file mode')
    parser.add_argument('--log-to-file',
                        action='store_true',
                        required=False,
                        help='Generate log file for each compile attempt. Log is located next to output file')
    # Bug fix: argv was previously ignored (parse_args() silently fell back
    # to sys.argv), making the function untestable and the parameter dead.
    args = parser.parse_args(argv)
    if args.input_file:
        if not args.output_file:
            parser.error("--out is required when using --in")
        if not os.path.isfile(args.input_file):
            parser.error("--in must be an existing file")
    if args.input_dir:
        if not args.output_dir:
            parser.error("--out-dir is required when using --in-dir")
        if not os.path.isdir(args.input_dir):
            parser.error("--in-dir must be an existing directory")
    if args.teflon_test_models and not os.environ.get('TEFLON_TEST_DATA'):
        parser.error("--teflon-test-models requires TEFLON_TEST_DATA to be set")
    return args
def compile(input_file, output_file):
    """Run compile_model.py on one model in a subprocess.

    Returns the CompletedProcess, or None if the subprocess could not
    be launched (e.g. timeout or OS error).
    """
    here = os.path.dirname(os.path.abspath(__file__))
    worker = os.path.join(here, "compile_model.py")
    cmd = [sys.executable, worker, input_file, output_file]
    try:
        # Capture output so callers can log or display it; 60s guards
        # against a hung converter.
        return subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except Exception as e:
        print(f"Error compiling {input_file}: {e}", file=sys.stderr)
        return None
def write_log(output_file, result):
    """Save the compiler's captured stdout/stderr next to the output model."""
    stem = os.path.splitext(os.path.basename(output_file))[0]
    log_path = os.path.join(os.path.dirname(output_file),
                            f"{stem}_compiler_output.log")
    chunks = []
    if result.stdout:
        chunks.append(result.stdout.strip())
    if result.stderr:
        # stderr goes on its own line after stdout (if any).
        chunks.append('\n')
        chunks.append(result.stderr.strip())
    with open(log_path, 'w') as f:
        f.write(''.join(chunks))
def print_result(input_file, result):
    """Print a one-line status for a compile attempt, plus stderr on failure.

    Exit-code mapping follows compile_model.py: 0 success, 1 converter
    error, 2 zero conversion rate, 3 script/argument error.
    """
    if result.returncode == 0:
        status = "SUCCESS"
    elif result.returncode == 1:
        status = "COMPILER ERROR"
    elif result.returncode == 2:
        status = "ZERO CONVERSION RATE"
    elif result.returncode == 3:
        status = "SCRIPT ERROR"
    else:
        # Bug fix: 'returncode' was referenced without the 'result.' prefix,
        # raising NameError for any unexpected exit code.
        status = f"UNKNOWN({result.returncode})"
    print(f"  {input_file}: {status}")
    if result.returncode != 0 and result.stderr:
        print(result.stderr.strip())
def print_stats(total, ok, error, zero, skip):
    """Print a banner-framed summary of batch compilation results."""
    banner = "=" * 50
    summary = (
        "\n" + banner + "\n"
        f"Total models processed: {total}\n"
        f"  Successful: {ok}\n"
        f"  Errors: {error}\n"
        f"  Zero conversion rate: {zero}\n"
        f"  Skipped: {skip}\n"
        + banner
    )
    print(summary)
def find_tflite_files(directory):
    """Recursively collect all .tflite files under directory, sorted."""
    found = [
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.endswith('.tflite')
    ]
    return sorted(found)
def process_model(input_file, output_file, log_to_file, show_details):
    """Compile one model, optionally logging and printing the outcome.

    Returns the worker script's exit code, or 3 when the compile
    subprocess could not be run at all.
    """
    os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
    result = compile(input_file, output_file)
    if result is None:
        return 3
    if log_to_file:
        write_log(output_file, result)
    if show_details:
        print_result(input_file, result)
    return result.returncode
def process_directory(input_dir, output_dir, force, log_to_file, show_details):
    """Compile every .tflite model under input_dir into output_dir.

    Mirrors the input directory layout under output_dir; existing outputs
    are skipped unless force is set. Prints a summary when done.
    """
    counts = {'total': 0, 'ok': 0, 'errors': 0, 'zero': 0, 'skip': 0}
    for input_file in find_tflite_files(input_dir):
        counts['total'] += 1
        rel = os.path.relpath(input_file, input_dir)
        output_file = os.path.join(output_dir, rel)
        if os.path.exists(output_file) and not force:
            if show_details:
                print(f"  {input_file}: SKIPPED")
            counts['skip'] += 1
            continue
        rc = process_model(input_file, output_file, log_to_file, show_details)
        if rc == 0:
            counts['ok'] += 1
        elif rc == 2:
            # Zero conversion rate: model unchanged, tracked separately.
            counts['zero'] += 1
        else:
            counts['errors'] += 1
    print_stats(counts['total'], counts['ok'], counts['errors'],
                counts['zero'], counts['skip'])
def main(argv):
    """Entry point: dispatch to single-file, directory, or Teflon mode."""
    args = parse_args(argv)
    if args.input_file:
        # Single-file mode always shows the result.
        process_model(args.input_file, args.output_file,
                      args.log_to_file, True)
        return
    if args.input_dir:
        process_directory(args.input_dir, args.output_dir, args.force,
                          args.log_to_file, args.show_details)
        return
    if args.teflon_test_models:
        data_root = os.environ.get('TEFLON_TEST_DATA')
        input_dir = os.path.join(data_root, 'models')
        output_dir = os.path.join(data_root, 'compiled_models')
        print(f"Compiling Teflon test models:")
        print(f"  Input directory: {input_dir}")
        print(f"  Output directory: {output_dir}")
        process_directory(input_dir, output_dir, args.force,
                          args.log_to_file, args.show_details)


if __name__ == "__main__":
    main(sys.argv[1:])

View file

@ -0,0 +1,257 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_mman.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_device.h"
#include "neutron_ml.h"
/* Flags accepted in the NEUTRON_DEBUG environment variable. */
static const struct debug_named_value neutron_debug_options[] = {
{"dbg_msgs", NEUTRON_DBG_MSGS, "Print debug messages"},
{"dump_bos", NEUTRON_DBG_DUMP_BOS, "Dump buffers for analysis"},
{"zero_bos", NEUTRON_DBG_ZERO, "Zero buffers for debugging"},
DEBUG_NAMED_VALUE_END};
DEBUG_GET_ONCE_FLAGS_OPTION(neutron_debug, "NEUTRON_DEBUG",
neutron_debug_options, 0)
/* Global debug bitmask, initialized once at screen creation. */
int neutron_debug = 0;
static void
neutron_screen_destroy(struct pipe_screen *pscreen)
{
   /* Screen-owned allocations are ralloc-parented to the screen,
    * so a single free tears everything down. */
   ralloc_free(neutron_screen(pscreen));
}
static void
neutron_context_destroy(struct pipe_context *pctx)
{
   struct neutron_context *nctx = neutron_context(pctx);
   struct neutron_screen *nscreen = neutron_screen(pctx->screen);

   /* Release the job-completion syncobj before freeing the context. */
   drmSyncobjDestroy(nscreen->fd, nctx->syncobj_handle);
   ralloc_free(nctx);
}
/* Map (part of) a buffer resource for CPU access.
 *
 * For reads, the BO contents are first synced from the device. The BO is
 * persistently mapped at creation, so this only returns an offset pointer.
 * Returns NULL on allocation or sync failure.
 */
static void *
neutron_buffer_map(struct pipe_context *pctx,
                   struct pipe_resource *prsc, unsigned level,
                   unsigned usage, const struct pipe_box *box,
                   struct pipe_transfer **out_transfer)
{
   struct neutron_screen *screen = neutron_screen(pctx->screen);
   struct neutron_resource *rsc = neutron_resource(prsc);
   struct drm_neutron_sync_bo arg = {0};
   struct pipe_transfer *transfer;
   int ret;

   /* Only 1D buffer resources at level 0 are supported. */
   assert(level == 0);
   assert(prsc->target == PIPE_BUFFER);
   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);

   transfer = rzalloc(NULL, struct pipe_transfer);
   if (!transfer)
      return NULL; /* Fix: original dereferenced a NULL transfer on OOM. */

   transfer->level = level;
   transfer->usage = usage;
   transfer->box = *box;
   pipe_resource_reference(&transfer->resource, prsc);

   /* Sync device memory to the CPU before handing out a read mapping. */
   if (transfer->usage & PIPE_MAP_READ) {
      arg.handle = rsc->handle;
      arg.direction = DRM_NEUTRON_SYNC_FROM_DEVICE;
      arg.size = box->width;
      arg.offset = box->x;
      ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
      if (ret < 0)
         goto free_transfer;
   }

   *out_transfer = transfer;
   return (uint8_t *)rsc->map + box->x;

free_transfer:
   pipe_resource_reference(&transfer->resource, NULL);
   ralloc_free(transfer);
   return NULL;
}
static void
neutron_buffer_unmap(struct pipe_context *pctx,
struct pipe_transfer *transfer)
{
struct neutron_resource *rsrc = neutron_resource(transfer->resource);
struct neutron_screen *screen = neutron_screen(pctx->screen);
struct drm_neutron_sync_bo arg = {0};
int ret;
if (transfer->usage & PIPE_MAP_WRITE) {
arg.handle = rsrc->handle;
arg.direction = DRM_NEUTRON_SYNC_TO_DEVICE;
arg.size = transfer->box.width;
arg.offset = transfer->box.x;
ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg);
assert(ret >= 0);
}
pipe_resource_reference(&transfer->resource, NULL);
ralloc_free(transfer);
}
/* Create a context with a syncobj used to wait for job completion.
 * Returns NULL on allocation or syncobj-creation failure.
 */
static struct pipe_context *
neutron_context_create(struct pipe_screen *pscreen,
                       void *priv, unsigned flags)
{
   struct neutron_screen *screen = neutron_screen(pscreen);
   struct neutron_context *ctx;
   struct pipe_context *pctx;
   int ret;

   ctx = rzalloc(NULL, struct neutron_context);
   if (!ctx)
      return NULL;

   ret = drmSyncobjCreate(screen->fd, 0, &ctx->syncobj_handle);
   if (ret) {
      /* Fix: ctx was leaked on this error path. */
      ralloc_free(ctx);
      return NULL;
   }

   pctx = &ctx->base;
   pctx->screen = pscreen;
   pctx->priv = priv;
   pctx->destroy = neutron_context_destroy;
   pctx->buffer_map = neutron_buffer_map;
   pctx->buffer_unmap = neutron_buffer_unmap;
   pctx->resource_copy_region = util_resource_copy_region;
   pctx->buffer_subdata = u_default_buffer_subdata;
   pctx->clear_buffer = u_default_clear_buffer;
   pctx->ml_subgraph_invoke = neutron_ml_subgraph_invoke;
   pctx->ml_subgraph_read_output = neutron_ml_subgraph_read_outputs;

   return pctx;
}
/* Allocate a BO for a buffer resource and keep a persistent CPU mapping
 * for its lifetime. Returns NULL on allocation, ioctl or mmap failure. */
static struct pipe_resource *
neutron_resource_create(struct pipe_screen *pscreen,
                        const struct pipe_resource *template)
{
   struct neutron_screen *screen = neutron_screen(pscreen);
   struct drm_neutron_create_bo arg = {0};
   struct neutron_resource *rsc;
   int ret;

   /* Only simple 1D buffers are supported by this driver. */
   assert(template->target == PIPE_BUFFER);
   assert(template->height0 == 1);
   assert(template->depth0 == 1);
   assert(template->array_size == 1);

   rsc = rzalloc(NULL, struct neutron_resource);
   if (!rsc)
      return NULL;

   rsc->base = *template;
   rsc->base.screen = pscreen;
   rsc->base.nr_samples = template->nr_samples;
   pipe_reference_init(&rsc->base.reference, 1);

   /* The kernel may round the allocation up; use the returned size. */
   arg.size = template->width0;
   ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_CREATE_BO, &arg);
   if (ret < 0)
      goto err_free;

   rsc->handle = arg.handle;
   rsc->size = arg.size;

   rsc->map = os_mmap(NULL, rsc->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                      screen->fd, arg.map_offset);
   if (rsc->map != MAP_FAILED)
      return &rsc->base;

   /* mmap failed: close the BO, then fall through to free the resource. */
   {
      struct drm_gem_close close_arg = {.handle = rsc->handle};
      drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_arg);
   }
err_free:
   ralloc_free(rsc);
   return NULL;
}
static void
neutron_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *prsc)
{
struct neutron_resource *rsc = neutron_resource(prsc);
struct neutron_screen *screen = neutron_screen(pscreen);
struct drm_gem_close arg = {0};
int ret;
os_munmap(rsc->map, rsc->size);
arg.handle = rsc->handle;
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
assert(ret >= 0);
ralloc_free(rsc);
}
static int
neutron_screen_get_fd(struct pipe_screen *pscreen)
{
return neutron_screen(pscreen)->fd;
}
static struct pipe_ml_device *
neutron_get_ml_device(struct pipe_screen *pscreen)
{
return &neutron_screen(pscreen)->ml_device.base;
}
/* Create the Neutron screen for the given DRM fd and wire up the pipe
 * and ML entrypoints. config and ro are currently unused. */
struct pipe_screen *
neutron_screen_create(int fd,
                      const struct pipe_screen_config *config,
                      struct renderonly *ro)
{
   /* Renamed so the local does not shadow the neutron_screen() helper. */
   struct neutron_screen *nscreen;
   struct pipe_screen *pscreen;

   nscreen = rzalloc(NULL, struct neutron_screen);
   if (!nscreen)
      return NULL;

   neutron_debug = debug_get_option_neutron_debug();

   nscreen->fd = fd;

   pscreen = &nscreen->pscreen;
   pscreen->get_screen_fd = neutron_screen_get_fd;
   pscreen->destroy = neutron_screen_destroy;
   pscreen->context_create = neutron_context_create;
   pscreen->resource_create = neutron_resource_create;
   pscreen->resource_destroy = neutron_resource_destroy;
   pscreen->get_ml_device = neutron_get_ml_device;

   nscreen->ml_device.base.ml_operation_supported = neutron_ml_operation_supported;
   nscreen->ml_device.base.ml_subgraph_create = neutron_ml_subgraph_create;
   nscreen->ml_device.base.ml_subgraph_destroy = neutron_ml_subgraph_destroy;

   return pscreen;
}

View file

@ -0,0 +1,90 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "renderonly/renderonly.h"
#include "util/log.h"
#include "util/macros.h"
#ifndef NEUTRON_DEVICE_H
#define NEUTRON_DEVICE_H
enum neutron_dbg {
NEUTRON_DBG_MSGS = BITFIELD_BIT(0),
NEUTRON_DBG_DUMP_BOS = BITFIELD_BIT(1),
NEUTRON_DBG_ZERO = BITFIELD_BIT(2),
};
#define DBG_ENABLED(flag) unlikely(neutron_debug &(flag))
extern int neutron_debug;
#define DBG(fmt, ...) \
do { \
if (DBG_ENABLED(NEUTRON_DBG_MSGS)) \
mesa_logd(fmt, ##__VA_ARGS__); \
} while (0)
struct neutron_ml_device {
struct pipe_ml_device base;
struct pipe_context *context;
};
static inline struct neutron_ml_device *
neutron_ml_device(struct pipe_ml_device *dev)
{
return (struct neutron_ml_device *)dev;
}
/* Driver screen: owns the DRM fd and embeds the ML device. */
struct neutron_screen {
   struct pipe_screen pscreen;
   struct neutron_ml_device ml_device;
   int fd; /* DRM device file descriptor */
};

static inline struct neutron_screen *
neutron_screen(struct pipe_screen *p)
{
   return (struct neutron_screen *)p;
}

/* Recover the owning screen from an ML device pointer; relies on
 * ml_device being embedded (not pointed to) in neutron_screen. */
static inline struct neutron_screen *
neutron_ml_device_screen(struct pipe_ml_device *pdevice)
{
   struct neutron_ml_device *dev = neutron_ml_device(pdevice);
   return container_of(dev, struct neutron_screen, ml_device);
}
struct neutron_context {
   struct pipe_context base;
   /* DRM syncobj signaled by the kernel on job completion; waited on in
    * neutron_ml_subgraph_read_outputs(). */
   uint32_t syncobj_handle;
};

static inline struct neutron_context *
neutron_context(struct pipe_context *pctx)
{
   return (struct neutron_context *)pctx;
}

/* Buffer object: one GEM buffer plus its CPU mapping. */
struct neutron_resource {
   struct pipe_resource base;
   uint32_t handle; /* GEM handle */
   uint64_t size;   /* allocated size in bytes */
   /* CPU mapping; the ML code reads it directly, so it is assumed valid
    * for the BO's whole lifetime — TODO confirm in the resource code. */
   void *map;
};

static inline struct neutron_resource *
neutron_resource(struct pipe_resource *p)
{
   return (struct neutron_resource *)p;
}
struct pipe_screen *
neutron_screen_create(int fd,
const struct pipe_screen_config *config,
struct renderonly *ro);
#endif /* NEUTRON_DEVICE_H */

View file

@ -0,0 +1,382 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "pipe/p_state.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "util/u_inlines.h"
#include <xf86drm.h>
#include "drm-uapi/neutron_accel.h"
#include "neutron_ml.h"
/* Human-readable name for a tensor type.  The custom-tensor names are
 * also used as reserved identifiers matched in get_tensor_type(). */
static const char *
neutron_tensor_name(enum neutron_tensor_type type)
{
   static const char *const names[NEUTRON_DATA_OUTPUT + 1] = {
      [NEUTRON_WEIGHTS] = "NeutronWeights",
      [NEUTRON_MICROCODE] = "NeutronMicrocode",
      [NEUTRON_KERNELS] = "NeutronKernels",
      [NEUTRON_SCRATCH] = "NeutronScratch",
      [NEUTRON_PROFILE] = "NeutronProfile",
      [NEUTRON_DEBUG] = "NeutronDebug",
      [NEUTRON_DATA_INPUT] = "Input",
      [NEUTRON_DATA_OUTPUT] = "Output",
   };

   if ((unsigned)type > NEUTRON_DATA_OUTPUT || !names[type])
      return "N/A";

   return names[type];
}
/* Dump @size bytes at @ptr + @offset to a file named "mesa-<name>-<id>.bin"
 * in the current directory.  Best-effort debug aid: failures are logged
 * (when NEUTRON_DBG_MSGS is set) and otherwise ignored. */
static void
neutron_dump_buffer(const uint8_t *ptr, const char *name,
                    int id, int offset, unsigned size)
{
   char filename[255];
   FILE *f;

   /* %03d, not %03u: id is a signed int (always >= 0 in practice). */
   snprintf(filename, sizeof(filename), "mesa-%s-%03d.bin", name, id);

   /* Don't assert on fopen(): with NDEBUG that would dereference NULL
    * below; the CWD may simply not be writable. */
   f = fopen(filename, "wb");
   if (!f) {
      DBG("Error opening dump file %s: %s\n", filename, strerror(errno));
      return;
   }

   if (fwrite(ptr + offset, 1, size, f) != size)
      DBG("Error in writing to file: %s\n", strerror(errno));

   /* fclose() flushes; check it so short writes are not silently lost. */
   if (fclose(f) != 0)
      DBG("Error closing dump file %s: %s\n", filename, strerror(errno));
}
static void
neutron_dump_tensor(struct pipe_resource *bo,
struct neutron_tensor *ntensor, int id)
{
uint8_t *buf = neutron_resource(bo)->map;
neutron_dump_buffer(buf, neutron_tensor_name(ntensor->type), id,
ntensor->offset, ntensor->size);
}
/* The driver only handles pre-compiled "NeutronGraph" custom operations
 * that carry all compiler-generated custom inputs plus at least one data
 * input (input_count must exceed NEUTRON_CUSTOM_INPUT_MAX). */
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
                               const struct pipe_ml_operation *operation)
{
   return operation->type == PIPE_ML_OPERATION_TYPE_CUSTOM &&
          operation->custom.name != NULL &&
          !strcmp(operation->custom.name, "NeutronGraph") &&
          operation->input_count > NEUTRON_CUSTOM_INPUT_MAX;
}
/* Size in bytes of a tensor's data section.
 * NOTE(review): assumes exactly 4 dimensions and one byte per element
 * (quantized int8) — confirm against pipe_tensor semantics. */
static unsigned
get_tensor_size(const struct pipe_tensor *tensor)
{
   unsigned size = 1;
   for (unsigned i = 0; i < 4; i++)
      size *= tensor->dims[i];
   return size;
}
/* Map a pipe tensor to its Neutron type by matching its name against the
 * reserved custom-tensor names; unnamed or unrecognized tensors are
 * regular data inputs/outputs. */
static enum neutron_tensor_type
get_tensor_type(struct pipe_tensor *tensor, bool is_input)
{
   /* Guard against tensors with no name: strcmp(NULL, ...) is UB. */
   if (tensor->name) {
      for (unsigned i = 0; i < NEUTRON_CUSTOM_MAX; i++) {
         if (strcmp(tensor->name, neutron_tensor_name(i)) == 0)
            return i;
      }
   }
   return is_input ? NEUTRON_DATA_INPUT : NEUTRON_DATA_OUTPUT;
}
/* Custom inputs (weights, microcode, kernels) occupy the low range of
 * enum neutron_tensor_type, below NEUTRON_CUSTOM_INPUT_MAX. */
static bool
is_custom_input(struct neutron_tensor *tensor)
{
   return tensor->type < NEUTRON_CUSTOM_INPUT_MAX;
}
static void
create_tensor(struct neutron_subgraph *subgraph,
struct pipe_tensor *tensor, bool is_input)
{
struct neutron_tensor ntensor = {0};
ntensor.type = get_tensor_type(tensor, is_input);
ntensor.size = get_tensor_size(tensor);
ntensor.tensor = tensor;
if (ntensor.type == NEUTRON_DATA_INPUT)
ntensor.id = subgraph->data_inputs++;
else if (ntensor.type == NEUTRON_DATA_OUTPUT)
ntensor.id = subgraph->data_outputs++;
util_dynarray_append(&subgraph->tensors, ntensor);
}
/* Number of uint32_t entries in the subgraph's offsets header.
 * NOTE(review): the "+ 4" appears to account for the reserved entries
 * between the scratch and profile indices (see get_header_index()) —
 * confirm the exact header layout against the firmware/kernel ABI. */
static unsigned
get_header_array_size(struct neutron_subgraph *subgraph)
{
   unsigned num_tensors =
      util_dynarray_num_elements(&subgraph->tensors, struct neutron_tensor);
   return num_tensors + 4;
}
static unsigned
get_buffer_size(struct neutron_subgraph *subgraph)
{
unsigned size = 0;
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor)
size += NEUTRON_ALIGN(ntensor->size);
size += subgraph->header.size;
return size;
}
/* Index of a tensor's offset entry in the header array.
 *
 * Header layout (one uint32_t offset per entry, di = data_inputs,
 * do = data_outputs):
 *   [0 .. di-1]          data inputs
 *   [di]                 weights
 *   [di+1 .. di+do]      data outputs
 *   [di+do+1]            kernels
 *   [di+do+2]            scratch
 *   [di+do+3 .. di+do+6] reserved (4 entries)
 *   [di+do+7]            profile
 *   [di+do+8]            debug
 *
 * Returns -1 for tensors with no header entry (microcode, whose offset
 * is passed via the submit ioctl instead). */
static int
get_header_index(struct neutron_subgraph *subgraph,
                 struct neutron_tensor *ntensor)
{
   switch (ntensor->type) {
   case NEUTRON_DATA_INPUT:
      return ntensor->id;
   case NEUTRON_WEIGHTS:
      return subgraph->data_inputs;
   case NEUTRON_DATA_OUTPUT:
      return subgraph->data_inputs + 1 + ntensor->id;
   case NEUTRON_KERNELS:
      return subgraph->data_inputs + subgraph->data_outputs + 1;
   case NEUTRON_SCRATCH:
      return subgraph->data_inputs + subgraph->data_outputs + 2;
   case NEUTRON_PROFILE:
      /* 4 entries reserved */
      return subgraph->data_inputs + subgraph->data_outputs + 7;
   case NEUTRON_DEBUG:
      return subgraph->data_inputs + subgraph->data_outputs + 8;
   default:
      return -1;
   }
}
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count)
{
struct neutron_screen *screen = neutron_ml_device_screen(pdevice);
struct neutron_ml_device *dev = neutron_ml_device(pdevice);
const struct pipe_ml_operation *operation = &poperations[0];
struct neutron_subgraph *subgraph;
uint32_t *offset_array;
unsigned crt_offset = 0;
if (count > 1) {
DBG("Expect a single NeutronGraph per partition!\n");
return NULL;
}
if (!dev->context)
dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0);
subgraph = calloc(1, sizeof(*subgraph));
subgraph->base.device = pdevice;
subgraph->tensors = UTIL_DYNARRAY_INIT;
subgraph->header = UTIL_DYNARRAY_INIT;
for (unsigned i = 0; i < operation->input_count; i++)
create_tensor(subgraph, operation->input_tensors[i], true);
for (unsigned i = 0; i < operation->output_count; i++)
create_tensor(subgraph, operation->output_tensors[i], false);
if (!util_dynarray_resize(&subgraph->header, uint32_t,
get_header_array_size(subgraph)))
return NULL;
memset(util_dynarray_begin(&subgraph->header), 0, subgraph->header.size);
subgraph->bo = pipe_buffer_create(dev->context->screen, 0,
PIPE_USAGE_DEFAULT,
get_buffer_size(subgraph));
if (!subgraph->bo)
return NULL;
if (DBG_ENABLED(NEUTRON_DBG_ZERO)) {
struct pipe_transfer *transfer;
struct neutron_resource *rsc = neutron_resource(subgraph->bo);
uint8_t *buf = pipe_buffer_map(dev->context, subgraph->bo,
PIPE_MAP_WRITE, &transfer);
memset(buf, 0, pipe_buffer_size(subgraph->bo));
pipe_buffer_unmap(dev->context, transfer);
}
DBG("%s: Tensor list:\n", __func__);
DBG("%16s %3s %6s %8s %8s %8s\n", "TYPE", "ID", "INDEX", "OFFSET", "SIZE", "HDR_IDX");
offset_array = util_dynarray_begin(&subgraph->header);
crt_offset = NEUTRON_ALIGN(subgraph->header.size);
/* Translate all tensors into buffer sections */
util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
unsigned header_index = get_header_index(subgraph, ntensor);
/* Microcode offset passed via ioctl, all others stored in header */
if (ntensor->type == NEUTRON_MICROCODE)
subgraph->microcode_offset = crt_offset;
else
offset_array[header_index] = crt_offset;
ntensor->offset = crt_offset;
crt_offset += NEUTRON_ALIGN(ntensor->size);
/* Custom inputs are written in the buffer during setup */
if (is_custom_input(ntensor)) {
pipe_buffer_write(dev->context, subgraph->bo, ntensor->offset,
ntensor->size, ntensor->tensor->data);
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
neutron_dump_tensor(subgraph->bo, ntensor, 0);
}
DBG("%16s %3d %6d %8d %8d %2d\n", neutron_tensor_name(ntensor->type),
ntensor->id, ntensor->tensor->index, ntensor->offset, ntensor->size,
header_index);
}
pipe_buffer_write(dev->context, subgraph->bo, 0, subgraph->header.size,
util_dynarray_begin(&subgraph->header));
if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) {
uint8_t *buf = neutron_resource(subgraph->bo)->map;
neutron_dump_buffer(buf, "header", 0, 0, subgraph->header.size);
}
return &subgraph->base;
}
/* Find the tracked tensor of the given type whose pipe tensor index
 * equals @index, or NULL if there is none. */
static struct neutron_tensor *
get_tensor_by_id(struct neutron_subgraph *subgraph,
                 enum neutron_tensor_type type, unsigned index)
{
   util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, t) {
      if (t->type != type)
         continue;
      if (t->tensor->index == index)
         return t;
   }

   return NULL;
}
/* Upload input data, hand output sections over to the device and submit
 * an inference job.  Completion is signaled on the context's syncobj and
 * waited on in neutron_ml_subgraph_read_outputs(). */
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
                           struct pipe_ml_subgraph *psubgraph,
                           unsigned inputs_count, unsigned input_idxs[],
                           void *inputs[], bool is_signed[])
{
   struct neutron_screen *screen = neutron_screen(pcontext->screen);
   struct neutron_context *context = neutron_context(pcontext);
   struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
   struct drm_neutron_submit_job submit = {0};
   int ret;

   for (unsigned i = 0; i < inputs_count; i++) {
      struct neutron_tensor *ntensor =
         get_tensor_by_id(subgraph, NEUTRON_DATA_INPUT, input_idxs[i]);

      /* Don't crash on an index the subgraph doesn't track. */
      if (!ntensor) {
         DBG("Unknown input tensor idx=%d, skipping\n", input_idxs[i]);
         continue;
      }

      DBG("Prepare input tensor: idx=%d, offset=%d, size=%d\n",
          input_idxs[i], ntensor->offset, ntensor->size);
      pipe_buffer_write(pcontext, subgraph->bo, ntensor->offset,
                        ntensor->size, inputs[i]);
      if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
         neutron_dump_tensor(subgraph->bo, ntensor, i);
   }

   /* Transfer ownership of output sections to device */
   util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) {
      if (ntensor->type == NEUTRON_DATA_OUTPUT) {
         struct pipe_transfer *transfer;

         pipe_buffer_map_range(pcontext, subgraph->bo, ntensor->offset,
                               ntensor->size, PIPE_MAP_WRITE, &transfer);
         pipe_buffer_unmap(pcontext, transfer);
      }
   }

   submit.type = DRM_NEUTRON_JOB_INFERENCE;
   submit.bo_handle = neutron_resource(subgraph->bo)->handle;
   submit.syncobj_handle = context->syncobj_handle;
   submit.inference.microcode_offset = subgraph->microcode_offset;
   submit.inference.tensor_offset = 0;
   submit.inference.tensor_count =
      util_dynarray_num_elements(&subgraph->header, uint32_t);

   DBG("Submitting job\n");
   ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &submit);
   /* Log the failure even in release builds, where assert() is a no-op. */
   if (ret != 0)
      DBG("Job submission failed: %s\n", strerror(errno));
   assert(ret == 0);
}
/* Wait for the previously submitted job to finish, then copy the output
 * tensor sections out of the subgraph BO into the caller's buffers. */
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
                                 struct pipe_ml_subgraph *psubgraph,
                                 unsigned outputs_count,
                                 unsigned output_idxs[], void *outputsv[],
                                 bool is_signed[])
{
   struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
   struct neutron_context *context = neutron_context(pcontext);
   struct neutron_screen *screen = neutron_screen(pcontext->screen);
   int ret;

   /* Block until the job from neutron_ml_subgraph_invoke() has signaled
    * the context's syncobj. */
   ret = drmSyncobjWait(screen->fd, &context->syncobj_handle, 1, INT64_MAX,
                        DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
   /* Log the failure even in release builds, where assert() is a no-op. */
   if (ret != 0)
      DBG("Syncobj wait failed: %d\n", ret);
   assert(ret == 0);
   DBG("Job finished\n");

   for (unsigned i = 0; i < outputs_count; i++) {
      struct neutron_tensor *ntensor =
         get_tensor_by_id(subgraph, NEUTRON_DATA_OUTPUT, output_idxs[i]);

      /* Skip indices the subgraph doesn't track instead of crashing. */
      if (!ntensor) {
         DBG("Unknown output tensor idx=%d, skipping\n", output_idxs[i]);
         continue;
      }

      DBG("Read output tensor: idx=%d, offset=%d, size=%d\n",
          output_idxs[i], ntensor->offset, ntensor->size);
      pipe_buffer_read(pcontext, subgraph->bo, ntensor->offset,
                       ntensor->size, outputsv[i]);
      if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS))
         neutron_dump_tensor(subgraph->bo, ntensor, i);
   }
}
/* Free a subgraph: close the underlying GEM handle, drop the BO
 * reference, and release all tracking storage. */
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
                            struct pipe_ml_subgraph *psubgraph)
{
   struct pipe_context *pcontext = neutron_ml_device(pdevice)->context;
   struct neutron_screen *screen = neutron_screen(pcontext->screen);
   struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph);
   struct drm_gem_close arg = {0};
   int ret;

   /* NOTE(review): the GEM handle is closed explicitly here AND the pipe
    * resource is unreferenced below — confirm resource_destroy does not
    * close the handle again (potential double close). */
   arg.handle = neutron_resource(subgraph->bo)->handle;
   ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
   assert(ret >= 0);
   pipe_resource_reference(&subgraph->bo, NULL);
   util_dynarray_fini(&subgraph->tensors);
   util_dynarray_fini(&subgraph->header);
   free(subgraph);
}

View file

@ -0,0 +1,73 @@
/*
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#ifndef NEUTRON_ML_H
#define NEUTRON_ML_H
#include <util/u_dynarray.h>
#include <util/u_math.h>
#include "neutron_device.h"
#define NEUTRON_ALIGN(value) align(value, 64)
/* Tensor roles within a Neutron subgraph buffer.
 *
 * The numeric values double as iteration and lookup indices, so the
 * ordering is part of the contract:
 *  - types below NEUTRON_CUSTOM_INPUT_MAX are compiler-generated inputs
 *    uploaded at subgraph creation (see is_custom_input());
 *  - types below NEUTRON_CUSTOM_MAX have reserved names matched by
 *    get_tensor_type();
 *  - DATA_INPUT/DATA_OUTPUT are the user-visible tensors. */
enum neutron_tensor_type {
   NEUTRON_WEIGHTS = 0,
   NEUTRON_MICROCODE,
   NEUTRON_KERNELS,
   NEUTRON_CUSTOM_INPUT_MAX,
   NEUTRON_SCRATCH = NEUTRON_CUSTOM_INPUT_MAX,
   NEUTRON_PROFILE,
   NEUTRON_DEBUG,
   NEUTRON_CUSTOM_MAX,
   NEUTRON_DATA_INPUT = NEUTRON_CUSTOM_MAX,
   NEUTRON_DATA_OUTPUT,
};
/* Per-tensor bookkeeping: position and size of the tensor's section in
 * the subgraph BO. */
struct neutron_tensor {
   /* Borrowed from the pipe_ml_operation — TODO confirm lifetime. */
   struct pipe_tensor *tensor;
   enum neutron_tensor_type type;
   unsigned size;   /* section size in bytes (unaligned) */
   unsigned offset; /* section offset within the subgraph BO */
   unsigned id; /* For NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT */
};

/* A compiled NeutronGraph: one BO holding an offsets header followed by
 * all tensor sections. */
struct neutron_subgraph {
   struct pipe_ml_subgraph base;
   struct pipe_resource *bo;
   struct util_dynarray tensors; /* struct neutron_tensor */
   struct util_dynarray header;  /* uint32_t offsets; see get_header_index() */
   unsigned microcode_offset; /* passed to the kernel via the submit ioctl */
   unsigned data_inputs; /* number of non-custom input tensors */
   unsigned data_outputs; /* number of non-custom output tensors */
};
bool
neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *operation);
struct pipe_ml_subgraph *
neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
const struct pipe_ml_operation *poperations,
unsigned count);
void
neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned inputs_count, unsigned input_idxs[],
void *inputs[], bool is_signed[]);
void
neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
struct pipe_ml_subgraph *psubgraph,
unsigned outputs_count,
unsigned output_idxs[], void *outputs[],
bool is_signed[]);
void
neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
struct pipe_ml_subgraph *psubgraph);
#endif /* NEUTRON_ML_H */

View file

@ -196,6 +196,12 @@ if with_gallium_ethosu
else
driver_ethosu = declare_dependency()
endif
if with_gallium_neutron
subdir('winsys/neutron/drm')
subdir('drivers/neutron')
else
driver_neutron = declare_dependency()
endif
if with_gallium_zink
if not with_platform_windows
subdir('winsys/zink/drm')

View file

@ -59,7 +59,7 @@ libgallium_dri = shared_library(
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
driver_tegra, driver_i915, driver_svga, driver_virgl,
driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
driver_asahi, driver_crocus, driver_rocket, driver_ethosu
driver_asahi, driver_crocus, driver_rocket, driver_ethosu, driver_neutron
],
install : true,
name_suffix : libname_suffix,

View file

@ -0,0 +1,13 @@
# Copyright 2017 Broadcom
# SPDX-License-Identifier: MIT
libneutronwinsys = static_library(
'neutronwinsys',
files('neutron_drm_winsys.c'),
include_directories : [
inc_src, inc_include,
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
],
gnu_symbol_visibility : 'hidden',
dependencies: [dep_libdrm, idep_mesautil],
)

View file

@ -0,0 +1,17 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* SPDX-License-Identifier: MIT
*/
#ifndef __NEUTRON_DRM_PUBLIC_H__
#define __NEUTRON_DRM_PUBLIC_H__
struct pipe_screen;
struct pipe_screen_config;
struct pipe_screen *
neutron_drm_screen_create(int drmFD, const struct pipe_screen_config *config);
#endif /* __NEUTRON_DRM_PUBLIC_H__ */

View file

@ -0,0 +1,20 @@
/*
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2025 Tomeu Vizoso
* Copyright 2026 NXP
* SPDX-License-Identifier: MIT
*/
#include "util/os_file.h"
#include "util/u_screen.h"
#include "neutron/neutron_device.h"
#include "neutron_drm_public.h"
/* Winsys entry point: create (or look up) the Neutron screen for the
 * given DRM fd.  The fd is duplicated so the caller keeps ownership of
 * its own descriptor. */
struct pipe_screen *
neutron_drm_screen_create(int fd, const struct pipe_screen_config *config)
{
   int screen_fd = os_dupfd_cloexec(fd);

   return u_pipe_screen_lookup_or_create(screen_fd, config, NULL,
                                         neutron_screen_create);
}