diff --git a/.clang-format-include b/.clang-format-include index ba52553fdc9..b446191541a 100644 --- a/.clang-format-include +++ b/.clang-format-include @@ -3,6 +3,7 @@ src/gallium/drivers/ethosu/**/* src/gallium/drivers/i915 +src/gallium/drivers/neutron/**/* src/gallium/drivers/r300/compiler/* src/gallium/drivers/rocket/**/* src/gallium/targets/teflon/**/* diff --git a/include/drm-uapi/neutron_accel.h b/include/drm-uapi/neutron_accel.h new file mode 100644 index 00000000000..a9e5682709d --- /dev/null +++ b/include/drm-uapi/neutron_accel.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* Copyright 2025-2026 NXP */ + +#ifndef __NEUTRON_ACCEL_H__ +#define __NEUTRON_ACCEL_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * enum drm_neutron_ioctl - Neutron IOCTL IDs + * + * @DRM_NEUTRON_CREATE_BO: Create a buffer object + * @DRM_NEUTRON_SYNC_BO: Sync (parts of) the buffer object memory + * @DRM_NEUTRON_SUBMIT_JOB: Submit a job to the device + */ +enum drm_neutron_ioctl { + DRM_NEUTRON_CREATE_BO = 0, + DRM_NEUTRON_SYNC_BO, + DRM_NEUTRON_SUBMIT_JOB, +}; + +/** + * struct drm_neutron_create_bo - Create a buffer object and return buffer + * info to user + * + * @size: Size in bytes of requested buffer. 
May be updated by driver + * if allocated size different than requested + * @handle: Returned handle for the new buffer object + * @pad: MBZ + * @map_offset: Returned offset for mmap() calls + */ +struct drm_neutron_create_bo { + __u64 size; + __u32 handle; + __u32 pad; + __u64 map_offset; +}; + +/** + * enum drm_neutron_sync_dir - Direction of buffer object synchronization + * + * @DRM_NEUTRON_SYNC_TO_DEVICE: Sync from CPU to device + * @DRM_NEUTRON_SYNC_FROM_DEVICE: Sync from device to CPU + */ +enum drm_neutron_sync_dir { + DRM_NEUTRON_SYNC_TO_DEVICE = 0, + DRM_NEUTRON_SYNC_FROM_DEVICE, +}; + +/** + * struct drm_neutron_sync_bo - Sync buffer object memory + * + * @handle: Handle of buffer object to sync + * @direction: Direction of sync, can be one of enum drm_neutron_sync_dir + * @size: Size of the memory to sync, in bytes + * @offset: Offset inside the buffer, in bytes + */ +struct drm_neutron_sync_bo { + __u32 handle; + __u32 direction; + __u64 size; + __u64 offset; +}; + +/** + * enum drm_neutron_job_type - Type of job to submit to Neutron device + * + * @DRM_NEUTRON_JOB_INFERENCE: Inference job + */ +enum drm_neutron_job_type { + DRM_NEUTRON_JOB_INFERENCE = 0, +}; + +/** + * struct drm_neutron_inference_job - Inference job descriptor + * + * @tensor_offset: Offset of tensor array inside job BO + * @microcode_offset: Microcode offset inside BO + * @tensor_count: Number of valid tensors + * @pad: MBZ + */ +struct drm_neutron_inference_job { + __u32 tensor_offset; + __u32 microcode_offset; + __u32 tensor_count; + __u32 pad[5]; +}; + +/** + * struct drm_neutron_submit_job - Submit a job to Neutron device + * + * @type: Job type, one of enum drm_neutron_job_type + * @bo_handle: BO handle for this job + * @inference: Inference job descriptor (when type is DRM_NEUTRON_JOB_INFERENCE) + * @reserved: Reserved for future job types + * @syncobj_handle: Handle of syncobj on which user waits for job completion + * @pad: MBZ + */ +struct drm_neutron_submit_job { + __u32 
type; + __u32 bo_handle; + union { + struct drm_neutron_inference_job inference; + __u32 reserved[8]; + }; + __u32 syncobj_handle; + __u32 pad; +}; + +#define DRM_IOCTL_NEUTRON_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_CREATE_BO, \ + struct drm_neutron_create_bo) + +#define DRM_IOCTL_NEUTRON_SYNC_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SYNC_BO, \ + struct drm_neutron_sync_bo) + +#define DRM_IOCTL_NEUTRON_SUBMIT_JOB \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NEUTRON_SUBMIT_JOB, \ + struct drm_neutron_submit_job) + +#if defined(__cplusplus) +} +#endif + +#endif /* __NEUTRON_ACCEL_H__ */ diff --git a/meson.build b/meson.build index f1b561b6ad5..6e1ff9a3587 100644 --- a/meson.build +++ b/meson.build @@ -205,7 +205,7 @@ elif gallium_drivers.contains('all') gallium_drivers = [ 'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915', 'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris', - 'zink', 'd3d12', 'asahi', 'rocket', 'ethosu' + 'zink', 'd3d12', 'asahi', 'rocket', 'ethosu', 'neutron' ] endif @@ -234,6 +234,7 @@ with_gallium_d3d12 = gallium_drivers.contains('d3d12') with_gallium_asahi = gallium_drivers.contains('asahi') with_gallium_rocket = gallium_drivers.contains('rocket') with_gallium_ethosu = gallium_drivers.contains('ethosu') +with_gallium_neutron = gallium_drivers.contains('neutron') foreach gallium_driver : gallium_drivers pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper()) endforeach diff --git a/meson.options b/meson.options index 024b8340252..30bc0f0729d 100644 --- a/meson.options +++ b/meson.options @@ -87,7 +87,7 @@ option( choices : [ 'all', 'auto', 'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris', - 'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi', + 'lima', 'llvmpipe', 'neutron', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi', 'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink', ], description : 'List of 
gallium drivers to build. If this is set to auto ' + diff --git a/src/gallium/drivers/neutron/.clang-format b/src/gallium/drivers/neutron/.clang-format new file mode 100644 index 00000000000..34cd9d7d1d3 --- /dev/null +++ b/src/gallium/drivers/neutron/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: InheritParentConfig +DisableFormat: false diff --git a/src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt b/src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt new file mode 100644 index 00000000000..3c28d42358b --- /dev/null +++ b/src/gallium/drivers/neutron/ci/neutron-imx95-fails.txt @@ -0,0 +1,102 @@ +CompiledModels.Op/efficientdet_efficientdet_tflite_lite0_int8_v1,Fail +CompiledModels.Op/inception_002,Fail +CompiledModels.Op/inception_003,Fail +CompiledModels.Op/inception_008,Fail +CompiledModels.Op/inception_013,Fail +CompiledModels.Op/inception_015,Fail +CompiledModels.Op/inception_016,Fail +CompiledModels.Op/inception_021,Fail +CompiledModels.Op/inception_024,Fail +CompiledModels.Op/inception_027,Fail +CompiledModels.Op/inception_030,Fail +CompiledModels.Op/inception_032,Fail +CompiledModels.Op/inception_033,Fail +CompiledModels.Op/inception_041,Fail +CompiledModels.Op/inception_055,Fail +CompiledModels.Op/inception_059,Fail +CompiledModels.Op/inception_063,Fail +CompiledModels.Op/inception_064,Fail +CompiledModels.Op/inception_071,Fail +CompiledModels.Op/inception_073,Fail +CompiledModels.Op/inception_075,Fail +CompiledModels.Op/inception_077,Fail +CompiledModels.Op/mobiledet_002,Fail +CompiledModels.Op/mobiledet_004,Fail +CompiledModels.Op/mobiledet_006,Fail +CompiledModels.Op/mobiledet_009,Fail +CompiledModels.Op/mobiledet_012,Fail +CompiledModels.Op/mobiledet_015,Fail +CompiledModels.Op/mobiledet_017,Fail +CompiledModels.Op/mobiledet_020,Fail +CompiledModels.Op/mobiledet_029,Fail +CompiledModels.Op/mobiledet_039,Fail +CompiledModels.Op/mobiledet_042,Fail +CompiledModels.Op/mobiledet_046,Fail +CompiledModels.Op/mobiledet_050,Fail 
+CompiledModels.Op/mobiledet_063,Fail +CompiledModels.Op/mobiledet_086,Fail +CompiledModels.Op/mobiledet_087,Fail +CompiledModels.Op/mobiledet_106,Fail +CompiledModels.Op/mobiledet_119,Fail +CompiledModels.Op/mobiledet_ssdlite_mobiledet_coco_qat_postprocess,Fail +CompiledModels.Op/mobilenetv1_001,Fail +CompiledModels.Op/mobilenetv1_002,Fail +CompiledModels.Op/mobilenetv1_003,Fail +CompiledModels.Op/mobilenetv1_005,Fail +CompiledModels.Op/mobilenetv1_006,Fail +CompiledModels.Op/mobilenetv1_008,Fail +CompiledModels.Op/mobilenetv1_009,Fail +CompiledModels.Op/mobilenetv1_012,Fail +CompiledModels.Op/mobilenetv1_016,Fail +CompiledModels.Op/mobilenetv1_018,Fail +CompiledModels.Op/mobilenetv1_020,Fail +CompiledModels.Op/mobilenetv1_025,Fail +CompiledModels.Op/mobilenetv1_026,Fail +CompiledModels.Op/mobilenetv1_mobilenet_v1_1_224_quant,Fail +CompiledModels.Op/mobilenetv2_001,Fail +CompiledModels.Op/mobilenetv2_003,Fail +CompiledModels.Op/mobilenetv2_006,Fail +CompiledModels.Op/mobilenetv2_007,Fail +CompiledModels.Op/mobilenetv2_010,Fail +CompiledModels.Op/mobilenetv2_014,Fail +CompiledModels.Op/mobilenetv2_018,Fail +CompiledModels.Op/mobilenetv2_025,Fail +CompiledModels.Op/mobilenetv2_029,Fail +CompiledModels.Op/mobilenetv2_040,Fail +CompiledModels.Op/mobilenetv2_044,Fail +CompiledModels.Op/mobilenetv2_059,Fail +CompiledModels.Op/mobilenetv2_061,Fail +CompiledModels.Op/movenetlightning_103,Fail +CompiledModels.Op/movenetlightning_104,Fail +CompiledModels.Op/movenetthunder_103,Fail +CompiledModels.Op/movenetthunder_104,Fail +CompiledModels.Op/ssdmobilenetv2_001,Fail +CompiledModels.Op/ssdmobilenetv2_003,Fail +CompiledModels.Op/ssdmobilenetv2_007,Fail +CompiledModels.Op/ssdmobilenetv2_040,Fail +CompiledModels.Op/ssdmobilenetv2_047,Fail +CompiledModels.Op/ssdmobilenetv2_052,Fail +CompiledModels.Op/ssdmobilenetv2_054,Fail +CompiledModels.Op/ssdmobilenetv2_065,Fail +CompiledModels.Op/ssdmobilenetv2_069,Fail +CompiledModels.Op/ssdmobilenetv2_072,Fail 
+CompiledModels.Op/ssdmobilenetv2_073,Fail +CompiledModels.Op/ssdmobilenetv2_077,Fail +CompiledModels.Op/ssdmobilenetv2_081,Fail +CompiledModels.Op/ssdmobilenetv2_089,Fail +CompiledModels.Op/ssdmobilenetv2_096,Fail +CompiledModels.Op/ssdmobilenetv2_102,Fail +CompiledModels.Op/ssdmobilenetv2_ssd_mobilenet_v2_coco_quant_postprocess,Fail +CompiledModels.Op/yolox_001,Fail +CompiledModels.Op/yolox_004,Fail +CompiledModels.Op/yolox_027,Fail +CompiledModels.Op/yolox_042,Fail +CompiledModels.Op/yolox_077,Fail +CompiledModels.Op/yolox_086,Fail +CompiledModels.Op/yolox_102,Fail +CompiledModels.Op/yolox_112,Fail +CompiledModels.Op/yolox_122,Fail +CompiledModels.Op/yolox_132,Fail +CompiledModels.Op/yolox_147,Fail +CompiledModels.Op/yolox_yolox_nano_full_integer_quant,Fail + diff --git a/src/gallium/drivers/neutron/ci/neutron-imx95-flakes.txt b/src/gallium/drivers/neutron/ci/neutron-imx95-flakes.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt b/src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt new file mode 100644 index 00000000000..209c3091581 --- /dev/null +++ b/src/gallium/drivers/neutron/ci/neutron-imx95-skips.txt @@ -0,0 +1,227 @@ +# Standard (un-compiled) tflite models are not supported on Neutron +Add.Op/.* +AddQuant.Op/.* +Conv2D.Op/.* +DepthwiseConv2D.Op/.* +FullyConnected.Op/.* + +Models.Op/.* + +# Compilation error due to duplicate/orphan input tensor +CompiledModels.Op/movenetlightning_117 +CompiledModels.Op/movenetlightning_120 +CompiledModels.Op/movenetthunder_117 +CompiledModels.Op/movenetthunder_120 + +# Conversion rate zero (operators or operator + tensor type/size combo +# not supported yet by Neutron compiler) +CompiledModels.Op/efficientdet_066 +CompiledModels.Op/efficientdet_101 +CompiledModels.Op/efficientdet_136 +CompiledModels.Op/efficientdet_251 +CompiledModels.Op/efficientdet_252 +CompiledModels.Op/efficientdet_253 +CompiledModels.Op/efficientdet_254 
+CompiledModels.Op/efficientdet_255 +CompiledModels.Op/efficientdet_256 +CompiledModels.Op/efficientdet_257 +CompiledModels.Op/efficientdet_258 +CompiledModels.Op/efficientdet_259 +CompiledModels.Op/efficientdet_260 +CompiledModels.Op/efficientdet_261 +CompiledModels.Op/efficientdet_262 +CompiledModels.Op/efficientdet_263 +CompiledModels.Op/efficientdet_264 +CompiledModels.Op/efficientdet_265 +CompiledModels.Op/inception_082 +CompiledModels.Op/micronetlarge_013 +CompiledModels.Op/mobiledet_097 +CompiledModels.Op/mobiledet_099 +CompiledModels.Op/mobiledet_101 +CompiledModels.Op/mobiledet_103 +CompiledModels.Op/mobiledet_105 +CompiledModels.Op/mobiledet_107 +CompiledModels.Op/mobiledet_109 +CompiledModels.Op/mobiledet_111 +CompiledModels.Op/mobiledet_113 +CompiledModels.Op/mobiledet_115 +CompiledModels.Op/mobiledet_117 +CompiledModels.Op/mobiledet_118 +CompiledModels.Op/mobiledet_122 +CompiledModels.Op/mobiledet_123 +CompiledModels.Op/mobilenetv1_030 +CompiledModels.Op/movenetlightning_000 +CompiledModels.Op/movenetlightning_001 +CompiledModels.Op/movenetlightning_002 +CompiledModels.Op/movenetlightning_003 +CompiledModels.Op/movenetlightning_071 +CompiledModels.Op/movenetlightning_075 +CompiledModels.Op/movenetlightning_079 +CompiledModels.Op/movenetlightning_087 +CompiledModels.Op/movenetlightning_088 +CompiledModels.Op/movenetlightning_089 +CompiledModels.Op/movenetlightning_090 +CompiledModels.Op/movenetlightning_091 +CompiledModels.Op/movenetlightning_092 +CompiledModels.Op/movenetlightning_093 +CompiledModels.Op/movenetlightning_094 +CompiledModels.Op/movenetlightning_095 +CompiledModels.Op/movenetlightning_096 +CompiledModels.Op/movenetlightning_097 +CompiledModels.Op/movenetlightning_098 +CompiledModels.Op/movenetlightning_099 +CompiledModels.Op/movenetlightning_105 +CompiledModels.Op/movenetlightning_112 +CompiledModels.Op/movenetlightning_113 +CompiledModels.Op/movenetlightning_114 +CompiledModels.Op/movenetlightning_115 
+CompiledModels.Op/movenetlightning_116 +CompiledModels.Op/movenetlightning_118 +CompiledModels.Op/movenetlightning_119 +CompiledModels.Op/movenetlightning_122 +CompiledModels.Op/movenetlightning_123 +CompiledModels.Op/movenetlightning_124 +CompiledModels.Op/movenetlightning_125 +CompiledModels.Op/movenetlightning_126 +CompiledModels.Op/movenetlightning_127 +CompiledModels.Op/movenetlightning_128 +CompiledModels.Op/movenetlightning_129 +CompiledModels.Op/movenetlightning_130 +CompiledModels.Op/movenetlightning_131 +CompiledModels.Op/movenetlightning_132 +CompiledModels.Op/movenetlightning_133 +CompiledModels.Op/movenetlightning_134 +CompiledModels.Op/movenetlightning_135 +CompiledModels.Op/movenetlightning_136 +CompiledModels.Op/movenetlightning_137 +CompiledModels.Op/movenetlightning_138 +CompiledModels.Op/movenetlightning_139 +CompiledModels.Op/movenetlightning_140 +CompiledModels.Op/movenetlightning_141 +CompiledModels.Op/movenetlightning_142 +CompiledModels.Op/movenetlightning_143 +CompiledModels.Op/movenetlightning_144 +CompiledModels.Op/movenetlightning_145 +CompiledModels.Op/movenetlightning_147 +CompiledModels.Op/movenetlightning_149 +CompiledModels.Op/movenetlightning_150 +CompiledModels.Op/movenetlightning_151 +CompiledModels.Op/movenetlightning_152 +CompiledModels.Op/movenetlightning_153 +CompiledModels.Op/movenetlightning_154 +CompiledModels.Op/movenetlightning_155 +CompiledModels.Op/movenetlightning_156 +CompiledModels.Op/movenetthunder_000 +CompiledModels.Op/movenetthunder_001 +CompiledModels.Op/movenetthunder_002 +CompiledModels.Op/movenetthunder_003 +CompiledModels.Op/movenetthunder_071 +CompiledModels.Op/movenetthunder_075 +CompiledModels.Op/movenetthunder_079 +CompiledModels.Op/movenetthunder_087 +CompiledModels.Op/movenetthunder_088 +CompiledModels.Op/movenetthunder_089 +CompiledModels.Op/movenetthunder_090 +CompiledModels.Op/movenetthunder_091 +CompiledModels.Op/movenetthunder_092 +CompiledModels.Op/movenetthunder_093 
+CompiledModels.Op/movenetthunder_094 +CompiledModels.Op/movenetthunder_095 +CompiledModels.Op/movenetthunder_096 +CompiledModels.Op/movenetthunder_097 +CompiledModels.Op/movenetthunder_098 +CompiledModels.Op/movenetthunder_099 +CompiledModels.Op/movenetthunder_105 +CompiledModels.Op/movenetthunder_112 +CompiledModels.Op/movenetthunder_113 +CompiledModels.Op/movenetthunder_114 +CompiledModels.Op/movenetthunder_115 +CompiledModels.Op/movenetthunder_116 +CompiledModels.Op/movenetthunder_118 +CompiledModels.Op/movenetthunder_119 +CompiledModels.Op/movenetthunder_122 +CompiledModels.Op/movenetthunder_123 +CompiledModels.Op/movenetthunder_124 +CompiledModels.Op/movenetthunder_125 +CompiledModels.Op/movenetthunder_126 +CompiledModels.Op/movenetthunder_127 +CompiledModels.Op/movenetthunder_128 +CompiledModels.Op/movenetthunder_129 +CompiledModels.Op/movenetthunder_130 +CompiledModels.Op/movenetthunder_131 +CompiledModels.Op/movenetthunder_132 +CompiledModels.Op/movenetthunder_133 +CompiledModels.Op/movenetthunder_134 +CompiledModels.Op/movenetthunder_135 +CompiledModels.Op/movenetthunder_136 +CompiledModels.Op/movenetthunder_137 +CompiledModels.Op/movenetthunder_138 +CompiledModels.Op/movenetthunder_139 +CompiledModels.Op/movenetthunder_140 +CompiledModels.Op/movenetthunder_141 +CompiledModels.Op/movenetthunder_142 +CompiledModels.Op/movenetthunder_143 +CompiledModels.Op/movenetthunder_144 +CompiledModels.Op/movenetthunder_145 +CompiledModels.Op/movenetthunder_147 +CompiledModels.Op/movenetthunder_149 +CompiledModels.Op/movenetthunder_150 +CompiledModels.Op/movenetthunder_151 +CompiledModels.Op/movenetthunder_152 +CompiledModels.Op/movenetthunder_153 +CompiledModels.Op/movenetthunder_154 +CompiledModels.Op/movenetthunder_155 +CompiledModels.Op/movenetthunder_156 +CompiledModels.Op/ssdmobilenetv2_049 +CompiledModels.Op/ssdmobilenetv2_051 +CompiledModels.Op/ssdmobilenetv2_067 +CompiledModels.Op/ssdmobilenetv2_068 +CompiledModels.Op/ssdmobilenetv2_070 
+CompiledModels.Op/ssdmobilenetv2_075 +CompiledModels.Op/ssdmobilenetv2_076 +CompiledModels.Op/ssdmobilenetv2_078 +CompiledModels.Op/ssdmobilenetv2_083 +CompiledModels.Op/ssdmobilenetv2_084 +CompiledModels.Op/ssdmobilenetv2_086 +CompiledModels.Op/ssdmobilenetv2_091 +CompiledModels.Op/ssdmobilenetv2_092 +CompiledModels.Op/ssdmobilenetv2_094 +CompiledModels.Op/ssdmobilenetv2_099 +CompiledModels.Op/ssdmobilenetv2_100 +CompiledModels.Op/ssdmobilenetv2_101 +CompiledModels.Op/ssdmobilenetv2_107 +CompiledModels.Op/ssdmobilenetv2_108 +CompiledModels.Op/ssdmobilenetv2_109 +CompiledModels.Op/yolox_000 +CompiledModels.Op/yolox_003 +CompiledModels.Op/yolox_012 +CompiledModels.Op/yolox_103 +CompiledModels.Op/yolox_104 +CompiledModels.Op/yolox_113 +CompiledModels.Op/yolox_114 +CompiledModels.Op/yolox_123 +CompiledModels.Op/yolox_124 +CompiledModels.Op/yolox_125 +CompiledModels.Op/yolox_126 +CompiledModels.Op/yolox_127 +CompiledModels.Op/yolox_128 +CompiledModels.Op/yolox_129 +CompiledModels.Op/yolox_130 +CompiledModels.Op/yolox_131 +CompiledModels.Op/yolox_133 +CompiledModels.Op/yolox_134 +CompiledModels.Op/yolox_135 +CompiledModels.Op/yolox_136 +CompiledModels.Op/yolox_137 +CompiledModels.Op/yolox_139 +CompiledModels.Op/yolox_140 +CompiledModels.Op/yolox_141 +CompiledModels.Op/yolox_142 +CompiledModels.Op/yolox_143 +CompiledModels.Op/yolox_145 +CompiledModels.Op/yolox_146 +CompiledModels.Op/yolox_148 +CompiledModels.Op/yolox_149 +CompiledModels.Op/yolox_151 +CompiledModels.Op/yolox_152 +CompiledModels.Op/yolox_153 diff --git a/src/gallium/drivers/neutron/compile_model.py b/src/gallium/drivers/neutron/compile_model.py new file mode 100644 index 00000000000..e6195a0ec2b --- /dev/null +++ b/src/gallium/drivers/neutron/compile_model.py @@ -0,0 +1,58 @@ +# Copyright 2026 NXP +# SPDX-License-Identifier: MIT + +""" +Compile a TFLite model for the Neutron NPU. 
+ +Usage: python compile_model.py + +Exit codes: + 0: Success + 1: Converter internal error + 2: Zero conversion rate (model unchanged) + 3: Invalid arguments or input file +""" + +import sys +import os +from eiq_neutron_sdk import neutron_converter # from https://eiq.nxp.com/repository + +# Only one platform supported for now +TARGET = "imx95" + + +def main(): + if len(sys.argv) != 3: + print("Usage: python compile_model.py ") + sys.exit(3) + + input_file = sys.argv[1] + output_file = sys.argv[2] + + if not os.path.isfile(input_file): + print(f"Error: Input file '{input_file}' does not exist") + sys.exit(3) + + if not input_file.endswith('.tflite'): + print("Error: Input file must be a .tflite model") + sys.exit(3) + + with open(input_file, 'rb') as f: + model_data = f.read() + + try: + cmodel_data = neutron_converter.convertModel(list(model_data), TARGET) + except Exception as e: + print(f"Error during model conversion: {e}") + sys.exit(1) + + if bytes(cmodel_data) == model_data: + print(f"Warning: Conversion rate was zero for {input_file}, skipping output") + sys.exit(2) + + with open(output_file, 'wb') as f: + f.write(bytes(cmodel_data)) + + +if __name__ == "__main__": + main() diff --git a/src/gallium/drivers/neutron/meson.build b/src/gallium/drivers/neutron/meson.build new file mode 100644 index 00000000000..fd75b6f17f1 --- /dev/null +++ b/src/gallium/drivers/neutron/meson.build @@ -0,0 +1,20 @@ +# Copyright 2019 Google, Inc +# SPDX-License-Identifier: MIT + +files_neutron = files( + 'neutron_device.c', + 'neutron_ml.c', +) + +libneutron = static_library( + 'neutron', + files_neutron, + include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src], + gnu_symbol_visibility : 'hidden', + dependencies : [idep_mesautil, dep_libdrm], +) + +driver_neutron = declare_dependency( + compile_args : '-DGALLIUM_NEUTRON', + link_with : [libneutronwinsys, libneutron] +) diff --git a/src/gallium/drivers/neutron/neutron_compiler.py 
b/src/gallium/drivers/neutron/neutron_compiler.py new file mode 100644 index 00000000000..18dcdd39e29 --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_compiler.py @@ -0,0 +1,219 @@ +# Copyright 2026 NXP +# SPDX-License-Identifier: MIT + +""" +Batch compiler of TFLite models for NXP's Neutron NPU + +Usage modes (mutually exclusive): +neutron_compiler.py --in --out +neutron_compiler.py --in-dir --out-dir +neutron_compiler.py --teflon-test-models +""" + +import sys +import os +import argparse +import subprocess + +def parse_args(argv): + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + + mode_group = parser.add_mutually_exclusive_group(required=True) + mode_group.add_argument('--in', + dest='input_file', + action='store', + help='Input TFLite model file') + mode_group.add_argument('--in-dir', + dest='input_dir', + action='store', + help='Input directory containing TFLite models to be compiled') + mode_group.add_argument('--teflon-test-models', + dest='teflon_test_models', + action='store_true', + help='Use ${TEFLON_TEST_DATA}/models as input directory and ${TEFLON_TEST_DATA}/compiled_models as output') + + parser.add_argument('--out', + dest='output_file', + action='store', + help='Output TFLite model file (required with --in)') + parser.add_argument('--out-dir', + dest='output_dir', + action='store', + help='Output directory for compiled models (required with --in-dir). Output files will have the same name as input files') + + parser.add_argument('--force', + action='store_true', + required=False, + help='Overwrite output files if they exist. Ignored for single file mode') + parser.add_argument('--show-details', + action='store_true', + required=False, + help='Print compilation result for each model. Ignored for single file mode') + parser.add_argument('--log-to-file', + action='store_true', + required=False, + help='Generate log file for each compile attempt. 
Log is located next to output file')
+
+    args = parser.parse_args()
+
+    if args.input_file:
+        if not args.output_file:
+            parser.error("--out is required when using --in")
+        if not os.path.isfile(args.input_file):
+            parser.error("--in must be an existing file")
+
+    if args.input_dir:
+        if not args.output_dir:
+            parser.error("--out-dir is required when using --in-dir")
+        if not os.path.isdir(args.input_dir):
+            parser.error("--in-dir must be an existing directory")
+
+    if args.teflon_test_models and not os.environ.get('TEFLON_TEST_DATA'):
+        parser.error("--teflon-test-models requires TEFLON_TEST_DATA to be set")
+
+    return args
+
+def compile(input_file, output_file):
+    # Run the single-model worker (compile_model.py) in a subprocess so a
+    # converter crash or hang (60 s timeout) cannot take down the batch run.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    worker_script = os.path.join(script_dir, "compile_model.py")
+
+    try:
+        result = subprocess.run(
+            [sys.executable, worker_script, input_file, output_file],
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+    except Exception as e:
+        print(f"Error compiling {input_file}: {e}", file=sys.stderr)
+        return None
+
+    return result
+
+def write_log(output_file, result):
+    # Persist the worker's stdout/stderr next to the output model, named
+    # <model>_compiler_output.log, so failures can be inspected later.
+    model_name_we = os.path.splitext(os.path.basename(output_file))[0]
+    output_dir = os.path.dirname(output_file)
+    log_file = os.path.join(output_dir, f"{model_name_we}_compiler_output.log")
+
+    with open(log_file, 'w') as f:
+        if result.stdout:
+            f.write(result.stdout.strip())
+        if result.stderr:
+            f.write('\n')
+            f.write(result.stderr.strip())
+
+def print_result(input_file, result):
+    # Map the worker's documented exit codes (see compile_model.py) to a
+    # human-readable status line for --show-details output.
+    if result.returncode == 0:
+        status = "SUCCESS"
+    elif result.returncode == 1:
+        status = "COMPILER ERROR"
+    elif result.returncode == 2:
+        status = "ZERO CONVERSION RATE"
+    elif result.returncode == 3:
+        status = "SCRIPT ERROR"
+    else:
+        # Fixed: was f"UNKNOWN({returncode})" — 'returncode' is undefined
+        # here and raised NameError for any unexpected exit code.
+        status = f"UNKNOWN({result.returncode})"
+
+    print(f"  {input_file}: {status}")
+    if result.returncode != 0 and result.stderr:
+        print(result.stderr.strip())
+
+def print_stats(total, ok, error, zero, skip):
+    # Summary banner printed once per directory run.
+    print("\n" + "=" * 50)
+    print(f"Total models processed:
{total}") + print(f" Successful: {ok}") + print(f" Errors: {error}") + print(f" Zero conversion rate: {zero}") + print(f" Skipped: {skip}") + print("=" * 50) + +def find_tflite_files(directory): + tflite_files = [] + for root, _, files in os.walk(directory): + for file in files: + if file.endswith('.tflite'): + tflite_files.append(os.path.join(root, file)) + return sorted(tflite_files) + +def process_model(input_file, output_file, log_to_file, show_details): + output_dir = os.path.dirname(os.path.abspath(output_file)) + os.makedirs(output_dir, exist_ok=True) + + result = compile(input_file, output_file) + if not result: + return 3 + + if log_to_file: + write_log(output_file, result) + + if show_details: + print_result(input_file, result) + + return result.returncode + +def process_directory(input_dir, output_dir, force, log_to_file, show_details): + total = 0 + ok = 0 + errors = 0 + zero = 0 + skip = 0 + + tflite_files = find_tflite_files(input_dir) + for input_file in tflite_files: + total += 1 + + rel_path = os.path.relpath(input_file, input_dir) + output_file = os.path.join(output_dir, rel_path) + if os.path.exists(output_file) and not force: + if show_details: + print(f" {input_file}: SKIPPED") + skip += 1 + continue + + ret_code = process_model(input_file, output_file, log_to_file, show_details) + if ret_code == 0: + ok += 1 + elif ret_code == 2: + zero += 1 + else: + errors += 1 + + print_stats(total, ok, errors, zero, skip) + +def main(argv): + args = parse_args(argv) + + if args.input_file: + process_model( + args.input_file, + args.output_file, + args.log_to_file, + True) + elif args.input_dir: + process_directory( + args.input_dir, + args.output_dir, + args.force, + args.log_to_file, + args.show_details + ) + elif args.teflon_test_models: + teflon_test_data = os.environ.get('TEFLON_TEST_DATA') + input_dir = os.path.join(teflon_test_data, 'models') + output_dir = os.path.join(teflon_test_data, 'compiled_models') + + print(f"Compiling Teflon test models:") 
+ print(f" Input directory: {input_dir}") + print(f" Output directory: {output_dir}") + + process_directory( + input_dir, + output_dir, + args.force, + args.log_to_file, + args.show_details + ) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/src/gallium/drivers/neutron/neutron_device.c b/src/gallium/drivers/neutron/neutron_device.c new file mode 100644 index 00000000000..39a5e961725 --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_device.c @@ -0,0 +1,257 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#include "util/os_mman.h" +#include "util/u_inlines.h" +#include "util/u_surface.h" +#include "util/u_transfer.h" + +#include + +#include "drm-uapi/neutron_accel.h" + +#include "neutron_device.h" +#include "neutron_ml.h" + +static const struct debug_named_value neutron_debug_options[] = { + {"dbg_msgs", NEUTRON_DBG_MSGS, "Print debug messages"}, + {"dump_bos", NEUTRON_DBG_DUMP_BOS, "Dump buffers for analysis"}, + {"zero_bos", NEUTRON_DBG_ZERO, "Zero buffers for debugging"}, + DEBUG_NAMED_VALUE_END}; + +DEBUG_GET_ONCE_FLAGS_OPTION(neutron_debug, "NEUTRON_DEBUG", + neutron_debug_options, 0) +int neutron_debug = 0; + +static void +neutron_screen_destroy(struct pipe_screen *pscreen) +{ + struct neutron_screen *screen = neutron_screen(pscreen); + + ralloc_free(screen); +} + +static void +neutron_context_destroy(struct pipe_context *pctx) +{ + struct neutron_context *ctx = neutron_context(pctx); + struct neutron_screen *screen = neutron_screen(pctx->screen); + + drmSyncobjDestroy(screen->fd, ctx->syncobj_handle); + ralloc_free(ctx); +} + +static void * +neutron_buffer_map(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level, + unsigned usage, const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct neutron_screen *screen = neutron_screen(pctx->screen); + struct neutron_resource *rsc = neutron_resource(prsc); + struct drm_neutron_sync_bo arg = {0}; + int ret; + + assert(level == 0); + 
assert(prsc->target == PIPE_BUFFER); + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); + + struct pipe_transfer *transfer = rzalloc(NULL, struct pipe_transfer); + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + + pipe_resource_reference(&transfer->resource, prsc); + + if (transfer->usage & PIPE_MAP_READ) { + arg.handle = rsc->handle; + arg.direction = DRM_NEUTRON_SYNC_FROM_DEVICE; + arg.size = box->width; + arg.offset = box->x; + + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg); + if (ret < 0) + goto free_transfer; + } + + *out_transfer = transfer; + + return (uint8_t *)rsc->map + box->x; + +free_transfer: + pipe_resource_reference(&transfer->resource, NULL); + ralloc_free(transfer); + return NULL; +} + +static void +neutron_buffer_unmap(struct pipe_context *pctx, + struct pipe_transfer *transfer) +{ + struct neutron_resource *rsrc = neutron_resource(transfer->resource); + struct neutron_screen *screen = neutron_screen(pctx->screen); + struct drm_neutron_sync_bo arg = {0}; + int ret; + + if (transfer->usage & PIPE_MAP_WRITE) { + arg.handle = rsrc->handle; + arg.direction = DRM_NEUTRON_SYNC_TO_DEVICE; + arg.size = transfer->box.width; + arg.offset = transfer->box.x; + + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SYNC_BO, &arg); + assert(ret >= 0); + } + + pipe_resource_reference(&transfer->resource, NULL); + ralloc_free(transfer); +} + +static struct pipe_context * +neutron_context_create(struct pipe_screen *pscreen, + void *priv, unsigned flags) +{ + struct neutron_context *ctx = rzalloc(NULL, struct neutron_context); + struct neutron_screen *screen = neutron_screen(pscreen); + struct pipe_context *pctx = &ctx->base; + int ret; + + if (!ctx) + return NULL; + + ret = drmSyncobjCreate(screen->fd, 0, &ctx->syncobj_handle); + if (ret) + return NULL; + + pctx->screen = pscreen; + pctx->priv = priv; + + pctx->destroy = neutron_context_destroy; + + pctx->buffer_map = 
neutron_buffer_map; + pctx->buffer_unmap = neutron_buffer_unmap; + pctx->resource_copy_region = util_resource_copy_region; + pctx->buffer_subdata = u_default_buffer_subdata; + pctx->clear_buffer = u_default_clear_buffer; + + pctx->ml_subgraph_invoke = neutron_ml_subgraph_invoke; + pctx->ml_subgraph_read_output = neutron_ml_subgraph_read_outputs; + + return pctx; +} + +static struct pipe_resource * +neutron_resource_create(struct pipe_screen *pscreen, + const struct pipe_resource *template) +{ + struct neutron_screen *screen = neutron_screen(pscreen); + struct drm_neutron_create_bo arg = {0}; + struct neutron_resource *rsc; + int ret; + + assert(template->target == PIPE_BUFFER); + assert(template->height0 == 1); + assert(template->depth0 == 1); + assert(template->array_size == 1); + + rsc = rzalloc(NULL, struct neutron_resource); + if (!rsc) + return NULL; + + rsc->base = *template; + rsc->base.screen = pscreen; + rsc->base.nr_samples = template->nr_samples; + pipe_reference_init(&rsc->base.reference, 1); + + arg.size = template->width0; + + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_CREATE_BO, &arg); + if (ret < 0) + goto free_rsc; + + rsc->handle = arg.handle; + rsc->size = arg.size; + + rsc->map = os_mmap(NULL, rsc->size, PROT_READ | PROT_WRITE, MAP_SHARED, + screen->fd, arg.map_offset); + if (rsc->map == MAP_FAILED) + goto close_bo; + + return &rsc->base; + +close_bo : { + struct drm_gem_close close_arg = {.handle = rsc->handle}; + drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &close_arg); +} +free_rsc: + ralloc_free(rsc); + return NULL; +} + +static void +neutron_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *prsc) +{ + struct neutron_resource *rsc = neutron_resource(prsc); + struct neutron_screen *screen = neutron_screen(pscreen); + struct drm_gem_close arg = {0}; + int ret; + + os_munmap(rsc->map, rsc->size); + + arg.handle = rsc->handle; + + ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); + assert(ret >= 0); + + ralloc_free(rsc); 
+} + +static int +neutron_screen_get_fd(struct pipe_screen *pscreen) +{ + return neutron_screen(pscreen)->fd; +} + +static struct pipe_ml_device * +neutron_get_ml_device(struct pipe_screen *pscreen) +{ + return &neutron_screen(pscreen)->ml_device.base; +} + +struct pipe_screen * +neutron_screen_create(int fd, + const struct pipe_screen_config *config, + struct renderonly *ro) +{ + struct neutron_screen *neutron_screen; + struct pipe_screen *screen; + + neutron_screen = rzalloc(NULL, struct neutron_screen); + if (!neutron_screen) + return NULL; + + neutron_debug = debug_get_option_neutron_debug(); + + screen = &neutron_screen->pscreen; + + neutron_screen->fd = fd; + + screen->get_screen_fd = neutron_screen_get_fd; + screen->destroy = neutron_screen_destroy; + screen->context_create = neutron_context_create; + screen->resource_create = neutron_resource_create; + screen->resource_destroy = neutron_resource_destroy; + + neutron_screen->ml_device.base.ml_operation_supported = neutron_ml_operation_supported; + neutron_screen->ml_device.base.ml_subgraph_create = neutron_ml_subgraph_create; + neutron_screen->ml_device.base.ml_subgraph_destroy = neutron_ml_subgraph_destroy; + screen->get_ml_device = neutron_get_ml_device; + + return screen; +} diff --git a/src/gallium/drivers/neutron/neutron_device.h b/src/gallium/drivers/neutron/neutron_device.h new file mode 100644 index 00000000000..6ccf9da27bb --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_device.h @@ -0,0 +1,90 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "renderonly/renderonly.h" +#include "util/log.h" +#include "util/macros.h" + +#ifndef NEUTRON_DEVICE_H +#define NEUTRON_DEVICE_H + +enum neutron_dbg { + NEUTRON_DBG_MSGS = BITFIELD_BIT(0), + NEUTRON_DBG_DUMP_BOS = BITFIELD_BIT(1), + NEUTRON_DBG_ZERO = BITFIELD_BIT(2), +}; + +#define DBG_ENABLED(flag) unlikely(neutron_debug &(flag)) +extern 
int neutron_debug;
+
+/* Log only when NEUTRON_DBG_MSGS is set. */
+#define DBG(fmt, ...) \
+   do { \
+      if (DBG_ENABLED(NEUTRON_DBG_MSGS)) \
+         mesa_logd(fmt, ##__VA_ARGS__); \
+   } while (0)
+
+/* ML device wrapper; `context` is created lazily on first subgraph create. */
+struct neutron_ml_device {
+   struct pipe_ml_device base;
+   struct pipe_context *context;
+};
+
+static inline struct neutron_ml_device *
+neutron_ml_device(struct pipe_ml_device *dev)
+{
+   return (struct neutron_ml_device *)dev;
+}
+
+struct neutron_screen {
+   struct pipe_screen pscreen;
+   struct neutron_ml_device ml_device;
+   int fd; /* DRM device fd */
+};
+
+static inline struct neutron_screen *
+neutron_screen(struct pipe_screen *p)
+{
+   return (struct neutron_screen *)p;
+}
+
+/* ml_device is embedded in the screen, so container_of recovers the screen. */
+static inline struct neutron_screen *
+neutron_ml_device_screen(struct pipe_ml_device *pdevice)
+{
+   struct neutron_ml_device *dev = neutron_ml_device(pdevice);
+   return container_of(dev, struct neutron_screen, ml_device);
+}
+
+struct neutron_context {
+   struct pipe_context base;
+   uint32_t syncobj_handle; /* signalled by the kernel on job completion */
+};
+
+static inline struct neutron_context *
+neutron_context(struct pipe_context *pctx)
+{
+   return (struct neutron_context *)pctx;
+}
+
+/* A PIPE_BUFFER backed by a Neutron BO, persistently mapped at `map`. */
+struct neutron_resource {
+   struct pipe_resource base;
+   uint32_t handle; /* GEM handle */
+   uint64_t size;   /* actual BO size as returned by the kernel */
+   void *map;
+};
+
+static inline struct neutron_resource *
+neutron_resource(struct pipe_resource *p)
+{
+   return (struct neutron_resource *)p;
+}
+
+struct pipe_screen *
+neutron_screen_create(int fd,
+                      const struct pipe_screen_config *config,
+                      struct renderonly *ro);
+
+#endif /* NEUTRON_DEVICE_H */
diff --git a/src/gallium/drivers/neutron/neutron_ml.c b/src/gallium/drivers/neutron/neutron_ml.c
new file mode 100644
index 00000000000..29a0ac72f71
--- /dev/null
+++ b/src/gallium/drivers/neutron/neutron_ml.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright 2026 NXP
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "pipe/p_state.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "util/u_inlines.h"
+
+/* NOTE(review): the angle-bracket include target was lost in extraction —
+ * presumably <xf86drm.h> (drmIoctl/drmSyncobjWait are used below); verify
+ * against the original patch. */
+#include
+
+#include "drm-uapi/neutron_accel.h"
+#include "neutron_ml.h"
+
+/*
+ * Section name for a tensor type.  The custom-section names must match the
+ * tensor names emitted by the Neutron compiler (see get_tensor_type()).
+ */
+static const char *
+neutron_tensor_name(enum neutron_tensor_type type)
+{
+   switch (type) {
+   case NEUTRON_WEIGHTS:
+      return "NeutronWeights";
+   case NEUTRON_MICROCODE:
+      return "NeutronMicrocode";
+   case NEUTRON_KERNELS:
+      return "NeutronKernels";
+   case NEUTRON_SCRATCH:
+      return "NeutronScratch";
+   case NEUTRON_PROFILE:
+      return "NeutronProfile";
+   case NEUTRON_DEBUG:
+      return "NeutronDebug";
+   case NEUTRON_DATA_INPUT:
+      return "Input";
+   case NEUTRON_DATA_OUTPUT:
+      return "Output";
+   default:
+      return "N/A";
+   }
+}
+
+/* Debug aid: dump `size` bytes at `ptr + offset` to ./mesa-<name>-<id>.bin. */
+static void
+neutron_dump_buffer(const uint8_t *ptr, const char *name,
+                    int id, int offset, unsigned size)
+{
+   char buffer[255];
+
+   snprintf(buffer, sizeof(buffer), "mesa-%s-%03u.bin", name, id);
+
+   FILE *f = fopen(buffer, "wb");
+   /* NOTE(review): debug-only path, but a failed fopen() would crash in
+    * fwrite() when asserts are compiled out. */
+   assert(f);
+   fwrite(ptr + offset, 1, size, f);
+   if (ferror(f)) {
+      DBG("Error in writing to file: %s\n", strerror(errno));
+   }
+   fflush(f);
+   fclose(f);
+}
+
+/* Dump one tensor's section of the job BO via the persistent CPU mapping. */
+static void
+neutron_dump_tensor(struct pipe_resource *bo,
+                    struct neutron_tensor *ntensor, int id)
+{
+   uint8_t *buf = neutron_resource(bo)->map;
+   neutron_dump_buffer(buf, neutron_tensor_name(ntensor->type), id,
+                       ntensor->offset, ntensor->size);
+}
+
+/*
+ * Accept only the "NeutronGraph" custom operation, and only when it carries
+ * more inputs than the custom sections alone (weights/microcode/kernels),
+ * i.e. at least one data input.
+ */
+bool
+neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
+                               const struct pipe_ml_operation *operation)
+{
+   if (operation->type != PIPE_ML_OPERATION_TYPE_CUSTOM)
+      return false;
+
+   if (!operation->custom.name ||
+       strcmp(operation->custom.name, "NeutronGraph") != 0)
+      return false;
+
+   if (operation->input_count <= NEUTRON_CUSTOM_INPUT_MAX)
+      return false;
+
+   return true;
+}
+
+/* Product of the 4 dims — assumes unused dims are 1 and 1 byte per element
+ * (quantized data); TODO confirm against pipe_tensor semantics. */
+static unsigned
+get_tensor_size(const struct pipe_tensor *tensor)
+{
+   unsigned size = 1;
+
+   for (unsigned i = 0; i < 4; i++)
+      size *= tensor->dims[i];
+
+   return size;
+}
+
+/*
+ * Classify a tensor by matching its name against the well-known Neutron
+ * section names; anything else is plain data, typed by direction.
+ */
+static enum neutron_tensor_type
+get_tensor_type(struct pipe_tensor *tensor, bool is_input)
+{
+   for (unsigned i = 0; i < NEUTRON_CUSTOM_MAX; i++) {
+      if (strcmp(tensor->name, neutron_tensor_name(i)) == 0)
+         return i;
+   }
+
+   return is_input ? NEUTRON_DATA_INPUT : NEUTRON_DATA_OUTPUT;
+}
+
+/* Weights/microcode/kernels are compiler-provided and uploaded at setup. */
+static bool
+is_custom_input(struct neutron_tensor *ntensor)
+{
+   return ntensor->type < NEUTRON_CUSTOM_INPUT_MAX;
+}
+
+/* Wrap a pipe_tensor and append it to the subgraph's tensor list. */
+static void
+create_tensor(struct neutron_subgraph *subgraph,
+              struct pipe_tensor *tensor, bool is_input)
+{
+   struct neutron_tensor ntensor = {0};
+
+   ntensor.type = get_tensor_type(tensor, is_input);
+   ntensor.size = get_tensor_size(tensor);
+   ntensor.tensor = tensor;
+
+   /* Data tensors get sequential ids, used by get_header_index(). */
+   if (ntensor.type == NEUTRON_DATA_INPUT)
+      ntensor.id = subgraph->data_inputs++;
+   else if (ntensor.type == NEUTRON_DATA_OUTPUT)
+      ntensor.id = subgraph->data_outputs++;
+
+   util_dynarray_append(&subgraph->tensors, ntensor);
+}
+
+/* Header entry count: one slot per tensor plus the 4 reserved slots before
+ * the profile entry (see get_header_index). */
+static unsigned
+get_header_array_size(struct neutron_subgraph *subgraph)
+{
+   unsigned num_tensors =
+      util_dynarray_num_elements(&subgraph->tensors, struct neutron_tensor);
+
+   return num_tensors + 4;
+}
+
+/* Total BO size: the offset header plus every section, 64-byte aligned. */
+static unsigned
+get_buffer_size(struct neutron_subgraph *subgraph)
+{
+   unsigned size = 0;
+
+   util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor)
+      size += NEUTRON_ALIGN(ntensor->size);
+   size += subgraph->header.size;
+
+   return size;
+}
+
+/*
+ * Slot of a tensor's offset in the header array.  Layout:
+ *   [inputs][weights][outputs][kernels][scratch][4 reserved][profile][debug]
+ * Microcode has no header slot — its offset is passed via the submit ioctl —
+ * so it falls through to the -1 default.
+ */
+static int
+get_header_index(struct neutron_subgraph *subgraph,
+                 struct neutron_tensor *ntensor)
+{
+   switch (ntensor->type) {
+   case NEUTRON_DATA_INPUT:
+      return ntensor->id;
+   case NEUTRON_WEIGHTS:
+      return subgraph->data_inputs;
+   case NEUTRON_DATA_OUTPUT:
+      return subgraph->data_inputs + 1 + ntensor->id;
+   case NEUTRON_KERNELS:
+      return subgraph->data_inputs + subgraph->data_outputs + 1;
+   case NEUTRON_SCRATCH:
+      return subgraph->data_inputs + subgraph->data_outputs + 2;
+   case NEUTRON_PROFILE:
+      /* 4 entries reserved */
+      return subgraph->data_inputs + subgraph->data_outputs + 7;
+   case NEUTRON_DEBUG:
+      return subgraph->data_inputs + subgraph->data_outputs + 8;
+   default:
+      return -1;
+   }
+}
+
+/*
+ * Build a subgraph from a single NeutronGraph custom operation: lay all
+ * tensors out in one BO and upload the compiler-provided sections.
+ */
+struct pipe_ml_subgraph *
+neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
+                           const struct pipe_ml_operation *poperations,
+
unsigned count) +{ + struct neutron_screen *screen = neutron_ml_device_screen(pdevice); + struct neutron_ml_device *dev = neutron_ml_device(pdevice); + const struct pipe_ml_operation *operation = &poperations[0]; + struct neutron_subgraph *subgraph; + uint32_t *offset_array; + unsigned crt_offset = 0; + + if (count > 1) { + DBG("Expect a single NeutronGraph per partition!\n"); + return NULL; + } + + if (!dev->context) + dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0); + + subgraph = calloc(1, sizeof(*subgraph)); + subgraph->base.device = pdevice; + + subgraph->tensors = UTIL_DYNARRAY_INIT; + subgraph->header = UTIL_DYNARRAY_INIT; + + for (unsigned i = 0; i < operation->input_count; i++) + create_tensor(subgraph, operation->input_tensors[i], true); + for (unsigned i = 0; i < operation->output_count; i++) + create_tensor(subgraph, operation->output_tensors[i], false); + + if (!util_dynarray_resize(&subgraph->header, uint32_t, + get_header_array_size(subgraph))) + return NULL; + memset(util_dynarray_begin(&subgraph->header), 0, subgraph->header.size); + + subgraph->bo = pipe_buffer_create(dev->context->screen, 0, + PIPE_USAGE_DEFAULT, + get_buffer_size(subgraph)); + if (!subgraph->bo) + return NULL; + + if (DBG_ENABLED(NEUTRON_DBG_ZERO)) { + struct pipe_transfer *transfer; + struct neutron_resource *rsc = neutron_resource(subgraph->bo); + + uint8_t *buf = pipe_buffer_map(dev->context, subgraph->bo, + PIPE_MAP_WRITE, &transfer); + memset(buf, 0, pipe_buffer_size(subgraph->bo)); + pipe_buffer_unmap(dev->context, transfer); + } + + DBG("%s: Tensor list:\n", __func__); + DBG("%16s %3s %6s %8s %8s %8s\n", "TYPE", "ID", "INDEX", "OFFSET", "SIZE", "HDR_IDX"); + + offset_array = util_dynarray_begin(&subgraph->header); + crt_offset = NEUTRON_ALIGN(subgraph->header.size); + + /* Translate all tensors into buffer sections */ + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) { + unsigned header_index = get_header_index(subgraph, 
ntensor); + + /* Microcode offset passed via ioctl, all others stored in header */ + if (ntensor->type == NEUTRON_MICROCODE) + subgraph->microcode_offset = crt_offset; + else + offset_array[header_index] = crt_offset; + + ntensor->offset = crt_offset; + crt_offset += NEUTRON_ALIGN(ntensor->size); + + /* Custom inputs are written in the buffer during setup */ + if (is_custom_input(ntensor)) { + pipe_buffer_write(dev->context, subgraph->bo, ntensor->offset, + ntensor->size, ntensor->tensor->data); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) + neutron_dump_tensor(subgraph->bo, ntensor, 0); + } + + DBG("%16s %3d %6d %8d %8d %2d\n", neutron_tensor_name(ntensor->type), + ntensor->id, ntensor->tensor->index, ntensor->offset, ntensor->size, + header_index); + } + + pipe_buffer_write(dev->context, subgraph->bo, 0, subgraph->header.size, + util_dynarray_begin(&subgraph->header)); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) { + uint8_t *buf = neutron_resource(subgraph->bo)->map; + neutron_dump_buffer(buf, "header", 0, 0, subgraph->header.size); + } + + return &subgraph->base; +} + +static struct neutron_tensor * +get_tensor_by_id(struct neutron_subgraph *subgraph, + enum neutron_tensor_type type, unsigned index) +{ + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) { + if (ntensor->type == type && ntensor->tensor->index == index) + return ntensor; + } + return NULL; +} + +void +neutron_ml_subgraph_invoke(struct pipe_context *pcontext, + struct pipe_ml_subgraph *psubgraph, + unsigned inputs_count, unsigned input_idxs[], + void *inputs[], bool is_signed[]) +{ + struct neutron_screen *screen = neutron_screen(pcontext->screen); + struct neutron_context *context = neutron_context(pcontext); + struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph); + struct drm_neutron_submit_job submit = {0}; + int ret; + + for (unsigned i = 0; i < inputs_count; i++) { + struct neutron_tensor *ntensor = + get_tensor_by_id(subgraph, NEUTRON_DATA_INPUT, 
input_idxs[i]); + + DBG("Prepare input tensor: idx=%d, offset=%d, size=%d\n", + input_idxs[i], ntensor->offset, ntensor->size); + pipe_buffer_write(pcontext, subgraph->bo, ntensor->offset, + ntensor->size, inputs[i]); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) + neutron_dump_tensor(subgraph->bo, ntensor, i); + } + + /* Transfer ownership of output sections to device */ + util_dynarray_foreach (&subgraph->tensors, struct neutron_tensor, ntensor) { + if (ntensor->type == NEUTRON_DATA_OUTPUT) { + struct pipe_transfer *transfer; + pipe_buffer_map_range(pcontext, subgraph->bo, ntensor->offset, + ntensor->size, PIPE_MAP_WRITE, &transfer); + pipe_buffer_unmap(pcontext, transfer); + } + } + + submit.type = DRM_NEUTRON_JOB_INFERENCE; + submit.bo_handle = neutron_resource(subgraph->bo)->handle; + submit.syncobj_handle = context->syncobj_handle; + submit.inference.microcode_offset = subgraph->microcode_offset; + submit.inference.tensor_offset = 0; + submit.inference.tensor_count = + util_dynarray_num_elements(&subgraph->header, uint32_t); + + DBG("Submitting job\n"); + ret = drmIoctl(screen->fd, DRM_IOCTL_NEUTRON_SUBMIT_JOB, &submit); + assert(ret == 0); +} + +void +neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext, + struct pipe_ml_subgraph *psubgraph, + unsigned outputs_count, + unsigned output_idxs[], void *outputsv[], + bool is_signed[]) +{ + struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph); + struct neutron_context *context = neutron_context(pcontext); + struct neutron_screen *screen = neutron_screen(pcontext->screen); + int ret; + + ret = drmSyncobjWait(screen->fd, &context->syncobj_handle, 1, INT64_MAX, + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL); + assert(ret == 0); + DBG("Job finished\n"); + + for (unsigned i = 0; i < outputs_count; i++) { + struct neutron_tensor *ntensor = + get_tensor_by_id(subgraph, NEUTRON_DATA_OUTPUT, output_idxs[i]); + + DBG("Read output tensor: idx=%d, offset=%d, size=%d\n", + output_idxs[i], 
ntensor->offset, ntensor->size); + pipe_buffer_read(pcontext, subgraph->bo, ntensor->offset, + ntensor->size, outputsv[i]); + + if (DBG_ENABLED(NEUTRON_DBG_DUMP_BOS)) + neutron_dump_tensor(subgraph->bo, ntensor, i); + } +} + +void +neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice, + struct pipe_ml_subgraph *psubgraph) +{ + struct pipe_context *pcontext = neutron_ml_device(pdevice)->context; + struct neutron_screen *screen = neutron_screen(pcontext->screen); + struct neutron_subgraph *subgraph = (struct neutron_subgraph *)(psubgraph); + struct drm_gem_close arg = {0}; + int ret; + + arg.handle = neutron_resource(subgraph->bo)->handle; + ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg); + assert(ret >= 0); + + pipe_resource_reference(&subgraph->bo, NULL); + util_dynarray_fini(&subgraph->tensors); + util_dynarray_fini(&subgraph->header); + + free(subgraph); +} diff --git a/src/gallium/drivers/neutron/neutron_ml.h b/src/gallium/drivers/neutron/neutron_ml.h new file mode 100644 index 00000000000..c362230fcbd --- /dev/null +++ b/src/gallium/drivers/neutron/neutron_ml.h @@ -0,0 +1,73 @@ +/* + * Copyright 2026 NXP + * SPDX-License-Identifier: MIT + */ + +#ifndef NEUTRON_ML_H +#define NEUTRON_ML_H + +#include +#include + +#include "neutron_device.h" + +#define NEUTRON_ALIGN(value) align(value, 64) + +enum neutron_tensor_type { + NEUTRON_WEIGHTS = 0, + NEUTRON_MICROCODE, + NEUTRON_KERNELS, + NEUTRON_CUSTOM_INPUT_MAX, + NEUTRON_SCRATCH = NEUTRON_CUSTOM_INPUT_MAX, + NEUTRON_PROFILE, + NEUTRON_DEBUG, + NEUTRON_CUSTOM_MAX, + NEUTRON_DATA_INPUT = NEUTRON_CUSTOM_MAX, + NEUTRON_DATA_OUTPUT, +}; + +struct neutron_tensor { + struct pipe_tensor *tensor; + enum neutron_tensor_type type; + unsigned size; + unsigned offset; + unsigned id; /* For NEUTRON_DATA_INPUT and NEUTRON_DATA_OUTPUT */ +}; + +struct neutron_subgraph { + struct pipe_ml_subgraph base; + struct pipe_resource *bo; + struct util_dynarray tensors; /* struct neutron_tensor */ + struct util_dynarray header; + 
unsigned microcode_offset; /* byte offset of the microcode section in bo */
+   unsigned data_inputs;  /* number of non-custom input tensors */
+   unsigned data_outputs; /* number of non-custom output tensors */
+};
+
+/* ML entry points, wired up into pipe_ml_device / pipe_context in
+ * neutron_device.c. */
+bool
+neutron_ml_operation_supported(struct pipe_ml_device *pdevice,
+                               const struct pipe_ml_operation *operation);
+
+struct pipe_ml_subgraph *
+neutron_ml_subgraph_create(struct pipe_ml_device *pdevice,
+                           const struct pipe_ml_operation *poperations,
+                           unsigned count);
+
+void
+neutron_ml_subgraph_invoke(struct pipe_context *pcontext,
+                           struct pipe_ml_subgraph *psubgraph,
+                           unsigned inputs_count, unsigned input_idxs[],
+                           void *inputs[], bool is_signed[]);
+
+void
+neutron_ml_subgraph_read_outputs(struct pipe_context *pcontext,
+                                 struct pipe_ml_subgraph *psubgraph,
+                                 unsigned outputs_count,
+                                 unsigned output_idxs[], void *outputs[],
+                                 bool is_signed[]);
+
+void
+neutron_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
+                            struct pipe_ml_subgraph *psubgraph);
+
+#endif /* NEUTRON_ML_H */
diff --git a/src/gallium/meson.build b/src/gallium/meson.build
index 67011f56b9f..419ffd86fa6 100644
--- a/src/gallium/meson.build
+++ b/src/gallium/meson.build
@@ -196,6 +196,12 @@ if with_gallium_ethosu
 else
   driver_ethosu = declare_dependency()
 endif
+if with_gallium_neutron
+  subdir('winsys/neutron/drm')
+  subdir('drivers/neutron')
+else
+  driver_neutron = declare_dependency()
+endif
 if with_gallium_zink
   if not with_platform_windows
     subdir('winsys/zink/drm')
diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build
index c8ac8f7d510..c4a60ce9a8d 100644
--- a/src/gallium/targets/dri/meson.build
+++ b/src/gallium/targets/dri/meson.build
@@ -59,7 +59,7 @@ libgallium_dri = shared_library(
     driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
     driver_tegra, driver_i915, driver_svga, driver_virgl, driver_panfrost,
     driver_iris, driver_lima, driver_zink, driver_d3d12,
-    driver_asahi, driver_crocus, driver_rocket, driver_ethosu
+    driver_asahi, driver_crocus, driver_rocket, driver_ethosu, driver_neutron
   ],
   install : true,
   name_suffix : libname_suffix,
diff --git a/src/gallium/winsys/neutron/drm/meson.build b/src/gallium/winsys/neutron/drm/meson.build
new file mode 100644
index 00000000000..e9d917a5198
--- /dev/null
+++ b/src/gallium/winsys/neutron/drm/meson.build
@@ -0,0 +1,13 @@
+# Copyright 2017 Broadcom
+# SPDX-License-Identifier: MIT
+
+libneutronwinsys = static_library(
+  'neutronwinsys',
+  files('neutron_drm_winsys.c'),
+  include_directories : [
+    inc_src, inc_include,
+    inc_gallium, inc_gallium_aux, inc_gallium_drivers,
+  ],
+  gnu_symbol_visibility : 'hidden',
+  dependencies: [dep_libdrm, idep_mesautil],
+)
diff --git a/src/gallium/winsys/neutron/drm/neutron_drm_public.h b/src/gallium/winsys/neutron/drm/neutron_drm_public.h
new file mode 100644
index 00000000000..a4d969eb7ea
--- /dev/null
+++ b/src/gallium/winsys/neutron/drm/neutron_drm_public.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright 2014 Broadcom
+ * Copyright 2018 Alyssa Rosenzweig
+ * Copyright 2025 Tomeu Vizoso
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef __NEUTRON_DRM_PUBLIC_H__
+#define __NEUTRON_DRM_PUBLIC_H__
+
+struct pipe_screen;
+struct pipe_screen_config;
+
+/* Winsys entry point: create (or reuse) a Neutron screen for a DRM fd. */
+struct pipe_screen *
+neutron_drm_screen_create(int drmFD, const struct pipe_screen_config *config);
+
+#endif /* __NEUTRON_DRM_PUBLIC_H__ */
diff --git a/src/gallium/winsys/neutron/drm/neutron_drm_winsys.c b/src/gallium/winsys/neutron/drm/neutron_drm_winsys.c
new file mode 100644
index 00000000000..debf9aa75c9
--- /dev/null
+++ b/src/gallium/winsys/neutron/drm/neutron_drm_winsys.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2014 Broadcom
+ * Copyright 2018 Alyssa Rosenzweig
+ * Copyright 2025 Tomeu Vizoso
+ * Copyright 2026 NXP
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "util/os_file.h"
+#include "util/u_screen.h"
+
+#include "neutron/neutron_device.h"
+#include "neutron_drm_public.h"
+
+/* Dup the fd (screen owns its copy) and reuse an existing screen for the
+ * same device if one was already created, else build a new one. */
+struct pipe_screen *
+neutron_drm_screen_create(int fd, const struct pipe_screen_config *config)
+{
+   return u_pipe_screen_lookup_or_create(os_dupfd_cloexec(fd), config, NULL,
+                                         neutron_screen_create);
+}