From d8fef0a26c42920ae6b174c07668b9c1492a557b Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Fri, 21 Mar 2025 12:32:01 -0500
Subject: [PATCH] nak: Improve WS abstractions in hw_runner

Reviewed-by: Lorenzo Rossi
Part-of:
---
Review notes (everything between "---" and the first "diff --git" line is
ignored by `git am`):

What the patch does: the old monolithic Runner is split into
 - DrmDevices: owns the array filled by drmGetDevices() and calls
   drmFreeDevices() on drop;
 - Device: wraps the nouveau_ws_device pointer and the next_addr counter
   that BO::new() uses to pick bind addresses; handed out as Arc<Device>
   and destroyed with nouveau_ws_device_destroy() on drop;
 - Context: wraps the nouveau_ws_context, the syncobj and its sync_value
   counter, and now hosts exec();
 - BO: holds an Arc<Device> instead of borrowing the Runner.
Runner::new() keeps its signature and still panics (via expect) when the
device or context cannot be created.

Changes relative to the copy that was under review:
 - Line structure and indentation restored; the text had been collapsed
   onto a handful of lines. Blank context lines were re-derived from the
   hunk headers, whose old/new counts all still match.
 - Generic arguments that had been stripped (NonNull<...>, Arc<Device>,
   Mutex<u64>, Option<usize>, io::Result<...>) restored. They are inferred
   from how each value is used; confirm against the tree.
 - Device::new(): the "Unknown device" error is now built with format!().
   A plain string literal handed to io::Error::new() is not interpolated,
   so the message would have contained the text "{dev_id}" verbatim.
 - BO::drop(): use self.dev.ws_dev(), like BO::new(), instead of reaching
   into self.dev.dev.
 - The post-image blob id on the "index" line predates the two code
   changes above.

 src/nouveau/compiler/nak/hw_runner.rs | 299 +++++++++++++++++---------
 1 file changed, 201 insertions(+), 98 deletions(-)

diff --git a/src/nouveau/compiler/nak/hw_runner.rs b/src/nouveau/compiler/nak/hw_runner.rs
index 689803d2283..86bc548ac2d 100644
--- a/src/nouveau/compiler/nak/hw_runner.rs
+++ b/src/nouveau/compiler/nak/hw_runner.rs
@@ -13,10 +13,12 @@ use nvidia_headers::classes::clc6c0::mthd as clc6c0;
 use nvidia_headers::classes::clc6c0::AMPERE_COMPUTE_A;
 
 use std::io;
+use std::ops::Deref;
 use std::ptr;
 use std::ptr::NonNull;
+use std::slice;
 use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::Mutex;
+use std::sync::{Arc, Mutex};
 
 unsafe fn is_nvidia_device(dev: drmDevicePtr) -> bool {
     match (*dev).bustype as u32 {
@@ -36,127 +38,92 @@ pub struct CB0 {
     pub invocations: u32,
 }
 
-struct BO<'a> {
-    run: &'a Runner,
-    bo: NonNull<nouveau_ws_bo>,
-    pub addr: u64,
-    pub map: *mut std::os::raw::c_void,
+struct DrmDevices {
+    num_devices: usize,
+    devices: [drmDevicePtr; 16],
 }
 
-impl<'a> BO<'a> {
-    fn new(run: &'a Runner, size: u64) -> io::Result<BO<'a>> {
-        let size = size.next_multiple_of(4096);
-
-        let mut map: *mut std::os::raw::c_void = std::ptr::null_mut();
-        let bo = unsafe {
-            nouveau_ws_bo_new_mapped(
-                run.dev.as_ptr(),
-                size,
-                0, // align
-                NOUVEAU_WS_BO_GART,
-                NOUVEAU_WS_BO_RDWR,
-                ptr::from_mut(&mut map),
-            )
-        };
-        let Some(bo) = NonNull::new(bo) else {
-            return Err(io::Error::last_os_error());
-        };
-        assert!(!map.is_null());
-
-        let addr = run.next_addr.fetch_add(size, Ordering::Relaxed);
-        assert!(addr % 4096 == 0);
-
+impl DrmDevices {
+    fn get() -> io::Result<DrmDevices> {
         unsafe {
-            nouveau_ws_bo_bind_vma(
-                run.dev.as_ptr(),
-                bo.as_ptr(),
-                addr,
-                size,
-                0, // bo_offset
-                0, // pte_kind
+            let mut devices: [drmDevicePtr; 16] = std::mem::zeroed();
+            let num_devices = drmGetDevices(
+                devices.as_mut_ptr(),
+                devices.len().try_into().unwrap(),
             );
+            if num_devices < 0 {
+                return Err(io::Error::last_os_error());
+            }
+            Ok(DrmDevices {
+                num_devices: num_devices.try_into().unwrap(),
+                devices,
+            })
         }
+    }
 
-        Ok(BO { run, bo, addr, map })
+    fn iter(&self) -> slice::Iter<'_, drmDevicePtr> {
+        self.devices[..self.num_devices].iter()
     }
 }
 
-impl Drop for BO<'_> {
+impl Deref for DrmDevices {
+    type Target = [drmDevicePtr];
+
+    fn deref(&self) -> &[drmDevicePtr] {
+        &self.devices[..self.num_devices]
+    }
+}
+
+impl Drop for DrmDevices {
     fn drop(&mut self) {
         unsafe {
-            nouveau_ws_bo_unbind_vma(
-                self.run.dev.as_ptr(),
-                self.addr,
-                self.bo.as_ref().size,
+            drmFreeDevices(
+                self.devices.as_mut_ptr(),
+                self.num_devices.try_into().unwrap(),
             );
-            nouveau_ws_bo_destroy(self.bo.as_ptr());
         }
     }
 }
 
-pub struct Runner {
-    dev: NonNull<nouveau_ws_device>,
-    ctx: NonNull<nouveau_ws_context>,
-    syncobj: u32,
-    sync_value: Mutex<u64>,
+struct Device {
+    dev: NonNull<nouveau_ws_device>,
     next_addr: AtomicU64,
 }
 
-impl<'a> Runner {
-    pub fn new(dev_id: Option<usize>) -> Runner {
+impl Device {
+    pub fn new(dev_id: Option<usize>) -> io::Result<Arc<Device>> {
+        let drm_devices = DrmDevices::get()?;
         unsafe {
-            let mut drm_devices: [drmDevicePtr; 16] = std::mem::zeroed();
-            let num_drm_devices = drmGetDevices(
-                drm_devices.as_mut_ptr(),
-                drm_devices.len().try_into().unwrap(),
-            );
-
-            assert!(num_drm_devices >= 0, "Failed to enumerate DRM devices");
-            let num_drm_devices: usize = num_drm_devices.try_into().unwrap();
-
             let drm_dev = if let Some(dev_id) = dev_id {
-                assert!(dev_id < num_drm_devices, "Unknown device {dev_id}");
-                assert!(
-                    is_nvidia_device(drm_devices[dev_id]),
-                    "Device {dev_id} is not an NVIDIA device",
-                );
+                if dev_id >= drm_devices.len() {
+                    return Err(io::Error::new(
+                        io::ErrorKind::NotFound,
+                        format!("Unknown device {dev_id}"),
+                    ));
+                }
                 drm_devices[dev_id]
             } else {
-                *drm_devices
-                    .iter()
-                    .find(|dev| is_nvidia_device(**dev))
-                    .expect("Failed to find an NVIDIA device")
+                if let Some(dev) =
+                    drm_devices.iter().find(|dev| is_nvidia_device(**dev))
+                {
+                    *dev
+                } else {
+                    return Err(io::Error::new(
+                        io::ErrorKind::NotFound,
+                        "Failed to find an NVIDIA device",
+                    ));
+                }
             };
 
             let dev = nouveau_ws_device_new(drm_dev);
-            let dev =
-                NonNull::new(dev).expect("Failed to create nouveau device");
+            let Some(dev) = NonNull::new(dev) else {
+                return Err(io::Error::last_os_error());
+            };
 
-            drmFreeDevices(
-                drm_devices.as_mut_ptr(),
-                num_drm_devices.try_into().unwrap(),
-            );
-
-            let mut ctx: *mut nouveau_ws_context = std::ptr::null_mut();
-            let err = nouveau_ws_context_create(
-                dev.as_ptr(),
-                NOUVEAU_WS_ENGINE_COMPUTE,
-                &mut ctx,
-            );
-            assert!(err == 0, "Failed to create nouveau context");
-            let ctx = NonNull::new(ctx).unwrap();
-
-            let mut syncobj = 0_u32;
-            let err = drmSyncobjCreate(dev.as_ref().fd, 0, &mut syncobj);
-            assert!(err == 0, "Failed to create syncobj");
-
-            Runner {
+            Ok(Arc::new(Device {
                 dev,
-                ctx,
-                syncobj,
-                sync_value: Mutex::new(0),
                 next_addr: AtomicU64::new(1 << 16),
-            }
+            }))
         }
     }
 
@@ -164,7 +131,59 @@ impl<'a> Runner {
         unsafe { &self.dev.as_ref().info }
     }
 
-    fn exec(&self, addr: u64, len: u16) -> io::Result<()> {
+    fn fd(&self) -> i32 {
+        unsafe { self.dev.as_ref().fd }
+    }
+
+    fn ws_dev(&self) -> *mut nouveau_ws_device {
+        self.dev.as_ptr()
+    }
+}
+
+impl Drop for Device {
+    fn drop(&mut self) {
+        unsafe { nouveau_ws_device_destroy(self.ws_dev()) }
+    }
+}
+
+struct Context {
+    dev: Arc<Device>,
+    ctx: NonNull<nouveau_ws_context>,
+    syncobj: u32,
+    sync_value: Mutex<u64>,
+}
+
+impl Context {
+    pub fn new(dev: Arc<Device>) -> io::Result<Context> {
+        unsafe {
+            let mut ctx: *mut nouveau_ws_context = std::ptr::null_mut();
+            let err = nouveau_ws_context_create(
+                dev.ws_dev(),
+                NOUVEAU_WS_ENGINE_COMPUTE,
+                &mut ctx,
+            );
+            if err != 0 {
+                return Err(io::Error::last_os_error());
+            }
+            let ctx = NonNull::new(ctx).unwrap();
+
+            let mut syncobj = 0_u32;
+            let err = drmSyncobjCreate(dev.fd(), 0, &mut syncobj);
+            if err != 0 {
+                nouveau_ws_context_destroy(ctx.as_ptr());
+                return Err(io::Error::last_os_error());
+            }
+
+            Ok(Context {
+                dev,
+                ctx,
+                syncobj,
+                sync_value: Mutex::new(0),
+            })
+        }
+    }
+
+    pub fn exec(&self, addr: u64, len: u16) -> io::Result<()> {
         let sync_value = unsafe {
             let mut sync_value = self.sync_value.lock().unwrap();
             *sync_value += 1;
@@ -189,7 +208,7 @@ impl<'a> Runner {
                 sig_ptr: &sig as *const _ as u64,
             };
             let err = drmIoctl(
-                self.dev.as_ref().fd,
+                self.dev.fd(),
                 DRM_RS_IOCTL_NOUVEAU_EXEC.into(),
                 &exec as *const _ as *mut std::os::raw::c_void,
             );
@@ -202,7 +221,7 @@ impl<'a> Runner {
 
         unsafe {
             let err = drmSyncobjTimelineWait(
-                self.dev.as_ref().fd,
+                self.dev.fd(),
                 &self.syncobj as *const _ as *mut _,
                 &sync_value as *const _ as *mut _,
                 1, // num_handles
@@ -225,7 +244,7 @@ impl<'a> Runner {
                 sig_ptr: 0,
             };
             let err = drmIoctl(
-                self.dev.as_ref().fd,
+                self.dev.fd(),
                 DRM_RS_IOCTL_NOUVEAU_EXEC.into(),
                 ptr::from_mut(&mut exec).cast(),
             );
@@ -236,6 +255,90 @@ impl<'a> Runner {
 
         Ok(())
     }
+}
+
+impl Drop for Context {
+    fn drop(&mut self) {
+        unsafe {
+            drmSyncobjDestroy(self.dev.fd(), self.syncobj);
+            nouveau_ws_context_destroy(self.ctx.as_ptr());
+        }
+    }
+}
+
+struct BO {
+    dev: Arc<Device>,
+    bo: NonNull<nouveau_ws_bo>,
+    pub addr: u64,
+    pub map: *mut std::os::raw::c_void,
+}
+
+impl BO {
+    fn new(dev: Arc<Device>, size: u64) -> io::Result<BO> {
+        let size = size.next_multiple_of(4096);
+
+        let mut map: *mut std::os::raw::c_void = std::ptr::null_mut();
+        let bo = unsafe {
+            nouveau_ws_bo_new_mapped(
+                dev.ws_dev(),
+                size,
+                0, // align
+                NOUVEAU_WS_BO_GART,
+                NOUVEAU_WS_BO_RDWR,
+                ptr::from_mut(&mut map),
+            )
+        };
+        let Some(bo) = NonNull::new(bo) else {
+            return Err(io::Error::last_os_error());
+        };
+        assert!(!map.is_null());
+
+        let addr = dev.next_addr.fetch_add(size, Ordering::Relaxed);
+        assert!(addr % 4096 == 0);
+
+        unsafe {
+            nouveau_ws_bo_bind_vma(
+                dev.ws_dev(),
+                bo.as_ptr(),
+                addr,
+                size,
+                0, // bo_offset
+                0, // pte_kind
+            );
+        }
+
+        Ok(BO { dev, bo, addr, map })
+    }
+}
+
+impl Drop for BO {
+    fn drop(&mut self) {
+        unsafe {
+            nouveau_ws_bo_unbind_vma(
+                self.dev.ws_dev(),
+                self.addr,
+                self.bo.as_ref().size,
+            );
+            nouveau_ws_bo_destroy(self.bo.as_ptr());
+        }
+    }
+}
+
+pub struct Runner {
+    dev: Arc<Device>,
+    ctx: Context,
+}
+
+impl<'a> Runner {
+    pub fn new(dev_id: Option<usize>) -> Runner {
+        let dev = Device::new(dev_id).expect("Failed to create nouveau device");
+        let ctx = Context::new(dev.clone()).expect("Failed to create context");
+        Runner { dev, ctx }
+    }
+
+    pub fn dev_info(&self) -> &nv_device_info {
+        self.dev.dev_info()
+    }
 
     pub unsafe fn run_raw(
         &self,
@@ -270,7 +373,7 @@ impl<'a> Runner {
         let data_offset = size.next_multiple_of(256);
         size = data_offset + data_size;
 
-        let bo = BO::new(self, size.try_into().unwrap())?;
+        let bo = BO::new(self.dev.clone(), size.try_into().unwrap())?;
 
         // Copy the data from the caller into our BO
         let data_addr = bo.addr + u64::try_from(data_offset).unwrap();
@@ -395,7 +498,7 @@ impl<'a> Runner {
         let push_map = bo.map.byte_offset(push_offset.try_into().unwrap());
         std::ptr::copy(p.as_ptr(), push_map.cast(), p.len());
 
-        let res = self.exec(push_addr, (p.len() * 4).try_into().unwrap());
+        let res = self.ctx.exec(push_addr, (p.len() * 4).try_into().unwrap());
 
         // Always copy the data back to the caller, even if exec fails
         let data_map = bo.map.byte_offset(data_offset.try_into().unwrap());