nak: Improve WS abstractions in hw_runner

Reviewed-by: Lorenzo Rossi <snowycoder@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34129>
This commit is contained in:
Faith Ekstrand 2025-03-21 12:32:01 -05:00 committed by Marge Bot
parent c1d64053f2
commit d8fef0a26c

View file

@ -13,10 +13,12 @@ use nvidia_headers::classes::clc6c0::mthd as clc6c0;
use nvidia_headers::classes::clc6c0::AMPERE_COMPUTE_A;
use std::io;
use std::ops::Deref;
use std::ptr;
use std::ptr::NonNull;
use std::slice;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;
use std::sync::{Arc, Mutex};
unsafe fn is_nvidia_device(dev: drmDevicePtr) -> bool {
match (*dev).bustype as u32 {
@ -36,127 +38,92 @@ pub struct CB0 {
pub invocations: u32,
}
struct BO<'a> {
run: &'a Runner,
bo: NonNull<nouveau_ws_bo>,
pub addr: u64,
pub map: *mut std::os::raw::c_void,
/// Fixed-capacity list of DRM device pointers filled in by `drmGetDevices`.
///
/// Only the first `num_devices` entries of `devices` are exposed through
/// `iter()` and `Deref`; the remaining slots keep their zero-initialized value.
struct DrmDevices {
// Number of leading entries in `devices` that were reported by libdrm.
num_devices: usize,
// Backing storage; at most 16 devices can be enumerated.
devices: [drmDevicePtr; 16],
}
impl<'a> BO<'a> {
fn new(run: &'a Runner, size: u64) -> io::Result<BO<'a>> {
let size = size.next_multiple_of(4096);
let mut map: *mut std::os::raw::c_void = std::ptr::null_mut();
let bo = unsafe {
nouveau_ws_bo_new_mapped(
run.dev.as_ptr(),
size,
0, // align
NOUVEAU_WS_BO_GART,
NOUVEAU_WS_BO_RDWR,
ptr::from_mut(&mut map),
)
};
let Some(bo) = NonNull::new(bo) else {
return Err(io::Error::last_os_error());
};
assert!(!map.is_null());
let addr = run.next_addr.fetch_add(size, Ordering::Relaxed);
assert!(addr % 4096 == 0);
impl DrmDevices {
fn get() -> io::Result<Self> {
unsafe {
nouveau_ws_bo_bind_vma(
run.dev.as_ptr(),
bo.as_ptr(),
addr,
size,
0, // bo_offset
0, // pte_kind
let mut devices: [drmDevicePtr; 16] = std::mem::zeroed();
let num_devices = drmGetDevices(
devices.as_mut_ptr(),
devices.len().try_into().unwrap(),
);
if num_devices < 0 {
    // libdrm reports enumeration failures as a negated errno in the return
    // value; the thread-local errno is not guaranteed to still describe
    // the failure, so build the error from the return value itself.
    return Err(io::Error::from_raw_os_error(-num_devices));
}
Ok(DrmDevices {
num_devices: num_devices.try_into().unwrap(),
devices,
})
}
}
Ok(BO { run, bo, addr, map })
fn iter(&self) -> slice::Iter<'_, drmDevicePtr> {
self.devices[..self.num_devices].iter()
}
}
impl Drop for BO<'_> {
impl Deref for DrmDevices {
    type Target = [drmDevicePtr];

    /// Exposes only the populated prefix of the backing array as a slice.
    fn deref(&self) -> &Self::Target {
        let populated = self.num_devices;
        &self.devices[..populated]
    }
}
impl Drop for DrmDevices {
fn drop(&mut self) {
unsafe {
nouveau_ws_bo_unbind_vma(
self.run.dev.as_ptr(),
self.addr,
self.bo.as_ref().size,
drmFreeDevices(
self.devices.as_mut_ptr(),
self.num_devices.try_into().unwrap(),
);
nouveau_ws_bo_destroy(self.bo.as_ptr());
}
}
}
pub struct Runner {
struct Device {
dev: NonNull<nouveau_ws_device>,
ctx: NonNull<nouveau_ws_context>,
syncobj: u32,
sync_value: Mutex<u64>,
next_addr: AtomicU64,
}
impl<'a> Runner {
pub fn new(dev_id: Option<usize>) -> Runner {
impl Device {
pub fn new(dev_id: Option<usize>) -> io::Result<Arc<Self>> {
let drm_devices = DrmDevices::get()?;
unsafe {
let mut drm_devices: [drmDevicePtr; 16] = std::mem::zeroed();
let num_drm_devices = drmGetDevices(
drm_devices.as_mut_ptr(),
drm_devices.len().try_into().unwrap(),
);
assert!(num_drm_devices >= 0, "Failed to enumerate DRM devices");
let num_drm_devices: usize = num_drm_devices.try_into().unwrap();
let drm_dev = if let Some(dev_id) = dev_id {
assert!(dev_id < num_drm_devices, "Unknown device {dev_id}");
assert!(
is_nvidia_device(drm_devices[dev_id]),
"Device {dev_id} is not an NVIDIA device",
);
if dev_id >= drm_devices.len() {
    // `io::Error::new` stores the message verbatim, so the index has to
    // be interpolated with `format!`; a bare literal would surface the
    // placeholder text "{dev_id}" to the user.
    return Err(io::Error::new(
        io::ErrorKind::NotFound,
        format!("Unknown device {dev_id}"),
    ));
}
drm_devices[dev_id]
} else {
*drm_devices
.iter()
.find(|dev| is_nvidia_device(**dev))
.expect("Failed to find an NVIDIA device")
if let Some(dev) =
drm_devices.iter().find(|dev| is_nvidia_device(**dev))
{
*dev
} else {
return Err(io::Error::new(
io::ErrorKind::NotFound,
"Failed to find an NVIDIA device",
));
}
};
let dev = nouveau_ws_device_new(drm_dev);
let dev =
NonNull::new(dev).expect("Failed to create nouveau device");
let Some(dev) = NonNull::new(dev) else {
return Err(io::Error::last_os_error());
};
drmFreeDevices(
drm_devices.as_mut_ptr(),
num_drm_devices.try_into().unwrap(),
);
let mut ctx: *mut nouveau_ws_context = std::ptr::null_mut();
let err = nouveau_ws_context_create(
dev.as_ptr(),
NOUVEAU_WS_ENGINE_COMPUTE,
&mut ctx,
);
assert!(err == 0, "Failed to create nouveau context");
let ctx = NonNull::new(ctx).unwrap();
let mut syncobj = 0_u32;
let err = drmSyncobjCreate(dev.as_ref().fd, 0, &mut syncobj);
assert!(err == 0, "Failed to create syncobj");
Runner {
Ok(Arc::new(Device {
dev,
ctx,
syncobj,
sync_value: Mutex::new(0),
next_addr: AtomicU64::new(1 << 16),
}
}))
}
}
@ -164,7 +131,59 @@ impl<'a> Runner {
unsafe { &self.dev.as_ref().info }
}
fn exec(&self, addr: u64, len: u16) -> io::Result<()> {
/// Returns the `fd` field of the underlying `nouveau_ws_device`.
///
/// This is the descriptor the rest of the file passes to the `drm*` calls.
fn fd(&self) -> i32 {
// NOTE(review): relies on `self.dev` still pointing at a live device; it is
// only destroyed in `Drop for Device`.
unsafe { self.dev.as_ref().fd }
}
/// Returns the raw `nouveau_ws_device` pointer for passing to the winsys
/// C functions. Ownership stays with this `Device`.
fn ws_dev(&self) -> *mut nouveau_ws_device {
self.dev.as_ptr()
}
}
impl Drop for Device {
    /// Destroys the wrapped winsys device when the last owner goes away.
    fn drop(&mut self) {
        let raw_dev = self.ws_dev();
        unsafe {
            nouveau_ws_device_destroy(raw_dev);
        }
    }
}
/// A winsys context plus the syncobj used to wait on work submitted to it.
struct Context {
// Shared handle to the device the context was created on; also supplies the
// DRM fd used for the syncobj and exec calls.
dev: Arc<Device>,
// Raw context returned by `nouveau_ws_context_create`; destroyed on drop.
ctx: NonNull<nouveau_ws_context>,
// Syncobj handle created with `drmSyncobjCreate`; destroyed on drop.
syncobj: u32,
// Counter that `exec` increments under the lock for each submission and then
// passes to `drmSyncobjTimelineWait`.
sync_value: Mutex<u64>,
}
impl Context {
/// Creates a compute context on `dev` along with the syncobj used to wait
/// for submitted work.
///
/// # Errors
///
/// Returns the OS error observed right after `nouveau_ws_context_create` or
/// `drmSyncobjCreate` reports failure. If syncobj creation fails, the
/// context that was just created is destroyed before returning.
///
/// # Panics
///
/// Panics if context creation reports success but hands back a null pointer.
pub fn new(dev: Arc<Device>) -> io::Result<Self> {
    unsafe {
        let mut ctx: *mut nouveau_ws_context = std::ptr::null_mut();
        let err = nouveau_ws_context_create(
            dev.ws_dev(),
            NOUVEAU_WS_ENGINE_COMPUTE,
            &mut ctx,
        );
        if err != 0 {
            return Err(io::Error::last_os_error());
        }
        let ctx = NonNull::new(ctx).expect(
            "nouveau_ws_context_create reported success but returned NULL",
        );

        let mut syncobj = 0_u32;
        let err = drmSyncobjCreate(dev.fd(), 0, &mut syncobj);
        if err != 0 {
            // Snapshot errno *before* cleanup: destroying the context may
            // itself make system calls that overwrite it, which would make
            // the reported error describe the cleanup instead of the failure.
            let os_err = io::Error::last_os_error();
            nouveau_ws_context_destroy(ctx.as_ptr());
            return Err(os_err);
        }

        Ok(Context {
            dev,
            ctx,
            syncobj,
            sync_value: Mutex::new(0),
        })
    }
}
pub fn exec(&self, addr: u64, len: u16) -> io::Result<()> {
let sync_value = unsafe {
let mut sync_value = self.sync_value.lock().unwrap();
*sync_value += 1;
@ -189,7 +208,7 @@ impl<'a> Runner {
sig_ptr: &sig as *const _ as u64,
};
let err = drmIoctl(
self.dev.as_ref().fd,
self.dev.fd(),
DRM_RS_IOCTL_NOUVEAU_EXEC.into(),
&exec as *const _ as *mut std::os::raw::c_void,
);
@ -202,7 +221,7 @@ impl<'a> Runner {
unsafe {
let err = drmSyncobjTimelineWait(
self.dev.as_ref().fd,
self.dev.fd(),
&self.syncobj as *const _ as *mut _,
&sync_value as *const _ as *mut _,
1, // num_handles
@ -225,7 +244,7 @@ impl<'a> Runner {
sig_ptr: 0,
};
let err = drmIoctl(
self.dev.as_ref().fd,
self.dev.fd(),
DRM_RS_IOCTL_NOUVEAU_EXEC.into(),
ptr::from_mut(&mut exec).cast(),
);
@ -236,6 +255,90 @@ impl<'a> Runner {
Ok(())
}
}
impl Drop for Context {
    /// Tears down the syncobj first and the winsys context second.
    fn drop(&mut self) {
        let drm_fd = self.dev.fd();
        let raw_ctx = self.ctx.as_ptr();
        unsafe {
            drmSyncobjDestroy(drm_fd, self.syncobj);
            nouveau_ws_context_destroy(raw_ctx);
        }
    }
}
/// A mapped buffer object bound at a fixed address in the device's VA space.
struct BO {
// Device the buffer was allocated on; kept so the buffer can be unbound on drop.
dev: Arc<Device>,
// Raw buffer object returned by `nouveau_ws_bo_new_mapped`.
bo: NonNull<nouveau_ws_bo>,
// Address the buffer was bound at with `nouveau_ws_bo_bind_vma`.
pub addr: u64,
// CPU mapping filled in by `nouveau_ws_bo_new_mapped`; checked non-null at creation.
pub map: *mut std::os::raw::c_void,
}
impl BO {
    /// Allocates a CPU-mapped buffer of at least `size` bytes on `dev` and
    /// binds it at a freshly reserved address.
    ///
    /// The size is rounded up to a multiple of 4096 before allocation. An
    /// allocation failure is reported as the last OS error.
    fn new(dev: Arc<Device>, size: u64) -> io::Result<BO> {
        let size = size.next_multiple_of(4096);

        let mut map = std::ptr::null_mut::<std::os::raw::c_void>();
        let raw_bo = unsafe {
            nouveau_ws_bo_new_mapped(
                dev.ws_dev(),
                size,
                0, // align
                NOUVEAU_WS_BO_GART,
                NOUVEAU_WS_BO_RDWR,
                ptr::from_mut(&mut map),
            )
        };
        let bo = match NonNull::new(raw_bo) {
            Some(bo) => bo,
            None => return Err(io::Error::last_os_error()),
        };
        assert!(!map.is_null());

        // Reserve a unique address range from the device-wide bump counter.
        let addr = dev.next_addr.fetch_add(size, Ordering::Relaxed);
        assert!(addr % 4096 == 0);

        unsafe {
            nouveau_ws_bo_bind_vma(
                dev.ws_dev(),
                bo.as_ptr(),
                addr,
                size,
                0, // bo_offset
                0, // pte_kind
            );
        }

        Ok(BO { dev, bo, addr, map })
    }
}
impl Drop for BO {
    /// Unbinds the buffer from its address range and then destroys it.
    fn drop(&mut self) {
        // Go through the `ws_dev()` accessor, as `BO::new` does, rather than
        // reaching into `Device`'s private pointer field.
        let ws_dev = self.dev.ws_dev();
        unsafe {
            // Read the size while the buffer object is still alive.
            let size = self.bo.as_ref().size;
            nouveau_ws_bo_unbind_vma(ws_dev, self.addr, size);
            nouveau_ws_bo_destroy(self.bo.as_ptr());
        }
    }
}
/// Owns the device and the context used to run work on it.
pub struct Runner {
// Shared device handle; cloned into each buffer allocated by `run_raw`.
dev: Arc<Device>,
// Context through which push buffers are executed.
ctx: Context,
}
impl<'a> Runner {
pub fn new(dev_id: Option<usize>) -> Runner {
let dev = Device::new(dev_id).expect("Failed to create nouveau device");
let ctx = Context::new(dev.clone()).expect("Failed to create context");
Runner { dev, ctx }
}
/// Returns the device information of the device this runner was created on,
/// by delegating to `Device::dev_info`.
pub fn dev_info(&self) -> &nv_device_info {
self.dev.dev_info()
}
pub unsafe fn run_raw(
&self,
@ -270,7 +373,7 @@ impl<'a> Runner {
let data_offset = size.next_multiple_of(256);
size = data_offset + data_size;
let bo = BO::new(self, size.try_into().unwrap())?;
let bo = BO::new(self.dev.clone(), size.try_into().unwrap())?;
// Copy the data from the caller into our BO
let data_addr = bo.addr + u64::try_from(data_offset).unwrap();
@ -395,7 +498,7 @@ impl<'a> Runner {
let push_map = bo.map.byte_offset(push_offset.try_into().unwrap());
std::ptr::copy(p.as_ptr(), push_map.cast(), p.len());
let res = self.exec(push_addr, (p.len() * 4).try_into().unwrap());
let res = self.ctx.exec(push_addr, (p.len() * 4).try_into().unwrap());
// Always copy the data back to the caller, even if exec fails
let data_map = bo.map.byte_offset(data_offset.try_into().unwrap());