rusticl: add support for coarse-grain buffer SVM

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32942>
Author: Karol Herbst, 2024-12-16 14:56:24 +01:00 (committed by Marge Bot)
Parent: 6e13e438d1
Commit: da4de8d7e3
14 changed files with 685 additions and 157 deletions


@ -756,7 +756,7 @@ Rusticl OpenCL 1.2 -- all DONE:
Rusticl OpenCL 2.0 -- all DONE:
Shared virtual memory in progress (nvc0, llvmpipe)
Shared virtual memory DONE (nvc0, llvmpipe)
Device queues not started
- cl_khr_create_command_queue DONE
- Additional queries for clGetDeviceInfo DONE


@ -28,3 +28,4 @@ VK_EXT_texel_buffer_alignment on panvk
cl_khr_kernel_clock on freedreno, iris, llvmpipe, nvc0, panfrost, radeonsi and zink with llvm-19 or newer
GL_KHR_texture_compression_astc_hdr on panfrost and asahi
cl_ext_buffer_device_address on llvmpipe and zink
Completed OpenCL 2.0 coarse grain buffer SVM support


@ -288,16 +288,17 @@ unsafe impl CLInfo<cl_device_info> for cl_device_id {
}
}
CL_DEVICE_SVM_CAPABILITIES | CL_DEVICE_SVM_CAPABILITIES_ARM => {
v.write::<cl_device_svm_capabilities>(
if dev.svm_supported() {
CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
| CL_DEVICE_SVM_FINE_GRAIN_BUFFER
| CL_DEVICE_SVM_FINE_GRAIN_SYSTEM
} else {
0
}
.into(),
)
let mut caps = 0;
if dev.api_svm_supported() {
caps |= CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
}
if dev.system_svm_supported() {
caps |= CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
}
v.write::<cl_device_svm_capabilities>(caps.into())
}
CL_DEVICE_TYPE => {
// CL_DEVICE_TYPE_DEFAULT ... will never be returned in CL_DEVICE_TYPE for any

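The query above now builds the SVM capability mask from two independent checks. Below is a minimal standalone sketch of that logic, for illustration only: the constants mirror the `CL_DEVICE_SVM_*` bit values from the OpenCL headers, while `svm_caps`, `api_svm`, and `system_svm` are made-up names standing in for the device methods.

```rust
const CL_DEVICE_SVM_COARSE_GRAIN_BUFFER: u64 = 1 << 0;
const CL_DEVICE_SVM_FINE_GRAIN_BUFFER: u64 = 1 << 1;
const CL_DEVICE_SVM_FINE_GRAIN_SYSTEM: u64 = 1 << 2;

/// Hypothetical helper mirroring the match arm above.
fn svm_caps(api_svm: bool, system_svm: bool) -> u64 {
    let mut caps = 0;
    if api_svm {
        // Coarse-grain buffer SVM only needs a shared VA range plus explicit map/unmap.
        caps |= CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
    }
    if system_svm {
        // Fine-grain support still requires driver-side system SVM.
        caps |= CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
    }
    caps
}

fn main() {
    // A device with VM support but without system SVM now reports coarse-grain only.
    assert_eq!(svm_caps(true, false), CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
}
```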

@ -554,7 +554,7 @@ fn set_kernel_exec_info(
// CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM if no devices in the context associated with kernel
// support SVM.
let check_svm_support = || {
if devs.iter().all(|dev| !dev.svm_supported()) {
if devs.iter().all(|dev| !dev.api_svm_supported()) {
Err(CL_INVALID_OPERATION)
} else {
Ok(())
@ -577,16 +577,36 @@ fn set_kernel_exec_info(
CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => {
check_svm_support()?;
// reuse the existing container so we avoid reallocations
let mut svms = k.svms.lock().unwrap();
// To specify that no SVM allocations will be accessed by a kernel other than those set
// as kernel arguments, specify an empty set by passing param_value_size equal to zero
// and param_value equal to NULL.
if !param_value.is_null() || param_value_size != 0 {
let _ = unsafe {
if param_value_size == 0 && param_value.is_null() {
svms.clear();
} else {
let pointers = unsafe {
cl_slice::from_raw_parts_bytes_len::<*const c_void>(
param_value,
param_value_size,
)?
};
// We need to clear _after_ the error checking above. We could just assign a new
// container, however we also want to reuse the allocations.
svms.clear();
pointers
.iter()
// Each of the pointers can be the pointer returned by clSVMAlloc or can be a
// pointer to the middle of an SVM allocation. It is sufficient to specify one
// pointer for each SVM allocation.
//
// So we'll simply fetch the base and store that one.
.filter_map(|&handle| k.prog.context.find_svm_alloc(handle as usize))
.for_each(|(base, _)| {
svms.insert(base as usize);
});
}
}
CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM
@ -600,14 +620,19 @@ fn set_kernel_exec_info(
if val.len() != 1 {
return Err(CL_INVALID_VALUE);
}
// CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and
// param_value is CL_TRUE but no devices in context associated with kernel support
// fine-grain system SVM allocations.
if val[0] == CL_TRUE && devs.iter().all(|dev| !dev.system_svm_supported()) {
return Err(CL_INVALID_OPERATION);
}
}
// CL_INVALID_VALUE if param_name is not valid
_ => return Err(CL_INVALID_VALUE),
}
Ok(())
// CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and param_value is CL_TRUE but no devices in context associated with kernel support fine-grain system SVM allocations.
}
#[cl_entrypoint(clEnqueueNDRangeKernel)]

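For CL_KERNEL_EXEC_INFO_SVM_PTRS the hunk above reads `param_value` as a raw array of `void *` and then collapses each pointer to its allocation base. The sketch below covers only the non-empty parsing step, using plain `std` instead of rusticl's internal `cl_slice::from_raw_parts_bytes_len`; the helper name and exact error handling are illustrative assumptions.

```rust
use std::ffi::c_void;
use std::slice;

const CL_INVALID_VALUE: i32 = -30; // value from the OpenCL headers

/// Hypothetical stand-in for cl_slice::from_raw_parts_bytes_len::<*const c_void>().
///
/// SAFETY: the caller must guarantee `param_value` is suitably aligned, valid
/// for `param_value_size` bytes, and outlives `'a`.
unsafe fn pointers_from_param<'a>(
    param_value: *const c_void,
    param_value_size: usize,
) -> Result<&'a [*const c_void], i32> {
    let entry = std::mem::size_of::<*const c_void>();
    // The buffer has to hold a whole number of pointer-sized entries.
    if param_value.is_null() || param_value_size == 0 || param_value_size % entry != 0 {
        return Err(CL_INVALID_VALUE);
    }
    Ok(slice::from_raw_parts(param_value.cast(), param_value_size / entry))
}
```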

@ -21,7 +21,7 @@ use rusticl_proc_macros::cl_info_entrypoint;
use std::cmp;
use std::cmp::Ordering;
use std::mem::{self, MaybeUninit};
use std::mem;
use std::num::NonZeroU64;
use std::os::raw::c_void;
use std::ptr;
@ -297,13 +297,13 @@ fn create_buffer_with_properties(
// or if CL_MEM_USE_HOST_PTR is set in flags and host_ptr is a pointer returned by clSVMAlloc
// and size is greater than the size passed to clSVMAlloc.
if let Some((svm_ptr, svm_layout)) = c.find_svm_alloc(host_ptr as usize) {
if let Some((svm_ptr, alloc_size)) = c.find_svm_alloc(host_ptr as usize) {
// SAFETY: they are part of the same allocation, and because host_ptr >= svm_ptr we can cast
// to usize.
let diff = unsafe { host_ptr.byte_offset_from(svm_ptr) } as usize;
// technically we don't have to account for the offset, but it's almost for free.
if size > svm_layout - diff {
if size > alloc_size - diff {
return Err(CL_INVALID_BUFFER_SIZE);
}
}
@ -2376,13 +2376,16 @@ pub fn svm_alloc(
return Err(CL_INVALID_VALUE);
}
let alignment = if alignment != 0 {
alignment as usize
} else {
// When alignment is 0, the size of the largest supported type is used.
// In the case of the full profile, that's `long16`.
mem::size_of::<[u64; 16]>()
};
// When alignment is 0, the size of the largest supported type is used.
// In the case of the full profile, that's `long16`.
let alignment = NonZeroU64::new(alignment.into())
.unwrap_or(NonZeroU64::new(mem::size_of::<[u64; 16]>() as u64).unwrap());
// size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value for any device in context.
let size = NonZeroU64::new(size as u64).ok_or(CL_INVALID_VALUE)?;
if size.get() > c.max_mem_alloc() {
return Err(CL_INVALID_VALUE);
}
c.alloc_svm_ptr(size, alignment)
@ -2422,7 +2425,7 @@ fn enqueue_svm_free_impl(
}
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
if !q.device.svm_supported() {
if !q.device.api_svm_supported() {
return Err(CL_INVALID_OPERATION);
}
@ -2529,47 +2532,33 @@ fn enqueue_svm_memcpy_impl(
let block = check_cl_bool(blocking_copy).ok_or(CL_INVALID_VALUE)?;
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
if !q.device.svm_supported() {
if !q.device.api_svm_supported() {
return Err(CL_INVALID_OPERATION);
}
// CL_INVALID_VALUE if dst_ptr or src_ptr is NULL.
if src_ptr.is_null() || dst_ptr.is_null() {
return Err(CL_INVALID_VALUE);
}
let src_ptr = src_ptr as usize;
let dst_ptr = dst_ptr as usize;
// CL_MEM_COPY_OVERLAP if the values specified for dst_ptr, src_ptr and size result in an
// overlapping copy.
let dst_ptr_addr = dst_ptr as usize;
let src_ptr_addr = src_ptr as usize;
if (src_ptr_addr <= dst_ptr_addr && dst_ptr_addr < src_ptr_addr + size)
|| (dst_ptr_addr <= src_ptr_addr && src_ptr_addr < dst_ptr_addr + size)
if (src_ptr <= dst_ptr && dst_ptr < src_ptr + size)
|| (dst_ptr <= src_ptr && src_ptr < dst_ptr + size)
{
return Err(CL_MEM_COPY_OVERLAP);
}
// CAST: We have no idea about the type or initialization status of these bytes.
// MaybeUninit<u8> is the safe bet.
let src_ptr = src_ptr.cast::<MaybeUninit<u8>>();
// CAST: We have no idea about the type or initialization status of these bytes.
// MaybeUninit<u8> is the safe bet.
let dst_ptr = dst_ptr.cast::<MaybeUninit<u8>>();
// SAFETY: It is up to the application to ensure the memory is valid to read for `size` bytes
// and that it doesn't modify it until the command has completed.
let src = unsafe { cl_slice::from_raw_parts(src_ptr, size)? };
// SAFETY: We've ensured there's no aliasing between src and dst. It is up to the application
// to ensure the memory is valid to read and write for `size` bytes and that it doesn't modify
// or read from it until the command has completed.
let dst = unsafe { cl_slice::from_raw_parts_mut(dst_ptr, size)? };
create_and_queue(
q,
cmd_type,
evs,
event,
block,
Box::new(move |_, _| {
dst.copy_from_slice(src);
Ok(())
}),
Box::new(move |q, ctx| q.context.copy_svm(ctx, src_ptr, dst_ptr, size)),
)
}
@ -2636,10 +2625,20 @@ fn enqueue_svm_mem_fill_impl(
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
if !q.device.svm_supported() {
if !q.device.api_svm_supported() {
return Err(CL_INVALID_OPERATION);
}
// CL_INVALID_VALUE if svm_ptr is NULL.
if svm_ptr.is_null() {
return Err(CL_INVALID_VALUE);
}
// CL_INVALID_VALUE if svm_ptr is not aligned to pattern_size bytes.
if !is_aligned_to(svm_ptr, pattern_size) {
return Err(CL_INVALID_VALUE);
}
// CL_INVALID_VALUE if pattern is NULL [...]
if pattern.is_null() {
return Err(CL_INVALID_VALUE);
@ -2701,35 +2700,9 @@ fn enqueue_svm_mem_fill_impl(
// `pattern_size` bytes and properly initialized.
// Creating a bitwise copy can't create memory safety issues, since `Pattern` is `Copy`.
let pattern = unsafe { pattern_ptr.read_unaligned() };
let svm_ptr = svm_ptr as usize;
// CAST: Same as with `pattern`, we don't know the exact type of `svm_ptr`, but we do
// know it's fine if we choose the same type here. The application might reasonably
// give us uninitialized memory though, so cast to a `MaybeUninit<Pattern>`, which has
// the same layout as `Pattern`.
let svm_ptr = svm_ptr.cast::<MaybeUninit<Pattern>>();
// SAFETY: It is the calling application's responsibility to ensure that `svm_ptr` is
// valid for reads and writes up to `size` bytes.
// Since `pattern_size == mem::size_of::<Pattern>()` and `MaybeUninit<Pattern>` has the
// same layout as `Pattern`, we know that
// `size / pattern_size * mem::size_of<MaybeUninit<Pattern>>` equals `size`.
//
// Since we're creating a `&[MaybeUninit<Pattern>]` the initialization status does not
// matter.
//
// From here on out we only access the referenced memory though this slice. In
// particular, since we've made a copy of `pattern`, it doesn't matter if the memory
// region referenced by `pattern` aliases the one referenced by this slice. It is up to
// the application not to access it at all until this command has been completed.
let svm_slice = unsafe { cl_slice::from_raw_parts_mut(svm_ptr, size / pattern_size)? };
Box::new(move |_, _| {
for x in svm_slice {
x.write(pattern);
}
Ok(())
})
Box::new(move |q, ctx| q.context.clear_svm(ctx, svm_ptr, size, pattern.0))
}};
}
@ -2817,7 +2790,7 @@ fn enqueue_svm_map_impl(
let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?;
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
if !q.device.svm_supported() {
if !q.device.api_svm_supported() {
return Err(CL_INVALID_OPERATION);
}
@ -2834,7 +2807,15 @@ fn enqueue_svm_map_impl(
// ... or if values specified in map_flags are not valid.
validate_map_flags_common(flags)?;
create_and_queue(q, cmd_type, evs, event, block, Box::new(|_, _| Ok(())))
let svm_ptr = svm_ptr as usize;
create_and_queue(
q,
cmd_type,
evs,
event,
block,
Box::new(move |q, ctx| q.context.copy_svm_to_host(ctx, svm_ptr, flags)),
)
}
#[cl_entrypoint(clEnqueueSVMMap)]
@ -2897,7 +2878,7 @@ fn enqueue_svm_unmap_impl(
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
if !q.device.svm_supported() {
if !q.device.api_svm_supported() {
return Err(CL_INVALID_OPERATION);
}
@ -2906,7 +2887,15 @@ fn enqueue_svm_unmap_impl(
return Err(CL_INVALID_VALUE);
}
create_and_queue(q, cmd_type, evs, event, false, Box::new(|_, _| Ok(())))
create_and_queue(
q,
cmd_type,
evs,
event,
false,
// TODO: we _could_ migrate the content somewhere, but it's really pointless to do
Box::new(move |_, _| Ok(())),
)
}
#[cl_entrypoint(clEnqueueSVMUnmap)]
@ -2960,7 +2949,7 @@ fn enqueue_svm_migrate_mem(
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
if !q.device.svm_supported() {
if !q.device.api_svm_supported() {
return Err(CL_INVALID_OPERATION);
}
@ -3010,9 +2999,9 @@ fn enqueue_svm_migrate_mem(
evs,
event,
false,
Box::new(move |_, ctx| {
ctx.svm_migrate(&svm_pointers, &sizes, to_device, content_undefined);
Ok(())
Box::new(move |q, ctx| {
q.context
.migrate_svm(ctx, svm_pointers, sizes, to_device, content_undefined)
}),
)
}

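The rewritten `enqueue_svm_memcpy_impl` above keeps the spec-mandated overlap check but now hands raw addresses to the context instead of building host slices. The check is easy to restate on its own; the helper below is illustrative, not rusticl API.

```rust
/// Two half-open ranges [src, src + size) and [dst, dst + size) overlap exactly
/// when either start address falls inside the other range.
fn copy_overlaps(src: usize, dst: usize, size: usize) -> bool {
    (src <= dst && dst < src + size) || (dst <= src && src < dst + size)
}

fn main() {
    assert!(copy_overlaps(0x1000, 0x10ff, 0x100)); // dst starts inside src
    assert!(!copy_overlaps(0x1000, 0x1100, 0x100)); // back-to-back, no overlap
    assert!(copy_overlaps(0x1100, 0x1000, 0x200)); // src starts inside dst
}
```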

@ -1,13 +1,15 @@
use crate::api::icd::*;
use crate::api::types::DeleteContextCB;
use crate::api::util::checked_compare;
use crate::api::util::bit_check;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::gl::*;
use crate::core::memory::*;
use crate::core::queue::*;
use crate::core::util::*;
use crate::impl_cl_type_trait;
use mesa_rust::pipe::context::RWFlags;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::ResourceType;
use mesa_rust_gen::*;
@ -19,15 +21,21 @@ use rusticl_opencl_gen::*;
use std::alloc;
use std::alloc::Layout;
use std::cmp::Ordering;
use std::cmp;
use std::collections::HashMap;
use std::convert::TryInto;
use std::ffi::c_int;
use std::mem;
use std::num::NonZeroU64;
use std::os::raw::c_void;
use std::ptr;
use std::slice;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::Weak;
use super::platform::Platform;
struct TrackedBDAAlloc {
buffer: Weak<Buffer>,
size: cl_mem_device_address_ext,
@ -39,6 +47,53 @@ impl AllocSize<cl_mem_device_address_ext> for TrackedBDAAlloc {
}
}
struct SVMAlloc {
layout: Layout,
vma: Option<NonZeroU64>,
alloc: Arc<Allocation>,
}
impl SVMAlloc {
pub fn size(&self) -> usize {
self.layout.size()
}
}
impl Drop for SVMAlloc {
fn drop(&mut self) {
if let Some(vma) = self.vma {
let address = vma.get() as usize as *mut c_void;
unsafe {
debug_assert_eq!(0, munmap(address, self.size()));
}
Platform::get()
.vm
.as_ref()
.unwrap()
.lock()
.unwrap()
.free(vma, NonZeroU64::new(self.size() as u64).unwrap());
} else {
// SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout
// from the allocation
unsafe {
alloc::dealloc(self.alloc.host_ptr().cast(), self.layout);
}
}
}
}
impl AllocSize<usize> for SVMAlloc {
fn size(&self) -> usize {
SVMAlloc::size(self)
}
}
struct SVMContext {
svm_ptrs: TrackedPointers<usize, SVMAlloc>,
}
pub struct Context {
pub base: CLObjectBase<CL_INVALID_CONTEXT>,
pub devs: Vec<&'static Device>,
@ -48,7 +103,7 @@ pub struct Context {
bda_ptrs: Mutex<
HashMap<&'static Device, TrackedPointers<cl_mem_device_address_ext, TrackedBDAAlloc>>,
>,
svm_ptrs: Mutex<TrackedPointers<usize, Layout>>,
svm: Mutex<SVMContext>,
pub gl_ctx_manager: Option<GLCtxManager>,
}
@ -66,7 +121,9 @@ impl Context {
properties: properties,
dtors: Mutex::new(Vec::new()),
bda_ptrs: Mutex::new(HashMap::new()),
svm_ptrs: Mutex::new(TrackedPointers::new()),
svm: Mutex::new(SVMContext {
svm_ptrs: TrackedPointers::new(),
}),
gl_ctx_manager: gl_ctx_manager,
})
}
@ -207,13 +264,21 @@ impl Context {
}
pub fn has_svm_devs(&self) -> bool {
self.devs.iter().any(|dev| dev.svm_supported())
self.devs.iter().any(|dev| dev.api_svm_supported())
}
pub fn alloc_svm_ptr(&self, size: usize, alignment: usize) -> CLResult<*mut c_void> {
pub fn alloc_svm_ptr(
&self,
size: NonZeroU64,
mut alignment: NonZeroU64,
) -> CLResult<*mut c_void> {
// TODO: choose better alignment in regards to huge pages
alignment = cmp::max(alignment, NonZeroU64::new(0x1000).unwrap());
// clSVMAlloc will fail if alignment is not a power of two.
// `from_size_align()` verifies this condition is met.
let layout = Layout::from_size_align(size, alignment).or(Err(CL_INVALID_VALUE))?;
let layout = Layout::from_size_align(size.get() as usize, alignment.get() as usize)
.or(Err(CL_INVALID_VALUE))?;
// clSVMAlloc will fail if size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value
// for any device in context.
@ -222,38 +287,252 @@ impl Context {
// `from_size_align()` ensures that the allocation will fit in host memory,
// the maximum allocation may be smaller due to limitations from gallium or
// devices.
let size_aligned = layout.pad_to_align().size();
if size == 0 || checked_compare(size_aligned, Ordering::Greater, self.max_mem_alloc()) {
return Err(CL_INVALID_VALUE);
}
// let size_aligned = layout.pad_to_align().size();
// SAFETY: `size` is verified to be non-zero and the returned pointer is not
// expected to point to initialized memory.
let ptr = unsafe { alloc::alloc(layout) };
// allocate a vma if one of the devices doesn't support system SVM
let vma = if let Some(vm) = &Platform::get().vm {
Some(
vm.lock()
.unwrap()
.alloc(size, alignment)
.ok_or(CL_OUT_OF_RESOURCES)?,
)
} else {
None
};
let ptr: *mut c_void = if let Some(vma) = &vma {
let res = unsafe {
mmap(
vma.get() as usize as *mut c_void,
size.get() as usize,
(PROT_READ | PROT_WRITE) as c_int,
// MAP_FIXED_NOREPLACE needs 4.17
(MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE | MAP_NORESERVE) as c_int,
-1,
0,
)
};
// mmap returns MAP_FAILED on error which is -1
if res as usize == usize::MAX {
return Err(CL_OUT_OF_HOST_MEMORY);
}
res.cast()
} else {
unsafe { alloc::alloc(layout) }.cast()
};
if ptr.is_null() {
Err(CL_OUT_OF_HOST_MEMORY)
} else {
Ok(ptr.cast())
return Err(CL_OUT_OF_HOST_MEMORY);
}
let address = ptr as u64;
let mut buffers = HashMap::new();
for &dev in &self.devs {
let size: u32 = size.get().try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
// For system SVM devices we simply create a userptr resource.
let res = if dev.system_svm_supported() {
dev.screen()
.resource_create_buffer_from_user(size, ptr, PIPE_BIND_GLOBAL, 0)
} else {
dev.screen().resource_create_buffer(
size,
ResourceType::Normal,
PIPE_BIND_GLOBAL,
PIPE_RESOURCE_FLAG_FRONTEND_VM,
)
};
let res = res.ok_or(CL_OUT_OF_RESOURCES)?;
if !dev.system_svm_supported() {
if !dev.screen().resource_assign_vma(&res, address) {
return Err(CL_OUT_OF_RESOURCES);
}
}
buffers.insert(dev, Arc::new(res));
}
self.svm.lock().unwrap().svm_ptrs.insert(
ptr as usize,
SVMAlloc {
layout: layout,
vma,
alloc: Arc::new(Allocation::new(buffers, 0, ptr)),
},
);
Ok(ptr)
}
pub fn find_svm_alloc(&self, ptr: usize) -> Option<(*const c_void, usize)> {
self.svm_ptrs
pub fn copy_svm_to_dev(
&self,
ctx: &QueueContext,
ptr: usize,
) -> CLResult<Option<Arc<PipeResource>>> {
let svm = self.svm.lock().unwrap();
let Some(alloc) = svm.svm_ptrs.find_alloc_precise(ptr) else {
return Ok(None);
};
Ok(Some(Arc::clone(
alloc.alloc.get_res_for_access(ctx, RWFlags::RW)?,
)))
}
pub fn copy_svm_to_host(
&self,
ctx: &QueueContext,
svm_ptr: usize,
flags: cl_map_flags,
) -> CLResult<()> {
// no need to copy
if bit_check(flags, CL_MAP_WRITE_INVALIDATE_REGION) {
return Ok(());
}
let svm = self.svm.lock().unwrap();
let Some((_, alloc)) = svm.svm_ptrs.find_alloc(svm_ptr) else {
return Ok(());
};
alloc.alloc.migrate_to_hostptr(ctx, RWFlags::RW)
}
pub fn copy_svm(
&self,
ctx: &QueueContext,
src_addr: usize,
dst_addr: usize,
size: usize,
) -> CLResult<()> {
let svm = self.svm.lock().unwrap();
let src = svm.svm_ptrs.find_alloc(src_addr);
let dst = svm.svm_ptrs.find_alloc(dst_addr);
#[allow(clippy::collapsible_else_if)]
if let Some((src_base, src_alloc)) = src {
let src_res = src_alloc.alloc.get_res_for_access(ctx, RWFlags::RD)?;
let src_offset = src_addr - src_base;
if let Some((dst_base, dst_alloc)) = dst {
let dst_res = dst_alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
let dst_offset = dst_addr - dst_base;
ctx.resource_copy_buffer(
src_res,
src_offset as i32,
dst_res,
dst_offset as u32,
size as i32,
);
} else {
let map = ctx
.buffer_map(src_res, src_offset as i32, size as i32, RWFlags::RD)
.ok_or(CL_OUT_OF_HOST_MEMORY)?;
unsafe {
ptr::copy_nonoverlapping(map.ptr(), dst_addr as *mut c_void, size);
}
}
} else {
if let Some((dst_base, dst_alloc)) = dst {
let dst_res = dst_alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
let dst_offset = dst_addr - dst_base;
ctx.buffer_subdata(
dst_res,
dst_offset as u32,
src_addr as *const c_void,
size as u32,
);
} else {
unsafe {
ptr::copy(src_addr as *const c_void, dst_addr as *mut c_void, size);
}
}
}
Ok(())
}
pub fn clear_svm<const T: usize>(
&self,
ctx: &QueueContext,
svm_ptr: usize,
size: usize,
pattern: [u8; T],
) -> CLResult<()> {
let svm = self.svm.lock().unwrap();
if let Some((base, alloc)) = svm.svm_ptrs.find_alloc(svm_ptr) {
let res = alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
let offset = svm_ptr - base;
ctx.clear_buffer(res, &pattern, offset as u32, size as u32);
} else {
let slice = unsafe {
slice::from_raw_parts_mut(svm_ptr as *mut _, size / mem::size_of_val(&pattern))
};
slice.fill(pattern);
}
Ok(())
}
pub fn migrate_svm(
&self,
ctx: &QueueContext,
pointers: Vec<usize>,
sizes: Vec<usize>,
to_device: bool,
content_undefined: bool,
) -> CLResult<()> {
let svm = self.svm.lock().unwrap();
if ctx.dev.system_svm_supported() {
ctx.svm_migrate(&pointers, &sizes, to_device, content_undefined);
} else {
for ptr in pointers {
let Some((_, alloc)) = svm.svm_ptrs.find_alloc(ptr) else {
continue;
};
// we assume it's only read, so it remains valid on the host until future commands
// have different needs.
if to_device {
alloc.alloc.get_res_for_access(ctx, RWFlags::RD)?;
} else {
alloc.alloc.migrate_to_hostptr(ctx, RWFlags::RD)?;
}
}
}
Ok(())
}
pub fn get_svm_alloc(&self, ptr: usize) -> Option<(*mut c_void, Arc<Allocation>)> {
self.svm
.lock()
.unwrap()
.svm_ptrs
.find_alloc(ptr)
.map(|(ptr, layout)| (ptr as *const c_void, layout.size()))
.map(|(base, alloc)| (base as *mut c_void, Arc::clone(&alloc.alloc)))
}
pub fn find_svm_alloc(&self, ptr: usize) -> Option<(*mut c_void, usize)> {
self.svm
.lock()
.unwrap()
.svm_ptrs
.find_alloc(ptr)
.map(|(ptr, alloc)| (ptr as _, alloc.size()))
}
pub fn remove_svm_ptr(&self, ptr: usize) {
if let Some(layout) = self.svm_ptrs.lock().unwrap().remove(ptr) {
// SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout
// from the allocation
unsafe {
alloc::dealloc(ptr as *mut u8, layout);
}
}
self.svm.lock().unwrap().svm_ptrs.remove(ptr);
}
pub fn add_bda_ptr(&self, buffer: &Arc<Buffer>) {

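For devices without system SVM, `alloc_svm_ptr` above reserves a range out of the platform VM and then maps anonymous host memory at exactly that address, so CPU and GPU agree on pointer values; `SVMAlloc::drop` unmaps it again. Below is a minimal sketch of that mapping pattern, assuming Linux and the `libc` crate (the real code goes through rusticl's bindgen'd `mmap`/`munmap` bindings), with made-up helper names.

```rust
use std::ffi::c_void;

unsafe fn map_at_reserved_va(addr: u64, size: usize) -> Result<*mut c_void, ()> {
    let ptr = libc::mmap(
        addr as usize as *mut c_void,
        size,
        libc::PROT_READ | libc::PROT_WRITE,
        // MAP_FIXED_NOREPLACE (Linux 4.17+) fails instead of silently replacing
        // an existing mapping at the requested address.
        libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED_NOREPLACE | libc::MAP_NORESERVE,
        -1,
        0,
    );
    if ptr == libc::MAP_FAILED {
        return Err(());
    }
    Ok(ptr)
}

unsafe fn unmap_reserved_va(ptr: *mut c_void, size: usize) {
    // The CPU mapping goes away here; the VA range itself stays reserved until
    // it is handed back to the platform VM allocator.
    let ret = libc::munmap(ptr, size);
    debug_assert_eq!(0, ret);
}
```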

@ -27,6 +27,7 @@ use std::convert::TryInto;
use std::env;
use std::ffi::CStr;
use std::mem::transmute;
use std::num::NonZeroU64;
use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
@ -104,6 +105,13 @@ pub trait HelperContextWrapper {
where
F: Fn(&HelperContext);
fn buffer_map(
&self,
res: &PipeResource,
offset: i32,
size: i32,
rw: RWFlags,
) -> Option<PipeTransfer>;
fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
fn delete_compute_state(&self, cso: *mut c_void);
fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
@ -165,6 +173,16 @@ impl HelperContextWrapper for HelperContext<'_> {
self.lock.flush()
}
fn buffer_map(
&self,
res: &PipeResource,
offset: i32,
size: i32,
rw: RWFlags,
) -> Option<PipeTransfer> {
self.lock.buffer_map(res, offset, size, rw)
}
fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
self.lock.create_compute_state(nir, static_local_mem)
}
@ -1157,10 +1175,33 @@ impl Device {
&& (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
}
pub fn svm_supported(&self) -> bool {
pub fn system_svm_supported(&self) -> bool {
self.screen.caps().system_svm
}
pub fn svm_supported(&self) -> bool {
if cfg!(not(target_pointer_width = "64")) {
return false;
}
self.system_svm_supported() || self.screen().is_vm_supported()
}
/// Checks if the device supports SVM _and_ that we were able to initialize SVM support on a
/// platform level.
pub fn api_svm_supported(&self) -> bool {
self.system_svm_supported()
|| (self.screen().is_vm_supported() && Platform::get().vm.is_some())
}
// returns (start, end)
pub fn vm_alloc_range(&self) -> Option<(NonZeroU64, NonZeroU64)> {
let min = self.screen.caps().min_vma;
let max = self.screen.caps().max_vma;
Some((NonZeroU64::new(min)?, NonZeroU64::new(max)?))
}
pub fn unified_memory(&self) -> bool {
self.screen.caps().uma
}


@ -524,6 +524,7 @@ pub struct Kernel {
pub name: String,
values: Mutex<Vec<Option<KernelArgValue>>>,
pub bdas: Mutex<Vec<cl_mem_device_address_ext>>,
pub svms: Mutex<HashSet<usize>>,
builds: HashMap<&'static Device, Arc<NirKernelBuilds>>,
pub kernel_info: Arc<KernelInfo>,
}
@ -1246,6 +1247,7 @@ impl Kernel {
name: name,
values: Mutex::new(values),
bdas: Mutex::new(Vec::new()),
svms: Mutex::new(HashSet::new()),
builds: builds,
kernel_info: kernel_info,
})
@ -1323,6 +1325,7 @@ impl Kernel {
let arg_values = self.arg_values().clone();
let nir_kernel_builds = Arc::clone(&self.builds[q.device]);
let mut bdas = self.bdas.lock().unwrap().clone();
let svms = self.svms.lock().unwrap().clone();
let mut buffer_arcs = HashMap::new();
let mut image_arcs = HashMap::new();
@ -1442,6 +1445,12 @@ impl Kernel {
printf_buf = Some(buf);
}
// translate SVM pointers to their base first
let mut svms: HashSet<_> = svms
.into_iter()
.filter_map(|svm_pointer| Some(q.context.find_svm_alloc(svm_pointer)?.0 as usize))
.collect();
for arg in &nir_kernel_build.compiled_args {
let is_opaque = if let CompiledKernelArgType::APIArg(idx) = arg.kind {
kernel_info.args[idx].kind.is_opaque()
@ -1485,6 +1494,9 @@ impl Kernel {
if let Some(address) = buffer.dev_address(ctx.dev) {
let _ = buffer.get_res_for_access(ctx, rw)?;
bdas.push(address.get());
} else if buffer.is_svm() {
let _ = buffer.get_res_for_access(ctx, rw)?;
svms.insert(buffer.host_ptr() as usize);
}
} else {
let res = buffer.get_res_for_access(ctx, rw)?;
@ -1498,6 +1510,11 @@ impl Kernel {
}
}
&KernelArgValue::SVM(handle) => {
// get the base address so we deduplicate properly
if let Some((base, _)) = q.context.find_svm_alloc(handle) {
svms.insert(base as usize);
}
if !api_arg.dead {
add_pointer(q, &mut input, handle as u64);
}
@ -1603,11 +1620,21 @@ impl Kernel {
.filter_map(|address| q.context.find_bda_alloc(q.device, address))
.collect::<HashSet<_>>();
let bdas: Vec<_> = bdas
let mut bdas: Vec<_> = bdas
.iter()
.map(|buffer| Ok(buffer.get_res_for_access(ctx, RWFlags::RW)?.deref()))
.collect::<CLResult<_>>()?;
let svms_new = svms
.into_iter()
.filter_map(|svm| q.context.copy_svm_to_dev(ctx, svm).transpose())
.collect::<CLResult<Vec<_>>>()?;
// uhhh
for svm in &svms_new {
bdas.push(svm);
}
// subtract the shader local_size as we only request something on top of that.
variable_local_size -= static_local_size;
@ -1819,7 +1846,7 @@ impl Kernel {
}
pub fn has_svm_devs(&self) -> bool {
self.prog.devs.iter().any(|dev| dev.svm_supported())
self.prog.devs.iter().any(|dev| dev.api_svm_supported())
}
pub fn subgroup_sizes(&self, dev: &Device) -> impl ExactSizeIterator<Item = usize> {
@ -1876,6 +1903,7 @@ impl Clone for Kernel {
name: self.name.clone(),
values: Mutex::new(self.arg_values().clone()),
bdas: Mutex::new(self.bdas.lock().unwrap().clone()),
svms: Mutex::new(self.svms.lock().unwrap().clone()),
builds: self.builds.clone(),
kernel_info: Arc::clone(&self.kernel_info),
}

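The launch path above first translates every known SVM pointer to its allocation base, so interior pointers into the same allocation only make the backing resource resident once. A small illustration of that dedup step; `find_base` is a stand-in for `Context::find_svm_alloc` and the addresses are made up.

```rust
use std::collections::HashSet;

fn dedup_to_bases(
    pointers: impl IntoIterator<Item = usize>,
    find_base: impl Fn(usize) -> Option<usize>,
) -> HashSet<usize> {
    // Pointers outside any tracked allocation are dropped; the rest collapse
    // to one entry per allocation base.
    pointers.into_iter().filter_map(|p| find_base(p)).collect()
}

fn main() {
    // Pretend there is a single SVM allocation covering [0x1000, 0x1100).
    let find_base = |p: usize| (0x1000..0x1100).contains(&p).then_some(0x1000);
    let bases = dedup_to_bases([0x1000, 0x1008, 0x10f0, 0x2000], find_base);
    assert_eq!(bases, HashSet::from([0x1000usize]));
}
```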

@ -398,11 +398,17 @@ pub struct SubAllocation {
offset: usize,
}
pub struct SvmAllocation {
alloc: Arc<Allocation>,
offset: usize,
}
/// Abstraction over the memory allocation. It might be a real GPU backing storage or simply a sub
/// allocation over an existing memory object.
enum Allocation {
pub enum Allocation {
Resource(ResourceAllocation),
SubAlloc(SubAllocation),
Svm(SvmAllocation),
}
// TODO: - Once it's used for more stuff might make sense to split it into an Image and Buffer
@ -455,6 +461,14 @@ impl Allocation {
})
}
fn new_svm(alloc: Arc<Allocation>, offset: usize) -> Self {
Self::Svm(SvmAllocation {
// we precalculate the entire offset here.
offset: alloc.offset() + offset,
alloc: alloc,
})
}
/// Returns true if the backing storage of the two objects is equal.
fn backing_resource_eq(&self, other: &Self) -> bool {
ptr::eq(self.get_real_resource(), other.get_real_resource())
@ -465,24 +479,26 @@ impl Allocation {
match self {
Allocation::SubAlloc(sub) => sub.mem.alloc.get_real_resource(),
Allocation::Resource(res) => res,
Allocation::Svm(svm) => svm.alloc.get_real_resource(),
}
}
/// Returns the resource associated with `dev` without any data migration.
fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
self.get_real_resource()
.res
.get(dev)
.ok_or(CL_OUT_OF_HOST_MEMORY)
fn get_res_of_dev(&self, dev: &Device) -> Option<&Arc<PipeResource>> {
self.get_real_resource().res.get(dev)
}
/// Returns the resource associated with `ctx.dev` and transparently migrate the data.
fn get_res_for_access(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<&Arc<PipeResource>> {
pub fn get_res_for_access(
&self,
ctx: &QueueContext,
rw: RWFlags,
) -> CLResult<&Arc<PipeResource>> {
self.get_real_resource().get_res_for_access(ctx, rw)
}
/// Migrates the content to the host. Fails if there is no host ptr.
pub fn _migrate_to_hostptr(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<()> {
pub fn migrate_to_hostptr(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<()> {
self.get_real_resource().migrate_to_hostptr(ctx, rw)
}
@ -497,14 +513,20 @@ impl Allocation {
host_ptr as _
}
fn is_user_alloc_for_dev(&self, dev: &Device) -> CLResult<bool> {
Ok(self.get_res_of_dev(dev)?.is_user())
fn is_user_alloc_for_dev(&self, dev: &Device) -> bool {
if let Some(res) = self.get_res_of_dev(dev) {
res.is_user()
} else {
// for SVM allocations there might not even be a real resource
dev.system_svm_supported()
}
}
fn offset(&self) -> usize {
match self {
Allocation::Resource(res) => res.offset,
Allocation::SubAlloc(sub) => sub.offset,
Allocation::Svm(svm) => svm.offset,
}
}
}
@ -773,39 +795,71 @@ impl MemBase {
.copied()
== Some(CL_TRUE.into());
let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
ResourceType::Staging
// if it's a SVM host ptr, we just use the already allocated resource if it exists, because
// this is actually mandated by the spec. The size requirement will be checked inside the
// API layer.
//
// From the OpenCL spec:
// If clCreateBuffer or clCreateBufferWithProperties is called with a pointer returned
// clSVMAlloc as its host_ptr argument, and CL_MEM_USE_HOST_PTR is set in its flags
// argument, clCreateBuffer or clCreateBufferWithProperties will succeed and return
// valid non-zero buffer object as long as the size argument is no larger than the size
// argument passed in the original clSVMAlloc call. The new buffer object returned has the
// shared memory as the underlying storage.
let svm = bit_check(flags, CL_MEM_USE_HOST_PTR)
.then(|| context.get_svm_alloc(host_ptr as usize))
.flatten();
let alloc = if let Some((svm_ptr, ref svm_alloc)) = svm {
// SAFETY: svm_ptr is the base of the allocation host_ptr points into.
let offset = unsafe { host_ptr.byte_offset_from(svm_ptr) } as usize;
Allocation::new_svm(Arc::clone(svm_alloc), offset)
} else {
ResourceType::Normal
let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
ResourceType::Staging
} else {
ResourceType::Normal
};
let buffer = context.create_buffer(
size,
host_ptr,
bit_check(flags, CL_MEM_COPY_HOST_PTR),
bda,
res_type,
)?;
// We can only keep the host_ptr when `CL_MEM_USE_HOST_PTR` is set.
if !bit_check(flags, CL_MEM_USE_HOST_PTR) {
host_ptr = ptr::null_mut()
}
Allocation::new(buffer, 0, host_ptr)
};
let buffer = context.create_buffer(
size,
host_ptr,
bit_check(flags, CL_MEM_COPY_HOST_PTR),
bda,
res_type,
)?;
// We can only keep the host_ptr when `CL_MEM_USE_HOST_PTR` is set.
if !bit_check(flags, CL_MEM_USE_HOST_PTR) {
host_ptr = ptr::null_mut()
}
let addresses = bda.then(|| {
context
.devs
.iter()
.filter(|dev| dev.bda_supported())
.map(|&dev| {
let address = buffer[dev].resource_get_address();
// If the buffer is backed by an SVM allocation, we need to use its address.
let address = if let Some((address, _)) = svm {
NonZeroU64::new(address as usize as u64)
} else if let Some(res) = alloc.get_res_of_dev(dev) {
res.resource_get_address()
} else {
// if there is no resource, it's a system SVM allocation
assert!(dev.system_svm_supported());
NonZeroU64::new(alloc.host_ptr() as u64)
};
Some((dev, address?))
})
.collect::<Option<_>>()
.unwrap()
});
let alloc = Allocation::new(buffer, 0, host_ptr);
let buffer = Arc::new(Buffer {
base: Self {
base: CLObjectBase::new(RusticlTypes::Buffer),
@ -1102,6 +1156,9 @@ impl MemBase {
match &self.alloc {
Allocation::SubAlloc(sub) => Some(&sub.mem),
Allocation::Resource(_) => None,
// In theory the SVM allocation is the parent, but that's not a memory object on the API
// level.
Allocation::Svm(_) => None,
}
}
@ -1109,10 +1166,10 @@ impl MemBase {
self.alloc.host_ptr()
}
fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
fn is_pure_user_memory(&self, d: &Device) -> bool {
// 1Dbuffer objects are weird. The parent memory object can be a host_ptr thing, but we are
// not allowed to actually return a pointer based on the host_ptr when mapping.
Ok(self.alloc.is_user_alloc_for_dev(d)? && !self.host_ptr().is_null())
self.alloc.is_user_alloc_for_dev(d) && !self.host_ptr().is_null()
}
fn map<T>(
@ -1430,7 +1487,7 @@ impl Buffer {
// in this case we only need to migrate to the device if the data is located on a device not
// having a userptr allocation.
if self.is_pure_user_memory(ctx.dev)? {
if self.is_pure_user_memory(ctx.dev) {
let rw = if mapping.writes {
RWFlags::RW
} else {
@ -1446,7 +1503,7 @@ impl Buffer {
pub fn sync_unmap(&self, ctx: &QueueContext, ptr: MutMemoryPtr) -> CLResult<()> {
// no need to update
if self.is_pure_user_memory(ctx.dev)? {
if self.is_pure_user_memory(ctx.dev) {
return Ok(());
}
@ -1904,7 +1961,7 @@ impl Image {
// in this case we only need to migrate to the device if the data is located on a device not
// having a userptr allocation.
if self.is_pure_user_memory(ctx.dev)? {
if self.is_pure_user_memory(ctx.dev) {
let rw = if mapping.writes {
RWFlags::RW
} else {
@ -1930,7 +1987,7 @@ impl Image {
pub fn sync_unmap(&self, ctx: &QueueContext, ptr: MutMemoryPtr) -> CLResult<()> {
// no need to update
if self.is_pure_user_memory(ctx.dev)? {
if self.is_pure_user_memory(ctx.dev) {
return Ok(());
}

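When a `CL_MEM_USE_HOST_PTR` buffer is created on top of an SVM allocation, `Allocation::new_svm` above only needs the distance of `host_ptr` from the allocation base. A sketch of that offset computation, with plain heap memory standing in for the SVM region; the helper name is made up.

```rust
use std::ffi::c_void;

/// SAFETY (caller): both pointers belong to the same allocation and
/// host_ptr >= base, so the distance is non-negative and fits in usize.
unsafe fn offset_into_alloc(base: *const c_void, host_ptr: *const c_void) -> usize {
    host_ptr.byte_offset_from(base) as usize
}

fn main() {
    let storage = vec![0u8; 4096];
    let base = storage.as_ptr().cast::<c_void>();
    let host_ptr = storage[128..].as_ptr().cast::<c_void>();
    assert_eq!(unsafe { offset_into_alloc(base, host_ptr) }, 128);
}
```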

@ -3,25 +3,47 @@ use crate::api::icd::DISPATCH;
use crate::core::device::*;
use crate::core::version::*;
use mesa_rust::pipe::screen::ScreenVMAllocation;
use mesa_rust::util::vm::VM;
use mesa_rust_gen::*;
use mesa_rust_util::string::char_arr_to_cstr;
use rusticl_opencl_gen::*;
use std::cmp;
use std::env;
use std::num::NonZeroU64;
use std::ops::Deref;
use std::ptr;
use std::ptr::addr_of;
use std::ptr::addr_of_mut;
use std::sync::Mutex;
use std::sync::Once;
/// Maximum size a pixel can be across all supported image formats.
pub const MAX_PIXEL_SIZE_BYTES: u64 = 4 * 4;
pub struct PlatformVM<'a> {
vm: Mutex<VM>,
// we make use of the drop to automatically free the reserved VM
_dev_allocs: Vec<ScreenVMAllocation<'a>>,
}
impl Deref for PlatformVM<'_> {
type Target = Mutex<VM>;
fn deref(&self) -> &Self::Target {
&self.vm
}
}
#[repr(C)]
pub struct Platform {
dispatch: &'static cl_icd_dispatch,
pub devs: Vec<Device>,
pub extension_string: String,
pub extensions: Vec<cl_name_version>,
// lifetime has to match the one of devs
pub vm: Option<PlatformVM<'static>>,
}
pub enum PerfDebugLevel {
@ -57,6 +79,7 @@ static mut PLATFORM: Platform = Platform {
devs: Vec::new(),
extension_string: String::new(),
extensions: Vec::new(),
vm: None,
};
static mut PLATFORM_DBG: PlatformDebug = PlatformDebug {
allow_invalid_spirv: false,
@ -139,13 +162,56 @@ impl Platform {
unsafe { &*addr_of!(PLATFORM_FEATURES) }
}
fn init(&mut self) {
fn alloc_vm(devs: &[Device]) -> Option<PlatformVM<'_>> {
// We support buffer SVM only on 64 bit platforms
if cfg!(not(target_pointer_width = "64")) {
return None;
}
// No need to check system SVM devices
let devs = devs.iter().filter(|dev| !dev.system_svm_supported());
let (start, end) = devs.clone().filter_map(|dev| dev.vm_alloc_range()).reduce(
|(min_a, max_a), (min_b, max_b)| (cmp::max(min_a, min_b), cmp::min(max_a, max_b)),
)?;
// Allocate 1/8 of the available VM. No specific reason for this limit. Might have to bump
// this later, but it's probably fine as there is plenty of VM available.
let size = NonZeroU64::new((end.get() / 8).next_power_of_two())?;
if start > size {
return None;
}
let mut allocs = Vec::new();
for dev in devs {
allocs.push(dev.screen().alloc_vm(size, size)?);
}
Some(PlatformVM {
vm: Mutex::new(VM::new(size, size)),
_dev_allocs: allocs,
})
}
fn init(&'static mut self) {
unsafe {
glsl_type_singleton_init_or_ref();
}
self.devs = Device::all();
self.vm = Self::alloc_vm(&self.devs);
if self
.devs
.iter()
.any(|dev| !dev.system_svm_supported() && dev.svm_supported())
&& self.vm.is_none()
{
// TODO: in theory we should also remove the exposed SVM extension, but...
eprintln!("rusticl: could not initialize SVM support");
}
let mut exts_str: Vec<&str> = Vec::new();
let mut add_ext = |major, minor, patch, ext: &'static str| {
self.extensions

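`alloc_vm` above intersects the VA windows reported by every device that lacks system SVM and then reserves part of the common range on each of them. The sketch below covers just the intersection step (the real code additionally sizes the reservation to a power-of-two fraction of the window); names and example values are made up.

```rust
use std::num::NonZeroU64;

fn common_va_window(
    windows: impl IntoIterator<Item = (NonZeroU64, NonZeroU64)>,
) -> Option<(NonZeroU64, NonZeroU64)> {
    windows
        .into_iter()
        // Keep only addresses every device accepts: highest start, lowest end.
        .reduce(|(start_a, end_a), (start_b, end_b)| (start_a.max(start_b), end_a.min(end_b)))
        .filter(|(start, end)| start <= end)
}

fn main() {
    let dev_a = (NonZeroU64::new(0x1000).unwrap(), NonZeroU64::new(1 << 47).unwrap());
    let dev_b = (NonZeroU64::new(0x10000).unwrap(), NonZeroU64::new(1 << 40).unwrap());
    assert_eq!(common_va_window([dev_a, dev_b]), Some((dev_b.0, dev_b.1)));
}
```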

@ -282,7 +282,18 @@ impl PipeResource {
impl Drop for PipeResource {
fn drop(&mut self) {
unsafe { pipe_resource_reference(&mut self.pipe.as_ptr(), ptr::null_mut()) }
unsafe {
let pipe = self.pipe.as_ref();
let screen = pipe.screen.as_ref().unwrap();
if pipe.flags & PIPE_RESOURCE_FLAG_FRONTEND_VM != 0 {
if let Some(resource_assign_vma) = screen.resource_assign_vma {
resource_assign_vma(pipe.screen, self.pipe(), 0);
}
}
pipe_resource_reference(&mut self.pipe.as_ptr(), ptr::null_mut());
}
}
}


@ -308,6 +308,12 @@ rusticl_mesa_bindings = rust.bindgen(
'--allowlist-function', 'rusticl_.*',
'--allowlist-function', 'std(err|out)_ptr',
# libc
'--allowlist-function', 'mmap',
'--allowlist-function', 'munmap',
'--allowlist-var', 'MAP_.*',
'--allowlist-var', 'PROT_.*',
# winsys
'--allowlist-var', 'WINSYS_HANDLE_TYPE_.*',
],


@ -1,4 +1,5 @@
#include <stdio.h>
#include <sys/mman.h>
FILE *stdout_ptr(void);
FILE *stderr_ptr(void);


@ -1,6 +1,9 @@
use std::{
alloc::Layout,
collections::{btree_map::Entry, BTreeMap},
collections::{
btree_map::{Entry, Values, ValuesMut},
BTreeMap,
},
hash::{Hash, Hasher},
mem,
ops::{Add, Deref},
@ -175,6 +178,14 @@ impl<P, T: AllocSize<P>> TrackedPointers<P, T> {
ptrs: BTreeMap::new(),
}
}
pub fn values(&self) -> Values<'_, P, T> {
self.ptrs.values()
}
pub fn values_mut(&mut self) -> ValuesMut<'_, P, T> {
self.ptrs.values_mut()
}
}
impl<P, T: AllocSize<P>> TrackedPointers<P, T>
@ -201,6 +212,18 @@ where
None
}
pub fn find_alloc_mut(&mut self, ptr: P) -> Option<(P, &mut T)> {
if let Some((&base, val)) = self.ptrs.range_mut(..=ptr).next_back() {
let size = val.size();
// we check if ptr is within [base..base+size)
// means we can check if ptr - (base + size) < 0
if ptr < (base + size) {
return Some((base, val));
}
}
None
}
pub fn find_alloc_precise(&self, ptr: P) -> Option<&T> {
self.ptrs.get(&ptr)
}