diff --git a/docs/features.txt b/docs/features.txt
index 92139d69d6f..786dd3c0b4c 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -756,7 +756,7 @@ Rusticl OpenCL 1.2 -- all DONE:
 
 Rusticl OpenCL 2.0 -- all DONE:
 
-  Shared virtual memory                                 in progress (nvc0, llvmpipe)
+  Shared virtual memory                                 DONE (nvc0, llvmpipe)
   Device queues                                         not started
   cl_khr_create_command_queue                           DONE
   Additional queries for clGetDeviceInfo                DONE
diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index ab9add6fb24..0ae550a4f4e 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -28,3 +28,4 @@ VK_EXT_texel_buffer_alignment on panvk
 cl_khr_kernel_clock on freedreno, iris, llvmpipe, nvc0, panfrost, radeonsi and zink with llvm-19 or newer
 GL_KHR_texture_compression_astc_hdr on panfrost and asahi
 cl_ext_buffer_device_address on llvmpipe and zink
+Completed OpenCL 2.0 coarse grain buffer SVM support
diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs
index 9536765ded3..10d20dd94ba 100644
--- a/src/gallium/frontends/rusticl/api/device.rs
+++ b/src/gallium/frontends/rusticl/api/device.rs
@@ -288,16 +288,17 @@ unsafe impl CLInfo<cl_device_info> for cl_device_id {
             }
         }
         CL_DEVICE_SVM_CAPABILITIES | CL_DEVICE_SVM_CAPABILITIES_ARM => {
-            v.write::<cl_device_svm_capabilities>(
-                if dev.svm_supported() {
-                    CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
-                        | CL_DEVICE_SVM_FINE_GRAIN_BUFFER
-                        | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM
-                } else {
-                    0
-                }
-                .into(),
-            )
+            let mut caps = 0;
+
+            if dev.api_svm_supported() {
+                caps |= CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
+            }
+
+            if dev.system_svm_supported() {
+                caps |= CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
+            }
+
+            v.write::<cl_device_svm_capabilities>(caps.into())
         }
         CL_DEVICE_TYPE => {
             // CL_DEVICE_TYPE_DEFAULT ... will never be returned in CL_DEVICE_TYPE for any
diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs
index bd8cb2bbcc8..e4dfa94a5aa 100644
--- a/src/gallium/frontends/rusticl/api/kernel.rs
+++ b/src/gallium/frontends/rusticl/api/kernel.rs
@@ -554,7 +554,7 @@ fn set_kernel_exec_info(
     // CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM if no devices in the context associated with kernel
     // support SVM.
     let check_svm_support = || {
-        if devs.iter().all(|dev| !dev.svm_supported()) {
+        if devs.iter().all(|dev| !dev.api_svm_supported()) {
             Err(CL_INVALID_OPERATION)
         } else {
             Ok(())
@@ -577,16 +577,36 @@ fn set_kernel_exec_info(
         CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => {
             check_svm_support()?;
 
+            // reuse the existing container so we avoid reallocations
+            let mut svms = k.svms.lock().unwrap();
+
            // To specify that no SVM allocations will be accessed by a kernel other than those set
            // as kernel arguments, specify an empty set by passing param_value_size equal to zero
            // and param_value equal to NULL.
-            if !param_value.is_null() || param_value_size != 0 {
-                let _ = unsafe {
+            if param_value_size == 0 && param_value.is_null() {
+                svms.clear();
+            } else {
+                let pointers = unsafe {
                     cl_slice::from_raw_parts_bytes_len::<*const c_void>(
                         param_value,
                         param_value_size,
                     )?
                 };
+
+                // We need to clear _after_ the error checking above. We could just assign a new
+                // container, however we also want to reuse the allocations.
+                svms.clear();
+                pointers
+                    .iter()
+                    // Each of the pointers can be the pointer returned by clSVMAlloc or can be a
+                    // pointer to the middle of an SVM allocation. It is sufficient to specify one
+                    // pointer for each SVM allocation.
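+                    // -- clSetKernelExecInfo, CL_KERNEL_EXEC_INFO_SVM_PTRS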
+ // + // So we'll simply fetch the base and store that one. + .filter_map(|&handle| k.prog.context.find_svm_alloc(handle as usize)) + .for_each(|(base, _)| { + svms.insert(base as usize); + }); } } CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM @@ -600,14 +620,19 @@ fn set_kernel_exec_info( if val.len() != 1 { return Err(CL_INVALID_VALUE); } + + // CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and + // param_value is CL_TRUE but no devices in context associated with kernel support + // fine-grain system SVM allocations. + if val[0] == CL_TRUE && devs.iter().all(|dev| !dev.system_svm_supported()) { + return Err(CL_INVALID_OPERATION); + } } // CL_INVALID_VALUE if param_name is not valid _ => return Err(CL_INVALID_VALUE), } Ok(()) - - // CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and param_value is CL_TRUE but no devices in context associated with kernel support fine-grain system SVM allocations. } #[cl_entrypoint(clEnqueueNDRangeKernel)] diff --git a/src/gallium/frontends/rusticl/api/memory.rs b/src/gallium/frontends/rusticl/api/memory.rs index 9ab6df9210a..11969ebf954 100644 --- a/src/gallium/frontends/rusticl/api/memory.rs +++ b/src/gallium/frontends/rusticl/api/memory.rs @@ -21,7 +21,7 @@ use rusticl_proc_macros::cl_info_entrypoint; use std::cmp; use std::cmp::Ordering; -use std::mem::{self, MaybeUninit}; +use std::mem; use std::num::NonZeroU64; use std::os::raw::c_void; use std::ptr; @@ -297,13 +297,13 @@ fn create_buffer_with_properties( // or if CL_MEM_USE_HOST_PTR is set in flags and host_ptr is a pointer returned by clSVMAlloc // and size is greater than the size passed to clSVMAlloc. - if let Some((svm_ptr, svm_layout)) = c.find_svm_alloc(host_ptr as usize) { + if let Some((svm_ptr, alloc_size)) = c.find_svm_alloc(host_ptr as usize) { // SAFETY: they are part of the same allocation, and because host_ptr >= svm_ptr we can cast // to usize. let diff = unsafe { host_ptr.byte_offset_from(svm_ptr) } as usize; // technically we don't have to account for the offset, but it's almost for free. - if size > svm_layout - diff { + if size > alloc_size - diff { return Err(CL_INVALID_BUFFER_SIZE); } } @@ -2376,13 +2376,16 @@ pub fn svm_alloc( return Err(CL_INVALID_VALUE); } - let alignment = if alignment != 0 { - alignment as usize - } else { - // When alignment is 0, the size of the largest supported type is used. - // In the case of the full profile, that's `long16`. - mem::size_of::<[u64; 16]>() - }; + // When alignment is 0, the size of the largest supported type is used. + // In the case of the full profile, that's `long16`. + let alignment = NonZeroU64::new(alignment.into()) + .unwrap_or(NonZeroU64::new(mem::size_of::<[u64; 16]>() as u64).unwrap()); + + // size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value for any device in context. + let size = NonZeroU64::new(size as u64).ok_or(CL_INVALID_VALUE)?; + if size.get() > c.max_mem_alloc() { + return Err(CL_INVALID_VALUE); + } c.alloc_svm_ptr(size, alignment) @@ -2422,7 +2425,7 @@ fn enqueue_svm_free_impl( } // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. - if !q.device.svm_supported() { + if !q.device.api_svm_supported() { return Err(CL_INVALID_OPERATION); } @@ -2529,47 +2532,33 @@ fn enqueue_svm_memcpy_impl( let block = check_cl_bool(blocking_copy).ok_or(CL_INVALID_VALUE)?; // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. 
-    if !q.device.svm_supported() {
+    if !q.device.api_svm_supported() {
         return Err(CL_INVALID_OPERATION);
     }
 
+    // CL_INVALID_VALUE if dst_ptr or src_ptr is NULL.
+    if src_ptr.is_null() || dst_ptr.is_null() {
+        return Err(CL_INVALID_VALUE);
+    }
+
+    let src_ptr = src_ptr as usize;
+    let dst_ptr = dst_ptr as usize;
+
     // CL_MEM_COPY_OVERLAP if the values specified for dst_ptr, src_ptr and size result in an
     // overlapping copy.
-    let dst_ptr_addr = dst_ptr as usize;
-    let src_ptr_addr = src_ptr as usize;
-    if (src_ptr_addr <= dst_ptr_addr && dst_ptr_addr < src_ptr_addr + size)
-        || (dst_ptr_addr <= src_ptr_addr && src_ptr_addr < dst_ptr_addr + size)
+    if (src_ptr <= dst_ptr && dst_ptr < src_ptr + size)
+        || (dst_ptr <= src_ptr && src_ptr < dst_ptr + size)
     {
         return Err(CL_MEM_COPY_OVERLAP);
     }
 
-    // CAST: We have no idea about the type or initialization status of these bytes.
-    // MaybeUninit<u8> is the safe bet.
-    let src_ptr = src_ptr.cast::<MaybeUninit<u8>>();
-
-    // CAST: We have no idea about the type or initialization status of these bytes.
-    // MaybeUninit<u8> is the safe bet.
-    let dst_ptr = dst_ptr.cast::<MaybeUninit<u8>>();
-
-    // SAFETY: It is up to the application to ensure the memory is valid to read for `size` bytes
-    // and that it doesn't modify it until the command has completed.
-    let src = unsafe { cl_slice::from_raw_parts(src_ptr, size)? };
-
-    // SAFETY: We've ensured there's no aliasing between src and dst. It is up to the application
-    // to ensure the memory is valid to read and write for `size` bytes and that it doesn't modify
-    // or read from it until the command has completed.
-    let dst = unsafe { cl_slice::from_raw_parts_mut(dst_ptr, size)? };
-
     create_and_queue(
         q,
         cmd_type,
         evs,
         event,
         block,
-        Box::new(move |_, _| {
-            dst.copy_from_slice(src);
-            Ok(())
-        }),
+        Box::new(move |q, ctx| q.context.copy_svm(ctx, src_ptr, dst_ptr, size)),
     )
 }
 
@@ -2636,10 +2625,20 @@ fn enqueue_svm_mem_fill_impl(
     let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
 
     // CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
-    if !q.device.svm_supported() {
+    if !q.device.api_svm_supported() {
         return Err(CL_INVALID_OPERATION);
     }
 
+    // CL_INVALID_VALUE if svm_ptr is NULL.
+    if svm_ptr.is_null() {
+        return Err(CL_INVALID_VALUE);
+    }
+
+    // CL_INVALID_VALUE if svm_ptr is not aligned to pattern_size bytes.
+    if !is_aligned_to(svm_ptr, pattern_size) {
+        return Err(CL_INVALID_VALUE);
+    }
+
     // CL_INVALID_VALUE if pattern is NULL [...]
     if pattern.is_null() {
         return Err(CL_INVALID_VALUE);
@@ -2701,35 +2700,9 @@ fn enqueue_svm_mem_fill_impl(
             // `pattern_size` bytes and properly initialized.
             // Creating a bitwise copy can't create memory safety issues, since `Pattern` is `Copy`.
             let pattern = unsafe { pattern_ptr.read_unaligned() };
+            let svm_ptr = svm_ptr as usize;
 
-            // CAST: Same as with `pattern`, we don't know the exact type of `svm_ptr`, but we do
-            // know it's fine if we choose the same type here. The application might reasonably
-            // give us uninitialized memory though, so cast to a `MaybeUninit<Pattern>`, which has
-            // the same layout as `Pattern`.
-            let svm_ptr = svm_ptr.cast::<MaybeUninit<Pattern>>();
-
-            // SAFETY: It is the calling application's responsibility to ensure that `svm_ptr` is
-            // valid for reads and writes up to `size` bytes.
-            // Since `pattern_size == mem::size_of::<Pattern>()` and `MaybeUninit<Pattern>` has the
-            // same layout as `Pattern`, we know that
-            // `size / pattern_size * mem::size_of::<MaybeUninit<Pattern>>()` equals `size`.
-            //
-            // Since we're creating a `&[MaybeUninit<Pattern>]` the initialization status does not matter.
- // - // From here on out we only access the referenced memory though this slice. In - // particular, since we've made a copy of `pattern`, it doesn't matter if the memory - // region referenced by `pattern` aliases the one referenced by this slice. It is up to - // the application not to access it at all until this command has been completed. - let svm_slice = unsafe { cl_slice::from_raw_parts_mut(svm_ptr, size / pattern_size)? }; - - Box::new(move |_, _| { - for x in svm_slice { - x.write(pattern); - } - - Ok(()) - }) + Box::new(move |q, ctx| q.context.clear_svm(ctx, svm_ptr, size, pattern.0)) }}; } @@ -2817,7 +2790,7 @@ fn enqueue_svm_map_impl( let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?; // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. - if !q.device.svm_supported() { + if !q.device.api_svm_supported() { return Err(CL_INVALID_OPERATION); } @@ -2834,7 +2807,15 @@ fn enqueue_svm_map_impl( // ... or if values specified in map_flags are not valid. validate_map_flags_common(flags)?; - create_and_queue(q, cmd_type, evs, event, block, Box::new(|_, _| Ok(()))) + let svm_ptr = svm_ptr as usize; + create_and_queue( + q, + cmd_type, + evs, + event, + block, + Box::new(move |q, ctx| q.context.copy_svm_to_host(ctx, svm_ptr, flags)), + ) } #[cl_entrypoint(clEnqueueSVMMap)] @@ -2897,7 +2878,7 @@ fn enqueue_svm_unmap_impl( let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. - if !q.device.svm_supported() { + if !q.device.api_svm_supported() { return Err(CL_INVALID_OPERATION); } @@ -2906,7 +2887,15 @@ fn enqueue_svm_unmap_impl( return Err(CL_INVALID_VALUE); } - create_and_queue(q, cmd_type, evs, event, false, Box::new(|_, _| Ok(()))) + create_and_queue( + q, + cmd_type, + evs, + event, + false, + // TODO: we _could_ migrate the content somewhere, but it's really pointless to do + Box::new(move |_, _| Ok(())), + ) } #[cl_entrypoint(clEnqueueSVMUnmap)] @@ -2960,7 +2949,7 @@ fn enqueue_svm_migrate_mem( let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. 
-    if !q.device.svm_supported() {
+    if !q.device.api_svm_supported() {
         return Err(CL_INVALID_OPERATION);
     }
 
@@ -3010,9 +2999,9 @@ fn enqueue_svm_migrate_mem(
         evs,
         event,
         false,
-        Box::new(move |_, ctx| {
-            ctx.svm_migrate(&svm_pointers, &sizes, to_device, content_undefined);
-            Ok(())
+        Box::new(move |q, ctx| {
+            q.context
+                .migrate_svm(ctx, svm_pointers, sizes, to_device, content_undefined)
         }),
     )
 }
diff --git a/src/gallium/frontends/rusticl/core/context.rs b/src/gallium/frontends/rusticl/core/context.rs
index ee201ef3485..73efd2238fa 100644
--- a/src/gallium/frontends/rusticl/core/context.rs
+++ b/src/gallium/frontends/rusticl/core/context.rs
@@ -1,13 +1,15 @@
 use crate::api::icd::*;
 use crate::api::types::DeleteContextCB;
-use crate::api::util::checked_compare;
+use crate::api::util::bit_check;
 use crate::core::device::*;
 use crate::core::format::*;
 use crate::core::gl::*;
 use crate::core::memory::*;
+use crate::core::queue::*;
 use crate::core::util::*;
 use crate::impl_cl_type_trait;
 
+use mesa_rust::pipe::context::RWFlags;
 use mesa_rust::pipe::resource::*;
 use mesa_rust::pipe::screen::ResourceType;
 use mesa_rust_gen::*;
@@ -19,15 +21,21 @@
 use rusticl_opencl_gen::*;
 
 use std::alloc;
 use std::alloc::Layout;
-use std::cmp::Ordering;
+use std::cmp;
 use std::collections::HashMap;
 use std::convert::TryInto;
+use std::ffi::c_int;
 use std::mem;
+use std::num::NonZeroU64;
 use std::os::raw::c_void;
+use std::ptr;
+use std::slice;
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::Weak;
 
+use super::platform::Platform;
+
 struct TrackedBDAAlloc {
     buffer: Weak<Buffer>,
     size: cl_mem_device_address_ext,
@@ -39,6 +47,53 @@ impl AllocSize<cl_mem_device_address_ext> for TrackedBDAAlloc {
     }
 }
 
+struct SVMAlloc {
+    layout: Layout,
+    vma: Option<NonZeroU64>,
+    alloc: Arc<Allocation>,
+}
+
+impl SVMAlloc {
+    pub fn size(&self) -> usize {
+        self.layout.size()
+    }
+}
+
+impl Drop for SVMAlloc {
+    fn drop(&mut self) {
+        if let Some(vma) = self.vma {
+            let address = vma.get() as usize as *mut c_void;
+            unsafe {
+                debug_assert_eq!(0, munmap(address, self.size()));
+            }
+
+            Platform::get()
+                .vm
+                .as_ref()
+                .unwrap()
+                .lock()
+                .unwrap()
+                .free(vma, NonZeroU64::new(self.size() as u64).unwrap());
+        } else {
+            // SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout
+            // from the allocation
+            unsafe {
+                alloc::dealloc(self.alloc.host_ptr().cast(), self.layout);
+            }
+        }
+    }
+}
+
+impl AllocSize<usize> for SVMAlloc {
+    fn size(&self) -> usize {
+        SVMAlloc::size(self)
+    }
+}
+
+struct SVMContext {
+    svm_ptrs: TrackedPointers<usize, SVMAlloc>,
+}
+
 pub struct Context {
     pub base: CLObjectBase<CL_INVALID_CONTEXT>,
     pub devs: Vec<&'static Device>,
@@ -48,7 +103,7 @@ pub struct Context {
     bda_ptrs: Mutex<
         HashMap<&'static Device, TrackedPointers<cl_mem_device_address_ext, TrackedBDAAlloc>>,
     >,
-    svm_ptrs: Mutex<TrackedPointers<usize, Layout>>,
+    svm: Mutex<SVMContext>,
     pub gl_ctx_manager: Option<GLCtxManager>,
 }
@@ -66,7 +121,9 @@ impl Context {
             properties: properties,
             dtors: Mutex::new(Vec::new()),
             bda_ptrs: Mutex::new(HashMap::new()),
-            svm_ptrs: Mutex::new(TrackedPointers::new()),
+            svm: Mutex::new(SVMContext {
+                svm_ptrs: TrackedPointers::new(),
+            }),
             gl_ctx_manager: gl_ctx_manager,
         })
     }
@@ -207,13 +264,21 @@ impl Context {
     }
 
     pub fn has_svm_devs(&self) -> bool {
-        self.devs.iter().any(|dev| dev.svm_supported())
+        self.devs.iter().any(|dev| dev.api_svm_supported())
     }
 
-    pub fn alloc_svm_ptr(&self, size: usize, alignment: usize) -> CLResult<*mut c_void> {
+    pub fn alloc_svm_ptr(
+        &self,
+        size: NonZeroU64,
+        mut alignment: NonZeroU64,
+    ) -> CLResult<*mut c_void> {
+        // TODO: choose better alignment in regards to huge pages
+        alignment = cmp::max(alignment, NonZeroU64::new(0x1000).unwrap());
+
         // clSVMAlloc will fail if alignment is not a power of two.
         // `from_size_align()` verifies this condition is met.
-        let layout = Layout::from_size_align(size, alignment).or(Err(CL_INVALID_VALUE))?;
+        let layout = Layout::from_size_align(size.get() as usize, alignment.get() as usize)
+            .or(Err(CL_INVALID_VALUE))?;
 
         // clSVMAlloc will fail if size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value
         // for any device in context.
@@ -222,38 +287,252 @@ impl Context {
         // `from_size_align()` ensures that the allocation will fit in host memory,
         // the maximum allocation may be smaller due to limitations from gallium or
         // devices.
-        let size_aligned = layout.pad_to_align().size();
-        if size == 0 || checked_compare(size_aligned, Ordering::Greater, self.max_mem_alloc()) {
-            return Err(CL_INVALID_VALUE);
-        }
+        // let size_aligned = layout.pad_to_align().size();
 
-        // SAFETY: `size` is verified to be non-zero and the returned pointer is not
-        // expected to point to initialized memory.
-        let ptr = unsafe { alloc::alloc(layout) };
+        // allocate a vma if one of the devices doesn't support system SVM
+        let vma = if let Some(vm) = &Platform::get().vm {
+            Some(
+                vm.lock()
+                    .unwrap()
+                    .alloc(size, alignment)
+                    .ok_or(CL_OUT_OF_RESOURCES)?,
+            )
+        } else {
+            None
+        };
+
+        let ptr: *mut c_void = if let Some(vma) = &vma {
+            let res = unsafe {
+                mmap(
+                    vma.get() as usize as *mut c_void,
+                    size.get() as usize,
+                    (PROT_READ | PROT_WRITE) as c_int,
+                    // MAP_FIXED_NOREPLACE needs 4.17
+                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE | MAP_NORESERVE) as c_int,
+                    -1,
+                    0,
+                )
+            };
+
+            // mmap returns MAP_FAILED on error which is -1
+            if res as usize == usize::MAX {
+                return Err(CL_OUT_OF_HOST_MEMORY);
+            }
+
+            res.cast()
+        } else {
+            unsafe { alloc::alloc(layout) }.cast()
+        };
 
         if ptr.is_null() {
-            Err(CL_OUT_OF_HOST_MEMORY)
-        } else {
-            Ok(ptr.cast())
+            return Err(CL_OUT_OF_HOST_MEMORY);
         }
+
+        let address = ptr as u64;
+        let mut buffers = HashMap::new();
+        for &dev in &self.devs {
+            let size: u32 = size.get().try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
+
+            // For system SVM devices we simply create a userptr resource.
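+            // For everything else we create a regular buffer flagged with
+            // PIPE_RESOURCE_FLAG_FRONTEND_VM and bind it to the reserved VMA below, so the
+            // allocation shows up at the same virtual address on every device.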
+            let res = if dev.system_svm_supported() {
+                dev.screen()
+                    .resource_create_buffer_from_user(size, ptr, PIPE_BIND_GLOBAL, 0)
+            } else {
+                dev.screen().resource_create_buffer(
+                    size,
+                    ResourceType::Normal,
+                    PIPE_BIND_GLOBAL,
+                    PIPE_RESOURCE_FLAG_FRONTEND_VM,
+                )
+            };
+
+            let res = res.ok_or(CL_OUT_OF_RESOURCES)?;
+            if !dev.system_svm_supported() {
+                if !dev.screen().resource_assign_vma(&res, address) {
+                    return Err(CL_OUT_OF_RESOURCES);
+                }
+            }
+
+            buffers.insert(dev, Arc::new(res));
+        }
+
+        self.svm.lock().unwrap().svm_ptrs.insert(
+            ptr as usize,
+            SVMAlloc {
+                layout: layout,
+                vma,
+                alloc: Arc::new(Allocation::new(buffers, 0, ptr)),
+            },
+        );
+
+        Ok(ptr)
     }
 
-    pub fn find_svm_alloc(&self, ptr: usize) -> Option<(*const c_void, usize)> {
-        self.svm_ptrs
+    pub fn copy_svm_to_dev(
+        &self,
+        ctx: &QueueContext,
+        ptr: usize,
+    ) -> CLResult<Option<Arc<PipeResource>>> {
+        let svm = self.svm.lock().unwrap();
+
+        let Some(alloc) = svm.svm_ptrs.find_alloc_precise(ptr) else {
+            return Ok(None);
+        };
+
+        Ok(Some(Arc::clone(
+            alloc.alloc.get_res_for_access(ctx, RWFlags::RW)?,
+        )))
+    }
+
+    pub fn copy_svm_to_host(
+        &self,
+        ctx: &QueueContext,
+        svm_ptr: usize,
+        flags: cl_map_flags,
+    ) -> CLResult<()> {
+        // no need to copy
+        if bit_check(flags, CL_MAP_WRITE_INVALIDATE_REGION) {
+            return Ok(());
+        }
+
+        let svm = self.svm.lock().unwrap();
+        let Some((_, alloc)) = svm.svm_ptrs.find_alloc(svm_ptr) else {
+            return Ok(());
+        };
+
+        alloc.alloc.migrate_to_hostptr(ctx, RWFlags::RW)
+    }
+
+    pub fn copy_svm(
+        &self,
+        ctx: &QueueContext,
+        src_addr: usize,
+        dst_addr: usize,
+        size: usize,
+    ) -> CLResult<()> {
+        let svm = self.svm.lock().unwrap();
+        let src = svm.svm_ptrs.find_alloc(src_addr);
+        let dst = svm.svm_ptrs.find_alloc(dst_addr);
+
+        #[allow(clippy::collapsible_else_if)]
+        if let Some((src_base, src_alloc)) = src {
+            let src_res = src_alloc.alloc.get_res_for_access(ctx, RWFlags::RD)?;
+            let src_offset = src_addr - src_base;
+
+            if let Some((dst_base, dst_alloc)) = dst {
+                let dst_res = dst_alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
+                let dst_offset = dst_addr - dst_base;
+
+                ctx.resource_copy_buffer(
+                    src_res,
+                    src_offset as i32,
+                    dst_res,
+                    dst_offset as u32,
+                    size as i32,
+                );
+            } else {
+                let map = ctx
+                    .buffer_map(src_res, src_offset as i32, size as i32, RWFlags::RD)
+                    .ok_or(CL_OUT_OF_HOST_MEMORY)?;
+                unsafe {
+                    ptr::copy_nonoverlapping(map.ptr(), dst_addr as *mut c_void, size);
+                }
+            }
+        } else {
+            if let Some((dst_base, dst_alloc)) = dst {
+                let dst_res = dst_alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
+                let dst_offset = dst_addr - dst_base;
+
+                ctx.buffer_subdata(
+                    dst_res,
+                    dst_offset as u32,
+                    src_addr as *const c_void,
+                    size as u32,
+                );
+            } else {
+                unsafe {
+                    ptr::copy(src_addr as *const c_void, dst_addr as *mut c_void, size);
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn clear_svm<const T: usize>(
+        &self,
+        ctx: &QueueContext,
+        svm_ptr: usize,
+        size: usize,
+        pattern: [u8; T],
+    ) -> CLResult<()> {
+        let svm = self.svm.lock().unwrap();
+
+        if let Some((base, alloc)) = svm.svm_ptrs.find_alloc(svm_ptr) {
+            let res = alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
+            let offset = svm_ptr - base;
+            ctx.clear_buffer(res, &pattern, offset as u32, size as u32);
+        } else {
+            let slice = unsafe {
+                slice::from_raw_parts_mut(svm_ptr as *mut _, size / mem::size_of_val(&pattern))
+            };
+
+            slice.fill(pattern);
+        }
+
+        Ok(())
+    }
+
+    pub fn migrate_svm(
+        &self,
+        ctx: &QueueContext,
+        pointers: Vec<usize>,
+        sizes: Vec<usize>,
+        to_device: bool,
+        content_undefined: bool,
+    ) -> CLResult<()> {
+        let svm = self.svm.lock().unwrap();
+
+        if ctx.dev.system_svm_supported() {
+            ctx.svm_migrate(&pointers, &sizes, to_device, content_undefined);
+        } else {
+            for ptr in pointers {
+                let Some((_, alloc)) = svm.svm_ptrs.find_alloc(ptr) else {
+                    continue;
+                };
+
+                // we assume it's only read, so it remains valid on the host until future commands
+                // have different needs.
+                if to_device {
+                    alloc.alloc.get_res_for_access(ctx, RWFlags::RD)?;
+                } else {
+                    alloc.alloc.migrate_to_hostptr(ctx, RWFlags::RD)?;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    pub fn get_svm_alloc(&self, ptr: usize) -> Option<(*mut c_void, Arc<Allocation>)> {
+        self.svm
             .lock()
             .unwrap()
+            .svm_ptrs
             .find_alloc(ptr)
-            .map(|(ptr, layout)| (ptr as *const c_void, layout.size()))
+            .map(|(base, alloc)| (base as *mut c_void, Arc::clone(&alloc.alloc)))
+    }
+
+    pub fn find_svm_alloc(&self, ptr: usize) -> Option<(*mut c_void, usize)> {
+        self.svm
+            .lock()
+            .unwrap()
+            .svm_ptrs
+            .find_alloc(ptr)
+            .map(|(ptr, alloc)| (ptr as _, alloc.size()))
     }
 
     pub fn remove_svm_ptr(&self, ptr: usize) {
-        if let Some(layout) = self.svm_ptrs.lock().unwrap().remove(ptr) {
-            // SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout
-            // from the allocation
-            unsafe {
-                alloc::dealloc(ptr as *mut u8, layout);
-            }
-        }
+        self.svm.lock().unwrap().svm_ptrs.remove(ptr);
     }
 
     pub fn add_bda_ptr(&self, buffer: &Arc<Buffer>) {
diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs
index 6f6bd77a3ca..5fb1b48ed0f 100644
--- a/src/gallium/frontends/rusticl/core/device.rs
+++ b/src/gallium/frontends/rusticl/core/device.rs
@@ -27,6 +27,7 @@ use std::convert::TryInto;
 use std::env;
 use std::ffi::CStr;
 use std::mem::transmute;
+use std::num::NonZeroU64;
 use std::os::raw::*;
 use std::sync::Arc;
 use std::sync::Mutex;
@@ -104,6 +105,13 @@ pub trait HelperContextWrapper {
     where
         F: Fn(&HelperContext);
 
+    fn buffer_map(
+        &self,
+        res: &PipeResource,
+        offset: i32,
+        size: i32,
+        rw: RWFlags,
+    ) -> Option<PipeTransfer>;
     fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
     fn delete_compute_state(&self, cso: *mut c_void);
     fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
@@ -165,6 +173,16 @@ impl HelperContextWrapper for HelperContext<'_> {
         self.lock.flush()
     }
 
+    fn buffer_map(
+        &self,
+        res: &PipeResource,
+        offset: i32,
+        size: i32,
+        rw: RWFlags,
+    ) -> Option<PipeTransfer> {
+        self.lock.buffer_map(res, offset, size, rw)
+    }
+
     fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
         self.lock.create_compute_state(nir, static_local_mem)
     }
@@ -1157,10 +1175,33 @@ impl Device {
         && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
     }
 
-    pub fn svm_supported(&self) -> bool {
+    pub fn system_svm_supported(&self) -> bool {
         self.screen.caps().system_svm
     }
 
+    pub fn svm_supported(&self) -> bool {
+        if cfg!(not(target_pointer_width = "64")) {
+            return false;
+        }
+
+        self.system_svm_supported() || self.screen().is_vm_supported()
+    }
+
+    /// Checks if the device supports SVM _and_ that we were able to initialize SVM support on a
+    /// platform level.
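+    ///
+    /// This is what CL_DEVICE_SVM_CAPABILITIES reporting and the SVM entrypoints check;
+    /// svm_supported() only reflects the device's own capabilities.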
+    pub fn api_svm_supported(&self) -> bool {
+        self.system_svm_supported()
+            || (self.screen().is_vm_supported() && Platform::get().vm.is_some())
+    }
+
+    // returns (start, end)
+    pub fn vm_alloc_range(&self) -> Option<(NonZeroU64, NonZeroU64)> {
+        let min = self.screen.caps().min_vma;
+        let max = self.screen.caps().max_vma;
+
+        Some((NonZeroU64::new(min)?, NonZeroU64::new(max)?))
+    }
+
     pub fn unified_memory(&self) -> bool {
         self.screen.caps().uma
     }
diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs
index 7a161e8064c..c55ca5bde8c 100644
--- a/src/gallium/frontends/rusticl/core/kernel.rs
+++ b/src/gallium/frontends/rusticl/core/kernel.rs
@@ -524,6 +524,7 @@ pub struct Kernel {
     pub name: String,
     values: Mutex<Vec<Option<KernelArgValue>>>,
     pub bdas: Mutex<Vec<cl_mem_device_address_ext>>,
+    pub svms: Mutex<HashSet<usize>>,
     builds: HashMap<&'static Device, Arc<NirKernelBuilds>>,
     pub kernel_info: Arc<KernelInfo>,
 }
@@ -1246,6 +1247,7 @@ impl Kernel {
             name: name,
             values: Mutex::new(values),
             bdas: Mutex::new(Vec::new()),
+            svms: Mutex::new(HashSet::new()),
             builds: builds,
             kernel_info: kernel_info,
         })
@@ -1323,6 +1325,7 @@ impl Kernel {
         let arg_values = self.arg_values().clone();
         let nir_kernel_builds = Arc::clone(&self.builds[q.device]);
         let mut bdas = self.bdas.lock().unwrap().clone();
+        let svms = self.svms.lock().unwrap().clone();
         let mut buffer_arcs = HashMap::new();
         let mut image_arcs = HashMap::new();
 
@@ -1442,6 +1445,12 @@ impl Kernel {
             printf_buf = Some(buf);
         }
 
+        // translate SVM pointers to their base first
+        let mut svms: HashSet<_> = svms
+            .into_iter()
+            .filter_map(|svm_pointer| Some(q.context.find_svm_alloc(svm_pointer)?.0 as usize))
+            .collect();
+
         for arg in &nir_kernel_build.compiled_args {
             let is_opaque = if let CompiledKernelArgType::APIArg(idx) = arg.kind {
                 kernel_info.args[idx].kind.is_opaque()
@@ -1485,6 +1494,9 @@ impl Kernel {
                         if let Some(address) = buffer.dev_address(ctx.dev) {
                             let _ = buffer.get_res_for_access(ctx, rw)?;
                             bdas.push(address.get());
+                        } else if buffer.is_svm() {
+                            let _ = buffer.get_res_for_access(ctx, rw)?;
+                            svms.insert(buffer.host_ptr() as usize);
                         }
                     } else {
                         let res = buffer.get_res_for_access(ctx, rw)?;
@@ -1498,6 +1510,11 @@ impl Kernel {
                     }
                 }
                 &KernelArgValue::SVM(handle) => {
+                    // get the base address so we deduplicate properly
+                    if let Some((base, _)) = q.context.find_svm_alloc(handle) {
+                        svms.insert(base as usize);
+                    }
+
                     if !api_arg.dead {
                         add_pointer(q, &mut input, handle as u64);
                     }
@@ -1603,11 +1620,21 @@ impl Kernel {
             .filter_map(|address| q.context.find_bda_alloc(q.device, address))
             .collect::<Vec<_>>();
 
-        let bdas: Vec<_> = bdas
+        let mut bdas: Vec<_> = bdas
             .iter()
             .map(|buffer| Ok(buffer.get_res_for_access(ctx, RWFlags::RW)?.deref()))
             .collect::<CLResult<_>>()?;
 
+        let svms_new = svms
+            .into_iter()
+            .filter_map(|svm| q.context.copy_svm_to_dev(ctx, svm).transpose())
+            .collect::<CLResult<Vec<_>>>()?;
+
+        // SVM resources need to be resident just like BDA buffers, so pass them along in the same list.
+        for svm in &svms_new {
+            bdas.push(svm);
+        }
+
         // subtract the shader local_size as we only request something on top of that.
         variable_local_size -= static_local_size;
 
@@ -1819,7 +1846,7 @@ impl Kernel {
     }
 
     pub fn has_svm_devs(&self) -> bool {
-        self.prog.devs.iter().any(|dev| dev.svm_supported())
+        self.prog.devs.iter().any(|dev| dev.api_svm_supported())
     }
 
     pub fn subgroup_sizes(&self, dev: &Device) -> impl ExactSizeIterator<Item = usize> {
@@ -1876,6 +1903,7 @@ impl Clone for Kernel {
             name: self.name.clone(),
             values: Mutex::new(self.arg_values().clone()),
             bdas: Mutex::new(self.bdas.lock().unwrap().clone()),
+            svms: Mutex::new(self.svms.lock().unwrap().clone()),
             builds: self.builds.clone(),
             kernel_info: Arc::clone(&self.kernel_info),
         }
diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs
index 98ae5b9e722..33c96d548f8 100644
--- a/src/gallium/frontends/rusticl/core/memory.rs
+++ b/src/gallium/frontends/rusticl/core/memory.rs
@@ -398,11 +398,17 @@ pub struct SubAllocation {
     offset: usize,
 }
 
+pub struct SvmAllocation {
+    alloc: Arc<Allocation>,
+    offset: usize,
+}
+
 /// Abstraction over the memory allocation. It might be a real GPU backing storage or simply a sub
 /// allocation over an existing memory object.
-enum Allocation {
+pub enum Allocation {
     Resource(ResourceAllocation),
     SubAlloc(SubAllocation),
+    Svm(SvmAllocation),
 }
 
 // TODO: - Once it's used for more stuff might make sense to split it into an Image and Buffer
@@ -455,6 +461,14 @@ impl Allocation {
         })
     }
 
+    fn new_svm(alloc: Arc<Allocation>, offset: usize) -> Self {
+        Self::Svm(SvmAllocation {
+            // we precalculate the entire offset here.
+            offset: alloc.offset() + offset,
+            alloc: alloc,
+        })
+    }
+
     /// Returns true if the backing storage of the two objects is equal.
     fn backing_resource_eq(&self, other: &Self) -> bool {
         ptr::eq(self.get_real_resource(), other.get_real_resource())
@@ -465,24 +479,26 @@ impl Allocation {
         match self {
             Allocation::SubAlloc(sub) => sub.mem.alloc.get_real_resource(),
             Allocation::Resource(res) => res,
+            Allocation::Svm(svm) => svm.alloc.get_real_resource(),
         }
     }
 
     /// Returns the resource associated with `dev` without any data migration.
-    fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
-        self.get_real_resource()
-            .res
-            .get(dev)
-            .ok_or(CL_OUT_OF_HOST_MEMORY)
+    fn get_res_of_dev(&self, dev: &Device) -> Option<&Arc<PipeResource>> {
+        self.get_real_resource().res.get(dev)
     }
 
     /// Returns the resource associated with `ctx.dev` and transparently migrate the data.
-    fn get_res_for_access(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<&Arc<PipeResource>> {
+    pub fn get_res_for_access(
+        &self,
+        ctx: &QueueContext,
+        rw: RWFlags,
+    ) -> CLResult<&Arc<PipeResource>> {
         self.get_real_resource().get_res_for_access(ctx, rw)
     }
 
     /// Migrates the content to the host. Fails if there is no host ptr.
-    pub fn _migrate_to_hostptr(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<()> {
+    pub fn migrate_to_hostptr(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<()> {
         self.get_real_resource().migrate_to_hostptr(ctx, rw)
     }
 
@@ -497,14 +513,20 @@ impl Allocation {
         host_ptr as _
     }
 
-    fn is_user_alloc_for_dev(&self, dev: &Device) -> CLResult<bool> {
-        Ok(self.get_res_of_dev(dev)?.is_user())
+    fn is_user_alloc_for_dev(&self, dev: &Device) -> bool {
+        if let Some(res) = self.get_res_of_dev(dev) {
+            res.is_user()
+        } else {
+            // for SVM allocations there might not even be a real resource
+            dev.system_svm_supported()
+        }
     }
 
     fn offset(&self) -> usize {
         match self {
             Allocation::Resource(res) => res.offset,
             Allocation::SubAlloc(sub) => sub.offset,
+            Allocation::Svm(svm) => svm.offset,
         }
     }
 }
@@ -773,39 +795,71 @@ impl MemBase {
             .copied()
             == Some(CL_TRUE.into());
 
-        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
-            ResourceType::Staging
+        // if it's a SVM host ptr, we just use the already allocated resource if it exists, because
+        // this is actually mandated by the spec. The size requirement will be checked inside the
+        // API layer.
+        //
+        // From the OpenCL spec:
+        //    If clCreateBuffer or clCreateBufferWithProperties is called with a pointer returned
+        //    by clSVMAlloc as its host_ptr argument, and CL_MEM_USE_HOST_PTR is set in its flags
+        //    argument, clCreateBuffer or clCreateBufferWithProperties will succeed and return a
+        //    valid non-zero buffer object as long as the size argument is no larger than the size
+        //    argument passed in the original clSVMAlloc call. The new buffer object returned has
+        //    the shared memory as the underlying storage.
+        let svm = bit_check(flags, CL_MEM_USE_HOST_PTR)
+            .then(|| context.get_svm_alloc(host_ptr as usize))
+            .flatten();
+
+        let alloc = if let Some((svm_ptr, ref svm_alloc)) = svm {
+            // SAFETY: svm_ptr is the base of the allocation host_ptr points into.
+            let offset = unsafe { host_ptr.byte_offset_from(svm_ptr) } as usize;
+            Allocation::new_svm(Arc::clone(svm_alloc), offset)
         } else {
-            ResourceType::Normal
+            let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
+                ResourceType::Staging
+            } else {
+                ResourceType::Normal
+            };
+
+            let buffer = context.create_buffer(
+                size,
+                host_ptr,
+                bit_check(flags, CL_MEM_COPY_HOST_PTR),
+                bda,
+                res_type,
+            )?;
+
+            // We can only keep the host_ptr when `CL_MEM_USE_HOST_PTR` is set.
+            if !bit_check(flags, CL_MEM_USE_HOST_PTR) {
+                host_ptr = ptr::null_mut()
+            }
+
+            Allocation::new(buffer, 0, host_ptr)
         };
 
-        let buffer = context.create_buffer(
-            size,
-            host_ptr,
-            bit_check(flags, CL_MEM_COPY_HOST_PTR),
-            bda,
-            res_type,
-        )?;
-
-        // We can only keep the host_ptr when `CL_MEM_USE_HOST_PTR` is set.
-        if !bit_check(flags, CL_MEM_USE_HOST_PTR) {
-            host_ptr = ptr::null_mut()
-        }
-
         let addresses = bda.then(|| {
             context
                 .devs
                 .iter()
                 .filter(|dev| dev.bda_supported())
                 .map(|&dev| {
-                    let address = buffer[dev].resource_get_address();
+                    // If the buffer is backed by an SVM allocation, we need to use its address.
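+                    // Otherwise it's the resource's own GPU address, or, for system SVM, the
+                    // host pointer itself.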
+                    let address = if let Some((address, _)) = svm {
+                        NonZeroU64::new(address as usize as u64)
+                    } else if let Some(res) = alloc.get_res_of_dev(dev) {
+                        res.resource_get_address()
+                    } else {
+                        // if there is no resource, it's a system SVM allocation
+                        assert!(dev.system_svm_supported());
+                        NonZeroU64::new(alloc.host_ptr() as u64)
+                    };
+
                     Some((dev, address?))
                 })
                 .collect::<Option<HashMap<_, _>>>()
                 .unwrap()
         });
 
-        let alloc = Allocation::new(buffer, 0, host_ptr);
         let buffer = Arc::new(Buffer {
             base: Self {
                 base: CLObjectBase::new(RusticlTypes::Buffer),
@@ -1102,6 +1156,9 @@ impl MemBase {
         match &self.alloc {
             Allocation::SubAlloc(sub) => Some(&sub.mem),
             Allocation::Resource(_) => None,
+            // In theory the SVM allocation is the parent, but that's not a memory object on the API
+            // level.
+            Allocation::Svm(_) => None,
         }
     }
 
@@ -1109,10 +1166,10 @@ impl MemBase {
         self.alloc.host_ptr()
     }
 
-    fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
+    fn is_pure_user_memory(&self, d: &Device) -> bool {
         // 1Dbuffer objects are weird. The parent memory object can be a host_ptr thing, but we are
         // not allowed to actually return a pointer based on the host_ptr when mapping.
-        Ok(self.alloc.is_user_alloc_for_dev(d)? && !self.host_ptr().is_null())
+        self.alloc.is_user_alloc_for_dev(d) && !self.host_ptr().is_null()
     }
 
     fn map(
@@ -1430,7 +1487,7 @@ impl Buffer {
 
         // in this case we only need to migrate to the device if the data is located on a device not
         // having a userptr allocation.
-        if self.is_pure_user_memory(ctx.dev)? {
+        if self.is_pure_user_memory(ctx.dev) {
             let rw = if mapping.writes {
                 RWFlags::RW
             } else {
@@ -1446,7 +1503,7 @@ impl Buffer {
 
     pub fn sync_unmap(&self, ctx: &QueueContext, ptr: MutMemoryPtr) -> CLResult<()> {
         // no need to update
-        if self.is_pure_user_memory(ctx.dev)? {
+        if self.is_pure_user_memory(ctx.dev) {
             return Ok(());
         }
 
@@ -1904,7 +1961,7 @@ impl Image {
 
         // in this case we only need to migrate to the device if the data is located on a device not
        // having a userptr allocation.
-        if self.is_pure_user_memory(ctx.dev)? {
+        if self.is_pure_user_memory(ctx.dev) {
            let rw = if mapping.writes {
                 RWFlags::RW
             } else {
@@ -1930,7 +1987,7 @@ impl Image {
 
     pub fn sync_unmap(&self, ctx: &QueueContext, ptr: MutMemoryPtr) -> CLResult<()> {
         // no need to update
-        if self.is_pure_user_memory(ctx.dev)? {
+        if self.is_pure_user_memory(ctx.dev) {
             return Ok(());
         }
 
diff --git a/src/gallium/frontends/rusticl/core/platform.rs b/src/gallium/frontends/rusticl/core/platform.rs
index 0210ff6938c..190090ae907 100644
--- a/src/gallium/frontends/rusticl/core/platform.rs
+++ b/src/gallium/frontends/rusticl/core/platform.rs
@@ -3,25 +3,47 @@
 use crate::api::icd::DISPATCH;
 use crate::core::device::*;
 use crate::core::version::*;
 
+use mesa_rust::pipe::screen::ScreenVMAllocation;
+use mesa_rust::util::vm::VM;
 use mesa_rust_gen::*;
 use mesa_rust_util::string::char_arr_to_cstr;
 use rusticl_opencl_gen::*;
 
+use std::cmp;
 use std::env;
+use std::num::NonZeroU64;
+use std::ops::Deref;
 use std::ptr;
 use std::ptr::addr_of;
 use std::ptr::addr_of_mut;
+use std::sync::Mutex;
 use std::sync::Once;
 
 /// Maximum size a pixel can be across all supported image formats.
 pub const MAX_PIXEL_SIZE_BYTES: u64 = 4 * 4;
 
+pub struct PlatformVM<'a> {
+    vm: Mutex<VM>,
+    // we make use of the drop to automatically free the reserved VM
+    _dev_allocs: Vec<ScreenVMAllocation<'a>>,
+}
+
+impl Deref for PlatformVM<'_> {
+    type Target = Mutex<VM>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.vm
+    }
+}
+
 #[repr(C)]
 pub struct Platform {
     dispatch: &'static cl_icd_dispatch,
     pub devs: Vec<Device>,
     pub extension_string: String,
     pub extensions: Vec<cl_name_version>,
+    // lifetime has to match the one of devs
+    pub vm: Option<PlatformVM<'static>>,
 }
 
 pub enum PerfDebugLevel {
@@ -57,6 +79,7 @@ static mut PLATFORM: Platform = Platform {
     devs: Vec::new(),
     extension_string: String::new(),
     extensions: Vec::new(),
+    vm: None,
 };
 static mut PLATFORM_DBG: PlatformDebug = PlatformDebug {
     allow_invalid_spirv: false,
@@ -139,13 +162,56 @@ impl Platform {
         unsafe { &*addr_of!(PLATFORM_FEATURES) }
     }
 
-    fn init(&mut self) {
+    fn alloc_vm(devs: &[Device]) -> Option<PlatformVM<'_>> {
+        // We support buffer SVM only on 64 bit platforms
+        if cfg!(not(target_pointer_width = "64")) {
+            return None;
+        }
+
+        // No need to check system SVM devices
+        let devs = devs.iter().filter(|dev| !dev.system_svm_supported());
+
+        let (start, end) = devs.clone().filter_map(|dev| dev.vm_alloc_range()).reduce(
+            |(min_a, max_a), (min_b, max_b)| (cmp::max(min_a, min_b), cmp::min(max_a, max_b)),
+        )?;
+
+        // Allocate 1/8 of the available VM. No specific reason for this limit. Might have to bump
+        // this later, but it's probably fine as there is plenty of VM available.
+        let size = NonZeroU64::new((end.get() / 8).next_power_of_two())?;
+        if start > size {
+            return None;
+        }
+
+        let mut allocs = Vec::new();
+        for dev in devs {
+            allocs.push(dev.screen().alloc_vm(size, size)?);
+        }
+
+        Some(PlatformVM {
+            vm: Mutex::new(VM::new(size, size)),
+            _dev_allocs: allocs,
+        })
+    }
+
+    fn init(&'static mut self) {
         unsafe {
             glsl_type_singleton_init_or_ref();
         }
 
         self.devs = Device::all();
 
+        self.vm = Self::alloc_vm(&self.devs);
+
+        if self
+            .devs
+            .iter()
+            .any(|dev| !dev.system_svm_supported() && dev.svm_supported())
+            && self.vm.is_none()
+        {
+            // TODO: in theory we should also remove the exposed SVM extension, but...
+            eprintln!("rusticl: could not initialize SVM support");
+        }
+
         let mut exts_str: Vec<&str> = Vec::new();
         let mut add_ext = |major, minor, patch, ext: &'static str| {
             self.extensions
diff --git a/src/gallium/frontends/rusticl/mesa/pipe/resource.rs b/src/gallium/frontends/rusticl/mesa/pipe/resource.rs
index 592efd152ac..1482b6e08ff 100644
--- a/src/gallium/frontends/rusticl/mesa/pipe/resource.rs
+++ b/src/gallium/frontends/rusticl/mesa/pipe/resource.rs
@@ -282,7 +282,18 @@ impl PipeResource {
 
 impl Drop for PipeResource {
     fn drop(&mut self) {
-        unsafe { pipe_resource_reference(&mut self.pipe.as_ptr(), ptr::null_mut()) }
+        unsafe {
+            let pipe = self.pipe.as_ref();
+            let screen = pipe.screen.as_ref().unwrap();
+
+            if pipe.flags & PIPE_RESOURCE_FLAG_FRONTEND_VM != 0 {
+                if let Some(resource_assign_vma) = screen.resource_assign_vma {
+                    resource_assign_vma(pipe.screen, self.pipe(), 0);
+                }
+            }
+
+            pipe_resource_reference(&mut self.pipe.as_ptr(), ptr::null_mut());
+        }
     }
 }
 
diff --git a/src/gallium/frontends/rusticl/meson.build b/src/gallium/frontends/rusticl/meson.build
index 7c8ee0ad257..426b0a9b993 100644
--- a/src/gallium/frontends/rusticl/meson.build
+++ b/src/gallium/frontends/rusticl/meson.build
@@ -308,6 +308,12 @@ rusticl_mesa_bindings = rust.bindgen(
     '--allowlist-function', 'rusticl_.*',
     '--allowlist-function', 'std(err|out)_ptr',
 
+    # libc
+    '--allowlist-function', 'mmap',
+    '--allowlist-function', 'munmap',
+    '--allowlist-var', 'MAP_.*',
+    '--allowlist-var', 'PROT_.*',
+
     # winsys
     '--allowlist-var', 'WINSYS_HANDLE_TYPE_.*',
   ],
diff --git a/src/gallium/frontends/rusticl/rusticl_system_bindings.h b/src/gallium/frontends/rusticl/rusticl_system_bindings.h
index 5856946ab15..b33865a5a04 100644
--- a/src/gallium/frontends/rusticl/rusticl_system_bindings.h
+++ b/src/gallium/frontends/rusticl/rusticl_system_bindings.h
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <sys/mman.h>
 
 FILE *stdout_ptr(void);
 FILE *stderr_ptr(void);
diff --git a/src/gallium/frontends/rusticl/util/ptr.rs b/src/gallium/frontends/rusticl/util/ptr.rs
index f14fdec1014..4d2f24ff415 100644
--- a/src/gallium/frontends/rusticl/util/ptr.rs
+++ b/src/gallium/frontends/rusticl/util/ptr.rs
@@ -1,6 +1,9 @@
 use std::{
     alloc::Layout,
-    collections::{btree_map::Entry, BTreeMap},
+    collections::{
+        btree_map::{Entry, Values, ValuesMut},
+        BTreeMap,
+    },
     hash::{Hash, Hasher},
     mem,
     ops::{Add, Deref},
@@ -175,6 +178,14 @@ impl<P, T: AllocSize<P>> TrackedPointers<P, T> {
             ptrs: BTreeMap::new(),
         }
     }
+
+    pub fn values(&self) -> Values<'_, P, T> {
+        self.ptrs.values()
+    }
+
+    pub fn values_mut(&mut self) -> ValuesMut<'_, P, T> {
+        self.ptrs.values_mut()
+    }
 }
 
 impl<P, T: AllocSize<P>> TrackedPointers<P, T>
 where
@@ -201,6 +212,18 @@ where
         None
     }
 
+    pub fn find_alloc_mut(&mut self, ptr: P) -> Option<(P, &mut T)> {
+        if let Some((&base, val)) = self.ptrs.range_mut(..=ptr).next_back() {
+            let size = val.size();
+            // we check if ptr is within [base..base+size)
+            // means we can check if ptr - (base + size) < 0
+            if ptr < (base + size) {
+                return Some((base, val));
+            }
+        }
+        None
+    }
+
     pub fn find_alloc_precise(&self, ptr: P) -> Option<&T> {
         self.ptrs.get(&ptr)
     }
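
Editor's note: the pointer-to-base lookup that TrackedPointers::find_alloc implements above is
the core mechanism of this series -- any pointer into the middle of an SVM allocation has to
resolve to the allocation's base address. A minimal, self-contained sketch of the same idea
(illustrative only, not part of the patch; the SvmTracker type and its names are made up):

    use std::collections::BTreeMap;

    // Hypothetical stand-in for TrackedPointers<usize, SVMAlloc>: maps base address -> size.
    struct SvmTracker {
        ptrs: BTreeMap<usize, usize>,
    }

    impl SvmTracker {
        fn insert(&mut self, base: usize, size: usize) {
            self.ptrs.insert(base, size);
        }

        // Like find_alloc above: any pointer inside [base, base + size) resolves to its base.
        fn find_alloc(&self, ptr: usize) -> Option<(usize, usize)> {
            // Largest tracked base <= ptr ...
            let (&base, &size) = self.ptrs.range(..=ptr).next_back()?;
            // ... is a hit only if ptr actually falls inside that allocation.
            (ptr < base + size).then_some((base, size))
        }
    }

    fn main() {
        let mut tracker = SvmTracker { ptrs: BTreeMap::new() };
        tracker.insert(0x1000, 0x100);

        // A pointer into the middle of an allocation resolves to its base, which is why
        // clSetKernelExecInfo(CL_KERNEL_EXEC_INFO_SVM_PTRS) only needs one pointer per
        // allocation.
        assert_eq!(tracker.find_alloc(0x1080), Some((0x1000, 0x100)));
        assert_eq!(tracker.find_alloc(0x1100), None); // one past the end
        assert_eq!(tracker.find_alloc(0x0fff), None); // below the allocation
    }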