rusticl: add support for coarse-grain buffer SVM

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32942>

parent 6e13e438d1
commit da4de8d7e3

14 changed files with 685 additions and 157 deletions
@@ -756,7 +756,7 @@ Rusticl OpenCL 1.2 -- all DONE:
 
 Rusticl OpenCL 2.0 -- all DONE:
 
-Shared virtual memory                   in progress (nvc0, llvmpipe)
+Shared virtual memory                   DONE (nvc0, llvmpipe)
 Device queues                           not started
 - cl_khr_create_command_queue           DONE
 - Additional queries for clGetDeviceInfo DONE
@@ -28,3 +28,4 @@ VK_EXT_texel_buffer_alignment on panvk
 cl_khr_kernel_clock on freedreno, iris, llvmpipe, nvc0, panfrost, radeonsi and zink with llvm-19 or newer
 GL_KHR_texture_compression_astc_hdr on panfrost and asahi
 cl_ext_buffer_device_address on llvmpipe and zink
+Completed OpenCL 2.0 coarse grain buffer SVM support
@@ -288,16 +288,17 @@ unsafe impl CLInfo<cl_device_info> for cl_device_id {
             }
         }
         CL_DEVICE_SVM_CAPABILITIES | CL_DEVICE_SVM_CAPABILITIES_ARM => {
-            v.write::<cl_device_svm_capabilities>(
-                if dev.svm_supported() {
-                    CL_DEVICE_SVM_COARSE_GRAIN_BUFFER
-                        | CL_DEVICE_SVM_FINE_GRAIN_BUFFER
-                        | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM
-                } else {
-                    0
-                }
-                .into(),
-            )
+            let mut caps = 0;
+
+            if dev.api_svm_supported() {
+                caps |= CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
+            }
+
+            if dev.system_svm_supported() {
+                caps |= CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
+            }
+
+            v.write::<cl_device_svm_capabilities>(caps.into())
         }
         CL_DEVICE_TYPE => {
             // CL_DEVICE_TYPE_DEFAULT ... will never be returned in CL_DEVICE_TYPE for any
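For reference, a coarse-grain-only device now reports just CL_DEVICE_SVM_COARSE_GRAIN_BUFFER. The standalone sketch below mirrors the capability-mask logic in the hunk above; the constant values are the standard OpenCL bit assignments, and the two booleans stand in for Device::api_svm_supported() and Device::system_svm_supported():

```rust
// Illustrative only: how the SVM capability mask reported to clGetDeviceInfo is assembled.
const CL_DEVICE_SVM_COARSE_GRAIN_BUFFER: u64 = 1 << 0;
const CL_DEVICE_SVM_FINE_GRAIN_BUFFER: u64 = 1 << 1;
const CL_DEVICE_SVM_FINE_GRAIN_SYSTEM: u64 = 1 << 2;

fn svm_caps(api_svm: bool, system_svm: bool) -> u64 {
    let mut caps = 0;
    if api_svm {
        caps |= CL_DEVICE_SVM_COARSE_GRAIN_BUFFER;
    }
    if system_svm {
        caps |= CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
    }
    caps
}

fn main() {
    // A device with driver VM support but no system SVM reports coarse-grain buffer SVM only.
    assert_eq!(svm_caps(true, false), CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
}
```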
@@ -554,7 +554,7 @@ fn set_kernel_exec_info(
     // CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM if no devices in the context associated with kernel
     // support SVM.
     let check_svm_support = || {
-        if devs.iter().all(|dev| !dev.svm_supported()) {
+        if devs.iter().all(|dev| !dev.api_svm_supported()) {
             Err(CL_INVALID_OPERATION)
         } else {
             Ok(())

@@ -577,16 +577,36 @@ fn set_kernel_exec_info(
         CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => {
             check_svm_support()?;
 
+            // reuse the existing container so we avoid reallocations
+            let mut svms = k.svms.lock().unwrap();
+
             // To specify that no SVM allocations will be accessed by a kernel other than those set
             // as kernel arguments, specify an empty set by passing param_value_size equal to zero
             // and param_value equal to NULL.
-            if !param_value.is_null() || param_value_size != 0 {
-                let _ = unsafe {
+            if param_value_size == 0 && param_value.is_null() {
+                svms.clear();
+            } else {
+                let pointers = unsafe {
                     cl_slice::from_raw_parts_bytes_len::<*const c_void>(
                         param_value,
                         param_value_size,
                     )?
                 };
+
+                // We need to clear _after_ the error checking above. We could just assign a new
+                // container, however we also want to reuse the allocations.
+                svms.clear();
+                pointers
+                    .iter()
+                    // Each of the pointers can be the pointer returned by clSVMAlloc or can be a
+                    // pointer to the middle of an SVM allocation. It is sufficient to specify one
+                    // pointer for each SVM allocation.
+                    //
+                    // So we'll simply fetch the base and store that one.
+                    .filter_map(|&handle| k.prog.context.find_svm_alloc(handle as usize))
+                    .for_each(|(base, _)| {
+                        svms.insert(base as usize);
+                    });
             }
         }
         CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM

@@ -600,14 +620,19 @@ fn set_kernel_exec_info(
             if val.len() != 1 {
                 return Err(CL_INVALID_VALUE);
             }
+
+            // CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and
+            // param_value is CL_TRUE but no devices in context associated with kernel support
+            // fine-grain system SVM allocations.
+            if val[0] == CL_TRUE && devs.iter().all(|dev| !dev.system_svm_supported()) {
+                return Err(CL_INVALID_OPERATION);
+            }
         }
         // CL_INVALID_VALUE if param_name is not valid
         _ => return Err(CL_INVALID_VALUE),
     }
 
     Ok(())
-
-    // CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and param_value is CL_TRUE but no devices in context associated with kernel support fine-grain system SVM allocations.
 }
 
 #[cl_entrypoint(clEnqueueNDRangeKernel)]
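The CL_KERNEL_EXEC_INFO_SVM_PTRS handling above deduplicates interior pointers by storing only the base of each allocation. A small self-contained model of that behaviour; find_svm_alloc here is a hypothetical stand-in that searches a plain slice of (base, size) pairs rather than the real Context:

```rust
use std::collections::HashSet;

// Hypothetical stand-in for Context::find_svm_alloc(): maps any pointer inside an SVM
// allocation to that allocation's (base, size).
fn find_svm_alloc(allocs: &[(usize, usize)], ptr: usize) -> Option<(usize, usize)> {
    allocs
        .iter()
        .copied()
        .find(|&(base, size)| ptr >= base && ptr < base + size)
}

fn main() {
    // Two pointers into the same allocation collapse to a single tracked base.
    let allocs = [(0x1000, 0x100), (0x3000, 0x200)];
    let handles = [0x1000, 0x1080, 0x3010];

    let mut svms = HashSet::new();
    for &h in &handles {
        if let Some((base, _)) = find_svm_alloc(&allocs, h) {
            svms.insert(base);
        }
    }
    assert_eq!(svms.len(), 2);
}
```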
@@ -21,7 +21,7 @@ use rusticl_proc_macros::cl_info_entrypoint;
 
 use std::cmp;
-use std::cmp::Ordering;
-use std::mem::{self, MaybeUninit};
+use std::mem;
+use std::num::NonZeroU64;
 use std::os::raw::c_void;
 use std::ptr;
@@ -297,13 +297,13 @@ fn create_buffer_with_properties(
 
     // or if CL_MEM_USE_HOST_PTR is set in flags and host_ptr is a pointer returned by clSVMAlloc
     // and size is greater than the size passed to clSVMAlloc.
-    if let Some((svm_ptr, svm_layout)) = c.find_svm_alloc(host_ptr as usize) {
+    if let Some((svm_ptr, alloc_size)) = c.find_svm_alloc(host_ptr as usize) {
         // SAFETY: they are part of the same allocation, and because host_ptr >= svm_ptr we can cast
         // to usize.
         let diff = unsafe { host_ptr.byte_offset_from(svm_ptr) } as usize;
 
         // technically we don't have to account for the offset, but it's almost for free.
-        if size > svm_layout - diff {
+        if size > alloc_size - diff {
             return Err(CL_INVALID_BUFFER_SIZE);
         }
     }
@@ -2376,13 +2376,16 @@ pub fn svm_alloc(
         return Err(CL_INVALID_VALUE);
     }
 
-    let alignment = if alignment != 0 {
-        alignment as usize
-    } else {
-        // When alignment is 0, the size of the largest supported type is used.
-        // In the case of the full profile, that's `long16`.
-        mem::size_of::<[u64; 16]>()
-    };
+    // When alignment is 0, the size of the largest supported type is used.
+    // In the case of the full profile, that's `long16`.
+    let alignment = NonZeroU64::new(alignment.into())
+        .unwrap_or(NonZeroU64::new(mem::size_of::<[u64; 16]>() as u64).unwrap());
+
+    // size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value for any device in context.
+    let size = NonZeroU64::new(size as u64).ok_or(CL_INVALID_VALUE)?;
+    if size.get() > c.max_mem_alloc() {
+        return Err(CL_INVALID_VALUE);
+    }
 
     c.alloc_svm_ptr(size, alignment)
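The alignment handling in clSVMAlloc reduces to one rule: a zero alignment means the size of the largest supported type, which for a full-profile device is long16 (128 bytes). A minimal sketch of that defaulting, using the same NonZeroU64 trick as the new code:

```rust
use std::mem;
use std::num::NonZeroU64;

// Zero means "largest supported type"; any other value is taken as-is.
fn effective_alignment(alignment: u32) -> NonZeroU64 {
    NonZeroU64::new(alignment.into())
        .unwrap_or(NonZeroU64::new(mem::size_of::<[u64; 16]>() as u64).unwrap())
}

fn main() {
    assert_eq!(effective_alignment(0).get(), 128);
    assert_eq!(effective_alignment(64).get(), 64);
}
```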
@@ -2422,7 +2425,7 @@ fn enqueue_svm_free_impl(
     }
 
     // CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
-    if !q.device.svm_supported() {
+    if !q.device.api_svm_supported() {
         return Err(CL_INVALID_OPERATION);
     }
 
@ -2529,47 +2532,33 @@ fn enqueue_svm_memcpy_impl(
|
|||
let block = check_cl_bool(blocking_copy).ok_or(CL_INVALID_VALUE)?;
|
||||
|
||||
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
|
||||
if !q.device.svm_supported() {
|
||||
if !q.device.api_svm_supported() {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
// CL_INVALID_VALUE if dst_ptr or src_ptr is NULL.
|
||||
if src_ptr.is_null() || dst_ptr.is_null() {
|
||||
return Err(CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
let src_ptr = src_ptr as usize;
|
||||
let dst_ptr = dst_ptr as usize;
|
||||
|
||||
// CL_MEM_COPY_OVERLAP if the values specified for dst_ptr, src_ptr and size result in an
|
||||
// overlapping copy.
|
||||
let dst_ptr_addr = dst_ptr as usize;
|
||||
let src_ptr_addr = src_ptr as usize;
|
||||
if (src_ptr_addr <= dst_ptr_addr && dst_ptr_addr < src_ptr_addr + size)
|
||||
|| (dst_ptr_addr <= src_ptr_addr && src_ptr_addr < dst_ptr_addr + size)
|
||||
if (src_ptr <= dst_ptr && dst_ptr < src_ptr + size)
|
||||
|| (dst_ptr <= src_ptr && src_ptr < dst_ptr + size)
|
||||
{
|
||||
return Err(CL_MEM_COPY_OVERLAP);
|
||||
}
|
||||
|
||||
// CAST: We have no idea about the type or initialization status of these bytes.
|
||||
// MaybeUninit<u8> is the safe bet.
|
||||
let src_ptr = src_ptr.cast::<MaybeUninit<u8>>();
|
||||
|
||||
// CAST: We have no idea about the type or initialization status of these bytes.
|
||||
// MaybeUninit<u8> is the safe bet.
|
||||
let dst_ptr = dst_ptr.cast::<MaybeUninit<u8>>();
|
||||
|
||||
// SAFETY: It is up to the application to ensure the memory is valid to read for `size` bytes
|
||||
// and that it doesn't modify it until the command has completed.
|
||||
let src = unsafe { cl_slice::from_raw_parts(src_ptr, size)? };
|
||||
|
||||
// SAFETY: We've ensured there's no aliasing between src and dst. It is up to the application
|
||||
// to ensure the memory is valid to read and write for `size` bytes and that it doesn't modify
|
||||
// or read from it until the command has completed.
|
||||
let dst = unsafe { cl_slice::from_raw_parts_mut(dst_ptr, size)? };
|
||||
|
||||
create_and_queue(
|
||||
q,
|
||||
cmd_type,
|
||||
evs,
|
||||
event,
|
||||
block,
|
||||
Box::new(move |_, _| {
|
||||
dst.copy_from_slice(src);
|
||||
Ok(())
|
||||
}),
|
||||
Box::new(move |q, ctx| q.context.copy_svm(ctx, src_ptr, dst_ptr, size)),
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -2636,10 +2625,20 @@ fn enqueue_svm_mem_fill_impl(
|
|||
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
|
||||
|
||||
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
|
||||
if !q.device.svm_supported() {
|
||||
if !q.device.api_svm_supported() {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
// CL_INVALID_VALUE if svm_ptr is NULL.
|
||||
if svm_ptr.is_null() {
|
||||
return Err(CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
// CL_INVALID_VALUE if svm_ptr is not aligned to pattern_size bytes.
|
||||
if !is_aligned_to(svm_ptr, pattern_size) {
|
||||
return Err(CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
// CL_INVALID_VALUE if pattern is NULL [...]
|
||||
if pattern.is_null() {
|
||||
return Err(CL_INVALID_VALUE);
|
||||
|
|
@ -2701,35 +2700,9 @@ fn enqueue_svm_mem_fill_impl(
|
|||
// `pattern_size` bytes and properly initialized.
|
||||
// Creating a bitwise copy can't create memory safety issues, since `Pattern` is `Copy`.
|
||||
let pattern = unsafe { pattern_ptr.read_unaligned() };
|
||||
let svm_ptr = svm_ptr as usize;
|
||||
|
||||
// CAST: Same as with `pattern`, we don't know the exact type of `svm_ptr`, but we do
|
||||
// know it's fine if we choose the same type here. The application might reasonably
|
||||
// give us uninitialized memory though, so cast to a `MaybeUninit<Pattern>`, which has
|
||||
// the same layout as `Pattern`.
|
||||
let svm_ptr = svm_ptr.cast::<MaybeUninit<Pattern>>();
|
||||
|
||||
// SAFETY: It is the calling application's responsibility to ensure that `svm_ptr` is
|
||||
// valid for reads and writes up to `size` bytes.
|
||||
// Since `pattern_size == mem::size_of::<Pattern>()` and `MaybeUninit<Pattern>` has the
|
||||
// same layout as `Pattern`, we know that
|
||||
// `size / pattern_size * mem::size_of<MaybeUninit<Pattern>>` equals `size`.
|
||||
//
|
||||
// Since we're creating a `&[MaybeUninit<Pattern>]` the initialization status does not
|
||||
// matter.
|
||||
//
|
||||
// From here on out we only access the referenced memory though this slice. In
|
||||
// particular, since we've made a copy of `pattern`, it doesn't matter if the memory
|
||||
// region referenced by `pattern` aliases the one referenced by this slice. It is up to
|
||||
// the application not to access it at all until this command has been completed.
|
||||
let svm_slice = unsafe { cl_slice::from_raw_parts_mut(svm_ptr, size / pattern_size)? };
|
||||
|
||||
Box::new(move |_, _| {
|
||||
for x in svm_slice {
|
||||
x.write(pattern);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
Box::new(move |q, ctx| q.context.clear_svm(ctx, svm_ptr, size, pattern.0))
|
||||
}};
|
||||
}
|
||||
|
||||
|
|
@@ -2817,7 +2790,7 @@ fn enqueue_svm_map_impl(
     let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?;
 
     // CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
-    if !q.device.svm_supported() {
+    if !q.device.api_svm_supported() {
         return Err(CL_INVALID_OPERATION);
     }
 

@@ -2834,7 +2807,15 @@ fn enqueue_svm_map_impl(
     // ... or if values specified in map_flags are not valid.
     validate_map_flags_common(flags)?;
 
-    create_and_queue(q, cmd_type, evs, event, block, Box::new(|_, _| Ok(())))
+    let svm_ptr = svm_ptr as usize;
+    create_and_queue(
+        q,
+        cmd_type,
+        evs,
+        event,
+        block,
+        Box::new(move |q, ctx| q.context.copy_svm_to_host(ctx, svm_ptr, flags)),
+    )
 }
 
 #[cl_entrypoint(clEnqueueSVMMap)]
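clEnqueueSVMMap now queues a real migration via copy_svm_to_host(). The one case where no copy is needed is a CL_MAP_WRITE_INVALIDATE_REGION map, because the caller promises not to read the previous contents. A tiny sketch of that decision; the constant is the standard OpenCL bit value:

```rust
// Maps that invalidate the region skip the device-to-host copy entirely.
const CL_MAP_WRITE_INVALIDATE_REGION: u64 = 1 << 2;

fn needs_copy_to_host(map_flags: u64) -> bool {
    map_flags & CL_MAP_WRITE_INVALIDATE_REGION == 0
}

fn main() {
    assert!(needs_copy_to_host(0)); // plain read/write maps migrate data first
    assert!(!needs_copy_to_host(CL_MAP_WRITE_INVALIDATE_REGION)); // invalidating maps do not
}
```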
@ -2897,7 +2878,7 @@ fn enqueue_svm_unmap_impl(
|
|||
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
|
||||
|
||||
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
|
||||
if !q.device.svm_supported() {
|
||||
if !q.device.api_svm_supported() {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
|
|
@ -2906,7 +2887,15 @@ fn enqueue_svm_unmap_impl(
|
|||
return Err(CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
create_and_queue(q, cmd_type, evs, event, false, Box::new(|_, _| Ok(())))
|
||||
create_and_queue(
|
||||
q,
|
||||
cmd_type,
|
||||
evs,
|
||||
event,
|
||||
false,
|
||||
// TODO: we _could_ migrate the content somewhere, but it's really pointless to do
|
||||
Box::new(move |_, _| Ok(())),
|
||||
)
|
||||
}
|
||||
|
||||
#[cl_entrypoint(clEnqueueSVMUnmap)]
|
||||
|
|
@ -2960,7 +2949,7 @@ fn enqueue_svm_migrate_mem(
|
|||
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
|
||||
|
||||
// CL_INVALID_OPERATION if the device associated with command queue does not support SVM.
|
||||
if !q.device.svm_supported() {
|
||||
if !q.device.api_svm_supported() {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
|
|
@ -3010,9 +2999,9 @@ fn enqueue_svm_migrate_mem(
|
|||
evs,
|
||||
event,
|
||||
false,
|
||||
Box::new(move |_, ctx| {
|
||||
ctx.svm_migrate(&svm_pointers, &sizes, to_device, content_undefined);
|
||||
Ok(())
|
||||
Box::new(move |q, ctx| {
|
||||
q.context
|
||||
.migrate_svm(ctx, svm_pointers, sizes, to_device, content_undefined)
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,15 @@
|
|||
use crate::api::icd::*;
|
||||
use crate::api::types::DeleteContextCB;
|
||||
use crate::api::util::checked_compare;
|
||||
use crate::api::util::bit_check;
|
||||
use crate::core::device::*;
|
||||
use crate::core::format::*;
|
||||
use crate::core::gl::*;
|
||||
use crate::core::memory::*;
|
||||
use crate::core::queue::*;
|
||||
use crate::core::util::*;
|
||||
use crate::impl_cl_type_trait;
|
||||
|
||||
use mesa_rust::pipe::context::RWFlags;
|
||||
use mesa_rust::pipe::resource::*;
|
||||
use mesa_rust::pipe::screen::ResourceType;
|
||||
use mesa_rust_gen::*;
|
||||
|
|
@ -19,15 +21,21 @@ use rusticl_opencl_gen::*;
|
|||
|
||||
use std::alloc;
|
||||
use std::alloc::Layout;
|
||||
use std::cmp::Ordering;
|
||||
use std::cmp;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::ffi::c_int;
|
||||
use std::mem;
|
||||
use std::num::NonZeroU64;
|
||||
use std::os::raw::c_void;
|
||||
use std::ptr;
|
||||
use std::slice;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::Weak;
|
||||
|
||||
use super::platform::Platform;
|
||||
|
||||
struct TrackedBDAAlloc {
|
||||
buffer: Weak<Buffer>,
|
||||
size: cl_mem_device_address_ext,
|
||||
|
|
@@ -39,6 +47,53 @@ impl AllocSize<cl_mem_device_address_ext> for TrackedBDAAlloc {
     }
 }
 
+struct SVMAlloc {
+    layout: Layout,
+    vma: Option<NonZeroU64>,
+    alloc: Arc<Allocation>,
+}
+
+impl SVMAlloc {
+    pub fn size(&self) -> usize {
+        self.layout.size()
+    }
+}
+
+impl Drop for SVMAlloc {
+    fn drop(&mut self) {
+        if let Some(vma) = self.vma {
+            let address = vma.get() as usize as *mut c_void;
+            unsafe {
+                debug_assert_eq!(0, munmap(address, self.size()));
+            }
+
+            Platform::get()
+                .vm
+                .as_ref()
+                .unwrap()
+                .lock()
+                .unwrap()
+                .free(vma, NonZeroU64::new(self.size() as u64).unwrap());
+        } else {
+            // SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout
+            // from the allocation
+            unsafe {
+                alloc::dealloc(self.alloc.host_ptr().cast(), self.layout);
+            }
+        }
+    }
+}
+
+impl AllocSize<usize> for SVMAlloc {
+    fn size(&self) -> usize {
+        SVMAlloc::size(self)
+    }
+}
+
+struct SVMContext {
+    svm_ptrs: TrackedPointers<usize, SVMAlloc>,
+}
+
 pub struct Context {
     pub base: CLObjectBase<CL_INVALID_CONTEXT>,
     pub devs: Vec<&'static Device>,
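When the platform reserved a GPU virtual-address range, the host side of a coarse-grain SVM allocation is an anonymous mapping placed at the reserved address, and the Drop impl above tears it down with munmap. Below is a minimal sketch of that map/unmap pair, written against the libc crate purely for illustration; the actual code goes through Mesa's own bindgen wrappers (see the meson.build change further down), and MAP_FIXED_NOREPLACE needs Linux 4.17 or newer:

```rust
use libc::c_void;

// Map anonymous, readable/writable memory at a pre-reserved address. MAP_FIXED_NOREPLACE
// fails instead of clobbering an existing mapping; MAP_NORESERVE avoids committing swap.
fn map_fixed(addr: usize, size: usize) -> Option<*mut c_void> {
    let res = unsafe {
        libc::mmap(
            addr as *mut c_void,
            size,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED_NOREPLACE | libc::MAP_NORESERVE,
            -1,
            0,
        )
    };
    (res != libc::MAP_FAILED).then_some(res)
}

// Release the mapping again, mirroring what Drop does for vma-backed SVMAllocs.
fn unmap(addr: *mut c_void, size: usize) {
    unsafe {
        libc::munmap(addr, size);
    }
}
```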
@@ -48,7 +103,7 @@ pub struct Context {
     bda_ptrs: Mutex<
         HashMap<&'static Device, TrackedPointers<cl_mem_device_address_ext, TrackedBDAAlloc>>,
     >,
-    svm_ptrs: Mutex<TrackedPointers<usize, Layout>>,
+    svm: Mutex<SVMContext>,
     pub gl_ctx_manager: Option<GLCtxManager>,
 }
 

@@ -66,7 +121,9 @@ impl Context {
             properties: properties,
             dtors: Mutex::new(Vec::new()),
             bda_ptrs: Mutex::new(HashMap::new()),
-            svm_ptrs: Mutex::new(TrackedPointers::new()),
+            svm: Mutex::new(SVMContext {
+                svm_ptrs: TrackedPointers::new(),
+            }),
             gl_ctx_manager: gl_ctx_manager,
         })
     }
@@ -207,13 +264,21 @@ impl Context {
     }
 
     pub fn has_svm_devs(&self) -> bool {
-        self.devs.iter().any(|dev| dev.svm_supported())
+        self.devs.iter().any(|dev| dev.api_svm_supported())
     }
 
-    pub fn alloc_svm_ptr(&self, size: usize, alignment: usize) -> CLResult<*mut c_void> {
+    pub fn alloc_svm_ptr(
+        &self,
+        size: NonZeroU64,
+        mut alignment: NonZeroU64,
+    ) -> CLResult<*mut c_void> {
+        // TODO: choose better alignment in regards to huge pages
+        alignment = cmp::max(alignment, NonZeroU64::new(0x1000).unwrap());
+
         // clSVMAlloc will fail if alignment is not a power of two.
         // `from_size_align()` verifies this condition is met.
-        let layout = Layout::from_size_align(size, alignment).or(Err(CL_INVALID_VALUE))?;
+        let layout = Layout::from_size_align(size.get() as usize, alignment.get() as usize)
+            .or(Err(CL_INVALID_VALUE))?;
 
         // clSVMAlloc will fail if size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value
         // for any device in context.
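Building the Layout up front is also what implements the clSVMAlloc error condition for bad alignments: Layout::from_size_align() rejects any alignment that is not a power of two (and sizes that overflow when rounded up). A short illustration:

```rust
use std::alloc::Layout;

// from_size_align() already encodes the "alignment must be a power of two" rule,
// so the CL error mapping is a single map_err().
fn validate(size: usize, alignment: usize) -> Result<Layout, &'static str> {
    Layout::from_size_align(size, alignment).map_err(|_| "CL_INVALID_VALUE")
}

fn main() {
    assert!(validate(4096, 128).is_ok());
    assert!(validate(4096, 96).is_err()); // 96 is not a power of two
}
```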
@ -222,38 +287,252 @@ impl Context {
|
|||
// `from_size_align()` ensures that the allocation will fit in host memory,
|
||||
// the maximum allocation may be smaller due to limitations from gallium or
|
||||
// devices.
|
||||
let size_aligned = layout.pad_to_align().size();
|
||||
if size == 0 || checked_compare(size_aligned, Ordering::Greater, self.max_mem_alloc()) {
|
||||
return Err(CL_INVALID_VALUE);
|
||||
}
|
||||
// let size_aligned = layout.pad_to_align().size();
|
||||
|
||||
// SAFETY: `size` is verified to be non-zero and the returned pointer is not
|
||||
// expected to point to initialized memory.
|
||||
let ptr = unsafe { alloc::alloc(layout) };
|
||||
// allocate a vma if one of the devices doesn't support system SVM
|
||||
let vma = if let Some(vm) = &Platform::get().vm {
|
||||
Some(
|
||||
vm.lock()
|
||||
.unwrap()
|
||||
.alloc(size, alignment)
|
||||
.ok_or(CL_OUT_OF_RESOURCES)?,
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let ptr: *mut c_void = if let Some(vma) = &vma {
|
||||
let res = unsafe {
|
||||
mmap(
|
||||
vma.get() as usize as *mut c_void,
|
||||
size.get() as usize,
|
||||
(PROT_READ | PROT_WRITE) as c_int,
|
||||
// MAP_FIXED_NOREPLACE needs 4.17
|
||||
(MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE | MAP_NORESERVE) as c_int,
|
||||
-1,
|
||||
0,
|
||||
)
|
||||
};
|
||||
|
||||
// mmap returns MAP_FAILED on error which is -1
|
||||
if res as usize == usize::MAX {
|
||||
return Err(CL_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
res.cast()
|
||||
} else {
|
||||
unsafe { alloc::alloc(layout) }.cast()
|
||||
};
|
||||
|
||||
if ptr.is_null() {
|
||||
Err(CL_OUT_OF_HOST_MEMORY)
|
||||
} else {
|
||||
Ok(ptr.cast())
|
||||
return Err(CL_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
let address = ptr as u64;
|
||||
let mut buffers = HashMap::new();
|
||||
for &dev in &self.devs {
|
||||
let size: u32 = size.get().try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
|
||||
|
||||
// For system SVM devices we simply create a userptr resource.
|
||||
let res = if dev.system_svm_supported() {
|
||||
dev.screen()
|
||||
.resource_create_buffer_from_user(size, ptr, PIPE_BIND_GLOBAL, 0)
|
||||
} else {
|
||||
dev.screen().resource_create_buffer(
|
||||
size,
|
||||
ResourceType::Normal,
|
||||
PIPE_BIND_GLOBAL,
|
||||
PIPE_RESOURCE_FLAG_FRONTEND_VM,
|
||||
)
|
||||
};
|
||||
|
||||
let res = res.ok_or(CL_OUT_OF_RESOURCES)?;
|
||||
if !dev.system_svm_supported() {
|
||||
if !dev.screen().resource_assign_vma(&res, address) {
|
||||
return Err(CL_OUT_OF_RESOURCES);
|
||||
}
|
||||
}
|
||||
|
||||
buffers.insert(dev, Arc::new(res));
|
||||
}
|
||||
|
||||
self.svm.lock().unwrap().svm_ptrs.insert(
|
||||
ptr as usize,
|
||||
SVMAlloc {
|
||||
layout: layout,
|
||||
vma,
|
||||
alloc: Arc::new(Allocation::new(buffers, 0, ptr)),
|
||||
},
|
||||
);
|
||||
|
||||
Ok(ptr)
|
||||
}
|
||||
|
||||
pub fn find_svm_alloc(&self, ptr: usize) -> Option<(*const c_void, usize)> {
|
||||
self.svm_ptrs
|
||||
pub fn copy_svm_to_dev(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
ptr: usize,
|
||||
) -> CLResult<Option<Arc<PipeResource>>> {
|
||||
let svm = self.svm.lock().unwrap();
|
||||
|
||||
let Some(alloc) = svm.svm_ptrs.find_alloc_precise(ptr) else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(Arc::clone(
|
||||
alloc.alloc.get_res_for_access(ctx, RWFlags::RW)?,
|
||||
)))
|
||||
}
|
||||
|
||||
pub fn copy_svm_to_host(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
svm_ptr: usize,
|
||||
flags: cl_map_flags,
|
||||
) -> CLResult<()> {
|
||||
// no need to copy
|
||||
if bit_check(flags, CL_MAP_WRITE_INVALIDATE_REGION) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let svm = self.svm.lock().unwrap();
|
||||
let Some((_, alloc)) = svm.svm_ptrs.find_alloc(svm_ptr) else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
alloc.alloc.migrate_to_hostptr(ctx, RWFlags::RW)
|
||||
}
|
||||
|
||||
pub fn copy_svm(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
src_addr: usize,
|
||||
dst_addr: usize,
|
||||
size: usize,
|
||||
) -> CLResult<()> {
|
||||
let svm = self.svm.lock().unwrap();
|
||||
let src = svm.svm_ptrs.find_alloc(src_addr);
|
||||
let dst = svm.svm_ptrs.find_alloc(dst_addr);
|
||||
|
||||
#[allow(clippy::collapsible_else_if)]
|
||||
if let Some((src_base, src_alloc)) = src {
|
||||
let src_res = src_alloc.alloc.get_res_for_access(ctx, RWFlags::RD)?;
|
||||
let src_offset = src_addr - src_base;
|
||||
|
||||
if let Some((dst_base, dst_alloc)) = dst {
|
||||
let dst_res = dst_alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
|
||||
let dst_offset = dst_addr - dst_base;
|
||||
|
||||
ctx.resource_copy_buffer(
|
||||
src_res,
|
||||
src_offset as i32,
|
||||
dst_res,
|
||||
dst_offset as u32,
|
||||
size as i32,
|
||||
);
|
||||
} else {
|
||||
let map = ctx
|
||||
.buffer_map(src_res, src_offset as i32, size as i32, RWFlags::RD)
|
||||
.ok_or(CL_OUT_OF_HOST_MEMORY)?;
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(map.ptr(), dst_addr as *mut c_void, size);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if let Some((dst_base, dst_alloc)) = dst {
|
||||
let dst_res = dst_alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
|
||||
let dst_offset = dst_addr - dst_base;
|
||||
|
||||
ctx.buffer_subdata(
|
||||
dst_res,
|
||||
dst_offset as u32,
|
||||
src_addr as *const c_void,
|
||||
size as u32,
|
||||
);
|
||||
} else {
|
||||
unsafe {
|
||||
ptr::copy(src_addr as *const c_void, dst_addr as *mut c_void, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn clear_svm<const T: usize>(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
svm_ptr: usize,
|
||||
size: usize,
|
||||
pattern: [u8; T],
|
||||
) -> CLResult<()> {
|
||||
let svm = self.svm.lock().unwrap();
|
||||
|
||||
if let Some((base, alloc)) = svm.svm_ptrs.find_alloc(svm_ptr) {
|
||||
let res = alloc.alloc.get_res_for_access(ctx, RWFlags::WR)?;
|
||||
let offset = svm_ptr - base;
|
||||
ctx.clear_buffer(res, &pattern, offset as u32, size as u32);
|
||||
} else {
|
||||
let slice = unsafe {
|
||||
slice::from_raw_parts_mut(svm_ptr as *mut _, size / mem::size_of_val(&pattern))
|
||||
};
|
||||
|
||||
slice.fill(pattern);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn migrate_svm(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
pointers: Vec<usize>,
|
||||
sizes: Vec<usize>,
|
||||
to_device: bool,
|
||||
content_undefined: bool,
|
||||
) -> CLResult<()> {
|
||||
let svm = self.svm.lock().unwrap();
|
||||
|
||||
if ctx.dev.system_svm_supported() {
|
||||
ctx.svm_migrate(&pointers, &sizes, to_device, content_undefined);
|
||||
} else {
|
||||
for ptr in pointers {
|
||||
let Some((_, alloc)) = svm.svm_ptrs.find_alloc(ptr) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
// we assume it's only read, so it remains valid on the host until future commands
|
||||
// have different needs.
|
||||
if to_device {
|
||||
alloc.alloc.get_res_for_access(ctx, RWFlags::RD)?;
|
||||
} else {
|
||||
alloc.alloc.migrate_to_hostptr(ctx, RWFlags::RD)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_svm_alloc(&self, ptr: usize) -> Option<(*mut c_void, Arc<Allocation>)> {
|
||||
self.svm
|
||||
.lock()
|
||||
.unwrap()
|
||||
.svm_ptrs
|
||||
.find_alloc(ptr)
|
||||
.map(|(ptr, layout)| (ptr as *const c_void, layout.size()))
|
||||
.map(|(base, alloc)| (base as *mut c_void, Arc::clone(&alloc.alloc)))
|
||||
}
|
||||
|
||||
pub fn find_svm_alloc(&self, ptr: usize) -> Option<(*mut c_void, usize)> {
|
||||
self.svm
|
||||
.lock()
|
||||
.unwrap()
|
||||
.svm_ptrs
|
||||
.find_alloc(ptr)
|
||||
.map(|(ptr, alloc)| (ptr as _, alloc.size()))
|
||||
}
|
||||
|
||||
pub fn remove_svm_ptr(&self, ptr: usize) {
|
||||
if let Some(layout) = self.svm_ptrs.lock().unwrap().remove(ptr) {
|
||||
// SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout
|
||||
// from the allocation
|
||||
unsafe {
|
||||
alloc::dealloc(ptr as *mut u8, layout);
|
||||
}
|
||||
}
|
||||
self.svm.lock().unwrap().svm_ptrs.remove(ptr);
|
||||
}
|
||||
|
||||
pub fn add_bda_ptr(&self, buffer: &Arc<Buffer>) {
|
||||
|
|
|
|||
|
|
@@ -27,6 +27,7 @@ use std::convert::TryInto;
 use std::env;
 use std::ffi::CStr;
 use std::mem::transmute;
+use std::num::NonZeroU64;
 use std::os::raw::*;
 use std::sync::Arc;
 use std::sync::Mutex;
@ -104,6 +105,13 @@ pub trait HelperContextWrapper {
|
|||
where
|
||||
F: Fn(&HelperContext);
|
||||
|
||||
fn buffer_map(
|
||||
&self,
|
||||
res: &PipeResource,
|
||||
offset: i32,
|
||||
size: i32,
|
||||
rw: RWFlags,
|
||||
) -> Option<PipeTransfer>;
|
||||
fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
|
||||
fn delete_compute_state(&self, cso: *mut c_void);
|
||||
fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
|
||||
|
|
@ -165,6 +173,16 @@ impl HelperContextWrapper for HelperContext<'_> {
|
|||
self.lock.flush()
|
||||
}
|
||||
|
||||
fn buffer_map(
|
||||
&self,
|
||||
res: &PipeResource,
|
||||
offset: i32,
|
||||
size: i32,
|
||||
rw: RWFlags,
|
||||
) -> Option<PipeTransfer> {
|
||||
self.lock.buffer_map(res, offset, size, rw)
|
||||
}
|
||||
|
||||
fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
|
||||
self.lock.create_compute_state(nir, static_local_mem)
|
||||
}
|
||||
|
|
@@ -1157,10 +1175,33 @@ impl Device {
             && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
     }
 
-    pub fn svm_supported(&self) -> bool {
+    pub fn system_svm_supported(&self) -> bool {
         self.screen.caps().system_svm
     }
 
+    pub fn svm_supported(&self) -> bool {
+        if cfg!(not(target_pointer_width = "64")) {
+            return false;
+        }
+
+        self.system_svm_supported() || self.screen().is_vm_supported()
+    }
+
+    /// Checks if the device supports SVM _and_ that we were able to initialize SVM support on a
+    /// platform level.
+    pub fn api_svm_supported(&self) -> bool {
+        self.system_svm_supported()
+            || (self.screen().is_vm_supported() && Platform::get().vm.is_some())
+    }
+
+    // returns (start, end)
+    pub fn vm_alloc_range(&self) -> Option<(NonZeroU64, NonZeroU64)> {
+        let min = self.screen.caps().min_vma;
+        let max = self.screen.caps().max_vma;
+
+        Some((NonZeroU64::new(min)?, NonZeroU64::new(max)?))
+    }
+
     pub fn unified_memory(&self) -> bool {
         self.screen.caps().uma
     }
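The split between the three queries matters for what gets advertised: svm_supported() says the hardware could do it, while api_svm_supported() additionally requires that the platform-wide VA reservation succeeded at init time. A boolean sketch of the relationship, with the inputs standing in for the screen caps and the platform state:

```rust
// system_svm   -> the GPU can use any host pointer directly
// vm_supported -> the driver lets the frontend assign GPU virtual addresses
// platform_vm  -> rusticl managed to reserve a shared VA range at init time
fn svm_supported(system_svm: bool, vm_supported: bool) -> bool {
    system_svm || vm_supported
}

fn api_svm_supported(system_svm: bool, vm_supported: bool, platform_vm: bool) -> bool {
    // Only advertise SVM through the API once the platform-level setup also succeeded.
    system_svm || (vm_supported && platform_vm)
}

fn main() {
    // Capable hardware, but the platform reservation failed: no API-visible SVM.
    assert!(svm_supported(false, true));
    assert!(!api_svm_supported(false, true, false));
}
```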
|
|||
|
|
@@ -524,6 +524,7 @@ pub struct Kernel {
     pub name: String,
     values: Mutex<Vec<Option<KernelArgValue>>>,
     pub bdas: Mutex<Vec<cl_mem_device_address_ext>>,
+    pub svms: Mutex<HashSet<usize>>,
     builds: HashMap<&'static Device, Arc<NirKernelBuilds>>,
     pub kernel_info: Arc<KernelInfo>,
 }

@@ -1246,6 +1247,7 @@ impl Kernel {
             name: name,
             values: Mutex::new(values),
             bdas: Mutex::new(Vec::new()),
+            svms: Mutex::new(HashSet::new()),
             builds: builds,
             kernel_info: kernel_info,
         })

@@ -1323,6 +1325,7 @@ impl Kernel {
         let arg_values = self.arg_values().clone();
         let nir_kernel_builds = Arc::clone(&self.builds[q.device]);
         let mut bdas = self.bdas.lock().unwrap().clone();
+        let svms = self.svms.lock().unwrap().clone();
 
         let mut buffer_arcs = HashMap::new();
         let mut image_arcs = HashMap::new();
@@ -1442,6 +1445,12 @@ impl Kernel {
             printf_buf = Some(buf);
         }
 
+        // translate SVM pointers to their base first
+        let mut svms: HashSet<_> = svms
+            .into_iter()
+            .filter_map(|svm_pointer| Some(q.context.find_svm_alloc(svm_pointer)?.0 as usize))
+            .collect();
+
         for arg in &nir_kernel_build.compiled_args {
             let is_opaque = if let CompiledKernelArgType::APIArg(idx) = arg.kind {
                 kernel_info.args[idx].kind.is_opaque()

@@ -1485,6 +1494,9 @@ impl Kernel {
                     if let Some(address) = buffer.dev_address(ctx.dev) {
                         let _ = buffer.get_res_for_access(ctx, rw)?;
                         bdas.push(address.get());
+                    } else if buffer.is_svm() {
+                        let _ = buffer.get_res_for_access(ctx, rw)?;
+                        svms.insert(buffer.host_ptr() as usize);
                     }
                 } else {
                     let res = buffer.get_res_for_access(ctx, rw)?;

@@ -1498,6 +1510,11 @@ impl Kernel {
                     }
                 }
                 &KernelArgValue::SVM(handle) => {
+                    // get the base address so we deduplicate properly
+                    if let Some((base, _)) = q.context.find_svm_alloc(handle) {
+                        svms.insert(base as usize);
+                    }
+
                     if !api_arg.dead {
                         add_pointer(q, &mut input, handle as u64);
                     }
@@ -1603,11 +1620,21 @@ impl Kernel {
             .filter_map(|address| q.context.find_bda_alloc(q.device, address))
             .collect::<HashSet<_>>();
 
-        let bdas: Vec<_> = bdas
+        let mut bdas: Vec<_> = bdas
             .iter()
             .map(|buffer| Ok(buffer.get_res_for_access(ctx, RWFlags::RW)?.deref()))
             .collect::<CLResult<_>>()?;
 
+        let svms_new = svms
+            .into_iter()
+            .filter_map(|svm| q.context.copy_svm_to_dev(ctx, svm).transpose())
+            .collect::<CLResult<Vec<_>>>()?;
+
+        // uhhh
+        for svm in &svms_new {
+            bdas.push(svm);
+        }
+
         // subtract the shader local_size as we only request something on top of that.
         variable_local_size -= static_local_size;
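The svms_new collection above leans on Result/Option transpose(): copy_svm_to_dev() returns CLResult<Option<_>> (None when a pointer needs no device resource, e.g. on system-SVM devices), and transpose() flips that into Option<CLResult<_>> so filter_map() can drop the Nones while collect() still surfaces errors. A self-contained sketch of the pattern with a hypothetical lookup():

```rust
// Hypothetical lookup: Err for a broken pointer, Some for pointers with a device
// resource, None for pointers that need nothing made resident.
fn lookup(ptr: usize) -> Result<Option<u64>, &'static str> {
    match ptr {
        0 => Err("bad pointer"),
        p if p % 2 == 0 => Ok(Some(p as u64)),
        _ => Ok(None),
    }
}

fn main() -> Result<(), &'static str> {
    let ptrs = [2usize, 3, 4];
    let resident: Vec<u64> = ptrs
        .iter()
        .filter_map(|&p| lookup(p).transpose()) // drop the Nones, keep Err/Ok
        .collect::<Result<_, _>>()?;            // first Err aborts the whole collect
    assert_eq!(resident, vec![2, 4]);
    Ok(())
}
```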
@@ -1819,7 +1846,7 @@ impl Kernel {
     }
 
     pub fn has_svm_devs(&self) -> bool {
-        self.prog.devs.iter().any(|dev| dev.svm_supported())
+        self.prog.devs.iter().any(|dev| dev.api_svm_supported())
     }
 
     pub fn subgroup_sizes(&self, dev: &Device) -> impl ExactSizeIterator<Item = usize> {

@@ -1876,6 +1903,7 @@ impl Clone for Kernel {
             name: self.name.clone(),
             values: Mutex::new(self.arg_values().clone()),
             bdas: Mutex::new(self.bdas.lock().unwrap().clone()),
+            svms: Mutex::new(self.svms.lock().unwrap().clone()),
             builds: self.builds.clone(),
             kernel_info: Arc::clone(&self.kernel_info),
         }
|
|||
|
|
@ -398,11 +398,17 @@ pub struct SubAllocation {
|
|||
offset: usize,
|
||||
}
|
||||
|
||||
pub struct SvmAllocation {
|
||||
alloc: Arc<Allocation>,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
/// Abstraction over the memory allocation. It might be a real GPU backing storage or simply a sub
|
||||
/// allocation over an existing memory object.
|
||||
enum Allocation {
|
||||
pub enum Allocation {
|
||||
Resource(ResourceAllocation),
|
||||
SubAlloc(SubAllocation),
|
||||
Svm(SvmAllocation),
|
||||
}
|
||||
|
||||
// TODO: - Once it's used for more stuff might make sense to split it into an Image and Buffer
|
||||
|
|
@ -455,6 +461,14 @@ impl Allocation {
|
|||
})
|
||||
}
|
||||
|
||||
fn new_svm(alloc: Arc<Allocation>, offset: usize) -> Self {
|
||||
Self::Svm(SvmAllocation {
|
||||
// we precalculate the entire offset here.
|
||||
offset: alloc.offset() + offset,
|
||||
alloc: alloc,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true if the backing storage of the two objects is equal.
|
||||
fn backing_resource_eq(&self, other: &Self) -> bool {
|
||||
ptr::eq(self.get_real_resource(), other.get_real_resource())
|
||||
|
|
@ -465,24 +479,26 @@ impl Allocation {
|
|||
match self {
|
||||
Allocation::SubAlloc(sub) => sub.mem.alloc.get_real_resource(),
|
||||
Allocation::Resource(res) => res,
|
||||
Allocation::Svm(svm) => svm.alloc.get_real_resource(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the resource associated with `dev` without any data migration.
|
||||
fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
|
||||
self.get_real_resource()
|
||||
.res
|
||||
.get(dev)
|
||||
.ok_or(CL_OUT_OF_HOST_MEMORY)
|
||||
fn get_res_of_dev(&self, dev: &Device) -> Option<&Arc<PipeResource>> {
|
||||
self.get_real_resource().res.get(dev)
|
||||
}
|
||||
|
||||
/// Returns the resource associated with `ctx.dev` and transparently migrate the data.
|
||||
fn get_res_for_access(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<&Arc<PipeResource>> {
|
||||
pub fn get_res_for_access(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
rw: RWFlags,
|
||||
) -> CLResult<&Arc<PipeResource>> {
|
||||
self.get_real_resource().get_res_for_access(ctx, rw)
|
||||
}
|
||||
|
||||
/// Migrates the content to the host. Fails if there is no host ptr.
|
||||
pub fn _migrate_to_hostptr(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<()> {
|
||||
pub fn migrate_to_hostptr(&self, ctx: &QueueContext, rw: RWFlags) -> CLResult<()> {
|
||||
self.get_real_resource().migrate_to_hostptr(ctx, rw)
|
||||
}
|
||||
|
||||
|
|
@ -497,14 +513,20 @@ impl Allocation {
|
|||
host_ptr as _
|
||||
}
|
||||
|
||||
fn is_user_alloc_for_dev(&self, dev: &Device) -> CLResult<bool> {
|
||||
Ok(self.get_res_of_dev(dev)?.is_user())
|
||||
fn is_user_alloc_for_dev(&self, dev: &Device) -> bool {
|
||||
if let Some(res) = self.get_res_of_dev(dev) {
|
||||
res.is_user()
|
||||
} else {
|
||||
// for SVM allocations there might not even be a real resource
|
||||
dev.system_svm_supported()
|
||||
}
|
||||
}
|
||||
|
||||
fn offset(&self) -> usize {
|
||||
match self {
|
||||
Allocation::Resource(res) => res.offset,
|
||||
Allocation::SubAlloc(sub) => sub.offset,
|
||||
Allocation::Svm(svm) => svm.offset,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -773,39 +795,71 @@ impl MemBase {
|
|||
.copied()
|
||||
== Some(CL_TRUE.into());
|
||||
|
||||
let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
|
||||
ResourceType::Staging
|
||||
// if it's a SVM host ptr, we just use the already allocated resource if it exists, because
|
||||
// this is actually mandated by the spec. The size requirement will be checked inside the
|
||||
// API layer.
|
||||
//
|
||||
// From the OpenCL spec:
|
||||
// If clCreateBuffer or clCreateBufferWithProperties is called with a pointer returned
|
||||
// clSVMAlloc as its host_ptr argument, and CL_MEM_USE_HOST_PTR is set in its flags
|
||||
// argument, clCreateBuffer or clCreateBufferWithProperties will succeed and return
|
||||
// valid non-zero buffer object as long as the size argument is no larger than the size
|
||||
// argument passed in the original clSVMAlloc call. The new buffer object returned has the
|
||||
// shared memory as the underlying storage.
|
||||
let svm = bit_check(flags, CL_MEM_USE_HOST_PTR)
|
||||
.then(|| context.get_svm_alloc(host_ptr as usize))
|
||||
.flatten();
|
||||
|
||||
let alloc = if let Some((svm_ptr, ref svm_alloc)) = svm {
|
||||
// SAFETY: svm_ptr is the base of the allocation host_ptr points into.
|
||||
let offset = unsafe { host_ptr.byte_offset_from(svm_ptr) } as usize;
|
||||
Allocation::new_svm(Arc::clone(svm_alloc), offset)
|
||||
} else {
|
||||
ResourceType::Normal
|
||||
let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
|
||||
ResourceType::Staging
|
||||
} else {
|
||||
ResourceType::Normal
|
||||
};
|
||||
|
||||
let buffer = context.create_buffer(
|
||||
size,
|
||||
host_ptr,
|
||||
bit_check(flags, CL_MEM_COPY_HOST_PTR),
|
||||
bda,
|
||||
res_type,
|
||||
)?;
|
||||
|
||||
// We can only keep the host_ptr when `CL_MEM_USE_HOST_PTR` is set.
|
||||
if !bit_check(flags, CL_MEM_USE_HOST_PTR) {
|
||||
host_ptr = ptr::null_mut()
|
||||
}
|
||||
|
||||
Allocation::new(buffer, 0, host_ptr)
|
||||
};
|
||||
|
||||
let buffer = context.create_buffer(
|
||||
size,
|
||||
host_ptr,
|
||||
bit_check(flags, CL_MEM_COPY_HOST_PTR),
|
||||
bda,
|
||||
res_type,
|
||||
)?;
|
||||
|
||||
// We can only keep the host_ptr when `CL_MEM_USE_HOST_PTR` is set.
|
||||
if !bit_check(flags, CL_MEM_USE_HOST_PTR) {
|
||||
host_ptr = ptr::null_mut()
|
||||
}
|
||||
|
||||
let addresses = bda.then(|| {
|
||||
context
|
||||
.devs
|
||||
.iter()
|
||||
.filter(|dev| dev.bda_supported())
|
||||
.map(|&dev| {
|
||||
let address = buffer[dev].resource_get_address();
|
||||
// If the buffer is backed by an SVM allocation, we need to use its address.
|
||||
let address = if let Some((address, _)) = svm {
|
||||
NonZeroU64::new(address as usize as u64)
|
||||
} else if let Some(res) = alloc.get_res_of_dev(dev) {
|
||||
res.resource_get_address()
|
||||
} else {
|
||||
// if there is no resource, it's a system SVM allocation
|
||||
assert!(dev.system_svm_supported());
|
||||
NonZeroU64::new(alloc.host_ptr() as u64)
|
||||
};
|
||||
|
||||
Some((dev, address?))
|
||||
})
|
||||
.collect::<Option<_>>()
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
let alloc = Allocation::new(buffer, 0, host_ptr);
|
||||
let buffer = Arc::new(Buffer {
|
||||
base: Self {
|
||||
base: CLObjectBase::new(RusticlTypes::Buffer),
|
||||
|
|
@ -1102,6 +1156,9 @@ impl MemBase {
|
|||
match &self.alloc {
|
||||
Allocation::SubAlloc(sub) => Some(&sub.mem),
|
||||
Allocation::Resource(_) => None,
|
||||
// In theory the SVM allocation is the parent, but that's not a memory object on the API
|
||||
// level.
|
||||
Allocation::Svm(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1109,10 +1166,10 @@ impl MemBase {
|
|||
self.alloc.host_ptr()
|
||||
}
|
||||
|
||||
fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
|
||||
fn is_pure_user_memory(&self, d: &Device) -> bool {
|
||||
// 1Dbuffer objects are weird. The parent memory object can be a host_ptr thing, but we are
|
||||
// not allowed to actually return a pointer based on the host_ptr when mapping.
|
||||
Ok(self.alloc.is_user_alloc_for_dev(d)? && !self.host_ptr().is_null())
|
||||
self.alloc.is_user_alloc_for_dev(d) && !self.host_ptr().is_null()
|
||||
}
|
||||
|
||||
fn map<T>(
|
||||
|
|
@ -1430,7 +1487,7 @@ impl Buffer {
|
|||
|
||||
// in this case we only need to migrate to the device if the data is located on a device not
|
||||
// having a userptr allocation.
|
||||
if self.is_pure_user_memory(ctx.dev)? {
|
||||
if self.is_pure_user_memory(ctx.dev) {
|
||||
let rw = if mapping.writes {
|
||||
RWFlags::RW
|
||||
} else {
|
||||
|
|
@ -1446,7 +1503,7 @@ impl Buffer {
|
|||
|
||||
pub fn sync_unmap(&self, ctx: &QueueContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
// no need to update
|
||||
if self.is_pure_user_memory(ctx.dev)? {
|
||||
if self.is_pure_user_memory(ctx.dev) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
|
@ -1904,7 +1961,7 @@ impl Image {
|
|||
|
||||
// in this case we only need to migrate to the device if the data is located on a device not
|
||||
// having a userptr allocation.
|
||||
if self.is_pure_user_memory(ctx.dev)? {
|
||||
if self.is_pure_user_memory(ctx.dev) {
|
||||
let rw = if mapping.writes {
|
||||
RWFlags::RW
|
||||
} else {
|
||||
|
|
@ -1930,7 +1987,7 @@ impl Image {
|
|||
|
||||
pub fn sync_unmap(&self, ctx: &QueueContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
// no need to update
|
||||
if self.is_pure_user_memory(ctx.dev)? {
|
||||
if self.is_pure_user_memory(ctx.dev) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -3,25 +3,47 @@ use crate::api::icd::DISPATCH;
 use crate::core::device::*;
 use crate::core::version::*;
 
+use mesa_rust::pipe::screen::ScreenVMAllocation;
+use mesa_rust::util::vm::VM;
 use mesa_rust_gen::*;
 use mesa_rust_util::string::char_arr_to_cstr;
 use rusticl_opencl_gen::*;
 
+use std::cmp;
 use std::env;
+use std::num::NonZeroU64;
+use std::ops::Deref;
 use std::ptr;
 use std::ptr::addr_of;
 use std::ptr::addr_of_mut;
+use std::sync::Mutex;
 use std::sync::Once;
 
 /// Maximum size a pixel can be across all supported image formats.
 pub const MAX_PIXEL_SIZE_BYTES: u64 = 4 * 4;
 
+pub struct PlatformVM<'a> {
+    vm: Mutex<VM>,
+    // we make use of the drop to automatically free the reserved VM
+    _dev_allocs: Vec<ScreenVMAllocation<'a>>,
+}
+
+impl Deref for PlatformVM<'_> {
+    type Target = Mutex<VM>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.vm
+    }
+}
+
 #[repr(C)]
 pub struct Platform {
     dispatch: &'static cl_icd_dispatch,
     pub devs: Vec<Device>,
     pub extension_string: String,
     pub extensions: Vec<cl_name_version>,
+    // lifetime has to match the one of devs
+    pub vm: Option<PlatformVM<'static>>,
 }
 
 pub enum PerfDebugLevel {

@@ -57,6 +79,7 @@ static mut PLATFORM: Platform = Platform {
     devs: Vec::new(),
     extension_string: String::new(),
     extensions: Vec::new(),
+    vm: None,
 };
 static mut PLATFORM_DBG: PlatformDebug = PlatformDebug {
     allow_invalid_spirv: false,
@@ -139,13 +162,56 @@ impl Platform {
         unsafe { &*addr_of!(PLATFORM_FEATURES) }
     }
 
-    fn init(&mut self) {
+    fn alloc_vm(devs: &[Device]) -> Option<PlatformVM<'_>> {
+        // We support buffer SVM only on 64 bit platforms
+        if cfg!(not(target_pointer_width = "64")) {
+            return None;
+        }
+
+        // No need to check system SVM devices
+        let devs = devs.iter().filter(|dev| !dev.system_svm_supported());
+
+        let (start, end) = devs.clone().filter_map(|dev| dev.vm_alloc_range()).reduce(
+            |(min_a, max_a), (min_b, max_b)| (cmp::max(min_a, min_b), cmp::min(max_a, max_b)),
+        )?;
+
+        // Allocate 1/8 of the available VM. No specific reason for this limit. Might have to bump
+        // this later, but it's probably fine as there is plenty of VM available.
+        let size = NonZeroU64::new((end.get() / 8).next_power_of_two())?;
+        if start > size {
+            return None;
+        }
+
+        let mut allocs = Vec::new();
+        for dev in devs {
+            allocs.push(dev.screen().alloc_vm(size, size)?);
+        }
+
+        Some(PlatformVM {
+            vm: Mutex::new(VM::new(size, size)),
+            _dev_allocs: allocs,
+        })
+    }
+
+    fn init(&'static mut self) {
         unsafe {
             glsl_type_singleton_init_or_ref();
         }
 
         self.devs = Device::all();
 
+        self.vm = Self::alloc_vm(&self.devs);
+
+        if self
+            .devs
+            .iter()
+            .any(|dev| !dev.system_svm_supported() && dev.svm_supported())
+            && self.vm.is_none()
+        {
+            // TODO: in theory we should also remove the exposed SVM extension, but...
+            eprintln!("rusticl: could not initialize SVM support");
+        }
+
         let mut exts_str: Vec<&str> = Vec::new();
         let mut add_ext = |major, minor, patch, ext: &'static str| {
             self.extensions
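alloc_vm() negotiates one virtual-address window that every non-system-SVM device can address, then reserves a power-of-two slice of it. The simplified sketch below models just that negotiation; the placement detail in the comment (the reservation starting at `size`, hence the `start > size` check) is an assumption inferred from the VM::new(size, size) call above, not something the diff states explicitly:

```rust
use std::cmp;
use std::num::NonZeroU64;

// Intersect each device's (min_vma, max_vma) window and size the reservation at 1/8 of
// the top of the common window, rounded to a power of two.
fn reserve_size(ranges: &[(u64, u64)]) -> Option<NonZeroU64> {
    let (start, end) = ranges
        .iter()
        .copied()
        .reduce(|(lo_a, hi_a), (lo_b, hi_b)| (cmp::max(lo_a, lo_b), cmp::min(hi_a, hi_b)))?;

    let size = NonZeroU64::new((end / 8).next_power_of_two())?;
    // Assumed placement: the reserved slice starts at `size`, so the window must reach below it.
    (start <= size.get()).then_some(size)
}

fn main() {
    // Two devices with different windows still agree on a usable slice.
    let size = reserve_size(&[(0x1000, 1 << 47), (0x10000, 1 << 48)]).unwrap();
    assert!(size.get().is_power_of_two());
}
```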
|
|||
|
|
@@ -282,7 +282,18 @@ impl PipeResource {
 
 impl Drop for PipeResource {
     fn drop(&mut self) {
-        unsafe { pipe_resource_reference(&mut self.pipe.as_ptr(), ptr::null_mut()) }
+        unsafe {
+            let pipe = self.pipe.as_ref();
+            let screen = pipe.screen.as_ref().unwrap();
+
+            if pipe.flags & PIPE_RESOURCE_FLAG_FRONTEND_VM != 0 {
+                if let Some(resource_assign_vma) = screen.resource_assign_vma {
+                    resource_assign_vma(pipe.screen, self.pipe(), 0);
+                }
+            }
+
+            pipe_resource_reference(&mut self.pipe.as_ptr(), ptr::null_mut());
+        }
     }
 }
 
|
|||
|
|
@@ -308,6 +308,12 @@ rusticl_mesa_bindings = rust.bindgen(
     '--allowlist-function', 'rusticl_.*',
     '--allowlist-function', 'std(err|out)_ptr',
 
+    # libc
+    '--allowlist-function', 'mmap',
+    '--allowlist-function', 'munmap',
+    '--allowlist-var', 'MAP_.*',
+    '--allowlist-var', 'PROT_.*',
+
     # winsys
     '--allowlist-var', 'WINSYS_HANDLE_TYPE_.*',
   ],
|
|||
|
|
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <sys/mman.h>
 
 FILE *stdout_ptr(void);
 FILE *stderr_ptr(void);
|
|||
|
|
@@ -1,6 +1,9 @@
 use std::{
     alloc::Layout,
-    collections::{btree_map::Entry, BTreeMap},
+    collections::{
+        btree_map::{Entry, Values, ValuesMut},
+        BTreeMap,
+    },
     hash::{Hash, Hasher},
     mem,
     ops::{Add, Deref},

@@ -175,6 +178,14 @@ impl<P, T: AllocSize<P>> TrackedPointers<P, T> {
             ptrs: BTreeMap::new(),
         }
     }
+
+    pub fn values(&self) -> Values<'_, P, T> {
+        self.ptrs.values()
+    }
+
+    pub fn values_mut(&mut self) -> ValuesMut<'_, P, T> {
+        self.ptrs.values_mut()
+    }
 }
 
 impl<P, T: AllocSize<P>> TrackedPointers<P, T>

@@ -201,6 +212,18 @@ where
         None
     }
 
+    pub fn find_alloc_mut(&mut self, ptr: P) -> Option<(P, &mut T)> {
+        if let Some((&base, val)) = self.ptrs.range_mut(..=ptr).next_back() {
+            let size = val.size();
+            // we check if ptr is within [base..base+size)
+            // means we can check if ptr - (base + size) < 0
+            if ptr < (base + size) {
+                return Some((base, val));
+            }
+        }
+        None
+    }
+
+    pub fn find_alloc_precise(&self, ptr: P) -> Option<&T> {
+        self.ptrs.get(&ptr)
+    }
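TrackedPointers is what makes interior SVM pointers resolvable: allocations are keyed by base address in a BTreeMap, and a lookup walks back to the greatest base not above the pointer, then checks the size. A standalone model of find_alloc() and how an interior pointer resolves:

```rust
use std::collections::BTreeMap;

// Minimal model: map of base -> size, lookup returns the containing (base, size) if any.
fn find_alloc(ptrs: &BTreeMap<usize, usize>, ptr: usize) -> Option<(usize, usize)> {
    let (&base, &size) = ptrs.range(..=ptr).next_back()?;
    // ptr must lie inside [base, base + size)
    (ptr < base + size).then_some((base, size))
}

fn main() {
    let mut ptrs = BTreeMap::new();
    ptrs.insert(0x1000, 0x100);
    ptrs.insert(0x3000, 0x200);

    assert_eq!(find_alloc(&ptrs, 0x1080), Some((0x1000, 0x100))); // interior pointer
    assert_eq!(find_alloc(&ptrs, 0x2000), None);                  // between allocations
}
```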
|
|||
Loading…
Add table
Reference in a new issue