rusticl: implement cl_ext_buffer_device_address

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32942>
This commit is contained in:
Karol Herbst 2024-04-16 11:27:54 +02:00 committed by Marge Bot
parent 35a9829391
commit b65652b4be
11 changed files with 366 additions and 58 deletions

View file

@ -849,6 +849,7 @@ Rusticl extensions that are not part of any OpenCL version:
cl_khr_terminate_context not started
cl_khr_throttle_hints not started
cl_khr_work_group_uniform_arithmetic not started
cl_ext_buffer_device_address DONE (llvmpipe, zink)
cl_arm_non_uniform_work_group_size not started
cl_arm_shared_virtual_memory in progress (nvc0)
cl_intel_subgroups in progress (available with RUSTICL_FEATURES=intel)

View file

@ -27,3 +27,4 @@ VK_EXT_image_2d_view_of_3d on panvk
VK_EXT_texel_buffer_alignment on panvk
cl_khr_kernel_clock on freedreno, iris, llvmpipe, nvc0, panfrost, radeonsi and zink with llvm-19 or newer
GL_KHR_texture_compression_astc_hdr on panfrost and asahi
cl_ext_buffer_device_address on llvmpipe and zink

View file

@ -517,6 +517,9 @@ extern "C" fn clGetExtensionFunctionAddress(
"clSVMAllocARM" => cl_ext_func!(clSVMAlloc: clSVMAllocARM_fn),
"clSVMFreeARM" => cl_ext_func!(clSVMFree: clSVMFreeARM_fn),
// cl_ext_buffer_device_address
"clSetKernelArgDevicePointerEXT" => cl_ext_func!(clSetKernelArgDevicePointerEXT: clSetKernelArgDevicePointerEXT_fn),
// DPCPP bug https://github.com/intel/llvm/issues/9964
"clSetProgramSpecializationConstant" => cl_ext_func!(clSetProgramSpecializationConstant: clSetProgramSpecializationConstant_fn),

View file

@ -420,33 +420,41 @@ fn set_kernel_arg(
// let's create the arg now
let arg = unsafe {
if arg.dead {
KernelArgValue::None
} else {
match arg.kind {
KernelArgType::Constant(_) => KernelArgValue::Constant(
slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
),
KernelArgType::MemConstant | KernelArgType::MemGlobal => {
let ptr: *const cl_mem = arg_value.cast();
if ptr.is_null() || (*ptr).is_null() {
KernelArgValue::None
} else {
let buffer = Buffer::arc_from_raw(*ptr)?;
KernelArgValue::Buffer(Arc::downgrade(&buffer))
}
}
KernelArgType::MemLocal => KernelArgValue::LocalMem(arg_size),
KernelArgType::Image | KernelArgType::RWImage | KernelArgType::Texture => {
let img: *const cl_mem = arg_value.cast();
let img = Image::arc_from_raw(*img)?;
KernelArgValue::Image(Arc::downgrade(&img))
}
KernelArgType::Sampler => {
let ptr: *const cl_sampler = arg_value.cast();
KernelArgValue::Sampler(Sampler::arc_from_raw(*ptr)?)
match arg.kind {
KernelArgType::Constant(_) if !arg.dead => KernelArgValue::Constant(
slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
),
KernelArgType::MemConstant | KernelArgType::MemGlobal => {
let ptr: *const cl_mem = arg_value.cast();
if ptr.is_null() || (*ptr).is_null() {
KernelArgValue::None
} else {
let buffer = Buffer::arc_from_raw(*ptr)?;
KernelArgValue::Buffer(Arc::downgrade(&buffer))
}
}
KernelArgType::MemLocal if !arg.dead => KernelArgValue::LocalMem(arg_size),
KernelArgType::Image | KernelArgType::RWImage | KernelArgType::Texture
if !arg.dead =>
{
let img: *const cl_mem = arg_value.cast();
let img = Image::arc_from_raw(*img)?;
KernelArgValue::Image(Arc::downgrade(&img))
}
KernelArgType::Sampler if !arg.dead => {
let ptr: *const cl_sampler = arg_value.cast();
KernelArgValue::Sampler(Sampler::arc_from_raw(*ptr)?)
}
_ => {
debug_assert!(
arg.dead
|| matches!(
arg.kind,
KernelArgType::MemConstant | KernelArgType::MemGlobal
)
);
KernelArgValue::None
}
}
};
k.set_kernel_arg(arg_index, arg)
@ -490,6 +498,38 @@ fn set_kernel_arg_svm_pointer(
// CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
}
#[cl_entrypoint(clSetKernelArgDevicePointerEXT)]
// Sets a kernel argument from a raw device address (cl_ext_buffer_device_address). The address
// is the value queried via CL_MEM_DEVICE_ADDRESS_EXT on a buffer created with
// CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT.
fn set_kernel_arg_device_pointer(
kernel: cl_kernel,
arg_index: cl_uint,
arg_value: cl_mem_device_address_ext,
) -> CLResult<()> {
let kernel = Kernel::ref_from_raw(kernel)?;
let arg_index = arg_index as usize;
let devs = &kernel.prog.context.devs;
// CL_INVALID_OPERATION if no devices in the context associated with kernel support the device
// pointer.
//
// NOTE(review): the check below is stricter than the comment above — it errors out as soon as
// *any* device in the context lacks BDA support, not only when *no* device supports it
// (compare check_bda_support() in set_kernel_exec_info, which uses `all`). Confirm which
// behavior is intended.
if devs.iter().any(|dev| !dev.bda_supported()) {
return Err(CL_INVALID_OPERATION);
}
// CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
let Some(arg) = kernel.kernel_info.args.get(arg_index) else {
return Err(CL_INVALID_ARG_INDEX);
};
// The device pointer can only be used for arguments that are declared to be a pointer to global
// memory allocated with clCreateBufferWithProperties with the CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT
// property.
if arg.kind != KernelArgType::MemGlobal {
return Err(CL_INVALID_ARG_INDEX);
}
// we set the arg also when it's a dead argument, as we need to ensure the buffer gets migrated.
kernel.set_kernel_arg(arg_index, KernelArgValue::BDA(arg_value))
}
#[cl_entrypoint(clSetKernelExecInfo)]
fn set_kernel_exec_info(
kernel: cl_kernel,
@ -498,15 +538,45 @@ fn set_kernel_exec_info(
param_value: *const ::std::os::raw::c_void,
) -> CLResult<()> {
let k = Kernel::ref_from_raw(kernel)?;
let devs = &k.prog.devs;
// CL_INVALID_OPERATION if no devices in the context associated with kernel support SVM.
if !k.prog.devs.iter().any(|dev| dev.svm_supported()) {
return Err(CL_INVALID_OPERATION);
}
// CL_INVALID_OPERATION for CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT if no device in the context
// associated with kernel support the cl_ext_buffer_device_address extension.
let check_bda_support = || {
if devs.iter().all(|dev| !dev.bda_supported()) {
Err(CL_INVALID_OPERATION)
} else {
Ok(())
}
};
// CL_INVALID_OPERATION for CL_KERNEL_EXEC_INFO_SVM_PTRS and
// CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM if no devices in the context associated with kernel
// support SVM.
let check_svm_support = || {
if devs.iter().all(|dev| !dev.svm_supported()) {
Err(CL_INVALID_OPERATION)
} else {
Ok(())
}
};
// CL_INVALID_VALUE ... if the size specified by param_value_size is not valid.
match param_name {
CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT => {
check_bda_support()?;
let handles = unsafe {
cl_slice::from_raw_parts_bytes_len::<cl_mem_device_address_ext>(
param_value,
param_value_size,
)?
};
handles.clone_into(&mut k.bdas.lock().unwrap());
}
CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => {
check_svm_support()?;
// To specify that no SVM allocations will be accessed by a kernel other than those set
// as kernel arguments, specify an empty set by passing param_value_size equal to zero
// and param_value equal to NULL.
@ -521,6 +591,7 @@ fn set_kernel_exec_info(
}
CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM
| CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM => {
check_svm_support()?;
let val = unsafe {
cl_slice::from_raw_parts_bytes_len::<cl_bool>(param_value, param_value_size)?
};

View file

@ -24,6 +24,7 @@ use std::alloc::Layout;
use std::cmp;
use std::cmp::Ordering;
use std::mem::{self, MaybeUninit};
use std::num::NonZeroU64;
use std::os::raw::c_void;
use std::ptr;
use std::sync::Arc;
@ -231,6 +232,21 @@ unsafe impl CLInfo<cl_mem_info> for cl_mem {
let ptr = Arc::as_ptr(&mem.context);
v.write::<cl_context>(cl_context::from_ptr(ptr))
}
CL_MEM_DEVICE_ADDRESS_EXT => {
let buffer = Buffer::ref_from_raw(*self)?;
let addresses = buffer
.dev_addresses()
// CL_INVALID_OPERATION is returned for the CL_MEM_DEVICE_ADDRESS_EXT query if
// the cl_ext_buffer_device_address extension is not supported or if the buffer
// was not allocated with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT.
//
// We don't have to explicitly check here, as we will get None returned if
// either of those conditions are true.
.ok_or(CL_INVALID_OPERATION)?
.map(|(_, address)| address.map(NonZeroU64::get).unwrap_or_default());
v.write_iter::<cl_mem_device_address_ext>(addresses)
}
CL_MEM_FLAGS => v.write::<cl_mem_flags>(mem.flags),
// TODO debugging feature
CL_MEM_MAP_COUNT => v.write::<cl_uint>(0),
@ -300,9 +316,18 @@ fn create_buffer_with_properties(
// CL_INVALID_PROPERTY if a property name in properties is not a supported property name, if
// the value specified for a supported property name is not valid, or if the same property name
// is specified more than once.
if !props.is_empty() {
// we don't support any properties
return Err(CL_INVALID_PROPERTY);
for (&key, _) in props.iter() {
match key as u32 {
CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT => {
// CL_INVALID_OPERATION if properties includes CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT and
// there are no devices in the context that support the cl_ext_buffer_device_address
// extension.
if c.devs.iter().all(|dev| !dev.bda_supported()) {
return Err(CL_INVALID_OPERATION);
}
}
_ => return Err(CL_INVALID_PROPERTY),
}
}
Ok(MemBase::new_buffer(c, flags, size, host_ptr, props)?.into_cl())

View file

@ -12,6 +12,7 @@ use mesa_rust::pipe::screen::ResourceType;
use mesa_rust_gen::*;
use mesa_rust_util::conversion::*;
use mesa_rust_util::properties::Properties;
use mesa_rust_util::ptr::AllocSize;
use mesa_rust_util::ptr::TrackedPointers;
use rusticl_opencl_gen::*;
@ -22,12 +23,28 @@ use std::mem;
use std::os::raw::c_void;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::Weak;
// Bookkeeping entry for a buffer allocated with a fixed device address
// (cl_ext_buffer_device_address), tracked per device in Context::bda_ptrs.
struct TrackedBDAAlloc {
// Weak so that tracking alone never keeps the buffer alive; resolved via upgrade() on lookup.
buffer: Weak<Buffer>,
// allocation size in bytes, used for range lookups via AllocSize
size: cl_mem_device_address_ext,
}
impl AllocSize<cl_mem_device_address_ext> for TrackedBDAAlloc {
fn size(&self) -> cl_mem_device_address_ext {
self.size
}
}
pub struct Context {
pub base: CLObjectBase<CL_INVALID_CONTEXT>,
pub devs: Vec<&'static Device>,
pub properties: Properties<cl_context_properties>,
pub dtors: Mutex<Vec<DeleteContextCB>>,
// we track the pointers per device for quick access in hot paths.
bda_ptrs: Mutex<
HashMap<&'static Device, TrackedPointers<cl_mem_device_address_ext, TrackedBDAAlloc>>,
>,
svm_ptrs: Mutex<TrackedPointers<usize, Layout>>,
pub gl_ctx_manager: Option<GLCtxManager>,
}
@ -45,6 +62,7 @@ impl Context {
devs: devs,
properties: properties,
dtors: Mutex::new(Vec::new()),
bda_ptrs: Mutex::new(HashMap::new()),
svm_ptrs: Mutex::new(TrackedPointers::new()),
gl_ctx_manager: gl_ctx_manager,
})
@ -55,10 +73,17 @@ impl Context {
size: usize,
user_ptr: *mut c_void,
copy: bool,
bda: bool,
res_type: ResourceType,
) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
let adj_size: u32 = size.try_into_with_err(CL_OUT_OF_HOST_MEMORY)?;
let mut res = HashMap::new();
let mut pipe_flags = 0;
if bda {
pipe_flags |= PIPE_RESOURCE_FLAG_FIXED_ADDRESS;
}
for &dev in &self.devs {
let mut resource = None;
@ -67,13 +92,17 @@ impl Context {
adj_size,
user_ptr,
PIPE_BIND_GLOBAL,
pipe_flags,
)
}
if resource.is_none() {
resource = dev
.screen()
.resource_create_buffer(adj_size, res_type, PIPE_BIND_GLOBAL)
resource = dev.screen().resource_create_buffer(
adj_size,
res_type,
PIPE_BIND_GLOBAL,
pipe_flags,
)
}
let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
@ -194,6 +223,46 @@ impl Context {
self.svm_ptrs.lock().unwrap().remove(ptr)
}
/// Registers the per-device fixed addresses of `buffer` in the context-wide BDA tracking
/// table. Buffers without device addresses (not created with
/// CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT) are ignored, as are devices without an assigned
/// address.
pub fn add_bda_ptr(&self, buffer: &Arc<Buffer>) {
    let Some(addresses) = buffer.dev_addresses() else {
        return;
    };

    let mut tracked = self.bda_ptrs.lock().unwrap();
    for (dev, addr) in addresses.filter_map(|(dev, addr)| addr.map(|a| (dev, a))) {
        let entry = TrackedBDAAlloc {
            // weak ref so tracking doesn't extend the buffer's lifetime
            buffer: Arc::downgrade(buffer),
            size: buffer.size as _,
        };
        tracked.entry(dev).or_default().insert(addr.get(), entry);
    }
}
/// Looks up the buffer whose fixed device address range on `dev` contains `ptr`. Returns
/// `None` when no tracked allocation matches or the buffer has already been dropped.
pub fn find_bda_alloc(
    &self,
    dev: &Device,
    ptr: cl_mem_device_address_ext,
) -> Option<Arc<Buffer>> {
    let guard = self.bda_ptrs.lock().unwrap();
    guard
        .get(dev)
        .and_then(|ptrs| ptrs.find_alloc(ptr))
        .and_then(|(_, alloc)| alloc.buffer.upgrade())
}
/// Drops `buf`'s fixed device addresses from the context-wide BDA tracking table, for every
/// device the buffer has an address on.
pub fn remove_bda(&self, buf: &Buffer) {
    let mut tracked = self.bda_ptrs.lock().unwrap();
    tracked.iter_mut().for_each(|(dev, ptrs)| {
        let Some(address) = buf.dev_address(dev) else {
            return;
        };
        ptrs.remove(address.get());
    });
}
pub fn import_gl_buffer(
&self,
handle: u32,

View file

@ -742,6 +742,10 @@ impl Device {
add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
}
if self.bda_supported() {
add_ext(1, 0, 2, "cl_ext_buffer_device_address");
}
self.extensions = exts;
self.clc_features = feats;
self.extension_string = exts_str.join(" ");
@ -880,6 +884,10 @@ impl Device {
self.screen.caps().doubles
}
/// Whether this device supports cl_ext_buffer_device_address, i.e. whether the underlying
/// pipe screen can allocate buffers at a fixed device address.
pub fn bda_supported(&self) -> bool {
self.screen().is_fixed_address_supported()
}
// cl_intel_subgroups is additionally gated behind the platform's `intel` feature flag
// (RUSTICL_FEATURES=intel) on top of general subgroup support.
pub fn intel_subgroups_supported(&self) -> bool {
Platform::features().intel && self.subgroups_supported()
}

View file

@ -22,10 +22,12 @@ use spirv::SpirvKernelInfo;
use std::cmp;
use std::collections::HashMap;
use std::collections::HashSet;
use std::convert::TryInto;
use std::ffi::CStr;
use std::fmt::Debug;
use std::fmt::Display;
use std::ops::Deref;
use std::ops::Index;
use std::ops::Not;
use std::os::raw::c_void;
@ -49,6 +51,8 @@ use std::sync::Weak;
#[derive(Clone)]
pub enum KernelArgValue {
None,
/// cl_ext_buffer_device_address
BDA(u64),
Buffer(Weak<Buffer>),
Constant(Vec<u8>),
Image(Weak<Image>),
@ -495,7 +499,7 @@ impl NirKernelBuild {
// TODO bind as constant buffer
let res = dev
.screen()
.resource_create_buffer(len, ResourceType::Normal, PIPE_BIND_GLOBAL)
.resource_create_buffer(len, ResourceType::Normal, PIPE_BIND_GLOBAL, 0)
.unwrap();
dev.helper_ctx()
@ -518,6 +522,7 @@ pub struct Kernel {
pub prog: Arc<Program>,
pub name: String,
values: Mutex<Vec<Option<KernelArgValue>>>,
pub bdas: Mutex<Vec<cl_mem_device_address_ext>>,
builds: HashMap<&'static Device, Arc<NirKernelBuilds>>,
pub kernel_info: Arc<KernelInfo>,
}
@ -1239,6 +1244,7 @@ impl Kernel {
prog: prog,
name: name,
values: Mutex::new(values),
bdas: Mutex::new(Vec::new()),
builds: builds,
kernel_info: kernel_info,
})
@ -1315,6 +1321,7 @@ impl Kernel {
let kernel_info = Arc::clone(&self.kernel_info);
let arg_values = self.arg_values().clone();
let nir_kernel_builds = Arc::clone(&self.builds[q.device]);
let mut bdas = self.bdas.lock().unwrap().clone();
let mut buffer_arcs = HashMap::new();
let mut image_arcs = HashMap::new();
@ -1391,6 +1398,16 @@ impl Kernel {
};
let mut resource_info = Vec::new();
// Appends `address` to the kernel input buffer in the device's native pointer width and
// native byte order.
fn add_pointer(q: &Queue, input: &mut Vec<u8>, address: u64) {
    if q.device.address_bits() == 64 {
        input.extend_from_slice(&address.to_ne_bytes());
    } else {
        // 32-bit device pointers: truncate to u32. Device addresses on such devices are
        // expected to fit.
        input.extend_from_slice(&(address as u32).to_ne_bytes());
    }
}
fn add_global<'a>(
q: &Queue,
input: &mut Vec<u8>,
@ -1399,13 +1416,7 @@ impl Kernel {
offset: usize,
) {
resource_info.push((res, input.len()));
if q.device.address_bits() == 64 {
let offset: u64 = offset as u64;
input.extend_from_slice(&offset.to_ne_bytes());
} else {
let offset: u32 = offset as u32;
input.extend_from_slice(&offset.to_ne_bytes());
}
add_pointer(q, input, offset as u64);
}
fn add_sysval(q: &Queue, input: &mut Vec<u8>, vals: &[usize; 3]) {
@ -1421,7 +1432,7 @@ impl Kernel {
let buf = q
.device
.screen
.resource_create_buffer(printf_size, ResourceType::Staging, PIPE_BIND_GLOBAL)
.resource_create_buffer(printf_size, ResourceType::Staging, PIPE_BIND_GLOBAL, 0)
.unwrap();
let init_data: [u8; 1] = [4];
@ -1444,16 +1455,18 @@ impl Kernel {
match arg.kind {
CompiledKernelArgType::APIArg(idx) => {
let api_arg = &kernel_info.args[idx];
if api_arg.dead {
continue;
}
let Some(value) = &arg_values[idx] else {
continue;
};
match value {
KernelArgValue::Constant(c) => input.extend_from_slice(c),
KernelArgValue::BDA(address) => {
bdas.push(*address);
if !api_arg.dead {
add_pointer(q, &mut input, *address);
}
}
KernelArgValue::Buffer(buffer) => {
let buffer = &buffer_arcs[&(buffer.as_ptr() as usize)];
let rw = if api_arg.spirv.address_qualifier
@ -1464,8 +1477,24 @@ impl Kernel {
RWFlags::RW
};
let res = buffer.get_res_for_access(ctx, rw)?;
add_global(q, &mut input, &mut resource_info, res, buffer.offset());
// if the argument is dead, based on what kind of memory it is, we
// might need to migrate and make it available to the invocation
// regardless.
if api_arg.dead {
if let Some(address) = buffer.dev_address(ctx.dev) {
let _ = buffer.get_res_for_access(ctx, rw)?;
bdas.push(address.get());
}
} else {
let res = buffer.get_res_for_access(ctx, rw)?;
add_global(
q,
&mut input,
&mut resource_info,
res,
buffer.offset(),
);
}
}
KernelArgValue::Image(image) => {
let image = &image_arcs[&(image.as_ptr() as usize)];
@ -1508,11 +1537,14 @@ impl Kernel {
samplers.push(sampler.pipe());
}
KernelArgValue::None => {
assert!(
api_arg.kind == KernelArgType::MemGlobal
|| api_arg.kind == KernelArgType::MemConstant
);
input.extend_from_slice(null_ptr);
if !arg.dead
&& matches!(
api_arg.kind,
KernelArgType::MemGlobal | KernelArgType::MemConstant
)
{
input.extend_from_slice(null_ptr);
}
}
}
}
@ -1557,6 +1589,19 @@ impl Kernel {
}
}
// dedup with a HashSet
let bdas = bdas
.into_iter()
// Ignore invalid pointers as they are legal to be passed in, but illegal to
// dereference.
.filter_map(|address| q.context.find_bda_alloc(q.device, address))
.collect::<HashSet<_>>();
let bdas: Vec<_> = bdas
.iter()
.map(|buffer| Ok(buffer.get_res_for_access(ctx, RWFlags::RW)?.deref()))
.collect::<CLResult<_>>()?;
// subtract the shader local_size as we only request something on top of that.
variable_local_size -= static_local_size;
@ -1604,7 +1649,13 @@ impl Kernel {
];
ctx.update_cb0(&input)?;
ctx.launch_grid(work_dim, block, this_grid, variable_local_size as u32);
ctx.launch_grid(
work_dim,
block,
this_grid,
variable_local_size as u32,
&bdas,
);
if Platform::dbg().sync_every_event {
ctx.flush().wait();
@ -1818,6 +1869,7 @@ impl Clone for Kernel {
prog: Arc::clone(&self.prog),
name: self.name.clone(),
values: Mutex::new(self.arg_values().clone()),
bdas: Mutex::new(self.bdas.lock().unwrap().clone()),
builds: self.builds.clone(),
kernel_info: Arc::clone(&self.kernel_info),
}

View file

@ -30,6 +30,7 @@ use std::collections::HashMap;
use std::convert::TryInto;
use std::mem;
use std::mem::size_of;
use std::num::NonZeroU64;
use std::ops::Deref;
use std::os::raw::c_void;
use std::ptr;
@ -47,6 +48,12 @@ struct Mapping<T> {
inner: T,
}
impl<T> Mapping<T> {
// Size in bytes of the mapped region, taken from the allocation layout.
fn size(&self) -> usize {
self.layout.size()
}
}
impl<T> Drop for Mapping<T> {
fn drop(&mut self) {
if let Some(ptr) = &self.ptr {
@ -580,6 +587,7 @@ pub struct MemBase {
pub struct Buffer {
base: MemBase,
// Fixed device address per device (cl_ext_buffer_device_address). `None` unless the buffer
// was created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT; only devices with BDA support get an
// entry.
address: Option<HashMap<&'static Device, NonZeroU64>>,
maps: Mutex<TrackedPointers<usize, Mapping<BufferMapping>>>,
}
@ -760,6 +768,11 @@ impl MemBase {
mut host_ptr: *mut c_void,
props: Properties<cl_mem_properties>,
) -> CLResult<Arc<Buffer>> {
let bda = props
.get(&CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT.into())
.copied()
== Some(CL_TRUE.into());
let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
ResourceType::Staging
} else {
@ -770,6 +783,7 @@ impl MemBase {
size,
host_ptr,
bit_check(flags, CL_MEM_COPY_HOST_PTR),
bda,
res_type,
)?;
@ -778,8 +792,21 @@ impl MemBase {
host_ptr = ptr::null_mut()
}
let addresses = bda.then(|| {
context
.devs
.iter()
.filter(|dev| dev.bda_supported())
.map(|&dev| {
let address = buffer[dev].resource_get_address();
Some((dev, address?))
})
.collect::<Option<_>>()
.unwrap()
});
let alloc = Allocation::new(buffer, 0, host_ptr);
Ok(Arc::new(Buffer {
let buffer = Arc::new(Buffer {
base: Self {
base: CLObjectBase::new(RusticlTypes::Buffer),
context: context,
@ -791,8 +818,15 @@ impl MemBase {
cbs: Mutex::new(Vec::new()),
alloc: alloc,
},
address: addresses,
maps: Mutex::new(TrackedPointers::new()),
}))
});
if buffer.address.is_some() {
buffer.context.add_bda_ptr(&buffer);
}
Ok(buffer)
}
pub fn new_sub_buffer(
@ -801,6 +835,14 @@ impl MemBase {
offset: usize,
size: usize,
) -> Arc<Buffer> {
let address = parent.address.as_ref().map(|addresses| {
addresses
.iter()
// checked_add should never fail, because an allocation will never wrap around.
.map(|(&dev, address)| (dev, address.checked_add(offset as u64).unwrap()))
.collect()
});
Arc::new(Buffer {
base: Self {
base: CLObjectBase::new(RusticlTypes::Buffer),
@ -813,6 +855,7 @@ impl MemBase {
cbs: Mutex::new(Vec::new()),
alloc: Allocation::new_sub(Mem::Buffer(parent), offset),
},
address: address,
maps: Mutex::new(TrackedPointers::new()),
})
}
@ -1000,6 +1043,7 @@ impl MemBase {
Ok(if rusticl_type == RusticlTypes::Buffer {
Arc::new(Buffer {
base: base,
address: None,
maps: Mutex::new(TrackedPointers::new()),
})
.into_cl()
@ -1266,6 +1310,24 @@ impl Buffer {
Ok(())
}
/// Returns this buffer's fixed device address on `dev`, or `None` if the buffer was not
/// allocated with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT or has no address on that device.
pub fn dev_address(&self, dev: &Device) -> Option<NonZeroU64> {
self.address.as_ref()?.get(dev).copied()
}
/// Returns an iterator of device address pairs in the same order as devices in the associated
/// context.
/// Returns an iterator of (device, address) pairs in the same order as the devices of the
/// associated context, or `None` if the buffer has no fixed device addresses at all. Devices
/// without an assigned address yield `None` as their address.
pub fn dev_addresses(
    &self,
) -> Option<impl ExactSizeIterator<Item = (&'static Device, Option<NonZeroU64>)> + '_> {
    let addresses = self.address.as_ref()?;
    let iter = self
        .context
        .devs
        .iter()
        .map(move |&dev| (dev, addresses.get(dev).copied()));
    Some(iter)
}
pub fn fill(
&self,
ctx: &QueueContext,
@ -1507,6 +1569,14 @@ impl Buffer {
}
}
impl Drop for Buffer {
fn drop(&mut self) {
// Unregister our fixed device addresses from the context's BDA tracking, so stale device
// pointers can no longer resolve to this buffer.
if self.address.is_some() {
self.context.remove_bda(self);
}
}
}
impl Image {
pub fn copy_to_buffer(
&self,

View file

@ -454,12 +454,16 @@ impl PipeContext {
block: [u32; 3],
grid: [u32; 3],
variable_local_mem: u32,
globals: &[&PipeResource],
) {
let mut globals: Vec<*mut pipe_resource> = globals.iter().map(|res| res.pipe()).collect();
let info = pipe_grid_info {
variable_shared_mem: variable_local_mem,
work_dim: work_dim,
block: block,
grid: grid,
globals: globals.as_mut_ptr(),
num_globals: globals.len() as u32,
..Default::default()
};
unsafe { self.pipe.as_ref().launch_grid.unwrap()(self.pipe.as_ptr(), &info) }

View file

@ -136,6 +136,7 @@ impl PipeScreen {
size: u32,
res_type: ResourceType,
pipe_bind: u32,
pipe_flags: u32,
) -> Option<PipeResource> {
let mut tmpl = pipe_resource::default();
@ -145,6 +146,7 @@ impl PipeScreen {
tmpl.depth0 = 1;
tmpl.array_size = 1;
tmpl.bind = pipe_bind;
tmpl.flags = pipe_flags;
res_type.apply(&mut tmpl);
@ -156,6 +158,7 @@ impl PipeScreen {
size: u32,
mem: *mut c_void,
pipe_bind: u32,
pipe_flags: u32,
) -> Option<PipeResource> {
let mut tmpl = pipe_resource::default();
@ -165,6 +168,7 @@ impl PipeScreen {
tmpl.depth0 = 1;
tmpl.array_size = 1;
tmpl.bind = pipe_bind;
tmpl.flags = pipe_flags;
self.resource_create_from_user(&tmpl, mem)
}