mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
rusticl: implement cl_ext_buffer_device_address
Reviewed-by: Adam Jackson <ajax@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32942>
This commit is contained in:
parent
35a9829391
commit
b65652b4be
11 changed files with 366 additions and 58 deletions
|
|
@ -849,6 +849,7 @@ Rusticl extensions that are not part of any OpenCL version:
|
|||
cl_khr_terminate_context not started
|
||||
cl_khr_throttle_hints not started
|
||||
cl_khr_work_group_uniform_arithmetic not started
|
||||
cl_ext_buffer_device_address DONE (llvmpipe, zink)
|
||||
cl_arm_non_uniform_work_group_size not started
|
||||
cl_arm_shared_virtual_memory in progress (nvc0)
|
||||
cl_intel_subgroups in progress (available with RUSTICL_FEATURES=intel)
|
||||
|
|
|
|||
|
|
@ -27,3 +27,4 @@ VK_EXT_image_2d_view_of_3d on panvk
|
|||
VK_EXT_texel_buffer_alignment on panvk
|
||||
cl_khr_kernel_clock on freedreno, iris, llvmpipe, nvc0, panfrost, radeonsi and zink with llvm-19 or newer
|
||||
GL_KHR_texture_compression_astc_hdr on panfrost and asahi
|
||||
cl_ext_buffer_device_address on llvmpipe and zink
|
||||
|
|
|
|||
|
|
@ -517,6 +517,9 @@ extern "C" fn clGetExtensionFunctionAddress(
|
|||
"clSVMAllocARM" => cl_ext_func!(clSVMAlloc: clSVMAllocARM_fn),
|
||||
"clSVMFreeARM" => cl_ext_func!(clSVMFree: clSVMFreeARM_fn),
|
||||
|
||||
// cl_ext_buffer_device_address
|
||||
"clSetKernelArgDevicePointerEXT" => cl_ext_func!(clSetKernelArgDevicePointerEXT: clSetKernelArgDevicePointerEXT_fn),
|
||||
|
||||
// DPCPP bug https://github.com/intel/llvm/issues/9964
|
||||
"clSetProgramSpecializationConstant" => cl_ext_func!(clSetProgramSpecializationConstant: clSetProgramSpecializationConstant_fn),
|
||||
|
||||
|
|
|
|||
|
|
@ -420,33 +420,41 @@ fn set_kernel_arg(
|
|||
|
||||
// let's create the arg now
|
||||
let arg = unsafe {
|
||||
if arg.dead {
|
||||
KernelArgValue::None
|
||||
} else {
|
||||
match arg.kind {
|
||||
KernelArgType::Constant(_) => KernelArgValue::Constant(
|
||||
slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
|
||||
),
|
||||
KernelArgType::MemConstant | KernelArgType::MemGlobal => {
|
||||
let ptr: *const cl_mem = arg_value.cast();
|
||||
if ptr.is_null() || (*ptr).is_null() {
|
||||
KernelArgValue::None
|
||||
} else {
|
||||
let buffer = Buffer::arc_from_raw(*ptr)?;
|
||||
KernelArgValue::Buffer(Arc::downgrade(&buffer))
|
||||
}
|
||||
}
|
||||
KernelArgType::MemLocal => KernelArgValue::LocalMem(arg_size),
|
||||
KernelArgType::Image | KernelArgType::RWImage | KernelArgType::Texture => {
|
||||
let img: *const cl_mem = arg_value.cast();
|
||||
let img = Image::arc_from_raw(*img)?;
|
||||
KernelArgValue::Image(Arc::downgrade(&img))
|
||||
}
|
||||
KernelArgType::Sampler => {
|
||||
let ptr: *const cl_sampler = arg_value.cast();
|
||||
KernelArgValue::Sampler(Sampler::arc_from_raw(*ptr)?)
|
||||
match arg.kind {
|
||||
KernelArgType::Constant(_) if !arg.dead => KernelArgValue::Constant(
|
||||
slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
|
||||
),
|
||||
KernelArgType::MemConstant | KernelArgType::MemGlobal => {
|
||||
let ptr: *const cl_mem = arg_value.cast();
|
||||
if ptr.is_null() || (*ptr).is_null() {
|
||||
KernelArgValue::None
|
||||
} else {
|
||||
let buffer = Buffer::arc_from_raw(*ptr)?;
|
||||
KernelArgValue::Buffer(Arc::downgrade(&buffer))
|
||||
}
|
||||
}
|
||||
KernelArgType::MemLocal if !arg.dead => KernelArgValue::LocalMem(arg_size),
|
||||
KernelArgType::Image | KernelArgType::RWImage | KernelArgType::Texture
|
||||
if !arg.dead =>
|
||||
{
|
||||
let img: *const cl_mem = arg_value.cast();
|
||||
let img = Image::arc_from_raw(*img)?;
|
||||
KernelArgValue::Image(Arc::downgrade(&img))
|
||||
}
|
||||
KernelArgType::Sampler if !arg.dead => {
|
||||
let ptr: *const cl_sampler = arg_value.cast();
|
||||
KernelArgValue::Sampler(Sampler::arc_from_raw(*ptr)?)
|
||||
}
|
||||
_ => {
|
||||
debug_assert!(
|
||||
arg.dead
|
||||
|| matches!(
|
||||
arg.kind,
|
||||
KernelArgType::MemConstant | KernelArgType::MemGlobal
|
||||
)
|
||||
);
|
||||
KernelArgValue::None
|
||||
}
|
||||
}
|
||||
};
|
||||
k.set_kernel_arg(arg_index, arg)
|
||||
|
|
@ -490,6 +498,38 @@ fn set_kernel_arg_svm_pointer(
|
|||
// CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
|
||||
}
|
||||
|
||||
#[cl_entrypoint(clSetKernelArgDevicePointerEXT)]
|
||||
fn set_kernel_arg_device_pointer(
|
||||
kernel: cl_kernel,
|
||||
arg_index: cl_uint,
|
||||
arg_value: cl_mem_device_address_ext,
|
||||
) -> CLResult<()> {
|
||||
let kernel = Kernel::ref_from_raw(kernel)?;
|
||||
let arg_index = arg_index as usize;
|
||||
let devs = &kernel.prog.context.devs;
|
||||
|
||||
// CL_INVALID_OPERATION if no devices in the context associated with kernel support the device
|
||||
// pointer.
|
||||
if devs.iter().any(|dev| !dev.bda_supported()) {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
|
||||
// CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
|
||||
let Some(arg) = kernel.kernel_info.args.get(arg_index) else {
|
||||
return Err(CL_INVALID_ARG_INDEX);
|
||||
};
|
||||
|
||||
// The device pointer can only be used for arguments that are declared to be a pointer to global
|
||||
// memory allocated with clCreateBufferWithProperties with the CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT
|
||||
// property.
|
||||
if arg.kind != KernelArgType::MemGlobal {
|
||||
return Err(CL_INVALID_ARG_INDEX);
|
||||
}
|
||||
|
||||
// we set the arg also when it's a dead argument, as we need to ensure the buffer gets migrated.
|
||||
kernel.set_kernel_arg(arg_index, KernelArgValue::BDA(arg_value))
|
||||
}
|
||||
|
||||
#[cl_entrypoint(clSetKernelExecInfo)]
|
||||
fn set_kernel_exec_info(
|
||||
kernel: cl_kernel,
|
||||
|
|
@ -498,15 +538,45 @@ fn set_kernel_exec_info(
|
|||
param_value: *const ::std::os::raw::c_void,
|
||||
) -> CLResult<()> {
|
||||
let k = Kernel::ref_from_raw(kernel)?;
|
||||
let devs = &k.prog.devs;
|
||||
|
||||
// CL_INVALID_OPERATION if no devices in the context associated with kernel support SVM.
|
||||
if !k.prog.devs.iter().any(|dev| dev.svm_supported()) {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
// CL_INVALID_OPERATION for CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT if no device in the context
|
||||
// associated with kernel support the cl_ext_buffer_device_address extension.
|
||||
let check_bda_support = || {
|
||||
if devs.iter().all(|dev| !dev.bda_supported()) {
|
||||
Err(CL_INVALID_OPERATION)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
};
|
||||
|
||||
// CL_INVALID_OPERATION for CL_KERNEL_EXEC_INFO_SVM_PTRS and
|
||||
// CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM if no devices in the context associated with kernel
|
||||
// support SVM.
|
||||
let check_svm_support = || {
|
||||
if devs.iter().all(|dev| !dev.svm_supported()) {
|
||||
Err(CL_INVALID_OPERATION)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
};
|
||||
|
||||
// CL_INVALID_VALUE ... if the size specified by param_value_size is not valid.
|
||||
match param_name {
|
||||
CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT => {
|
||||
check_bda_support()?;
|
||||
let handles = unsafe {
|
||||
cl_slice::from_raw_parts_bytes_len::<cl_mem_device_address_ext>(
|
||||
param_value,
|
||||
param_value_size,
|
||||
)?
|
||||
};
|
||||
|
||||
handles.clone_into(&mut k.bdas.lock().unwrap());
|
||||
}
|
||||
CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => {
|
||||
check_svm_support()?;
|
||||
|
||||
// To specify that no SVM allocations will be accessed by a kernel other than those set
|
||||
// as kernel arguments, specify an empty set by passing param_value_size equal to zero
|
||||
// and param_value equal to NULL.
|
||||
|
|
@ -521,6 +591,7 @@ fn set_kernel_exec_info(
|
|||
}
|
||||
CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM
|
||||
| CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM => {
|
||||
check_svm_support()?;
|
||||
let val = unsafe {
|
||||
cl_slice::from_raw_parts_bytes_len::<cl_bool>(param_value, param_value_size)?
|
||||
};
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ use std::alloc::Layout;
|
|||
use std::cmp;
|
||||
use std::cmp::Ordering;
|
||||
use std::mem::{self, MaybeUninit};
|
||||
use std::num::NonZeroU64;
|
||||
use std::os::raw::c_void;
|
||||
use std::ptr;
|
||||
use std::sync::Arc;
|
||||
|
|
@ -231,6 +232,21 @@ unsafe impl CLInfo<cl_mem_info> for cl_mem {
|
|||
let ptr = Arc::as_ptr(&mem.context);
|
||||
v.write::<cl_context>(cl_context::from_ptr(ptr))
|
||||
}
|
||||
CL_MEM_DEVICE_ADDRESS_EXT => {
|
||||
let buffer = Buffer::ref_from_raw(*self)?;
|
||||
let addresses = buffer
|
||||
.dev_addresses()
|
||||
// CL_INVALID_OPERATION is returned for the CL_MEM_DEVICE_ADDRESS_EXT query if
|
||||
// the cl_ext_buffer_device_address extension is not supported or if the buffer
|
||||
// was not allocated with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT.
|
||||
//
|
||||
// We don't have to explicitly check here, as we will get None returned if
|
||||
// either of those conditions are true.
|
||||
.ok_or(CL_INVALID_OPERATION)?
|
||||
.map(|(_, address)| address.map(NonZeroU64::get).unwrap_or_default());
|
||||
|
||||
v.write_iter::<cl_mem_device_address_ext>(addresses)
|
||||
}
|
||||
CL_MEM_FLAGS => v.write::<cl_mem_flags>(mem.flags),
|
||||
// TODO debugging feature
|
||||
CL_MEM_MAP_COUNT => v.write::<cl_uint>(0),
|
||||
|
|
@ -300,9 +316,18 @@ fn create_buffer_with_properties(
|
|||
// CL_INVALID_PROPERTY if a property name in properties is not a supported property name, if
|
||||
// the value specified for a supported property name is not valid, or if the same property name
|
||||
// is specified more than once.
|
||||
if !props.is_empty() {
|
||||
// we don't support any properties
|
||||
return Err(CL_INVALID_PROPERTY);
|
||||
for (&key, _) in props.iter() {
|
||||
match key as u32 {
|
||||
CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT => {
|
||||
// CL_INVALID_OPERATION If properties includes CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT and
|
||||
// there are no devices in the context that support the cl_ext_buffer_device_address
|
||||
// extension.
|
||||
if c.devs.iter().all(|dev| !dev.bda_supported()) {
|
||||
return Err(CL_INVALID_OPERATION);
|
||||
}
|
||||
}
|
||||
_ => return Err(CL_INVALID_PROPERTY),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(MemBase::new_buffer(c, flags, size, host_ptr, props)?.into_cl())
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ use mesa_rust::pipe::screen::ResourceType;
|
|||
use mesa_rust_gen::*;
|
||||
use mesa_rust_util::conversion::*;
|
||||
use mesa_rust_util::properties::Properties;
|
||||
use mesa_rust_util::ptr::AllocSize;
|
||||
use mesa_rust_util::ptr::TrackedPointers;
|
||||
use rusticl_opencl_gen::*;
|
||||
|
||||
|
|
@ -22,12 +23,28 @@ use std::mem;
|
|||
use std::os::raw::c_void;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::Weak;
|
||||
|
||||
// Value type stored in the per-device `TrackedPointers` table of buffer device addresses.
// `buffer` is a weak handle to the buffer the address belongs to and `size` is the length of the
// tracked range, exposed to the tracker through `AllocSize`.
struct TrackedBDAAlloc {
|
||||
buffer: Weak<Buffer>,
|
||||
size: cl_mem_device_address_ext,
|
||||
}
|
||||
|
||||
// Reports the stored allocation size to the pointer tracker.
impl AllocSize<cl_mem_device_address_ext> for TrackedBDAAlloc {
|
||||
fn size(&self) -> cl_mem_device_address_ext {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
// State held by a rusticl context: its devices, its properties, registered destructor callbacks,
// the pointer-tracking tables for buffer device addresses and SVM, and an optional GL context
// manager.
pub struct Context {
|
||||
pub base: CLObjectBase<CL_INVALID_CONTEXT>,
|
||||
pub devs: Vec<&'static Device>,
|
||||
pub properties: Properties<cl_context_properties>,
|
||||
pub dtors: Mutex<Vec<DeleteContextCB>>,
|
||||
// we track the pointers per device for quick access in hot paths.
// Maps each device to the device addresses of buffers created with a device address; filled by
// `add_bda_ptr`, queried by `find_bda_alloc` and cleaned up by `remove_bda`.
|
||||
bda_ptrs: Mutex<
|
||||
HashMap<&'static Device, TrackedPointers<cl_mem_device_address_ext, TrackedBDAAlloc>>,
|
||||
>,
|
||||
svm_ptrs: Mutex<TrackedPointers<usize, Layout>>,
|
||||
pub gl_ctx_manager: Option<GLCtxManager>,
|
||||
}
|
||||
|
|
@ -45,6 +62,7 @@ impl Context {
|
|||
devs: devs,
|
||||
properties: properties,
|
||||
dtors: Mutex::new(Vec::new()),
|
||||
bda_ptrs: Mutex::new(HashMap::new()),
|
||||
svm_ptrs: Mutex::new(TrackedPointers::new()),
|
||||
gl_ctx_manager: gl_ctx_manager,
|
||||
})
|
||||
|
|
@ -55,10 +73,17 @@ impl Context {
|
|||
size: usize,
|
||||
user_ptr: *mut c_void,
|
||||
copy: bool,
|
||||
bda: bool,
|
||||
res_type: ResourceType,
|
||||
) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
|
||||
let adj_size: u32 = size.try_into_with_err(CL_OUT_OF_HOST_MEMORY)?;
|
||||
let mut res = HashMap::new();
|
||||
let mut pipe_flags = 0;
|
||||
|
||||
if bda {
|
||||
pipe_flags |= PIPE_RESOURCE_FLAG_FIXED_ADDRESS;
|
||||
}
|
||||
|
||||
for &dev in &self.devs {
|
||||
let mut resource = None;
|
||||
|
||||
|
|
@ -67,13 +92,17 @@ impl Context {
|
|||
adj_size,
|
||||
user_ptr,
|
||||
PIPE_BIND_GLOBAL,
|
||||
pipe_flags,
|
||||
)
|
||||
}
|
||||
|
||||
if resource.is_none() {
|
||||
resource = dev
|
||||
.screen()
|
||||
.resource_create_buffer(adj_size, res_type, PIPE_BIND_GLOBAL)
|
||||
resource = dev.screen().resource_create_buffer(
|
||||
adj_size,
|
||||
res_type,
|
||||
PIPE_BIND_GLOBAL,
|
||||
pipe_flags,
|
||||
)
|
||||
}
|
||||
|
||||
let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
|
||||
|
|
@ -194,6 +223,46 @@ impl Context {
|
|||
self.svm_ptrs.lock().unwrap().remove(ptr)
|
||||
}
|
||||
|
||||
pub fn add_bda_ptr(&self, buffer: &Arc<Buffer>) {
|
||||
if let Some(iter) = buffer.dev_addresses() {
|
||||
let mut bda_ptrs = self.bda_ptrs.lock().unwrap();
|
||||
|
||||
for (dev, address) in iter {
|
||||
let Some(address) = address else {
|
||||
continue;
|
||||
};
|
||||
|
||||
bda_ptrs.entry(dev).or_default().insert(
|
||||
address.get(),
|
||||
TrackedBDAAlloc {
|
||||
buffer: Arc::downgrade(buffer),
|
||||
size: buffer.size as _,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_bda_alloc(
|
||||
&self,
|
||||
dev: &Device,
|
||||
ptr: cl_mem_device_address_ext,
|
||||
) -> Option<Arc<Buffer>> {
|
||||
let lock = self.bda_ptrs.lock().unwrap();
|
||||
let (_, mem) = lock.get(dev)?.find_alloc(ptr)?;
|
||||
mem.buffer.upgrade()
|
||||
}
|
||||
|
||||
/// Drops the device addresses of `buf` from every per-device tracking table.
///
/// Devices for which `buf` has no address are left untouched.
pub fn remove_bda(&self, buf: &Buffer) {
    let mut per_dev = self.bda_ptrs.lock().unwrap();

    for (dev, tracked) in per_dev.iter_mut() {
        let Some(address) = buf.dev_address(dev) else {
            continue;
        };

        tracked.remove(address.get());
    }
}
|
||||
|
||||
pub fn import_gl_buffer(
|
||||
&self,
|
||||
handle: u32,
|
||||
|
|
|
|||
|
|
@ -742,6 +742,10 @@ impl Device {
|
|||
add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
|
||||
}
|
||||
|
||||
if self.bda_supported() {
|
||||
add_ext(1, 0, 2, "cl_ext_buffer_device_address");
|
||||
}
|
||||
|
||||
self.extensions = exts;
|
||||
self.clc_features = feats;
|
||||
self.extension_string = exts_str.join(" ");
|
||||
|
|
@ -880,6 +884,10 @@ impl Device {
|
|||
self.screen.caps().doubles
|
||||
}
|
||||
|
||||
pub fn bda_supported(&self) -> bool {
|
||||
self.screen().is_fixed_address_supported()
|
||||
}
|
||||
|
||||
pub fn intel_subgroups_supported(&self) -> bool {
|
||||
Platform::features().intel && self.subgroups_supported()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,10 +22,12 @@ use spirv::SpirvKernelInfo;
|
|||
|
||||
use std::cmp;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::ffi::CStr;
|
||||
use std::fmt::Debug;
|
||||
use std::fmt::Display;
|
||||
use std::ops::Deref;
|
||||
use std::ops::Index;
|
||||
use std::ops::Not;
|
||||
use std::os::raw::c_void;
|
||||
|
|
@ -49,6 +51,8 @@ use std::sync::Weak;
|
|||
#[derive(Clone)]
|
||||
pub enum KernelArgValue {
|
||||
None,
|
||||
/// cl_ext_buffer_device_address
|
||||
BDA(u64),
|
||||
Buffer(Weak<Buffer>),
|
||||
Constant(Vec<u8>),
|
||||
Image(Weak<Image>),
|
||||
|
|
@ -495,7 +499,7 @@ impl NirKernelBuild {
|
|||
// TODO bind as constant buffer
|
||||
let res = dev
|
||||
.screen()
|
||||
.resource_create_buffer(len, ResourceType::Normal, PIPE_BIND_GLOBAL)
|
||||
.resource_create_buffer(len, ResourceType::Normal, PIPE_BIND_GLOBAL, 0)
|
||||
.unwrap();
|
||||
|
||||
dev.helper_ctx()
|
||||
|
|
@ -518,6 +522,7 @@ pub struct Kernel {
|
|||
pub prog: Arc<Program>,
|
||||
pub name: String,
|
||||
values: Mutex<Vec<Option<KernelArgValue>>>,
|
||||
pub bdas: Mutex<Vec<cl_mem_device_address_ext>>,
|
||||
builds: HashMap<&'static Device, Arc<NirKernelBuilds>>,
|
||||
pub kernel_info: Arc<KernelInfo>,
|
||||
}
|
||||
|
|
@ -1239,6 +1244,7 @@ impl Kernel {
|
|||
prog: prog,
|
||||
name: name,
|
||||
values: Mutex::new(values),
|
||||
bdas: Mutex::new(Vec::new()),
|
||||
builds: builds,
|
||||
kernel_info: kernel_info,
|
||||
})
|
||||
|
|
@ -1315,6 +1321,7 @@ impl Kernel {
|
|||
let kernel_info = Arc::clone(&self.kernel_info);
|
||||
let arg_values = self.arg_values().clone();
|
||||
let nir_kernel_builds = Arc::clone(&self.builds[q.device]);
|
||||
let mut bdas = self.bdas.lock().unwrap().clone();
|
||||
|
||||
let mut buffer_arcs = HashMap::new();
|
||||
let mut image_arcs = HashMap::new();
|
||||
|
|
@ -1391,6 +1398,16 @@ impl Kernel {
|
|||
};
|
||||
|
||||
let mut resource_info = Vec::new();
|
||||
fn add_pointer(q: &Queue, input: &mut Vec<u8>, address: u64) {
|
||||
if q.device.address_bits() == 64 {
|
||||
let address: u64 = address;
|
||||
input.extend_from_slice(&address.to_ne_bytes());
|
||||
} else {
|
||||
let address: u32 = address as u32;
|
||||
input.extend_from_slice(&address.to_ne_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
fn add_global<'a>(
|
||||
q: &Queue,
|
||||
input: &mut Vec<u8>,
|
||||
|
|
@ -1399,13 +1416,7 @@ impl Kernel {
|
|||
offset: usize,
|
||||
) {
|
||||
resource_info.push((res, input.len()));
|
||||
if q.device.address_bits() == 64 {
|
||||
let offset: u64 = offset as u64;
|
||||
input.extend_from_slice(&offset.to_ne_bytes());
|
||||
} else {
|
||||
let offset: u32 = offset as u32;
|
||||
input.extend_from_slice(&offset.to_ne_bytes());
|
||||
}
|
||||
add_pointer(q, input, offset as u64);
|
||||
}
|
||||
|
||||
fn add_sysval(q: &Queue, input: &mut Vec<u8>, vals: &[usize; 3]) {
|
||||
|
|
@ -1421,7 +1432,7 @@ impl Kernel {
|
|||
let buf = q
|
||||
.device
|
||||
.screen
|
||||
.resource_create_buffer(printf_size, ResourceType::Staging, PIPE_BIND_GLOBAL)
|
||||
.resource_create_buffer(printf_size, ResourceType::Staging, PIPE_BIND_GLOBAL, 0)
|
||||
.unwrap();
|
||||
|
||||
let init_data: [u8; 1] = [4];
|
||||
|
|
@ -1444,16 +1455,18 @@ impl Kernel {
|
|||
match arg.kind {
|
||||
CompiledKernelArgType::APIArg(idx) => {
|
||||
let api_arg = &kernel_info.args[idx];
|
||||
if api_arg.dead {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(value) = &arg_values[idx] else {
|
||||
continue;
|
||||
};
|
||||
|
||||
match value {
|
||||
KernelArgValue::Constant(c) => input.extend_from_slice(c),
|
||||
KernelArgValue::BDA(address) => {
|
||||
bdas.push(*address);
|
||||
if !api_arg.dead {
|
||||
add_pointer(q, &mut input, *address);
|
||||
}
|
||||
}
|
||||
KernelArgValue::Buffer(buffer) => {
|
||||
let buffer = &buffer_arcs[&(buffer.as_ptr() as usize)];
|
||||
let rw = if api_arg.spirv.address_qualifier
|
||||
|
|
@ -1464,8 +1477,24 @@ impl Kernel {
|
|||
RWFlags::RW
|
||||
};
|
||||
|
||||
let res = buffer.get_res_for_access(ctx, rw)?;
|
||||
add_global(q, &mut input, &mut resource_info, res, buffer.offset());
|
||||
// if the argument is dead, based on what kind of memory it is, we
|
||||
// might need to migrate and make it available to the invocation
|
||||
// regardless.
|
||||
if api_arg.dead {
|
||||
if let Some(address) = buffer.dev_address(ctx.dev) {
|
||||
let _ = buffer.get_res_for_access(ctx, rw)?;
|
||||
bdas.push(address.get());
|
||||
}
|
||||
} else {
|
||||
let res = buffer.get_res_for_access(ctx, rw)?;
|
||||
add_global(
|
||||
q,
|
||||
&mut input,
|
||||
&mut resource_info,
|
||||
res,
|
||||
buffer.offset(),
|
||||
);
|
||||
}
|
||||
}
|
||||
KernelArgValue::Image(image) => {
|
||||
let image = &image_arcs[&(image.as_ptr() as usize)];
|
||||
|
|
@ -1508,11 +1537,14 @@ impl Kernel {
|
|||
samplers.push(sampler.pipe());
|
||||
}
|
||||
KernelArgValue::None => {
|
||||
assert!(
|
||||
api_arg.kind == KernelArgType::MemGlobal
|
||||
|| api_arg.kind == KernelArgType::MemConstant
|
||||
);
|
||||
input.extend_from_slice(null_ptr);
|
||||
if !arg.dead
|
||||
&& matches!(
|
||||
api_arg.kind,
|
||||
KernelArgType::MemGlobal | KernelArgType::MemConstant
|
||||
)
|
||||
{
|
||||
input.extend_from_slice(null_ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1557,6 +1589,19 @@ impl Kernel {
|
|||
}
|
||||
}
|
||||
|
||||
// dedup with a HashSet
|
||||
let bdas = bdas
|
||||
.into_iter()
|
||||
// Ignore invalid pointers as they are legal to be passed in, but illegal to
|
||||
// dereference.
|
||||
.filter_map(|address| q.context.find_bda_alloc(q.device, address))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let bdas: Vec<_> = bdas
|
||||
.iter()
|
||||
.map(|buffer| Ok(buffer.get_res_for_access(ctx, RWFlags::RW)?.deref()))
|
||||
.collect::<CLResult<_>>()?;
|
||||
|
||||
// subtract the shader local_size as we only request something on top of that.
|
||||
variable_local_size -= static_local_size;
|
||||
|
||||
|
|
@ -1604,7 +1649,13 @@ impl Kernel {
|
|||
];
|
||||
|
||||
ctx.update_cb0(&input)?;
|
||||
ctx.launch_grid(work_dim, block, this_grid, variable_local_size as u32);
|
||||
ctx.launch_grid(
|
||||
work_dim,
|
||||
block,
|
||||
this_grid,
|
||||
variable_local_size as u32,
|
||||
&bdas,
|
||||
);
|
||||
|
||||
if Platform::dbg().sync_every_event {
|
||||
ctx.flush().wait();
|
||||
|
|
@ -1818,6 +1869,7 @@ impl Clone for Kernel {
|
|||
prog: Arc::clone(&self.prog),
|
||||
name: self.name.clone(),
|
||||
values: Mutex::new(self.arg_values().clone()),
|
||||
bdas: Mutex::new(self.bdas.lock().unwrap().clone()),
|
||||
builds: self.builds.clone(),
|
||||
kernel_info: Arc::clone(&self.kernel_info),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ use std::collections::HashMap;
|
|||
use std::convert::TryInto;
|
||||
use std::mem;
|
||||
use std::mem::size_of;
|
||||
use std::num::NonZeroU64;
|
||||
use std::ops::Deref;
|
||||
use std::os::raw::c_void;
|
||||
use std::ptr;
|
||||
|
|
@ -47,6 +48,12 @@ struct Mapping<T> {
|
|||
inner: T,
|
||||
}
|
||||
|
||||
impl<T> Mapping<T> {
|
||||
fn size(&self) -> usize {
|
||||
self.layout.size()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for Mapping<T> {
|
||||
fn drop(&mut self) {
|
||||
if let Some(ptr) = &self.ptr {
|
||||
|
|
@ -580,6 +587,7 @@ pub struct MemBase {
|
|||
|
||||
// A buffer memory object: the common `MemBase` state plus buffer-specific bookkeeping.
pub struct Buffer {
|
||||
base: MemBase,
|
||||
// Per-device address of this buffer; `None` when the buffer has no device addresses
// (see `dev_address` / `dev_addresses`).
address: Option<HashMap<&'static Device, NonZeroU64>>,
|
||||
maps: Mutex<TrackedPointers<usize, Mapping<BufferMapping>>>,
|
||||
}
|
||||
|
||||
|
|
@ -760,6 +768,11 @@ impl MemBase {
|
|||
mut host_ptr: *mut c_void,
|
||||
props: Properties<cl_mem_properties>,
|
||||
) -> CLResult<Arc<Buffer>> {
|
||||
let bda = props
|
||||
.get(&CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT.into())
|
||||
.copied()
|
||||
== Some(CL_TRUE.into());
|
||||
|
||||
let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
|
||||
ResourceType::Staging
|
||||
} else {
|
||||
|
|
@ -770,6 +783,7 @@ impl MemBase {
|
|||
size,
|
||||
host_ptr,
|
||||
bit_check(flags, CL_MEM_COPY_HOST_PTR),
|
||||
bda,
|
||||
res_type,
|
||||
)?;
|
||||
|
||||
|
|
@ -778,8 +792,21 @@ impl MemBase {
|
|||
host_ptr = ptr::null_mut()
|
||||
}
|
||||
|
||||
let addresses = bda.then(|| {
|
||||
context
|
||||
.devs
|
||||
.iter()
|
||||
.filter(|dev| dev.bda_supported())
|
||||
.map(|&dev| {
|
||||
let address = buffer[dev].resource_get_address();
|
||||
Some((dev, address?))
|
||||
})
|
||||
.collect::<Option<_>>()
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
let alloc = Allocation::new(buffer, 0, host_ptr);
|
||||
Ok(Arc::new(Buffer {
|
||||
let buffer = Arc::new(Buffer {
|
||||
base: Self {
|
||||
base: CLObjectBase::new(RusticlTypes::Buffer),
|
||||
context: context,
|
||||
|
|
@ -791,8 +818,15 @@ impl MemBase {
|
|||
cbs: Mutex::new(Vec::new()),
|
||||
alloc: alloc,
|
||||
},
|
||||
address: addresses,
|
||||
maps: Mutex::new(TrackedPointers::new()),
|
||||
}))
|
||||
});
|
||||
|
||||
if buffer.address.is_some() {
|
||||
buffer.context.add_bda_ptr(&buffer);
|
||||
}
|
||||
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
pub fn new_sub_buffer(
|
||||
|
|
@ -801,6 +835,14 @@ impl MemBase {
|
|||
offset: usize,
|
||||
size: usize,
|
||||
) -> Arc<Buffer> {
|
||||
let address = parent.address.as_ref().map(|addresses| {
|
||||
addresses
|
||||
.iter()
|
||||
// checked_add should never fail, because an allocation will never wrap around.
|
||||
.map(|(&dev, address)| (dev, address.checked_add(offset as u64).unwrap()))
|
||||
.collect()
|
||||
});
|
||||
|
||||
Arc::new(Buffer {
|
||||
base: Self {
|
||||
base: CLObjectBase::new(RusticlTypes::Buffer),
|
||||
|
|
@ -813,6 +855,7 @@ impl MemBase {
|
|||
cbs: Mutex::new(Vec::new()),
|
||||
alloc: Allocation::new_sub(Mem::Buffer(parent), offset),
|
||||
},
|
||||
address: address,
|
||||
maps: Mutex::new(TrackedPointers::new()),
|
||||
})
|
||||
}
|
||||
|
|
@ -1000,6 +1043,7 @@ impl MemBase {
|
|||
Ok(if rusticl_type == RusticlTypes::Buffer {
|
||||
Arc::new(Buffer {
|
||||
base: base,
|
||||
address: None,
|
||||
maps: Mutex::new(TrackedPointers::new()),
|
||||
})
|
||||
.into_cl()
|
||||
|
|
@ -1266,6 +1310,24 @@ impl Buffer {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the address of this buffer on `dev`, or `None` when the buffer has no device
/// addresses or none was recorded for `dev`.
pub fn dev_address(&self, dev: &Device) -> Option<NonZeroU64> {
    match &self.address {
        Some(per_dev) => per_dev.get(dev).copied(),
        None => None,
    }
}
|
||||
|
||||
/// Returns an iterator of device address pairs in the same order as devices in the associated
|
||||
/// context.
|
||||
pub fn dev_addresses(
|
||||
&self,
|
||||
) -> Option<impl ExactSizeIterator<Item = (&'static Device, Option<NonZeroU64>)> + '_> {
|
||||
let address = self.address.as_ref()?;
|
||||
Some(
|
||||
self.context
|
||||
.devs
|
||||
.iter()
|
||||
.map(|&dev| (dev, address.get(dev).copied())),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn fill(
|
||||
&self,
|
||||
ctx: &QueueContext,
|
||||
|
|
@ -1507,6 +1569,14 @@ impl Buffer {
|
|||
}
|
||||
}
|
||||
|
||||
impl Drop for Buffer {
|
||||
fn drop(&mut self) {
|
||||
if self.address.is_some() {
|
||||
self.context.remove_bda(self);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Image {
|
||||
pub fn copy_to_buffer(
|
||||
&self,
|
||||
|
|
|
|||
|
|
@ -454,12 +454,16 @@ impl PipeContext {
|
|||
block: [u32; 3],
|
||||
grid: [u32; 3],
|
||||
variable_local_mem: u32,
|
||||
globals: &[&PipeResource],
|
||||
) {
|
||||
let mut globals: Vec<*mut pipe_resource> = globals.iter().map(|res| res.pipe()).collect();
|
||||
let info = pipe_grid_info {
|
||||
variable_shared_mem: variable_local_mem,
|
||||
work_dim: work_dim,
|
||||
block: block,
|
||||
grid: grid,
|
||||
globals: globals.as_mut_ptr(),
|
||||
num_globals: globals.len() as u32,
|
||||
..Default::default()
|
||||
};
|
||||
unsafe { self.pipe.as_ref().launch_grid.unwrap()(self.pipe.as_ptr(), &info) }
|
||||
|
|
|
|||
|
|
@ -136,6 +136,7 @@ impl PipeScreen {
|
|||
size: u32,
|
||||
res_type: ResourceType,
|
||||
pipe_bind: u32,
|
||||
pipe_flags: u32,
|
||||
) -> Option<PipeResource> {
|
||||
let mut tmpl = pipe_resource::default();
|
||||
|
||||
|
|
@ -145,6 +146,7 @@ impl PipeScreen {
|
|||
tmpl.depth0 = 1;
|
||||
tmpl.array_size = 1;
|
||||
tmpl.bind = pipe_bind;
|
||||
tmpl.flags = pipe_flags;
|
||||
|
||||
res_type.apply(&mut tmpl);
|
||||
|
||||
|
|
@ -156,6 +158,7 @@ impl PipeScreen {
|
|||
size: u32,
|
||||
mem: *mut c_void,
|
||||
pipe_bind: u32,
|
||||
pipe_flags: u32,
|
||||
) -> Option<PipeResource> {
|
||||
let mut tmpl = pipe_resource::default();
|
||||
|
||||
|
|
@ -165,6 +168,7 @@ impl PipeScreen {
|
|||
tmpl.depth0 = 1;
|
||||
tmpl.array_size = 1;
|
||||
tmpl.bind = pipe_bind;
|
||||
tmpl.flags = pipe_flags;
|
||||
|
||||
self.resource_create_from_user(&tmpl, mem)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue