rusticl/device: make it &'static

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24061>
This commit is contained in:
Karol Herbst 2023-07-08 18:41:32 +02:00 committed by Marge Bot
parent afe95b613c
commit d653eb8a9a
13 changed files with 146 additions and 127 deletions

View file

@ -1,9 +1,9 @@
use crate::api::device::get_devs_for_type;
use crate::api::icd::*;
use crate::api::types::*;
use crate::api::util::*;
use crate::cl_closure;
use crate::core::context::*;
use crate::core::device::get_devs_for_type;
use crate::core::platform::*;
use mesa_rust_util::properties::Properties;
@ -15,24 +15,18 @@ use std::collections::HashSet;
use std::iter::FromIterator;
use std::mem::MaybeUninit;
use std::slice;
use std::sync::Arc;
#[cl_info_entrypoint(cl_get_context_info)]
impl CLInfo<cl_context_info> for cl_context {
fn query(&self, q: cl_context_info, _: &[u8]) -> CLResult<Vec<MaybeUninit<u8>>> {
let ctx = self.get_ref()?;
Ok(match q {
CL_CONTEXT_DEVICES => {
cl_prop::<&Vec<cl_device_id>>(
&ctx.devs
.iter()
.map(|d| {
// Note we use as_ptr here which doesn't increase the reference count.
cl_device_id::from_ptr(Arc::as_ptr(d))
})
.collect(),
)
}
CL_CONTEXT_DEVICES => cl_prop::<Vec<cl_device_id>>(
ctx.devs
.iter()
.map(|&d| cl_device_id::from_ptr(d))
.collect(),
),
CL_CONTEXT_NUM_DEVICES => cl_prop::<cl_uint>(ctx.devs.len() as u32),
CL_CONTEXT_PROPERTIES => cl_prop::<&Properties<cl_context_properties>>(&ctx.properties),
CL_CONTEXT_REFERENCE_COUNT => cl_prop::<cl_uint>(self.refcnt()?),
@ -81,7 +75,7 @@ fn create_context(
// Duplicate devices specified in devices are ignored.
let set: HashSet<_> =
HashSet::from_iter(unsafe { slice::from_raw_parts(devices, num_devices as usize) }.iter());
let devs: Result<_, _> = set.into_iter().map(cl_device_id::get_arc).collect();
let devs: Result<_, _> = set.into_iter().map(cl_device_id::get_ref).collect();
Ok(cl_context::from_arc(Context::new(devs?, props)))
}

View file

@ -15,7 +15,6 @@ use std::cmp::min;
use std::ffi::CStr;
use std::mem::{size_of, MaybeUninit};
use std::ptr;
use std::sync::Arc;
const SPIRV_SUPPORT_STRING: &str = "SPIR-V_1.0 SPIR-V_1.1 SPIR-V_1.2 SPIR-V_1.3 SPIR-V_1.4";
const SPIRV_SUPPORT: [cl_name_version; 5] = [
@ -316,18 +315,6 @@ impl CLInfo<cl_device_info> for cl_device_id {
}
}
/// Borrows the device list stored on the `Platform` returned by `Platform::get()`.
fn devs() -> &'static Vec<Arc<Device>> {
    let platform = Platform::get();
    &platform.devs
}
/// Collects every device from `devs()` whose `device_type(true)` bitmask shares at least one
/// bit with `device_type`.
///
/// The result is empty when no device matches.
pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    let mut matching = Vec::new();
    for dev in devs() {
        // Keep the device if any requested type bit is set in its own type mask.
        if (device_type & dev.device_type(true)) != 0 {
            matching.push(Arc::as_ref(dev));
        }
    }
    matching
}
#[cl_entrypoint]
fn get_device_ids(
platform: cl_platform_id,

View file

@ -272,6 +272,28 @@ pub trait ReferenceCountedAPIPointer<T, const ERR: i32> {
Ok(res)
}
/// Resolves an API-provided array of `count` handles into a vector of references.
///
/// * `objs` null and `count == 0`: returns an empty vector.
/// * exactly one of "`objs` is null" / "`count == 0`" holds: returns `CL_INVALID_VALUE`.
/// * otherwise: calls `get_ref` on each of the `count` entries in order and returns the first
///   error encountered, if any.
fn get_ref_vec_from_arr(objs: *const Self, count: u32) -> CLResult<Vec<&'static T>>
where
    Self: Sized,
{
    match (objs.is_null(), count) {
        // Neither a pointer nor a count was given: nothing to resolve.
        (true, 0) => Ok(Vec::new()),
        // CL spec requires validation for obj arrays, both values have to make sense
        (true, _) | (false, 0) => Err(CL_INVALID_VALUE),
        (false, _) => (0..count as usize)
            // SAFETY: `objs` is non-null here; this relies on the caller having passed at
            // least `count` readable entries, which cannot be checked from this side.
            .map(|idx| unsafe { (*objs.add(idx)).get_ref() })
            .collect(),
    }
}
fn retain(&self) -> CLResult<()> {
unsafe {
Arc::increment_strong_count(self.get_ptr()?);

View file

@ -82,10 +82,10 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
if kernel.prog.devs.len() > 1 {
return Err(CL_INVALID_DEVICE);
} else {
kernel.prog.devs[0].clone()
kernel.prog.devs[0]
}
} else {
dev.get_arc()?
dev.get_ref()?
};
// CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
@ -95,12 +95,12 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
Ok(match *q {
CL_KERNEL_COMPILE_WORK_GROUP_SIZE => cl_prop::<[usize; 3]>(kernel.work_group_size),
CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(&dev)),
CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(dev)),
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => {
cl_prop::<usize>(kernel.preferred_simd_size(&dev))
cl_prop::<usize>(kernel.preferred_simd_size(dev))
}
CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(&dev)),
CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(&dev)),
CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(dev)),
CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(dev)),
// CL_INVALID_VALUE if param_name is not one of the supported values
_ => return Err(CL_INVALID_VALUE),
})
@ -128,10 +128,10 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
if kernel.prog.devs.len() > 1 {
return Err(CL_INVALID_DEVICE);
} else {
kernel.prog.devs[0].clone()
kernel.prog.devs[0]
}
} else {
dev.get_arc()?
dev.get_ref()?
};
// CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
@ -172,16 +172,16 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
Ok(match q {
CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE => {
cl_prop::<usize>(kernel.subgroups_for_block(&dev, input))
cl_prop::<usize>(kernel.subgroups_for_block(dev, input))
}
CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE => {
cl_prop::<usize>(kernel.subgroup_size_for_block(&dev, input))
cl_prop::<usize>(kernel.subgroup_size_for_block(dev, input))
}
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT => {
let subgroups = input[0];
let mut res = vec![0; 3];
for subgroup_size in kernel.subgroup_sizes(&dev) {
for subgroup_size in kernel.subgroup_sizes(dev) {
let threads = subgroups * subgroup_size;
if threads > dev.max_threads_per_block() {
@ -189,7 +189,7 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
}
let block = [threads, 1, 1];
let real_subgroups = kernel.subgroups_for_block(&dev, &block);
let real_subgroups = kernel.subgroups_for_block(dev, &block);
if real_subgroups == subgroups {
res = block.to_vec();
@ -201,11 +201,11 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
cl_prop::<Vec<usize>>(res)
}
CL_KERNEL_MAX_NUM_SUB_GROUPS => {
let threads = kernel.max_threads_per_block(&dev);
let threads = kernel.max_threads_per_block(dev);
let max_groups = dev.max_subgroups();
let mut result = 0;
for sgs in kernel.subgroup_sizes(&dev) {
for sgs in kernel.subgroup_sizes(dev) {
result = cmp::max(result, threads / sgs);
result = cmp::min(result, max_groups as usize);
}
@ -512,7 +512,7 @@ fn enqueue_ndrange_kernel(
// CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
// for device associated with command_queue.
if k.prog.status(&q.device) != CL_BUILD_SUCCESS as cl_build_status {
if k.prog.status(q.device) != CL_BUILD_SUCCESS as cl_build_status {
return Err(CL_INVALID_PROGRAM_EXECUTABLE);
}

View file

@ -395,7 +395,7 @@ fn validate_image_desc(
image_desc: *const cl_image_desc,
host_ptr: *mut ::std::os::raw::c_void,
elem_size: usize,
devs: &[Arc<Device>],
devs: &[&Device],
) -> CLResult<(cl_image_desc, Option<Arc<Mem>>)> {
// CL_INVALID_IMAGE_DESCRIPTOR if values specified in image_desc are not valid
const err: cl_int = CL_INVALID_IMAGE_DESCRIPTOR;

View file

@ -33,18 +33,12 @@ impl CLInfo<cl_program_info> for cl_program {
let ptr = Arc::as_ptr(&prog.context);
cl_prop::<cl_context>(cl_context::from_ptr(ptr))
}
CL_PROGRAM_DEVICES => {
cl_prop::<&Vec<cl_device_id>>(
&prog
.devs
.iter()
.map(|d| {
// Note we use as_ptr here which doesn't increase the reference count.
cl_device_id::from_ptr(Arc::as_ptr(d))
})
.collect(),
)
}
CL_PROGRAM_DEVICES => cl_prop::<Vec<cl_device_id>>(
prog.devs
.iter()
.map(|&d| cl_device_id::from_ptr(d))
.collect(),
),
CL_PROGRAM_IL => match &prog.src {
ProgramSourceType::Il(il) => to_maybeuninit_vec(il.to_bin().to_vec()),
_ => Vec::new(),
@ -82,12 +76,12 @@ impl CLInfoObj<cl_program_build_info, cl_device_id> for cl_program {
}
}
fn validate_devices(
fn validate_devices<'a>(
device_list: *const cl_device_id,
num_devices: cl_uint,
default: &[Arc<Device>],
) -> CLResult<Vec<Arc<Device>>> {
let mut devs = cl_device_id::get_arc_vec_from_arr(device_list, num_devices)?;
default: &[&'a Device],
) -> CLResult<Vec<&'a Device>> {
let mut devs = cl_device_id::get_ref_vec_from_arr(device_list, num_devices)?;
// If device_list is a NULL value, the compile is performed for all devices associated with
// program.
@ -197,7 +191,7 @@ fn create_program_with_binary(
binary_status: *mut cl_int,
) -> CLResult<cl_program> {
let c = context.get_arc()?;
let devs = cl_device_id::get_arc_vec_from_arr(device_list, num_devices)?;
let devs = cl_device_id::get_ref_vec_from_arr(device_list, num_devices)?;
// CL_INVALID_VALUE if device_list is NULL or num_devices is zero.
if devs.is_empty() {

View file

@ -23,11 +23,7 @@ impl CLInfo<cl_command_queue_info> for cl_command_queue {
let ptr = Arc::as_ptr(&queue.context);
cl_prop::<cl_context>(cl_context::from_ptr(ptr))
}
CL_QUEUE_DEVICE => {
// Note we use as_ptr here which doesn't increase the reference count.
let ptr = Arc::as_ptr(&queue.device);
cl_prop::<cl_device_id>(cl_device_id::from_ptr(ptr))
}
CL_QUEUE_DEVICE => cl_prop::<cl_device_id>(cl_device_id::from_ptr(queue.device)),
CL_QUEUE_DEVICE_DEFAULT => cl_prop::<cl_command_queue>(ptr::null_mut()),
CL_QUEUE_PROPERTIES => cl_prop::<cl_command_queue_properties>(queue.props),
CL_QUEUE_PROPERTIES_ARRAY => {
@ -61,7 +57,7 @@ pub fn create_command_queue_impl(
properties_v2: Option<Properties<cl_queue_properties>>,
) -> CLResult<cl_command_queue> {
let c = context.get_arc()?;
let d = device.get_arc()?;
let d = device.get_ref()?.to_static().ok_or(CL_INVALID_DEVICE)?;
// CL_INVALID_DEVICE if device [...] is not associated with context.
if !c.devs.contains(&d) {
@ -102,7 +98,7 @@ fn create_command_queue_with_properties(
properties: *const cl_queue_properties,
) -> CLResult<cl_command_queue> {
let c = context.get_arc()?;
let d = device.get_arc()?;
let d = device.get_ref()?.to_static().ok_or(CL_INVALID_DEVICE)?;
let mut queue_properties = cl_command_queue_properties::default();
let properties = if properties.is_null() {

View file

@ -20,7 +20,7 @@ use std::sync::Mutex;
pub struct Context {
pub base: CLObjectBase<CL_INVALID_CONTEXT>,
pub devs: Vec<Arc<Device>>,
pub devs: Vec<&'static Device>,
pub properties: Properties<cl_context_properties>,
pub dtors: Mutex<Vec<Box<dyn Fn(cl_context)>>>,
pub svm_ptrs: Mutex<BTreeMap<*const c_void, Layout>>,
@ -30,7 +30,7 @@ impl_cl_type_trait!(cl_context, Context, CL_INVALID_CONTEXT);
impl Context {
pub fn new(
devs: Vec<Arc<Device>>,
devs: Vec<&'static Device>,
properties: Properties<cl_context_properties>,
) -> Arc<Context> {
Arc::new(Self {
@ -48,10 +48,10 @@ impl Context {
user_ptr: *mut c_void,
copy: bool,
res_type: ResourceType,
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
let mut res = HashMap::new();
for dev in &self.devs {
for &dev in &self.devs {
let mut resource = None;
if !user_ptr.is_null() && !copy {
@ -65,7 +65,7 @@ impl Context {
}
let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
res.insert(Arc::clone(dev), Arc::new(resource?));
res.insert(dev, Arc::new(resource?));
}
if !user_ptr.is_null() {
@ -88,7 +88,7 @@ impl Context {
user_ptr: *mut c_void,
copy: bool,
res_type: ResourceType,
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
let width = desc
.image_width
.try_into()
@ -108,7 +108,7 @@ impl Context {
let target = cl_mem_type_to_texture_target(desc.image_type);
let mut res = HashMap::new();
for dev in &self.devs {
for &dev in &self.devs {
let mut resource = None;
// we can't specify custom pitches/slices, so this won't work for non 1D images
@ -125,7 +125,7 @@ impl Context {
}
let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
res.insert(Arc::clone(dev), Arc::new(resource?));
res.insert(dev, Arc::new(resource?));
}
if !user_ptr.is_null() {

View file

@ -235,6 +235,19 @@ impl Device {
Some(Arc::new(d))
}
/// Converts a temporary reference to a static if and only if this device lives inside static
/// memory.
pub fn to_static(&self) -> Option<&'static Self> {
for dev in devs() {
let dev = dev.as_ref();
if self == dev {
return Some(dev);
}
}
None
}
fn fill_format_tables(&mut self) {
for f in FORMATS {
let mut fs = HashMap::new();
@ -937,3 +950,15 @@ impl Device {
}
}
}
/// Borrows the device list stored on the `Platform` returned by `Platform::get()`.
fn devs() -> &'static Vec<Arc<Device>> {
    let platform = Platform::get();
    &platform.devs
}
/// Collects every device from `devs()` whose `device_type(true)` bitmask shares at least one
/// bit with `device_type`.
///
/// The result is empty when no device matches.
pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    let mut matching = Vec::new();
    for dev in devs() {
        // Keep the device if any requested type bit is set in its own type mask.
        if (device_type & dev.device_type(true)) != 0 {
            matching.push(Arc::as_ref(dev));
        }
    }
    matching
}

View file

@ -258,7 +258,7 @@ struct KernelDevStateInner {
}
struct KernelDevState {
states: HashMap<Arc<Device>, KernelDevStateInner>,
states: HashMap<&'static Device, KernelDevStateInner>,
}
impl Drop for KernelDevState {
@ -272,10 +272,10 @@ impl Drop for KernelDevState {
}
impl KernelDevState {
fn new(nirs: &HashMap<Arc<Device>, Arc<NirShader>>) -> Arc<Self> {
fn new(nirs: &HashMap<&'static Device, Arc<NirShader>>) -> Arc<Self> {
let states = nirs
.iter()
.map(|(dev, nir)| {
.map(|(&dev, nir)| {
let mut cso = dev
.helper_ctx()
.create_compute_state(nir, nir.shared_size());
@ -289,7 +289,7 @@ impl KernelDevState {
};
(
dev.clone(),
dev,
KernelDevStateInner {
nir: nir.clone(),
constant_buffer: cb,
@ -871,7 +871,7 @@ impl Kernel {
grid: &[usize],
offsets: &[usize],
) -> CLResult<EventSig> {
let dev_state = self.dev_state.get(&q.device);
let dev_state = self.dev_state.get(q.device);
let mut block = create_kernel_arr::<u32>(block, 1);
let mut grid = create_kernel_arr::<u32>(grid, 1);
let offsets = create_kernel_arr::<u64>(offsets, 0);
@ -894,7 +894,7 @@ impl Kernel {
&[0; 4]
};
self.optimize_local_size(&q.device, &mut grid, &mut block);
self.optimize_local_size(q.device, &mut grid, &mut block);
for (arg, val) in self.build.args.iter().zip(&self.values) {
if arg.dead {
@ -911,7 +911,7 @@ impl Kernel {
match val.borrow().as_ref().unwrap() {
KernelArgValue::Constant(c) => input.extend_from_slice(c),
KernelArgValue::MemObject(mem) => {
let res = mem.get_res_of_dev(&q.device)?;
let res = mem.get_res_of_dev(q.device)?;
// If resource is a buffer and mem a 2D image, the 2d image was created from a
// buffer. Use strides and dimensions of 2d image
let app_img_info =
@ -1038,7 +1038,7 @@ impl Kernel {
let k = Arc::clone(self);
Ok(Box::new(move |q, ctx| {
let dev_state = k.dev_state.get(&q.device);
let dev_state = k.dev_state.get(q.device);
let mut input = input.clone();
let mut resources = Vec::with_capacity(resource_info.len());
let mut globals: Vec<*mut u32> = Vec::new();

View file

@ -18,6 +18,7 @@ use mesa_rust_util::properties::Properties;
use rusticl_opencl_gen::*;
use std::cmp;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::TryInto;
use std::mem::size_of;
@ -45,7 +46,7 @@ impl MappingTransfer {
}
struct Mappings {
tx: HashMap<Arc<Device>, MappingTransfer>,
tx: HashMap<&'static Device, MappingTransfer>,
maps: HashMap<*mut c_void, u32>,
}
@ -92,7 +93,7 @@ impl Mappings {
fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
if self.maps.is_empty() {
if let Some(tx) = self.tx.get(dev) {
if let Some(tx) = self.tx.get(&dev) {
if tx.pending == 0 {
self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
}
@ -116,7 +117,7 @@ pub struct Mem {
pub image_elem_size: u8,
pub props: Vec<cl_mem_properties>,
pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
res: Option<HashMap<Arc<Device>, Arc<PipeResource>>>,
res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
maps: Mutex<Mappings>,
}
@ -457,7 +458,7 @@ impl Mem {
assert!(self.is_buffer());
let tx = if can_map_directly(&q.device, r) {
let tx = if can_map_directly(q.device, r) {
ctx.buffer_map_directly(
r,
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
@ -513,10 +514,10 @@ impl Mem {
) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
assert!(!self.is_buffer());
let r = self.get_res()?.get(&q.device).unwrap();
let r = self.get_res()?.get(q.device).unwrap();
let ctx = q.device.helper_ctx();
let tx = if can_map_directly(&q.device, r) {
let tx = if can_map_directly(q.device, r) {
ctx.texture_map_directly(r, bx, rw)
} else {
None
@ -573,7 +574,7 @@ impl Mem {
&& bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
}
fn get_res(&self) -> CLResult<&HashMap<Arc<Device>, Arc<PipeResource>>> {
fn get_res(&self) -> CLResult<&HashMap<&'static Device, Arc<PipeResource>>> {
self.get_parent().res.as_ref().ok_or(CL_OUT_OF_HOST_MEMORY)
}
@ -992,17 +993,17 @@ impl Mem {
ptr: *mut c_void,
) -> CLResult<()> {
let mut lock = self.maps.lock().unwrap();
if !lock.increase_ref(&q.device, ptr) {
if !lock.increase_ref(q.device, ptr) {
return Ok(());
}
if self.has_user_shadow_buffer(&q.device)? {
if self.has_user_shadow_buffer(q.device)? {
self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
} else {
if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let res = b.get_res_of_dev(&q.device)?;
let res = b.get_res_of_dev(q.device)?;
let bx = create_pipe_box(
[offset, 0, 0].into(),
[self.size, 1, 1].into(),
@ -1022,11 +1023,11 @@ impl Mem {
ptr: *mut c_void,
) -> CLResult<()> {
let mut lock = self.maps.lock().unwrap();
if !lock.increase_ref(&q.device, ptr) {
if !lock.increase_ref(q.device, ptr) {
return Ok(());
}
if self.has_user_shadow_buffer(&q.device)? {
if self.has_user_shadow_buffer(q.device)? {
self.read_to_user_rect(
self.host_ptr,
q,
@ -1040,8 +1041,8 @@ impl Mem {
self.image_desc.image_slice_pitch,
)
} else {
if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
let res = self.get_res_of_dev(&q.device)?;
if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
let res = self.get_res_of_dev(q.device)?;
let bx = self.image_desc.bx()?;
ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
}
@ -1080,7 +1081,7 @@ impl Mem {
lock: &'a mut MutexGuard<Mappings>,
rw: RWFlags,
) -> CLResult<&'a PipeTransfer> {
if !lock.tx.contains_key(&q.device) {
if let Entry::Vacant(e) = lock.tx.entry(q.device) {
let (tx, res) = if self.is_buffer() {
self.tx_raw_async(q, rw)?
} else {
@ -1088,10 +1089,9 @@ impl Mem {
self.tx_image_raw_async(q, &bx, rw)?
};
lock.tx
.insert(q.device.clone(), MappingTransfer::new(tx, res));
e.insert(MappingTransfer::new(tx, res));
} else {
lock.mark_pending(&q.device);
lock.mark_pending(q.device);
}
Ok(&lock.tx.get_mut(&q.device).unwrap().tx)
@ -1101,7 +1101,7 @@ impl Mem {
assert!(self.is_buffer());
let mut lock = self.maps.lock().unwrap();
let ptr = if self.has_user_shadow_buffer(&q.device)? {
let ptr = if self.has_user_shadow_buffer(q.device)? {
self.host_ptr
} else {
let tx = self.map(q, &mut lock, RWFlags::RW)?;
@ -1125,7 +1125,7 @@ impl Mem {
let mut lock = self.maps.lock().unwrap();
// we might have a host_ptr shadow buffer or image created from buffer
let ptr = if self.has_user_shadow_buffer(&q.device)? || self.is_parent_buffer() {
let ptr = if self.has_user_shadow_buffer(q.device)? || self.is_parent_buffer() {
*row_pitch = self.image_desc.image_row_pitch;
*slice_pitch = self.image_desc.image_slice_pitch;
@ -1173,12 +1173,12 @@ impl Mem {
return Ok(());
}
let (needs_sync, shadow) = lock.decrease_ref(ptr, &q.device);
let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
if needs_sync {
if let Some(shadow) = shadow {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let res = b.get_res_of_dev(&q.device)?;
let res = b.get_res_of_dev(q.device)?;
let bx = if b.is_buffer() {
create_pipe_box(
@ -1191,7 +1191,7 @@ impl Mem {
};
ctx.resource_copy_region(shadow, res, &[offset as u32, 0, 0], &bx);
} else if self.has_user_shadow_buffer(&q.device)? {
} else if self.has_user_shadow_buffer(q.device)? {
if self.is_buffer() {
self.write_from_user(q, ctx, 0, self.host_ptr, self.size)?;
} else {
@ -1211,7 +1211,7 @@ impl Mem {
}
}
lock.clean_up_tx(&q.device, ctx);
lock.clean_up_tx(q.device, ctx);
Ok(())
}

View file

@ -60,7 +60,7 @@ pub enum ProgramSourceType {
pub struct Program {
pub base: CLObjectBase<CL_INVALID_PROGRAM>,
pub context: Arc<Context>,
pub devs: Vec<Arc<Device>>,
pub devs: Vec<&'static Device>,
pub src: ProgramSourceType,
build: Mutex<ProgramBuild>,
}
@ -69,14 +69,14 @@ impl_cl_type_trait!(cl_program, Program, CL_INVALID_PROGRAM);
#[derive(Clone)]
pub struct NirKernelBuild {
pub nirs: HashMap<Arc<Device>, Arc<NirShader>>,
pub nirs: HashMap<&'static Device, Arc<NirShader>>,
pub args: Vec<KernelArg>,
pub internal_args: Vec<InternalKernelArg>,
pub attributes_string: String,
}
pub(super) struct ProgramBuild {
builds: HashMap<Arc<Device>, ProgramDevBuild>,
builds: HashMap<&'static Device, ProgramDevBuild>,
spec_constants: HashMap<u32, nir_const_value>,
kernels: Vec<String>,
kernel_builds: HashMap<String, Arc<NirKernelBuild>>,
@ -122,7 +122,7 @@ impl ProgramBuild {
for d in self.devs_with_build() {
let (nir, args, internal_args) = convert_spirv_to_nir(self, kernel_name, &args, d);
let attributes_string = self.attribute_str(kernel_name, d);
nirs.insert(d.clone(), Arc::new(nir));
nirs.insert(d, Arc::new(nir));
args_set.insert(args);
internal_args_set.insert(internal_args);
attributes_string_set.insert(attributes_string);
@ -163,11 +163,11 @@ impl ProgramBuild {
self.builds.get_mut(dev).unwrap()
}
fn devs_with_build(&self) -> Vec<&Arc<Device>> {
fn devs_with_build(&self) -> Vec<&'static Device> {
self.builds
.iter()
.filter(|(_, build)| build.status == CL_BUILD_SUCCESS as cl_build_status)
.map(|(d, _)| d)
.map(|(&d, _)| d)
.collect()
}
@ -285,11 +285,13 @@ fn prepare_options(options: &str, dev: &Device) -> Vec<CString> {
}
impl Program {
fn create_default_builds(devs: &[Arc<Device>]) -> HashMap<Arc<Device>, ProgramDevBuild> {
fn create_default_builds(
devs: &[&'static Device],
) -> HashMap<&'static Device, ProgramDevBuild> {
devs.iter()
.map(|d| {
.map(|&d| {
(
d.clone(),
d,
ProgramDevBuild {
spirv: None,
status: CL_BUILD_NONE,
@ -302,7 +304,7 @@ impl Program {
.collect()
}
pub fn new(context: &Arc<Context>, devs: &[Arc<Device>], src: CString) -> Arc<Program> {
pub fn new(context: &Arc<Context>, devs: &[&'static Device], src: CString) -> Arc<Program> {
Arc::new(Self {
base: CLObjectBase::new(),
context: context.clone(),
@ -319,13 +321,13 @@ impl Program {
pub fn from_bins(
context: Arc<Context>,
devs: Vec<Arc<Device>>,
devs: Vec<&'static Device>,
bins: &[&[u8]],
) -> Arc<Program> {
let mut builds = HashMap::new();
let mut kernels = HashSet::new();
for (d, b) in devs.iter().zip(bins) {
for (&d, b) in devs.iter().zip(bins) {
let mut ptr = b.as_ptr();
let bin_type;
let spirv;
@ -364,7 +366,7 @@ impl Program {
}
builds.insert(
d.clone(),
d,
ProgramDevBuild {
spirv: spirv,
status: CL_BUILD_SUCCESS as cl_build_status,
@ -625,17 +627,16 @@ impl Program {
pub fn link(
context: Arc<Context>,
devs: &[Arc<Device>],
devs: &[&'static Device],
progs: &[Arc<Program>],
options: String,
) -> Arc<Program> {
let devs: Vec<Arc<Device>> = devs.iter().map(|d| (*d).clone()).collect();
let mut builds = HashMap::new();
let mut kernels = HashSet::new();
let mut locks: Vec<_> = progs.iter().map(|p| p.build_info()).collect();
let lib = options.contains("-create-library");
for d in &devs {
for &d in devs {
let bins: Vec<_> = locks
.iter_mut()
.map(|l| l.dev_build(d).spirv.as_ref().unwrap())
@ -661,7 +662,7 @@ impl Program {
};
builds.insert(
d.clone(),
d,
ProgramDevBuild {
spirv: spirv,
status: status,
@ -685,7 +686,7 @@ impl Program {
Arc::new(Self {
base: CLObjectBase::new(),
context: context,
devs: devs,
devs: devs.to_owned(),
src: ProgramSourceType::Linked,
build: Mutex::new(build),
})

View file

@ -23,7 +23,7 @@ struct QueueState {
pub struct Queue {
pub base: CLObjectBase<CL_INVALID_COMMAND_QUEUE>,
pub context: Arc<Context>,
pub device: Arc<Device>,
pub device: &'static Device,
pub props: cl_command_queue_properties,
pub props_v2: Option<Properties<cl_queue_properties>>,
state: Mutex<QueueState>,
@ -43,7 +43,7 @@ fn flush_events(evs: &mut Vec<Arc<Event>>, pipe: &PipeContext) {
impl Queue {
pub fn new(
context: Arc<Context>,
device: Arc<Device>,
device: &'static Device,
props: cl_command_queue_properties,
props_v2: Option<Properties<cl_queue_properties>>,
) -> CLResult<Arc<Queue>> {