mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
rusticl/device: make it &'static
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24061>
This commit is contained in:
parent
afe95b613c
commit
d653eb8a9a
13 changed files with 146 additions and 127 deletions
|
|
@ -1,9 +1,9 @@
|
|||
use crate::api::device::get_devs_for_type;
|
||||
use crate::api::icd::*;
|
||||
use crate::api::types::*;
|
||||
use crate::api::util::*;
|
||||
use crate::cl_closure;
|
||||
use crate::core::context::*;
|
||||
use crate::core::device::get_devs_for_type;
|
||||
use crate::core::platform::*;
|
||||
|
||||
use mesa_rust_util::properties::Properties;
|
||||
|
|
@ -15,24 +15,18 @@ use std::collections::HashSet;
|
|||
use std::iter::FromIterator;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::slice;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[cl_info_entrypoint(cl_get_context_info)]
|
||||
impl CLInfo<cl_context_info> for cl_context {
|
||||
fn query(&self, q: cl_context_info, _: &[u8]) -> CLResult<Vec<MaybeUninit<u8>>> {
|
||||
let ctx = self.get_ref()?;
|
||||
Ok(match q {
|
||||
CL_CONTEXT_DEVICES => {
|
||||
cl_prop::<&Vec<cl_device_id>>(
|
||||
&ctx.devs
|
||||
.iter()
|
||||
.map(|d| {
|
||||
// Note we use as_ptr here which doesn't increase the reference count.
|
||||
cl_device_id::from_ptr(Arc::as_ptr(d))
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
CL_CONTEXT_DEVICES => cl_prop::<Vec<cl_device_id>>(
|
||||
ctx.devs
|
||||
.iter()
|
||||
.map(|&d| cl_device_id::from_ptr(d))
|
||||
.collect(),
|
||||
),
|
||||
CL_CONTEXT_NUM_DEVICES => cl_prop::<cl_uint>(ctx.devs.len() as u32),
|
||||
CL_CONTEXT_PROPERTIES => cl_prop::<&Properties<cl_context_properties>>(&ctx.properties),
|
||||
CL_CONTEXT_REFERENCE_COUNT => cl_prop::<cl_uint>(self.refcnt()?),
|
||||
|
|
@ -81,7 +75,7 @@ fn create_context(
|
|||
// Duplicate devices specified in devices are ignored.
|
||||
let set: HashSet<_> =
|
||||
HashSet::from_iter(unsafe { slice::from_raw_parts(devices, num_devices as usize) }.iter());
|
||||
let devs: Result<_, _> = set.into_iter().map(cl_device_id::get_arc).collect();
|
||||
let devs: Result<_, _> = set.into_iter().map(cl_device_id::get_ref).collect();
|
||||
|
||||
Ok(cl_context::from_arc(Context::new(devs?, props)))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ use std::cmp::min;
|
|||
use std::ffi::CStr;
|
||||
use std::mem::{size_of, MaybeUninit};
|
||||
use std::ptr;
|
||||
use std::sync::Arc;
|
||||
|
||||
const SPIRV_SUPPORT_STRING: &str = "SPIR-V_1.0 SPIR-V_1.1 SPIR-V_1.2 SPIR-V_1.3 SPIR-V_1.4";
|
||||
const SPIRV_SUPPORT: [cl_name_version; 5] = [
|
||||
|
|
@ -316,18 +315,6 @@ impl CLInfo<cl_device_info> for cl_device_id {
|
|||
}
|
||||
}
|
||||
|
||||
fn devs() -> &'static Vec<Arc<Device>> {
|
||||
&Platform::get().devs
|
||||
}
|
||||
|
||||
pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
|
||||
devs()
|
||||
.iter()
|
||||
.filter(|d| device_type & d.device_type(true) != 0)
|
||||
.map(Arc::as_ref)
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cl_entrypoint]
|
||||
fn get_device_ids(
|
||||
platform: cl_platform_id,
|
||||
|
|
|
|||
|
|
@ -272,6 +272,28 @@ pub trait ReferenceCountedAPIPointer<T, const ERR: i32> {
|
|||
Ok(res)
|
||||
}
|
||||
|
||||
fn get_ref_vec_from_arr(objs: *const Self, count: u32) -> CLResult<Vec<&'static T>>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
// CL spec requires validation for obj arrays, both values have to make sense
|
||||
if objs.is_null() && count > 0 || !objs.is_null() && count == 0 {
|
||||
return Err(CL_INVALID_VALUE);
|
||||
}
|
||||
|
||||
let mut res = Vec::new();
|
||||
if objs.is_null() || count == 0 {
|
||||
return Ok(res);
|
||||
}
|
||||
|
||||
for i in 0..count as usize {
|
||||
unsafe {
|
||||
res.push((*objs.add(i)).get_ref()?);
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn retain(&self) -> CLResult<()> {
|
||||
unsafe {
|
||||
Arc::increment_strong_count(self.get_ptr()?);
|
||||
|
|
|
|||
|
|
@ -82,10 +82,10 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
|
|||
if kernel.prog.devs.len() > 1 {
|
||||
return Err(CL_INVALID_DEVICE);
|
||||
} else {
|
||||
kernel.prog.devs[0].clone()
|
||||
kernel.prog.devs[0]
|
||||
}
|
||||
} else {
|
||||
dev.get_arc()?
|
||||
dev.get_ref()?
|
||||
};
|
||||
|
||||
// CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
|
||||
|
|
@ -95,12 +95,12 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
|
|||
|
||||
Ok(match *q {
|
||||
CL_KERNEL_COMPILE_WORK_GROUP_SIZE => cl_prop::<[usize; 3]>(kernel.work_group_size),
|
||||
CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(&dev)),
|
||||
CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(dev)),
|
||||
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => {
|
||||
cl_prop::<usize>(kernel.preferred_simd_size(&dev))
|
||||
cl_prop::<usize>(kernel.preferred_simd_size(dev))
|
||||
}
|
||||
CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(&dev)),
|
||||
CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(&dev)),
|
||||
CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(dev)),
|
||||
CL_KERNEL_WORK_GROUP_SIZE => cl_prop::<usize>(kernel.max_threads_per_block(dev)),
|
||||
// CL_INVALID_VALUE if param_name is not one of the supported values
|
||||
_ => return Err(CL_INVALID_VALUE),
|
||||
})
|
||||
|
|
@ -128,10 +128,10 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
|
|||
if kernel.prog.devs.len() > 1 {
|
||||
return Err(CL_INVALID_DEVICE);
|
||||
} else {
|
||||
kernel.prog.devs[0].clone()
|
||||
kernel.prog.devs[0]
|
||||
}
|
||||
} else {
|
||||
dev.get_arc()?
|
||||
dev.get_ref()?
|
||||
};
|
||||
|
||||
// CL_INVALID_DEVICE if device is not in the list of devices associated with kernel
|
||||
|
|
@ -172,16 +172,16 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
|
|||
|
||||
Ok(match q {
|
||||
CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE => {
|
||||
cl_prop::<usize>(kernel.subgroups_for_block(&dev, input))
|
||||
cl_prop::<usize>(kernel.subgroups_for_block(dev, input))
|
||||
}
|
||||
CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE => {
|
||||
cl_prop::<usize>(kernel.subgroup_size_for_block(&dev, input))
|
||||
cl_prop::<usize>(kernel.subgroup_size_for_block(dev, input))
|
||||
}
|
||||
CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT => {
|
||||
let subgroups = input[0];
|
||||
let mut res = vec![0; 3];
|
||||
|
||||
for subgroup_size in kernel.subgroup_sizes(&dev) {
|
||||
for subgroup_size in kernel.subgroup_sizes(dev) {
|
||||
let threads = subgroups * subgroup_size;
|
||||
|
||||
if threads > dev.max_threads_per_block() {
|
||||
|
|
@ -189,7 +189,7 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
|
|||
}
|
||||
|
||||
let block = [threads, 1, 1];
|
||||
let real_subgroups = kernel.subgroups_for_block(&dev, &block);
|
||||
let real_subgroups = kernel.subgroups_for_block(dev, &block);
|
||||
|
||||
if real_subgroups == subgroups {
|
||||
res = block.to_vec();
|
||||
|
|
@ -201,11 +201,11 @@ impl CLInfoObj<cl_kernel_sub_group_info, (cl_device_id, usize, *const c_void, us
|
|||
cl_prop::<Vec<usize>>(res)
|
||||
}
|
||||
CL_KERNEL_MAX_NUM_SUB_GROUPS => {
|
||||
let threads = kernel.max_threads_per_block(&dev);
|
||||
let threads = kernel.max_threads_per_block(dev);
|
||||
let max_groups = dev.max_subgroups();
|
||||
|
||||
let mut result = 0;
|
||||
for sgs in kernel.subgroup_sizes(&dev) {
|
||||
for sgs in kernel.subgroup_sizes(dev) {
|
||||
result = cmp::max(result, threads / sgs);
|
||||
result = cmp::min(result, max_groups as usize);
|
||||
}
|
||||
|
|
@ -512,7 +512,7 @@ fn enqueue_ndrange_kernel(
|
|||
|
||||
// CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
|
||||
// for device associated with command_queue.
|
||||
if k.prog.status(&q.device) != CL_BUILD_SUCCESS as cl_build_status {
|
||||
if k.prog.status(q.device) != CL_BUILD_SUCCESS as cl_build_status {
|
||||
return Err(CL_INVALID_PROGRAM_EXECUTABLE);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -395,7 +395,7 @@ fn validate_image_desc(
|
|||
image_desc: *const cl_image_desc,
|
||||
host_ptr: *mut ::std::os::raw::c_void,
|
||||
elem_size: usize,
|
||||
devs: &[Arc<Device>],
|
||||
devs: &[&Device],
|
||||
) -> CLResult<(cl_image_desc, Option<Arc<Mem>>)> {
|
||||
// CL_INVALID_IMAGE_DESCRIPTOR if values specified in image_desc are not valid
|
||||
const err: cl_int = CL_INVALID_IMAGE_DESCRIPTOR;
|
||||
|
|
|
|||
|
|
@ -33,18 +33,12 @@ impl CLInfo<cl_program_info> for cl_program {
|
|||
let ptr = Arc::as_ptr(&prog.context);
|
||||
cl_prop::<cl_context>(cl_context::from_ptr(ptr))
|
||||
}
|
||||
CL_PROGRAM_DEVICES => {
|
||||
cl_prop::<&Vec<cl_device_id>>(
|
||||
&prog
|
||||
.devs
|
||||
.iter()
|
||||
.map(|d| {
|
||||
// Note we use as_ptr here which doesn't increase the reference count.
|
||||
cl_device_id::from_ptr(Arc::as_ptr(d))
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
CL_PROGRAM_DEVICES => cl_prop::<Vec<cl_device_id>>(
|
||||
prog.devs
|
||||
.iter()
|
||||
.map(|&d| cl_device_id::from_ptr(d))
|
||||
.collect(),
|
||||
),
|
||||
CL_PROGRAM_IL => match &prog.src {
|
||||
ProgramSourceType::Il(il) => to_maybeuninit_vec(il.to_bin().to_vec()),
|
||||
_ => Vec::new(),
|
||||
|
|
@ -82,12 +76,12 @@ impl CLInfoObj<cl_program_build_info, cl_device_id> for cl_program {
|
|||
}
|
||||
}
|
||||
|
||||
fn validate_devices(
|
||||
fn validate_devices<'a>(
|
||||
device_list: *const cl_device_id,
|
||||
num_devices: cl_uint,
|
||||
default: &[Arc<Device>],
|
||||
) -> CLResult<Vec<Arc<Device>>> {
|
||||
let mut devs = cl_device_id::get_arc_vec_from_arr(device_list, num_devices)?;
|
||||
default: &[&'a Device],
|
||||
) -> CLResult<Vec<&'a Device>> {
|
||||
let mut devs = cl_device_id::get_ref_vec_from_arr(device_list, num_devices)?;
|
||||
|
||||
// If device_list is a NULL value, the compile is performed for all devices associated with
|
||||
// program.
|
||||
|
|
@ -197,7 +191,7 @@ fn create_program_with_binary(
|
|||
binary_status: *mut cl_int,
|
||||
) -> CLResult<cl_program> {
|
||||
let c = context.get_arc()?;
|
||||
let devs = cl_device_id::get_arc_vec_from_arr(device_list, num_devices)?;
|
||||
let devs = cl_device_id::get_ref_vec_from_arr(device_list, num_devices)?;
|
||||
|
||||
// CL_INVALID_VALUE if device_list is NULL or num_devices is zero.
|
||||
if devs.is_empty() {
|
||||
|
|
|
|||
|
|
@ -23,11 +23,7 @@ impl CLInfo<cl_command_queue_info> for cl_command_queue {
|
|||
let ptr = Arc::as_ptr(&queue.context);
|
||||
cl_prop::<cl_context>(cl_context::from_ptr(ptr))
|
||||
}
|
||||
CL_QUEUE_DEVICE => {
|
||||
// Note we use as_ptr here which doesn't increase the reference count.
|
||||
let ptr = Arc::as_ptr(&queue.device);
|
||||
cl_prop::<cl_device_id>(cl_device_id::from_ptr(ptr))
|
||||
}
|
||||
CL_QUEUE_DEVICE => cl_prop::<cl_device_id>(cl_device_id::from_ptr(queue.device)),
|
||||
CL_QUEUE_DEVICE_DEFAULT => cl_prop::<cl_command_queue>(ptr::null_mut()),
|
||||
CL_QUEUE_PROPERTIES => cl_prop::<cl_command_queue_properties>(queue.props),
|
||||
CL_QUEUE_PROPERTIES_ARRAY => {
|
||||
|
|
@ -61,7 +57,7 @@ pub fn create_command_queue_impl(
|
|||
properties_v2: Option<Properties<cl_queue_properties>>,
|
||||
) -> CLResult<cl_command_queue> {
|
||||
let c = context.get_arc()?;
|
||||
let d = device.get_arc()?;
|
||||
let d = device.get_ref()?.to_static().ok_or(CL_INVALID_DEVICE)?;
|
||||
|
||||
// CL_INVALID_DEVICE if device [...] is not associated with context.
|
||||
if !c.devs.contains(&d) {
|
||||
|
|
@ -102,7 +98,7 @@ fn create_command_queue_with_properties(
|
|||
properties: *const cl_queue_properties,
|
||||
) -> CLResult<cl_command_queue> {
|
||||
let c = context.get_arc()?;
|
||||
let d = device.get_arc()?;
|
||||
let d = device.get_ref()?.to_static().ok_or(CL_INVALID_DEVICE)?;
|
||||
|
||||
let mut queue_properties = cl_command_queue_properties::default();
|
||||
let properties = if properties.is_null() {
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ use std::sync::Mutex;
|
|||
|
||||
pub struct Context {
|
||||
pub base: CLObjectBase<CL_INVALID_CONTEXT>,
|
||||
pub devs: Vec<Arc<Device>>,
|
||||
pub devs: Vec<&'static Device>,
|
||||
pub properties: Properties<cl_context_properties>,
|
||||
pub dtors: Mutex<Vec<Box<dyn Fn(cl_context)>>>,
|
||||
pub svm_ptrs: Mutex<BTreeMap<*const c_void, Layout>>,
|
||||
|
|
@ -30,7 +30,7 @@ impl_cl_type_trait!(cl_context, Context, CL_INVALID_CONTEXT);
|
|||
|
||||
impl Context {
|
||||
pub fn new(
|
||||
devs: Vec<Arc<Device>>,
|
||||
devs: Vec<&'static Device>,
|
||||
properties: Properties<cl_context_properties>,
|
||||
) -> Arc<Context> {
|
||||
Arc::new(Self {
|
||||
|
|
@ -48,10 +48,10 @@ impl Context {
|
|||
user_ptr: *mut c_void,
|
||||
copy: bool,
|
||||
res_type: ResourceType,
|
||||
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
|
||||
) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
|
||||
let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
|
||||
let mut res = HashMap::new();
|
||||
for dev in &self.devs {
|
||||
for &dev in &self.devs {
|
||||
let mut resource = None;
|
||||
|
||||
if !user_ptr.is_null() && !copy {
|
||||
|
|
@ -65,7 +65,7 @@ impl Context {
|
|||
}
|
||||
|
||||
let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
|
||||
res.insert(Arc::clone(dev), Arc::new(resource?));
|
||||
res.insert(dev, Arc::new(resource?));
|
||||
}
|
||||
|
||||
if !user_ptr.is_null() {
|
||||
|
|
@ -88,7 +88,7 @@ impl Context {
|
|||
user_ptr: *mut c_void,
|
||||
copy: bool,
|
||||
res_type: ResourceType,
|
||||
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
|
||||
) -> CLResult<HashMap<&'static Device, Arc<PipeResource>>> {
|
||||
let width = desc
|
||||
.image_width
|
||||
.try_into()
|
||||
|
|
@ -108,7 +108,7 @@ impl Context {
|
|||
let target = cl_mem_type_to_texture_target(desc.image_type);
|
||||
|
||||
let mut res = HashMap::new();
|
||||
for dev in &self.devs {
|
||||
for &dev in &self.devs {
|
||||
let mut resource = None;
|
||||
|
||||
// we can't specify custom pitches/slices, so this won't work for non 1D images
|
||||
|
|
@ -125,7 +125,7 @@ impl Context {
|
|||
}
|
||||
|
||||
let resource = resource.ok_or(CL_OUT_OF_RESOURCES);
|
||||
res.insert(Arc::clone(dev), Arc::new(resource?));
|
||||
res.insert(dev, Arc::new(resource?));
|
||||
}
|
||||
|
||||
if !user_ptr.is_null() {
|
||||
|
|
|
|||
|
|
@ -235,6 +235,19 @@ impl Device {
|
|||
Some(Arc::new(d))
|
||||
}
|
||||
|
||||
/// Converts a temporary reference to a static if and only if this device lives inside static
|
||||
/// memory.
|
||||
pub fn to_static(&self) -> Option<&'static Self> {
|
||||
for dev in devs() {
|
||||
let dev = dev.as_ref();
|
||||
if self == dev {
|
||||
return Some(dev);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn fill_format_tables(&mut self) {
|
||||
for f in FORMATS {
|
||||
let mut fs = HashMap::new();
|
||||
|
|
@ -937,3 +950,15 @@ impl Device {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn devs() -> &'static Vec<Arc<Device>> {
|
||||
&Platform::get().devs
|
||||
}
|
||||
|
||||
pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
|
||||
devs()
|
||||
.iter()
|
||||
.filter(|d| device_type & d.device_type(true) != 0)
|
||||
.map(Arc::as_ref)
|
||||
.collect()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -258,7 +258,7 @@ struct KernelDevStateInner {
|
|||
}
|
||||
|
||||
struct KernelDevState {
|
||||
states: HashMap<Arc<Device>, KernelDevStateInner>,
|
||||
states: HashMap<&'static Device, KernelDevStateInner>,
|
||||
}
|
||||
|
||||
impl Drop for KernelDevState {
|
||||
|
|
@ -272,10 +272,10 @@ impl Drop for KernelDevState {
|
|||
}
|
||||
|
||||
impl KernelDevState {
|
||||
fn new(nirs: &HashMap<Arc<Device>, Arc<NirShader>>) -> Arc<Self> {
|
||||
fn new(nirs: &HashMap<&'static Device, Arc<NirShader>>) -> Arc<Self> {
|
||||
let states = nirs
|
||||
.iter()
|
||||
.map(|(dev, nir)| {
|
||||
.map(|(&dev, nir)| {
|
||||
let mut cso = dev
|
||||
.helper_ctx()
|
||||
.create_compute_state(nir, nir.shared_size());
|
||||
|
|
@ -289,7 +289,7 @@ impl KernelDevState {
|
|||
};
|
||||
|
||||
(
|
||||
dev.clone(),
|
||||
dev,
|
||||
KernelDevStateInner {
|
||||
nir: nir.clone(),
|
||||
constant_buffer: cb,
|
||||
|
|
@ -871,7 +871,7 @@ impl Kernel {
|
|||
grid: &[usize],
|
||||
offsets: &[usize],
|
||||
) -> CLResult<EventSig> {
|
||||
let dev_state = self.dev_state.get(&q.device);
|
||||
let dev_state = self.dev_state.get(q.device);
|
||||
let mut block = create_kernel_arr::<u32>(block, 1);
|
||||
let mut grid = create_kernel_arr::<u32>(grid, 1);
|
||||
let offsets = create_kernel_arr::<u64>(offsets, 0);
|
||||
|
|
@ -894,7 +894,7 @@ impl Kernel {
|
|||
&[0; 4]
|
||||
};
|
||||
|
||||
self.optimize_local_size(&q.device, &mut grid, &mut block);
|
||||
self.optimize_local_size(q.device, &mut grid, &mut block);
|
||||
|
||||
for (arg, val) in self.build.args.iter().zip(&self.values) {
|
||||
if arg.dead {
|
||||
|
|
@ -911,7 +911,7 @@ impl Kernel {
|
|||
match val.borrow().as_ref().unwrap() {
|
||||
KernelArgValue::Constant(c) => input.extend_from_slice(c),
|
||||
KernelArgValue::MemObject(mem) => {
|
||||
let res = mem.get_res_of_dev(&q.device)?;
|
||||
let res = mem.get_res_of_dev(q.device)?;
|
||||
// If resource is a buffer and mem a 2D image, the 2d image was created from a
|
||||
// buffer. Use strides and dimensions of 2d image
|
||||
let app_img_info =
|
||||
|
|
@ -1038,7 +1038,7 @@ impl Kernel {
|
|||
|
||||
let k = Arc::clone(self);
|
||||
Ok(Box::new(move |q, ctx| {
|
||||
let dev_state = k.dev_state.get(&q.device);
|
||||
let dev_state = k.dev_state.get(q.device);
|
||||
let mut input = input.clone();
|
||||
let mut resources = Vec::with_capacity(resource_info.len());
|
||||
let mut globals: Vec<*mut u32> = Vec::new();
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ use mesa_rust_util::properties::Properties;
|
|||
use rusticl_opencl_gen::*;
|
||||
|
||||
use std::cmp;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
|
|
@ -45,7 +46,7 @@ impl MappingTransfer {
|
|||
}
|
||||
|
||||
struct Mappings {
|
||||
tx: HashMap<Arc<Device>, MappingTransfer>,
|
||||
tx: HashMap<&'static Device, MappingTransfer>,
|
||||
maps: HashMap<*mut c_void, u32>,
|
||||
}
|
||||
|
||||
|
|
@ -92,7 +93,7 @@ impl Mappings {
|
|||
|
||||
fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
|
||||
if self.maps.is_empty() {
|
||||
if let Some(tx) = self.tx.get(dev) {
|
||||
if let Some(tx) = self.tx.get(&dev) {
|
||||
if tx.pending == 0 {
|
||||
self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
|
||||
}
|
||||
|
|
@ -116,7 +117,7 @@ pub struct Mem {
|
|||
pub image_elem_size: u8,
|
||||
pub props: Vec<cl_mem_properties>,
|
||||
pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
|
||||
res: Option<HashMap<Arc<Device>, Arc<PipeResource>>>,
|
||||
res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
|
||||
maps: Mutex<Mappings>,
|
||||
}
|
||||
|
||||
|
|
@ -457,7 +458,7 @@ impl Mem {
|
|||
|
||||
assert!(self.is_buffer());
|
||||
|
||||
let tx = if can_map_directly(&q.device, r) {
|
||||
let tx = if can_map_directly(q.device, r) {
|
||||
ctx.buffer_map_directly(
|
||||
r,
|
||||
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
|
||||
|
|
@ -513,10 +514,10 @@ impl Mem {
|
|||
) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
|
||||
assert!(!self.is_buffer());
|
||||
|
||||
let r = self.get_res()?.get(&q.device).unwrap();
|
||||
let r = self.get_res()?.get(q.device).unwrap();
|
||||
let ctx = q.device.helper_ctx();
|
||||
|
||||
let tx = if can_map_directly(&q.device, r) {
|
||||
let tx = if can_map_directly(q.device, r) {
|
||||
ctx.texture_map_directly(r, bx, rw)
|
||||
} else {
|
||||
None
|
||||
|
|
@ -573,7 +574,7 @@ impl Mem {
|
|||
&& bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
|
||||
}
|
||||
|
||||
fn get_res(&self) -> CLResult<&HashMap<Arc<Device>, Arc<PipeResource>>> {
|
||||
fn get_res(&self) -> CLResult<&HashMap<&'static Device, Arc<PipeResource>>> {
|
||||
self.get_parent().res.as_ref().ok_or(CL_OUT_OF_HOST_MEMORY)
|
||||
}
|
||||
|
||||
|
|
@ -992,17 +993,17 @@ impl Mem {
|
|||
ptr: *mut c_void,
|
||||
) -> CLResult<()> {
|
||||
let mut lock = self.maps.lock().unwrap();
|
||||
if !lock.increase_ref(&q.device, ptr) {
|
||||
if !lock.increase_ref(q.device, ptr) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if self.has_user_shadow_buffer(&q.device)? {
|
||||
if self.has_user_shadow_buffer(q.device)? {
|
||||
self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
|
||||
} else {
|
||||
if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
|
||||
let mut offset = 0;
|
||||
let b = self.to_parent(&mut offset);
|
||||
let res = b.get_res_of_dev(&q.device)?;
|
||||
let res = b.get_res_of_dev(q.device)?;
|
||||
let bx = create_pipe_box(
|
||||
[offset, 0, 0].into(),
|
||||
[self.size, 1, 1].into(),
|
||||
|
|
@ -1022,11 +1023,11 @@ impl Mem {
|
|||
ptr: *mut c_void,
|
||||
) -> CLResult<()> {
|
||||
let mut lock = self.maps.lock().unwrap();
|
||||
if !lock.increase_ref(&q.device, ptr) {
|
||||
if !lock.increase_ref(q.device, ptr) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if self.has_user_shadow_buffer(&q.device)? {
|
||||
if self.has_user_shadow_buffer(q.device)? {
|
||||
self.read_to_user_rect(
|
||||
self.host_ptr,
|
||||
q,
|
||||
|
|
@ -1040,8 +1041,8 @@ impl Mem {
|
|||
self.image_desc.image_slice_pitch,
|
||||
)
|
||||
} else {
|
||||
if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
|
||||
let res = self.get_res_of_dev(&q.device)?;
|
||||
if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
|
||||
let res = self.get_res_of_dev(q.device)?;
|
||||
let bx = self.image_desc.bx()?;
|
||||
ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
|
||||
}
|
||||
|
|
@ -1080,7 +1081,7 @@ impl Mem {
|
|||
lock: &'a mut MutexGuard<Mappings>,
|
||||
rw: RWFlags,
|
||||
) -> CLResult<&'a PipeTransfer> {
|
||||
if !lock.tx.contains_key(&q.device) {
|
||||
if let Entry::Vacant(e) = lock.tx.entry(q.device) {
|
||||
let (tx, res) = if self.is_buffer() {
|
||||
self.tx_raw_async(q, rw)?
|
||||
} else {
|
||||
|
|
@ -1088,10 +1089,9 @@ impl Mem {
|
|||
self.tx_image_raw_async(q, &bx, rw)?
|
||||
};
|
||||
|
||||
lock.tx
|
||||
.insert(q.device.clone(), MappingTransfer::new(tx, res));
|
||||
e.insert(MappingTransfer::new(tx, res));
|
||||
} else {
|
||||
lock.mark_pending(&q.device);
|
||||
lock.mark_pending(q.device);
|
||||
}
|
||||
|
||||
Ok(&lock.tx.get_mut(&q.device).unwrap().tx)
|
||||
|
|
@ -1101,7 +1101,7 @@ impl Mem {
|
|||
assert!(self.is_buffer());
|
||||
|
||||
let mut lock = self.maps.lock().unwrap();
|
||||
let ptr = if self.has_user_shadow_buffer(&q.device)? {
|
||||
let ptr = if self.has_user_shadow_buffer(q.device)? {
|
||||
self.host_ptr
|
||||
} else {
|
||||
let tx = self.map(q, &mut lock, RWFlags::RW)?;
|
||||
|
|
@ -1125,7 +1125,7 @@ impl Mem {
|
|||
let mut lock = self.maps.lock().unwrap();
|
||||
|
||||
// we might have a host_ptr shadow buffer or image created from buffer
|
||||
let ptr = if self.has_user_shadow_buffer(&q.device)? || self.is_parent_buffer() {
|
||||
let ptr = if self.has_user_shadow_buffer(q.device)? || self.is_parent_buffer() {
|
||||
*row_pitch = self.image_desc.image_row_pitch;
|
||||
*slice_pitch = self.image_desc.image_slice_pitch;
|
||||
|
||||
|
|
@ -1173,12 +1173,12 @@ impl Mem {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
let (needs_sync, shadow) = lock.decrease_ref(ptr, &q.device);
|
||||
let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
|
||||
if needs_sync {
|
||||
if let Some(shadow) = shadow {
|
||||
let mut offset = 0;
|
||||
let b = self.to_parent(&mut offset);
|
||||
let res = b.get_res_of_dev(&q.device)?;
|
||||
let res = b.get_res_of_dev(q.device)?;
|
||||
|
||||
let bx = if b.is_buffer() {
|
||||
create_pipe_box(
|
||||
|
|
@ -1191,7 +1191,7 @@ impl Mem {
|
|||
};
|
||||
|
||||
ctx.resource_copy_region(shadow, res, &[offset as u32, 0, 0], &bx);
|
||||
} else if self.has_user_shadow_buffer(&q.device)? {
|
||||
} else if self.has_user_shadow_buffer(q.device)? {
|
||||
if self.is_buffer() {
|
||||
self.write_from_user(q, ctx, 0, self.host_ptr, self.size)?;
|
||||
} else {
|
||||
|
|
@ -1211,7 +1211,7 @@ impl Mem {
|
|||
}
|
||||
}
|
||||
|
||||
lock.clean_up_tx(&q.device, ctx);
|
||||
lock.clean_up_tx(q.device, ctx);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ pub enum ProgramSourceType {
|
|||
pub struct Program {
|
||||
pub base: CLObjectBase<CL_INVALID_PROGRAM>,
|
||||
pub context: Arc<Context>,
|
||||
pub devs: Vec<Arc<Device>>,
|
||||
pub devs: Vec<&'static Device>,
|
||||
pub src: ProgramSourceType,
|
||||
build: Mutex<ProgramBuild>,
|
||||
}
|
||||
|
|
@ -69,14 +69,14 @@ impl_cl_type_trait!(cl_program, Program, CL_INVALID_PROGRAM);
|
|||
|
||||
#[derive(Clone)]
|
||||
pub struct NirKernelBuild {
|
||||
pub nirs: HashMap<Arc<Device>, Arc<NirShader>>,
|
||||
pub nirs: HashMap<&'static Device, Arc<NirShader>>,
|
||||
pub args: Vec<KernelArg>,
|
||||
pub internal_args: Vec<InternalKernelArg>,
|
||||
pub attributes_string: String,
|
||||
}
|
||||
|
||||
pub(super) struct ProgramBuild {
|
||||
builds: HashMap<Arc<Device>, ProgramDevBuild>,
|
||||
builds: HashMap<&'static Device, ProgramDevBuild>,
|
||||
spec_constants: HashMap<u32, nir_const_value>,
|
||||
kernels: Vec<String>,
|
||||
kernel_builds: HashMap<String, Arc<NirKernelBuild>>,
|
||||
|
|
@ -122,7 +122,7 @@ impl ProgramBuild {
|
|||
for d in self.devs_with_build() {
|
||||
let (nir, args, internal_args) = convert_spirv_to_nir(self, kernel_name, &args, d);
|
||||
let attributes_string = self.attribute_str(kernel_name, d);
|
||||
nirs.insert(d.clone(), Arc::new(nir));
|
||||
nirs.insert(d, Arc::new(nir));
|
||||
args_set.insert(args);
|
||||
internal_args_set.insert(internal_args);
|
||||
attributes_string_set.insert(attributes_string);
|
||||
|
|
@ -163,11 +163,11 @@ impl ProgramBuild {
|
|||
self.builds.get_mut(dev).unwrap()
|
||||
}
|
||||
|
||||
fn devs_with_build(&self) -> Vec<&Arc<Device>> {
|
||||
fn devs_with_build(&self) -> Vec<&'static Device> {
|
||||
self.builds
|
||||
.iter()
|
||||
.filter(|(_, build)| build.status == CL_BUILD_SUCCESS as cl_build_status)
|
||||
.map(|(d, _)| d)
|
||||
.map(|(&d, _)| d)
|
||||
.collect()
|
||||
}
|
||||
|
||||
|
|
@ -285,11 +285,13 @@ fn prepare_options(options: &str, dev: &Device) -> Vec<CString> {
|
|||
}
|
||||
|
||||
impl Program {
|
||||
fn create_default_builds(devs: &[Arc<Device>]) -> HashMap<Arc<Device>, ProgramDevBuild> {
|
||||
fn create_default_builds(
|
||||
devs: &[&'static Device],
|
||||
) -> HashMap<&'static Device, ProgramDevBuild> {
|
||||
devs.iter()
|
||||
.map(|d| {
|
||||
.map(|&d| {
|
||||
(
|
||||
d.clone(),
|
||||
d,
|
||||
ProgramDevBuild {
|
||||
spirv: None,
|
||||
status: CL_BUILD_NONE,
|
||||
|
|
@ -302,7 +304,7 @@ impl Program {
|
|||
.collect()
|
||||
}
|
||||
|
||||
pub fn new(context: &Arc<Context>, devs: &[Arc<Device>], src: CString) -> Arc<Program> {
|
||||
pub fn new(context: &Arc<Context>, devs: &[&'static Device], src: CString) -> Arc<Program> {
|
||||
Arc::new(Self {
|
||||
base: CLObjectBase::new(),
|
||||
context: context.clone(),
|
||||
|
|
@ -319,13 +321,13 @@ impl Program {
|
|||
|
||||
pub fn from_bins(
|
||||
context: Arc<Context>,
|
||||
devs: Vec<Arc<Device>>,
|
||||
devs: Vec<&'static Device>,
|
||||
bins: &[&[u8]],
|
||||
) -> Arc<Program> {
|
||||
let mut builds = HashMap::new();
|
||||
let mut kernels = HashSet::new();
|
||||
|
||||
for (d, b) in devs.iter().zip(bins) {
|
||||
for (&d, b) in devs.iter().zip(bins) {
|
||||
let mut ptr = b.as_ptr();
|
||||
let bin_type;
|
||||
let spirv;
|
||||
|
|
@ -364,7 +366,7 @@ impl Program {
|
|||
}
|
||||
|
||||
builds.insert(
|
||||
d.clone(),
|
||||
d,
|
||||
ProgramDevBuild {
|
||||
spirv: spirv,
|
||||
status: CL_BUILD_SUCCESS as cl_build_status,
|
||||
|
|
@ -625,17 +627,16 @@ impl Program {
|
|||
|
||||
pub fn link(
|
||||
context: Arc<Context>,
|
||||
devs: &[Arc<Device>],
|
||||
devs: &[&'static Device],
|
||||
progs: &[Arc<Program>],
|
||||
options: String,
|
||||
) -> Arc<Program> {
|
||||
let devs: Vec<Arc<Device>> = devs.iter().map(|d| (*d).clone()).collect();
|
||||
let mut builds = HashMap::new();
|
||||
let mut kernels = HashSet::new();
|
||||
let mut locks: Vec<_> = progs.iter().map(|p| p.build_info()).collect();
|
||||
let lib = options.contains("-create-library");
|
||||
|
||||
for d in &devs {
|
||||
for &d in devs {
|
||||
let bins: Vec<_> = locks
|
||||
.iter_mut()
|
||||
.map(|l| l.dev_build(d).spirv.as_ref().unwrap())
|
||||
|
|
@ -661,7 +662,7 @@ impl Program {
|
|||
};
|
||||
|
||||
builds.insert(
|
||||
d.clone(),
|
||||
d,
|
||||
ProgramDevBuild {
|
||||
spirv: spirv,
|
||||
status: status,
|
||||
|
|
@ -685,7 +686,7 @@ impl Program {
|
|||
Arc::new(Self {
|
||||
base: CLObjectBase::new(),
|
||||
context: context,
|
||||
devs: devs,
|
||||
devs: devs.to_owned(),
|
||||
src: ProgramSourceType::Linked,
|
||||
build: Mutex::new(build),
|
||||
})
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ struct QueueState {
|
|||
pub struct Queue {
|
||||
pub base: CLObjectBase<CL_INVALID_COMMAND_QUEUE>,
|
||||
pub context: Arc<Context>,
|
||||
pub device: Arc<Device>,
|
||||
pub device: &'static Device,
|
||||
pub props: cl_command_queue_properties,
|
||||
pub props_v2: Option<Properties<cl_queue_properties>>,
|
||||
state: Mutex<QueueState>,
|
||||
|
|
@ -43,7 +43,7 @@ fn flush_events(evs: &mut Vec<Arc<Event>>, pipe: &PipeContext) {
|
|||
impl Queue {
|
||||
pub fn new(
|
||||
context: Arc<Context>,
|
||||
device: Arc<Device>,
|
||||
device: &'static Device,
|
||||
props: cl_command_queue_properties,
|
||||
props_v2: Option<Properties<cl_queue_properties>>,
|
||||
) -> CLResult<Arc<Queue>> {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue