rusticl/mem: support read/write/copy ops for images

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
This commit is contained in:
Karol Herbst 2022-03-22 18:33:57 +01:00 committed by Marge Bot
parent d1e5f86e95
commit 71a9af4910
10 changed files with 800 additions and 179 deletions

View file

@ -973,52 +973,83 @@ extern "C" fn cl_enqueue_copy_buffer(
}
extern "C" fn cl_enqueue_read_image(
_command_queue: cl_command_queue,
_image: cl_mem,
_blocking_read: cl_bool,
_origin: *const usize,
_region: *const usize,
_row_pitch: usize,
_slice_pitch: usize,
_ptr: *mut ::std::os::raw::c_void,
_num_events_in_wait_list: cl_uint,
_event_wait_list: *const cl_event,
_event: *mut cl_event,
command_queue: cl_command_queue,
image: cl_mem,
blocking_read: cl_bool,
origin: *const usize,
region: *const usize,
row_pitch: usize,
slice_pitch: usize,
ptr: *mut ::std::os::raw::c_void,
num_events_in_wait_list: cl_uint,
event_wait_list: *const cl_event,
event: *mut cl_event,
) -> cl_int {
println!("cl_enqueue_read_image not implemented");
CL_OUT_OF_HOST_MEMORY
match_err!(enqueue_read_image(
command_queue,
image,
blocking_read,
origin,
region,
row_pitch,
slice_pitch,
ptr,
num_events_in_wait_list,
event_wait_list,
event,
))
}
extern "C" fn cl_enqueue_write_image(
_command_queue: cl_command_queue,
_image: cl_mem,
_blocking_write: cl_bool,
_origin: *const usize,
_region: *const usize,
_input_row_pitch: usize,
_input_slice_pitch: usize,
_ptr: *const ::std::os::raw::c_void,
_num_events_in_wait_list: cl_uint,
_event_wait_list: *const cl_event,
_event: *mut cl_event,
command_queue: cl_command_queue,
image: cl_mem,
blocking_write: cl_bool,
origin: *const usize,
region: *const usize,
input_row_pitch: usize,
input_slice_pitch: usize,
ptr: *const ::std::os::raw::c_void,
num_events_in_wait_list: cl_uint,
event_wait_list: *const cl_event,
event: *mut cl_event,
) -> cl_int {
println!("cl_enqueue_write_image not implemented");
CL_OUT_OF_HOST_MEMORY
match_err!(enqueue_write_image(
command_queue,
image,
blocking_write,
origin,
region,
input_row_pitch,
input_slice_pitch,
ptr,
num_events_in_wait_list,
event_wait_list,
event,
))
}
extern "C" fn cl_enqueue_copy_image(
_command_queue: cl_command_queue,
_src_image: cl_mem,
_dst_image: cl_mem,
_src_origin: *const usize,
_dst_origin: *const usize,
_region: *const usize,
_num_events_in_wait_list: cl_uint,
_event_wait_list: *const cl_event,
_event: *mut cl_event,
command_queue: cl_command_queue,
src_image: cl_mem,
dst_image: cl_mem,
src_origin: *const usize,
dst_origin: *const usize,
region: *const usize,
num_events_in_wait_list: cl_uint,
event_wait_list: *const cl_event,
event: *mut cl_event,
) -> cl_int {
println!("cl_enqueue_copy_image not implemented");
CL_OUT_OF_HOST_MEMORY
match_err!(enqueue_copy_image(
command_queue,
src_image,
dst_image,
src_origin,
dst_origin,
region,
num_events_in_wait_list,
event_wait_list,
event,
))
}
extern "C" fn cl_enqueue_copy_image_to_buffer(

View file

@ -410,7 +410,7 @@ pub fn enqueue_ndrange_kernel(
local_work_size,
global_work_size,
global_work_offset,
)
)?
};
create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)

View file

@ -8,6 +8,7 @@ use crate::api::icd::*;
use crate::api::types::*;
use crate::api::util::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::memory::*;
use crate::*;
@ -329,41 +330,9 @@ fn validate_image_format<'a>(
) -> CLResult<(&'a cl_image_format, u8)> {
// CL_INVALID_IMAGE_FORMAT_DESCRIPTOR ... if image_format is NULL.
let format = unsafe { image_format.as_ref() }.ok_or(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)?;
let channels = match format.image_channel_order {
CL_R | CL_A | CL_DEPTH | CL_LUMINANCE | CL_INTENSITY => 1,
CL_RG | CL_RA | CL_Rx => 2,
CL_RGB | CL_RGx | CL_sRGB => 3,
CL_RGBA | CL_ARGB | CL_BGRA | CL_ABGR | CL_RGBx | CL_sRGBA | CL_sBGRA | CL_sRGBx => 4,
_ => return Err(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR),
};
let channel_size = match format.image_channel_data_type {
CL_SNORM_INT8 | CL_UNORM_INT8 | CL_SIGNED_INT8 | CL_UNSIGNED_INT8 => 1,
CL_SNORM_INT16 | CL_UNORM_INT16 | CL_SIGNED_INT16 | CL_UNSIGNED_INT16 | CL_HALF_FLOAT
| CL_UNORM_SHORT_565 | CL_UNORM_SHORT_555 => 2,
CL_SIGNED_INT32
| CL_UNSIGNED_INT32
| CL_FLOAT
| CL_UNORM_INT_101010
| CL_UNORM_INT_101010_2 => 4,
_ => return Err(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR),
};
let packed = [
CL_UNORM_SHORT_565,
CL_UNORM_SHORT_555,
CL_UNORM_INT_101010,
CL_UNORM_INT_101010,
]
.contains(&format.image_channel_data_type);
let pixel_size = format
.pixel_size()
.ok_or(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)?;
// special validation
let valid_combination = match format.image_channel_data_type {
@ -377,14 +346,7 @@ fn validate_image_format<'a>(
return Err(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
}
Ok((
format,
if packed {
channel_size
} else {
channels * channel_size
},
))
Ok((format, pixel_size))
}
fn validate_image_desc(
@ -402,14 +364,11 @@ fn validate_image_desc(
// image_type describes the image type and must be either CL_MEM_OBJECT_IMAGE1D,
// CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D,
// CL_MEM_OBJECT_IMAGE2D_ARRAY, or CL_MEM_OBJECT_IMAGE3D.
let (dims, array) = match desc.image_type {
CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
CL_MEM_OBJECT_IMAGE2D => (2, false),
CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
CL_MEM_OBJECT_IMAGE3D => (3, false),
_ => return Err(err),
};
if !CL_IMAGE_TYPES.contains(&desc.image_type) {
return Err(err);
}
let (dims, array) = desc.type_info();
// image_width is the width of the image in pixels. For a 2D image and image array, the image
// width must be a value ≥ 1 and ≤ CL_DEVICE_IMAGE2D_MAX_WIDTH. For a 3D image, the image width
@ -495,6 +454,8 @@ fn validate_image_desc(
if desc.image_row_pitch != 0 || desc.image_slice_pitch != 0 {
return Err(err);
}
desc.image_row_pitch = desc.image_width * elem_size;
desc.image_slice_pitch = desc.image_row_pitch * desc.image_height;
} else {
if desc.image_row_pitch == 0 {
desc.image_row_pitch = desc.image_width * elem_size;
@ -729,7 +690,7 @@ pub fn create_image_with_properties(
elem_size,
host_ptr,
props,
)))
)?))
}
pub fn create_image(
@ -1515,13 +1476,200 @@ pub fn enqueue_map_buffer(
Box::new(|_, _| Ok(())),
)?;
Ok(b.map(&q, offset, size, block))
b.map(&q, offset, size, block)
// TODO
// CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for the device associated with queue. This error code is missing before version 1.1.
// CL_MAP_FAILURE if there is a failure to map the requested region into the host address space. This error cannot occur for buffer objects created with CL_MEM_USE_HOST_PTR or CL_MEM_ALLOC_HOST_PTR.
// CL_INVALID_OPERATION if mapping would lead to overlapping regions being mapped for writing.
}
/// Implementation of `clEnqueueReadImage`: validates the request and queues a
/// command that copies the rectangular `region` of `image`, starting at
/// `origin`, into host memory at `ptr`.
///
/// Validation errors are returned immediately; the actual copy runs when the
/// queued event executes (synchronously when `blocking_read` is true).
pub fn enqueue_read_image(
    command_queue: cl_command_queue,
    image: cl_mem,
    blocking_read: cl_bool,
    origin: *const usize,
    region: *const usize,
    mut row_pitch: usize,
    mut slice_pitch: usize,
    ptr: *mut ::std::os::raw::c_void,
    num_events_in_wait_list: cl_uint,
    event_wait_list: *const cl_event,
    event: *mut cl_event,
) -> CLResult<()> {
    let q = command_queue.get_arc()?;
    let i = image.get_arc()?;
    let block = check_cl_bool(blocking_read).ok_or(CL_INVALID_VALUE)?;
    let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
    // NOTE(review): unwrap() assumes the image carries a format that was
    // validated at creation time (pixel_size() is None only for unknown
    // channel orders/data types) — confirm all creation paths validate.
    let pixel_size = i.image_format.pixel_size().unwrap() as usize;

    // CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same
    if i.context != q.context {
        return Err(CL_INVALID_CONTEXT);
    }

    // CL_INVALID_OPERATION if clEnqueueReadImage is called on image which has been created with
    // CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS.
    if bit_check(i.flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) {
        return Err(CL_INVALID_OPERATION);
    }

    // CL_INVALID_VALUE if origin or region is NULL.
    // CL_INVALID_VALUE if ptr is NULL.
    if origin.is_null() || region.is_null() || ptr.is_null() {
        return Err(CL_INVALID_VALUE);
    }

    // CL_INVALID_VALUE if image is a 1D or 2D image and slice_pitch or input_slice_pitch is not 0.
    if !i.image_desc.has_slice() && slice_pitch != 0 {
        return Err(CL_INVALID_VALUE);
    }

    // SAFETY: both pointers were checked non-null above; per the CL spec they
    // point to arrays of at least 3 usize values.
    let r = unsafe { CLVec::from_raw(region) };
    let o = unsafe { CLVec::from_raw(origin) };

    // If row_pitch (or input_row_pitch) is set to 0, the appropriate row pitch is calculated based
    // on the size of each element in bytes multiplied by width.
    if row_pitch == 0 {
        row_pitch = r[0] * pixel_size;
    }

    // If slice_pitch (or input_slice_pitch) is set to 0, the appropriate slice pitch is calculated
    // based on the row_pitch × height.
    if slice_pitch == 0 {
        slice_pitch = row_pitch * r[1];
    }

    // The closure runs on the queue thread; source is the image (device-side
    // pitches), destination is the caller's buffer (caller-supplied pitches).
    create_and_queue(
        q,
        CL_COMMAND_READ_IMAGE,
        evs,
        event,
        block,
        Box::new(move |q, ctx| {
            i.read_to_user_rect(
                ptr,
                q,
                ctx,
                &r,
                &o,
                i.image_desc.image_row_pitch,
                i.image_desc.image_slice_pitch,
                &CLVec::default(),
                row_pitch,
                slice_pitch,
            )
        }),
    )

    // Remaining spec-mandated checks still to be implemented:
    //• CL_INVALID_VALUE if the region being read or written specified by origin and region is out of bounds.
    //• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region.
    //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
    //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for image are not supported by device associated with queue.
    //• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE).
    //• CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write operations are blocking and the execution status of any of the events in event_wait_list is a negative integer value.
}
pub fn enqueue_write_image(
command_queue: cl_command_queue,
image: cl_mem,
blocking_write: cl_bool,
origin: *const usize,
region: *const usize,
mut row_pitch: usize,
mut slice_pitch: usize,
ptr: *const ::std::os::raw::c_void,
num_events_in_wait_list: cl_uint,
event_wait_list: *const cl_event,
event: *mut cl_event,
) -> CLResult<()> {
let q = command_queue.get_arc()?;
let i = image.get_arc()?;
let block = check_cl_bool(blocking_write).ok_or(CL_INVALID_VALUE)?;
let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
let pixel_size = i.image_format.pixel_size().unwrap() as usize;
// CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same
if i.context != q.context {
return Err(CL_INVALID_CONTEXT);
}
// CL_INVALID_OPERATION if clEnqueueWriteImage is called on image which has been created with
// CL_MEM_HOST_READ_ONLY or CL_MEM_HOST_NO_ACCESS.
if bit_check(i.flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS) {
return Err(CL_INVALID_OPERATION);
}
// CL_INVALID_VALUE if origin or region is NULL.
// CL_INVALID_VALUE if ptr is NULL.
if origin.is_null() || region.is_null() || ptr.is_null() {
return Err(CL_INVALID_VALUE);
}
// CL_INVALID_VALUE if image is a 1D or 2D image and slice_pitch or input_slice_pitch is not 0.
if !i.image_desc.has_slice() && slice_pitch != 0 {
return Err(CL_INVALID_VALUE);
}
let r = unsafe { CLVec::from_raw(region) };
let o = unsafe { CLVec::from_raw(origin) };
// If row_pitch (or input_row_pitch) is set to 0, the appropriate row pitch is calculated based
// on the size of each element in bytes multiplied by width.
if row_pitch == 0 {
row_pitch = r[0] * pixel_size;
}
// If slice_pitch (or input_slice_pitch) is set to 0, the appropriate slice pitch is calculated
// based on the row_pitch × height.
if slice_pitch == 0 {
slice_pitch = row_pitch * r[1];
}
create_and_queue(
q,
CL_COMMAND_WRITE_BUFFER_RECT,
evs,
event,
block,
Box::new(move |q, ctx| {
i.write_from_user_rect(
ptr,
q,
ctx,
&r,
&CLVec::default(),
row_pitch,
slice_pitch,
&o,
i.image_desc.image_row_pitch,
i.image_desc.image_slice_pitch,
)
}),
)
//• CL_INVALID_VALUE if the region being read or written specified by origin and region is out of bounds.
//• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region.
//• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
//• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for image are not supported by device associated with queue.
//• CL_INVALID_OPERATION if the device associated with command_queue does not support images (i.e. CL_DEVICE_IMAGE_SUPPORT specified in the Device Queries table is CL_FALSE).
//• CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write operations are blocking and the execution status of any of the events in event_wait_list is a negative integer value.
}
/// Implementation of `clEnqueueCopyImage` — currently a stub that always
/// fails; image-to-image copies are not wired up yet.
pub fn enqueue_copy_image(
    _command_queue: cl_command_queue,
    _src_image: cl_mem,
    _dst_image: cl_mem,
    _src_origin: *const usize,
    _dst_origin: *const usize,
    _region: *const usize,
    _num_events_in_wait_list: cl_uint,
    _event_wait_list: *const cl_event,
    _event: *mut cl_event,
) -> CLResult<()> {
    // NOTE(review): the error code is a placeholder for "not implemented",
    // not a value the spec mandates for this entry point.
    println!("enqueue_copy_image not implemented");
    Err(CL_OUT_OF_HOST_MEMORY)
}
pub fn enqueue_unmap_mem_object(
command_queue: cl_command_queue,
memobj: cl_mem,

View file

@ -3,6 +3,8 @@ extern crate rusticl_opencl_gen;
use crate::api::icd::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::util::*;
use crate::impl_cl_type_trait;
use self::mesa_rust::pipe::resource::*;
@ -62,6 +64,79 @@ impl Context {
}
Ok(res)
}
/// Allocates one texture resource per device in this context, described by
/// the (already sanitized) `desc` and `format`. Fails with
/// CL_OUT_OF_HOST_MEMORY on dimension overflow and CL_OUT_OF_RESOURCES when
/// a screen refuses the allocation.
pub fn create_texture(
    &self,
    desc: &cl_image_desc,
    format: &cl_image_format,
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
    // Narrow the usize dimensions to the integer widths the gallium screen
    // API takes; a value that does not fit counts as an allocation failure.
    let width = desc.image_width.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let height = desc.image_height.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let depth = desc.image_depth.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let array_size = desc.image_array_size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let target = cl_mem_type_to_texture_target(desc.image_type);
    let format = format.to_pipe_format().unwrap();

    let mut textures = HashMap::new();
    for dev in &self.devs {
        let tex = dev
            .screen()
            .resource_create_texture(width, height, depth, array_size, target, format)
            .ok_or(CL_OUT_OF_RESOURCES)?;
        textures.insert(Arc::clone(dev), Arc::new(tex));
    }
    Ok(textures)
}
/// Like [`create_texture`], but wraps caller-provided host storage
/// (`user_ptr`) instead of allocating fresh device memory — used for
/// CL_MEM_USE_HOST_PTR images.
pub fn create_texture_from_user(
    &self,
    desc: &cl_image_desc,
    format: &cl_image_format,
    user_ptr: *mut c_void,
) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
    // Narrow the usize dimensions to the integer widths the gallium screen
    // API takes; a value that does not fit counts as an allocation failure.
    let width = desc.image_width.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let height = desc.image_height.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let depth = desc.image_depth.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let array_size = desc.image_array_size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
    let target = cl_mem_type_to_texture_target(desc.image_type);
    let format = format.to_pipe_format().unwrap();

    let mut textures = HashMap::new();
    for dev in &self.devs {
        let tex = dev
            .screen()
            .resource_create_texture_from_user(
                width, height, depth, array_size, target, format, user_ptr,
            )
            .ok_or(CL_OUT_OF_RESOURCES)?;
        textures.insert(Arc::clone(dev), Arc::new(tex));
    }
    Ok(textures)
}
}
impl Drop for Context {

View file

@ -179,3 +179,77 @@ pub const FORMATS: &[RusticlImageFormat] = &[
pipe_format::PIPE_FORMAT_B8G8R8A8_UNORM,
),
];
/// Size and layout queries for a CL image format.
///
/// Implementors provide `channels`, `format_info` and `to_pipe_format`; the
/// remaining queries are derived from those. All queries return `None` for
/// unknown channel orders or data types.
pub trait CLFormatInfo {
    /// Number of channels implied by the channel order.
    fn channels(&self) -> Option<u8>;
    /// Returns (size in bytes, is_packed). For plain types the size is per
    /// channel; for packed types it is the size of the whole pixel.
    fn format_info(&self) -> Option<(u8, bool)>;
    /// Maps this CL format to the matching gallium pipe_format, if any.
    fn to_pipe_format(&self) -> Option<pipe_format>;

    /// Per-channel size in bytes. Panics if called on a packed format.
    fn channel_size(&self) -> Option<u8> {
        if let Some(packed) = self.is_packed() {
            assert!(!packed);
            self.format_info().map(|i| i.0)
        } else {
            None
        }
    }

    /// Whole-pixel size in bytes of a packed format. Panics if called on a
    /// non-packed format.
    fn packed_size(&self) -> Option<u8> {
        if let Some(packed) = self.is_packed() {
            assert!(packed);
            self.format_info().map(|i| i.0)
        } else {
            None
        }
    }

    /// Whether the data type packs all channels into a single value.
    fn is_packed(&self) -> Option<bool> {
        self.format_info().map(|i| i.1)
    }

    /// Size of one pixel in bytes, derived from the channel count and
    /// channel size (or the packed size for packed types).
    fn pixel_size(&self) -> Option<u8> {
        if let Some(packed) = self.is_packed() {
            if packed {
                self.packed_size()
            } else {
                self.channels().zip(self.channel_size()).map(|(c, s)| c * s)
            }
        } else {
            None
        }
    }
}
impl CLFormatInfo for cl_image_format {
    #[allow(non_upper_case_globals)]
    fn channels(&self) -> Option<u8> {
        // Channel counts follow the CL spec's channel-order table; unknown
        // orders yield None so callers can reject the format.
        match self.image_channel_order {
            CL_R | CL_A | CL_DEPTH | CL_INTENSITY | CL_LUMINANCE => Some(1),
            CL_RG | CL_RA | CL_Rx => Some(2),
            CL_RGB | CL_RGx | CL_sRGB => Some(3),
            CL_RGBA | CL_ARGB | CL_BGRA | CL_ABGR | CL_RGBx | CL_sRGBA | CL_sBGRA | CL_sRGBx => {
                Some(4)
            }
            _ => None,
        }
    }

    fn format_info(&self) -> Option<(u8, bool)> {
        // (byte size, packed?) per channel data type; for the packed types
        // the size covers the whole pixel rather than one channel.
        match self.image_channel_data_type {
            CL_SIGNED_INT8 | CL_UNSIGNED_INT8 | CL_SNORM_INT8 | CL_UNORM_INT8 => Some((1, false)),
            CL_SIGNED_INT16 | CL_UNSIGNED_INT16 | CL_SNORM_INT16 | CL_UNORM_INT16
            | CL_HALF_FLOAT => Some((2, false)),
            CL_SIGNED_INT32 | CL_UNSIGNED_INT32 | CL_FLOAT => Some((4, false)),
            CL_UNORM_SHORT_555 | CL_UNORM_SHORT_565 => Some((2, true)),
            CL_UNORM_INT_101010 | CL_UNORM_INT_101010_2 => Some((4, true)),
            _ => None,
        }
    }

    fn to_pipe_format(&self) -> Option<pipe_format> {
        // Linear scan over the static format table; fine for its size.
        FORMATS
            .iter()
            .find(|f| f.cl_image_format == *self)
            .map(|f| f.pipe)
    }
}

View file

@ -409,7 +409,7 @@ impl Kernel {
block: &[usize],
grid: &[usize],
offsets: &[usize],
) -> EventSig {
) -> CLResult<EventSig> {
let nir = self.nirs.get(&q.device).unwrap();
let mut block = create_kernel_arr::<u32>(block, 1);
let mut grid = create_kernel_arr::<u32>(grid, 1);
@ -436,7 +436,7 @@ impl Kernel {
KernelArgValue::Constant(c) => input.extend_from_slice(c),
KernelArgValue::MemObject(mem) => {
input.extend_from_slice(&mem.offset.to_ne_bytes());
resource_info.push((Some(mem.get_res_of_dev(&q.device).clone()), arg.offset));
resource_info.push((Some(mem.get_res_of_dev(&q.device)?.clone()), arg.offset));
}
KernelArgValue::LocalMem(size) => {
// TODO 32 bit
@ -491,7 +491,7 @@ impl Kernel {
}
let k = self.clone();
Box::new(move |q, ctx| {
Ok(Box::new(move |q, ctx| {
let nir = k.nirs.get(&q.device).unwrap();
let mut input = input.clone();
let mut resources = Vec::with_capacity(resource_info.len());
@ -545,7 +545,7 @@ impl Kernel {
}
Ok(())
})
}))
}
pub fn access_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_access_qualifier {

View file

@ -1,4 +1,5 @@
extern crate mesa_rust;
extern crate mesa_rust_gen;
extern crate rusticl_opencl_gen;
use crate::api::icd::*;
@ -6,14 +7,17 @@ use crate::api::types::*;
use crate::api::util::*;
use crate::core::context::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::queue::*;
use crate::impl_cl_type_trait;
use self::mesa_rust::pipe::context::*;
use self::mesa_rust::pipe::resource::*;
use self::mesa_rust::pipe::transfer::*;
use self::mesa_rust_gen::*;
use self::rusticl_opencl_gen::*;
use std::cmp;
use std::collections::HashMap;
use std::convert::TryInto;
use std::ops::AddAssign;
@ -43,6 +47,97 @@ pub struct Mem {
impl_cl_type_trait!(cl_mem, Mem, CL_INVALID_MEM_OBJECT);
/// Geometry queries derived from a CL image descriptor.
pub trait CLImageDescInfo {
    /// Returns (dimensionality, is_array) for the descriptor's image type.
    fn type_info(&self) -> (u8, bool);
    /// Total number of pixels covered by the descriptor.
    fn pixels(&self) -> usize;
    /// A pipe_box covering the whole image, for full-surface transfers.
    fn bx(&self) -> CLResult<pipe_box>;
    /// image_row_pitch narrowed to u32.
    fn row_pitch(&self) -> CLResult<u32>;
    /// image_slice_pitch narrowed to u32.
    fn slice_pitch(&self) -> CLResult<u32>;

    /// Number of dimensions (1, 2 or 3).
    fn dims(&self) -> u8 {
        self.type_info().0
    }

    /// True when a slice pitch applies: 3D images and all array types.
    fn has_slice(&self) -> bool {
        self.dims() == 3 || self.is_array()
    }

    /// True for the 1D/2D array image types.
    fn is_array(&self) -> bool {
        self.type_info().1
    }
}
impl CLImageDescInfo for cl_image_desc {
    fn type_info(&self) -> (u8, bool) {
        // (dims, is_array) per CL image type. The panic arm documents that
        // image_type is expected to be validated before these helpers run.
        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
            CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
            CL_MEM_OBJECT_IMAGE2D => (2, false),
            CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
            CL_MEM_OBJECT_IMAGE3D => (3, false),
            _ => panic!("unknown image_type {:x}", self.image_type),
        }
    }

    fn pixels(&self) -> usize {
        // Multiply only the extents that exist for this image type.
        let mut res = self.image_width;
        let dims = self.dims();

        if dims > 1 {
            res *= self.image_height;
        }

        if dims > 2 {
            res *= self.image_depth;
        }

        if self.is_array() {
            res *= self.image_array_size;
        }

        res
    }

    fn bx(&self) -> CLResult<pipe_box> {
        // For array images the layer count takes the place of the depth.
        let mut depth = if self.is_array() {
            self.image_array_size
                .try_into()
                .map_err(|_| CL_OUT_OF_HOST_MEMORY)?
        } else {
            self.image_depth
                .try_into()
                .map_err(|_| CL_OUT_OF_HOST_MEMORY)?
        };

        // Clamp unused extents to 1 so the box is never empty.
        let height = cmp::max(self.image_height, 1);
        depth = cmp::max(depth, 1);

        Ok(pipe_box {
            x: 0,
            y: 0,
            z: 0,
            width: self
                .image_width
                .try_into()
                .map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            height: height.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            // was `depth: depth` (clippy::redundant_field_names)
            depth,
        })
    }

    fn row_pitch(&self) -> CLResult<u32> {
        self.image_row_pitch
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }

    fn slice_pitch(&self) -> CLResult<u32> {
        self.image_slice_pitch
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }
}
fn sw_copy(
src: *const c_void,
dst: *mut c_void,
@ -53,6 +148,7 @@ fn sw_copy(
dst_origin: &CLVec<usize>,
dst_row_pitch: usize,
dst_slice_pitch: usize,
pixel_size: u8,
) {
for z in 0..region[2] {
for y in 0..region[1] {
@ -60,13 +156,36 @@ fn sw_copy(
ptr::copy_nonoverlapping(
src.add((*src_origin + [0, y, z]) * [1, src_row_pitch, src_slice_pitch]),
dst.add((*dst_origin + [0, y, z]) * [1, dst_row_pitch, dst_slice_pitch]),
region[0],
region[0] * pixel_size as usize,
)
};
}
}
}
/// Builds a pipe_box for the sub-region `origin`/`region` of a texture of
/// type `tex_type`, converting each coordinate with overflow checks.
fn create_box(
    origin: &CLVec<usize>,
    region: &CLVec<usize>,
    tex_type: cl_mem_object_type,
) -> CLResult<pipe_box> {
    let mut y_axis = 1;
    let mut z_axis = 2;

    // A 1D array image keeps its slices in the z/depth dimension, so the
    // y and z coordinates trade places.
    if tex_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
        std::mem::swap(&mut y_axis, &mut z_axis);
    }

    Ok(pipe_box {
        x: origin[0].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        y: origin[y_axis].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        z: origin[z_axis].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        width: region[0].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        height: region[y_axis].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        depth: region[z_axis].try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
    })
}
impl Mem {
pub fn new_buffer(
context: Arc<Context>,
@ -153,64 +272,126 @@ impl Mem {
mem_type: cl_mem_object_type,
flags: cl_mem_flags,
image_format: &cl_image_format,
image_desc: cl_image_desc,
mut image_desc: cl_image_desc,
image_elem_size: u8,
host_ptr: *mut c_void,
props: Vec<cl_mem_properties>,
) -> Arc<Mem> {
if bit_check(
flags,
CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR,
) {
) -> CLResult<Arc<Mem>> {
if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
println!("host ptr semantics not implemented!");
}
// we have to sanitize the image_desc a little for internal use
let api_image_desc = image_desc;
let dims = image_desc.dims();
let is_array = image_desc.is_array();
if dims < 3 {
image_desc.image_depth = 1;
}
if dims < 2 {
image_desc.image_height = 1;
}
if !is_array {
image_desc.image_array_size = 1;
}
let texture = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
context.create_texture_from_user(&image_desc, image_format, host_ptr)
} else {
context.create_texture(&image_desc, image_format)
}?;
if bit_check(flags, CL_MEM_COPY_HOST_PTR) {
let bx = image_desc.bx()?;
let stride = image_desc.row_pitch()?;
let layer_stride = image_desc.slice_pitch()?;
for (d, r) in &texture {
d.helper_ctx()
.texture_subdata(r, &bx, host_ptr, stride, layer_stride);
}
}
let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
host_ptr
} else {
ptr::null_mut()
};
Arc::new(Self {
Ok(Arc::new(Self {
base: CLObjectBase::new(),
context: context,
parent: None,
mem_type: mem_type,
flags: flags,
size: 0,
size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize,
offset: 0,
host_ptr: host_ptr,
image_format: *image_format,
image_desc: image_desc,
image_desc: api_image_desc,
image_elem_size: image_elem_size,
props: props,
cbs: Mutex::new(Vec::new()),
res: None,
res: Some(texture),
maps: Mutex::new(HashMap::new()),
})
}))
}
pub fn is_buffer(&self) -> bool {
self.mem_type == CL_MEM_OBJECT_BUFFER
}
fn tx(
&self,
q: &Arc<Queue>,
ctx: &Arc<PipeContext>,
mut offset: usize,
size: usize,
blocking: bool,
) -> CLResult<PipeTransfer> {
let b = self.to_parent(&mut offset);
let r = b.get_res()?.get(&q.device).unwrap();
assert!(self.is_buffer());
Ok(ctx.buffer_map(
r,
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
blocking,
))
}
fn tx_image(
&self,
q: &Arc<Queue>,
ctx: &Arc<PipeContext>,
bx: &pipe_box,
blocking: bool,
) -> CLResult<PipeTransfer> {
assert!(!self.is_buffer());
let r = self.get_res()?.get(&q.device).unwrap();
Ok(ctx.texture_map(r, bx, blocking))
}
pub fn has_same_parent(&self, other: &Self) -> bool {
let a = self.parent.as_ref().map_or(self, |p| p);
let b = other.parent.as_ref().map_or(other, |p| p);
ptr::eq(a, b)
}
fn get_res(&self) -> &HashMap<Arc<Device>, Arc<PipeResource>> {
fn get_res(&self) -> CLResult<&HashMap<Arc<Device>, Arc<PipeResource>>> {
self.parent
.as_ref()
.map_or(self, |p| p.as_ref())
.res
.as_ref()
.unwrap()
.ok_or(CL_OUT_OF_HOST_MEMORY)
}
pub fn get_res_of_dev(&self, dev: &Arc<Device>) -> &Arc<PipeResource> {
self.get_res().get(dev).unwrap()
pub fn get_res_of_dev(&self, dev: &Arc<Device>) -> CLResult<&Arc<PipeResource>> {
Ok(self.get_res()?.get(dev).unwrap())
}
fn to_parent<'a>(&'a self, offset: &mut usize) -> &'a Self {
@ -226,24 +407,18 @@ impl Mem {
&self,
q: &Arc<Queue>,
ctx: &Arc<PipeContext>,
mut offset: usize,
offset: usize,
ptr: *mut c_void,
size: usize,
) -> CLResult<()> {
let b = self.to_parent(&mut offset);
let r = b.get_res().get(&q.device).unwrap();
let tx = ctx.buffer_map(
r,
offset.try_into().unwrap(),
size.try_into().unwrap(),
true,
);
assert!(self.is_buffer());
let tx = self.tx(q, ctx, offset, size, true)?;
unsafe {
ptr::copy_nonoverlapping(tx.ptr(), ptr, size);
}
drop(tx);
Ok(())
}
@ -255,8 +430,10 @@ impl Mem {
ptr: *const c_void,
size: usize,
) -> CLResult<()> {
assert!(self.is_buffer());
let b = self.to_parent(&mut offset);
let r = b.get_res().get(&q.device).unwrap();
let r = b.get_res()?.get(&q.device).unwrap();
ctx.buffer_subdata(
r,
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
@ -275,11 +452,13 @@ impl Mem {
mut dst_offset: usize,
size: usize,
) -> CLResult<()> {
assert!(self.is_buffer());
let src = self.to_parent(&mut src_offset);
let dst = dst.to_parent(&mut dst_offset);
let src_res = src.get_res().get(&q.device).unwrap();
let dst_res = dst.get_res().get(&q.device).unwrap();
let src_res = src.get_res()?.get(&q.device).unwrap();
let dst_res = dst.get_res()?.get(&q.device).unwrap();
ctx.resource_copy_region(
src_res,
@ -299,8 +478,10 @@ impl Mem {
mut offset: usize,
size: usize,
) -> CLResult<()> {
assert!(self.is_buffer());
let b = self.to_parent(&mut offset);
let res = b.get_res().get(&q.device).unwrap();
let res = b.get_res()?.get(&q.device).unwrap();
ctx.clear_buffer(
res,
pattern,
@ -318,29 +499,49 @@ impl Mem {
region: &CLVec<usize>,
src_origin: &CLVec<usize>,
src_row_pitch: usize,
src_slice_pitch: usize,
mut src_slice_pitch: usize,
dst_origin: &CLVec<usize>,
dst_row_pitch: usize,
dst_slice_pitch: usize,
) -> CLResult<()> {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let r = b.res.as_ref().unwrap().get(&q.device).unwrap();
let tx = ctx.buffer_map(r, 0, self.size.try_into().unwrap(), true);
if self.is_buffer() {
let tx = self.tx(q, ctx, 0, self.size, true)?;
sw_copy(
src,
tx.ptr(),
region,
src_origin,
src_row_pitch,
src_slice_pitch,
dst_origin,
dst_row_pitch,
dst_slice_pitch,
1,
);
} else {
assert!(dst_row_pitch == self.image_desc.image_row_pitch);
assert!(dst_slice_pitch == self.image_desc.image_slice_pitch);
assert!(src_origin == &CLVec::default());
sw_copy(
src,
unsafe { tx.ptr().add(offset) },
region,
src_origin,
src_row_pitch,
src_slice_pitch,
dst_origin,
dst_row_pitch,
dst_slice_pitch,
);
let res = self.get_res()?.get(&q.device).unwrap();
let bx = create_box(dst_origin, region, self.mem_type)?;
drop(tx);
if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
src_slice_pitch = src_row_pitch;
}
ctx.texture_subdata(
res,
&bx,
src,
src_row_pitch
.try_into()
.map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
src_slice_pitch
.try_into()
.map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
);
}
Ok(())
}
@ -351,19 +552,31 @@ impl Mem {
ctx: &Arc<PipeContext>,
region: &CLVec<usize>,
src_origin: &CLVec<usize>,
src_row_pitch: usize,
src_slice_pitch: usize,
mut src_row_pitch: usize,
mut src_slice_pitch: usize,
dst_origin: &CLVec<usize>,
dst_row_pitch: usize,
dst_slice_pitch: usize,
) -> CLResult<()> {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let r = b.res.as_ref().unwrap().get(&q.device).unwrap();
let tx = ctx.buffer_map(r, 0, self.size.try_into().unwrap(), true);
let tx;
let pixel_size;
if self.is_buffer() {
tx = self.tx(q, ctx, 0, self.size, true)?;
pixel_size = 1;
} else {
assert!(dst_origin == &CLVec::default());
let bx = create_box(src_origin, region, self.mem_type)?;
tx = self.tx_image(q, ctx, &bx, true)?;
src_row_pitch = tx.row_pitch() as usize;
src_slice_pitch = tx.slice_pitch() as usize;
pixel_size = self.image_format.pixel_size().unwrap();
};
sw_copy(
unsafe { tx.ptr().add(offset) },
tx.ptr(),
dst,
region,
src_origin,
@ -372,9 +585,9 @@ impl Mem {
dst_origin,
dst_row_pitch,
dst_slice_pitch,
pixel_size,
);
drop(tx);
Ok(())
}
@ -391,21 +604,15 @@ impl Mem {
dst_row_pitch: usize,
dst_slice_pitch: usize,
) -> CLResult<()> {
let mut src_offset = 0;
let mut dst_offset = 0;
let src = self.to_parent(&mut src_offset);
let dst = dst.to_parent(&mut dst_offset);
assert!(self.is_buffer());
let res_src = src.res.as_ref().unwrap().get(&q.device).unwrap();
let res_dst = dst.res.as_ref().unwrap().get(&q.device).unwrap();
let tx_src = ctx.buffer_map(res_src, 0, src.size.try_into().unwrap(), true);
let tx_dst = ctx.buffer_map(res_dst, 0, dst.size.try_into().unwrap(), true);
let tx_src = self.tx(q, ctx, 0, self.size, true)?;
let tx_dst = dst.tx(q, ctx, 0, self.size, true)?;
// TODO check to use hw accelerated paths (e.g. resource_copy_region or blits)
sw_copy(
unsafe { tx_src.ptr().add(src_offset) },
unsafe { tx_dst.ptr().add(dst_offset) },
tx_src.ptr(),
tx_dst.ptr(),
region,
src_origin,
src_row_pitch,
@ -413,26 +620,24 @@ impl Mem {
dst_origin,
dst_row_pitch,
dst_slice_pitch,
1,
);
drop(tx_src);
drop(tx_dst);
Ok(())
}
// TODO use PIPE_MAP_UNSYNCHRONIZED for non blocking
pub fn map(&self, q: &Arc<Queue>, mut offset: usize, size: usize, block: bool) -> *mut c_void {
let b = self.to_parent(&mut offset);
pub fn map(
&self,
q: &Arc<Queue>,
offset: usize,
size: usize,
block: bool,
) -> CLResult<*mut c_void> {
assert!(self.is_buffer());
let res = b.res.as_ref().unwrap().get(&q.device).unwrap();
let tx = q.device.helper_ctx().buffer_map(
res,
offset.try_into().unwrap(),
size.try_into().unwrap(),
block,
);
let tx = self.tx(q, &q.device.helper_ctx(), offset, size, block)?;
let ptr = tx.ptr();
let mut lock = self.maps.lock().unwrap();
let e = lock.get_mut(&ptr);
@ -443,7 +648,7 @@ impl Mem {
lock.insert(tx.ptr(), (1, tx));
}
ptr
Ok(ptr)
}
pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {

View file

@ -55,6 +55,28 @@ impl PipeContext {
}
}
/// Uploads `data` into the sub-region `bx` of the texture `res` through the
/// driver's `texture_subdata` hook, using the given row and layer strides
/// (in bytes).
pub fn texture_subdata(
    &self,
    res: &PipeResource,
    bx: &pipe_box,
    data: *const c_void,
    stride: u32,
    layer_stride: u32,
) {
    // NOTE(review): unwrap() relies on has_required_cbs() having vetted
    // texture_subdata for this context — confirm it runs for every created
    // context. res/bx/data validity is the caller's contract.
    unsafe {
        self.pipe.as_ref().texture_subdata.unwrap()(
            self.pipe.as_ptr(),
            res.pipe(),
            0, // presumably mip level 0 — confirm against p_context.h
            pipe_map_flags::PIPE_MAP_WRITE.0, // TODO PIPE_MAP_x
            bx,
            data,
            stride,
            layer_stride,
        )
    }
}
pub fn clear_buffer(&self, res: &PipeResource, pattern: &[u8], offset: u32, size: u32) {
unsafe {
self.pipe.as_ref().clear_buffer.unwrap()(
@ -277,5 +299,6 @@ fn has_required_cbs(c: &pipe_context) -> bool {
&& c.resource_copy_region.is_some()
&& c.set_global_binding.is_some()
&& c.texture_map.is_some()
&& c.texture_subdata.is_some()
&& c.texture_unmap.is_some()
}

View file

@ -83,6 +83,20 @@ impl PipeScreen {
)
}
/// Creates a device resource from a fully filled-in template; returns None
/// when the driver refuses the allocation.
fn resource_create(&self, tmpl: &pipe_resource) -> Option<PipeResource> {
    // NOTE(review): unwrap() assumes every screen implements
    // resource_create — confirm this is a mandatory screen callback.
    PipeResource::new(unsafe { (*self.screen).resource_create.unwrap()(self.screen, tmpl) })
}
/// Creates a device resource backed by caller-provided host memory `mem`
/// (CL_MEM_USE_HOST_PTR path); returns None when the driver refuses it.
fn resource_create_from_user(
    &self,
    tmpl: &pipe_resource,
    mem: *mut c_void,
) -> Option<PipeResource> {
    // NOTE(review): resource_from_user_memory is optional in gallium;
    // unwrap() will panic on drivers that leave it NULL — confirm callers
    // gate on driver support.
    PipeResource::new(unsafe {
        (*self.screen).resource_from_user_memory.unwrap()(self.screen, tmpl, mem)
    })
}
pub fn resource_create_buffer(&self, size: u32) -> Option<PipeResource> {
let mut tmpl = pipe_resource::default();
@ -93,7 +107,7 @@ impl PipeScreen {
tmpl.array_size = 1;
tmpl.bind = PIPE_BIND_GLOBAL;
PipeResource::new(unsafe { (*self.screen).resource_create.unwrap()(self.screen, &tmpl) })
self.resource_create(&tmpl)
}
pub fn resource_create_buffer_from_user(
@ -110,9 +124,52 @@ impl PipeScreen {
tmpl.array_size = 1;
tmpl.bind = PIPE_BIND_GLOBAL;
PipeResource::new(unsafe {
(*self.screen).resource_from_user_memory.unwrap()(self.screen, &tmpl, mem)
})
self.resource_create_from_user(&tmpl, mem)
}
/// Allocates a texture resource with the given geometry and format. Binding
/// as sampler view + shader image covers both sampled reads and image
/// writes from kernels.
pub fn resource_create_texture(
    &self,
    width: u32,
    height: u16,
    depth: u16,
    array_size: u16,
    target: pipe_texture_target,
    format: pipe_format,
) -> Option<PipeResource> {
    // Fill out a template describing the texture and hand it to the screen.
    let mut templ = pipe_resource::default();

    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = depth;
    templ.array_size = array_size;
    templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE;
    templ.set_target(target);
    templ.set_format(format);

    self.resource_create(&templ)
}
/// Like [`resource_create_texture`], but backs the texture with the
/// caller-provided host memory `mem` instead of a fresh allocation.
pub fn resource_create_texture_from_user(
    &self,
    width: u32,
    height: u16,
    depth: u16,
    array_size: u16,
    target: pipe_texture_target,
    format: pipe_format,
    mem: *mut c_void,
) -> Option<PipeResource> {
    // Fill out a template describing the texture and hand it to the screen.
    let mut templ = pipe_resource::default();

    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = depth;
    templ.array_size = array_size;
    templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE;
    templ.set_target(target);
    templ.set_format(format);

    self.resource_create_from_user(&templ, mem)
}
pub fn param(&self, cap: pipe_cap) -> i32 {

View file

@ -56,6 +56,14 @@ impl PipeTransfer {
self.ptr
}
/// Row stride in bytes of the mapped region, as reported by the driver.
pub fn row_pitch(&self) -> u32 {
    // NOTE(review): relies on self.pipe pointing at a live pipe_transfer —
    // presumably valid until unmap; confirm against the map/unmap paths.
    unsafe { (*self.pipe).stride }
}
/// Layer/slice stride in bytes of the mapped region, as reported by the
/// driver.
pub fn slice_pitch(&self) -> u32 {
    // NOTE(review): relies on self.pipe pointing at a live pipe_transfer —
    // presumably valid until unmap; confirm against the map/unmap paths.
    unsafe { (*self.pipe).layer_stride }
}
pub fn with_ctx(self, ctx: &PipeContext) -> GuardedPipeTransfer {
GuardedPipeTransfer {
inner: self,