rusticl/mem: rewrite the (un)mapping code

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18793>
Author: Karol Herbst
Date: 2022-09-23 20:15:16 +02:00 (committed by Marge Bot)
parent dc081353ac
commit 72f763f5cc
5 changed files with 318 additions and 213 deletions


@@ -13,7 +13,6 @@ use mesa_rust_util::properties::Properties;
use mesa_rust_util::ptr::*;
use rusticl_opencl_gen::*;
use std::cell::Cell;
use std::cmp::Ordering;
use std::os::raw::c_void;
use std::ptr;
@@ -1594,35 +1593,17 @@ pub fn enqueue_map_buffer(
return Err(CL_INVALID_CONTEXT);
}
if block {
let ptr = Arc::new(Cell::new(Ok(ptr::null_mut())));
let cloned = ptr.clone();
create_and_queue(
q,
CL_COMMAND_MAP_BUFFER,
evs,
event,
block,
Box::new(move |q, ctx| {
cloned.set(b.map_buffer(q, Some(ctx), offset, size));
Ok(())
}),
)?;
let ptr = b.map_buffer(&q, offset, size)?;
create_and_queue(
q,
CL_COMMAND_MAP_BUFFER,
evs,
event,
block,
Box::new(move |q, ctx| b.sync_shadow_buffer(q, ctx, ptr)),
)?;
ptr.get()
} else {
let ptr = b.map_buffer(&q, None, offset, size);
create_and_queue(
q,
CL_COMMAND_MAP_BUFFER,
evs,
event,
block,
Box::new(move |q, ctx| b.sync_shadow_buffer(q, ctx, true)),
)?;
ptr
}
Ok(ptr)
// TODO
// CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for the device associated with queue. This error code is missing before version 1.1.
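
Editor's note: the shape of the rewrite is easiest to see in isolation. The pointer is now produced up front by `map_buffer`, and only the data synchronization is deferred into the queued event, for blocking and non-blocking maps alike. Below is a condensed, self-contained sketch of that flow with toy types — not the commit's code; `create_and_queue` in the real code is assumed to flush and wait when `block` is set.

```rust
use std::cell::UnsafeCell;
use std::sync::Arc;

// Toy stand-ins for the rusticl types; only the control flow matters here.
struct Buffer {
    storage: UnsafeCell<Vec<u8>>,
}

struct Queue;
struct Ctx;

impl Buffer {
    // Returns a stable pointer immediately; no data is synchronized yet.
    fn map_buffer(&self, _q: &Queue, offset: usize, _size: usize) -> Result<*mut u8, i32> {
        Ok(unsafe { (*self.storage.get()).as_mut_ptr().add(offset) })
    }

    // Runs later, when the map event actually executes on the queue.
    fn sync_shadow_buffer(&self, _q: &Queue, _ctx: &Ctx, _ptr: *mut u8) -> Result<(), i32> {
        Ok(()) // the resource/shadow contents behind the pointer get synced here
    }
}

fn enqueue_map(b: Arc<Buffer>, q: Queue, block: bool, offset: usize, size: usize) -> Result<*mut u8, i32> {
    // 1. Map up front: the pointer is known before the event runs.
    let ptr = b.map_buffer(&q, offset, size)?;

    // 2. Only the synchronization is queued. In the real code this closure
    //    goes through create_and_queue(), which also waits when `block` is set.
    let work = move |q: &Queue, ctx: &Ctx| b.sync_shadow_buffer(q, ctx, ptr);
    if block {
        work(&q, &Ctx)?; // stand-in for flushing and waiting on the event
    }
    Ok(ptr)
}

fn main() {
    let b = Arc::new(Buffer { storage: UnsafeCell::new(vec![0; 64]) });
    let ptr = enqueue_map(b, Queue, true, 16, 16).unwrap();
    assert!(!ptr.is_null());
}
```
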
@@ -2069,60 +2050,24 @@ pub fn enqueue_map_image(
unsafe { image_slice_pitch.as_mut().unwrap() }
};
if block {
let res = Arc::new(Cell::new((Ok(ptr::null_mut()), 0, 0)));
let cloned = res.clone();
let ptr = i.map_image(
&q,
&origin,
&region,
unsafe { image_row_pitch.as_mut().unwrap() },
image_slice_pitch,
)?;
create_and_queue(
q.clone(),
CL_COMMAND_MAP_IMAGE,
evs,
event,
block,
// we don't really have anything to do here?
Box::new(move |q, ctx| {
let mut image_row_pitch = 0;
let mut image_slice_pitch = 0;
create_and_queue(
q.clone(),
CL_COMMAND_MAP_IMAGE,
evs,
event,
block,
Box::new(move |q, ctx| i.sync_shadow_image(q, ctx, ptr)),
)?;
let ptr = i.map_image(
q,
Some(ctx),
&origin,
&region,
&mut image_row_pitch,
&mut image_slice_pitch,
);
cloned.set((ptr, image_row_pitch, image_slice_pitch));
Ok(())
}),
)?;
let res = res.get();
unsafe { *image_row_pitch = res.1 };
*image_slice_pitch = res.2;
res.0
} else {
let ptr = i.map_image(
&q,
None,
&origin,
&region,
unsafe { image_row_pitch.as_mut().unwrap() },
image_slice_pitch,
);
create_and_queue(
q.clone(),
CL_COMMAND_MAP_IMAGE,
evs,
event,
block,
Box::new(move |q, ctx| i.sync_shadow_image(q, ctx, true)),
)?;
ptr
}
Ok(ptr)
//• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region.
//• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
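
Editor's note: both hunks in this file delete the same anti-pattern. Because the old code could only produce the mapped pointer inside the queued closure, a blocking map had to round-trip the result (and, for images, the pitches) through an `Arc<Cell<_>>`. A reduced, self-contained sketch of that removed shape, with a hypothetical `run_queued` standing in for `create_and_queue` plus the wait:

```rust
use std::cell::Cell;
use std::ptr;
use std::sync::Arc;

// Reduced sketch of the removed pattern: the map result only exists after
// the queued closure has run, so it travels through a shared Cell.
fn old_blocking_map(run_queued: impl FnOnce(Box<dyn FnOnce()>)) -> Result<*mut u8, i32> {
    let slot: Arc<Cell<Result<*mut u8, i32>>> = Arc::new(Cell::new(Ok(ptr::null_mut())));
    let cloned = slot.clone();
    run_queued(Box::new(move || {
        // the actual map happened here, inside the event
        cloned.set(Ok(ptr::null_mut()));
    }));
    // Only meaningful because run_queued blocked until the closure ran.
    slot.get()
}

fn main() {
    // "Queue" that just runs the work immediately.
    let res = old_blocking_map(|work| work());
    assert!(res.is_ok());
}
```

With `map_buffer`/`map_image` moved to enqueue time, the pointer and the pitches are available immediately and the event closure only carries the sync.
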


@@ -48,9 +48,6 @@ pub trait HelperContextWrapper {
where
F: Fn(&HelperContext);
fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer;
fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer;
fn buffer_map_directly(
&self,
res: &PipeResource,
@@ -116,16 +113,6 @@ impl<'a> HelperContextWrapper for HelperContext<'a> {
self.lock.flush()
}
fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer {
self.lock
.buffer_map(res, offset, size, RWFlags::RW, ResourceMapType::Async)
}
fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer {
self.lock
.texture_map(res, bx, RWFlags::RW, ResourceMapType::Async)
}
fn buffer_map_directly(
&self,
res: &PipeResource,


@@ -5,10 +5,12 @@ use crate::core::context::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::queue::*;
use crate::core::util::cl_mem_type_to_texture_target;
use crate::impl_cl_type_trait;
use mesa_rust::pipe::context::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::ResourceType;
use mesa_rust::pipe::transfer::*;
use mesa_rust_gen::*;
use mesa_rust_util::math::*;
@@ -26,8 +28,24 @@ use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;
struct MappingTransfer {
tx: PipeTransfer,
shadow: Option<PipeResource>,
pending: u32,
}
impl MappingTransfer {
fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
MappingTransfer {
tx: tx,
shadow: shadow,
pending: 1,
}
}
}
struct Mappings {
tx: HashMap<Arc<Device>, (PipeTransfer, u32)>,
tx: HashMap<Arc<Device>, MappingTransfer>,
maps: HashMap<*mut c_void, u32>,
}
@@ -38,6 +56,49 @@ impl Mappings {
maps: HashMap::new(),
})
}
fn mark_pending(&mut self, dev: &Device) {
self.tx.get_mut(dev).unwrap().pending += 1;
}
fn unmark_pending(&mut self, dev: &Device) {
if let Some(tx) = self.tx.get_mut(dev) {
tx.pending -= 1;
}
}
fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
let res = self.maps.is_empty();
*self.maps.entry(ptr).or_default() += 1;
self.unmark_pending(dev);
res
}
fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
if let Some(r) = self.maps.get_mut(&ptr) {
*r -= 1;
if *r == 0 {
self.maps.remove(&ptr);
}
if self.maps.is_empty() {
let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
return (true, shadow);
}
}
(false, None)
}
fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
if self.maps.is_empty() {
if let Some(tx) = self.tx.get(dev) {
if tx.pending == 0 {
self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
}
}
}
}
}
#[repr(C)]
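
Editor's note: this bookkeeping is the core of the rewrite — one `MappingTransfer` per device, a `pending` counter for map events that have been queued but not yet executed, and a per-pointer reference count in `maps`. A self-contained toy model of the counting rules (plain `u32`/`usize` keys instead of `Device` references and real transfers):

```rust
use std::collections::HashMap;

#[derive(Default)]
struct MappingTransfer {
    pending: u32,
}

#[derive(Default)]
struct Mappings {
    tx: HashMap<u32, MappingTransfer>,
    maps: HashMap<usize, u32>,
}

impl Mappings {
    // A map event was queued but has not executed yet.
    fn mark_pending(&mut self, dev: u32) {
        self.tx.get_mut(&dev).unwrap().pending += 1;
    }

    // The map event executed: move one "pending" into a live mapping.
    fn increase_ref(&mut self, dev: u32, ptr: usize) -> bool {
        let first = self.maps.is_empty();
        *self.maps.entry(ptr).or_default() += 1;
        if let Some(tx) = self.tx.get_mut(&dev) {
            tx.pending -= 1;
        }
        first // true => caller must sync data in for the first mapping
    }

    // Unmap: returns true when the last mapping went away.
    fn decrease_ref(&mut self, ptr: usize) -> bool {
        if let Some(r) = self.maps.get_mut(&ptr) {
            *r -= 1;
            if *r == 0 {
                self.maps.remove(&ptr);
            }
            return self.maps.is_empty();
        }
        false
    }

    // The transfer may only be destroyed once nothing is mapped and no
    // queued-but-unexecuted map events remain.
    fn clean_up_tx(&mut self, dev: u32) {
        if self.maps.is_empty() && self.tx.get(&dev).map_or(false, |t| t.pending == 0) {
            self.tx.remove(&dev);
        }
    }
}

fn main() {
    let mut m = Mappings::default();
    m.tx.insert(0, MappingTransfer { pending: 1 }); // first map queued
    m.mark_pending(0);                  // a second map event queued
    assert!(m.increase_ref(0, 0x1000)); // first event ran: sync data in
    assert!(!m.increase_ref(0, 0x1000)); // second event ran: already synced
    assert!(!m.decrease_ref(0x1000));   // one mapping still alive
    assert!(m.decrease_ref(0x1000));    // last unmap: write data back
    m.clean_up_tx(0);                   // transfer can now be destroyed
    assert!(m.tx.is_empty());
}
```

The first `increase_ref` tells the caller to pull fresh data in, the last `decrease_ref` tells it to write data back, and `clean_up_tx` only destroys the transfer once both counters agree that nothing is in flight.
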
@@ -365,7 +426,7 @@ impl Mem {
fn tx_raw(
&self,
q: &Arc<Queue>,
ctx: Option<&PipeContext>,
ctx: &PipeContext,
mut offset: usize,
size: usize,
rw: RWFlags,
@@ -375,21 +436,51 @@ impl Mem {
assert!(self.is_buffer());
Ok(if let Some(ctx) = ctx {
ctx.buffer_map(
Ok(ctx.buffer_map(
r,
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
rw,
ResourceMapType::Normal,
))
}
fn tx_raw_async(
&self,
q: &Arc<Queue>,
rw: RWFlags,
) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let r = b.get_res()?.get(&q.device).unwrap();
let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
let ctx = q.device.helper_ctx();
assert!(self.is_buffer());
// don't bother mapping directly if it's not UMA
let tx = if q.device.unified_memory() {
ctx.buffer_map_directly(
r,
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
size,
rw,
ResourceMapType::Normal,
)
} else {
q.device.helper_ctx().buffer_map_async(
r,
offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
)
})
None
};
if let Some(tx) = tx {
Ok((tx, None))
} else {
let shadow = q
.device
.screen()
.resource_create_buffer(size as u32, ResourceType::Staging)
.ok_or(CL_OUT_OF_RESOURCES)?;
let tx = ctx.buffer_map_coherent(&shadow, 0, size, rw);
Ok((tx, Some(shadow)))
}
}
fn tx<'a>(
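
Editor's note: the fallback order encoded in `tx_raw_async` (and mirrored by `tx_image_raw_async` below) is — attempt a direct, unsynchronized map only on UMA devices, otherwise fall back to a persistent, coherent staging shadow that is copied to and from when the map and unmap events execute. As a plain decision function with toy types; `try_map_directly` stands in for `buffer_map_directly`, which may legitimately fail even on UMA (e.g. for tiled resources):

```rust
struct Transfer;
struct Shadow;

struct Device {
    uma: bool,
}

impl Device {
    // Stand-in for buffer_map_directly: only worth attempting on UMA,
    // and it may still fail there.
    fn try_map_directly(&self) -> Option<Transfer> {
        if self.uma {
            Some(Transfer)
        } else {
            None
        }
    }

    fn create_staging_shadow(&self) -> Result<Shadow, i32> {
        Ok(Shadow)
    }

    // Stand-in for the persistent+coherent map of the staging resource.
    fn map_coherent(&self, _s: &Shadow) -> Transfer {
        Transfer
    }
}

fn map_async(dev: &Device) -> Result<(Transfer, Option<Shadow>), i32> {
    // 1. Cheap path: direct, unsynchronized map of the real resource.
    if let Some(tx) = dev.try_map_directly() {
        return Ok((tx, None));
    }
    // 2. Fallback: coherent staging shadow; the real resource is copied
    //    in and out when the map/unmap events execute.
    let shadow = dev.create_staging_shadow()?;
    let tx = dev.map_coherent(&shadow);
    Ok((tx, Some(shadow)))
}

fn main() {
    assert!(map_async(&Device { uma: false }).unwrap().1.is_some());
    assert!(map_async(&Device { uma: true }).unwrap().1.is_none());
}
```
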
@@ -400,24 +491,59 @@ impl Mem {
size: usize,
rw: RWFlags,
) -> CLResult<GuardedPipeTransfer<'a>> {
Ok(self.tx_raw(q, Some(ctx), offset, size, rw)?.with_ctx(ctx))
Ok(self.tx_raw(q, ctx, offset, size, rw)?.with_ctx(ctx))
}
fn tx_image_raw(
&self,
q: &Arc<Queue>,
ctx: Option<&PipeContext>,
ctx: &PipeContext,
bx: &pipe_box,
rw: RWFlags,
) -> CLResult<PipeTransfer> {
assert!(!self.is_buffer());
let r = self.get_res()?.get(&q.device).unwrap();
Ok(if let Some(ctx) = ctx {
ctx.texture_map(r, bx, rw, ResourceMapType::Normal)
Ok(ctx.texture_map(r, bx, rw, ResourceMapType::Normal))
}
fn tx_image_raw_async(
&self,
q: &Arc<Queue>,
bx: &pipe_box,
rw: RWFlags,
) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
assert!(!self.is_buffer());
let r = self.get_res()?.get(&q.device).unwrap();
let ctx = q.device.helper_ctx();
// don't bother mapping directly if it's not UMA
let tx = if q.device.unified_memory() {
ctx.texture_map_directly(r, bx, rw)
} else {
q.device.helper_ctx().texture_map_async(r, bx)
})
None
};
if let Some(tx) = tx {
Ok((tx, None))
} else {
let shadow = q
.device
.screen()
.resource_create_texture(
r.width(),
r.height(),
r.depth(),
r.array_size(),
cl_mem_type_to_texture_target(self.image_desc.image_type),
self.image_format.to_pipe_format().unwrap(),
ResourceType::Staging,
)
.ok_or(CL_OUT_OF_RESOURCES)?;
let tx = ctx.texture_map_coherent(&shadow, bx, rw);
Ok((tx, Some(shadow)))
}
}
fn tx_image<'a>(
@@ -427,7 +553,7 @@ impl Mem {
bx: &pipe_box,
rw: RWFlags,
) -> CLResult<GuardedPipeTransfer<'a>> {
Ok(self.tx_image_raw(q, Some(ctx), bx, rw)?.with_ctx(ctx))
Ok(self.tx_image_raw(q, ctx, bx, rw)?.with_ctx(ctx))
}
pub fn has_same_parent(&self, other: &Self) -> bool {
@@ -789,115 +915,139 @@ impl Mem {
Ok(())
}
// TODO: only sync on unmap when memory is mapped for writing
pub fn sync_shadow_buffer(&self, q: &Arc<Queue>, ctx: &PipeContext, map: bool) -> CLResult<()> {
// TODO: only sync on map when the memory is not mapped with discard
pub fn sync_shadow_buffer(
&self,
q: &Arc<Queue>,
ctx: &PipeContext,
ptr: *mut c_void,
) -> CLResult<()> {
let mut lock = self.maps.lock().unwrap();
if !lock.increase_ref(&q.device, ptr) {
return Ok(());
}
if self.has_user_shadow_buffer(&q.device)? {
if map {
self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
} else {
self.write_from_user(q, ctx, 0, self.host_ptr, self.size)
}
self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
} else {
if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let res = b.get_res_of_dev(&q.device)?;
let bx = pipe_box {
width: self.size as i32,
height: 1,
depth: 1,
x: offset as i32,
..Default::default()
};
ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
}
Ok(())
}
}
// TODO: only sync on unmap when memory is mapped for writing
pub fn sync_shadow_image(&self, q: &Arc<Queue>, ctx: &PipeContext, map: bool) -> CLResult<()> {
// TODO: only sync on map when the memory is not mapped with discard
pub fn sync_shadow_image(
&self,
q: &Arc<Queue>,
ctx: &PipeContext,
ptr: *mut c_void,
) -> CLResult<()> {
let mut lock = self.maps.lock().unwrap();
if !lock.increase_ref(&q.device, ptr) {
return Ok(());
}
if self.has_user_shadow_buffer(&q.device)? {
if map {
self.read_to_user_rect(
self.host_ptr,
q,
ctx,
&self.image_desc.size(),
&CLVec::default(),
0,
0,
&CLVec::default(),
self.image_desc.image_row_pitch,
self.image_desc.image_slice_pitch,
)
} else {
self.write_from_user_rect(
self.host_ptr,
q,
ctx,
&self.image_desc.size(),
&CLVec::default(),
self.image_desc.image_row_pitch,
self.image_desc.image_slice_pitch,
&CLVec::default(),
self.image_desc.image_row_pitch,
self.image_desc.image_slice_pitch,
)
}
self.read_to_user_rect(
self.host_ptr,
q,
ctx,
&self.image_desc.api_size(),
&CLVec::default(),
0,
0,
&CLVec::default(),
self.image_desc.image_row_pitch,
self.image_desc.image_slice_pitch,
)
} else {
if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
let res = self.get_res_of_dev(&q.device)?;
let bx = self.image_desc.bx()?;
ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
}
Ok(())
}
}
/// Maps the resource of the device associated with the queue.
///
/// Mapping resources would be quite straightforward if OpenCL didn't allow so-called
/// non-blocking maps. A non-blocking map shall return a valid pointer to the mapped
/// region immediately, but should not synchronize data (in the case of shadow buffers) until
/// after the map event is reached in the queue.
/// This makes it impossible to simply use pipe_transfers, as those can't be explicitly synced
/// by the frontend.
///
/// In order to have a compliant implementation of the mapping API we have to consider the
/// following cases:
/// 1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
/// Synchronization of shadowed host ptrs is done in `sync_shadow_buffer` and
/// `sync_shadow_image` on demand.
/// 2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
/// `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
/// 3. On non-UMA systems, or when 2. fails (e.g. due to the resource being tiled), we
/// - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
/// `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`
/// - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
/// `PIPE_MAP_UNSYNCHRONIZED`
/// - sync the shadow buffer like a host_ptr shadow buffer in 1.
///
/// Taking this approach we guarantee that we only copy when actually needed, while making sure
/// the content behind the returned pointer is valid until unmapped.
fn map<'a>(
&self,
q: &Arc<Queue>,
ctx: Option<&PipeContext>,
lock: &'a mut MutexGuard<Mappings>,
rw: RWFlags,
) -> CLResult<&'a PipeTransfer> {
if !lock.tx.contains_key(&q.device) {
let tx = if self.is_buffer() {
self.tx_raw(q, ctx, 0, self.size, rw)?
let (tx, res) = if self.is_buffer() {
self.tx_raw_async(q, rw)?
} else {
let bx = self.image_desc.bx()?;
self.tx_image_raw(q, ctx, &bx, rw)?
self.tx_image_raw_async(q, &bx, rw)?
};
lock.tx.insert(q.device.clone(), (tx, 0));
lock.tx
.insert(q.device.clone(), MappingTransfer::new(tx, res));
} else {
lock.mark_pending(&q.device);
}
let tx = lock.tx.get_mut(&q.device).unwrap();
tx.1 += 1;
Ok(&tx.0)
Ok(&lock.tx.get_mut(&q.device).unwrap().tx)
}
// TODO: we could map a partial region and increase the mapping on the fly
pub fn map_buffer(
&self,
q: &Arc<Queue>,
ctx: Option<&PipeContext>,
offset: usize,
_size: usize,
) -> CLResult<*mut c_void> {
pub fn map_buffer(&self, q: &Arc<Queue>, offset: usize, _size: usize) -> CLResult<*mut c_void> {
assert!(self.is_buffer());
let mut lock = self.maps.lock().unwrap();
let ptr = if self.has_user_shadow_buffer(&q.device)? {
// copy to the host_ptr if we are blocking
if let Some(ctx) = ctx {
self.sync_shadow_buffer(q, ctx, true)?;
}
self.host_ptr
} else {
let tx = self.map(q, ctx, &mut lock, RWFlags::RW)?;
let tx = self.map(q, &mut lock, RWFlags::RW)?;
tx.ptr()
};
let ptr = unsafe { ptr.add(offset) };
if let Some(e) = lock.maps.get_mut(&ptr) {
*e += 1;
} else {
lock.maps.insert(ptr, 1);
}
Ok(ptr)
}
pub fn map_image(
&self,
q: &Arc<Queue>,
ctx: Option<&PipeContext>,
origin: &CLVec<usize>,
_region: &CLVec<usize>,
row_pitch: &mut usize,
@@ -912,14 +1062,9 @@ impl Mem {
*row_pitch = self.image_desc.image_row_pitch;
*slice_pitch = self.image_desc.image_slice_pitch;
// copy to the host_ptr if we are blocking
if let Some(ctx) = ctx {
self.sync_shadow_image(q, ctx, true)?;
}
self.host_ptr
} else {
let tx = self.map(q, ctx, &mut lock, RWFlags::RW)?;
let tx = self.map(q, &mut lock, RWFlags::RW)?;
if self.image_desc.dims() > 1 {
*row_pitch = tx.row_pitch() as usize;
@@ -942,12 +1087,6 @@ impl Mem {
)
};
if let Some(e) = lock.maps.get_mut(&ptr) {
*e += 1;
} else {
lock.maps.insert(ptr, 1);
}
Ok(ptr)
}
@@ -955,36 +1094,53 @@ impl Mem {
self.maps.lock().unwrap().maps.contains_key(&ptr)
}
// TODO: skip the sync on unmap when the memory is not mapped for writing
pub fn unmap(&self, q: &Arc<Queue>, ctx: &PipeContext, ptr: *mut c_void) -> CLResult<()> {
let mut lock = self.maps.lock().unwrap();
let e = lock.maps.get_mut(&ptr).unwrap();
if *e == 0 {
if !lock.maps.contains_key(&ptr) {
return Ok(());
}
*e -= 1;
if *e == 0 {
lock.maps.remove(&ptr);
}
let (needs_sync, shadow) = lock.decrease_ref(ptr, &q.device);
if needs_sync {
if let Some(shadow) = shadow {
let mut offset = 0;
let b = self.to_parent(&mut offset);
let res = b.get_res_of_dev(&q.device)?;
// TODO: only sync on last unmap and only mapped ranges
if self.has_user_shadow_buffer(&q.device)? {
if self.is_buffer() {
self.sync_shadow_buffer(q, ctx, false)?;
} else {
self.sync_shadow_image(q, ctx, false)?;
let bx = if b.is_buffer() {
pipe_box {
width: self.size as i32,
height: 1,
depth: 1,
..Default::default()
}
} else {
self.image_desc.bx()?
};
ctx.resource_copy_region(shadow, res, &[offset as u32, 0, 0], &bx);
} else if self.has_user_shadow_buffer(&q.device)? {
if self.is_buffer() {
self.write_from_user(q, ctx, 0, self.host_ptr, self.size)?;
} else {
self.write_from_user_rect(
self.host_ptr,
q,
ctx,
&self.image_desc.api_size(),
&CLVec::default(),
self.image_desc.image_row_pitch,
self.image_desc.image_slice_pitch,
&CLVec::default(),
self.image_desc.image_row_pitch,
self.image_desc.image_slice_pitch,
)?;
}
}
}
// shadow buffers don't get a tx option bound
if let Some(tx) = lock.tx.get_mut(&q.device) {
tx.1 -= 1;
if tx.1 == 0 {
lock.tx.remove(&q.device).unwrap().0.with_ctx(ctx);
}
}
lock.clean_up_tx(&q.device, ctx);
Ok(())
}
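
Editor's note: the write-back direction on unmap is now decided in one place. On the last unmap the data either flows from the coherent staging shadow into the real resource with a GPU copy (`resource_copy_region`), or from the user's `host_ptr` via the `write_from_user*` helpers. A compressed sketch of just that branch structure (toy types; `copy_region` is a stand-in):

```rust
// Toy sketch of the write-back decision on the last unmap.
enum Backing {
    StagingShadow, // non-UMA or tiled: coherent staging copy exists
    UserHostPtr,   // CL_MEM_USE_HOST_PTR shadow
    Direct,        // direct map: nothing to copy back
}

fn copy_region(from: &str, to: &str) {
    println!("copy {from} -> {to}");
}

fn unmap(last_mapping: bool, backing: Backing) {
    if !last_mapping {
        return; // other mappings are still alive; data stays where it is
    }
    match backing {
        // GPU copy from the staging resource into the real one
        // (resource_copy_region in the real code).
        Backing::StagingShadow => copy_region("shadow", "resource"),
        // write_from_user/write_from_user_rect in the real code.
        Backing::UserHostPtr => copy_region("host_ptr", "resource"),
        Backing::Direct => {}
    }
    // Afterwards clean_up_tx() may destroy the transfer, but only once no
    // queued-but-unexecuted map events remain (pending == 0).
}

fn main() {
    unmap(true, Backing::StagingShadow);
}
```
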
@@ -1001,7 +1157,7 @@ impl Drop for Mem {
.for_each(|cb| cb(cl));
for (d, tx) in self.maps.lock().unwrap().tx.drain() {
d.helper_ctx().unmap(tx.0);
d.helper_ctx().unmap(tx.tx);
}
}
}
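
Editor's note: a small but necessary detail in the `Drop` change — transfers that are still alive when the memory object is destroyed are released through the device's helper context, because no command-queue context exists at that point. A toy shape of that cleanup (hypothetical names mirroring the real ones):

```rust
use std::collections::HashMap;

struct PipeTransfer;

struct HelperCtx;
impl HelperCtx {
    fn unmap(&self, _tx: PipeTransfer) {}
}

struct Device;
impl Device {
    fn helper_ctx(&self) -> HelperCtx {
        HelperCtx
    }
}

struct Mem {
    tx: HashMap<u32, (Device, PipeTransfer)>,
}

impl Drop for Mem {
    fn drop(&mut self) {
        // Leftover transfers are released on the per-device helper context;
        // no command queue is available during destruction.
        for (_dev_id, (dev, tx)) in self.tx.drain() {
            dev.helper_ctx().unmap(tx);
        }
    }
}

fn main() {
    drop(Mem { tx: HashMap::new() });
}
```
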


@@ -19,6 +19,7 @@ pub struct PipeContext {
unsafe impl Send for PipeContext {}
unsafe impl Sync for PipeContext {}
#[derive(Clone, Copy)]
#[repr(u32)]
pub enum RWFlags {
RD = pipe_map_flags::PIPE_MAP_READ.0,


@@ -27,6 +27,22 @@ impl PipeResource {
unsafe { self.pipe.as_ref().unwrap() }
}
pub fn width(&self) -> u32 {
unsafe { self.pipe.as_ref().unwrap().width0 }
}
pub fn height(&self) -> u16 {
unsafe { self.pipe.as_ref().unwrap().height0 }
}
pub fn depth(&self) -> u16 {
unsafe { self.pipe.as_ref().unwrap().depth0 }
}
pub fn array_size(&self) -> u16 {
unsafe { self.pipe.as_ref().unwrap().array_size }
}
pub fn pipe_image_view(&self, format: pipe_format, read_write: bool) -> pipe_image_view {
let u = if self.as_ref().target() == pipe_texture_target::PIPE_BUFFER {
pipe_image_view__bindgen_ty_1 {