diff --git a/src/gallium/frontends/rusticl/api/memory.rs b/src/gallium/frontends/rusticl/api/memory.rs
index f06e133badd..85e6ae24355 100644
--- a/src/gallium/frontends/rusticl/api/memory.rs
+++ b/src/gallium/frontends/rusticl/api/memory.rs
@@ -13,7 +13,6 @@
 use mesa_rust_util::properties::Properties;
 use mesa_rust_util::ptr::*;
 use rusticl_opencl_gen::*;
-use std::cell::Cell;
 use std::cmp::Ordering;
 use std::os::raw::c_void;
 use std::ptr;
@@ -1594,35 +1593,17 @@ pub fn enqueue_map_buffer(
         return Err(CL_INVALID_CONTEXT);
     }

-    if block {
-        let ptr = Arc::new(Cell::new(Ok(ptr::null_mut())));
-        let cloned = ptr.clone();
-        create_and_queue(
-            q,
-            CL_COMMAND_MAP_BUFFER,
-            evs,
-            event,
-            block,
-            Box::new(move |q, ctx| {
-                cloned.set(b.map_buffer(q, Some(ctx), offset, size));
-                Ok(())
-            }),
-        )?;
+    let ptr = b.map_buffer(&q, offset, size)?;
+    create_and_queue(
+        q,
+        CL_COMMAND_MAP_BUFFER,
+        evs,
+        event,
+        block,
+        Box::new(move |q, ctx| b.sync_shadow_buffer(q, ctx, ptr)),
+    )?;

-        ptr.get()
-    } else {
-        let ptr = b.map_buffer(&q, None, offset, size);
-        create_and_queue(
-            q,
-            CL_COMMAND_MAP_BUFFER,
-            evs,
-            event,
-            block,
-            Box::new(move |q, ctx| b.sync_shadow_buffer(q, ctx, true)),
-        )?;
-
-        ptr
-    }
+    Ok(ptr)

     // TODO
     // CL_MISALIGNED_SUB_BUFFER_OFFSET if buffer is a sub-buffer object and offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for the device associated with queue. This error code is missing before version 1.1.
@@ -2069,60 +2050,24 @@ pub fn enqueue_map_image(
         unsafe { image_slice_pitch.as_mut().unwrap() }
     };

-    if block {
-        let res = Arc::new(Cell::new((Ok(ptr::null_mut()), 0, 0)));
-        let cloned = res.clone();
+    let ptr = i.map_image(
+        &q,
+        &origin,
+        &region,
+        unsafe { image_row_pitch.as_mut().unwrap() },
+        image_slice_pitch,
+    )?;

-        create_and_queue(
-            q.clone(),
-            CL_COMMAND_MAP_IMAGE,
-            evs,
-            event,
-            block,
-            // we don't really have anything to do here?
-            Box::new(move |q, ctx| {
-                let mut image_row_pitch = 0;
-                let mut image_slice_pitch = 0;
+    create_and_queue(
+        q.clone(),
+        CL_COMMAND_MAP_IMAGE,
+        evs,
+        event,
+        block,
+        Box::new(move |q, ctx| i.sync_shadow_image(q, ctx, ptr)),
+    )?;

-                let ptr = i.map_image(
-                    q,
-                    Some(ctx),
-                    &origin,
-                    &region,
-                    &mut image_row_pitch,
-                    &mut image_slice_pitch,
-                );
-                cloned.set((ptr, image_row_pitch, image_slice_pitch));
-
-                Ok(())
-            }),
-        )?;
-
-        let res = res.get();
-        unsafe { *image_row_pitch = res.1 };
-        *image_slice_pitch = res.2;
-        res.0
-    } else {
-        let ptr = i.map_image(
-            &q,
-            None,
-            &origin,
-            &region,
-            unsafe { image_row_pitch.as_mut().unwrap() },
-            image_slice_pitch,
-        );
-
-        create_and_queue(
-            q.clone(),
-            CL_COMMAND_MAP_IMAGE,
-            evs,
-            event,
-            block,
-            Box::new(move |q, ctx| i.sync_shadow_image(q, ctx, true)),
-        )?;
-
-        ptr
-    }
+    Ok(ptr)

     //• CL_INVALID_VALUE if values in origin and region do not follow rules described in the argument description for origin and region.
     //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
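
Editor's note on the API-level change above: clEnqueueMapBuffer/clEnqueueMapImage now obtain the mapped pointer up front and only queue the shadow synchronization as the map command. A standalone sketch of that "hand out the pointer now, sync when the event runs" pattern, using toy types rather than rusticl's queue and event machinery:

// Toy model, not rusticl's API: the map call returns a usable pointer
// immediately; only the synchronization work is queued as the map event.
use std::collections::VecDeque;

type Work = Box<dyn FnOnce()>;

struct Queue {
    events: VecDeque<Work>,
}

impl Queue {
    fn enqueue(&mut self, w: Work) {
        self.events.push_back(w);
    }

    fn flush(&mut self) {
        while let Some(w) = self.events.pop_front() {
            w();
        }
    }
}

fn enqueue_map(q: &mut Queue, buf: &'static [u8]) -> *const u8 {
    let ptr = buf.as_ptr(); // valid right away, even for a non-blocking map
    q.enqueue(Box::new(move || {
        // a real implementation would sync the shadow buffer here, when the
        // CL_COMMAND_MAP_BUFFER event actually executes
        println!("syncing mapping at {ptr:p}");
    }));
    ptr
}

fn main() {
    static DATA: [u8; 4] = [1, 2, 3, 4];
    let mut q = Queue { events: VecDeque::new() };
    let p = enqueue_map(&mut q, &DATA);
    println!("pointer {p:p} handed out before the event ran");
    q.flush(); // a blocking map would wait for this
}
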
diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs
index 0bdf16ecefd..f08757a9e63 100644
--- a/src/gallium/frontends/rusticl/core/device.rs
+++ b/src/gallium/frontends/rusticl/core/device.rs
@@ -48,9 +48,6 @@ pub trait HelperContextWrapper {
     where
         F: Fn(&HelperContext);

-    fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer;
-    fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer;
-
     fn buffer_map_directly(
         &self,
         res: &PipeResource,
@@ -116,16 +113,6 @@ impl<'a> HelperContextWrapper for HelperContext<'a> {
         self.lock.flush()
     }

-    fn buffer_map_async(&self, res: &PipeResource, offset: i32, size: i32) -> PipeTransfer {
-        self.lock
-            .buffer_map(res, offset, size, RWFlags::RW, ResourceMapType::Async)
-    }
-
-    fn texture_map_async(&self, res: &PipeResource, bx: &pipe_box) -> PipeTransfer {
-        self.lock
-            .texture_map(res, bx, RWFlags::RW, ResourceMapType::Async)
-    }
-
     fn buffer_map_directly(
         &self,
         res: &PipeResource,
diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs
index 1d90de7c487..44eec4ca5d8 100644
--- a/src/gallium/frontends/rusticl/core/memory.rs
+++ b/src/gallium/frontends/rusticl/core/memory.rs
@@ -5,10 +5,12 @@
 use crate::core::context::*;
 use crate::core::device::*;
 use crate::core::format::*;
 use crate::core::queue::*;
+use crate::core::util::cl_mem_type_to_texture_target;
 use crate::impl_cl_type_trait;

 use mesa_rust::pipe::context::*;
 use mesa_rust::pipe::resource::*;
+use mesa_rust::pipe::screen::ResourceType;
 use mesa_rust::pipe::transfer::*;
 use mesa_rust_gen::*;
 use mesa_rust_util::math::*;
@@ -26,8 +28,24 @@
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::MutexGuard;

+struct MappingTransfer {
+    tx: PipeTransfer,
+    shadow: Option<PipeResource>,
+    pending: u32,
+}
+
+impl MappingTransfer {
+    fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
+        MappingTransfer {
+            tx: tx,
+            shadow: shadow,
+            pending: 1,
+        }
+    }
+}
+
 struct Mappings {
-    tx: HashMap<Arc<Device>, (PipeTransfer, u32)>,
+    tx: HashMap<Arc<Device>, MappingTransfer>,
     maps: HashMap<*mut c_void, u32>,
 }
@@ -38,6 +56,49 @@ impl Mappings {
             maps: HashMap::new(),
         })
     }
+
+    fn mark_pending(&mut self, dev: &Device) {
+        self.tx.get_mut(dev).unwrap().pending += 1;
+    }
+
+    fn unmark_pending(&mut self, dev: &Device) {
+        if let Some(tx) = self.tx.get_mut(dev) {
+            tx.pending -= 1;
+        }
+    }
+
+    fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
+        let res = self.maps.is_empty();
+        *self.maps.entry(ptr).or_default() += 1;
+        self.unmark_pending(dev);
+        res
+    }
+
+    fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
+        if let Some(r) = self.maps.get_mut(&ptr) {
+            *r -= 1;
+
+            if *r == 0 {
+                self.maps.remove(&ptr);
+            }
+
+            if self.maps.is_empty() {
+                let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
+                return (true, shadow);
+            }
+        }
+        (false, None)
+    }
+
+    fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
+        if self.maps.is_empty() {
+            if let Some(tx) = self.tx.get(dev) {
+                if tx.pending == 0 {
+                    self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
+                }
+            }
+        }
+    }
 }

 #[repr(C)]
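
Editor's note: the new MappingTransfer/Mappings plumbing is reference counting in two dimensions — how many queued map commands still have to run, and how many user pointers are currently handed out. A toy, std-only model of that idea (not rusticl's actual types):

// Toy model (std only): a transfer stays alive while map commands are still
// pending or user pointers are still mapped; it can be torn down once both
// counts drop to zero.
use std::collections::HashMap;

struct Transfer {
    pending: u32, // map commands queued but not executed yet
}

#[derive(Default)]
struct Mappings {
    tx: HashMap<&'static str, Transfer>, // one transfer per device
    maps: HashMap<usize, u32>,           // refcount per mapped pointer
}

impl Mappings {
    fn map_queued(&mut self, dev: &'static str) {
        self.tx.entry(dev).or_insert(Transfer { pending: 0 }).pending += 1;
    }

    fn map_executed(&mut self, dev: &'static str, ptr: usize) {
        if let Some(t) = self.tx.get_mut(dev) {
            t.pending -= 1;
        }
        *self.maps.entry(ptr).or_default() += 1;
    }

    fn unmap(&mut self, dev: &'static str, ptr: usize) -> bool {
        if let Some(r) = self.maps.get_mut(&ptr) {
            *r -= 1;
            if *r == 0 {
                self.maps.remove(&ptr);
            }
        }
        // the transfer may only be destroyed when nothing is mapped anymore
        // and no queued map command still expects it
        self.maps.is_empty() && self.tx.get(dev).map_or(false, |t| t.pending == 0)
    }
}

fn main() {
    let mut m = Mappings::default();
    m.map_queued("gpu0");
    m.map_executed("gpu0", 0x1000);
    assert!(m.unmap("gpu0", 0x1000));
    println!("transfer can be cleaned up");
}
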
@@ -365,7 +426,7 @@ impl Mem {
     fn tx_raw(
         &self,
         q: &Arc<Queue>,
-        ctx: Option<&PipeContext>,
+        ctx: &PipeContext,
         mut offset: usize,
         size: usize,
         rw: RWFlags,
@@ -375,21 +436,51 @@ impl Mem {

         assert!(self.is_buffer());

-        Ok(if let Some(ctx) = ctx {
-            ctx.buffer_map(
+        Ok(ctx.buffer_map(
+            r,
+            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+            rw,
+            ResourceMapType::Normal,
+        ))
+    }
+
+    fn tx_raw_async(
+        &self,
+        q: &Arc<Queue>,
+        rw: RWFlags,
+    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
+        let mut offset = 0;
+        let b = self.to_parent(&mut offset);
+        let r = b.get_res()?.get(&q.device).unwrap();
+        let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
+        let ctx = q.device.helper_ctx();
+
+        assert!(self.is_buffer());
+
+        // don't bother mapping directly if it's not UMA
+        let tx = if q.device.unified_memory() {
+            ctx.buffer_map_directly(
                 r,
                 offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-                size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
+                size,
                 rw,
-                ResourceMapType::Normal,
             )
         } else {
-            q.device.helper_ctx().buffer_map_async(
-                r,
-                offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-                size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
-            )
-        })
+            None
+        };
+
+        if let Some(tx) = tx {
+            Ok((tx, None))
+        } else {
+            let shadow = q
+                .device
+                .screen()
+                .resource_create_buffer(size as u32, ResourceType::Staging)
+                .ok_or(CL_OUT_OF_RESOURCES)?;
+            let tx = ctx.buffer_map_coherent(&shadow, 0, size, rw);
+            Ok((tx, Some(shadow)))
+        }
     }

     fn tx<'a>(
@@ -400,24 +491,59 @@ impl Mem {
         size: usize,
         rw: RWFlags,
     ) -> CLResult<GuardedPipeTransfer<'a>> {
-        Ok(self.tx_raw(q, Some(ctx), offset, size, rw)?.with_ctx(ctx))
+        Ok(self.tx_raw(q, ctx, offset, size, rw)?.with_ctx(ctx))
     }

     fn tx_image_raw(
         &self,
         q: &Arc<Queue>,
-        ctx: Option<&PipeContext>,
+        ctx: &PipeContext,
         bx: &pipe_box,
         rw: RWFlags,
     ) -> CLResult<PipeTransfer> {
         assert!(!self.is_buffer());

         let r = self.get_res()?.get(&q.device).unwrap();

-        Ok(if let Some(ctx) = ctx {
-            ctx.texture_map(r, bx, rw, ResourceMapType::Normal)
+        Ok(ctx.texture_map(r, bx, rw, ResourceMapType::Normal))
+    }
+
+    fn tx_image_raw_async(
+        &self,
+        q: &Arc<Queue>,
+        bx: &pipe_box,
+        rw: RWFlags,
+    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
+        assert!(!self.is_buffer());
+
+        let r = self.get_res()?.get(&q.device).unwrap();
+        let ctx = q.device.helper_ctx();
+
+        // don't bother mapping directly if it's not UMA
+        let tx = if q.device.unified_memory() {
+            ctx.texture_map_directly(r, bx, rw)
         } else {
-            q.device.helper_ctx().texture_map_async(r, bx)
-        })
+            None
+        };
+
+        if let Some(tx) = tx {
+            Ok((tx, None))
+        } else {
+            let shadow = q
+                .device
+                .screen()
+                .resource_create_texture(
+                    r.width(),
+                    r.height(),
+                    r.depth(),
+                    r.array_size(),
+                    cl_mem_type_to_texture_target(self.image_desc.image_type),
+                    self.image_format.to_pipe_format().unwrap(),
+                    ResourceType::Staging,
+                )
+                .ok_or(CL_OUT_OF_RESOURCES)?;
+            let tx = ctx.texture_map_coherent(&shadow, bx, rw);
+            Ok((tx, Some(shadow)))
+        }
     }

     fn tx_image<'a>(
@@ -427,7 +553,7 @@ impl Mem {
         bx: &pipe_box,
         rw: RWFlags,
     ) -> CLResult<GuardedPipeTransfer<'a>> {
-        Ok(self.tx_image_raw(q, Some(ctx), bx, rw)?.with_ctx(ctx))
+        Ok(self.tx_image_raw(q, ctx, bx, rw)?.with_ctx(ctx))
     }

     pub fn has_same_parent(&self, other: &Self) -> bool {
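
Editor's note: the *_async helpers above boil down to one decision — map the real resource directly when that is cheap, otherwise fall back to a persistently mapped staging shadow. A standalone sketch of just that decision, with invented names (try_direct_map, Mapping) that are not part of the gallium API:

// Sketch of the fallback decision only: direct map when possible, otherwise a
// staging shadow that stays coherently mapped for the lifetime of the mapping.
#[derive(Debug)]
enum Mapping {
    Direct { ptr: usize },
    Staging { shadow_ptr: usize },
}

fn try_direct_map(is_uma: bool, is_linear: bool) -> Option<usize> {
    // a real driver would also fail here for tiled or compressed layouts
    (is_uma && is_linear).then(|| 0xdead_0000)
}

fn map_resource(is_uma: bool, is_linear: bool) -> Mapping {
    match try_direct_map(is_uma, is_linear) {
        Some(ptr) => Mapping::Direct { ptr },
        None => Mapping::Staging {
            // stand-in for a PIPE_USAGE_STAGING allocation that is mapped
            // persistently and coherently, then filled via a GPU copy
            shadow_ptr: 0xbeef_0000,
        },
    }
}

fn main() {
    println!("{:?}", map_resource(true, true));  // Direct
    println!("{:?}", map_resource(false, true)); // Staging
}
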
@@ -789,115 +915,139 @@ impl Mem {
         Ok(())
     }

-    // TODO: only sync on unmap when memory is mapped for writing
-    pub fn sync_shadow_buffer(&self, q: &Arc<Queue>, ctx: &PipeContext, map: bool) -> CLResult<()> {
+    // TODO: only sync on map when the memory is not mapped with discard
+    pub fn sync_shadow_buffer(
+        &self,
+        q: &Arc<Queue>,
+        ctx: &PipeContext,
+        ptr: *mut c_void,
+    ) -> CLResult<()> {
+        let mut lock = self.maps.lock().unwrap();
+        if !lock.increase_ref(&q.device, ptr) {
+            return Ok(());
+        }
+
         if self.has_user_shadow_buffer(&q.device)? {
-            if map {
-                self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
-            } else {
-                self.write_from_user(q, ctx, 0, self.host_ptr, self.size)
-            }
+            self.read_to_user(q, ctx, 0, self.host_ptr, self.size)
         } else {
+            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
+                let mut offset = 0;
+                let b = self.to_parent(&mut offset);
+                let res = b.get_res_of_dev(&q.device)?;
+                let bx = pipe_box {
+                    width: self.size as i32,
+                    height: 1,
+                    depth: 1,
+                    x: offset as i32,
+                    ..Default::default()
+                };
+                ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
+            }
             Ok(())
         }
     }

-    // TODO: only sync on unmap when memory is mapped for writing
-    pub fn sync_shadow_image(&self, q: &Arc<Queue>, ctx: &PipeContext, map: bool) -> CLResult<()> {
+    // TODO: only sync on map when the memory is not mapped with discard
+    pub fn sync_shadow_image(
+        &self,
+        q: &Arc<Queue>,
+        ctx: &PipeContext,
+        ptr: *mut c_void,
+    ) -> CLResult<()> {
+        let mut lock = self.maps.lock().unwrap();
+        if !lock.increase_ref(&q.device, ptr) {
+            return Ok(());
+        }
+
         if self.has_user_shadow_buffer(&q.device)? {
-            if map {
-                self.read_to_user_rect(
-                    self.host_ptr,
-                    q,
-                    ctx,
-                    &self.image_desc.size(),
-                    &CLVec::default(),
-                    0,
-                    0,
-                    &CLVec::default(),
-                    self.image_desc.image_row_pitch,
-                    self.image_desc.image_slice_pitch,
-                )
-            } else {
-                self.write_from_user_rect(
-                    self.host_ptr,
-                    q,
-                    ctx,
-                    &self.image_desc.size(),
-                    &CLVec::default(),
-                    self.image_desc.image_row_pitch,
-                    self.image_desc.image_slice_pitch,
-                    &CLVec::default(),
-                    self.image_desc.image_row_pitch,
-                    self.image_desc.image_slice_pitch,
-                )
-            }
+            self.read_to_user_rect(
+                self.host_ptr,
+                q,
+                ctx,
+                &self.image_desc.api_size(),
+                &CLVec::default(),
+                0,
+                0,
+                &CLVec::default(),
+                self.image_desc.image_row_pitch,
+                self.image_desc.image_slice_pitch,
+            )
         } else {
+            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
+                let res = self.get_res_of_dev(&q.device)?;
+                let bx = self.image_desc.bx()?;
+                ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
+            }
             Ok(())
        }
     }
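
Editor's note: the two sync directions around the shadow copy can be pictured with plain slices (a toy stand-in for resource_copy_region; the TODOs above note that either copy could be skipped depending on the map flags):

// Minimal model of the two directions: on the first map the device copy is
// brought into the shadow, on the last unmap the shadow is written back.
fn sync_on_map(device: &[u8], shadow: &mut [u8]) {
    shadow.copy_from_slice(device); // device -> shadow (what the user reads)
}

fn sync_on_unmap(shadow: &[u8], device: &mut [u8]) {
    device.copy_from_slice(shadow); // shadow -> device (what the user wrote)
}

fn main() {
    let mut device = vec![1u8, 2, 3, 4];
    let mut shadow = vec![0u8; 4];

    sync_on_map(&device, &mut shadow);
    shadow[0] = 42; // user writes through the mapped pointer
    sync_on_unmap(&shadow, &mut device);

    assert_eq!(device, [42, 2, 3, 4]);
}
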

+    /// Maps the resource of the device associated with the queue.
+    ///
+    /// Mapping resources would be straightforward if OpenCL didn't allow so-called non-blocking
+    /// maps. A non-blocking map has to return a valid pointer to the mapped region immediately,
+    /// but must not synchronize data (in the case of shadow buffers) until the map event is
+    /// reached in the queue.
+    /// This makes it impossible to simply use pipe_transfers, as those can't be explicitly synced
+    /// by the frontend.
+    ///
+    /// In order to have a compliant implementation of the mapping API we have to consider the
+    /// following cases:
+    /// 1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
+    ///    Synchronization of shadowed host ptrs is done in `sync_shadow_buffer` and
+    ///    `sync_shadow_image` on demand.
+    /// 2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
+    ///    `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
+    /// 3. On non-UMA systems, or when 2. fails (e.g. because the resource is tiled), we
+    ///    - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
+    ///      `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`,
+    ///    - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
+    ///      `PIPE_MAP_UNSYNCHRONIZED`, and
+    ///    - sync the shadow buffer like a host_ptr shadow buffer in 1.
+    ///
+    /// This approach guarantees that we only copy when actually needed, while making sure the
+    /// content behind the returned pointer stays valid until it is unmapped.
     fn map<'a>(
         &self,
         q: &Arc<Queue>,
-        ctx: Option<&PipeContext>,
         lock: &'a mut MutexGuard<Mappings>,
         rw: RWFlags,
     ) -> CLResult<&'a PipeTransfer> {
         if !lock.tx.contains_key(&q.device) {
-            let tx = if self.is_buffer() {
-                self.tx_raw(q, ctx, 0, self.size, rw)?
+            let (tx, res) = if self.is_buffer() {
+                self.tx_raw_async(q, rw)?
             } else {
                 let bx = self.image_desc.bx()?;
-                self.tx_image_raw(q, ctx, &bx, rw)?
+                self.tx_image_raw_async(q, &bx, rw)?
             };

-            lock.tx.insert(q.device.clone(), (tx, 0));
+            lock.tx
+                .insert(q.device.clone(), MappingTransfer::new(tx, res));
+        } else {
+            lock.mark_pending(&q.device);
         }

-        let tx = lock.tx.get_mut(&q.device).unwrap();
-        tx.1 += 1;
-        Ok(&tx.0)
+        Ok(&lock.tx.get_mut(&q.device).unwrap().tx)
     }

-    // TODO: we could map a partial region and increase the mapping on the fly
-    pub fn map_buffer(
-        &self,
-        q: &Arc<Queue>,
-        ctx: Option<&PipeContext>,
-        offset: usize,
-        _size: usize,
-    ) -> CLResult<*mut c_void> {
+    pub fn map_buffer(&self, q: &Arc<Queue>, offset: usize, _size: usize) -> CLResult<*mut c_void> {
         assert!(self.is_buffer());

         let mut lock = self.maps.lock().unwrap();
         let ptr = if self.has_user_shadow_buffer(&q.device)? {
-            // copy to the host_ptr if we are blocking
-            if let Some(ctx) = ctx {
-                self.sync_shadow_buffer(q, ctx, true)?;
-            }
-
             self.host_ptr
         } else {
-            let tx = self.map(q, ctx, &mut lock, RWFlags::RW)?;
+            let tx = self.map(q, &mut lock, RWFlags::RW)?;
             tx.ptr()
         };

         let ptr = unsafe { ptr.add(offset) };
-
-        if let Some(e) = lock.maps.get_mut(&ptr) {
-            *e += 1;
-        } else {
-            lock.maps.insert(ptr, 1);
-        }
-
         Ok(ptr)
     }

     pub fn map_image(
         &self,
         q: &Arc<Queue>,
-        ctx: Option<&PipeContext>,
         origin: &CLVec<usize>,
         _region: &CLVec<usize>,
         row_pitch: &mut usize,
@@ -912,14 +1062,9 @@ impl Mem {
             *row_pitch = self.image_desc.image_row_pitch;
             *slice_pitch = self.image_desc.image_slice_pitch;

-            // copy to the host_ptr if we are blocking
-            if let Some(ctx) = ctx {
-                self.sync_shadow_image(q, ctx, true)?;
-            }
-
             self.host_ptr
         } else {
-            let tx = self.map(q, ctx, &mut lock, RWFlags::RW)?;
+            let tx = self.map(q, &mut lock, RWFlags::RW)?;

             if self.image_desc.dims() > 1 {
                 *row_pitch = tx.row_pitch() as usize;
@@ -942,12 +1087,6 @@ impl Mem {
             )
         };

-        if let Some(e) = lock.maps.get_mut(&ptr) {
-            *e += 1;
-        } else {
-            lock.maps.insert(ptr, 1);
-        }
-
         Ok(ptr)
     }
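
Editor's note: for the image path, the returned pointer is the transfer's base plus an offset derived from the origin and the reported pitches. A small sketch of that arithmetic (assumed byte layout; rusticl's actual helper may differ):

// Assumed layout: bytes = x * bpp + y * row_pitch + z * slice_pitch.
fn mapped_offset(origin: [usize; 3], bpp: usize, row_pitch: usize, slice_pitch: usize) -> usize {
    origin[0] * bpp + origin[1] * row_pitch + origin[2] * slice_pitch
}

fn main() {
    // pixel (4, 2, 1) of an RGBA8 image with a 1024-byte row and a 64 KiB slice
    let off = mapped_offset([4, 2, 1], 4, 1024, 64 * 1024);
    assert_eq!(off, 4 * 4 + 2 * 1024 + 64 * 1024);
    println!("byte offset into the mapping: {off}");
}
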
@@ -955,36 +1094,53 @@ impl Mem {
         self.maps.lock().unwrap().maps.contains_key(&ptr)
     }

+    // TODO: only sync on unmap when the memory is not mapped for writing
     pub fn unmap(&self, q: &Arc<Queue>, ctx: &PipeContext, ptr: *mut c_void) -> CLResult<()> {
         let mut lock = self.maps.lock().unwrap();
-        let e = lock.maps.get_mut(&ptr).unwrap();
-
-        if *e == 0 {
+        if !lock.maps.contains_key(&ptr) {
             return Ok(());
         }

-        *e -= 1;
-        if *e == 0 {
-            lock.maps.remove(&ptr);
-        }
+        let (needs_sync, shadow) = lock.decrease_ref(ptr, &q.device);
+        if needs_sync {
+            if let Some(shadow) = shadow {
+                let mut offset = 0;
+                let b = self.to_parent(&mut offset);
+                let res = b.get_res_of_dev(&q.device)?;

-        // TODO: only sync on last unmap and only mapped ranges
-        if self.has_user_shadow_buffer(&q.device)? {
-            if self.is_buffer() {
-                self.sync_shadow_buffer(q, ctx, false)?;
-            } else {
-                self.sync_shadow_image(q, ctx, false)?;
+                let bx = if b.is_buffer() {
+                    pipe_box {
+                        width: self.size as i32,
+                        height: 1,
+                        depth: 1,
+                        ..Default::default()
+                    }
+                } else {
+                    self.image_desc.bx()?
+                };
+
+                ctx.resource_copy_region(shadow, res, &[offset as u32, 0, 0], &bx);
+            } else if self.has_user_shadow_buffer(&q.device)? {
+                if self.is_buffer() {
+                    self.write_from_user(q, ctx, 0, self.host_ptr, self.size)?;
+                } else {
+                    self.write_from_user_rect(
+                        self.host_ptr,
+                        q,
+                        ctx,
+                        &self.image_desc.api_size(),
+                        &CLVec::default(),
+                        self.image_desc.image_row_pitch,
+                        self.image_desc.image_slice_pitch,
+                        &CLVec::default(),
+                        self.image_desc.image_row_pitch,
+                        self.image_desc.image_slice_pitch,
+                    )?;
+                }
             }
         }

-        // shadow buffers don't get a tx option bound
-        if let Some(tx) = lock.tx.get_mut(&q.device) {
-            tx.1 -= 1;
-
-            if tx.1 == 0 {
-                lock.tx.remove(&q.device).unwrap().0.with_ctx(ctx);
-            }
-        }
+        lock.clean_up_tx(&q.device, ctx);

         Ok(())
     }
@@ -1001,7 +1157,7 @@ impl Drop for Mem {
             .for_each(|cb| cb(cl));

         for (d, tx) in self.maps.lock().unwrap().tx.drain() {
-            d.helper_ctx().unmap(tx.0);
+            d.helper_ctx().unmap(tx.tx);
         }
     }
 }
diff --git a/src/gallium/frontends/rusticl/mesa/pipe/context.rs b/src/gallium/frontends/rusticl/mesa/pipe/context.rs
index b689106ed7b..9fed6102e76 100644
--- a/src/gallium/frontends/rusticl/mesa/pipe/context.rs
+++ b/src/gallium/frontends/rusticl/mesa/pipe/context.rs
@@ -19,6 +19,7 @@ pub struct PipeContext {
 unsafe impl Send for PipeContext {}
 unsafe impl Sync for PipeContext {}

+#[derive(Clone, Copy)]
 #[repr(u32)]
 pub enum RWFlags {
     RD = pipe_map_flags::PIPE_MAP_READ.0,
diff --git a/src/gallium/frontends/rusticl/mesa/pipe/resource.rs b/src/gallium/frontends/rusticl/mesa/pipe/resource.rs
index 4c5ca712f3a..938e970819a 100644
--- a/src/gallium/frontends/rusticl/mesa/pipe/resource.rs
+++ b/src/gallium/frontends/rusticl/mesa/pipe/resource.rs
@@ -27,6 +27,22 @@ impl PipeResource {
         unsafe { self.pipe.as_ref().unwrap() }
     }

+    pub fn width(&self) -> u32 {
+        unsafe { self.pipe.as_ref().unwrap().width0 }
+    }
+
+    pub fn height(&self) -> u16 {
+        unsafe { self.pipe.as_ref().unwrap().height0 }
+    }
+
+    pub fn depth(&self) -> u16 {
+        unsafe { self.pipe.as_ref().unwrap().depth0 }
+    }
+
+    pub fn array_size(&self) -> u16 {
+        unsafe { self.pipe.as_ref().unwrap().array_size }
+    }
+
     pub fn pipe_image_view(&self, format: pipe_format, read_write: bool) -> pipe_image_view {
         let u = if self.as_ref().target() == pipe_texture_target::PIPE_BUFFER {
             pipe_image_view__bindgen_ty_1 {