diff --git a/docs/features.txt b/docs/features.txt
index bf2820ec81e..aac0513e6f8 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -867,7 +867,7 @@ Rusticl OpenCL 1.2 -- all DONE:
   Separate compilation and linking of programs          DONE
   Extend cl_mem_flags                                    DONE
   clEnqueueFillBuffer, clEnqueueFillImage                DONE
-  Add CL_MAP_WRITE_INVALIDATE_REGION to cl_map_flags     in progress (flag is ignored)
+  Add CL_MAP_WRITE_INVALIDATE_REGION to cl_map_flags     DONE
   New image types                                        DONE
   clCreateImage                                          DONE
   clEnqueueMigrateMemObjects                             DONE
diff --git a/src/gallium/frontends/rusticl/api/memory.rs b/src/gallium/frontends/rusticl/api/memory.rs
index 59b23e6411e..9015fbb03c9 100644
--- a/src/gallium/frontends/rusticl/api/memory.rs
+++ b/src/gallium/frontends/rusticl/api/memory.rs
@@ -1685,14 +1685,20 @@ fn enqueue_map_buffer(
         return Err(CL_INVALID_CONTEXT);
     }

-    let ptr = b.map(q.device, offset)?;
+    let ptr = b.map(size, offset, map_flags != CL_MAP_READ.into())?;
     create_and_queue(
         q,
         CL_COMMAND_MAP_BUFFER,
         evs,
         event,
         block,
-        Box::new(move |q, ctx| b.sync_shadow(q, ctx, ptr)),
+        Box::new(move |q, ctx| {
+            if map_flags != CL_MAP_WRITE_INVALIDATE_REGION.into() {
+                b.sync_map(q, ctx, ptr)
+            } else {
+                Ok(())
+            }
+        }),
     )?;

     Ok(ptr.as_ptr())
@@ -2141,24 +2147,29 @@ fn enqueue_map_image(
     };

     let ptr = i.map(
-        q.device,
-        &origin,
+        origin,
+        region,
         unsafe { image_row_pitch.as_mut().unwrap() },
         image_slice_pitch,
+        map_flags != CL_MAP_READ.into(),
     )?;

-    // SAFETY: it's required that applications do not cause data races
-    let sync_ptr = unsafe { MutMemoryPtr::from_ptr(ptr) };
     create_and_queue(
         q,
         CL_COMMAND_MAP_IMAGE,
         evs,
         event,
         block,
-        Box::new(move |q, ctx| i.sync_shadow(q, ctx, sync_ptr)),
+        Box::new(move |q, ctx| {
+            if map_flags != CL_MAP_WRITE_INVALIDATE_REGION.into() {
+                i.sync_map(q, ctx, ptr)
+            } else {
+                Ok(())
+            }
+        }),
     )?;

-    Ok(ptr)
+    Ok(ptr.as_ptr())

     //• CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified or compute row and/or slice pitch) for image are not supported by device associated with queue.
     //• CL_IMAGE_FORMAT_NOT_SUPPORTED if image format (image channel order and data type) for image are not supported by device associated with queue.
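Note on the flag checks added above: map_flags is compared against the whole cl_map_flags word rather than tested bit by bit, so the host-visible copy is refreshed on map unless the region is mapped exclusively with CL_MAP_WRITE_INVALIDATE_REGION, and a mapping is treated as writable unless it is a pure CL_MAP_READ mapping. A minimal standalone sketch of that decision logic (the constant values are assumed from the OpenCL headers; rusticl itself takes them from the rusticl_opencl_gen bindings):

    // Assumed cl_map_flags bit values from the OpenCL headers; rusticl uses the
    // generated bindings instead of hard-coded constants.
    const CL_MAP_READ: u64 = 1 << 0;
    const CL_MAP_WRITE: u64 = 1 << 1;
    const CL_MAP_WRITE_INVALIDATE_REGION: u64 = 1 << 2;

    // Mirrors the check in the map callbacks: the GPU-to-host sync is only skipped
    // when the region is mapped *exclusively* for write-invalidate.
    fn sync_on_map(map_flags: u64) -> bool {
        map_flags != CL_MAP_WRITE_INVALIDATE_REGION
    }

    // Mirrors the `writes` argument passed to Buffer::map / Image::map: anything
    // other than a pure read mapping must be written back on unmap.
    fn map_for_writing(map_flags: u64) -> bool {
        map_flags != CL_MAP_READ
    }

    fn main() {
        assert!(sync_on_map(CL_MAP_READ | CL_MAP_WRITE));
        assert!(!sync_on_map(CL_MAP_WRITE_INVALIDATE_REGION));
        assert!(!map_for_writing(CL_MAP_READ));
        assert!(map_for_writing(CL_MAP_READ | CL_MAP_WRITE));
    }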
diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs
index 4dda18de3b2..c70fddc0a7c 100644
--- a/src/gallium/frontends/rusticl/core/memory.rs
+++ b/src/gallium/frontends/rusticl/core/memory.rs
@@ -17,10 +17,14 @@ use mesa_rust::pipe::transfer::*;
 use mesa_rust_gen::*;
 use mesa_rust_util::math::*;
 use mesa_rust_util::properties::Properties;
+use mesa_rust_util::ptr::AllocSize;
+use mesa_rust_util::ptr::TrackedPointers;
 use rusticl_opencl_gen::*;

+use std::alloc;
+use std::alloc::Layout;
 use std::cmp;
-use std::collections::hash_map::Entry;
+use std::collections::btree_map::Entry;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::mem;
@@ -31,84 +35,45 @@ use std::ptr;
 use std::sync::Arc;
 use std::sync::Mutex;

-struct MappingTransfer {
-    tx: PipeTransfer,
-    shadow: Option<PipeResource>,
-    pending: u32,
+struct Mapping<T> {
+    layout: Layout,
+    writes: bool,
+    ptr: Option<MutMemoryPtr>,
+    count: u32,
+    inner: T,
 }

-impl MappingTransfer {
-    fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
-        MappingTransfer {
-            tx: tx,
-            shadow: shadow,
-            pending: 1,
+impl<T> Drop for Mapping<T> {
+    fn drop(&mut self) {
+        if let Some(ptr) = &self.ptr {
+            unsafe {
+                alloc::dealloc(ptr.as_ptr().cast(), self.layout);
+            }
         }
     }
 }

-struct Mappings {
-    tx: HashMap<&'static Device, MappingTransfer>,
-    maps: HashMap<usize, u32>,
+impl<T> AllocSize<usize> for Mapping<T> {
+    fn size(&self) -> usize {
+        self.layout.size()
+    }
 }

-impl Mappings {
-    fn new() -> Mutex<Self> {
-        Mutex::new(Mappings {
-            tx: HashMap::new(),
-            maps: HashMap::new(),
-        })
+impl<T> Deref for Mapping<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
     }
+}

-    fn contains_ptr(&self, ptr: *mut c_void) -> bool {
-        let ptr = ptr as usize;
-        self.maps.contains_key(&ptr)
-    }
+struct BufferMapping {
+    offset: usize,
+}

-    fn mark_pending(&mut self, dev: &Device) {
-        self.tx.get_mut(dev).unwrap().pending += 1;
-    }
-
-    fn unmark_pending(&mut self, dev: &Device) {
-        if let Some(tx) = self.tx.get_mut(dev) {
-            tx.pending -= 1;
-        }
-    }
-
-    fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
-        let ptr = ptr as usize;
-        let res = self.maps.is_empty();
-        *self.maps.entry(ptr).or_default() += 1;
-        self.unmark_pending(dev);
-        res
-    }
-
-    fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
-        let ptr = ptr as usize;
-        if let Some(r) = self.maps.get_mut(&ptr) {
-            *r -= 1;
-
-            if *r == 0 {
-                self.maps.remove(&ptr);
-            }
-
-            if self.maps.is_empty() {
-                let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
-                return (true, shadow);
-            }
-        }
-        (false, None)
-    }
-
-    fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
-        if self.maps.is_empty() {
-            if let Some(tx) = self.tx.get(&dev) {
-                if tx.pending == 0 {
-                    self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
-                }
-            }
-        }
-    }
+struct ImageMapping {
+    origin: CLVec<usize>,
+    region: CLVec<usize>,
 }

 #[repr(transparent)]
@@ -133,6 +98,14 @@ impl ConstMemoryPtr {
     }
 }

+impl From<MutMemoryPtr> for ConstMemoryPtr {
+    fn from(value: MutMemoryPtr) -> Self {
+        Self {
+            ptr: value.ptr.cast(),
+        }
+    }
+}
+
 #[repr(transparent)]
 #[derive(Clone, Copy)]
 pub struct MutMemoryPtr {
@@ -172,6 +145,13 @@ impl Deref for Mem {
 }

 impl Mem {
+    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
+        match self {
+            Self::Buffer(b) => b.is_mapped_ptr(ptr),
+            Self::Image(i) => i.is_mapped_ptr(ptr),
+        }
+    }
+
     pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
         match self {
             Self::Buffer(b) => b.unmap(q, ctx, ptr),
@@ -219,12 +199,12 @@ pub struct MemBase {
     pub cbs: Mutex<Vec<MemCB>>,
     pub gl_obj: Option<GLObject>,
     res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
-    maps: Mutex<Mappings>,
 }

 pub struct Buffer {
     base: MemBase,
     pub offset: usize,
+    maps: Mutex<TrackedPointers<usize, Mapping<BufferMapping>>>,
 }

 pub struct Image {
@@ -233,6 +213,7 @@
     pub pipe_format: pipe_format,
     pub image_desc: cl_image_desc,
     pub image_elem_size: u8,
+    maps: Mutex<TrackedPointers<usize, Mapping<ImageMapping>>>,
 }

 impl Deref for Buffer {
@@ -436,9 +417,9 @@ impl MemBase {
                 gl_obj: None,
                 cbs: Mutex::new(Vec::new()),
                 res: Some(buffer),
-                maps: Mappings::new(),
             },
             offset: 0,
+            maps: Mutex::new(TrackedPointers::new()),
         }))
     }

@@ -467,9 +448,9 @@ impl MemBase {
                 gl_obj: None,
                 cbs: Mutex::new(Vec::new()),
                 res: None,
-                maps: Mappings::new(),
             },
             offset: offset,
+            maps: Mutex::new(TrackedPointers::new()),
         })
     }

@@ -550,12 +531,12 @@ impl MemBase {
                 gl_obj: None,
                 cbs: Mutex::new(Vec::new()),
                 res: texture,
-                maps: Mappings::new(),
             },
             image_format: *image_format,
             pipe_format: pipe_format,
             image_desc: api_image_desc,
             image_elem_size: image_elem_size,
+            maps: Mutex::new(TrackedPointers::new()),
         }))
     }

@@ -656,13 +637,13 @@ impl MemBase {
             }),
             cbs: Mutex::new(Vec::new()),
             res: Some(texture),
-            maps: Mappings::new(),
         };

         Ok(if rusticl_type == RusticlTypes::Buffer {
             Arc::new(Buffer {
                 base: base,
                 offset: gl_mem_props.offset as usize,
+                maps: Mutex::new(TrackedPointers::new()),
             })
             .into_cl()
         } else {
@@ -683,6 +664,7 @@ impl MemBase {
                     ..Default::default()
                 },
                 image_elem_size: gl_mem_props.pixel_size,
+                maps: Mutex::new(TrackedPointers::new()),
             })
             .into_cl()
         })
@@ -729,8 +711,47 @@ impl MemBase {
         self.host_ptr as *mut c_void
     }

-    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
-        self.maps.lock().unwrap().contains_ptr(ptr)
+    fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
+        let r = self.get_res_of_dev(d)?;
+        Ok(r.is_user)
+    }
+
+    fn map<T>(
+        &self,
+        offset: usize,
+        layout: Layout,
+        writes: bool,
+        maps: &Mutex<TrackedPointers<usize, Mapping<T>>>,
+        inner: T,
+    ) -> CLResult<MutMemoryPtr> {
+        let host_ptr = self.host_ptr();
+        let ptr = unsafe {
+            let ptr = if !host_ptr.is_null() {
+                host_ptr.add(offset)
+            } else {
+                alloc::alloc(layout).cast()
+            };
+
+            MutMemoryPtr::from_ptr(ptr)
+        };
+
+        match maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
+            Entry::Occupied(mut e) => {
+                debug_assert!(!host_ptr.is_null());
+                e.get_mut().count += 1;
+            }
+            Entry::Vacant(e) => {
+                e.insert(Mapping {
+                    layout: layout,
+                    writes: writes,
+                    ptr: host_ptr.is_null().then_some(ptr),
+                    count: 1,
+                    inner: inner,
+                });
+            }
+        }
+
+        Ok(ptr)
     }
 }

@@ -740,10 +761,6 @@ impl Drop for MemBase {
         for cb in cbs.into_iter().rev() {
             cb.call(self);
         }
-
-        for (d, tx) in self.maps.get_mut().unwrap().tx.drain() {
-            d.helper_ctx().unmap(tx.tx);
-        }
     }
 }

@@ -895,25 +912,20 @@ impl Buffer {
         Ok(())
     }

-    pub fn map(&self, dev: &'static Device, offset: usize) -> CLResult<MutMemoryPtr> {
-        let ptr = if self.has_user_shadow_buffer(dev)? {
-            self.host_ptr()
-        } else {
-            let mut lock = self.maps.lock().unwrap();
+    fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
+        self.maps.lock().unwrap().contains_key(ptr as usize)
+    }

-            if let Entry::Vacant(e) = lock.tx.entry(dev) {
-                let (tx, res) = self.tx_raw_async(dev, RWFlags::RW)?;
-                e.insert(MappingTransfer::new(tx, res));
-            } else {
-                lock.mark_pending(dev);
-            }
-
-            lock.tx.get(dev).unwrap().tx.ptr()
-        };
-
-        let ptr = unsafe { ptr.add(offset) };
-        // SAFETY: it's required that applications do not cause data races
-        Ok(unsafe { MutMemoryPtr::from_ptr(ptr) })
+    pub fn map(&self, size: usize, offset: usize, writes: bool) -> CLResult<MutMemoryPtr> {
+        let layout =
+            unsafe { Layout::from_size_align_unchecked(size, size_of::<[cl_ulong; 16]>()) };
+        self.base.map(
+            offset,
+            layout,
+            writes,
+            &self.maps,
+            BufferMapping { offset: offset },
+        )
     }

     pub fn read(
@@ -968,35 +980,18 @@
         Ok(())
     }

-    // TODO: only sync on map when the memory is not mapped with discard
-    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.increase_ref(q.device, ptr) {
+    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
+        // no need to update
+        if self.is_pure_user_memory(q.device)? {
             return Ok(());
         }

-        if self.has_user_shadow_buffer(q.device)? {
-            self.read(
-                q,
-                ctx,
-                0,
-                // SAFETY: it's required that applications do not cause data races
-                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
-                self.size,
-            )
-        } else {
-            if let Some(shadow) = lock.tx.get(&q.device).and_then(|tx| tx.shadow.as_ref()) {
-                let res = self.get_res_of_dev(q.device)?;
-                let bx = create_pipe_box(
-                    [self.offset, 0, 0].into(),
-                    [self.size, 1, 1].into(),
-                    CL_MEM_OBJECT_BUFFER,
-                )?;
-                ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
-            }
-            Ok(())
-        }
+        let maps = self.maps.lock().unwrap();
+        let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
+            return Err(CL_INVALID_VALUE);
+        };
+
+        self.read(q, ctx, mapping.offset, ptr, mapping.size())
     }

     fn tx<'a>(
@@ -1022,69 +1017,21 @@
             .with_ctx(ctx))
     }

-    fn tx_raw_async(
-        &self,
-        dev: &Device,
-        rw: RWFlags,
-    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
-        let r = self.get_res_of_dev(dev)?;
-        let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
-        let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
-        let ctx = dev.helper_ctx();
-
-        let tx = if can_map_directly(dev, r) {
-            ctx.buffer_map_directly(r, offset, size, rw)
-        } else {
-            None
-        };
-
-        if let Some(tx) = tx {
-            Ok((tx, None))
-        } else {
-            let shadow = dev
-                .screen()
-                .resource_create_buffer(size as u32, ResourceType::Staging, 0)
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            let tx = ctx
-                .buffer_map_coherent(&shadow, 0, size, rw)
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            Ok((tx, Some(shadow)))
-        }
-    }
-
     // TODO: only sync on unmap when the memory is not mapped for writing
     pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.contains_ptr(ptr) {
-            return Ok(());
-        }
-
-        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
-        if needs_sync {
-            if let Some(shadow) = shadow {
-                let res = self.get_res_of_dev(q.device)?;
-                let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
-                let bx = create_pipe_box(
-                    CLVec::default(),
-                    [self.size, 1, 1].into(),
-                    CL_MEM_OBJECT_BUFFER,
-                )?;
-
-                ctx.resource_copy_region(shadow, res, &[offset, 0, 0], &bx);
-            } else if self.has_user_shadow_buffer(q.device)? {
-                self.write(
-                    q,
-                    ctx,
-                    0,
-                    // SAFETY: it's required that applications do not cause data races
-                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
-                    self.size,
-                )?;
+        let mapping = match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
+            Entry::Vacant(_) => return Err(CL_INVALID_VALUE),
+            Entry::Occupied(mut entry) => {
+                entry.get_mut().count -= 1;
+                (entry.get().count == 0).then(|| entry.remove())
             }
-        }
+        };

-        lock.clean_up_tx(q.device, ctx);
+        if let Some(mapping) = mapping {
+            if mapping.writes && !self.is_pure_user_memory(q.device)? {
+                self.write(q, ctx, mapping.offset, ptr.into(), mapping.size())?;
+            }
+        };

         Ok(())
     }

@@ -1345,61 +1292,45 @@ impl Image {
         Ok(())
     }

+    fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
+        self.maps.lock().unwrap().contains_key(ptr as usize)
+    }
+
     pub fn is_parent_buffer(&self) -> bool {
         matches!(self.parent, Some(Mem::Buffer(_)))
     }

     pub fn map(
         &self,
-        dev: &'static Device,
-        origin: &CLVec<usize>,
+        origin: CLVec<usize>,
+        region: CLVec<usize>,
         row_pitch: &mut usize,
         slice_pitch: &mut usize,
-    ) -> CLResult<*mut c_void> {
-        // we might have a host_ptr shadow buffer or image created from buffer
-        let ptr = if self.has_user_shadow_buffer(dev)? {
-            *row_pitch = self.image_desc.image_row_pitch;
-            *slice_pitch = self.image_desc.image_slice_pitch;
-            self.host_ptr()
-        } else if let Some(Mem::Buffer(buffer)) = &self.parent {
-            *row_pitch = self.image_desc.image_row_pitch;
-            *slice_pitch = self.image_desc.image_slice_pitch;
-            buffer.map(dev, 0)?.as_ptr()
-        } else {
-            let mut lock = self.maps.lock().unwrap();
+        writes: bool,
+    ) -> CLResult<MutMemoryPtr> {
+        let pixel_size = self.image_format.pixel_size().unwrap() as usize;

-            if let Entry::Vacant(e) = lock.tx.entry(dev) {
-                let bx = self.image_desc.bx()?;
-                let (tx, res) = self.tx_raw_async(dev, &bx, RWFlags::RW)?;
-                e.insert(MappingTransfer::new(tx, res));
-            } else {
-                lock.mark_pending(dev);
-            }
+        *row_pitch = self.image_desc.row_pitch()? as usize;
+        *slice_pitch = self.image_desc.slice_pitch();

-            let tx = &lock.tx.get(dev).unwrap().tx;
+        let (offset, size) =
+            CLVec::calc_offset_size(origin, region, [pixel_size, *row_pitch, *slice_pitch]);

-            if self.image_desc.dims() > 1 {
-                *row_pitch = tx.row_pitch() as usize;
-            }
-            if self.image_desc.dims() > 2 || self.image_desc.is_array() {
-                *slice_pitch = tx.slice_pitch();
-            }
+        let layout;
+        unsafe {
+            layout = Layout::from_size_align_unchecked(size, size_of::<[u32; 4]>());
+        }

-            tx.ptr()
-        };
-
-        let ptr = unsafe {
-            ptr.add(
-                *origin
-                    * [
-                        self.image_format.pixel_size().unwrap().into(),
-                        *row_pitch,
-                        *slice_pitch,
-                    ],
-            )
-        };
-
-        Ok(ptr)
+        self.base.map(
+            offset,
+            layout,
+            writes,
+            &self.maps,
+            ImageMapping {
+                origin: origin,
+                region: region,
+            },
+        )
     }

     pub fn pipe_image_host_access(&self) -> u16 {
@@ -1466,32 +1397,29 @@
     }

     // TODO: only sync on map when the memory is not mapped with discard
-    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.increase_ref(q.device, ptr) {
+    pub fn sync_map(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
+        // no need to update
+        if self.is_pure_user_memory(q.device)? {
             return Ok(());
         }

-        if self.has_user_shadow_buffer(q.device)? {
-            self.read(
-                // SAFETY: it's required that applications do not cause data races
-                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
-                q,
-                ctx,
-                &self.image_desc.size(),
-                &CLVec::default(),
-                self.image_desc.image_row_pitch,
-                self.image_desc.image_slice_pitch,
-            )
-        } else {
-            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
-                let res = self.get_res_of_dev(q.device)?;
-                let bx = self.image_desc.bx()?;
-                ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
-            }
-            Ok(())
-        }
+        let maps = self.maps.lock().unwrap();
+        let Some(mapping) = maps.find_alloc_precise(ptr.as_ptr() as usize) else {
+            return Err(CL_INVALID_VALUE);
+        };
+
+        let row_pitch = self.image_desc.row_pitch()? as usize;
+        let slice_pitch = self.image_desc.slice_pitch();
+
+        self.read(
+            ptr,
+            q,
+            ctx,
+            &mapping.region,
+            &mapping.origin,
+            row_pitch,
+            slice_pitch,
+        )
     }

     fn tx_image<'a>(
@@ -1508,74 +1436,33 @@
             .with_ctx(ctx))
     }

-    fn tx_raw_async(
-        &self,
-        dev: &Device,
-        bx: &pipe_box,
-        rw: RWFlags,
-    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
-        let r = self.get_res_of_dev(dev)?;
-        let ctx = dev.helper_ctx();
-
-        let tx = if can_map_directly(dev, r) {
-            ctx.texture_map_directly(r, bx, rw)
-        } else {
-            None
-        };
-
-        if let Some(tx) = tx {
-            Ok((tx, None))
-        } else {
-            let shadow = dev
-                .screen()
-                .resource_create_texture(
-                    r.width(),
-                    r.height(),
-                    r.depth(),
-                    r.array_size(),
-                    cl_mem_type_to_texture_target(self.image_desc.image_type),
-                    self.pipe_format,
-                    ResourceType::Staging,
-                    false,
-                )
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            let tx = ctx
-                .texture_map_coherent(&shadow, bx, rw)
-                .ok_or(CL_OUT_OF_RESOURCES)?;
-            Ok((tx, Some(shadow)))
-        }
-    }
-
     // TODO: only sync on unmap when the memory is not mapped for writing
     pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
-        let ptr = ptr.as_ptr();
-        let mut lock = self.maps.lock().unwrap();
-        if !lock.contains_ptr(ptr) {
-            return Ok(());
-        }
+        let mapping = match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
+            Entry::Vacant(_) => return Err(CL_INVALID_VALUE),
+            Entry::Occupied(mut entry) => {
+                entry.get_mut().count -= 1;
+                (entry.get().count == 0).then(|| entry.remove())
+            }
+        };

-        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
-        if needs_sync {
-            if let Some(shadow) = shadow {
-                let res = self.get_res_of_dev(q.device)?;
-                let bx = self.image_desc.bx()?;
-                ctx.resource_copy_region(shadow, res, &[0, 0, 0], &bx);
-            } else if self.has_user_shadow_buffer(q.device)? {
+        let row_pitch = self.image_desc.row_pitch()? as usize;
+        let slice_pitch = self.image_desc.slice_pitch();
+
+        if let Some(mapping) = mapping {
+            if mapping.writes && !self.is_pure_user_memory(q.device)? {
                 self.write(
-                    // SAFETY: it's required that applications do not cause data races
-                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
+                    ptr.into(),
                     q,
                     ctx,
-                    &self.image_desc.size(),
-                    self.image_desc.image_row_pitch,
-                    self.image_desc.image_slice_pitch,
-                    &CLVec::default(),
+                    &mapping.region,
+                    row_pitch,
+                    slice_pitch,
+                    &mapping.origin,
                 )?;
             }
         }

-        lock.clean_up_tx(q.device, ctx);
-
         Ok(())
     }
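For reference, the core/memory.rs change replaces the per-device MappingTransfer/Mappings machinery with a per-object table of refcounted mappings keyed by the pointer handed back to the application; a write-back to the resource only happens on the last unmap of a writable mapping. A minimal, std-only sketch of that bookkeeping pattern (simplified stand-in types, not the actual rusticl Mapping/TrackedPointers implementation):

    use std::collections::BTreeMap;

    struct Mapping {
        writes: bool, // was the region mapped for writing?
        count: u32,   // API-level map/unmap reference count
        offset: usize,
        size: usize,
    }

    #[derive(Default)]
    struct TrackedPointers {
        // keyed by the pointer value returned to the application
        ptrs: BTreeMap<usize, Mapping>,
    }

    impl TrackedPointers {
        // map(): reuse an existing entry (same pointer mapped again) or insert a new one
        fn map(&mut self, ptr: usize, offset: usize, size: usize, writes: bool) {
            self.ptrs
                .entry(ptr)
                .and_modify(|m| m.count += 1)
                .or_insert(Mapping {
                    writes,
                    count: 1,
                    offset,
                    size,
                });
        }

        // unmap(): drop one reference; return the mapping once the last reference is
        // gone so the caller can decide whether a write-back to the resource is needed
        fn unmap(&mut self, ptr: usize) -> Option<Mapping> {
            let m = self.ptrs.get_mut(&ptr)?;
            m.count -= 1;
            if m.count == 0 {
                self.ptrs.remove(&ptr)
            } else {
                None
            }
        }
    }

    fn main() {
        let mut maps = TrackedPointers::default();
        maps.map(0x1000, 0, 64, true);
        maps.map(0x1000, 0, 64, true); // mapping the same pointer again only bumps the count
        assert!(maps.unmap(0x1000).is_none()); // still mapped once, nothing to do yet
        let last = maps.unmap(0x1000).unwrap(); // last unmap returns the mapping
        assert!(last.writes); // the caller would now write these bytes back to the resource
        println!("write back {} bytes at offset {}", last.size, last.offset);
    }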