diff --git a/docs/envvars.rst b/docs/envvars.rst index 6d3b9d5a960..4b7293be13b 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -1153,6 +1153,7 @@ Rusticl environment variables - ``allow_invalid_spirv`` disables validation of any input SPIR-V - ``clc`` dumps all OpenCL C source being compiled + - ``no_reuse_context`` pipe_contexts are not recycled - ``perf`` prints a warning when hitting slow paths once - ``perfspam`` same as perf, but doesn't skip same warnings - ``program`` dumps compilation logs to stderr diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs index f83ae4b9127..9cf801659e4 100644 --- a/src/gallium/frontends/rusticl/core/device.rs +++ b/src/gallium/frontends/rusticl/core/device.rs @@ -46,6 +46,7 @@ pub struct Device { pub lib_clc: NirShader, pub caps: DeviceCaps, helper_ctx: Mutex, + reusable_ctx: Mutex>, } #[derive(Default)] @@ -216,6 +217,7 @@ impl Device { clc_features: Vec::new(), formats: HashMap::new(), lib_clc: lib_clc?, + reusable_ctx: Mutex::new(Vec::new()), }; // check if we are embedded or full profile first @@ -984,10 +986,26 @@ impl Device { }) } + fn reusable_ctx(&self) -> MutexGuard> { + self.reusable_ctx.lock().unwrap() + } + pub fn screen(&self) -> &Arc { &self.screen } + pub fn create_context(&self) -> Option { + self.reusable_ctx() + .pop() + .or_else(|| self.screen.create_context()) + } + + pub fn recycle_context(&self, ctx: PipeContext) { + if Platform::dbg().reuse_context { + self.reusable_ctx().push(ctx); + } + } + pub fn subgroup_sizes(&self) -> Vec { let subgroup_size = ComputeParam::::compute_param( self.screen.as_ref(), diff --git a/src/gallium/frontends/rusticl/core/platform.rs b/src/gallium/frontends/rusticl/core/platform.rs index e0e3d1cacdb..e034552f05b 100644 --- a/src/gallium/frontends/rusticl/core/platform.rs +++ b/src/gallium/frontends/rusticl/core/platform.rs @@ -33,6 +33,7 @@ pub struct PlatformDebug { pub perf: PerfDebugLevel, pub program: bool, pub
max_grid_size: u64, + pub reuse_context: bool, pub sync_every_event: bool, pub validate_spirv: bool, } @@ -80,6 +81,7 @@ static mut PLATFORM_DBG: PlatformDebug = PlatformDebug { perf: PerfDebugLevel::None, program: false, max_grid_size: 0, + reuse_context: true, sync_every_event: false, validate_spirv: false, }; @@ -96,6 +98,7 @@ fn load_env() { match flag { "allow_invalid_spirv" => debug.allow_invalid_spirv = true, "clc" => debug.clc = true, + "no_reuse_context" => debug.reuse_context = false, "perf" => debug.perf = PerfDebugLevel::Once, "perfspam" => debug.perf = PerfDebugLevel::Spam, "program" => debug.program = true, diff --git a/src/gallium/frontends/rusticl/core/queue.rs b/src/gallium/frontends/rusticl/core/queue.rs index f9f88e827f9..af59d113016 100644 --- a/src/gallium/frontends/rusticl/core/queue.rs +++ b/src/gallium/frontends/rusticl/core/queue.rs @@ -11,6 +11,7 @@ use rusticl_opencl_gen::*; use std::cmp; use std::mem; +use std::mem::ManuallyDrop; use std::ops::Deref; use std::sync::mpsc; use std::sync::Arc; @@ -23,19 +24,19 @@ use std::thread::JoinHandle; /// /// Used for tracking bound GPU state to lower CPU overhead and centralize state tracking pub struct QueueContext { - ctx: PipeContext, + // ManuallyDrop lets Drop::drop, which only gets `&mut self`, move the context out and pass it by value to Device::recycle_context + ctx: ManuallyDrop, + dev: &'static Device, use_stream: bool, } impl QueueContext { - fn new_for(device: &Device) -> CLResult { - let ctx = device - .screen() - .create_context() - .ok_or(CL_OUT_OF_HOST_MEMORY)?; + fn new_for(device: &'static Device) -> CLResult { + let ctx = device.create_context().ok_or(CL_OUT_OF_HOST_MEMORY)?; Ok(Self { - ctx: ctx, + ctx: ManuallyDrop::new(ctx), + dev: device, use_stream: device.prefers_real_buffer_in_cb0(), }) } @@ -66,7 +67,9 @@ impl Deref for QueueContext { impl Drop for QueueContext { fn drop(&mut self) { - self.ctx.set_constant_buffer(0, &[]) + /* SAFETY: `self.ctx` is not accessed again after this take; the rest of drop() only uses the moved-out `ctx` and `self.dev` */ let ctx = unsafe { ManuallyDrop::take(&mut self.ctx) }; + ctx.set_constant_buffer(0, &[]); +
self.dev.recycle_context(ctx); } }