rusticl: reuse PipeContext

Gallium drivers are likely to leak CPU and GPU resources, because with OpenGL
they rarely have to create more than a single pipe_context.

This also reduces the cost of creating CL queues.

In order to debug drivers leaking memory, a new debug option is added to
disable the reuse of PipeContexts.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30888>
This commit is contained in:
Karol Herbst 2024-08-27 12:35:11 +02:00 committed by Marge Bot
parent 64ca0fd2f2
commit 73c8e2c3cd
4 changed files with 33 additions and 8 deletions

View file

@ -1153,6 +1153,7 @@ Rusticl environment variables
- ``allow_invalid_spirv`` disables validation of any input SPIR-V
- ``clc`` dumps all OpenCL C source being compiled
- ``no_reuse_context`` pipe_contexts are not recycled
- ``perf`` prints a warning when hitting slow paths once
- ``perfspam`` same as perf, but doesn't skip same warnings
- ``program`` dumps compilation logs to stderr

View file

@ -46,6 +46,7 @@ pub struct Device {
pub lib_clc: NirShader,
pub caps: DeviceCaps,
helper_ctx: Mutex<PipeContext>,
reusable_ctx: Mutex<Vec<PipeContext>>,
}
#[derive(Default)]
@ -216,6 +217,7 @@ impl Device {
clc_features: Vec::new(),
formats: HashMap::new(),
lib_clc: lib_clc?,
reusable_ctx: Mutex::new(Vec::new()),
};
// check if we are embedded or full profile first
@ -984,10 +986,26 @@ impl Device {
})
}
/// Locks the pool of idle pipe contexts and hands back the guard.
///
/// # Panics
///
/// Panics if the pool's mutex is poisoned.
fn reusable_ctx(&self) -> MutexGuard<Vec<PipeContext>> {
    let pool = &self.reusable_ctx;
    pool.lock().unwrap()
}
/// Returns a reference to the pipe screen held by this device.
pub fn screen(&self) -> &Arc<PipeScreen> {
&self.screen
}
/// Hands out a pipe context, preferring a previously recycled one.
///
/// Falls back to asking the screen for a new context when the pool is empty; returns `None`
/// if the screen does not provide one.
pub fn create_context(&self) -> Option<PipeContext> {
    // Pop in a statement of its own: the pool's lock guard is a temporary that is dropped at
    // the `;`, so the mutex is no longer held while the screen creates a new context.
    let recycled = self.reusable_ctx().pop();
    recycled.or_else(|| self.screen.create_context())
}
/// Returns `ctx` to the device's pool so a later `create_context` call can hand it out again.
///
/// When context reuse is switched off in the platform debug options, `ctx` is not pooled and
/// is dropped instead.
pub fn recycle_context(&self, ctx: PipeContext) {
    let reuse_enabled = Platform::dbg().reuse_context;
    if !reuse_enabled {
        // Not pooling: `ctx` goes out of scope here and is dropped.
        return;
    }

    self.reusable_ctx().push(ctx);
}
pub fn subgroup_sizes(&self) -> Vec<usize> {
let subgroup_size = ComputeParam::<u32>::compute_param(
self.screen.as_ref(),

View file

@ -33,6 +33,7 @@ pub struct PlatformDebug {
pub perf: PerfDebugLevel,
pub program: bool,
pub max_grid_size: u64,
pub reuse_context: bool,
pub sync_every_event: bool,
pub validate_spirv: bool,
}
@ -80,6 +81,7 @@ static mut PLATFORM_DBG: PlatformDebug = PlatformDebug {
perf: PerfDebugLevel::None,
program: false,
max_grid_size: 0,
reuse_context: true,
sync_every_event: false,
validate_spirv: false,
};
@ -96,6 +98,7 @@ fn load_env() {
match flag {
"allow_invalid_spirv" => debug.allow_invalid_spirv = true,
"clc" => debug.clc = true,
"no_reuse_context" => debug.reuse_context = false,
"perf" => debug.perf = PerfDebugLevel::Once,
"perfspam" => debug.perf = PerfDebugLevel::Spam,
"program" => debug.program = true,

View file

@ -11,6 +11,7 @@ use rusticl_opencl_gen::*;
use std::cmp;
use std::mem;
use std::mem::ManuallyDrop;
use std::ops::Deref;
use std::sync::mpsc;
use std::sync::Arc;
@ -23,19 +24,19 @@ use std::thread::JoinHandle;
///
/// Used for tracking bound GPU state to lower CPU overhead and centralize state tracking
pub struct QueueContext {
ctx: PipeContext,
// wrapped in ManuallyDrop so `Drop` can move the context out and recycle it without cloning
ctx: ManuallyDrop<PipeContext>,
// device the context was obtained from; `Drop` hands the context back to it for recycling
dev: &'static Device,
// initialised from `Device::prefers_real_buffer_in_cb0()` when the queue context is built
use_stream: bool,
}
impl QueueContext {
fn new_for(device: &Device) -> CLResult<Self> {
let ctx = device
.screen()
.create_context()
.ok_or(CL_OUT_OF_HOST_MEMORY)?;
/// Builds a queue context for `device`.
///
/// The pipe context is requested through `Device::create_context`; if none is returned the
/// call fails with `CL_OUT_OF_HOST_MEMORY`. The device reference is stored so the context can
/// be handed back to it when the queue context is dropped.
fn new_for(device: &'static Device) -> CLResult<Self> {
let ctx = device.create_context().ok_or(CL_OUT_OF_HOST_MEMORY)?;
Ok(Self {
ctx: ctx,
ctx: ManuallyDrop::new(ctx),
dev: device,
use_stream: device.prefers_real_buffer_in_cb0(),
})
}
@ -66,7 +67,9 @@ impl Deref for QueueContext {
impl Drop for QueueContext {
fn drop(&mut self) {
self.ctx.set_constant_buffer(0, &[])
// SAFETY: the context is moved out of `self.ctx` only here, inside `drop`, and `self.ctx`
// is not touched again after the take.
let ctx = unsafe { ManuallyDrop::take(&mut self.ctx) };
// bind an empty slice to constant buffer 0 before giving the context away
// (presumably this unbinds whatever was bound there; confirm against PipeContext)
ctx.set_constant_buffer(0, &[]);
// hand the context to the device, which either pools it for reuse or drops it
self.dev.recycle_context(ctx);
}
}