mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
rusticl: create the CSO ahead of launches
Signed-off-by: Karol Herbst <kherbst@redhat.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18581>
This commit is contained in:
parent
09d05e4e1c
commit
09e15994fc
2 changed files with 76 additions and 11 deletions
|
|
@ -73,6 +73,9 @@ pub trait HelperContextWrapper {
|
|||
|
||||
fn texture_map_coherent(&self, res: &PipeResource, bx: &pipe_box, rw: RWFlags) -> PipeTransfer;
|
||||
|
||||
fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
|
||||
fn delete_compute_state(&self, cso: *mut c_void);
|
||||
|
||||
fn unmap(&self, tx: PipeTransfer);
|
||||
}
|
||||
|
||||
|
|
@ -148,6 +151,14 @@ impl<'a> HelperContextWrapper for HelperContext<'a> {
|
|||
.texture_map(res, bx, rw, ResourceMapType::Coherent)
|
||||
}
|
||||
|
||||
fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
|
||||
self.lock.create_compute_state(nir, static_local_mem)
|
||||
}
|
||||
|
||||
fn delete_compute_state(&self, cso: *mut c_void) {
|
||||
self.lock.delete_compute_state(cso)
|
||||
}
|
||||
|
||||
fn unmap(&self, tx: PipeTransfer) {
|
||||
tx.with_ctx(&self.lock);
|
||||
}
|
||||
|
|
@ -727,6 +738,10 @@ impl Device {
|
|||
id as u32
|
||||
}
|
||||
|
||||
pub fn shareable_shaders(&self) -> bool {
|
||||
self.screen.param(pipe_cap::PIPE_CAP_SHAREABLE_SHADERS) == 1
|
||||
}
|
||||
|
||||
pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
|
||||
HelperContext {
|
||||
lock: self.helper_ctx.lock().unwrap(),
|
||||
|
|
|
|||
|
|
@ -245,6 +245,48 @@ impl InternalKernelArg {
|
|||
}
|
||||
}
|
||||
|
||||
struct KernelDevStateInner {
|
||||
nir: NirShader,
|
||||
cso: *mut c_void,
|
||||
}
|
||||
|
||||
struct KernelDevState {
|
||||
states: HashMap<Arc<Device>, KernelDevStateInner>,
|
||||
}
|
||||
|
||||
impl Drop for KernelDevState {
|
||||
fn drop(&mut self) {
|
||||
self.states.iter().for_each(|(dev, dev_state)| {
|
||||
if !dev_state.cso.is_null() {
|
||||
dev.helper_ctx().delete_compute_state(dev_state.cso);
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl KernelDevState {
|
||||
fn new(nirs: HashMap<Arc<Device>, NirShader>) -> Arc<Self> {
|
||||
let states = nirs
|
||||
.into_iter()
|
||||
.map(|(dev, nir)| {
|
||||
let cso = if dev.shareable_shaders() {
|
||||
dev.helper_ctx()
|
||||
.create_compute_state(&nir, nir.shared_size())
|
||||
} else {
|
||||
ptr::null_mut()
|
||||
};
|
||||
(dev, KernelDevStateInner { nir: nir, cso: cso })
|
||||
})
|
||||
.collect();
|
||||
|
||||
Arc::new(Self { states: states })
|
||||
}
|
||||
|
||||
fn get(&self, dev: &Device) -> &KernelDevStateInner {
|
||||
self.states.get(dev).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct Kernel {
|
||||
pub base: CLObjectBase<CL_INVALID_KERNEL>,
|
||||
|
|
@ -255,7 +297,7 @@ pub struct Kernel {
|
|||
pub work_group_size: [usize; 3],
|
||||
pub attributes_string: String,
|
||||
internal_args: Vec<InternalKernelArg>,
|
||||
nirs: HashMap<Arc<Device>, NirShader>,
|
||||
dev_state: Arc<KernelDevState>,
|
||||
}
|
||||
|
||||
impl_cl_type_trait!(cl_kernel, Kernel, CL_INVALID_KERNEL);
|
||||
|
|
@ -790,8 +832,7 @@ impl Kernel {
|
|||
attributes_string: attributes_string,
|
||||
values: values,
|
||||
internal_args: internal_args,
|
||||
// caller has to verify all kernels have the same sig
|
||||
nirs: nirs,
|
||||
dev_state: KernelDevState::new(nirs),
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -805,7 +846,7 @@ impl Kernel {
|
|||
grid: &[usize],
|
||||
offsets: &[usize],
|
||||
) -> CLResult<EventSig> {
|
||||
let nir = self.nirs.get(&q.device).unwrap();
|
||||
let nir = &self.dev_state.get(&q.device).nir;
|
||||
let mut block = create_kernel_arr::<u32>(block, 1);
|
||||
let mut grid = create_kernel_arr::<u32>(grid, 1);
|
||||
let offsets = create_kernel_arr::<u64>(offsets, 0);
|
||||
|
|
@ -969,11 +1010,11 @@ impl Kernel {
|
|||
|
||||
let k = Arc::clone(self);
|
||||
Ok(Box::new(move |q, ctx| {
|
||||
let nir = k.nirs.get(&q.device).unwrap();
|
||||
let dev_state = k.dev_state.get(&q.device);
|
||||
let mut input = input.clone();
|
||||
let mut resources = Vec::with_capacity(resource_info.len());
|
||||
let mut globals: Vec<*mut u32> = Vec::new();
|
||||
let printf_format = nir.printf_format();
|
||||
let printf_format = dev_state.nir.printf_format();
|
||||
|
||||
let mut sviews: Vec<_> = sviews
|
||||
.iter()
|
||||
|
|
@ -998,7 +1039,12 @@ impl Kernel {
|
|||
init_data.len() as u32,
|
||||
);
|
||||
}
|
||||
let cso = ctx.create_compute_state(nir, static_local_size as u32);
|
||||
|
||||
let cso = if dev_state.cso.is_null() {
|
||||
ctx.create_compute_state(&dev_state.nir, static_local_size as u32)
|
||||
} else {
|
||||
dev_state.cso
|
||||
};
|
||||
|
||||
ctx.bind_compute_state(cso);
|
||||
ctx.bind_sampler_states(&samplers);
|
||||
|
|
@ -1013,8 +1059,12 @@ impl Kernel {
|
|||
ctx.clear_shader_images(iviews.len() as u32);
|
||||
ctx.clear_sampler_views(sviews.len() as u32);
|
||||
ctx.clear_sampler_states(samplers.len() as u32);
|
||||
|
||||
ctx.bind_compute_state(ptr::null_mut());
|
||||
ctx.delete_compute_state(cso);
|
||||
if dev_state.cso.is_null() {
|
||||
ctx.delete_compute_state(cso);
|
||||
}
|
||||
|
||||
ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);
|
||||
|
||||
samplers.iter().for_each(|s| ctx.delete_sampler_state(*s));
|
||||
|
|
@ -1115,12 +1165,12 @@ impl Kernel {
|
|||
}
|
||||
|
||||
pub fn priv_mem_size(&self, dev: &Arc<Device>) -> cl_ulong {
|
||||
self.nirs.get(dev).unwrap().scratch_size() as cl_ulong
|
||||
self.dev_state.get(dev).nir.scratch_size() as cl_ulong
|
||||
}
|
||||
|
||||
pub fn local_mem_size(&self, dev: &Arc<Device>) -> cl_ulong {
|
||||
// TODO include args
|
||||
self.nirs.get(dev).unwrap().shared_size() as cl_ulong
|
||||
self.dev_state.get(dev).nir.shared_size() as cl_ulong
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1135,7 +1185,7 @@ impl Clone for Kernel {
|
|||
work_group_size: self.work_group_size,
|
||||
attributes_string: self.attributes_string.clone(),
|
||||
internal_args: self.internal_args.clone(),
|
||||
nirs: self.nirs.clone(),
|
||||
dev_state: self.dev_state.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue