rusticl/kernel: move most of the code in launch inside the closure

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29527>
This commit is contained in:
Karol Herbst 2024-06-03 18:52:27 +02:00 committed by Marge Bot
parent 436122cb10
commit bb2453c649

View file

@ -940,12 +940,21 @@ impl Kernel {
grid: &[usize],
offsets: &[usize],
) -> CLResult<EventSig> {
let nir_kernel_build = self.builds.get(q.device).unwrap().clone();
// Clone all the data we need to execute this kernel
let kernel_info = Arc::clone(&self.kernel_info);
let arg_values = self.arg_values().clone();
let nir_kernel_build = Arc::clone(&self.builds[q.device]);
// operations whose errors we want to report to the clients
let mut block = create_kernel_arr::<u32>(block, 1)?;
let mut grid = create_kernel_arr::<usize>(grid, 1)?;
let offsets = create_kernel_arr::<usize>(offsets, 0)?;
self.optimize_local_size(q.device, &mut grid, &mut block);
Ok(Box::new(move |q, ctx| {
let mut workgroup_id_offset_loc = None;
let mut input: Vec<u8> = Vec::new();
let mut input = Vec::new();
let mut resource_info = Vec::new();
// Set it once so we get the alignment padding right
let static_local_size: u64 = nir_kernel_build.shared_size;
@ -969,10 +978,7 @@ impl Kernel {
null_ptr_v3 = [0u8; 12].as_slice();
};
self.optimize_local_size(q.device, &mut grid, &mut block);
let arg_values = self.arg_values();
for (arg, val) in self.kernel_info.args.iter().zip(arg_values.iter()) {
for (arg, val) in kernel_info.args.iter().zip(arg_values.iter()) {
if arg.dead {
continue;
}
@ -1002,8 +1008,9 @@ impl Kernel {
// If resource is a buffer, the image was created from a buffer. Use strides and
// dimensions of the image then.
let app_img_info =
if res.as_ref().is_buffer() && image.mem_type == CL_MEM_OBJECT_IMAGE2D {
let app_img_info = if res.as_ref().is_buffer()
&& image.mem_type == CL_MEM_OBJECT_IMAGE2D
{
Some(AppImgInfo::new(
image.image_desc.row_pitch()? / image.image_elem_size as u32,
image.image_desc.width()?,
@ -1075,8 +1082,12 @@ impl Kernel {
// subtract the shader local_size as we only request something on top of that.
variable_local_size -= static_local_size;
let mut resources = Vec::with_capacity(resource_info.len());
let mut globals: Vec<*mut u32> = Vec::new();
let printf_format = &nir_kernel_build.printf_info;
let mut printf_buf = None;
for arg in &self.kernel_info.internal_args {
for arg in &kernel_info.internal_args {
if arg.offset > input.len() {
input.resize(arg.offset, 0);
}
@ -1090,23 +1101,7 @@ impl Kernel {
));
}
InternalKernelArgType::GlobalWorkOffsets => {
if q.device.address_bits() == 64 {
input.extend_from_slice(unsafe {
as_byte_slice(&[
offsets[0] as u64,
offsets[1] as u64,
offsets[2] as u64,
])
});
} else {
input.extend_from_slice(unsafe {
as_byte_slice(&[
offsets[0] as u32,
offsets[1] as u32,
offsets[2] as u32,
])
});
}
input.extend_from_slice(unsafe { as_byte_slice(&offsets) });
}
InternalKernelArgType::WorkGroupOffsets => {
workgroup_id_offset_loc = Some(input.len());
@ -1151,12 +1146,6 @@ impl Kernel {
}
}
Ok(Box::new(move |q, ctx| {
let mut input = input.clone();
let mut resources = Vec::with_capacity(resource_info.len());
let mut globals: Vec<*mut u32> = Vec::new();
let printf_format = &nir_kernel_build.printf_info;
let mut sviews: Vec<_> = sviews
.iter()
.map(|(s, f, aii)| ctx.create_sampler_view(s, *f, aii.as_ref()))