rusticl/kernel: implement CL_KERNEL_COMPILE_WORK_GROUP_SIZE

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
This commit is contained in:
Karol Herbst 2022-03-17 17:38:41 +01:00 committed by Marge Bot
parent 43f3a4fc97
commit f1bbf85678
4 changed files with 40 additions and 9 deletions

View file

@ -87,6 +87,7 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
}
Ok(match *q {
CL_KERNEL_COMPILE_WORK_GROUP_SIZE => cl_prop::<[usize; 3]>(kernel.work_group_size),
CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::<cl_ulong>(kernel.local_mem_size(&dev)),
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => cl_prop::<usize>(1),
CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::<cl_ulong>(kernel.priv_mem_size(&dev)),
@ -358,6 +359,17 @@ pub fn enqueue_ndrange_kernel(
return Err(CL_INVALID_WORK_ITEM_SIZE);
}
// CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required
// work-group size for kernel in the program source.
let wgs = k.work_group_size;
if wgs[0] != 0 {
for i in 0..work_dim as usize {
if wgs[i] != local_work_size[i] {
return Err(CL_INVALID_WORK_GROUP_SIZE);
}
}
}
// If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel
// command will trivially succeed after its event dependencies are satisfied and subsequently
// update its completion event.
@ -375,7 +387,6 @@ pub fn enqueue_ndrange_kernel(
create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required work-group size for kernel in the program source.
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source.
//• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table.
//• CL_INVALID_WORK_GROUP_SIZE if the work-group size must be uniform and the local_work_size is not NULL, is not equal to the required work-group size specified in the kernel source, or the global_work_size is not evenly divisible by the local_work_size.
@ -402,7 +413,7 @@ pub fn enqueue_task(
kernel,
1,
ptr::null(),
[1, 0, 0].as_ptr(),
[1, 1, 1].as_ptr(),
[1, 0, 0].as_ptr(),
num_events_in_wait_list,
event_wait_list,

View file

@ -181,6 +181,19 @@ where
}
}
impl<T, const I: usize> CLProp for [T; I]
where
T: CLProp,
{
fn cl_vec(&self) -> Vec<u8> {
let mut res: Vec<u8> = Vec::new();
for i in self {
res.append(&mut i.cl_vec())
}
res
}
}
impl<T> CLProp for *const T {
fn cl_vec(&self) -> Vec<u8> {
(*self as usize).cl_vec()

View file

@ -131,6 +131,7 @@ pub struct Kernel {
pub name: String,
pub args: Vec<KernelArg>,
pub values: Vec<RefCell<Option<KernelArgValue>>>,
pub work_group_size: [usize; 3],
internal_args: Vec<InternalKernelArg>,
nirs: HashMap<Arc<Device>, NirShader>,
}
@ -148,7 +149,7 @@ fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] {
// mostly like clc_spirv_to_dxil
// does not DCEe uniforms or images!
fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
nir.set_workgroup_size(&[0; 3]);
nir.set_workgroup_size_variable_if_zero();
nir.structurize();
while {
let mut progress = false;
@ -300,6 +301,8 @@ impl Kernel {
nirs.iter_mut()
.for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc));
let nir = nirs.values_mut().next().unwrap();
let wgs = nir.workgroup_size();
let work_group_size = [wgs[0] as usize, wgs[1] as usize, wgs[2] as usize];
let mut args = KernelArg::from_spirv_nir(args, nir);
// can't use vec!...
let values = args.iter().map(|_| RefCell::new(None)).collect();
@ -319,6 +322,7 @@ impl Kernel {
prog: prog,
name: name,
args: args,
work_group_size: work_group_size,
values: values,
internal_args: internal_args,
// caller has to verify all kernels have the same sig

View file

@ -152,13 +152,16 @@ impl NirShader {
unsafe { (*self.nir.as_ptr()).info.shared_size }
}
pub fn set_workgroup_size(&self, workgroup: &[u16; 3]) {
let mut nir = self.nir.as_ptr();
pub fn workgroup_size(&self) -> [u16; 3] {
unsafe { (*self.nir.as_ptr()).info.workgroup_size }
}
pub fn set_workgroup_size_variable_if_zero(&self) {
let nir = self.nir.as_ptr();
unsafe {
(*nir).info.set_workgroup_size_variable(workgroup[0] == 0);
(*nir).info.workgroup_size[0] = workgroup[0];
(*nir).info.workgroup_size[1] = workgroup[1];
(*nir).info.workgroup_size[2] = workgroup[2];
(*nir)
.info
.set_workgroup_size_variable((*nir).info.workgroup_size[0] == 0);
}
}