diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs index bb3a494e2e2..02a90c8b3b1 100644 --- a/src/gallium/frontends/rusticl/api/kernel.rs +++ b/src/gallium/frontends/rusticl/api/kernel.rs @@ -87,6 +87,7 @@ impl CLInfoObj for cl_kernel { } Ok(match *q { + CL_KERNEL_COMPILE_WORK_GROUP_SIZE => cl_prop::<[usize; 3]>(kernel.work_group_size), CL_KERNEL_LOCAL_MEM_SIZE => cl_prop::(kernel.local_mem_size(&dev)), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => cl_prop::(1), CL_KERNEL_PRIVATE_MEM_SIZE => cl_prop::(kernel.priv_mem_size(&dev)), @@ -358,6 +359,17 @@ pub fn enqueue_ndrange_kernel( return Err(CL_INVALID_WORK_ITEM_SIZE); } + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required + // work-group size for kernel in the program source. + let wgs = k.work_group_size; + if wgs[0] != 0 { + for i in 0..work_dim as usize { + if wgs[i] != local_work_size[i] { + return Err(CL_INVALID_WORK_GROUP_SIZE); + } + } + } + // If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel // command will trivially succeed after its event dependencies are satisfied and subsequently // update its completion event. @@ -375,7 +387,6 @@ pub fn enqueue_ndrange_kernel( create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb) - //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required work-group size for kernel in the program source. //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source. //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table. //• CL_INVALID_WORK_GROUP_SIZE if the work-group size must be uniform and the local_work_size is not NULL, is not equal to the required work-group size specified in the kernel source, or the global_work_size is not evenly divisible by the local_work_size. @@ -402,7 +413,7 @@ pub fn enqueue_task( kernel, 1, ptr::null(), - [1, 0, 0].as_ptr(), + [1, 1, 1].as_ptr(), [1, 0, 0].as_ptr(), num_events_in_wait_list, event_wait_list, diff --git a/src/gallium/frontends/rusticl/api/util.rs b/src/gallium/frontends/rusticl/api/util.rs index 2e6adfd483e..84fe3783bc6 100644 --- a/src/gallium/frontends/rusticl/api/util.rs +++ b/src/gallium/frontends/rusticl/api/util.rs @@ -181,6 +181,19 @@ where } } +impl CLProp for [T; I] +where + T: CLProp, +{ + fn cl_vec(&self) -> Vec { + let mut res: Vec = Vec::new(); + for i in self { + res.append(&mut i.cl_vec()) + } + res + } +} + impl CLProp for *const T { fn cl_vec(&self) -> Vec { (*self as usize).cl_vec() diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index aa1f28f59f6..cf1b828999e 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -131,6 +131,7 @@ pub struct Kernel { pub name: String, pub args: Vec, pub values: Vec>>, + pub work_group_size: [usize; 3], internal_args: Vec, nirs: HashMap, NirShader>, } @@ -148,7 +149,7 @@ fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] { // mostly like clc_spirv_to_dxil // does not DCEe uniforms or images! fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) { - nir.set_workgroup_size(&[0; 3]); + nir.set_workgroup_size_variable_if_zero(); nir.structurize(); while { let mut progress = false; @@ -300,6 +301,8 @@ impl Kernel { nirs.iter_mut() .for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc)); let nir = nirs.values_mut().next().unwrap(); + let wgs = nir.workgroup_size(); + let work_group_size = [wgs[0] as usize, wgs[1] as usize, wgs[2] as usize]; let mut args = KernelArg::from_spirv_nir(args, nir); // can't use vec!... let values = args.iter().map(|_| RefCell::new(None)).collect(); @@ -319,6 +322,7 @@ impl Kernel { prog: prog, name: name, args: args, + work_group_size: work_group_size, values: values, internal_args: internal_args, // caller has to verify all kernels have the same sig diff --git a/src/gallium/frontends/rusticl/mesa/compiler/nir.rs b/src/gallium/frontends/rusticl/mesa/compiler/nir.rs index b3067306d0a..ca1d51ae092 100644 --- a/src/gallium/frontends/rusticl/mesa/compiler/nir.rs +++ b/src/gallium/frontends/rusticl/mesa/compiler/nir.rs @@ -152,13 +152,16 @@ impl NirShader { unsafe { (*self.nir.as_ptr()).info.shared_size } } - pub fn set_workgroup_size(&self, workgroup: &[u16; 3]) { - let mut nir = self.nir.as_ptr(); + pub fn workgroup_size(&self) -> [u16; 3] { + unsafe { (*self.nir.as_ptr()).info.workgroup_size } + } + + pub fn set_workgroup_size_variable_if_zero(&self) { + let nir = self.nir.as_ptr(); unsafe { - (*nir).info.set_workgroup_size_variable(workgroup[0] == 0); - (*nir).info.workgroup_size[0] = workgroup[0]; - (*nir).info.workgroup_size[1] = workgroup[1]; - (*nir).info.workgroup_size[2] = workgroup[2]; + (*nir) + .info + .set_workgroup_size_variable((*nir).info.workgroup_size[0] == 0); } }