diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs index 497bcb49109..e4433812157 100644 --- a/src/gallium/frontends/rusticl/api/device.rs +++ b/src/gallium/frontends/rusticl/api/device.rs @@ -100,7 +100,7 @@ unsafe impl CLInfo for cl_device_id { } CL_DEVICE_HOST_UNIFIED_MEMORY => v.write::(dev.unified_memory()), CL_DEVICE_IL_VERSION => v.write::<&CStr>(SPIRV_SUPPORT_STRING), - CL_DEVICE_ILS_WITH_VERSION => v.write::>(SPIRV_SUPPORT.to_vec()), + CL_DEVICE_ILS_WITH_VERSION => v.write::<&[cl_name_version]>(&SPIRV_SUPPORT), CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT => { v.write::(dev.image_base_address_alignment()) } @@ -189,7 +189,7 @@ unsafe impl CLInfo for cl_device_id { CL_DEVICE_MAX_SAMPLERS => v.write::(dev.max_samplers()), CL_DEVICE_MAX_WORK_GROUP_SIZE => v.write::(dev.max_threads_per_block()), CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS => v.write::(dev.max_grid_dimensions()), - CL_DEVICE_MAX_WORK_ITEM_SIZES => v.write::>(dev.max_block_sizes()), + CL_DEVICE_MAX_WORK_ITEM_SIZES => v.write::<&[usize]>(&dev.max_block_sizes()), CL_DEVICE_MAX_WRITE_IMAGE_ARGS => v.write::(dev.caps.max_write_images), // TODO proper retrival from devices CL_DEVICE_MEM_BASE_ADDR_ALIGN => v.write::(0x1000), diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs index ae864a67fdc..986ee434055 100644 --- a/src/gallium/frontends/rusticl/api/kernel.rs +++ b/src/gallium/frontends/rusticl/api/kernel.rs @@ -50,8 +50,10 @@ unsafe impl CLInfoObj for cl_kernel { fn query(&self, idx: cl_uint, q: cl_kernel_arg_info, v: CLInfoValue) -> CLResult { let kernel = Kernel::ref_from_raw(*self)?; + let idx = idx as usize; + // CL_INVALID_ARG_INDEX if arg_index is not a valid argument index. - if idx as usize >= kernel.kernel_info.args.len() { + if idx >= kernel.kernel_info.args.len() { return Err(CL_INVALID_ARG_INDEX); } diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs index 94fb12ab6d7..f8546758891 100644 --- a/src/gallium/frontends/rusticl/core/device.rs +++ b/src/gallium/frontends/rusticl/core/device.rs @@ -785,7 +785,7 @@ impl Device { 1 << 26, min( self.max_mem_alloc(), - self.screen.caps().max_shader_buffer_size as u64, + self.screen.caps().max_shader_buffer_size.into(), ), ) } @@ -1003,17 +1003,20 @@ impl Device { self.screen.compute_caps().max_local_size as cl_ulong } - pub fn max_block_sizes(&self) -> Vec { - let v: [u32; 3] = self.screen.compute_caps().max_block_size; - v.into_iter().map(|v| v as usize).collect() + pub fn max_block_sizes(&self) -> [usize; 3] { + self.screen + .compute_caps() + .max_block_size + .map(|value| value as usize) } - pub fn max_grid_size(&self) -> Vec { - let v: [u32; 3] = self.screen.compute_caps().max_grid_size; - v.into_iter() - .map(|a| min(a, Platform::dbg().max_grid_size)) - .map(|v| v as usize) - .collect() + pub fn max_grid_size(&self) -> [usize; 3] { + self.screen + .compute_caps() + .max_grid_size + .map(|screen_max_grid_size| { + min(screen_max_grid_size, Platform::dbg().max_grid_size) as usize + }) } pub fn max_clock_freq(&self) -> cl_uint { diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index 497516ef7ed..f863b94167a 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -122,7 +122,7 @@ impl KernelArgType { #[derive(Hash, PartialEq, Eq, Clone)] enum CompiledKernelArgType { - APIArg(u32), + APIArg(usize), ConstantBuffer, GlobalWorkOffsets, GlobalWorkSize, @@ -235,7 +235,7 @@ struct CompiledKernelArg { kind: CompiledKernelArgType, /// The binding for image/sampler args, the offset into the input buffer /// for anything else. - offset: u32, + offset: usize, dead: bool, } @@ -254,7 +254,7 @@ impl CompiledKernelArg { var.data.binding } else { var.data.driver_location - }; + } as usize; } } @@ -262,7 +262,7 @@ impl CompiledKernelArg { unsafe { blob_write_uint16(blob, args.len() as u16); for arg in args { - blob_write_uint32(blob, arg.offset); + blob_write_uint32(blob, arg.offset as u32); blob_write_uint8(blob, arg.dead.into()); match arg.kind { CompiledKernelArgType::ConstantBuffer => blob_write_uint8(blob, 0), @@ -282,7 +282,7 @@ impl CompiledKernelArg { CompiledKernelArgType::GlobalWorkSize => blob_write_uint8(blob, 9), CompiledKernelArgType::APIArg(idx) => { blob_write_uint8(blob, 10); - blob_write_uint32(blob, idx) + blob_write_uint32(blob, idx as u32) } }; } @@ -295,7 +295,7 @@ impl CompiledKernelArg { let mut res = Vec::with_capacity(len); for _ in 0..len { - let offset = blob_read_uint32(blob); + let offset = blob_read_uint32(blob) as usize; let dead = blob_read_uint8(blob) != 0; let kind = match blob_read_uint8(blob) { @@ -315,7 +315,7 @@ impl CompiledKernelArg { 8 => CompiledKernelArgType::NumWorkgroups, 9 => CompiledKernelArgType::GlobalWorkSize, 10 => { - let idx = blob_read_uint32(blob); + let idx = blob_read_uint32(blob) as usize; CompiledKernelArgType::APIArg(idx) } _ => return None, @@ -1009,7 +1009,7 @@ fn compile_nir_variant( /* update the has_variable_shared_mem info as we might have DCEed all of them */ nir.set_has_variable_shared_mem(compiled_args.iter().any(|arg| { if let CompiledKernelArgType::APIArg(idx) = arg.kind { - args[idx as usize].kind == KernelArgType::MemLocal && !arg.dead + args[idx].kind == KernelArgType::MemLocal && !arg.dead } else { false } @@ -1042,7 +1042,7 @@ fn compile_nir_remaining( // add all API kernel args let mut compiled_args: Vec<_> = (0..args.len()) .map(|idx| CompiledKernelArg { - kind: CompiledKernelArgType::APIArg(idx as u32), + kind: CompiledKernelArgType::APIArg(idx), offset: 0, dead: true, }) @@ -1196,7 +1196,7 @@ pub(super) fn convert_spirv_to_nir( for arg in &build.compiled_args { if let CompiledKernelArgType::APIArg(idx) = arg.kind { - args[idx as usize].dead &= arg.dead; + args[idx].dead &= arg.dead; } } } @@ -1351,7 +1351,7 @@ impl Kernel { self.optimize_local_size(q.device, &mut grid, &mut block); Ok(Box::new(move |q, ctx| { - let hw_max_grid: Vec = q.device.max_grid_size(); + let hw_max_grid = q.device.max_grid_size(); let variant = if offsets == [0; 3] && grid[0] <= hw_max_grid[0] @@ -1432,23 +1432,23 @@ impl Kernel { for arg in &nir_kernel_build.compiled_args { let is_opaque = if let CompiledKernelArgType::APIArg(idx) = arg.kind { - kernel_info.args[idx as usize].kind.is_opaque() + kernel_info.args[idx].kind.is_opaque() } else { false }; - if !is_opaque && arg.offset as usize > input.len() { - input.resize(arg.offset as usize, 0); + if !is_opaque && arg.offset > input.len() { + input.resize(arg.offset, 0); } match arg.kind { CompiledKernelArgType::APIArg(idx) => { - let api_arg = &kernel_info.args[idx as usize]; + let api_arg = &kernel_info.args[idx]; if api_arg.dead { continue; } - let Some(value) = &arg_values[idx as usize] else { + let Some(value) = &arg_values[idx] else { continue; }; @@ -1480,11 +1480,10 @@ impl Kernel { (&mut tex_formats, &mut tex_orders) }; - let binding = arg.offset as usize; - assert!(binding >= formats.len()); + assert!(arg.offset >= formats.len()); - formats.resize(binding, 0); - orders.resize(binding, 0); + formats.resize(arg.offset, 0); + orders.resize(arg.offset, 0); formats.push(image.image_format.image_channel_data_type as u16); orders.push(image.image_format.image_channel_order as u16); @@ -1655,8 +1654,8 @@ impl Kernel { Ok(()) } - pub fn access_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_access_qualifier { - let aq = self.kernel_info.args[idx as usize].spirv.access_qualifier; + pub fn access_qualifier(&self, idx: usize) -> cl_kernel_arg_access_qualifier { + let aq = self.kernel_info.args[idx].spirv.access_qualifier; if aq == clc_kernel_arg_access_qualifier::CLC_KERNEL_ARG_ACCESS_READ @@ -1672,8 +1671,8 @@ impl Kernel { } } - pub fn address_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_address_qualifier { - match self.kernel_info.args[idx as usize].spirv.address_qualifier { + pub fn address_qualifier(&self, idx: usize) -> cl_kernel_arg_address_qualifier { + match self.kernel_info.args[idx].spirv.address_qualifier { clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => { CL_KERNEL_ARG_ADDRESS_PRIVATE } @@ -1689,8 +1688,8 @@ impl Kernel { } } - pub fn type_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_type_qualifier { - let tq = self.kernel_info.args[idx as usize].spirv.type_qualifier; + pub fn type_qualifier(&self, idx: usize) -> cl_kernel_arg_type_qualifier { + let tq = self.kernel_info.args[idx].spirv.type_qualifier; let zero = clc_kernel_arg_type_qualifier(0); let mut res = CL_KERNEL_ARG_TYPE_NONE; @@ -1721,13 +1720,13 @@ impl Kernel { self.kernel_info.subgroup_size } - pub fn arg_name(&self, idx: cl_uint) -> Option<&CStr> { - let name = &self.kernel_info.args[idx as usize].spirv.name; + pub fn arg_name(&self, idx: usize) -> Option<&CStr> { + let name = &self.kernel_info.args[idx].spirv.name; name.is_empty().not().then_some(name) } - pub fn arg_type_name(&self, idx: cl_uint) -> Option<&CStr> { - let type_name = &self.kernel_info.args[idx as usize].spirv.type_name; + pub fn arg_type_name(&self, idx: usize) -> Option<&CStr> { + let type_name = &self.kernel_info.args[idx].spirv.type_name; type_name.is_empty().not().then_some(type_name) }