diff --git a/meson.build b/meson.build
index 2acc4499a48..6250de545fd 100644
--- a/meson.build
+++ b/meson.build
@@ -966,6 +966,7 @@ if with_gallium_rusticl
   add_languages('rust', required: true)
 
   with_clc = true
+  with_libclc = true
 endif
 
 dep_clc = null_dep
diff --git a/src/gallium/frontends/rusticl/api/icd.rs b/src/gallium/frontends/rusticl/api/icd.rs
index e7febc90030..aff678e57b2 100644
--- a/src/gallium/frontends/rusticl/api/icd.rs
+++ b/src/gallium/frontends/rusticl/api/icd.rs
@@ -1079,18 +1079,27 @@ extern "C" fn cl_enqueue_unmap_mem_object(
 }
 
 extern "C" fn cl_enqueue_ndrange_kernel(
-    _command_queue: cl_command_queue,
-    _kernel: cl_kernel,
-    _work_dim: cl_uint,
-    _global_work_offset: *const usize,
-    _global_work_size: *const usize,
-    _local_work_size: *const usize,
-    _num_events_in_wait_list: cl_uint,
-    _event_wait_list: *const cl_event,
-    _event: *mut cl_event,
+    command_queue: cl_command_queue,
+    kernel: cl_kernel,
+    work_dim: cl_uint,
+    global_work_offset: *const usize,
+    global_work_size: *const usize,
+    local_work_size: *const usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
 ) -> cl_int {
-    println!("cl_enqueue_ndrange_kernel not implemented");
-    CL_OUT_OF_HOST_MEMORY
+    match_err!(enqueue_ndrange_kernel(
+        command_queue,
+        kernel,
+        work_dim,
+        global_work_offset,
+        global_work_size,
+        local_work_size,
+        num_events_in_wait_list,
+        event_wait_list,
+        event
+    ))
 }
 
 extern "C" fn cl_get_extension_function_address(
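The new entry point is deliberately thin: every check lives in `enqueue_ndrange_kernel`, and `match_err!` collapses its `CLResult` into the raw `cl_int` the ICD loader expects. A self-contained sketch of that dispatch pattern; the macro, constants, and function names below are illustrative stand-ins, not rusticl's actual definitions:

#![allow(non_camel_case_types)]

type cl_int = i32;
const CL_SUCCESS: cl_int = 0;
const CL_INVALID_VALUE: cl_int = -30;

type CLResult<T> = Result<T, cl_int>;

// Unwrap a CLResult into the raw error code crossing the ICD boundary.
macro_rules! match_err {
    ($e:expr) => {
        match $e {
            Ok(_) => CL_SUCCESS,
            Err(e) => e,
        }
    };
}

// All validation happens in plain Rust that returns CLResult.
fn enqueue_checked(work_dim: u32) -> CLResult<()> {
    if work_dim == 0 {
        return Err(CL_INVALID_VALUE);
    }
    Ok(())
}

// The extern "C" shim only forwards to the checked implementation.
extern "C" fn cl_entry_point(work_dim: u32) -> cl_int {
    match_err!(enqueue_checked(work_dim))
}

fn main() {
    assert_eq!(cl_entry_point(0), CL_INVALID_VALUE);
    assert_eq!(cl_entry_point(16), CL_SUCCESS);
}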
diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs
index 0a67f8da6bb..2818b0477a1 100644
--- a/src/gallium/frontends/rusticl/api/kernel.rs
+++ b/src/gallium/frontends/rusticl/api/kernel.rs
@@ -1,14 +1,17 @@
 extern crate mesa_rust_util;
 extern crate rusticl_opencl_gen;
 
+use crate::api::event::create_and_queue;
 use crate::api::icd::*;
 use crate::api::util::*;
+use crate::core::event::*;
 use crate::core::kernel::*;
 
 use self::mesa_rust_util::string::*;
 use self::rusticl_opencl_gen::*;
 
 use std::collections::HashSet;
+use std::slice;
 use std::sync::Arc;
 
 impl CLInfo<cl_kernel_info> for cl_kernel {
@@ -73,6 +76,19 @@ impl CLInfoObj<cl_kernel_work_group_info, cl_device_id> for cl_kernel {
     }
 }
 
+const ZERO_ARR: [usize; 3] = [0; 3];
+
+/// # Safety
+///
+/// This function is only safe when called on an array of `work_dim` length
+unsafe fn kernel_work_arr_or_default<'a>(arr: *const usize, work_dim: cl_uint) -> &'a [usize] {
+    if !arr.is_null() {
+        slice::from_raw_parts(arr, work_dim as usize)
+    } else {
+        &ZERO_ARR
+    }
+}
+
 pub fn create_kernel(
     program: cl_program,
     kernel_name: *const ::std::os::raw::c_char,
@@ -108,6 +124,7 @@ pub fn create_kernel(
     // CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel function given by
     // kernel_name such as the number of arguments, the argument types are not the same for all
     // devices for which the program executable has been built.
+    let nirs = p.nirs(&name);
     let kernel_args: HashSet<_> = devs.iter().map(|d| p.args(d, &name)).collect();
     if kernel_args.len() != 1 {
         return Err(CL_INVALID_KERNEL_DEFINITION);
@@ -116,6 +133,7 @@ pub fn create_kernel(
     Ok(cl_kernel::from_arc(Kernel::new(
         name,
         p,
+        nirs,
         kernel_args.into_iter().next().unwrap(),
     )))
 }
@@ -123,24 +141,177 @@ pub fn create_kernel(
 pub fn set_kernel_arg(
     kernel: cl_kernel,
     arg_index: cl_uint,
-    _arg_size: usize,
-    _arg_value: *const ::std::os::raw::c_void,
+    arg_size: usize,
+    arg_value: *const ::std::os::raw::c_void,
 ) -> CLResult<()> {
     let k = kernel.get_arc()?;
 
     // CL_INVALID_ARG_INDEX if arg_index is not a valid argument index.
-    if arg_index as usize >= k.args.len() {
-        return Err(CL_INVALID_ARG_INDEX);
+    if let Some(arg) = k.args.get(arg_index as usize) {
+        // CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument
+        // that is not a memory object or if the argument is a memory object and
+        // arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the
+        // local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
+        match arg.kind {
+            KernelArgType::MemLocal => {
+                if arg_size == 0 {
+                    return Err(CL_INVALID_ARG_SIZE);
+                }
+            }
+            _ => {
+                if arg.size != arg_size {
+                    return Err(CL_INVALID_ARG_SIZE);
+                }
+            }
+        }
+
+        // CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
+        match arg.kind {
+            // If the argument is declared with the local qualifier, the arg_value entry must be
+            // NULL.
+            KernelArgType::MemLocal => {
+                if !arg_value.is_null() {
+                    return Err(CL_INVALID_ARG_VALUE);
+                }
+            }
+            // If the argument is of type sampler_t, the arg_value entry must be a pointer to the
+            // sampler object.
+            KernelArgType::Constant | KernelArgType::Sampler => {
+                if arg_value.is_null() {
+                    return Err(CL_INVALID_ARG_VALUE);
+                }
+            }
+            _ => {}
+        };
+
+        // let's create the arg now
+        let arg = unsafe {
+            if arg.dead {
+                KernelArgValue::None
+            } else {
+                match arg.kind {
+                    KernelArgType::Constant => KernelArgValue::Constant(
+                        slice::from_raw_parts(arg_value.cast(), arg_size).to_vec(),
+                    ),
+                    KernelArgType::MemConstant | KernelArgType::MemGlobal => {
+                        let ptr: *const cl_mem = arg_value.cast();
+                        if ptr.is_null() || (*ptr).is_null() {
+                            KernelArgValue::None
+                        } else {
+                            KernelArgValue::MemObject((*ptr).get_ref()?)
+                        }
+                    }
+                    KernelArgType::MemLocal => KernelArgValue::LocalMem(arg_size),
+                    KernelArgType::Sampler => {
+                        let ptr: *const cl_sampler = arg_value.cast();
+                        KernelArgValue::Sampler((*ptr).get_ref()?)
+                    }
+                }
+            }
+        };
+        k.values.get(arg_index as usize).unwrap().replace(Some(arg));
+        Ok(())
+    } else {
+        Err(CL_INVALID_ARG_INDEX)
     }
 
-    //• CL_INVALID_ARG_VALUE if arg_value specified is not a valid value.
-    //• CL_INVALID_MEM_OBJECT for an argument declared to be a memory object when the specified arg_value is not a valid memory object.
-    //• CL_INVALID_SAMPLER for an argument declared to be of type sampler_t when the specified arg_value is not a valid sampler object.
     //• CL_INVALID_DEVICE_QUEUE for an argument declared to be of type queue_t when the specified arg_value is not a valid device queue object. This error code is missing before version 2.0.
-    //• CL_INVALID_ARG_SIZE if arg_size does not match the size of the data type for an argument that is not a memory object or if the argument is a memory object and arg_size != sizeof(cl_mem) or if arg_size is zero and the argument is declared with the local qualifier or if the argument is a sampler and arg_size != sizeof(cl_sampler).
-    //• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language-specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration. This error code is missing before version 2.2.
     //• CL_INVALID_ARG_VALUE if the argument is an image declared with the read_only qualifier and arg_value refers to an image object created with cl_mem_flags of CL_MEM_WRITE_ONLY or if the image argument is declared with the write_only qualifier and arg_value refers to an image object created with cl_mem_flags of CL_MEM_READ_ONLY.
-
-    println!("set_kernel_arg not implemented");
-    Err(CL_OUT_OF_HOST_MEMORY)
+    //• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language-specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration. This error code is missing before version 2.2.
 }
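`kernel_work_arr_or_default` above encodes the API rule that `global_work_offset` and `local_work_size` may be NULL: a null pointer falls back to a zeroed three-element default. A standalone sketch of the same null-or-default slice pattern (toy rewrite, not the rusticl source):

use std::slice;

const ZERO_ARR: [usize; 3] = [0; 3];

/// # Safety
/// `arr` must be null or point to at least `work_dim` elements.
unsafe fn work_arr_or_default<'a>(arr: *const usize, work_dim: u32) -> &'a [usize] {
    if !arr.is_null() {
        slice::from_raw_parts(arr, work_dim as usize)
    } else {
        &ZERO_ARR
    }
}

fn main() {
    let sizes = [64usize, 8, 1];
    // Non-null: a view of the caller's array, trimmed to work_dim entries.
    assert_eq!(unsafe { work_arr_or_default(sizes.as_ptr(), 2) }, &[64, 8][..]);
    // Null: the zeroed default, as the spec allows for the offset array.
    assert_eq!(unsafe { work_arr_or_default(std::ptr::null(), 2) }, &[0, 0, 0][..]);
}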
+
+pub fn enqueue_ndrange_kernel(
+    command_queue: cl_command_queue,
+    kernel: cl_kernel,
+    work_dim: cl_uint,
+    global_work_offset: *const usize,
+    global_work_size: *const usize,
+    local_work_size: *const usize,
+    num_events_in_wait_list: cl_uint,
+    event_wait_list: *const cl_event,
+    event: *mut cl_event,
+) -> CLResult<()> {
+    let q = command_queue.get_arc()?;
+    let k = kernel.get_arc()?;
+    let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?;
+
+    // CL_INVALID_CONTEXT if context associated with command_queue and kernel are not the same
+    if q.context != k.prog.context {
+        return Err(CL_INVALID_CONTEXT);
+    }
+
+    // CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built program executable available
+    // for device associated with command_queue.
+    if k.prog.status(&q.device) != CL_BUILD_SUCCESS as cl_build_status {
+        return Err(CL_INVALID_PROGRAM_EXECUTABLE);
+    }
+
+    // CL_INVALID_KERNEL_ARGS if the kernel argument values have not been specified.
+    if k.values.iter().any(|v| v.borrow().is_none()) {
+        return Err(CL_INVALID_KERNEL_ARGS);
+    }
+
+    // CL_INVALID_WORK_DIMENSION if work_dim is not a valid value (i.e. a value between 1 and
+    // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS).
+    if work_dim == 0 || work_dim > q.device.max_grid_dimensions() {
+        return Err(CL_INVALID_WORK_DIMENSION);
+    }
+
+    // we assume the application gets it right and doesn't pass shorter arrays than actually needed.
+    let global_work_size = unsafe { kernel_work_arr_or_default(global_work_size, work_dim) };
+    let local_work_size = unsafe { kernel_work_arr_or_default(local_work_size, work_dim) };
+    let global_work_offset = unsafe { kernel_work_arr_or_default(global_work_offset, work_dim) };
+
+    if q.device.address_bits() == 32 {
+        for (s, o) in global_work_size.iter().zip(global_work_offset) {
+            // CL_INVALID_GLOBAL_WORK_SIZE if any of the values specified in global_work_size[0], …
+            // global_work_size[work_dim - 1] exceed the maximum value representable by size_t on
+            // the device on which the kernel-instance will be enqueued.
+            if *s > u32::MAX as usize {
+                return Err(CL_INVALID_GLOBAL_WORK_SIZE);
+            }
+            // CL_INVALID_GLOBAL_OFFSET if the value specified in global_work_size + the
+            // corresponding values in global_work_offset for any dimensions is greater than the
+            // maximum value representable by size_t on the device on which the kernel-instance
+            // will be enqueued
+            if s + o > u32::MAX as usize {
+                return Err(CL_INVALID_GLOBAL_OFFSET);
+            }
+        }
+    }
+
+    // CL_INVALID_WORK_ITEM_SIZE if the number of work-items specified in any of
+    // local_work_size[0], … local_work_size[work_dim - 1] is greater than the corresponding values
+    // specified by CL_DEVICE_MAX_WORK_ITEM_SIZES[0], …, CL_DEVICE_MAX_WORK_ITEM_SIZES[work_dim - 1].
+    // Compare per dimension; a lexicographic Iterator::gt would wave through invalid sizes.
+    if local_work_size
+        .iter()
+        .zip(q.device.max_block_sizes().iter())
+        .any(|(l, m)| l > m)
+    {
+        return Err(CL_INVALID_WORK_ITEM_SIZE);
+    }
+
+    // If global_work_size is NULL, or the value in any passed dimension is 0 then the kernel
+    // command will trivially succeed after its event dependencies are satisfied and subsequently
+    // update its completion event.
+    let cb: EventSig = if global_work_size.contains(&0) {
+        Box::new(|_, _| Ok(()))
+    } else {
+        k.launch(
+            &q,
+            work_dim,
+            local_work_size,
+            global_work_size,
+            global_work_offset,
+        )
+    };
+
+    create_and_queue(q, CL_COMMAND_NDRANGE_KERNEL, evs, event, false, cb)
+
+    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not match the required work-group size for kernel in the program source.
+    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not consistent with the required number of sub-groups for kernel in the program source.
+    //• CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the total number of work-items in the work-group computed as local_work_size[0] × … local_work_size[work_dim - 1] is greater than the value specified by CL_KERNEL_WORK_GROUP_SIZE in the Kernel Object Device Queries table.
    //• CL_INVALID_WORK_GROUP_SIZE if the work-group size must be uniform and the local_work_size is not NULL, is not equal to the required work-group size specified in the kernel source, or the global_work_size is not evenly divisible by the local_work_size.
+    //• CL_MISALIGNED_SUB_BUFFER_OFFSET if a sub-buffer object is specified as the value for an argument that is a buffer object and the offset specified when the sub-buffer object is created is not aligned to CL_DEVICE_MEM_BASE_ADDR_ALIGN value for device associated with queue. This error code is missing before version 1.1.
+    //• CL_INVALID_IMAGE_SIZE if an image object is specified as an argument value and the image dimensions (image width, height, specified or compute row and/or slice pitch) are not supported by device associated with queue.
+    //• CL_IMAGE_FORMAT_NOT_SUPPORTED if an image object is specified as an argument value and the image format (image channel order and data type) is not supported by device associated with queue.
+    //• CL_OUT_OF_RESOURCES if there is a failure to queue the execution instance of kernel on the command-queue because of insufficient resources needed to execute the kernel. For example, the explicitly specified local_work_size causes a failure to execute the kernel because of insufficient resources such as registers or local memory. Another example would be the number of read-only image args used in kernel exceed the CL_DEVICE_MAX_READ_IMAGE_ARGS value for device or the number of write-only and read-write image args used in kernel exceed the CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS value for device or the number of samplers used in kernel exceed CL_DEVICE_MAX_SAMPLERS for device.
+    //• CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate memory for data store associated with image or buffer objects specified as arguments to kernel.
+    //• CL_INVALID_OPERATION if SVM pointers are passed as arguments to a kernel and the device does not support SVM or if system pointers are passed as arguments to a kernel and/or stored inside SVM allocations passed as kernel arguments and the device does not support fine grain system SVM allocations.
+}
diff --git a/src/gallium/frontends/rusticl/core/context.rs b/src/gallium/frontends/rusticl/core/context.rs
index 5e8a9d23afd..74fc33a0d80 100644
--- a/src/gallium/frontends/rusticl/core/context.rs
+++ b/src/gallium/frontends/rusticl/core/context.rs
@@ -30,7 +30,7 @@ impl Context {
         })
     }
 
-    pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, PipeResource>> {
+    pub fn create_buffer(&self, size: usize) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
         let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
         let mut res = HashMap::new();
         for dev in &self.devs {
@@ -38,7 +38,7 @@ impl Context {
                 .screen()
                 .resource_create_buffer(adj_size)
                 .ok_or(CL_OUT_OF_RESOURCES);
-            res.insert(Arc::clone(dev), resource?);
+            res.insert(Arc::clone(dev), Arc::new(resource?));
         }
         Ok(res)
     }
@@ -47,7 +47,7 @@ impl Context {
         &self,
         size: usize,
         user_ptr: *mut c_void,
-    ) -> CLResult<HashMap<Arc<Device>, PipeResource>> {
+    ) -> CLResult<HashMap<Arc<Device>, Arc<PipeResource>>> {
         let adj_size: u32 = size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
         let mut res = HashMap::new();
         for dev in &self.devs {
@@ -55,7 +55,7 @@ impl Context {
                 .screen()
                 .resource_create_buffer_from_user(adj_size, user_ptr)
                 .ok_or(CL_OUT_OF_RESOURCES);
-            res.insert(Arc::clone(dev), resource?);
+            res.insert(Arc::clone(dev), Arc::new(resource?));
         }
         Ok(res)
     }
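core/context.rs now hands out `Arc<PipeResource>` instead of owning the resources outright: once kernel launches capture buffers inside queued closures, a resource has to be able to outlive the map entry that created it. A toy standalone sketch of that ownership pattern (plain types stand in for rusticl's `Device` and `PipeResource`):

use std::collections::HashMap;
use std::sync::Arc;

struct Resource(Vec<u8>); // stand-in for PipeResource

fn main() {
    let mut per_dev: HashMap<u32, Arc<Resource>> = HashMap::new();
    per_dev.insert(0, Arc::new(Resource(vec![0; 64])));

    // An enqueued command must own its inputs: clone the Arc into a 'static closure.
    let res = Arc::clone(per_dev.get(&0).unwrap());
    let cb: Box<dyn Fn() + Send> = Box::new(move || {
        // The buffer stays alive here even after the map entry is gone.
        assert_eq!(res.0.len(), 64);
    });

    per_dev.clear(); // the queued work outlives the context's bookkeeping
    cb();
}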
diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs
index 8ac22cec895..79afe8472dc 100644
--- a/src/gallium/frontends/rusticl/core/device.rs
+++ b/src/gallium/frontends/rusticl/core/device.rs
@@ -10,6 +10,8 @@ use crate::core::util::*;
 use crate::core::version::*;
 use crate::impl_cl_type_trait;
 
+use self::mesa_rust::compiler::clc::*;
+use self::mesa_rust::compiler::nir::*;
 use self::mesa_rust::pipe::context::*;
 use self::mesa_rust::pipe::device::load_screens;
 use self::mesa_rust::pipe::screen::*;
@@ -27,7 +29,7 @@ use std::sync::MutexGuard;
 
 pub struct Device {
     pub base: CLObjectBase<CL_INVALID_DEVICE>,
-    screen: Arc<PipeScreen>,
+    pub screen: Arc<PipeScreen>,
     pub cl_version: CLVersion,
     pub clc_version: CLVersion,
     pub clc_versions: Vec<cl_name_version>,
@@ -36,6 +38,7 @@ pub struct Device {
     pub extension_string: String,
     pub extensions: Vec<cl_name_version>,
     pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_uint>>,
+    pub lib_clc: NirShader,
     helper_ctx: Mutex<PipeContext>,
 }
 
@@ -43,6 +46,11 @@ impl_cl_type_trait!(cl_device_id, Device, CL_INVALID_DEVICE);
 
 impl Device {
     fn new(screen: Arc<PipeScreen>) -> Option<Arc<Device>> {
+        if !Self::check_valid(&screen) {
+            return None;
+        }
+
+        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
         let mut d = Self {
             base: CLObjectBase::new(),
             helper_ctx: Mutex::new(screen.create_context().unwrap()),
@@ -55,12 +63,9 @@ impl Device {
             extension_string: String::from(""),
             extensions: Vec::new(),
             formats: HashMap::new(),
+            lib_clc: lib_clc?,
         };
 
-        if !d.check_valid() {
-            return None;
-        }
-
         d.fill_format_tables();
 
         // check if we are embedded or full profile first
@@ -112,17 +117,21 @@ impl Device {
         }
     }
 
-    fn check_valid(&self) -> bool {
-        if self.screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0 ||
+    fn check_valid(screen: &PipeScreen) -> bool {
+        if screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0 ||
            // even though we use PIPE_SHADER_IR_NIR, PIPE_SHADER_IR_NIR_SERIALIZED marks CL support by the driver
-           self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR_SERIALIZED as i32)) == 0
+           screen.shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR_SERIALIZED as i32)) == 0
         {
             return false;
         }
 
         // CL_DEVICE_MAX_PARAMETER_SIZE
         // For this minimum value, only a maximum of 128 arguments can be passed to a kernel
-        if self.param_max_size() < 128 {
+        if ComputeParam::<u64>::compute_param(
+            screen,
+            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_INPUT_SIZE,
+        ) < 128
+        {
             return false;
         }
 
         true
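The rewritten `check_valid` also shows why `ComputeParam::<u64>` needs a turbofish: the trait is generic over its return type, so the call site has to pin which impl is queried. A minimal standalone model of such a trait (toy types and caps, not mesa's definitions):

// Toy ComputeParam-style trait whose return type selects the impl.
trait ComputeParam<T> {
    fn compute_param(&self, cap: &'static str) -> T;
}

struct Screen;

impl ComputeParam<u64> for Screen {
    fn compute_param(&self, cap: &'static str) -> u64 {
        match cap {
            "MAX_INPUT_SIZE" => 4096,
            _ => 0,
        }
    }
}

fn main() {
    let screen = Screen;
    // Without ::<u64> the compiler cannot infer which impl to use.
    let max_input = ComputeParam::<u64>::compute_param(&screen, "MAX_INPUT_SIZE");
    assert!(max_input >= 128);
}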
diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs
index 8b487556446..51e15ac412b 100644
--- a/src/gallium/frontends/rusticl/core/kernel.rs
+++ b/src/gallium/frontends/rusticl/core/kernel.rs
@@ -3,37 +3,430 @@ extern crate mesa_rust_gen;
 extern crate rusticl_opencl_gen;
 
 use crate::api::icd::*;
+use crate::core::device::*;
+use crate::core::event::*;
+use crate::core::memory::*;
 use crate::core::program::*;
+use crate::core::queue::*;
 use crate::impl_cl_type_trait;
 
 use self::mesa_rust::compiler::clc::*;
+use self::mesa_rust::compiler::nir::*;
 use self::mesa_rust_gen::*;
 use self::rusticl_opencl_gen::*;
 
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::convert::TryInto;
+use std::ptr;
 use std::sync::Arc;
 
+// ugh, we are not allowed to take refs, so...
+pub enum KernelArgValue {
+    None,
+    Constant(Vec<u8>),
+    MemObject(&'static Mem),
+    Sampler(&'static Sampler),
+    LocalMem(usize),
+}
+
+#[derive(PartialEq, Eq)]
+pub enum KernelArgType {
+    Constant, // for anything passed by value
+    Sampler,
+    MemGlobal,
+    MemConstant,
+    MemLocal,
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub enum InternalKernelArgType {
+    ConstantBuffer,
+}
+
+pub struct KernelArg {
+    spirv: spirv::SPIRVKernelArg,
+    pub kind: KernelArgType,
+    pub size: usize,
+    pub offset: usize,
+    pub dead: bool,
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub struct InternalKernelArg {
+    pub kind: InternalKernelArgType,
+    pub size: usize,
+    pub offset: usize,
+}
+
+impl KernelArg {
+    fn from_spirv_nir(spirv: Vec<spirv::SPIRVKernelArg>, nir: &mut NirShader) -> Vec<Self> {
+        let nir_arg_map: HashMap<_, _> = nir
+            .variables_with_mode(
+                nir_variable_mode::nir_var_uniform | nir_variable_mode::nir_var_image,
+            )
+            .map(|v| (v.data.location, v))
+            .collect();
+        let mut res = Vec::new();
+
+        for (i, s) in spirv.into_iter().enumerate() {
+            let nir = nir_arg_map.get(&(i as i32)).unwrap();
+            let kind = match s.address_qualifier {
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => {
+                    if unsafe { glsl_type_is_sampler(nir.type_) } {
+                        KernelArgType::Sampler
+                    } else {
+                        KernelArgType::Constant
+                    }
+                }
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_CONSTANT => {
+                    KernelArgType::MemConstant
+                }
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_LOCAL => {
+                    KernelArgType::MemLocal
+                }
+                clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_GLOBAL => {
+                    KernelArgType::MemGlobal
+                }
+            };
+
+            res.push(Self {
+                spirv: s,
+                size: unsafe { glsl_get_cl_size(nir.type_) } as usize,
+                // we'll update it later in the 2nd pass
+                kind: kind,
+                offset: 0,
+                dead: true,
+            });
+        }
+        res
+    }
+
+    fn assign_locations(
+        args: &mut [Self],
+        internal_args: &mut [InternalKernelArg],
+        nir: &mut NirShader,
+    ) {
+        for var in nir.variables_with_mode(
+            nir_variable_mode::nir_var_uniform | nir_variable_mode::nir_var_image,
+        ) {
+            if let Some(arg) = args.get_mut(var.data.location as usize) {
+                arg.offset = var.data.driver_location as usize;
+                arg.dead = false;
+            } else {
+                internal_args
+                    .get_mut(var.data.location as usize - args.len())
+                    .unwrap()
+                    .offset = var.data.driver_location as usize;
+            }
+        }
+    }
+}
+
 #[repr(C)]
 pub struct Kernel {
     pub base: CLObjectBase<CL_INVALID_KERNEL>,
     pub prog: Arc<Program>,
     pub name: String,
-    pub args: Vec<spirv::SPIRVKernelArg>,
+    pub args: Vec<KernelArg>,
+    pub values: Vec<RefCell<Option<KernelArgValue>>>,
+    internal_args: Vec<InternalKernelArg>,
+    nirs: HashMap<Arc<Device>, NirShader>,
 }
 
 impl_cl_type_trait!(cl_kernel, Kernel, CL_INVALID_KERNEL);
 
+fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] {
+    let mut res = [val; 3];
+    for (i, v) in vals.iter().enumerate() {
+        res[i] = (*v).try_into().expect("64 bit work groups not supported");
+    }
+    res
+}
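`create_kernel_arr` pads the caller's `work_dim`-sized slice out to the fixed `[u32; 3]` that gallium's grid info expects, with 1 as the filler for block/grid sizes and 0 for offsets. Usage sketch (a standalone copy of the function above):

use std::convert::TryInto;

fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] {
    let mut res = [val; 3];
    for (i, v) in vals.iter().enumerate() {
        res[i] = (*v).try_into().expect("64 bit work groups not supported");
    }
    res
}

fn main() {
    // A 2D dispatch: the unused z dimension defaults to 1 for sizes...
    assert_eq!(create_kernel_arr(&[64, 8], 1), [64, 8, 1]);
    // ...and to 0 for offsets.
    assert_eq!(create_kernel_arr(&[4, 2], 0), [4, 2, 0]);
}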
+
+// mostly like clc_spirv_to_dxil
+// does not DCE uniforms or images!
+fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
+    nir.set_workgroup_size(&[0; 3]);
+    nir.structurize();
+    while {
+        let mut progress = false;
+        progress |= nir.pass0(nir_copy_prop);
+        progress |= nir.pass0(nir_opt_copy_prop_vars);
+        progress |= nir.pass0(nir_opt_deref);
+        progress |= nir.pass0(nir_opt_dce);
+        progress |= nir.pass0(nir_opt_undef);
+        progress |= nir.pass0(nir_opt_constant_folding);
+        progress |= nir.pass0(nir_opt_cse);
+        progress |= nir.pass0(nir_lower_vars_to_ssa);
+        progress |= nir.pass0(nir_opt_algebraic);
+        progress
+    } {}
+    nir.inline(lib_clc);
+    nir.remove_non_entrypoints();
+    while {
+        let mut progress = false;
+        progress |= nir.pass0(nir_copy_prop);
+        progress |= nir.pass0(nir_opt_copy_prop_vars);
+        progress |= nir.pass0(nir_opt_deref);
+        progress |= nir.pass0(nir_opt_dce);
+        progress |= nir.pass0(nir_opt_undef);
+        progress |= nir.pass0(nir_opt_constant_folding);
+        progress |= nir.pass0(nir_opt_cse);
+        progress |= nir.pass0(nir_split_var_copies);
+        progress |= nir.pass0(nir_lower_var_copies);
+        progress |= nir.pass0(nir_lower_vars_to_ssa);
+        progress |= nir.pass0(nir_opt_algebraic);
+        progress |= nir.pass1(
+            nir_opt_if,
+            nir_opt_if_options::nir_opt_if_aggressive_last_continue
+                | nir_opt_if_options::nir_opt_if_optimize_phi_true_false,
+        );
+        progress |= nir.pass0(nir_opt_dead_cf);
+        progress |= nir.pass0(nir_opt_remove_phis);
+        // we don't want to be too aggressive here, but it kills a bit of CFG
+        progress |= nir.pass3(nir_opt_peephole_select, 1, true, true);
+        progress |= nir.pass1(
+            nir_lower_vec3_to_vec4,
+            nir_variable_mode::nir_var_mem_generic | nir_variable_mode::nir_var_uniform,
+        );
+        progress
+    } {}
+    // TODO variable initializers
+    // TODO lower memcpy
+    nir.pass0(nir_dedup_inline_samplers);
+    nir.pass2(
+        nir_lower_vars_to_explicit_types,
+        nir_variable_mode::nir_var_function_temp,
+        Some(glsl_get_cl_type_size_align),
+    );
+    // TODO printf
+    nir.pass0(nir_split_var_copies);
+    nir.pass0(nir_opt_copy_prop_vars);
+    nir.pass0(nir_lower_var_copies);
+    nir.pass0(nir_lower_vars_to_ssa);
+    nir.pass0(nir_lower_alu);
+    nir.pass0(nir_opt_dce);
+    nir.pass0(nir_opt_deref);
+}
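The `while { ... progress } {}` shape above is the Rust idiom for a do-while loop: the condition itself is a block, so the passes run at least once and the pipeline iterates until no pass reports progress. Minimal standalone illustration of the same fixed-point loop:

fn main() {
    let mut x = 37u32;
    let mut rounds = 0;
    // `while <block> {}`: the block is the loop condition, so it always runs
    // at least once, i.e. a do-while. Iterate until no step makes progress.
    while {
        let mut progress = false;
        if x % 2 == 0 {
            x /= 2;
            progress = true;
        } else if x > 1 {
            x = 3 * x + 1;
            progress = true;
        }
        rounds += 1;
        progress
    } {}
    assert_eq!(x, 1);
    assert!(rounds > 1);
}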
+
+fn lower_and_optimize_nir_late(
+    dev: &Device,
+    nir: &mut NirShader,
+    args: usize,
+) -> Vec<InternalKernelArg> {
+    let mut res = Vec::new();
+    let mut lower_state = rusticl_lower_state::default();
+
+    nir.pass2(
+        nir_remove_dead_variables,
+        nir_variable_mode::nir_var_uniform
+            | nir_variable_mode::nir_var_mem_constant
+            | nir_variable_mode::nir_var_function_temp,
+        ptr::null(),
+    );
+    nir.pass1(nir_lower_readonly_images_to_tex, false);
+    nir.pass2(
+        nir_remove_dead_variables,
+        nir_variable_mode::nir_var_mem_shared | nir_variable_mode::nir_var_function_temp,
+        ptr::null(),
+    );
+    nir.reset_scratch_size();
+    nir.pass2(
+        nir_lower_vars_to_explicit_types,
+        nir_variable_mode::nir_var_mem_constant,
+        Some(glsl_get_cl_type_size_align),
+    );
+    nir.extract_constant_initializers();
+    // TODO printf
+    // TODO 32 bit devices
+    if nir.has_constant() {
+        res.push(InternalKernelArg {
+            kind: InternalKernelArgType::ConstantBuffer,
+            offset: 0,
+            size: 8,
+        });
+        lower_state.const_buf = nir.add_var(
+            nir_variable_mode::nir_var_uniform,
+            unsafe { glsl_uint64_t_type() },
+            args + res.len() - 1,
+            "constant_buffer_addr",
+        );
+    }
+
+    nir.pass2(
+        nir_lower_vars_to_explicit_types,
+        nir_variable_mode::nir_var_mem_shared
+            | nir_variable_mode::nir_var_function_temp
+            | nir_variable_mode::nir_var_uniform
+            | nir_variable_mode::nir_var_mem_global,
+        Some(glsl_get_cl_type_size_align),
+    );
+    nir.pass2(
+        nir_lower_explicit_io,
+        nir_variable_mode::nir_var_mem_global | nir_variable_mode::nir_var_mem_constant,
+        nir_address_format::nir_address_format_64bit_global,
+    );
+    nir.pass1(rusticl_lower_intrinsics, &mut lower_state);
+    nir.pass2(
+        nir_lower_explicit_io,
+        nir_variable_mode::nir_var_mem_shared
+            | nir_variable_mode::nir_var_function_temp
+            | nir_variable_mode::nir_var_uniform,
+        nir_address_format::nir_address_format_32bit_offset_as_64bit,
+    );
+    nir.pass0(nir_lower_system_values);
+    let compute_options = nir_lower_compute_system_values_options::default();
+    nir.pass1(nir_lower_compute_system_values, &compute_options);
+    nir.pass0(nir_opt_deref);
+    nir.pass0(nir_lower_vars_to_ssa);
+
+    // TODO whatever clc is doing here
+
+    nir.pass1(nir_lower_convert_alu_types, None);
+    nir.pass0(nir_opt_dce);
+    dev.screen.finalize_nir(nir);
+    res
+}
+
 impl Kernel {
-    pub fn new(name: String, prog: Arc<Program>, args: Vec<spirv::SPIRVKernelArg>) -> Arc<Kernel> {
+    pub fn new(
+        name: String,
+        prog: Arc<Program>,
+        mut nirs: HashMap<Arc<Device>, NirShader>,
+        args: Vec<spirv::SPIRVKernelArg>,
+    ) -> Arc<Kernel> {
+        nirs.iter_mut()
+            .for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc));
+        let nir = nirs.values_mut().next().unwrap();
+        let mut args = KernelArg::from_spirv_nir(args, nir);
+        // can't use vec!...
+        let values = args.iter().map(|_| RefCell::new(None)).collect();
+        let internal_args: HashSet<_> = nirs
+            .iter_mut()
+            .map(|(d, n)| lower_and_optimize_nir_late(d, n, args.len()))
+            .collect();
+        // we want the same internal args for every compiled kernel, for now
+        assert!(internal_args.len() == 1);
+        let mut internal_args = internal_args.into_iter().next().unwrap();
+
+        nirs.values_mut()
+            .for_each(|n| KernelArg::assign_locations(&mut args, &mut internal_args, n));
+
         Arc::new(Self {
             base: CLObjectBase::new(),
             prog: prog,
             name: name,
             args: args,
+            values: values,
+            internal_args: internal_args,
+            // caller has to verify all kernels have the same sig
+            nirs: nirs,
         })
     }
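`Kernel::new` leans on a `HashSet` for its cross-device consistency check: each device's compile yields an internal-arg layout, identical layouts collapse into a single set entry, and the `assert!` demands exactly one. Standalone sketch of that uniformity check:

use std::collections::HashSet;

#[derive(Hash, PartialEq, Eq, Debug)]
struct Layout {
    offset: usize,
    size: usize,
}

fn main() {
    // One compiled layout per device; equal layouts collapse in the set.
    let per_device = vec![
        Layout { offset: 0, size: 8 },
        Layout { offset: 0, size: 8 },
    ];
    let unique: HashSet<_> = per_device.into_iter().collect();
    // Mirrors `assert!(internal_args.len() == 1)` above.
    assert_eq!(unique.len(), 1);
    assert_eq!(unique.into_iter().next().unwrap().size, 8);
}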
+
+    // The painful part is that host threads are allowed to modify the kernel object once it was
+    // enqueued, so return a closure with all required data included.
+    pub fn launch(
+        &self,
+        q: &Arc<Queue>,
+        work_dim: u32,
+        block: &[usize],
+        grid: &[usize],
+        offsets: &[usize],
+    ) -> EventSig {
+        let nir = self.nirs.get(&q.device).unwrap();
+        let mut block = create_kernel_arr(block, 1);
+        let mut grid = create_kernel_arr(grid, 1);
+        let offsets = create_kernel_arr(offsets, 0);
+        let mut input: Vec<u8> = Vec::new();
+        let mut resource_info = Vec::new();
+        let mut local_size: u32 = nir.shared_size();
+
+        for i in 0..3 {
+            if block[i] == 0 {
+                block[i] = 1;
+            } else {
+                grid[i] /= block[i];
+            }
+        }
+
+        for (arg, val) in self.args.iter().zip(&self.values) {
+            if arg.dead {
+                continue;
+            }
+            match val.borrow().as_ref().unwrap() {
+                KernelArgValue::Constant(c) => input.extend_from_slice(c),
+                KernelArgValue::MemObject(mem) => {
+                    input.extend_from_slice(&mem.offset.to_ne_bytes());
+                    resource_info.push((Some(mem.get_res_of_dev(&q.device).clone()), arg.offset));
+                }
+                KernelArgValue::LocalMem(size) => {
+                    // TODO 32 bit
+                    input.extend_from_slice(&[0; 8]);
+                    local_size += *size as u32;
+                }
+                KernelArgValue::None => {
+                    assert!(
+                        arg.kind == KernelArgType::MemGlobal
+                            || arg.kind == KernelArgType::MemConstant
+                    );
+                    input.extend_from_slice(&[0; 8]);
+                }
+                _ => panic!("unhandled arg type"),
+            }
+        }
+
+        for arg in &self.internal_args {
+            match arg.kind {
+                InternalKernelArgType::ConstantBuffer => {
+                    input.extend_from_slice(&[0; 8]);
+                    let buf = nir.get_constant_buffer();
+                    let res = Arc::new(
+                        q.device
+                            .screen()
+                            .resource_create_buffer(buf.len() as u32)
+                            .unwrap(),
+                    );
+                    q.device.helper_ctx().buffer_subdata(
+                        &res,
+                        0,
+                        buf.as_ptr().cast(),
+                        buf.len() as u32,
+                    );
+                    resource_info.push((Some(res), arg.offset));
+                }
+            }
+        }
+
+        let cso = q
+            .device
+            .helper_ctx()
+            .create_compute_state(nir, input.len() as u32, local_size);
+
+        Box::new(move |_, ctx| {
+            let mut input = input.clone();
+            let mut resources = Vec::with_capacity(resource_info.len());
+            let mut globals: Vec<*mut u32> = Vec::new();
+
+            for (res, offset) in resource_info.clone() {
+                resources.push(res);
+                globals.push(unsafe { input.as_mut_ptr().add(offset) }.cast());
+            }
+
+            ctx.bind_compute_state(cso);
+            ctx.set_global_binding(resources.as_slice(), &mut globals);
+            ctx.launch_grid(work_dim, block, grid, offsets, &input);
+            ctx.clear_global_binding(globals.len() as u32);
+            ctx.delete_compute_state(cso);
+            ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);
+            Ok(())
+        })
+    }
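`launch` returns a boxed closure precisely so the later submission runs on a snapshot: the packed inputs, grid arrays, and resource Arcs are moved in by value, making concurrent `clSetKernelArg` calls on the same kernel harmless. A toy standalone sketch of the snapshot-closure pattern:

use std::sync::Arc;

fn main() {
    // Everything the submission needs, captured at enqueue time.
    let input: Vec<u8> = vec![1, 2, 3, 4];
    let grid = [16u32, 1, 1];
    let resource = Arc::new(vec![0u8; 64]); // stand-in for a pipe resource

    // `move` pins the snapshot; later host-side argument changes can't race it.
    let cb: Box<dyn Fn() + Send> = Box::new(move || {
        assert_eq!(input.len(), 4);
        assert_eq!(grid[0], 16);
        assert_eq!(resource.len(), 64);
    });

    cb(); // executed later on the queue thread
}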
 
     pub fn access_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_access_qualifier {
-        let aq = self.args[idx as usize].access_qualifier;
+        let aq = self.args[idx as usize].spirv.access_qualifier;
 
         if aq
             == clc_kernel_arg_access_qualifier::CLC_KERNEL_ARG_ACCESS_READ
@@ -50,7 +443,7 @@ impl Kernel {
     }
 
     pub fn address_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_address_qualifier {
-        match self.args[idx as usize].address_qualifier {
+        match self.args[idx as usize].spirv.address_qualifier {
             clc_kernel_arg_address_qualifier::CLC_KERNEL_ARG_ADDRESS_PRIVATE => {
                 CL_KERNEL_ARG_ADDRESS_PRIVATE
             }
@@ -67,7 +460,7 @@ impl Kernel {
     }
 
     pub fn type_qualifier(&self, idx: cl_uint) -> cl_kernel_arg_type_qualifier {
-        let tq = self.args[idx as usize].type_qualifier;
+        let tq = self.args[idx as usize].spirv.type_qualifier;
         let zero = clc_kernel_arg_type_qualifier(0);
         let mut res = CL_KERNEL_ARG_TYPE_NONE;
 
@@ -87,10 +480,10 @@ impl Kernel {
     }
 
     pub fn arg_name(&self, idx: cl_uint) -> &String {
-        &self.args[idx as usize].name
+        &self.args[idx as usize].spirv.name
     }
 
     pub fn arg_type_name(&self, idx: cl_uint) -> &String {
-        &self.args[idx as usize].type_name
+        &self.args[idx as usize].spirv.type_name
     }
 }
diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs
index 8208cec7316..fff35e1b15d 100644
--- a/src/gallium/frontends/rusticl/core/memory.rs
+++ b/src/gallium/frontends/rusticl/core/memory.rs
@@ -36,7 +36,7 @@ pub struct Mem {
     pub image_desc: cl_image_desc,
     pub image_elem_size: u8,
     pub cbs: Mutex<Vec<Box<dyn Fn(cl_mem)>>>,
-    res: Option<HashMap<Arc<Device>, PipeResource>>,
+    res: Option<HashMap<Arc<Device>, Arc<PipeResource>>>,
     maps: Mutex<HashMap<*mut c_void, (u32, PipeTransfer)>>,
 }
 
@@ -187,7 +187,7 @@ impl Mem {
         ptr::eq(a, b)
     }
 
-    fn get_res(&self) -> &HashMap<Arc<Device>, PipeResource> {
+    fn get_res(&self) -> &HashMap<Arc<Device>, Arc<PipeResource>> {
         self.parent
             .as_ref()
             .map_or(self, |p| p.as_ref())
             .res
@@ -196,6 +196,10 @@ impl Mem {
             .as_ref()
             .unwrap()
     }
 
+    pub fn get_res_of_dev(&self, dev: &Arc<Device>) -> &Arc<PipeResource> {
+        self.get_res().get(dev).unwrap()
+    }
+
     fn to_parent<'a>(&'a self, offset: &mut usize) -> &'a Self {
         if let Some(parent) = &self.parent {
             offset.add_assign(self.offset);
diff --git a/src/gallium/frontends/rusticl/core/program.rs b/src/gallium/frontends/rusticl/core/program.rs
index 3c96a0d95e2..e914d330a7f 100644
--- a/src/gallium/frontends/rusticl/core/program.rs
+++ b/src/gallium/frontends/rusticl/core/program.rs
@@ -1,4 +1,5 @@
 extern crate mesa_rust;
+extern crate mesa_rust_gen;
 extern crate rusticl_opencl_gen;
 
 use crate::api::icd::*;
@@ -7,6 +8,8 @@ use crate::core::device::*;
 use crate::impl_cl_type_trait;
 
 use self::mesa_rust::compiler::clc::*;
+use self::mesa_rust::compiler::nir::*;
+use self::mesa_rust_gen::*;
 use self::rusticl_opencl_gen::*;
 
 use std::collections::HashMap;
@@ -227,4 +230,28 @@ impl Program {
             }),
         })
     }
+
+    pub fn nirs(&self, kernel: &str) -> HashMap<Arc<Device>, NirShader> {
+        let mut lock = self.build_info();
+        let mut res = HashMap::new();
+        for d in &self.devs {
+            let info = Self::dev_build_info(&mut lock, d);
+            if info.status != CL_BUILD_SUCCESS as cl_build_status {
+                continue;
+            }
+            let nir = info
+                .spirv
+                .as_ref()
+                .unwrap()
+                .to_nir(
+                    kernel,
+                    d.screen
+                        .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE),
+                    &d.lib_clc,
+                )
+                .unwrap();
+            res.insert(d.clone(), nir);
+        }
+        res
+    }
 }
diff --git a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs
index 89f7655f175..f98bb9061d3 100644
--- a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs
+++ b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs
@@ -1,6 +1,9 @@
 extern crate mesa_rust_gen;
 extern crate mesa_rust_util;
 
+use crate::compiler::nir::*;
+use crate::pipe::screen::*;
+
 use self::mesa_rust_gen::*;
 use self::mesa_rust_util::string::*;
 
@@ -167,6 +170,70 @@ impl SPIRVBin {
             .collect(),
         }
     }
+
+    fn get_spirv_options(library: bool, clc_shader: *const nir_shader) -> spirv_to_nir_options {
+        spirv_to_nir_options {
+            create_library: library,
+            environment: nir_spirv_execution_environment::NIR_SPIRV_OPENCL,
+            clc_shader: clc_shader,
+            float_controls_execution_mode: float_controls::FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32
+                as u16,
+
+            caps: spirv_supported_capabilities {
+                address: true,
+                float64: true,
+                int8: true,
+                int16: true,
+                int64: true,
+                kernel: true,
+                kernel_image: true,
+                linkage: true,
+                ..Default::default()
+            },
+
+            constant_addr_format: nir_address_format::nir_address_format_64bit_global,
+            global_addr_format: nir_address_format::nir_address_format_64bit_global, // TODO 32 bit devices
+            shared_addr_format: nir_address_format::nir_address_format_32bit_offset_as_64bit,
+            temp_addr_format: nir_address_format::nir_address_format_32bit_offset_as_64bit,
+
+            // default
+            debug: spirv_to_nir_options__bindgen_ty_1::default(),
+
+            ..Default::default()
+        }
+    }
+
+    pub fn to_nir(
+        &self,
+        entry_point: &str,
+        nir_options: *const nir_shader_compiler_options,
+        libclc: &NirShader,
+    ) -> Option<NirShader> {
+        let c_entry = CString::new(entry_point.as_bytes()).unwrap();
+        let spirv_options = Self::get_spirv_options(false, libclc.get_nir());
+        let nir = unsafe {
+            spirv_to_nir(
+                self.spirv.data.cast(),
+                self.spirv.size / 4,
+                ptr::null_mut(), // spec
+                0,               // spec count
+                gl_shader_stage::MESA_SHADER_KERNEL,
+                c_entry.as_ptr(),
+                &spirv_options,
+                nir_options,
+            )
+        };
+
+        NirShader::new(nir)
+    }
+
+    pub fn get_lib_clc(screen: &PipeScreen) -> Option<NirShader> {
+        let nir_options = screen.nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE);
+        let spirv_options = Self::get_spirv_options(true, ptr::null());
+        let shader_cache = screen.shader_cache();
+        NirShader::new(unsafe {
+            nir_load_libclc_shader(64, shader_cache, &spirv_options, nir_options)
+        })
+    }
 }
 
 impl Drop for SPIRVBin {
diff --git a/src/gallium/frontends/rusticl/mesa/compiler/nir.rs b/src/gallium/frontends/rusticl/mesa/compiler/nir.rs
index 6a4f8fa487c..5b659555921 100644
--- a/src/gallium/frontends/rusticl/mesa/compiler/nir.rs
+++ b/src/gallium/frontends/rusticl/mesa/compiler/nir.rs
@@ -2,12 +2,215 @@ extern crate mesa_rust_gen;
 
 use self::mesa_rust_gen::*;
 
+use std::convert::TryInto;
+use std::ffi::c_void;
+use std::ffi::CString;
+use std::marker::PhantomData;
+use std::ptr;
 use std::ptr::NonNull;
+use std::slice;
+
+// from https://internals.rust-lang.org/t/discussion-on-offset-of/7440/2
+macro_rules! offset_of {
+    ($Struct:path, $field:ident) => {{
+        // Using a separate function to minimize unhygienic hazards
+        // (e.g. unsafety of #[repr(packed)] field borrows).
+        // Uncomment `const` when `const fn`s can juggle pointers.
+        /*const*/
+        fn offset() -> usize {
+            let u = std::mem::MaybeUninit::<$Struct>::uninit();
+            // Use pattern-matching to avoid accidentally going through Deref.
+            let &$Struct { $field: ref f, .. } = unsafe { &*u.as_ptr() };
+            let o = (f as *const _ as usize).wrapping_sub(&u as *const _ as usize);
+            // Triple check that we are within `u` still.
+            assert!((0..=std::mem::size_of_val(&u)).contains(&o));
+            o
+        }
+        offset()
+    }};
+}
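The `offset_of!` macro recovers a field's byte offset from an uninitialized value; `ExecListIter` needs it to step from an embedded `exec_node` back to the struct containing it. A standalone usage sketch, built around a compact `addr_of!`-based equivalent of the macro (assumed here for brevity, not the version above):

// Compact offset_of equivalent, restated so this sketch is standalone.
macro_rules! offset_of {
    ($Struct:path, $field:ident) => {{
        let u = std::mem::MaybeUninit::<$Struct>::uninit();
        let base = u.as_ptr();
        // Field address minus struct address = byte offset of the field.
        let field = unsafe { std::ptr::addr_of!((*base).$field) };
        field as usize - base as usize
    }};
}

#[repr(C)]
struct Node {
    prev: *mut Node,
    next: *mut Node,
}

#[repr(C)]
struct Variable {
    name: u64,
    node: Node, // embedded list link, as in nir_variable
}

fn main() {
    // Knowing this offset lets an iterator turn a &Node back into &Variable.
    assert_eq!(offset_of!(Variable, node), 8);
}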
+
+pub struct ExecListIter<'a, T> {
+    n: &'a mut exec_node,
+    offset: usize,
+    _marker: PhantomData<T>,
+}
+
+impl<'a, T> ExecListIter<'a, T> {
+    fn new(l: &'a mut exec_list, offset: usize) -> Self {
+        Self {
+            n: &mut l.head_sentinel,
+            offset: offset,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<'a, T: 'a> Iterator for ExecListIter<'a, T> {
+    type Item = &'a mut T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.n = unsafe { &mut *self.n.next };
+        if self.n.next.is_null() {
+            None
+        } else {
+            let t: *mut c_void = (self.n as *mut exec_node).cast();
+            Some(unsafe { &mut *(t.sub(self.offset).cast()) })
+        }
+    }
+}
 
 pub struct NirShader {
     nir: NonNull<nir_shader>,
 }
 
+impl NirShader {
+    pub fn new(nir: *mut nir_shader) -> Option<Self> {
+        NonNull::new(nir).map(|nir| Self { nir: nir })
+    }
+
+    pub fn print(&self) {
+        unsafe { nir_print_shader(self.nir.as_ptr(), stderr) };
+    }
+
+    pub fn get_nir(&self) -> *mut nir_shader {
+        self.nir.as_ptr()
+    }
+
+    pub fn dup_for_driver(&self) -> *mut nir_shader {
+        unsafe { nir_shader_clone(ptr::null_mut(), self.nir.as_ptr()) }
+    }
+
+    pub fn pass0<R>(&mut self, pass: unsafe extern "C" fn(*mut nir_shader) -> R) -> R {
+        unsafe { pass(self.nir.as_ptr()) }
+    }
+
+    pub fn pass1<R, A>(
+        &mut self,
+        pass: unsafe extern "C" fn(*mut nir_shader, a: A) -> R,
+        a: A,
+    ) -> R {
+        unsafe { pass(self.nir.as_ptr(), a) }
+    }
+
+    pub fn pass2<R, A, B>(
+        &mut self,
+        pass: unsafe extern "C" fn(*mut nir_shader, a: A, b: B) -> R,
+        a: A,
+        b: B,
+    ) -> R {
+        unsafe { pass(self.nir.as_ptr(), a, b) }
+    }
+
+    pub fn pass3<R, A, B, C>(
+        &mut self,
+        pass: unsafe extern "C" fn(*mut nir_shader, a: A, b: B, c: C) -> R,
+        a: A,
+        b: B,
+        c: C,
+    ) -> R {
+        unsafe { pass(self.nir.as_ptr(), a, b, c) }
+    }
+
+    pub fn structurize(&mut self) {
+        self.pass0(nir_lower_goto_ifs);
+        self.pass0(nir_opt_dead_cf);
+    }
+
+    pub fn inline(&mut self, libclc: &NirShader) {
+        self.pass1(
+            nir_lower_variable_initializers,
+            nir_variable_mode::nir_var_function_temp,
+        );
+        self.pass0(nir_lower_returns);
+        self.pass1(nir_lower_libclc, libclc.nir.as_ptr());
+        self.pass0(nir_inline_functions);
+    }
+
+    pub fn remove_non_entrypoints(&mut self) {
+        unsafe { nir_remove_non_entrypoints(self.nir.as_ptr()) };
+    }
+
+    pub fn variables(&mut self) -> ExecListIter<nir_variable> {
+        ExecListIter::new(
+            &mut unsafe { self.nir.as_mut() }.variables,
+            offset_of!(nir_variable, node),
+        )
+    }
+
+    pub fn reset_scratch_size(&self) {
+        unsafe {
+            (*self.nir.as_ptr()).scratch_size = 0;
+        }
+    }
+
+    pub fn shared_size(&self) -> u32 {
+        unsafe { (*self.nir.as_ptr()).info.shared_size }
+    }
+
+    pub fn set_workgroup_size(&self, workgroup: &[u16; 3]) {
+        let mut nir = self.nir.as_ptr();
+        unsafe {
+            (*nir).info.set_workgroup_size_variable(workgroup[0] == 0);
+            (*nir).info.workgroup_size[0] = workgroup[0];
+            (*nir).info.workgroup_size[1] = workgroup[1];
+            (*nir).info.workgroup_size[2] = workgroup[2];
+        }
+    }
+
+    pub fn variables_with_mode(
+        &mut self,
+        mode: nir_variable_mode,
+    ) -> impl Iterator<Item = &mut nir_variable> {
+        self.variables()
+            .filter(move |v| v.data.mode() & mode.0 != 0)
+    }
+
+    pub fn extract_constant_initializers(&self) {
+        let nir = self.nir.as_ptr();
+        unsafe {
+            if (*nir).constant_data_size > 0 {
+                assert!((*nir).constant_data.is_null());
+                (*nir).constant_data = rzalloc_size(nir.cast(), (*nir).constant_data_size as usize);
+                nir_gather_explicit_io_initializers(
+                    nir,
+                    (*nir).constant_data,
+                    (*nir).constant_data_size as usize,
+                    nir_variable_mode::nir_var_mem_constant,
+                );
+            }
+        }
+    }
+
+    pub fn has_constant(&self) -> bool {
+        unsafe {
+            !self.nir.as_ref().constant_data.is_null() && self.nir.as_ref().constant_data_size > 0
+        }
+    }
+
+    pub fn get_constant_buffer(&self) -> &[u8] {
+        unsafe {
+            let nir = self.nir.as_ref();
+            slice::from_raw_parts(nir.constant_data.cast(), nir.constant_data_size as usize)
+        }
+    }
+
+    pub fn add_var(
+        &self,
+        mode: nir_variable_mode,
+        glsl_type: *const glsl_type,
+        loc: usize,
+        name: &str,
+    ) -> *mut nir_variable {
+        let name = CString::new(name).unwrap();
+        unsafe {
+            let var = nir_variable_create(self.nir.as_ptr(), mode, glsl_type, name.as_ptr());
+            (*var).data.location = loc.try_into().unwrap();
+            var
+        }
+    }
+}
+
 impl Drop for NirShader {
     fn drop(&mut self) {
         unsafe { ralloc_free(self.nir.as_ptr().cast()) };
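The `pass0`..`pass3` wrappers give every C-ABI NIR pass with zero to three trailing arguments one typed call site. Standalone sketch of the same generic fn-pointer wrapping, with dummy passes over a `u32` instead of real NIR:

struct Shader(u32); // stand-in for the NonNull<nir_shader> handle

impl Shader {
    fn pass0<R>(&mut self, pass: unsafe extern "C" fn(*mut u32) -> R) -> R {
        unsafe { pass(&mut self.0) }
    }
    fn pass1<R, A>(&mut self, pass: unsafe extern "C" fn(*mut u32, A) -> R, a: A) -> R {
        unsafe { pass(&mut self.0, a) }
    }
}

// Dummy "pass": pretend one instruction gets eliminated per call.
unsafe extern "C" fn opt_dce(s: *mut u32) -> bool {
    if *s > 0 {
        *s -= 1;
        true
    } else {
        false
    }
}

// Dummy pass taking one extra argument.
unsafe extern "C" fn lower_width(s: *mut u32, width: u32) -> bool {
    *s = (*s).min(width);
    true
}

fn main() {
    let mut sh = Shader(5);
    while sh.pass0(opt_dce) {} // iterate a 0-arg pass to a fixed point
    assert_eq!(sh.0, 0);
    sh.pass1(lower_width, 4); // 1-arg pass through the same mechanism
    assert_eq!(sh.0, 0);
}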
diff --git a/src/gallium/frontends/rusticl/mesa/pipe/context.rs b/src/gallium/frontends/rusticl/mesa/pipe/context.rs
index 6f64c3a1327..6c8a5cc63d6 100644
--- a/src/gallium/frontends/rusticl/mesa/pipe/context.rs
+++ b/src/gallium/frontends/rusticl/mesa/pipe/context.rs
@@ -1,5 +1,6 @@
 extern crate mesa_rust_gen;
 
+use crate::compiler::nir::*;
 use crate::pipe::resource::*;
 use crate::pipe::transfer::*;
 
@@ -97,6 +98,84 @@ impl PipeContext {
         unsafe { self.pipe.as_ref().blit.unwrap()(self.pipe.as_ptr(), &blit_info) }
     }
+
+    pub fn create_compute_state(
+        &self,
+        nir: &NirShader,
+        input_mem: u32,
+        local_mem: u32,
+    ) -> *mut c_void {
+        let state = pipe_compute_state {
+            ir_type: pipe_shader_ir::PIPE_SHADER_IR_NIR,
+            prog: nir.dup_for_driver().cast(),
+            req_input_mem: input_mem,
+            req_local_mem: local_mem,
+            req_private_mem: 0,
+        };
+        unsafe { self.pipe.as_ref().create_compute_state.unwrap()(self.pipe.as_ptr(), &state) }
+    }
+
+    pub fn bind_compute_state(&self, state: *mut c_void) {
+        unsafe { self.pipe.as_ref().bind_compute_state.unwrap()(self.pipe.as_ptr(), state) }
+    }
+
+    pub fn delete_compute_state(&self, state: *mut c_void) {
+        unsafe { self.pipe.as_ref().delete_compute_state.unwrap()(self.pipe.as_ptr(), state) }
+    }
+
+    pub fn launch_grid(
+        &self,
+        work_dim: u32,
+        block: [u32; 3],
+        grid: [u32; 3],
+        grid_base: [u32; 3],
+        input: &[u8],
+    ) {
+        let info = pipe_grid_info {
+            pc: 0,
+            input: input.as_ptr().cast(),
+            work_dim: work_dim,
+            block: block,
+            last_block: [0; 3],
+            grid: grid,
+            grid_base: grid_base,
+            indirect: ptr::null_mut(),
+            indirect_offset: 0,
+        };
+        unsafe { self.pipe.as_ref().launch_grid.unwrap()(self.pipe.as_ptr(), &info) }
+    }
+
+    pub fn set_global_binding(&self, res: &[Option<Arc<PipeResource>>], out: &mut [*mut u32]) {
+        let mut res: Vec<_> = res
+            .iter()
+            .map(|o| o.as_ref().map_or(ptr::null_mut(), |r| r.pipe()))
+            .collect();
+        unsafe {
+            self.pipe.as_ref().set_global_binding.unwrap()(
+                self.pipe.as_ptr(),
+                0,
+                res.len() as u32,
+                res.as_mut_ptr(),
+                out.as_mut_ptr(),
+            )
+        }
+    }
+
+    pub fn clear_global_binding(&self, count: u32) {
+        unsafe {
+            self.pipe.as_ref().set_global_binding.unwrap()(
+                self.pipe.as_ptr(),
+                0,
+                count,
+                ptr::null_mut(),
+                ptr::null_mut(),
+            )
+        }
+    }
+
+    pub fn memory_barrier(&self, barriers: u32) {
+        unsafe { self.pipe.as_ref().memory_barrier.unwrap()(self.pipe.as_ptr(), barriers) }
+    }
 }
 
 impl Drop for PipeContext {
@@ -109,8 +188,14 @@ impl Drop for PipeContext {
 
 fn has_required_cbs(c: &pipe_context) -> bool {
     c.destroy.is_some()
+        && c.bind_compute_state.is_some()
         && c.blit.is_some()
         && c.buffer_map.is_some()
         && c.buffer_subdata.is_some()
         && c.buffer_unmap.is_some()
+        && c.create_compute_state.is_some()
+        && c.delete_compute_state.is_some()
+        && c.launch_grid.is_some()
+        && c.memory_barrier.is_some()
+        && c.set_global_binding.is_some()
 }
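In `Kernel::launch`, the `globals` vector holds raw pointers into the packed `input` blob at each buffer argument's offset; `set_global_binding` hands them to the driver so it can patch real GPU addresses into the blob before `launch_grid` consumes it. Standalone model of that patch-through-pointer scheme, assuming 64-bit device addresses:

fn main() {
    // Packed kernel inputs: one 8-byte slot per buffer argument, zeroed for now.
    let mut input = vec![0u8; 16];
    // (gpu_address, byte offset of the argument slot) pairs.
    let resource_info = [(0x1000u64, 0usize), (0x2000u64, 8usize)];

    // Collect pointers into `input` at each argument offset...
    let base = input.as_mut_ptr();
    let globals: Vec<*mut u64> = resource_info
        .iter()
        .map(|&(_, off)| unsafe { base.add(off) }.cast())
        .collect();

    // ...through which the driver (here: us) patches the buffer addresses.
    for (&ptr, &(addr, _)) in globals.iter().zip(resource_info.iter()) {
        unsafe { ptr.write_unaligned(addr) };
    }

    assert_eq!(&input[0..8], &0x1000u64.to_ne_bytes()[..]);
    assert_eq!(&input[8..16], &0x2000u64.to_ne_bytes()[..]);
}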
diff --git a/src/gallium/frontends/rusticl/mesa/pipe/screen.rs b/src/gallium/frontends/rusticl/mesa/pipe/screen.rs
index 3da6c1e8f6e..3148a0e3fcc 100644
--- a/src/gallium/frontends/rusticl/mesa/pipe/screen.rs
+++ b/src/gallium/frontends/rusticl/mesa/pipe/screen.rs
@@ -1,6 +1,7 @@
 extern crate mesa_rust_gen;
 extern crate mesa_rust_util;
 
+use crate::compiler::nir::NirShader;
 use crate::pipe::context::*;
 use crate::pipe::device::*;
 use crate::pipe::resource::*;
@@ -151,6 +152,39 @@ impl PipeScreen {
         let s = &mut unsafe { *self.screen };
         unsafe { s.is_format_supported.unwrap()(self.screen, format, target, 0, 0, bindings) }
     }
+
+    pub fn nir_shader_compiler_options(
+        &self,
+        shader: pipe_shader_type,
+    ) -> *const nir_shader_compiler_options {
+        unsafe {
+            (*self.screen).get_compiler_options.unwrap()(
+                self.screen,
+                pipe_shader_ir::PIPE_SHADER_IR_NIR,
+                shader,
+            )
+            .cast()
+        }
+    }
+
+    pub fn shader_cache(&self) -> *mut disk_cache {
+        let s = &mut unsafe { *self.screen };
+
+        if let Some(func) = s.get_disk_shader_cache {
+            unsafe { func(self.screen) }
+        } else {
+            ptr::null_mut()
+        }
+    }
+
+    pub fn finalize_nir(&self, nir: &NirShader) {
+        let s = &mut unsafe { *self.screen };
+        if let Some(func) = s.finalize_nir {
+            unsafe {
+                func(s, nir.get_nir().cast());
+            }
+        }
+    }
 }
 
 impl Drop for PipeScreen {
@@ -165,6 +199,7 @@ fn has_required_cbs(screen: *mut pipe_screen) -> bool {
     let s = unsafe { *screen };
     s.context_create.is_some()
         && s.destroy.is_some()
+        && s.get_compiler_options.is_some()
         && s.get_compute_param.is_some()
         && s.get_name.is_some()
         && s.get_param.is_some()
diff --git a/src/gallium/frontends/rusticl/meson.build b/src/gallium/frontends/rusticl/meson.build
index a922f9ea0f3..80d8ebf8973 100644
--- a/src/gallium/frontends/rusticl/meson.build
+++ b/src/gallium/frontends/rusticl/meson.build
@@ -149,13 +149,16 @@ rusticl_mesa_bindings_inline_wrapper = static_library(
   'mesa_bindings_inline_wrapper',
   [
     'rusticl_mesa_inline_bindings_wrapper.c',
-    'rusticl_mesa_inline_bindings_wrapper.h'
+    'rusticl_mesa_inline_bindings_wrapper.h',
+    'rusticl_nir.c',
+    'rusticl_nir.h',
   ],
   gnu_symbol_visibility : 'hidden',
   include_directories : [
     inc_gallium,
     inc_gallium_aux,
     inc_include,
+    inc_nir,
     inc_src,
   ],
   c_args : pre_args,
@@ -179,16 +182,23 @@ rusticl_mesa_bindings_rs = rust.bindgen(
   args : [
     rusticl_bindgen_args,
     '--whitelist-function', 'clc_.*',
+    '--whitelist-function', 'glsl_.*',
     '--whitelist-function', 'nir_.*',
     '--whitelist-function', 'pipe_.*',
-    '--whitelist-function', 'ralloc_.*',
+    '--whitelist-function', 'rusticl_.*',
+    '--whitelist-function', 'rz?alloc_.*',
+    '--whitelist-function', 'spirv_.*',
     '--whitelist-type', 'pipe_endian',
     '--whitelist-type', 'clc_kernel_arg_access_qualifier',
     '--bitfield-enum', 'clc_kernel_arg_access_qualifier',
     '--whitelist-type', 'clc_kernel_arg_type_qualifier',
     '--bitfield-enum', 'clc_kernel_arg_type_qualifier',
+    '--bitfield-enum', 'nir_opt_if_options',
+    '--bitfield-enum', 'nir_variable_mode',
+    '--whitelist-type', 'float_controls',
     '--whitelist-var', 'PIPE_.*',
     '--bitfield-enum', 'pipe_map_flags',
+    '--allowlist-var', 'stderr',
   ],
 )
"compiler/clc/clc.h" - -#include "nir.h" +#include "nir_types.h" +#include "spirv/nir_spirv.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" #include "pipe-loader/pipe_loader.h" + +#include "rusticl_nir.h" diff --git a/src/gallium/frontends/rusticl/rusticl_nir.c b/src/gallium/frontends/rusticl/rusticl_nir.c new file mode 100644 index 00000000000..5f7b978a1aa --- /dev/null +++ b/src/gallium/frontends/rusticl/rusticl_nir.c @@ -0,0 +1,38 @@ +#include "nir.h" +#include "nir_builder.h" + +#include "rusticl_nir.h" + +static bool +rusticl_lower_intrinsics_filter(const nir_instr* instr, const void* state) +{ + return instr->type == nir_instr_type_intrinsic; +} + +static nir_ssa_def* +rusticl_lower_intrinsics_instr( + nir_builder *b, + nir_instr *instr, + void* _state +) { + nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); + struct rusticl_lower_state *state = _state; + + switch (intrinsic->intrinsic) { + case nir_intrinsic_load_constant_base_ptr: + return nir_load_var(b, state->const_buf); + default: + return NULL; + } +} + +bool +rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state* state) +{ + return nir_shader_lower_instructions( + nir, + rusticl_lower_intrinsics_filter, + rusticl_lower_intrinsics_instr, + state + ); +} diff --git a/src/gallium/frontends/rusticl/rusticl_nir.h b/src/gallium/frontends/rusticl/rusticl_nir.h new file mode 100644 index 00000000000..9121c25043c --- /dev/null +++ b/src/gallium/frontends/rusticl/rusticl_nir.h @@ -0,0 +1,5 @@ +struct rusticl_lower_state { + nir_variable *const_buf; +}; + +bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);