From ee1fe1a1e572519c165084aa36763fd68c0e8722 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 11 Mar 2025 20:36:17 +0100 Subject: [PATCH] rusticl: implement cl_khr_spirv_queries Part-of: --- docs/features.txt | 1 + docs/relnotes/new_features.txt | 1 + src/gallium/frontends/rusticl/api/device.rs | 12 ++++ src/gallium/frontends/rusticl/api/util.rs | 12 ++++ src/gallium/frontends/rusticl/core/device.rs | 62 +++++++++++++++++-- src/gallium/frontends/rusticl/core/program.rs | 1 + .../rusticl/mesa/compiler/clc/spirv.rs | 36 ++--------- 7 files changed, 88 insertions(+), 37 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 705c01bfdb8..b64d957137a 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -836,6 +836,7 @@ Rusticl extensions that are not part of any OpenCL version: cl_khr_spirv_extended_debug_info not started cl_khr_spirv_linkonce_odr DONE cl_khr_spirv_no_integer_wrap_decoration DONE + cl_khr_spirv_queries DONE cl_khr_srgb_image_writes not started cl_khr_subgroup_ballot not started cl_khr_subgroup_clustered_reduce not started diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 958327de755..00dcce701ba 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -40,3 +40,4 @@ cl_khr_fp16 on asahi, freedreno, llvmpipe, panfrost, radeonsi and zink GL_ARB_shader_clock on panfrost/v6+ VK_EXT_texture_compression_astc_hdr on panvk MSAA with 8 and 16 sample counts on panfrost +cl_khr_spirv_queries diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs index 10d20dd94ba..2e8395aee34 100644 --- a/src/gallium/frontends/rusticl/api/device.rs +++ b/src/gallium/frontends/rusticl/api/device.rs @@ -12,6 +12,7 @@ use rusticl_proc_macros::cl_entrypoint; use rusticl_proc_macros::cl_info_entrypoint; use std::cmp::min; +use std::ffi::c_char; use std::ffi::CStr; use std::mem::size_of; use std::ptr; @@ -279,6 +280,17 @@ unsafe impl CLInfo for cl_device_id { CL_DEVICE_SINGLE_FP_CONFIG => v.write::( (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN) as cl_device_fp_config, ), + CL_DEVICE_SPIRV_CAPABILITIES_KHR => { + v.write_iter::(dev.spirv_caps_vec.iter().map(|&cap| cap as _)) + } + CL_DEVICE_SPIRV_EXTENDED_INSTRUCTION_SETS_KHR => { + // use static memory as we hand out pointers to the values here. + static instr_sets: [&CStr; 1] = [c"OpenCL.std"]; + v.write_iter::<*const c_char>(instr_sets.iter().map(|str| str.as_ptr())) + } + CL_DEVICE_SPIRV_EXTENSIONS_KHR => { + v.write_iter::<*const c_char>(dev.spirv_extensions.iter().map(|str| str.as_ptr())) + } CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS => v.write::(false), CL_DEVICE_SUB_GROUP_SIZES_INTEL => { if dev.subgroups_supported() { diff --git a/src/gallium/frontends/rusticl/api/util.rs b/src/gallium/frontends/rusticl/api/util.rs index b7e3f8d169b..722deeee6ba 100644 --- a/src/gallium/frontends/rusticl/api/util.rs +++ b/src/gallium/frontends/rusticl/api/util.rs @@ -382,6 +382,18 @@ impl CLProp for *mut T { } } +impl CLProp for *const T { + type Output = Self; + + fn count(&self) -> usize { + 1 + } + + fn write_to(&self, out: &mut [MaybeUninit]) { + out[0].write(*self); + } +} + impl CLProp for &Properties where T: CLProp + Copy + Default, diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs index 9a08ed594e5..d7ded752491 100644 --- a/src/gallium/frontends/rusticl/core/device.rs +++ b/src/gallium/frontends/rusticl/core/device.rs @@ -45,6 +45,8 @@ pub struct DeviceBase { pub embedded: bool, pub extension_string: String, pub extensions: Vec, + pub spirv_caps: spirv_capabilities, + pub spirv_caps_vec: Vec, pub spirv_extensions: Vec<&'static CStr>, pub clc_features: Vec, pub formats: HashMap>, @@ -575,6 +577,8 @@ impl DeviceBase { let mut exts_str: Vec = Vec::new(); let mut exts = Vec::new(); let mut feats = Vec::new(); + let mut spirv_caps = spirv_capabilities::default(); + let mut spirv_caps_vec = Vec::new(); let mut spirv_exts = Vec::new(); let mut add_ext = |major, minor, patch, ext: &str| { exts.push(mk_cl_version_ext(major, minor, patch, ext)); @@ -586,6 +590,12 @@ impl DeviceBase { let mut add_spirv = |ext| { spirv_exts.push(ext); }; + let mut add_cap = |cap: SpvCapability| { + unsafe { + spirv_capabilities_set(&mut spirv_caps, cap, true); + } + spirv_caps_vec.push(cap); + }; // add extensions all drivers support for now add_ext(1, 0, 0, "cl_khr_byte_addressable_store"); @@ -599,6 +609,7 @@ impl DeviceBase { add_ext(1, 0, 0, "cl_khr_local_int32_extended_atomics"); add_ext(2, 0, 0, "cl_khr_integer_dot_product"); add_ext(1, 0, 0, "cl_khr_spirv_no_integer_wrap_decoration"); + add_ext(1, 0, 0, "cl_khr_spirv_queries"); add_ext(1, 0, 0, "cl_khr_suggested_local_work_size"); add_feat(2, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit"); @@ -614,16 +625,31 @@ impl DeviceBase { add_spirv(c"SPV_KHR_integer_dot_product"); add_spirv(c"SPV_KHR_no_integer_wrap_decoration"); + add_cap(SpvCapability::SpvCapabilityAddresses); + add_cap(SpvCapability::SpvCapabilityDotProduct); + add_cap(SpvCapability::SpvCapabilityDotProductInput4x8Bit); + add_cap(SpvCapability::SpvCapabilityDotProductInput4x8BitPacked); + add_cap(SpvCapability::SpvCapabilityExpectAssumeKHR); + add_cap(SpvCapability::SpvCapabilityFloat16Buffer); + add_cap(SpvCapability::SpvCapabilityInt8); + add_cap(SpvCapability::SpvCapabilityInt16); + add_cap(SpvCapability::SpvCapabilityLinkage); + add_cap(SpvCapability::SpvCapabilityKernel); + add_cap(SpvCapability::SpvCapabilityUniformDecoration); + add_cap(SpvCapability::SpvCapabilityVector16); + if self.linkonce_supported() { add_ext(1, 0, 0, "cl_khr_spirv_linkonce_odr"); add_spirv(c"SPV_KHR_linkonce_odr"); } if self.fp16_supported() { + add_cap(SpvCapability::SpvCapabilityFloat16); add_ext(1, 0, 0, "cl_khr_fp16"); } if self.fp64_supported() { + add_cap(SpvCapability::SpvCapabilityFloat64); add_ext(1, 0, 0, "cl_khr_fp64"); add_feat(1, 0, 0, "__opencl_c_fp64"); } @@ -637,10 +663,12 @@ impl DeviceBase { add_ext(1, 0, 0, "cles_khr_int64"); }; + add_cap(SpvCapability::SpvCapabilityInt64); add_feat(1, 0, 0, "__opencl_c_int64"); } if self.kernel_clock_supported() { + add_cap(SpvCapability::SpvCapabilityShaderClockKHR); add_ext(1, 0, 0, "cl_khr_kernel_clock"); add_feat(1, 0, 0, "__opencl_c_kernel_clock_scope_device"); add_feat(1, 0, 0, "__opencl_c_kernel_clock_scope_sub_group"); @@ -648,6 +676,12 @@ impl DeviceBase { } if self.caps.has_images { + add_cap(SpvCapability::SpvCapabilityImage1D); + add_cap(SpvCapability::SpvCapabilityImageBasic); + add_cap(SpvCapability::SpvCapabilityImageBuffer); + add_cap(SpvCapability::SpvCapabilityLiteralSampler); + add_cap(SpvCapability::SpvCapabilitySampled1D); + add_cap(SpvCapability::SpvCapabilitySampledBuffer); add_feat(1, 0, 0, "__opencl_c_images"); if self.image2d_from_buffer_supported() { @@ -655,6 +689,7 @@ impl DeviceBase { } if self.caps.has_rw_images { + add_cap(SpvCapability::SpvCapabilityImageReadWrite); add_feat(1, 0, 0, "__opencl_c_read_write_images"); } @@ -680,6 +715,10 @@ impl DeviceBase { } if self.subgroups_supported() { + add_cap(SpvCapability::SpvCapabilityGroupNonUniformShuffle); + add_cap(SpvCapability::SpvCapabilityGroupNonUniformShuffleRelative); + add_cap(SpvCapability::SpvCapabilityGroups); + add_cap(SpvCapability::SpvCapabilitySubgroupDispatch); // requires CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS //add_ext(1, 0, 0, "cl_khr_subgroups"); add_feat(1, 0, 0, "__opencl_c_subgroups"); @@ -688,6 +727,9 @@ impl DeviceBase { add_ext(1, 0, 0, "cl_khr_subgroup_shuffle"); add_ext(1, 0, 0, "cl_khr_subgroup_shuffle_relative"); if self.intel_subgroups_supported() { + // add_cap(SpvCapability::SpvCapabilitySubgroupBufferBlockIOINTEL); + // add_cap(SpvCapability::SpvCapabilitySubgroupImageBlockIOINTEL); + add_cap(SpvCapability::SpvCapabilitySubgroupShuffleINTEL); add_ext(1, 0, 0, "cl_intel_required_subgroup_size"); add_ext(1, 0, 0, "cl_intel_subgroups"); add_spirv(c"SPV_INTEL_subgroups"); @@ -705,6 +747,8 @@ impl DeviceBase { self.extensions = exts; self.clc_features = feats; self.extension_string = exts_str.join(" "); + self.spirv_caps = spirv_caps; + self.spirv_caps_vec = spirv_caps_vec; self.spirv_extensions = spirv_exts; } @@ -1168,11 +1212,6 @@ impl Device { // Create before loading libclc as llvmpipe only creates the shader cache with the first // context being created. let helper_ctx = screen.create_context()?; - let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen); - if lib_clc.is_none() { - eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv"); - } - let mut dev_base = DeviceBase { caps: DeviceCaps::new(&screen), helper_ctx: Mutex::new(helper_ctx), @@ -1184,6 +1223,8 @@ impl Device { embedded: false, extension_string: String::from(""), extensions: Vec::new(), + spirv_caps: spirv_capabilities::default(), + spirv_caps_vec: Vec::new(), spirv_extensions: Vec::new(), clc_features: Vec::new(), formats: HashMap::new(), @@ -1203,6 +1244,17 @@ impl Device { // now figure out what version we are dev_base.check_version(); + // Libclc depends on a few caps which must always be enabled. At runtime we should never + // actually pass relevant functionality down to drivers, so this should be fine. + let mut spirv_caps = dev_base.spirv_caps; + spirv_caps.Float64 = true; + spirv_caps.Int64 = true; + + let lib_clc = spirv::SPIRVBin::get_lib_clc(dev_base.screen(), &spirv_caps); + if lib_clc.is_none() { + eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv"); + } + Some(Device { base: CLObjectBase::new(RusticlTypes::Device), dev_base: dev_base, diff --git a/src/gallium/frontends/rusticl/core/program.rs b/src/gallium/frontends/rusticl/core/program.rs index 3a223c04b06..a8fb0b9d706 100644 --- a/src/gallium/frontends/rusticl/core/program.rs +++ b/src/gallium/frontends/rusticl/core/program.rs @@ -216,6 +216,7 @@ impl ProgramBuild { kernel, d.screen .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE), + &d.spirv_caps, &d.lib_clc, &mut spec_constants, d.address_bits(), diff --git a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs index cb021d6facf..7a7059a4651 100644 --- a/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs +++ b/src/gallium/frontends/rusticl/mesa/compiler/clc/spirv.rs @@ -277,33 +277,6 @@ impl SPIRVBin { } } - fn get_spirv_capabilities() -> spirv_capabilities { - spirv_capabilities { - Addresses: true, - Float16: true, - Float16Buffer: true, - Float64: true, - GenericPointer: true, - Groups: true, - GroupNonUniformShuffle: true, - GroupNonUniformShuffleRelative: true, - Int8: true, - Int16: true, - Int64: true, - Kernel: true, - ImageBasic: true, - ImageReadWrite: true, - Linkage: true, - LiteralSampler: true, - SampledBuffer: true, - Sampled1D: true, - ShaderClockKHR: true, - UniformDecoration: true, - Vector16: true, - ..Default::default() - } - } - fn get_spirv_options( library: bool, clc_shader: *const nir_shader, @@ -350,15 +323,15 @@ impl SPIRVBin { &self, entry_point: &str, nir_options: *const nir_shader_compiler_options, + spirv_caps: &spirv_capabilities, libclc: &NirShader, spec_constants: &mut [nir_spirv_specialization], address_bits: u32, log: Option<&mut Vec>, ) -> Option { let c_entry = CString::new(entry_point.as_bytes()).unwrap(); - let spirv_caps = Self::get_spirv_capabilities(); let spirv_options = - Self::get_spirv_options(false, libclc.get_nir(), address_bits, &spirv_caps, log); + Self::get_spirv_options(false, libclc.get_nir(), address_bits, spirv_caps, log); let nir = unsafe { spirv_to_nir( @@ -376,12 +349,11 @@ impl SPIRVBin { NirShader::new(nir) } - pub fn get_lib_clc(screen: &PipeScreen) -> Option { + pub fn get_lib_clc(screen: &PipeScreen, spirv_caps: &spirv_capabilities) -> Option { let nir_options = screen.nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE); let address_bits = screen.compute_caps().address_bits; - let spirv_caps = Self::get_spirv_capabilities(); let spirv_options = - Self::get_spirv_options(false, ptr::null(), address_bits, &spirv_caps, None); + Self::get_spirv_options(false, ptr::null(), address_bits, spirv_caps, None); let shader_cache = DiskCacheBorrowed::as_ptr(&screen.shader_cache()); NirShader::new(unsafe {