rusticl: add support for printf

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
This commit is contained in:
Karol Herbst 2022-03-18 18:30:14 +01:00 committed by Marge Bot
parent f5e6b3aae3
commit ea1250d0bf
10 changed files with 129 additions and 14 deletions

View file

@ -139,8 +139,7 @@ impl CLInfo<cl_device_info> for cl_device_id {
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG => cl_prop::<cl_uint>(1),
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT => cl_prop::<cl_uint>(1),
CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => cl_prop::<usize>(1),
// TODO
CL_DEVICE_PRINTF_BUFFER_SIZE => cl_prop::<usize>(0),
CL_DEVICE_PRINTF_BUFFER_SIZE => cl_prop::<usize>(dev.printf_buffer_size()),
// TODO
CL_DEVICE_PROFILING_TIMER_RESOLUTION => cl_prop::<usize>(0),
CL_DEVICE_OPENCL_C_FEATURES => cl_prop::<Vec<cl_name_version>>(Vec::new()),

View file

@ -265,6 +265,18 @@ impl Device {
}
}
if self.embedded {
// The minimum value for the EMBEDDED profile is 1 KB.
if self.printf_buffer_size() < 1024 {
res = CLVersion::Cl1_1;
}
} else {
// The minimum value for the FULL profile is 1 MB.
if self.printf_buffer_size() < 1024 * 1024 {
res = CLVersion::Cl1_1;
}
}
if !exts.contains(&"cl_khr_byte_addressable_store")
|| !exts.contains(&"cl_khr_global_int32_base_atomics")
|| !exts.contains(&"cl_khr_global_int32_extended_atomics")
@ -495,6 +507,10 @@ impl Device {
) as usize
}
pub fn printf_buffer_size(&self) -> usize {
1024 * 1024
}
pub fn screen(&self) -> &Arc<PipeScreen> {
&self.screen
}

View file

@ -21,6 +21,7 @@ use std::collections::HashMap;
use std::collections::HashSet;
use std::convert::TryInto;
use std::ptr;
use std::slice;
use std::sync::Arc;
// ugh, we are not allowed to take refs, so...
@ -46,6 +47,7 @@ pub enum KernelArgType {
pub enum InternalKernelArgType {
ConstantBuffer,
GlobalWorkOffsets,
PrintfBuffer,
}
#[derive(Clone)]
@ -156,7 +158,7 @@ where
// mostly like clc_spirv_to_dxil
// does not DCEe uniforms or images!
fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: &NirShader) {
nir.set_workgroup_size_variable_if_zero();
nir.structurize();
while {
@ -212,7 +214,12 @@ fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
nir_variable_mode::nir_var_function_temp,
Some(glsl_get_cl_type_size_align),
);
// TODO printf
let mut printf_opts = nir_lower_printf_options::default();
printf_opts.set_treat_doubles_as_floats(false);
printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
nir.pass1(nir_lower_printf, &printf_opts);
nir.pass0(nir_split_var_copies);
nir.pass0(nir_opt_copy_prop_vars);
nir.pass0(nir_lower_var_copies);
@ -255,6 +262,7 @@ fn lower_and_optimize_nir_late(
Some(glsl_get_cl_type_size_align),
);
nir.extract_constant_initializers();
// TODO printf
// TODO 32 bit devices
// add vars for global offsets
@ -282,6 +290,19 @@ fn lower_and_optimize_nir_late(
"constant_buffer_addr",
);
}
if nir.has_printf() {
res.push(InternalKernelArg {
kind: InternalKernelArgType::PrintfBuffer,
offset: 0,
size: 8,
});
lower_state.printf_buf = nir.add_var(
nir_variable_mode::nir_var_uniform,
unsafe { glsl_uint64_t_type() },
args + res.len() - 1,
"printf_buffer_addr",
);
}
nir.pass2(
nir_lower_vars_to_explicit_types,
@ -332,6 +353,14 @@ fn lower_and_optimize_nir_late(
res
}
fn extract<'a, const S: usize>(buf: &'a mut &[u8]) -> &'a [u8; S] {
let val;
(val, *buf) = (*buf).split_at(S);
// we split of 4 bytes and convert to [u8; 4], so this should be safe
// use split_array_ref once it's stable
val.try_into().unwrap()
}
impl Kernel {
pub fn new(
name: String,
@ -340,7 +369,7 @@ impl Kernel {
args: Vec<spirv::SPIRVKernelArg>,
) -> Arc<Kernel> {
nirs.iter_mut()
.for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc));
.for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(d, n, &d.lib_clc));
let nir = nirs.values_mut().next().unwrap();
let wgs = nir.workgroup_size();
let work_group_size = [wgs[0] as usize, wgs[1] as usize, wgs[2] as usize];
@ -374,7 +403,7 @@ impl Kernel {
// the painful part is, that host threads are allowed to modify the kernel object once it was
// enqueued, so return a closure with all req data included.
pub fn launch(
&self,
self: &Arc<Self>,
q: &Arc<Queue>,
work_dim: u32,
block: &[usize],
@ -388,6 +417,7 @@ impl Kernel {
let mut input: Vec<u8> = Vec::new();
let mut resource_info = Vec::new();
let mut local_size: u32 = nir.shared_size();
let printf_size = q.device.printf_buffer_size() as u32;
for i in 0..3 {
if block[i] == 0 {
@ -424,6 +454,7 @@ impl Kernel {
}
}
let mut printf_buf = None;
for arg in &self.internal_args {
input.append(&mut vec![0; arg.offset - input.len()]);
match arg.kind {
@ -447,30 +478,72 @@ impl Kernel {
InternalKernelArgType::GlobalWorkOffsets => {
input.extend_from_slice(&cl_prop::<[u64; 3]>(offsets));
}
InternalKernelArgType::PrintfBuffer => {
let buf =
Arc::new(q.device.screen.resource_create_buffer(printf_size).unwrap());
input.extend_from_slice(&[0; 8]);
resource_info.push((Some(buf.clone()), arg.offset));
printf_buf = Some(buf);
}
}
}
let cso = q
.device
.helper_ctx()
.create_compute_state(nir, input.len() as u32, local_size);
Box::new(move |_, ctx| {
let k = self.clone();
Box::new(move |q, ctx| {
let nir = k.nirs.get(&q.device).unwrap();
let mut input = input.clone();
let mut resources = Vec::with_capacity(resource_info.len());
let mut globals: Vec<*mut u32> = Vec::new();
let printf_format = nir.printf_format();
let printf_buf = printf_buf.clone();
for (res, offset) in resource_info.clone() {
resources.push(res);
globals.push(unsafe { input.as_mut_ptr().add(offset) }.cast());
}
if let Some(printf_buf) = &printf_buf {
let init_data: [u8; 1] = [4];
ctx.buffer_subdata(
printf_buf,
0,
init_data.as_ptr().cast(),
init_data.len() as u32,
);
}
let cso = ctx.create_compute_state(nir, input.len() as u32, local_size);
ctx.bind_compute_state(cso);
ctx.set_global_binding(resources.as_slice(), &mut globals);
ctx.launch_grid(work_dim, block, grid, &input);
ctx.clear_global_binding(globals.len() as u32);
ctx.delete_compute_state(cso);
ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);
if let Some(printf_buf) = &printf_buf {
let tx = ctx
.buffer_map(printf_buf, 0, printf_size as i32, true)
.with_ctx(ctx);
let mut buf: &[u8] =
unsafe { slice::from_raw_parts(tx.ptr().cast(), printf_size as usize) };
let length = u32::from_ne_bytes(*extract(&mut buf));
// update our slice to make sure we don't go out of bounds
buf = &buf[0..(length - 4) as usize];
unsafe {
u_printf(
stdout,
buf.as_ptr().cast(),
buf.len(),
printf_format.as_ptr(),
printf_format.len() as u32,
);
}
}
Ok(())
})
}

View file

@ -189,6 +189,7 @@ impl SPIRVBin {
kernel: true,
kernel_image: true,
linkage: true,
printf: true,
..Default::default()
},

View file

@ -199,6 +199,23 @@ impl NirShader {
}
}
pub fn has_printf(&self) -> bool {
unsafe {
!self.nir.as_ref().printf_info.is_null() && self.nir.as_ref().printf_info_count != 0
}
}
pub fn printf_format(&self) -> &[u_printf_info] {
if self.has_printf() {
unsafe {
let nir = self.nir.as_ref();
slice::from_raw_parts(nir.printf_info, nir.printf_info_count as usize)
}
} else {
&[]
}
}
pub fn get_constant_buffer(&self) -> &[u8] {
unsafe {
let nir = self.nir.as_ref();

View file

@ -91,6 +91,7 @@ impl PipeScreen {
tmpl.height0 = 1;
tmpl.depth0 = 1;
tmpl.array_size = 1;
tmpl.bind = PIPE_BIND_GLOBAL;
PipeResource::new(unsafe { (*self.screen).resource_create.unwrap()(self.screen, &tmpl) })
}
@ -107,6 +108,7 @@ impl PipeScreen {
tmpl.height0 = 1;
tmpl.depth0 = 1;
tmpl.array_size = 1;
tmpl.bind = PIPE_BIND_GLOBAL;
PipeResource::new(unsafe {
(*self.screen).resource_from_user_memory.unwrap()(self.screen, &tmpl, mem)
@ -184,7 +186,7 @@ impl PipeScreen {
let s = &mut unsafe { *self.screen };
if let Some(func) = s.finalize_nir {
unsafe {
func(s, nir.get_nir().cast());
func(self.screen, nir.get_nir().cast());
}
}
}

View file

@ -193,12 +193,13 @@ rusticl_mesa_bindings_rs = rust.bindgen(
rusticl_bindgen_args,
'--whitelist-function', 'clc_.*',
'--whitelist-function', 'glsl_.*',
'--whitelist-function', 'malloc',
'--whitelist-function', 'nir_.*',
'--whitelist-function', 'pipe_.*',
'--whitelist-function', 'rusticl_.*',
'--whitelist-function', 'rz?alloc_.*',
'--whitelist-function', 'spirv_.*',
'--whitelist-function', 'malloc',
'--whitelist-function', 'u_.*',
'--whitelist-type', 'pipe_endian',
'--whitelist-type', 'clc_kernel_arg_access_qualifier',
'--bitfield-enum', 'clc_kernel_arg_access_qualifier',
@ -210,6 +211,7 @@ rusticl_mesa_bindings_rs = rust.bindgen(
'--whitelist-var', 'PIPE_.*',
'--bitfield-enum', 'pipe_map_flags',
'--allowlist-var', 'stderr',
'--allowlist-var', 'stdout',
'--bitfield-enum', 'nir_lower_int64_options',
],
)

View file

@ -10,4 +10,6 @@
#include "pipe/p_state.h"
#include "pipe-loader/pipe_loader.h"
#include "util/u_printf.h"
#include "rusticl_nir.h"

View file

@ -23,6 +23,8 @@ rusticl_lower_intrinsics_instr(
return nir_load_var(b, state->base_global_invoc_id);
case nir_intrinsic_load_constant_base_ptr:
return nir_load_var(b, state->const_buf);
case nir_intrinsic_load_printf_buffer_address:
return nir_load_var(b, state->printf_buf);
default:
return NULL;
}

View file

@ -1,6 +1,7 @@
struct rusticl_lower_state {
nir_variable *base_global_invoc_id;
nir_variable *const_buf;
nir_variable *printf_buf;
};
bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);