mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-26 04:10:09 +01:00
rusticl: add support for printf
Signed-off-by: Karol Herbst <kherbst@redhat.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15439>
This commit is contained in:
parent
f5e6b3aae3
commit
ea1250d0bf
10 changed files with 129 additions and 14 deletions
|
|
@ -139,8 +139,7 @@ impl CLInfo<cl_device_info> for cl_device_id {
|
|||
CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG => cl_prop::<cl_uint>(1),
|
||||
CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT => cl_prop::<cl_uint>(1),
|
||||
CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE => cl_prop::<usize>(1),
|
||||
// TODO
|
||||
CL_DEVICE_PRINTF_BUFFER_SIZE => cl_prop::<usize>(0),
|
||||
CL_DEVICE_PRINTF_BUFFER_SIZE => cl_prop::<usize>(dev.printf_buffer_size()),
|
||||
// TODO
|
||||
CL_DEVICE_PROFILING_TIMER_RESOLUTION => cl_prop::<usize>(0),
|
||||
CL_DEVICE_OPENCL_C_FEATURES => cl_prop::<Vec<cl_name_version>>(Vec::new()),
|
||||
|
|
|
|||
|
|
@ -265,6 +265,18 @@ impl Device {
|
|||
}
|
||||
}
|
||||
|
||||
if self.embedded {
|
||||
// The minimum value for the EMBEDDED profile is 1 KB.
|
||||
if self.printf_buffer_size() < 1024 {
|
||||
res = CLVersion::Cl1_1;
|
||||
}
|
||||
} else {
|
||||
// The minimum value for the FULL profile is 1 MB.
|
||||
if self.printf_buffer_size() < 1024 * 1024 {
|
||||
res = CLVersion::Cl1_1;
|
||||
}
|
||||
}
|
||||
|
||||
if !exts.contains(&"cl_khr_byte_addressable_store")
|
||||
|| !exts.contains(&"cl_khr_global_int32_base_atomics")
|
||||
|| !exts.contains(&"cl_khr_global_int32_extended_atomics")
|
||||
|
|
@ -495,6 +507,10 @@ impl Device {
|
|||
) as usize
|
||||
}
|
||||
|
||||
pub fn printf_buffer_size(&self) -> usize {
|
||||
1024 * 1024
|
||||
}
|
||||
|
||||
pub fn screen(&self) -> &Arc<PipeScreen> {
|
||||
&self.screen
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ use std::collections::HashMap;
|
|||
use std::collections::HashSet;
|
||||
use std::convert::TryInto;
|
||||
use std::ptr;
|
||||
use std::slice;
|
||||
use std::sync::Arc;
|
||||
|
||||
// ugh, we are not allowed to take refs, so...
|
||||
|
|
@ -46,6 +47,7 @@ pub enum KernelArgType {
|
|||
pub enum InternalKernelArgType {
|
||||
ConstantBuffer,
|
||||
GlobalWorkOffsets,
|
||||
PrintfBuffer,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
|
@ -156,7 +158,7 @@ where
|
|||
|
||||
// mostly like clc_spirv_to_dxil
|
||||
// does not DCEe uniforms or images!
|
||||
fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
|
||||
fn lower_and_optimize_nir_pre_inputs(dev: &Device, nir: &mut NirShader, lib_clc: &NirShader) {
|
||||
nir.set_workgroup_size_variable_if_zero();
|
||||
nir.structurize();
|
||||
while {
|
||||
|
|
@ -212,7 +214,12 @@ fn lower_and_optimize_nir_pre_inputs(nir: &mut NirShader, lib_clc: &NirShader) {
|
|||
nir_variable_mode::nir_var_function_temp,
|
||||
Some(glsl_get_cl_type_size_align),
|
||||
);
|
||||
// TODO printf
|
||||
|
||||
let mut printf_opts = nir_lower_printf_options::default();
|
||||
printf_opts.set_treat_doubles_as_floats(false);
|
||||
printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
|
||||
nir.pass1(nir_lower_printf, &printf_opts);
|
||||
|
||||
nir.pass0(nir_split_var_copies);
|
||||
nir.pass0(nir_opt_copy_prop_vars);
|
||||
nir.pass0(nir_lower_var_copies);
|
||||
|
|
@ -255,6 +262,7 @@ fn lower_and_optimize_nir_late(
|
|||
Some(glsl_get_cl_type_size_align),
|
||||
);
|
||||
nir.extract_constant_initializers();
|
||||
|
||||
// TODO printf
|
||||
// TODO 32 bit devices
|
||||
// add vars for global offsets
|
||||
|
|
@ -282,6 +290,19 @@ fn lower_and_optimize_nir_late(
|
|||
"constant_buffer_addr",
|
||||
);
|
||||
}
|
||||
if nir.has_printf() {
|
||||
res.push(InternalKernelArg {
|
||||
kind: InternalKernelArgType::PrintfBuffer,
|
||||
offset: 0,
|
||||
size: 8,
|
||||
});
|
||||
lower_state.printf_buf = nir.add_var(
|
||||
nir_variable_mode::nir_var_uniform,
|
||||
unsafe { glsl_uint64_t_type() },
|
||||
args + res.len() - 1,
|
||||
"printf_buffer_addr",
|
||||
);
|
||||
}
|
||||
|
||||
nir.pass2(
|
||||
nir_lower_vars_to_explicit_types,
|
||||
|
|
@ -332,6 +353,14 @@ fn lower_and_optimize_nir_late(
|
|||
res
|
||||
}
|
||||
|
||||
fn extract<'a, const S: usize>(buf: &'a mut &[u8]) -> &'a [u8; S] {
|
||||
let val;
|
||||
(val, *buf) = (*buf).split_at(S);
|
||||
// we split of 4 bytes and convert to [u8; 4], so this should be safe
|
||||
// use split_array_ref once it's stable
|
||||
val.try_into().unwrap()
|
||||
}
|
||||
|
||||
impl Kernel {
|
||||
pub fn new(
|
||||
name: String,
|
||||
|
|
@ -340,7 +369,7 @@ impl Kernel {
|
|||
args: Vec<spirv::SPIRVKernelArg>,
|
||||
) -> Arc<Kernel> {
|
||||
nirs.iter_mut()
|
||||
.for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(n, &d.lib_clc));
|
||||
.for_each(|(d, n)| lower_and_optimize_nir_pre_inputs(d, n, &d.lib_clc));
|
||||
let nir = nirs.values_mut().next().unwrap();
|
||||
let wgs = nir.workgroup_size();
|
||||
let work_group_size = [wgs[0] as usize, wgs[1] as usize, wgs[2] as usize];
|
||||
|
|
@ -374,7 +403,7 @@ impl Kernel {
|
|||
// the painful part is, that host threads are allowed to modify the kernel object once it was
|
||||
// enqueued, so return a closure with all req data included.
|
||||
pub fn launch(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
q: &Arc<Queue>,
|
||||
work_dim: u32,
|
||||
block: &[usize],
|
||||
|
|
@ -388,6 +417,7 @@ impl Kernel {
|
|||
let mut input: Vec<u8> = Vec::new();
|
||||
let mut resource_info = Vec::new();
|
||||
let mut local_size: u32 = nir.shared_size();
|
||||
let printf_size = q.device.printf_buffer_size() as u32;
|
||||
|
||||
for i in 0..3 {
|
||||
if block[i] == 0 {
|
||||
|
|
@ -424,6 +454,7 @@ impl Kernel {
|
|||
}
|
||||
}
|
||||
|
||||
let mut printf_buf = None;
|
||||
for arg in &self.internal_args {
|
||||
input.append(&mut vec![0; arg.offset - input.len()]);
|
||||
match arg.kind {
|
||||
|
|
@ -447,30 +478,72 @@ impl Kernel {
|
|||
InternalKernelArgType::GlobalWorkOffsets => {
|
||||
input.extend_from_slice(&cl_prop::<[u64; 3]>(offsets));
|
||||
}
|
||||
InternalKernelArgType::PrintfBuffer => {
|
||||
let buf =
|
||||
Arc::new(q.device.screen.resource_create_buffer(printf_size).unwrap());
|
||||
|
||||
input.extend_from_slice(&[0; 8]);
|
||||
resource_info.push((Some(buf.clone()), arg.offset));
|
||||
|
||||
printf_buf = Some(buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let cso = q
|
||||
.device
|
||||
.helper_ctx()
|
||||
.create_compute_state(nir, input.len() as u32, local_size);
|
||||
|
||||
Box::new(move |_, ctx| {
|
||||
let k = self.clone();
|
||||
Box::new(move |q, ctx| {
|
||||
let nir = k.nirs.get(&q.device).unwrap();
|
||||
let mut input = input.clone();
|
||||
let mut resources = Vec::with_capacity(resource_info.len());
|
||||
let mut globals: Vec<*mut u32> = Vec::new();
|
||||
let printf_format = nir.printf_format();
|
||||
let printf_buf = printf_buf.clone();
|
||||
|
||||
for (res, offset) in resource_info.clone() {
|
||||
resources.push(res);
|
||||
globals.push(unsafe { input.as_mut_ptr().add(offset) }.cast());
|
||||
}
|
||||
|
||||
if let Some(printf_buf) = &printf_buf {
|
||||
let init_data: [u8; 1] = [4];
|
||||
ctx.buffer_subdata(
|
||||
printf_buf,
|
||||
0,
|
||||
init_data.as_ptr().cast(),
|
||||
init_data.len() as u32,
|
||||
);
|
||||
}
|
||||
let cso = ctx.create_compute_state(nir, input.len() as u32, local_size);
|
||||
|
||||
ctx.bind_compute_state(cso);
|
||||
ctx.set_global_binding(resources.as_slice(), &mut globals);
|
||||
ctx.launch_grid(work_dim, block, grid, &input);
|
||||
ctx.clear_global_binding(globals.len() as u32);
|
||||
ctx.delete_compute_state(cso);
|
||||
ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER);
|
||||
|
||||
if let Some(printf_buf) = &printf_buf {
|
||||
let tx = ctx
|
||||
.buffer_map(printf_buf, 0, printf_size as i32, true)
|
||||
.with_ctx(ctx);
|
||||
let mut buf: &[u8] =
|
||||
unsafe { slice::from_raw_parts(tx.ptr().cast(), printf_size as usize) };
|
||||
let length = u32::from_ne_bytes(*extract(&mut buf));
|
||||
|
||||
// update our slice to make sure we don't go out of bounds
|
||||
buf = &buf[0..(length - 4) as usize];
|
||||
|
||||
unsafe {
|
||||
u_printf(
|
||||
stdout,
|
||||
buf.as_ptr().cast(),
|
||||
buf.len(),
|
||||
printf_format.as_ptr(),
|
||||
printf_format.len() as u32,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -189,6 +189,7 @@ impl SPIRVBin {
|
|||
kernel: true,
|
||||
kernel_image: true,
|
||||
linkage: true,
|
||||
printf: true,
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
|
|
|
|||
|
|
@ -199,6 +199,23 @@ impl NirShader {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn has_printf(&self) -> bool {
|
||||
unsafe {
|
||||
!self.nir.as_ref().printf_info.is_null() && self.nir.as_ref().printf_info_count != 0
|
||||
}
|
||||
}
|
||||
|
||||
pub fn printf_format(&self) -> &[u_printf_info] {
|
||||
if self.has_printf() {
|
||||
unsafe {
|
||||
let nir = self.nir.as_ref();
|
||||
slice::from_raw_parts(nir.printf_info, nir.printf_info_count as usize)
|
||||
}
|
||||
} else {
|
||||
&[]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_constant_buffer(&self) -> &[u8] {
|
||||
unsafe {
|
||||
let nir = self.nir.as_ref();
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ impl PipeScreen {
|
|||
tmpl.height0 = 1;
|
||||
tmpl.depth0 = 1;
|
||||
tmpl.array_size = 1;
|
||||
tmpl.bind = PIPE_BIND_GLOBAL;
|
||||
|
||||
PipeResource::new(unsafe { (*self.screen).resource_create.unwrap()(self.screen, &tmpl) })
|
||||
}
|
||||
|
|
@ -107,6 +108,7 @@ impl PipeScreen {
|
|||
tmpl.height0 = 1;
|
||||
tmpl.depth0 = 1;
|
||||
tmpl.array_size = 1;
|
||||
tmpl.bind = PIPE_BIND_GLOBAL;
|
||||
|
||||
PipeResource::new(unsafe {
|
||||
(*self.screen).resource_from_user_memory.unwrap()(self.screen, &tmpl, mem)
|
||||
|
|
@ -184,7 +186,7 @@ impl PipeScreen {
|
|||
let s = &mut unsafe { *self.screen };
|
||||
if let Some(func) = s.finalize_nir {
|
||||
unsafe {
|
||||
func(s, nir.get_nir().cast());
|
||||
func(self.screen, nir.get_nir().cast());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -193,12 +193,13 @@ rusticl_mesa_bindings_rs = rust.bindgen(
|
|||
rusticl_bindgen_args,
|
||||
'--whitelist-function', 'clc_.*',
|
||||
'--whitelist-function', 'glsl_.*',
|
||||
'--whitelist-function', 'malloc',
|
||||
'--whitelist-function', 'nir_.*',
|
||||
'--whitelist-function', 'pipe_.*',
|
||||
'--whitelist-function', 'rusticl_.*',
|
||||
'--whitelist-function', 'rz?alloc_.*',
|
||||
'--whitelist-function', 'spirv_.*',
|
||||
'--whitelist-function', 'malloc',
|
||||
'--whitelist-function', 'u_.*',
|
||||
'--whitelist-type', 'pipe_endian',
|
||||
'--whitelist-type', 'clc_kernel_arg_access_qualifier',
|
||||
'--bitfield-enum', 'clc_kernel_arg_access_qualifier',
|
||||
|
|
@ -210,6 +211,7 @@ rusticl_mesa_bindings_rs = rust.bindgen(
|
|||
'--whitelist-var', 'PIPE_.*',
|
||||
'--bitfield-enum', 'pipe_map_flags',
|
||||
'--allowlist-var', 'stderr',
|
||||
'--allowlist-var', 'stdout',
|
||||
'--bitfield-enum', 'nir_lower_int64_options',
|
||||
],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -10,4 +10,6 @@
|
|||
#include "pipe/p_state.h"
|
||||
#include "pipe-loader/pipe_loader.h"
|
||||
|
||||
#include "util/u_printf.h"
|
||||
|
||||
#include "rusticl_nir.h"
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ rusticl_lower_intrinsics_instr(
|
|||
return nir_load_var(b, state->base_global_invoc_id);
|
||||
case nir_intrinsic_load_constant_base_ptr:
|
||||
return nir_load_var(b, state->const_buf);
|
||||
case nir_intrinsic_load_printf_buffer_address:
|
||||
return nir_load_var(b, state->printf_buf);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
struct rusticl_lower_state {
|
||||
nir_variable *base_global_invoc_id;
|
||||
nir_variable *const_buf;
|
||||
nir_variable *printf_buf;
|
||||
};
|
||||
|
||||
bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue