mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 01:20:17 +01:00
rusticl: lower huge grids
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27666>
This commit is contained in:
parent
204c287327
commit
91552bb4ec
4 changed files with 100 additions and 9 deletions
|
|
@ -892,6 +892,11 @@ impl Device {
|
|||
v.into_iter().map(|v| v as usize).collect()
|
||||
}
|
||||
|
||||
/// Queries the screen's `PIPE_COMPUTE_CAP_MAX_GRID_SIZE` compute parameter and
/// returns the result unchanged as a `Vec<u64>`.
///
/// NOTE(review): presumably one entry per grid dimension (x, y, z); the launch
/// code in this diff indexes entries 0, 1 and 2 of the returned vector, so at
/// least three elements are expected. Confirm against the driver interface.
pub fn max_grid_size(&self) -> Vec<u64> {
|
||||
self.screen
|
||||
.compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GRID_SIZE)
|
||||
}
|
||||
|
||||
pub fn max_clock_freq(&self) -> cl_uint {
|
||||
self.screen
|
||||
.compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use crate::api::icd::*;
|
|||
use crate::core::device::*;
|
||||
use crate::core::event::*;
|
||||
use crate::core::memory::*;
|
||||
use crate::core::platform::*;
|
||||
use crate::core::program::*;
|
||||
use crate::core::queue::*;
|
||||
use crate::impl_cl_type_trait;
|
||||
|
|
@ -61,6 +62,8 @@ pub enum InternalKernelArgType {
|
|||
FormatArray,
|
||||
OrderArray,
|
||||
WorkDim,
|
||||
WorkGroupOffsets,
|
||||
NumWorkgroups,
|
||||
}
|
||||
|
||||
#[derive(Hash, PartialEq, Eq, Clone)]
|
||||
|
|
@ -221,6 +224,8 @@ impl InternalKernelArg {
|
|||
InternalKernelArgType::FormatArray => bin.push(4),
|
||||
InternalKernelArgType::OrderArray => bin.push(5),
|
||||
InternalKernelArgType::WorkDim => bin.push(6),
|
||||
InternalKernelArgType::WorkGroupOffsets => bin.push(7),
|
||||
InternalKernelArgType::NumWorkgroups => bin.push(8),
|
||||
}
|
||||
|
||||
bin
|
||||
|
|
@ -243,6 +248,8 @@ impl InternalKernelArg {
|
|||
4 => InternalKernelArgType::FormatArray,
|
||||
5 => InternalKernelArgType::OrderArray,
|
||||
6 => InternalKernelArgType::WorkDim,
|
||||
7 => InternalKernelArgType::WorkGroupOffsets,
|
||||
8 => InternalKernelArgType::NumWorkgroups,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
|
|
@ -531,6 +538,7 @@ fn lower_and_optimize_nir(
|
|||
nir_pass!(nir, nir_lower_system_values);
|
||||
let mut compute_options = nir_lower_compute_system_values_options::default();
|
||||
compute_options.set_has_base_global_invocation_id(true);
|
||||
compute_options.set_has_base_workgroup_id(true);
|
||||
nir_pass!(nir, nir_lower_compute_system_values, &compute_options);
|
||||
nir.gather_info();
|
||||
|
||||
|
|
@ -549,6 +557,37 @@ fn lower_and_optimize_nir(
|
|||
);
|
||||
}
|
||||
|
||||
if nir.reads_sysval(gl_system_value::SYSTEM_VALUE_BASE_WORKGROUP_ID) {
|
||||
internal_args.push(InternalKernelArg {
|
||||
kind: InternalKernelArgType::WorkGroupOffsets,
|
||||
offset: 0,
|
||||
size: 3 * size_of::<usize>(),
|
||||
});
|
||||
lower_state.base_workgroup_id_loc = args.len() + internal_args.len() - 1;
|
||||
nir.add_var(
|
||||
nir_variable_mode::nir_var_uniform,
|
||||
unsafe { glsl_vector_type(host_bits_base_type, 3) },
|
||||
lower_state.base_workgroup_id_loc,
|
||||
"base_workgroup_id",
|
||||
);
|
||||
}
|
||||
|
||||
if nir.reads_sysval(gl_system_value::SYSTEM_VALUE_NUM_WORKGROUPS) {
|
||||
internal_args.push(InternalKernelArg {
|
||||
kind: InternalKernelArgType::NumWorkgroups,
|
||||
offset: 0,
|
||||
size: 12,
|
||||
});
|
||||
|
||||
lower_state.num_workgroups_loc = args.len() + internal_args.len() - 1;
|
||||
nir.add_var(
|
||||
nir_variable_mode::nir_var_uniform,
|
||||
unsafe { glsl_vector_type(glsl_base_type::GLSL_TYPE_UINT, 3) },
|
||||
lower_state.num_workgroups_loc,
|
||||
"num_workgroups",
|
||||
);
|
||||
}
|
||||
|
||||
if nir.has_constant() {
|
||||
internal_args.push(InternalKernelArg {
|
||||
kind: InternalKernelArgType::ConstantBuffer,
|
||||
|
|
@ -906,6 +945,7 @@ impl Kernel {
|
|||
let mut block = create_kernel_arr::<u32>(block, 1)?;
|
||||
let mut grid = create_kernel_arr::<usize>(grid, 1)?;
|
||||
let offsets = create_kernel_arr::<usize>(offsets, 0)?;
|
||||
let mut workgroup_id_offset_loc = None;
|
||||
let mut input: Vec<u8> = Vec::new();
|
||||
let mut resource_info = Vec::new();
|
||||
// Set it once so we get the alignment padding right
|
||||
|
|
@ -919,10 +959,12 @@ impl Kernel {
|
|||
let mut tex_orders: Vec<u16> = Vec::new();
|
||||
let mut img_formats: Vec<u16> = Vec::new();
|
||||
let mut img_orders: Vec<u16> = Vec::new();
|
||||
let null_ptr: &[u8] = if q.device.address_bits() == 64 {
|
||||
&[0; 8]
|
||||
|
||||
let host_null_v3 = &[0u8; 3 * size_of::<usize>()];
|
||||
let null_ptr = if q.device.address_bits() == 64 {
|
||||
[0u8; 8].as_slice()
|
||||
} else {
|
||||
&[0; 4]
|
||||
[0u8; 4].as_slice()
|
||||
};
|
||||
|
||||
self.optimize_local_size(q.device, &mut grid, &mut block);
|
||||
|
|
@ -1043,6 +1085,10 @@ impl Kernel {
|
|||
InternalKernelArgType::GlobalWorkOffsets => {
|
||||
input.extend_from_slice(unsafe { as_byte_slice(&offsets) });
|
||||
}
|
||||
InternalKernelArgType::WorkGroupOffsets => {
|
||||
workgroup_id_offset_loc = Some(input.len());
|
||||
input.extend_from_slice(host_null_v3);
|
||||
}
|
||||
InternalKernelArgType::PrintfBuffer => {
|
||||
let buf = Arc::new(
|
||||
q.device
|
||||
|
|
@ -1074,6 +1120,11 @@ impl Kernel {
|
|||
InternalKernelArgType::WorkDim => {
|
||||
input.extend_from_slice(&[work_dim as u8; 1]);
|
||||
}
|
||||
InternalKernelArgType::NumWorkgroups => {
|
||||
input.extend_from_slice(unsafe {
|
||||
as_byte_slice(&[grid[0] as u32, grid[1] as u32, grid[2] as u32])
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1123,13 +1174,42 @@ impl Kernel {
|
|||
ctx.set_global_binding(resources.as_slice(), &mut globals);
|
||||
ctx.update_cb0(&input);
|
||||
|
||||
let grid = [
|
||||
grid[0].try_into().ok().ok_or(CL_OUT_OF_HOST_MEMORY)?,
|
||||
grid[1].try_into().ok().ok_or(CL_OUT_OF_HOST_MEMORY)?,
|
||||
grid[2].try_into().ok().ok_or(CL_OUT_OF_HOST_MEMORY)?,
|
||||
];
|
||||
let hw_max_grid: Vec<usize> = q
|
||||
.device
|
||||
.max_grid_size()
|
||||
.into_iter()
|
||||
.map(|val| val.try_into().unwrap_or(usize::MAX))
|
||||
// clamped as pipe_launch_grid::grid is only u32
|
||||
.map(|val| cmp::min(val, u32::MAX as usize))
|
||||
.collect();
|
||||
|
||||
ctx.launch_grid(work_dim, block, grid, variable_local_size as u32);
|
||||
for z in 0..div_round_up(grid[2], hw_max_grid[2]) {
|
||||
for y in 0..div_round_up(grid[1], hw_max_grid[1]) {
|
||||
for x in 0..div_round_up(grid[0], hw_max_grid[0]) {
|
||||
if let Some(workgroup_id_offset_loc) = workgroup_id_offset_loc {
|
||||
let this_offsets =
|
||||
[x * hw_max_grid[0], y * hw_max_grid[1], z * hw_max_grid[2]];
|
||||
|
||||
input[workgroup_id_offset_loc
|
||||
..workgroup_id_offset_loc + (size_of::<usize>() * 3)]
|
||||
.copy_from_slice(unsafe { as_byte_slice(&this_offsets) });
|
||||
}
|
||||
|
||||
let this_grid = [
|
||||
cmp::min(hw_max_grid[0], grid[0] - hw_max_grid[0] * x) as u32,
|
||||
cmp::min(hw_max_grid[1], grid[1] - hw_max_grid[1] * y) as u32,
|
||||
cmp::min(hw_max_grid[2], grid[2] - hw_max_grid[2] * z) as u32,
|
||||
];
|
||||
|
||||
ctx.update_cb0(&input);
|
||||
ctx.launch_grid(work_dim, block, this_grid, variable_local_size as u32);
|
||||
|
||||
if Platform::dbg().sync_every_event {
|
||||
ctx.flush().wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx.clear_global_binding(globals.len() as u32);
|
||||
ctx.clear_shader_images(iviews.len() as u32);
|
||||
|
|
|
|||
|
|
@ -65,6 +65,10 @@ rusticl_lower_intrinsics_instr(
|
|||
return NULL;
|
||||
case nir_intrinsic_load_base_global_invocation_id:
|
||||
return nir_load_var(b, nir_find_variable_with_location(b->shader, nir_var_uniform, state->base_global_invoc_id_loc));
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
return nir_load_var(b, nir_find_variable_with_location(b->shader, nir_var_uniform, state->base_workgroup_id_loc));
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
return nir_load_var(b, nir_find_variable_with_location(b->shader, nir_var_uniform, state->num_workgroups_loc));
|
||||
case nir_intrinsic_load_constant_base_ptr:
|
||||
return nir_load_var(b, nir_find_variable_with_location(b->shader, nir_var_uniform, state->const_buf_loc));
|
||||
case nir_intrinsic_load_printf_buffer_address:
|
||||
|
|
|
|||
|
|
@ -2,11 +2,13 @@
|
|||
|
||||
/*
 * State handed to rusticl_lower_intrinsics().
 *
 * Each *_loc field is a variable location. The lowering code visible in this
 * diff passes base_global_invoc_id_loc, base_workgroup_id_loc,
 * num_workgroups_loc and const_buf_loc to nir_find_variable_with_location()
 * (mode nir_var_uniform) and replaces the matching intrinsic with a load of
 * the variable found there.
 *
 * NOTE(review): printf_buf_loc, format_arr_loc, order_arr_loc and
 * work_dim_loc are presumably used the same way for their respective
 * intrinsics; their uses are not visible here, so confirm in the lowering pass.
 */
struct rusticl_lower_state {
|
||||
/* location of the uniform replacing load_base_global_invocation_id */
size_t base_global_invoc_id_loc;
|
||||
/* location of the uniform replacing load_base_workgroup_id */
size_t base_workgroup_id_loc;
|
||||
/* location of the uniform replacing load_constant_base_ptr */
size_t const_buf_loc;
|
||||
size_t printf_buf_loc;
|
||||
size_t format_arr_loc;
|
||||
size_t order_arr_loc;
|
||||
size_t work_dim_loc;
|
||||
/* location of the uniform replacing load_num_workgroups */
size_t num_workgroups_loc;
|
||||
};
|
||||
|
||||
bool rusticl_lower_intrinsics(nir_shader *nir, struct rusticl_lower_state *state);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue