spirv, nir: Delay calculation of shared_size when using explicit layout

Move the calculation to nir_lower_vars_to_explicit_types().  This
consolidates the check of shader_info::shared_memory_explicit_layout
into a single place instead of duplicating it in every driver.

This is motivated by SPV_KHR_untyped_pointers.  Before that extension,
there were essentially two modes for shared memory variables:

- No layout decorations in the SPIR-V; both the internal layout and the
  driver location were _given by the driver_.

- Explicitly laid out, i.e. they are blocks, and decorated with Aliased.
  Because they all alias, we could assign their driver location directly
  to the start of the shared memory.

With the untyped pointers extension, there's a third option, to be added
by a later commit:

- Explicitly laid out, i.e. they are blocks, and NOT decorated
  with Aliased.  Driver location is _given by the driver_.  Blocks
  with and without Aliased can be mixed.

The driver location of multiple blocks that don't alias depends on
driver-specific alignment, which is easier to handle from
nir_lower_vars_to_explicit_types(), since that pass already has access
to a function that provides such alignment.
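
For illustration only, a minimal before/after sketch based on the radv
hunk below (shared_var_info is the driver's glsl_type_size_align_func
callback); a driver's shared-memory lowering no longer needs to branch
on the flag:

   /* Before: skip the pass when the layout is explicit. */
   if (!nir->info.shared_memory_explicit_layout)
      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
               nir_var_mem_shared, shared_var_info);

   /* After: call the pass unconditionally; it handles both cases. */
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
            nir_var_mem_shared, shared_var_info);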

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> (hk)
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> (v3dv)
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (anv/hasvk)
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> (panvk)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (radv)
Reviewed-by: Rob Clark <robdclark@gmail.com> (tu)
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34139>
Caio Oliveira 2025-03-18 00:01:58 -07:00 committed by Marge Bot
parent adfe29cb46
commit fd0a7efb5a
11 changed files with 78 additions and 68 deletions

@@ -519,9 +519,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
    /* Lower shared variables early to prevent the over allocation of shared memory in
     * radv_nir_lower_ray_queries. */
    if (nir->info.stage == MESA_SHADER_COMPUTE) {
-      if (!nir->info.shared_memory_explicit_layout)
       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
    }

@@ -626,10 +624,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
    if (nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH)
       var_modes |= nir_var_mem_task_payload;

-   if (!nir->info.shared_memory_explicit_layout)
       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info);
-   else if (var_modes & ~nir_var_mem_shared)
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared, shared_var_info);
    NIR_PASS(_, nir, nir_lower_explicit_io, var_modes, nir_address_format_32bit_offset);

    if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {

@@ -750,7 +750,6 @@ hk_lower_nir(struct hk_device *dev, nir_shader *nir,
             lower_load_global_constant_offset_instr, nir_metadata_none,
             &soft_fault);

-   if (!nir->info.shared_memory_explicit_layout) {
       /* There may be garbage in shared_size, but it's the job of
        * nir_lower_vars_to_explicit_types to allocate it. We have to reset to
        * avoid overallocation.
@@ -759,7 +758,6 @@ hk_lower_nir(struct hk_device *dev, nir_shader *nir,
       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
                shared_var_info);
-   }
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);

@@ -3090,10 +3090,8 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 static void
 lower_compute(struct nir_shader *nir)
 {
-   if (!nir->info.shared_memory_explicit_layout) {
       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
                nir_var_mem_shared, shared_type_info);
-   }
    NIR_PASS(_, nir, nir_lower_explicit_io,
             nir_var_mem_shared, nir_address_format_32bit_offset);

@@ -2663,6 +2663,45 @@ lower_vars_to_explicit(nir_shader *shader,
    return progress;
 }

+static unsigned
+nir_calculate_alignment_from_explicit_layout(const glsl_type *type,
+                                             glsl_type_size_align_func type_info)
+{
+   unsigned size, alignment;
+   glsl_get_explicit_type_for_size_align(type, type_info,
+                                         &size, &alignment);
+   return alignment;
+}
+
+static void
+nir_assign_shared_var_locations(nir_shader *shader, glsl_type_size_align_func type_info)
+{
+   assert(shader->info.shared_memory_explicit_layout);
+
+   unsigned aliased_size = 0;
+   unsigned aliased_alignment = 0;
+   nir_foreach_variable_with_modes(var, shader, nir_var_mem_shared) {
+      /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
+       * a Block, all of them will be and Blocks are explicitly laid out.
+       */
+      assert(glsl_type_is_interface(var->type));
+
+      const bool align_to_stride = false;
+      aliased_size = MAX2(aliased_size, glsl_get_explicit_size(var->type, align_to_stride));
+      aliased_alignment = MAX2(aliased_alignment,
+                               nir_calculate_alignment_from_explicit_layout(var->type, type_info));
+   }
+
+   if (aliased_size) {
+      const unsigned aliased_location =
+         align(shader->info.shared_size, aliased_alignment);
+
+      nir_foreach_variable_with_modes(var, shader, nir_var_mem_shared)
+         var->data.driver_location = aliased_location;
+
+      shader->info.shared_size = aliased_location + aliased_size;
+   }
+}
+
 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
  * generic pointers, it must either be used on all of the generic modes or
  * none.
@@ -2693,9 +2732,14 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);

    if (modes & nir_var_mem_shared) {
-      assert(!shader->info.shared_memory_explicit_layout);
+      if (shader->info.shared_memory_explicit_layout) {
+         nir_assign_shared_var_locations(shader, type_info);
+
+         /* Types don't change, so no further lowering is needed. */
+         modes &= ~nir_var_mem_shared;
+      } else {
          progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
       }
+   }

    if (modes & nir_var_shader_temp)
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);

@@ -2712,12 +2756,14 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
    if (modes & nir_var_mem_node_payload_in)
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);

+   if (modes) {
       nir_foreach_function_impl(impl, shader) {
          if (modes & nir_var_function_temp)
            progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
          progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
       }
+   }

    return progress;
 }

@@ -7189,26 +7189,6 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
     */
    nir_opt_dce(b->shader);

-   /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
-    * a Block, all of them will be and Blocks are explicitly laid out.
-    */
-   nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
-      if (glsl_type_is_interface(var->type)) {
-         assert(b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR);
-         b->shader->info.shared_memory_explicit_layout = true;
-         break;
-      }
-   }
-
-   if (b->shader->info.shared_memory_explicit_layout) {
-      unsigned size = 0;
-      nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
-         assert(glsl_type_is_interface(var->type));
-         const bool align_to_stride = false;
-         size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
-      }
-      b->shader->info.shared_size = size;
-   }
-
    if (stage == MESA_SHADER_FRAGMENT) {
       /* From the Vulkan 1.2.199 spec:
        *

@@ -2216,6 +2216,9 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       var->var->name = ralloc_strdup(var->var, val->name);
       var->var->type = vtn_type_get_nir_type(b, var->type, var->mode);
       var->var->data.mode = nir_mode;
+      if (var->mode == vtn_variable_mode_workgroup &&
+          glsl_type_is_interface(var->var->type))
+         b->shader->info.shared_memory_explicit_layout = true;
       break;

    case vtn_variable_mode_input:

@@ -2587,10 +2587,8 @@ tu_shader_create(struct tu_device *dev,
             nir_address_format_64bit_global);

    if (nir->info.stage == MESA_SHADER_COMPUTE) {
-      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
                  nir_var_mem_shared, shared_type_info);
-      }
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);

@@ -1127,10 +1127,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       pipeline->layout.type);

    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
                  nir_var_mem_shared, shared_type_info);
-      }
       NIR_PASS(_, nir, nir_lower_explicit_io,
                nir_var_mem_shared, nir_address_format_32bit_offset);

@@ -555,10 +555,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                      prog_data, &stage->bind_map, mem_ctx);

    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
                  nir_var_mem_shared, shared_type_info);
-      }
       NIR_PASS(_, nir, nir_lower_explicit_io,
                nir_var_mem_shared, nir_address_format_32bit_offset);

@@ -499,10 +499,8 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
    NIR_PASS(_, nir, nir_shader_intrinsics_pass,
             lower_load_intrinsic, nir_metadata_none, NULL);

-   if (!nir->info.shared_memory_explicit_layout) {
      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
               nir_var_mem_shared, shared_var_info);
-   }
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);

@@ -792,10 +792,8 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
 #endif

    if (gl_shader_stage_uses_workgroup(stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
                  shared_type_info);
-      }
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);