From fd0a7efb5af5840f29e29e66dcef4c114ba7c94e Mon Sep 17 00:00:00 2001
From: Caio Oliveira
Date: Tue, 18 Mar 2025 00:01:58 -0700
Subject: [PATCH] spirv, nir: Delay calculation of shared_size when using explicit layout

Move the calculation to nir_lower_vars_to_explicit_types().  This
consolidates the check of shader_info::shared_memory_explicit_layout in
a single place instead of in all drivers.

This is motivated by SPV_KHR_untyped_pointers.  Before that extension we
had essentially two modes for shared memory variables:

- No layout decorations in the SPIR-V; both internal layout and driver
  location were _given by the driver_.

- Explicitly laid out, i.e. they are blocks, and decorated with Aliased.
  Because they all alias, we could assign them a driver location
  directly at the start of the shared memory.

With the untyped pointers extension, there's a third option, to be added
by a later commit:

- Explicitly laid out, i.e. they are blocks, and NOT decorated with
  Aliased.  Driver location is _given by the driver_.  Blocks with and
  without Aliased can be mixed.

The driver location of multiple blocks that don't alias depends on
driver-specific alignment, which is easier to compute from
nir_lower_vars_to_explicit_types(), since that pass already has access
to a function providing such values.

Reviewed-by: Alyssa Rosenzweig (hk)
Reviewed-by: Iago Toral Quiroga (v3dv)
Reviewed-by: Lionel Landwerlin (anv/hasvk)
Reviewed-by: Boris Brezillon (panvk)
Reviewed-by: Samuel Pitoiset (radv)
Reviewed-by: Rob Clark (tu)
Reviewed-by: Faith Ekstrand
Part-of:
---
 src/amd/vulkan/radv_shader.c          |  9 +----
 src/asahi/vulkan/hk_shader.c          | 16 ++++----
 src/broadcom/vulkan/v3dv_pipeline.c   |  6 +--
 src/compiler/nir/nir_lower_io.c       | 58 ++++++++++++++++++++++++---
 src/compiler/spirv/spirv_to_nir.c     | 20 ---------
 src/compiler/spirv/vtn_variables.c    |  3 ++
 src/freedreno/vulkan/tu_shader.cc     | 10 ++---
 src/intel/vulkan/anv_pipeline.c       |  6 +--
 src/intel/vulkan_hasvk/anv_pipeline.c |  6 +--
 src/nouveau/vulkan/nvk_shader.c       |  6 +--
 src/panfrost/vulkan/panvk_vX_shader.c |  6 +--
 11 files changed, 78 insertions(+), 68 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 50f0759d6a8..69f28a89172 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -519,9 +519,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
    /* Lower shared variables early to prevent the over allocation of shared memory in
     * radv_nir_lower_ray_queries.
     */
    if (nir->info.stage == MESA_SHADER_COMPUTE) {
-      if (!nir->info.shared_memory_explicit_layout)
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
-
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
    }
@@ -626,10 +624,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
       if (nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH)
          var_modes |= nir_var_mem_task_payload;
 
-      if (!nir->info.shared_memory_explicit_layout)
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info);
-      else if (var_modes & ~nir_var_mem_shared)
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared, shared_var_info);
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info);
 
       NIR_PASS(_, nir, nir_lower_explicit_io, var_modes, nir_address_format_32bit_offset);
 
       if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
diff --git a/src/asahi/vulkan/hk_shader.c b/src/asahi/vulkan/hk_shader.c
index f624126ce27..3a3d39a5d41 100644
--- a/src/asahi/vulkan/hk_shader.c
+++ b/src/asahi/vulkan/hk_shader.c
@@ -750,16 +750,14 @@ hk_lower_nir(struct hk_device *dev, nir_shader *nir,
                 lower_load_global_constant_offset_instr, nir_metadata_none,
                 &soft_fault);
 
-   if (!nir->info.shared_memory_explicit_layout) {
-      /* There may be garbage in shared_size, but it's the job of
-       * nir_lower_vars_to_explicit_types to allocate it. We have to reset to
-       * avoid overallocation.
-       */
-      nir->info.shared_size = 0;
+   /* There may be garbage in shared_size, but it's the job of
+    * nir_lower_vars_to_explicit_types to allocate it. We have to reset to
+    * avoid overallocation.
+    */
+   nir->info.shared_size = 0;
 
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
-               shared_var_info);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+            shared_var_info);
 
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 54767e4e3b4..e65aa9c34b0 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -3090,10 +3090,8 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 static void
 lower_compute(struct nir_shader *nir)
 {
-   if (!nir->info.shared_memory_explicit_layout) {
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-               nir_var_mem_shared, shared_type_info);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_mem_shared, shared_type_info);
 
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 7ab2c63330e..ad8f4b8b8d1 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -2663,6 +2663,45 @@ lower_vars_to_explicit(nir_shader *shader,
    return progress;
 }
 
+static unsigned
+nir_calculate_alignment_from_explicit_layout(const glsl_type *type,
+                                             glsl_type_size_align_func type_info)
+{
+   unsigned size, alignment;
+   glsl_get_explicit_type_for_size_align(type, type_info,
+                                         &size, &alignment);
+   return alignment;
+}
+
+static void
+nir_assign_shared_var_locations(nir_shader *shader, glsl_type_size_align_func type_info)
+{
+   assert(shader->info.shared_memory_explicit_layout);
+
+   unsigned aliased_size = 0;
+   unsigned aliased_alignment = 0;
+   nir_foreach_variable_with_modes(var, shader, nir_var_mem_shared) {
+      /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
+       * a Block, all of them will be and Blocks are explicitly laid out.
+       */
+      assert(glsl_type_is_interface(var->type));
+      const bool align_to_stride = false;
+      aliased_size = MAX2(aliased_size, glsl_get_explicit_size(var->type, align_to_stride));
+      aliased_alignment = MAX2(aliased_alignment,
+                               nir_calculate_alignment_from_explicit_layout(var->type, type_info));
+   }
+
+   if (aliased_size) {
+      const unsigned aliased_location =
+         align(shader->info.shared_size, aliased_alignment);
+
+      nir_foreach_variable_with_modes(var, shader, nir_var_mem_shared)
+         var->data.driver_location = aliased_location;
+
+      shader->info.shared_size = aliased_location + aliased_size;
+   }
+}
+
 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
  * generic pointers, it must either be used on all of the generic modes or
  * none.
@@ -2693,8 +2732,13 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
 
    if (modes & nir_var_mem_shared) {
-      assert(!shader->info.shared_memory_explicit_layout);
-      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
+      if (shader->info.shared_memory_explicit_layout) {
+         nir_assign_shared_var_locations(shader, type_info);
+         /* Types don't change, so no further lowering is needed. */
+         modes &= ~nir_var_mem_shared;
+      } else {
+         progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
+      }
    }
 
    if (modes & nir_var_shader_temp)
@@ -2712,11 +2756,13 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
    if (modes & nir_var_mem_node_payload_in)
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);
 
-   nir_foreach_function_impl(impl, shader) {
-      if (modes & nir_var_function_temp)
-         progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
+   if (modes) {
+      nir_foreach_function_impl(impl, shader) {
+         if (modes & nir_var_function_temp)
+            progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
 
-      progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
+         progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
+      }
    }
 
    return progress;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 31e74339c2c..5dad3060d13 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -7189,26 +7189,6 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
     */
    nir_opt_dce(b->shader);
 
-   /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
-    * a Block, all of them will be and Blocks are explicitly laid out.
-    */
-   nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
-      if (glsl_type_is_interface(var->type)) {
-         assert(b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR);
-         b->shader->info.shared_memory_explicit_layout = true;
-         break;
-      }
-   }
-   if (b->shader->info.shared_memory_explicit_layout) {
-      unsigned size = 0;
-      nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
-         assert(glsl_type_is_interface(var->type));
-         const bool align_to_stride = false;
-         size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
-      }
-      b->shader->info.shared_size = size;
-   }
-
    if (stage == MESA_SHADER_FRAGMENT) {
       /* From the Vulkan 1.2.199 spec:
        *
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 372942cc895..ee73c81702f 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -2216,6 +2216,9 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       var->var->name = ralloc_strdup(var->var, val->name);
       var->var->type = vtn_type_get_nir_type(b, var->type, var->mode);
       var->var->data.mode = nir_mode;
+      if (var->mode == vtn_variable_mode_workgroup &&
+          glsl_type_is_interface(var->var->type))
+         b->shader->info.shared_memory_explicit_layout = true;
       break;
 
    case vtn_variable_mode_input:
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 189735ad77b..dc63f5bf41f 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -743,7 +743,7 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
       val = nir_load_global_ir3(b, intrin->num_components,
                                 intrin->def.bit_size,
                                 base_addr, nir_ishr_imm(b, offset, 2),
-                                .access =
+                                .access = (enum gl_access_qualifier)(
                                    (enum gl_access_qualifier)(ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER) |
                                    ACCESS_CAN_SPECULATE),
@@ -1585,7 +1585,7 @@ tu6_emit_cs_config(struct tu_cs *cs,
    tu_cs_emit_regs(
       cs, HLSQ_CS_CNTL_1(CHIP,
                          .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs,
-                         .workgrouprastorderzfirsten = true,
+                         .workgrouprastorderzfirsten = true, .wgtilewidth = 4, .wgtileheight = tile_height));
 
    tu_cs_emit_regs(cs, HLSQ_FS_CNTL_0(CHIP, .threadsize = THREAD64));
@@ -2587,10 +2587,8 @@ tu_shader_create(struct tu_device *dev,
                 nir_address_format_64bit_global);
 
    if (nir->info.stage == MESA_SHADER_COMPUTE) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-                  nir_var_mem_shared, shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared, shared_type_info);
 
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 78204082d72..62720e9fa9e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1127,10 +1127,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                        pipeline->layout.type);
 
    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-                  nir_var_mem_shared, shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared, shared_type_info);
 
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c
index 8e485934ce9..d9ce1e7fe17 100644
--- a/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/src/intel/vulkan_hasvk/anv_pipeline.c
@@ -555,10 +555,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                        prog_data, &stage->bind_map, mem_ctx);
 
    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-                  nir_var_mem_shared, shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared, shared_type_info);
 
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index d38f4005ec4..75bea91d466 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -499,10 +499,8 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
    NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_intrinsic,
             nir_metadata_none, NULL);
 
-   if (!nir->info.shared_memory_explicit_layout) {
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-               nir_var_mem_shared, shared_var_info);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_mem_shared, shared_var_info);
 
    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index bf1d9e755a6..622dfaff044 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -792,10 +792,8 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
 #endif
 
    if (gl_shader_stage_uses_workgroup(stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
-                  shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+               shared_type_info);
 
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
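
A worked example of the new nir_assign_shared_var_locations() logic, using
hypothetical block sizes (assumed for illustration, not taken from the patch):
given two Aliased blocks A (size 96, align 16) and B (size 64, align 32), with
shader_info::shared_size already at 40 when the pass runs:

   /* Illustration only; the sizes and starting shared_size are assumed. */
   aliased_size      = MAX2(96, 64)  = 96
   aliased_alignment = MAX2(16, 32)  = 32
   aliased_location  = align(40, 32) = 64    /* start of the aliased region */
   shared_size       = 64 + 96       = 160   /* updated nir->info.shared_size */

Both A and B get driver_location 64, since Aliased blocks overlap the same
storage; the non-aliased blocks added by the later SPV_KHR_untyped_pointers
commit would instead receive distinct driver locations.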