diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 50f0759d6a8..69f28a89172 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -519,9 +519,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st

    /* Lower shared variables early to prevent the over allocation of shared memory in
     * radv_nir_lower_ray_queries. */
    if (nir->info.stage == MESA_SHADER_COMPUTE) {
-      if (!nir->info.shared_memory_explicit_layout)
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
-
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
    }
@@ -626,10 +624,7 @@
    if (nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH)
       var_modes |= nir_var_mem_task_payload;

-   if (!nir->info.shared_memory_explicit_layout)
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info);
-   else if (var_modes & ~nir_var_mem_shared)
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes & ~nir_var_mem_shared, shared_var_info);
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, var_modes, shared_var_info);
    NIR_PASS(_, nir, nir_lower_explicit_io, var_modes, nir_address_format_32bit_offset);

    if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
diff --git a/src/asahi/vulkan/hk_shader.c b/src/asahi/vulkan/hk_shader.c
index f624126ce27..3a3d39a5d41 100644
--- a/src/asahi/vulkan/hk_shader.c
+++ b/src/asahi/vulkan/hk_shader.c
@@ -750,16 +750,14 @@ hk_lower_nir(struct hk_device *dev, nir_shader *nir,
               lower_load_global_constant_offset_instr, nir_metadata_none,
               &soft_fault);

-   if (!nir->info.shared_memory_explicit_layout) {
-      /* There may be garbage in shared_size, but it's the job of
-       * nir_lower_vars_to_explicit_types to allocate it. We have to reset to
-       * avoid overallocation.
-       */
-      nir->info.shared_size = 0;
+   /* There may be garbage in shared_size, but it's the job of
+    * nir_lower_vars_to_explicit_types to allocate it. We have to reset to
+    * avoid overallocation.
+    */
+   nir->info.shared_size = 0;

-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
-               shared_var_info);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+            shared_var_info);

    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 54767e4e3b4..e65aa9c34b0 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -3090,10 +3090,8 @@ shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
 static void
 lower_compute(struct nir_shader *nir)
 {
-   if (!nir->info.shared_memory_explicit_layout) {
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-               nir_var_mem_shared, shared_type_info);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_mem_shared, shared_type_info);

    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 7ab2c63330e..ad8f4b8b8d1 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -2663,6 +2663,45 @@ lower_vars_to_explicit(nir_shader *shader,
    return progress;
 }

+static unsigned
+nir_calculate_alignment_from_explicit_layout(const glsl_type *type,
+                                             glsl_type_size_align_func type_info)
+{
+   unsigned size, alignment;
+   glsl_get_explicit_type_for_size_align(type, type_info,
+                                         &size, &alignment);
+   return alignment;
+}
+
+static void
+nir_assign_shared_var_locations(nir_shader *shader, glsl_type_size_align_func type_info)
+{
+   assert(shader->info.shared_memory_explicit_layout);
+
+   unsigned aliased_size = 0;
+   unsigned aliased_alignment = 0;
+   nir_foreach_variable_with_modes(var, shader, nir_var_mem_shared) {
+      /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
+       * a Block, all of them will be and Blocks are explicitly laid out.
+       */
+      assert(glsl_type_is_interface(var->type));
+      const bool align_to_stride = false;
+      aliased_size = MAX2(aliased_size, glsl_get_explicit_size(var->type, align_to_stride));
+      aliased_alignment = MAX2(aliased_alignment,
+                               nir_calculate_alignment_from_explicit_layout(var->type, type_info));
+   }
+
+   if (aliased_size) {
+      const unsigned aliased_location =
+         align(shader->info.shared_size, aliased_alignment);
+
+      nir_foreach_variable_with_modes(var, shader, nir_var_mem_shared)
+         var->data.driver_location = aliased_location;
+
+      shader->info.shared_size = aliased_location + aliased_size;
+   }
+}
+
 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
  * generic pointers, it must either be used on all of the generic modes or
  * none.
@@ -2693,8 +2732,13 @@
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);

    if (modes & nir_var_mem_shared) {
-      assert(!shader->info.shared_memory_explicit_layout);
-      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
+      if (shader->info.shared_memory_explicit_layout) {
+         nir_assign_shared_var_locations(shader, type_info);
+         /* Types don't change, so no further lowering is needed. */
+         modes &= ~nir_var_mem_shared;
+      } else {
+         progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
+      }
    }

    if (modes & nir_var_shader_temp)
@@ -2712,11 +2756,13 @@
    if (modes & nir_var_mem_node_payload_in)
       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_node_payload_in, type_info);

-   nir_foreach_function_impl(impl, shader) {
-      if (modes & nir_var_function_temp)
-         progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);
+   if (modes) {
+      nir_foreach_function_impl(impl, shader) {
+         if (modes & nir_var_function_temp)
+            progress |= lower_vars_to_explicit(shader, &impl->locals, nir_var_function_temp, type_info);

-      progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
+         progress |= nir_lower_vars_to_explicit_types_impl(impl, modes, type_info);
+      }
    }

    return progress;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 31e74339c2c..5dad3060d13 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -7189,26 +7189,6 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
     */
    nir_opt_dce(b->shader);

-   /* Per SPV_KHR_workgroup_storage_explicit_layout, if one shared variable is
-    * a Block, all of them will be and Blocks are explicitly laid out.
-    */
-   nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
-      if (glsl_type_is_interface(var->type)) {
-         assert(b->supported_capabilities.WorkgroupMemoryExplicitLayoutKHR);
-         b->shader->info.shared_memory_explicit_layout = true;
-         break;
-      }
-   }
-   if (b->shader->info.shared_memory_explicit_layout) {
-      unsigned size = 0;
-      nir_foreach_variable_with_modes(var, b->shader, nir_var_mem_shared) {
-         assert(glsl_type_is_interface(var->type));
-         const bool align_to_stride = false;
-         size = MAX2(size, glsl_get_explicit_size(var->type, align_to_stride));
-      }
-      b->shader->info.shared_size = size;
-   }
-
    if (stage == MESA_SHADER_FRAGMENT) {
       /* From the Vulkan 1.2.199 spec:
        *
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 372942cc895..ee73c81702f 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -2216,6 +2216,9 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       var->var->name = ralloc_strdup(var->var, val->name);
       var->var->type = vtn_type_get_nir_type(b, var->type, var->mode);
       var->var->data.mode = nir_mode;
+      if (var->mode == vtn_variable_mode_workgroup &&
+          glsl_type_is_interface(var->var->type))
+         b->shader->info.shared_memory_explicit_layout = true;
       break;

    case vtn_variable_mode_input:
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 189735ad77b..dc63f5bf41f 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -743,7 +743,7 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
       val = nir_load_global_ir3(b, intrin->num_components,
                                 intrin->def.bit_size,
                                 base_addr, nir_ishr_imm(b, offset, 2),
-                                .access =
+                                .access = (enum gl_access_qualifier)(
                                    (enum gl_access_qualifier)(ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER) |
                                    ACCESS_CAN_SPECULATE),
@@ -1585,7 +1585,7 @@ tu6_emit_cs_config(struct tu_cs *cs,

    tu_cs_emit_regs(
       cs, HLSQ_CS_CNTL_1(CHIP, .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs,
-                         .workgrouprastorderzfirsten = true,
+                         .workgrouprastorderzfirsten = true,
                          .wgtilewidth = 4, .wgtileheight = tile_height));
    tu_cs_emit_regs(cs, HLSQ_FS_CNTL_0(CHIP, .threadsize = THREAD64));
@@ -2587,10 +2587,8 @@ tu_shader_create(struct tu_device *dev,
                 nir_address_format_64bit_global);

    if (nir->info.stage == MESA_SHADER_COMPUTE) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-                  nir_var_mem_shared, shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared, shared_type_info);

       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 78204082d72..62720e9fa9e 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1127,10 +1127,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                  pipeline->layout.type);

    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-                  nir_var_mem_shared, shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared, shared_type_info);

       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
diff --git a/src/intel/vulkan_hasvk/anv_pipeline.c b/src/intel/vulkan_hasvk/anv_pipeline.c
index 8e485934ce9..d9ce1e7fe17 100644
--- a/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/src/intel/vulkan_hasvk/anv_pipeline.c
@@ -555,10 +555,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                 prog_data, &stage->bind_map, mem_ctx);

    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-                  nir_var_mem_shared, shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+               nir_var_mem_shared, shared_type_info);

       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
diff --git a/src/nouveau/vulkan/nvk_shader.c b/src/nouveau/vulkan/nvk_shader.c
index d38f4005ec4..75bea91d466 100644
--- a/src/nouveau/vulkan/nvk_shader.c
+++ b/src/nouveau/vulkan/nvk_shader.c
@@ -499,10 +499,8 @@ nvk_lower_nir(struct nvk_device *dev, nir_shader *nir,
    NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_intrinsic,
             nir_metadata_none, NULL);

-   if (!nir->info.shared_memory_explicit_layout) {
-      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
-               nir_var_mem_shared, shared_var_info);
-   }
+   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
+            nir_var_mem_shared, shared_var_info);

    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
             nir_address_format_32bit_offset);
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index bf1d9e755a6..622dfaff044 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -792,10 +792,8 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
 #endif

    if (gl_shader_stage_uses_workgroup(stage)) {
-      if (!nir->info.shared_memory_explicit_layout) {
-         NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
-                  shared_type_info);
-      }
+      NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+               shared_type_info);

       NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
                nir_address_format_32bit_offset);
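
Note (not part of the patch): the new nir_assign_shared_var_locations helper in nir_lower_io.c encodes the SPV_KHR_workgroup_storage_explicit_layout aliasing rule, where every Block-decorated Workgroup variable starts at the same offset and the workgroup allocation grows once by the largest explicit block size, aligned to the largest alignment. The following is a minimal standalone C sketch of that calculation; the block_layout struct and all numeric values are hypothetical and exist only to illustrate the arithmetic, they are not Mesa API.

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Hypothetical per-Block explicit size/alignment, as a driver's
 * glsl_type_size_align_func callback would report them. */
struct block_layout {
   unsigned explicit_size;
   unsigned alignment;
};

/* Align up to a power-of-two alignment, like Mesa's align(). */
static unsigned
align_up(unsigned value, unsigned alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

int
main(void)
{
   const struct block_layout blocks[] = {
      { .explicit_size = 48, .alignment = 16 },
      { .explicit_size = 20, .alignment = 8 },
   };
   unsigned shared_size = 12; /* shared memory already reserved by earlier passes */

   /* All explicitly laid out Blocks alias each other: take the maximum
    * explicit size and the maximum alignment over all of them. */
   unsigned aliased_size = 0, aliased_alignment = 1;
   for (unsigned i = 0; i < 2; i++) {
      aliased_size = MAX2(aliased_size, blocks[i].explicit_size);
      aliased_alignment = MAX2(aliased_alignment, blocks[i].alignment);
   }

   /* Every Block gets the same driver_location and shared_size grows once. */
   const unsigned aliased_location = align_up(shared_size, aliased_alignment);
   shared_size = aliased_location + aliased_size;

   /* Prints: driver_location = 16, shared_size = 64 */
   printf("driver_location = %u, shared_size = %u\n",
          aliased_location, shared_size);
   return 0;
}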