panvk: Implement VK_KHR_zero_initialize_workgroup_memory

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: John Anthony <john.anthony@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32131>
2026-05-05 11:48:06 +02:00 · 2024-11-14 08:43:44 +01:00 · 2024-11-14 08:43:44 +01:00 · 98cff2e098
commit 98cff2e098
parent f88ecf6bb1
3 changed files with 17 additions and 2 deletions
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -489,7 +489,7 @@ Vulkan 1.3 -- all DONE: anv, lvp, nvk, radv, tu, vn, v3dv
  VK_KHR_shader_relaxed_extended_instruction            DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv)
  VK_KHR_shader_terminate_invocation                    DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
  VK_KHR_synchronization2                               DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
-  VK_KHR_zero_initialize_workgroup_memory               DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
+  VK_KHR_zero_initialize_workgroup_memory               DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
  VK_EXT_4444_formats                                   DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
  VK_EXT_extended_dynamic_state                         DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
  VK_EXT_extended_dynamic_state2                        DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
--- a/src/panfrost/vulkan/panvk_physical_device.c
+++ b/src/panfrost/vulkan/panvk_physical_device.c
@@ -221,6 +221,7 @@ get_device_extensions(const struct panvk_physical_device *device,
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_variable_pointers = true,
+      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_buffer_device_address = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_enable = true,
@@ -343,7 +344,7 @@ get_features(const struct panvk_physical_device *device,
      .computeFullSubgroups = false,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
-      .shaderZeroInitializeWorkgroupMemory = false,
+      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = false,
      .maintenance4 = false,
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -485,6 +485,20 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
                 nir_address_format_32bit_offset);
   }

+   if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
+      /* Align everything up to 16 bytes to take advantage of load/store
+       * vectorization. */
+      nir->info.shared_size = align(nir->info.shared_size, 16);
+      NIR_PASS(_, nir, nir_zero_initialize_shared_memory, nir->info.shared_size,
+               16);
+
+      /* We need to call lower_compute_system_values again because
+       * nir_zero_initialize_shared_memory generates load_invocation_id which
+       * has to be lowered to load_invocation_index.
+       */
+      NIR_PASS(_, nir, nir_lower_compute_system_values, NULL);
+   }
+
   if (stage == MESA_SHADER_VERTEX) {
      /* We need the driver_location to match the vertex attribute location,
       * so we can use the attribute layout described by