panvk: Implement VK_KHR_zero_initialize_workgroup_memory

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: John Anthony <john.anthony@arm.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32131>
This commit is contained in:
Mary Guillemard 2024-11-14 08:43:44 +01:00
parent f88ecf6bb1
commit 98cff2e098
3 changed files with 17 additions and 2 deletions

View file

@ -489,7 +489,7 @@ Vulkan 1.3 -- all DONE: anv, lvp, nvk, radv, tu, vn, v3dv
VK_KHR_shader_relaxed_extended_instruction DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv)
VK_KHR_shader_terminate_invocation DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
VK_KHR_synchronization2 DONE (anv, dzn, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
- VK_KHR_zero_initialize_workgroup_memory DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
+ VK_KHR_zero_initialize_workgroup_memory DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
VK_EXT_4444_formats DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
VK_EXT_extended_dynamic_state DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
VK_EXT_extended_dynamic_state2 DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)

View file

@ -221,6 +221,7 @@ get_device_extensions(const struct panvk_physical_device *device,
.KHR_synchronization2 = true,
.KHR_timeline_semaphore = true,
.KHR_variable_pointers = true,
.KHR_zero_initialize_workgroup_memory = true,
.EXT_buffer_device_address = true,
.EXT_custom_border_color = true,
.EXT_depth_clip_enable = true,
@ -343,7 +344,7 @@ get_features(const struct panvk_physical_device *device,
.computeFullSubgroups = false,
.synchronization2 = true,
.textureCompressionASTC_HDR = false,
- .shaderZeroInitializeWorkgroupMemory = false,
+ .shaderZeroInitializeWorkgroupMemory = true,
.dynamicRendering = true,
.shaderIntegerDotProduct = false,
.maintenance4 = false,

View file

@ -485,6 +485,20 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
nir_address_format_32bit_offset);
}
if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
/* Align everything up to 16 bytes to take advantage of load store
* vectorization. */
nir->info.shared_size = align(nir->info.shared_size, 16);
NIR_PASS(_, nir, nir_zero_initialize_shared_memory, nir->info.shared_size,
16);
/* We need to call lower_compute_system_values again because
* nir_zero_initialize_shared_memory generates load_invocation_id which
* has to be lowered to load_invocation_index.
*/
NIR_PASS(_, nir, nir_lower_compute_system_values, NULL);
}
if (stage == MESA_SHADER_VERTEX) {
/* We need the driver_location to match the vertex attribute location,
* so we can use the attribute layout described by