diff --git a/docs/features.txt b/docs/features.txt index 1881a78491e..0771e42976c 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -479,7 +479,7 @@ Vulkan 1.3 -- all DONE: anv, radv VK_KHR_shader_non_semantic_info DONE (anv, radv) VK_KHR_shader_terminate_invocation DONE (anv, radv, tu) VK_KHR_synchronization2 DONE (anv, radv) - VK_KHR_zero_initialize_workgroup_memory DONE (anv, radv) + VK_KHR_zero_initialize_workgroup_memory DONE (anv, radv, tu) VK_EXT_4444_formats DONE (anv, lvp, radv, tu, v3dv) VK_EXT_extended_dynamic_state DONE (anv, lvp, radv, tu) VK_EXT_extended_dynamic_state2 DONE (anv, lvp, radv, tu) diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index bb95ea8f0a1..d15fbd75e9d 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -159,6 +159,7 @@ get_device_extensions(const struct tu_physical_device *device, .KHR_separate_depth_stencil_layouts = true, .KHR_buffer_device_address = true, .KHR_shader_integer_dot_product = true, + .KHR_zero_initialize_workgroup_memory = true, #ifndef TU_USE_KGSL .KHR_timeline_semaphore = true, #endif @@ -593,7 +594,7 @@ tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice, features->computeFullSubgroups = true; features->synchronization2 = false; features->textureCompressionASTC_HDR = false; - features->shaderZeroInitializeWorkgroupMemory = false; + features->shaderZeroInitializeWorkgroupMemory = true; features->dynamicRendering = false; features->shaderIntegerDotProduct = true; features->maintenance4 = false; diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 82e94da573f..e3e72f5f413 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -87,10 +87,6 @@ tu_spirv_to_nir(struct tu_device *dev, }, }; - const struct nir_lower_compute_system_values_options compute_sysval_options = { - .has_base_workgroup_id = true, - }; - const nir_shader_compiler_options *nir_options = ir3_get_compiler_options(dev->compiler); @@ -158,7 +154,6 @@ tu_spirv_to_nir(struct tu_device *dev, NIR_PASS_V(nir, nir_lower_is_helper_invocation); NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options); NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); @@ -763,6 +758,22 @@ tu_shader_create(struct tu_device *dev, NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); + + if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) { + const unsigned chunk_size = 16; /* max single store size */ + /* Shared memory is allocated in 1024b chunks in HW, but the zero-init + * extension only requires us to initialize the memory that the shader + * is allocated at the API level, and it's up to the user to ensure + * that accesses are limited to those bounds. + */ + const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size); + NIR_PASS_V(nir, nir_zero_initialize_shared_memory, shared_size, chunk_size); + } + + const struct nir_lower_compute_system_values_options compute_sysval_options = { + .has_base_workgroup_id = true, + }; + NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options); } nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);