From 9e3bc1f1233b0a50a12ef2406888ba31f84055da Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Mon, 17 Nov 2025 15:46:24 -0500
Subject: [PATCH] tu: Make softfloat shader compiled on demand

The vast majority of users will not need this, so do not pay the runtime
and memory cost of compiling the shader to NIR until it's needed.

The shader is now built the first time a shader needs softfloat lowering.
The build is serialized by a new per-device softfloat_mutex, which is
always taken so that the cached pointer is never read while another
thread may be storing it. tu_init_softfloat32() becomes static because
this patch removes its prototype and its only caller outside
tu_shader.cc.

Part-of:
---
 src/freedreno/vulkan/tu_device.cc |   4 +-
 src/freedreno/vulkan/tu_device.h  |   1 +
 src/freedreno/vulkan/tu_shader.cc | 178 ++++++++++++++++--------------
 src/freedreno/vulkan/tu_shader.h  |   3 -
 4 files changed, 96 insertions(+), 90 deletions(-)

diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 956d8edd24b..375f38757ee 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -2739,6 +2739,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    mtx_init(&device->vis_stream_suballocator_mtx, mtx_plain);
    mtx_init(&device->mutex, mtx_plain);
    mtx_init(&device->copy_timestamp_cs_pool_mutex, mtx_plain);
+   mtx_init(&device->softfloat_mutex, mtx_plain);
 #ifdef HAVE_PERFETTO
    mtx_init(&device->perfetto.pending_clocks_sync_mtx, mtx_plain);
 #endif
@@ -2826,8 +2827,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
       goto fail_compiler;
    }
 
-   tu_init_softfloat32(device);
-
    /* Initialize sparse array for refcounting imported BOs */
    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
 
@@ -3091,7 +3090,6 @@ fail_global_bo:
 fail_free_zombie_vma:
    util_sparse_array_finish(&device->bo_map);
    u_vector_finish(&device->zombie_vmas);
-   tu_destroy_softfloat32(device);
    ir3_compiler_destroy(device->compiler);
 fail_compiler:
    vk_meta_device_finish(&device->vk, &device->meta);
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 6a3b7e54195..5f1d570feab 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -322,6 +322,7 @@ struct tu_device
    struct vk_meta_device meta;
 
    struct nir_shader *float32_shader;
+   mtx_t softfloat_mutex;
 
    radix_sort_vk_t *radix_sort;
    mtx_t radix_sort_mutex;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 93634bff29a..eaff774c767 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -58,9 +58,103 @@ static const struct spirv_to_nir_options tu_spirv_options = {
    .min_ssbo_alignment = 4,
 };
 
+static nir_shader *
+tu_spirv_to_nir_library(struct tu_device *dev,
+                        const uint32_t *words,
+                        size_t word_count)
+{
+   const nir_shader_compiler_options *nir_options =
+      ir3_get_compiler_options(dev->compiler);
+   spirv_to_nir_options spirv_options = tu_spirv_options;
+   spirv_options.create_library = true;
+
+   nir_shader *nir =
+      spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
+                   "main", &spirv_options, nir_options);
+
+   NIR_PASS(_, nir, nir_lower_system_values);
+
+   /* We have to lower away local constant initializers right before we
+    * inline functions. That way they get properly initialized at the top
+    * of the function and not at the top of its caller.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_lower_returns);
+   NIR_PASS(_, nir, nir_inline_functions);
+   nir_remove_non_exported(nir);
+   NIR_PASS(_, nir, nir_opt_copy_prop);
+   NIR_PASS(_, nir, nir_opt_deref);
+
+   /* We can't deal with constant data, get rid of it */
+   nir_lower_constant_to_temp(nir);
+
+   /* We can go ahead and lower the rest of the constant initializers. We do
+    * this here so that nir_remove_dead_variables and split_per_member_structs
+    * below see the corresponding stores.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
+
+   NIR_PASS(_, nir, nir_opt_find_array_copies);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_dce);
+
+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);
+
+   NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
+
+   /* Do some optimizations to clean up the shader now. By optimizing the
+    * functions in the library, we avoid having to re-do that work every
+    * time we inline a copy of a function. Reducing basic blocks also helps
+    * with compile times.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+   NIR_PASS(_, nir, nir_opt_copy_prop);
+   NIR_PASS(_, nir, nir_opt_dce);
+   NIR_PASS(_, nir, nir_opt_cse);
+   NIR_PASS(_, nir, nir_opt_gcm, true);
+
+   nir_opt_peephole_select_options peephole_select_options = {};
+   peephole_select_options.limit = 1;
+   NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
+   NIR_PASS(_, nir, nir_opt_dce);
+
+   return nir;
+}
+
+static const uint32_t float32_spv[] = {
+#include "float32_spv.h"
+};
+
+static void
+tu_init_softfloat32(struct tu_device *dev)
+{
+   /* Always take the mutex: an unlocked fast-path read of float32_shader
+    * would be a data race with the store below.
+    */
+   mtx_lock(&dev->softfloat_mutex);
+   if (!dev->float32_shader) {
+      dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
+                                                    ARRAY_SIZE(float32_spv));
+   }
+   mtx_unlock(&dev->softfloat_mutex);
+}
+
+void
+tu_destroy_softfloat32(struct tu_device *dev)
+{
+   if (dev->float32_shader)
+      ralloc_free(dev->float32_shader);
+}
+
 static void
 tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
 {
+   tu_init_softfloat32(dev);
+
    NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader);
 
    /* Cleanup the result before linking to minimize shader size.
     */
@@ -161,90 +255,6 @@ tu_spirv_to_nir(struct tu_device *dev,
    return nir;
 }
 
-static nir_shader *
-tu_spirv_to_nir_library(struct tu_device *dev,
-                        const uint32_t *words,
-                        size_t word_count)
-{
-   const nir_shader_compiler_options *nir_options =
-      ir3_get_compiler_options(dev->compiler);
-   spirv_to_nir_options spirv_options = tu_spirv_options;
-   spirv_options.create_library = true;
-
-   nir_shader *nir =
-      spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
-                   "main", &spirv_options, nir_options);
-
-   NIR_PASS(_, nir, nir_lower_system_values);
-
-   /* We have to lower away local constant initializers right before we
-    * inline functions. That way they get properly initialized at the top
-    * of the function and not at the top of its caller.
-    */
-   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
-   NIR_PASS(_, nir, nir_lower_returns);
-   NIR_PASS(_, nir, nir_inline_functions);
-   nir_remove_non_exported(nir);
-   NIR_PASS(_, nir, nir_opt_copy_prop);
-   NIR_PASS(_, nir, nir_opt_deref);
-
-   /* We can't deal with constant data, get rid of it */
-   nir_lower_constant_to_temp(nir);
-
-   /* We can go ahead and lower the rest of the constant initializers. We do
-    * this here so that nir_remove_dead_variables and split_per_member_structs
-    * below see the corresponding stores.
-    */
-   NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
-
-   NIR_PASS(_, nir, nir_opt_find_array_copies);
-   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
-   NIR_PASS(_, nir, nir_opt_dce);
-
-   NIR_PASS(_, nir, nir_split_var_copies);
-   NIR_PASS(_, nir, nir_lower_var_copies);
-
-   NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
-   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
-   NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
-
-   /* Do some optimizations to clean up the shader now. By optimizing the
-    * functions in the library, we avoid having to re-do that work every
-    * time we inline a copy of a function. Reducing basic blocks also helps
-    * with compile times.
-    */
-   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
-   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
-   NIR_PASS(_, nir, nir_opt_copy_prop);
-   NIR_PASS(_, nir, nir_opt_dce);
-   NIR_PASS(_, nir, nir_opt_cse);
-   NIR_PASS(_, nir, nir_opt_gcm, true);
-
-   nir_opt_peephole_select_options peephole_select_options = {};
-   peephole_select_options.limit = 1;
-   NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
-   NIR_PASS(_, nir, nir_opt_dce);
-
-   return nir;
-}
-
-static const uint32_t float32_spv[] = {
-#include "float32_spv.h"
-};
-
-void
-tu_init_softfloat32(struct tu_device *dev)
-{
-   dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
-                                                 ARRAY_SIZE(float32_spv));
-}
-
-void
-tu_destroy_softfloat32(struct tu_device *dev)
-{
-   ralloc_free(dev->float32_shader);
-}
-
 static void
 lower_load_push_constant(struct tu_device *dev,
                          nir_builder *b,
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 440c82277b8..33ea0a3b0f2 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -133,9 +133,6 @@ struct tu_shader_key {
 
 extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
 
-void
-tu_init_softfloat32(struct tu_device *device);
-
 void
 tu_destroy_softfloat32(struct tu_device *device);
 