diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 956d8edd24b..375f38757ee 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -2739,6 +2739,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    mtx_init(&device->vis_stream_suballocator_mtx, mtx_plain);
    mtx_init(&device->mutex, mtx_plain);
    mtx_init(&device->copy_timestamp_cs_pool_mutex, mtx_plain);
+   mtx_init(&device->softfloat_mutex, mtx_plain);
 #ifdef HAVE_PERFETTO
    mtx_init(&device->perfetto.pending_clocks_sync_mtx, mtx_plain);
 #endif
@@ -2826,8 +2827,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
       goto fail_compiler;
    }
 
-   tu_init_softfloat32(device);
-
    /* Initialize sparse array for refcounting imported BOs */
    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
 
@@ -3091,7 +3090,6 @@ fail_global_bo:
 fail_free_zombie_vma:
    util_sparse_array_finish(&device->bo_map);
    u_vector_finish(&device->zombie_vmas);
-   tu_destroy_softfloat32(device);
    ir3_compiler_destroy(device->compiler);
 fail_compiler:
    vk_meta_device_finish(&device->vk, &device->meta);
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 6a3b7e54195..5f1d570feab 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -322,6 +322,7 @@ struct tu_device
    struct vk_meta_device meta;
 
    struct nir_shader *float32_shader;
+   mtx_t softfloat_mutex;
 
    radix_sort_vk_t *radix_sort;
    mtx_t radix_sort_mutex;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index 93634bff29a..eaff774c767 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -58,9 +58,103 @@ static const struct spirv_to_nir_options tu_spirv_options = {
    .min_ssbo_alignment = 4,
 };
 
+static nir_shader *
+tu_spirv_to_nir_library(struct tu_device *dev,
+                        const uint32_t *words,
+                        size_t word_count)
+{
+   const nir_shader_compiler_options *nir_options =
+      ir3_get_compiler_options(dev->compiler);
+   spirv_to_nir_options spirv_options = tu_spirv_options;
+   spirv_options.create_library = true;
+
+   nir_shader *nir =
+      spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
+                   "main", &spirv_options, nir_options);
+
+   NIR_PASS(_, nir, nir_lower_system_values);
+
+   /* We have to lower away local constant initializers right before we
+    * inline functions. That way they get properly initialized at the top
+    * of the function and not at the top of its caller.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_lower_returns);
+   NIR_PASS(_, nir, nir_inline_functions);
+   nir_remove_non_exported(nir);
+   NIR_PASS(_, nir, nir_opt_copy_prop);
+   NIR_PASS(_, nir, nir_opt_deref);
+
+   /* We can't deal with constant data, get rid of it */
+   nir_lower_constant_to_temp(nir);
+
+   /* We can go ahead and lower the rest of the constant initializers. We do
+    * this here so that nir_remove_dead_variables and split_per_member_structs
+    * below see the corresponding stores.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
+
+   NIR_PASS(_, nir, nir_opt_find_array_copies);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_dce);
+
+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);
+
+   NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
+
+   /* Do some optimizations to clean up the shader now. By optimizing the
+    * functions in the library, we avoid having to re-do that work every
+    * time we inline a copy of a function. Reducing basic blocks also helps
+    * with compile times.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+   NIR_PASS(_, nir, nir_opt_copy_prop);
+   NIR_PASS(_, nir, nir_opt_dce);
+   NIR_PASS(_, nir, nir_opt_cse);
+   NIR_PASS(_, nir, nir_opt_gcm, true);
+
+   nir_opt_peephole_select_options peephole_select_options = {};
+   peephole_select_options.limit = 1;
+   NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
+   NIR_PASS(_, nir, nir_opt_dce);
+
+   return nir;
+}
+
+static const uint32_t float32_spv[] = {
+#include "float32_spv.h"
+};
+
+static void
+tu_init_softfloat32(struct tu_device *dev)
+{
+   /* Every caller takes the mutex: an unlocked "already built?" read would
+    * race with the store below and may expose a half-built library.
+    */
+   mtx_lock(&dev->softfloat_mutex);
+   if (!dev->float32_shader) {
+      dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
+                                                    ARRAY_SIZE(float32_spv));
+   }
+   mtx_unlock(&dev->softfloat_mutex);
+}
+
+void
+tu_destroy_softfloat32(struct tu_device *dev)
+{
+   /* ralloc_free(NULL) is a no-op, so a never-built library needs no guard. */
+   ralloc_free(dev->float32_shader);
+}
+
 static void
 tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
 {
+   tu_init_softfloat32(dev);
+
    NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader);
 
    /* Cleanup the result before linking to minimize shader size. */
@@ -161,90 +255,6 @@ tu_spirv_to_nir(struct tu_device *dev,
    return nir;
 }
 
-static nir_shader *
-tu_spirv_to_nir_library(struct tu_device *dev,
-                        const uint32_t *words,
-                        size_t word_count)
-{
-   const nir_shader_compiler_options *nir_options =
-      ir3_get_compiler_options(dev->compiler);
-   spirv_to_nir_options spirv_options = tu_spirv_options;
-   spirv_options.create_library = true;
-
-   nir_shader *nir =
-      spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
-                   "main", &spirv_options, nir_options);
-
-   NIR_PASS(_, nir, nir_lower_system_values);
-
-   /* We have to lower away local constant initializers right before we
-    * inline functions. That way they get properly initialized at the top
-    * of the function and not at the top of its caller.
-    */
-   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
-   NIR_PASS(_, nir, nir_lower_returns);
-   NIR_PASS(_, nir, nir_inline_functions);
-   nir_remove_non_exported(nir);
-   NIR_PASS(_, nir, nir_opt_copy_prop);
-   NIR_PASS(_, nir, nir_opt_deref);
-
-   /* We can't deal with constant data, get rid of it */
-   nir_lower_constant_to_temp(nir);
-
-   /* We can go ahead and lower the rest of the constant initializers. We do
-    * this here so that nir_remove_dead_variables and split_per_member_structs
-    * below see the corresponding stores.
-    */
-   NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
-
-   NIR_PASS(_, nir, nir_opt_find_array_copies);
-   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
-   NIR_PASS(_, nir, nir_opt_dce);
-
-   NIR_PASS(_, nir, nir_split_var_copies);
-   NIR_PASS(_, nir, nir_lower_var_copies);
-
-   NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
-   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
-   NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
-
-   /* Do some optimizations to clean up the shader now. By optimizing the
-    * functions in the library, we avoid having to re-do that work every
-    * time we inline a copy of a function. Reducing basic blocks also helps
-    * with compile times.
-    */
-   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
-   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
-   NIR_PASS(_, nir, nir_opt_copy_prop);
-   NIR_PASS(_, nir, nir_opt_dce);
-   NIR_PASS(_, nir, nir_opt_cse);
-   NIR_PASS(_, nir, nir_opt_gcm, true);
-
-   nir_opt_peephole_select_options peephole_select_options = {};
-   peephole_select_options.limit = 1;
-   NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
-   NIR_PASS(_, nir, nir_opt_dce);
-
-   return nir;
-}
-
-static const uint32_t float32_spv[] = {
-#include "float32_spv.h"
-};
-
-void
-tu_init_softfloat32(struct tu_device *dev)
-{
-   dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
-                                                 ARRAY_SIZE(float32_spv));
-}
-
-void
-tu_destroy_softfloat32(struct tu_device *dev)
-{
-   ralloc_free(dev->float32_shader);
-}
-
 static void
 lower_load_push_constant(struct tu_device *dev,
                          nir_builder *b,
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 440c82277b8..33ea0a3b0f2 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -133,9 +133,6 @@ struct tu_shader_key {
 
 extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
 
-void
-tu_init_softfloat32(struct tu_device *device);
-
 void
 tu_destroy_softfloat32(struct tu_device *device);
 