mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
tu: Make softfloat shader compiled on demand
The vast majority of users will not need this, so do not pay the runtime and memory cost of compiling the shader to NIR until it's needed. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38088>
This commit is contained in:
parent
6400de124c
commit
9e3bc1f123
4 changed files with 96 additions and 90 deletions
|
|
@ -2739,6 +2739,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
mtx_init(&device->vis_stream_suballocator_mtx, mtx_plain);
|
||||
mtx_init(&device->mutex, mtx_plain);
|
||||
mtx_init(&device->copy_timestamp_cs_pool_mutex, mtx_plain);
|
||||
mtx_init(&device->softfloat_mutex, mtx_plain);
|
||||
#ifdef HAVE_PERFETTO
|
||||
mtx_init(&device->perfetto.pending_clocks_sync_mtx, mtx_plain);
|
||||
#endif
|
||||
|
|
@ -2826,8 +2827,6 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
goto fail_compiler;
|
||||
}
|
||||
|
||||
tu_init_softfloat32(device);
|
||||
|
||||
/* Initialize sparse array for refcounting imported BOs */
|
||||
util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
|
||||
|
||||
|
|
@ -3091,7 +3090,6 @@ fail_global_bo:
|
|||
fail_free_zombie_vma:
|
||||
util_sparse_array_finish(&device->bo_map);
|
||||
u_vector_finish(&device->zombie_vmas);
|
||||
tu_destroy_softfloat32(device);
|
||||
ir3_compiler_destroy(device->compiler);
|
||||
fail_compiler:
|
||||
vk_meta_device_finish(&device->vk, &device->meta);
|
||||
|
|
|
|||
|
|
@ -322,6 +322,7 @@ struct tu_device
|
|||
struct vk_meta_device meta;
|
||||
|
||||
struct nir_shader *float32_shader;
|
||||
mtx_t softfloat_mutex;
|
||||
|
||||
radix_sort_vk_t *radix_sort;
|
||||
mtx_t radix_sort_mutex;
|
||||
|
|
|
|||
|
|
@ -58,9 +58,103 @@ static const struct spirv_to_nir_options tu_spirv_options = {
|
|||
.min_ssbo_alignment = 4,
|
||||
};
|
||||
|
||||
/* Turn a SPIR-V module into a NIR function library.
 *
 * The module is parsed with the driver's tu_spirv_options (plus
 * create_library) and the NIR options of dev->compiler, then pushed through
 * a fixed lowering and cleanup sequence.  The order of the passes below is
 * significant and must not be shuffled.
 *
 * dev        - device whose ir3 compiler supplies the NIR compiler options
 * words      - SPIR-V binary
 * word_count - number of 32-bit words in `words`
 *
 * Returns the resulting library shader.
 */
static nir_shader *
tu_spirv_to_nir_library(struct tu_device *dev,
                        const uint32_t *words,
                        size_t word_count)
{
   const nir_shader_compiler_options *compiler_options =
      ir3_get_compiler_options(dev->compiler);

   /* Same SPIR-V settings as regular shaders, but request a library. */
   spirv_to_nir_options library_options = tu_spirv_options;
   library_options.create_library = true;

   nir_shader *lib = spirv_to_nir(words, word_count, NULL, 0,
                                  MESA_SHADER_COMPUTE, "main",
                                  &library_options, compiler_options);

   NIR_PASS(_, lib, nir_lower_system_values);

   /* Local constant initializers must be lowered immediately before
    * inlining, so that each one ends up at the top of its own function
    * rather than at the top of whichever function it gets inlined into.
    */
   NIR_PASS(_, lib, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, lib, nir_lower_returns);
   NIR_PASS(_, lib, nir_inline_functions);
   nir_remove_non_exported(lib);
   NIR_PASS(_, lib, nir_opt_copy_prop);
   NIR_PASS(_, lib, nir_opt_deref);

   /* Constant data is not supported here: turn it into temporaries. */
   nir_lower_constant_to_temp(lib);

   /* Now lower every remaining initializer, so that the variable cleanup
    * passes that follow can see the stores they produce.
    */
   NIR_PASS(_, lib, nir_lower_variable_initializers, (nir_variable_mode)~0);

   NIR_PASS(_, lib, nir_opt_find_array_copies);
   NIR_PASS(_, lib, nir_opt_copy_prop_vars);
   NIR_PASS(_, lib, nir_opt_dce);

   NIR_PASS(_, lib, nir_split_var_copies);
   NIR_PASS(_, lib, nir_lower_var_copies);

   NIR_PASS(_, lib, nir_lower_mediump_vars, nir_var_function_temp);
   NIR_PASS(_, lib, nir_opt_copy_prop_vars);
   NIR_PASS(_, lib, nir_opt_combine_stores, nir_var_all);

   /* Optimize the library once, up front.  Every later inlined copy of a
    * library function then starts out already cleaned up, and having fewer
    * basic blocks keeps compile times down as well.
    */
   NIR_PASS(_, lib, nir_lower_vars_to_ssa);
   NIR_PASS(_, lib, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS(_, lib, nir_opt_copy_prop);
   NIR_PASS(_, lib, nir_opt_dce);
   NIR_PASS(_, lib, nir_opt_cse);
   NIR_PASS(_, lib, nir_opt_gcm, true);

   nir_opt_peephole_select_options select_options = { .limit = 1 };
   NIR_PASS(_, lib, nir_opt_peephole_select, &select_options);
   NIR_PASS(_, lib, nir_opt_dce);

   return lib;
}
|
||||
|
||||
/* SPIR-V words of the softfloat32 library, pulled in from float32_spv.h and
 * handed to tu_spirv_to_nir_library() by tu_init_softfloat32().
 */
static const uint32_t float32_spv[] = {
#include "float32_spv.h"
};
|
||||
|
||||
void
|
||||
tu_init_softfloat32(struct tu_device *dev)
|
||||
{
|
||||
if (dev->float32_shader)
|
||||
return;
|
||||
|
||||
mtx_lock(&dev->softfloat_mutex);
|
||||
if (!dev->float32_shader) {
|
||||
dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
|
||||
ARRAY_SIZE(float32_spv));
|
||||
}
|
||||
mtx_unlock(&dev->softfloat_mutex);
|
||||
}
|
||||
|
||||
void
|
||||
tu_destroy_softfloat32(struct tu_device *dev)
|
||||
{
|
||||
if (dev->float32_shader)
|
||||
ralloc_free(dev->float32_shader);
|
||||
}
|
||||
|
||||
static void
|
||||
tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
|
||||
{
|
||||
tu_init_softfloat32(dev);
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader);
|
||||
|
||||
/* Cleanup the result before linking to minimize shader size. */
|
||||
|
|
@ -161,90 +255,6 @@ tu_spirv_to_nir(struct tu_device *dev,
|
|||
return nir;
|
||||
}
|
||||
|
||||
/* Turn a SPIR-V module into a NIR function library.
 *
 * The module is parsed with the driver's tu_spirv_options (plus
 * create_library) and the NIR options of dev->compiler, then pushed through
 * a fixed lowering and cleanup sequence.  The order of the passes below is
 * significant and must not be shuffled.
 *
 * dev        - device whose ir3 compiler supplies the NIR compiler options
 * words      - SPIR-V binary
 * word_count - number of 32-bit words in `words`
 *
 * Returns the resulting library shader.
 */
static nir_shader *
tu_spirv_to_nir_library(struct tu_device *dev,
                        const uint32_t *words,
                        size_t word_count)
{
   const nir_shader_compiler_options *compiler_options =
      ir3_get_compiler_options(dev->compiler);

   /* Same SPIR-V settings as regular shaders, but request a library. */
   spirv_to_nir_options library_options = tu_spirv_options;
   library_options.create_library = true;

   nir_shader *lib = spirv_to_nir(words, word_count, NULL, 0,
                                  MESA_SHADER_COMPUTE, "main",
                                  &library_options, compiler_options);

   NIR_PASS(_, lib, nir_lower_system_values);

   /* Local constant initializers must be lowered immediately before
    * inlining, so that each one ends up at the top of its own function
    * rather than at the top of whichever function it gets inlined into.
    */
   NIR_PASS(_, lib, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS(_, lib, nir_lower_returns);
   NIR_PASS(_, lib, nir_inline_functions);
   nir_remove_non_exported(lib);
   NIR_PASS(_, lib, nir_opt_copy_prop);
   NIR_PASS(_, lib, nir_opt_deref);

   /* Constant data is not supported here: turn it into temporaries. */
   nir_lower_constant_to_temp(lib);

   /* Now lower every remaining initializer, so that the variable cleanup
    * passes that follow can see the stores they produce.
    */
   NIR_PASS(_, lib, nir_lower_variable_initializers, (nir_variable_mode)~0);

   NIR_PASS(_, lib, nir_opt_find_array_copies);
   NIR_PASS(_, lib, nir_opt_copy_prop_vars);
   NIR_PASS(_, lib, nir_opt_dce);

   NIR_PASS(_, lib, nir_split_var_copies);
   NIR_PASS(_, lib, nir_lower_var_copies);

   NIR_PASS(_, lib, nir_lower_mediump_vars, nir_var_function_temp);
   NIR_PASS(_, lib, nir_opt_copy_prop_vars);
   NIR_PASS(_, lib, nir_opt_combine_stores, nir_var_all);

   /* Optimize the library once, up front.  Every later inlined copy of a
    * library function then starts out already cleaned up, and having fewer
    * basic blocks keeps compile times down as well.
    */
   NIR_PASS(_, lib, nir_lower_vars_to_ssa);
   NIR_PASS(_, lib, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS(_, lib, nir_opt_copy_prop);
   NIR_PASS(_, lib, nir_opt_dce);
   NIR_PASS(_, lib, nir_opt_cse);
   NIR_PASS(_, lib, nir_opt_gcm, true);

   nir_opt_peephole_select_options select_options = { .limit = 1 };
   NIR_PASS(_, lib, nir_opt_peephole_select, &select_options);
   NIR_PASS(_, lib, nir_opt_dce);

   return lib;
}
|
||||
|
||||
/* SPIR-V words of the softfloat32 library, pulled in from float32_spv.h and
 * handed to tu_spirv_to_nir_library() by tu_init_softfloat32().
 */
static const uint32_t float32_spv[] = {
#include "float32_spv.h"
};
|
||||
|
||||
void
|
||||
tu_init_softfloat32(struct tu_device *dev)
|
||||
{
|
||||
dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
|
||||
ARRAY_SIZE(float32_spv));
|
||||
}
|
||||
|
||||
void
|
||||
tu_destroy_softfloat32(struct tu_device *dev)
|
||||
{
|
||||
ralloc_free(dev->float32_shader);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_load_push_constant(struct tu_device *dev,
|
||||
nir_builder *b,
|
||||
|
|
|
|||
|
|
@ -133,9 +133,6 @@ struct tu_shader_key {
|
|||
|
||||
extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
|
||||
|
||||
void
|
||||
tu_init_softfloat32(struct tu_device *device);
|
||||
|
||||
void
|
||||
tu_destroy_softfloat32(struct tu_device *device);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue