diff --git a/src/freedreno/ci/deqp-freedreno-a618-vk.toml b/src/freedreno/ci/deqp-freedreno-a618-vk.toml index 43a19d811f7..e5fb47ec983 100644 --- a/src/freedreno/ci/deqp-freedreno-a618-vk.toml +++ b/src/freedreno/ci/deqp-freedreno-a618-vk.toml @@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"] renderer_check = "Turnip Adreno .* 618" fraction = 3 tests_per_group = 10000 +[deqp.env] +# Enable additional tests that test fp32 denorm preserve. +tu_enable_softfloat32 = "true" # force-gmem testing # Autotuner forces sysmem on most CTS tests diff --git a/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml b/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml index 00c2032da93..d74db8fb584 100644 --- a/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml +++ b/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml @@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"] timeout = 300 renderer_check = "Turnip Adreno .* 660" tests_per_group = 10000 +[deqp.env] +# Enable additional tests that test fp32 denorm preserve. +tu_enable_softfloat32 = "true" # force-gmem testing # Autotuner forces sysmem on most CTS tests diff --git a/src/freedreno/ci/deqp-freedreno-a660-vk.toml b/src/freedreno/ci/deqp-freedreno-a660-vk.toml index a60bcddec3c..7fd88e03dc0 100644 --- a/src/freedreno/ci/deqp-freedreno-a660-vk.toml +++ b/src/freedreno/ci/deqp-freedreno-a660-vk.toml @@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"] renderer_check = "Turnip Adreno .* 660" fraction = 3 tests_per_group = 10000 +[deqp.env] +# Enable additional tests that test fp32 denorm preserve. 
+tu_enable_softfloat32 = "true" # force-gmem testing # Autotuner forces sysmem on most CTS tests diff --git a/src/freedreno/ci/deqp-freedreno-a750-vk.toml b/src/freedreno/ci/deqp-freedreno-a750-vk.toml index 3e5e0205be9..d37327a8b56 100644 --- a/src/freedreno/ci/deqp-freedreno-a750-vk.toml +++ b/src/freedreno/ci/deqp-freedreno-a750-vk.toml @@ -4,6 +4,9 @@ deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk" caselists = ["/deqp-vk/mustpass/vk-main.txt"] renderer_check = "Turnip Adreno .* 750" tests_per_group = 10000 +[deqp.env] +# Enable additional tests that test fp32 denorm preserve. +tu_enable_softfloat32 = "true" # force-gmem testing # Autotuner forces sysmem on most CTS tests diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build index 18e6aba1387..0f6daa01e7c 100644 --- a/src/freedreno/vulkan/meson.build +++ b/src/freedreno/vulkan/meson.build @@ -51,6 +51,17 @@ libtu_files = files( 'tu_util.cc', ) +libtu_files += custom_target( + 'float32_spv.h', + input : float32_glsl_file, + output : 'float32_spv.h', + command : [ + prog_glslang, '--no-link', '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@', + glslang_quiet, glslang_depfile, + ], + depfile : 'float32_spv.h.d', +) + subdir('bvh') libtu_includes = [ diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index abae398f13b..956d8edd24b 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -909,7 +909,14 @@ tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice, p->shaderSignedZeroInfNanPreserveFloat16 = true; p->shaderDenormFlushToZeroFloat32 = true; - p->shaderDenormPreserveFloat32 = false; + + /* FP32 denorm preserve has to be emulated via soft-float. Normal + * applications should not use this, and we don't want to advertise it and + * get people confused, but vkd3d-proton cannot emulate it itself so we + * have to allow it to use our emulation. 
+ */ + p->shaderDenormPreserveFloat32 = pdevice->instance->enable_softfloat32; + p->shaderRoundingModeRTEFloat32 = true; p->shaderRoundingModeRTZFloat32 = false; p->shaderSignedZeroInfNanPreserveFloat32 = true; @@ -1774,6 +1781,7 @@ static const driOptionDescription tu_dri_options[] = { DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false) DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false) DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false) + DRI_CONF_TU_ENABLE_SOFTFLOAT32(false) DRI_CONF_SECTION_END }; @@ -1800,6 +1808,8 @@ tu_init_dri_options(struct tu_instance *instance) driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode"); instance->ignore_frag_depth_direction = driQueryOptionb(&instance->dri_options, "tu_ignore_frag_depth_direction"); + instance->enable_softfloat32 = + driQueryOptionb(&instance->dri_options, "tu_enable_softfloat32"); } static uint32_t instance_count = 0; @@ -2816,6 +2826,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, goto fail_compiler; } + tu_init_softfloat32(device); + /* Initialize sparse array for refcounting imported BOs */ util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512); @@ -3079,6 +3091,7 @@ fail_global_bo: fail_free_zombie_vma: util_sparse_array_finish(&device->bo_map); u_vector_finish(&device->zombie_vmas); + tu_destroy_softfloat32(device); ir3_compiler_destroy(device->compiler); fail_compiler: vk_meta_device_finish(&device->vk, &device->meta); @@ -3133,6 +3146,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) vk_meta_device_finish(&device->vk, &device->meta); + tu_destroy_softfloat32(device); + ir3_compiler_destroy(device->compiler); vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc); diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 2ad3103392f..6a3b7e54195 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -227,6 +227,11 @@ struct tu_instance /* Apps may 
be accidentally incorrect */ bool ignore_frag_depth_direction; + + /* D3D12 SM6.2 requires float32 denorm support which we have to emulate. + * However, we don't want native Vulkan apps using this. + */ + bool enable_softfloat32; }; VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) @@ -316,6 +321,8 @@ struct tu_device struct vk_meta_device meta; + struct nir_shader *float32_shader; + radix_sort_vk_t *radix_sort; mtx_t radix_sort_mutex; diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index a88fa3d2059..81db611403b 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -35,6 +35,39 @@ init_ir3_nir_options(struct ir3_shader_nir_options *options, }; } +static const struct spirv_to_nir_options tu_spirv_options = { + /* Use 16-bit math for RelaxedPrecision ALU ops */ + .mediump_16bit_alu = true, + + .ubo_addr_format = nir_address_format_vec2_index_32bit_offset, + .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset, + + /* Accessed via stg/ldg */ + .phys_ssbo_addr_format = nir_address_format_64bit_global, + + /* Accessed via the const register file */ + .push_const_addr_format = nir_address_format_logical, + + /* Accessed via ldl/stl */ + .shared_addr_format = nir_address_format_32bit_offset, + + /* Accessed via stg/ldg (not used with Vulkan?) */ + .global_addr_format = nir_address_format_64bit_global, + + .min_ubo_alignment = 64, + .min_ssbo_alignment = 4, +}; + +static void +tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir) +{ + NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader); + + /* Clean up the result before linking to minimize shader size. 
*/ + struct ir3_optimize_options optimize_options = {}; + ir3_optimize_loop(dev->compiler, &optimize_options, nir); +} + nir_shader * tu_spirv_to_nir(struct tu_device *dev, void *mem_ctx, @@ -43,38 +76,15 @@ tu_spirv_to_nir(struct tu_device *dev, const struct tu_shader_key *key, mesa_shader_stage stage) { - /* TODO these are made-up */ - const struct spirv_to_nir_options spirv_options = { - /* ViewID is a sysval in geometry stages and an input in the FS */ - .view_index_is_input = - stage == MESA_SHADER_FRAGMENT && - !key->lower_view_index_to_device_index, - - /* Use 16-bit math for RelaxedPrecision ALU ops */ - .mediump_16bit_alu = true, - - .ubo_addr_format = nir_address_format_vec2_index_32bit_offset, - .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset, - - /* Accessed via stg/ldg */ - .phys_ssbo_addr_format = nir_address_format_64bit_global, - - /* Accessed via the const register file */ - .push_const_addr_format = nir_address_format_logical, - - /* Accessed via ldl/stl */ - .shared_addr_format = nir_address_format_32bit_offset, - - /* Accessed via stg/ldg (not used with Vulkan?) 
*/ - .global_addr_format = nir_address_format_64bit_global, - - .min_ubo_alignment = 64, - .min_ssbo_alignment = 4, - }; - const nir_shader_compiler_options *nir_options = ir3_get_compiler_options(dev->compiler); + spirv_to_nir_options spirv_options = tu_spirv_options; + /* ViewID is a sysval in geometry stages and an input in the FS */ + spirv_options.view_index_is_input = + stage == MESA_SHADER_FRAGMENT && + !key->lower_view_index_to_device_index; + nir_shader *nir; VkResult result = vk_pipeline_shader_stage_to_nir(&dev->vk, pipeline_flags, stage_info, @@ -144,9 +154,97 @@ tu_spirv_to_nir(struct tu_device *dev, }; NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options); + if (nir_is_denorm_preserve(nir->info.float_controls_execution_mode, 32)) { + tu_nir_lower_softfloat32(dev, nir); + } + return nir; } +static nir_shader * +tu_spirv_to_nir_library(struct tu_device *dev, + const uint32_t *words, + size_t word_count) +{ + const nir_shader_compiler_options *nir_options = + ir3_get_compiler_options(dev->compiler); + spirv_to_nir_options spirv_options = tu_spirv_options; + spirv_options.create_library = true; + + nir_shader *nir = + spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE, + "main", &spirv_options, nir_options); + + NIR_PASS(_, nir, nir_lower_system_values); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS(_, nir, nir_lower_returns); + NIR_PASS(_, nir, nir_inline_functions); + nir_remove_non_exported(nir); + NIR_PASS(_, nir, nir_copy_prop); + NIR_PASS(_, nir, nir_opt_deref); + + /* We can't deal with constant data, get rid of it */ + nir_lower_constant_to_temp(nir); + + /* We can go ahead and lower the rest of the constant initializers. 
We do + * this here so that nir_remove_dead_variables and split_per_member_structs + * below see the corresponding stores. + */ + NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0); + + NIR_PASS(_, nir, nir_opt_find_array_copies); + NIR_PASS(_, nir, nir_opt_copy_prop_vars); + NIR_PASS(_, nir, nir_opt_dce); + + NIR_PASS(_, nir, nir_split_var_copies); + NIR_PASS(_, nir, nir_lower_var_copies); + + NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp); + NIR_PASS(_, nir, nir_opt_copy_prop_vars); + NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all); + + /* Do some optimizations to clean up the shader now. By optimizing the + * functions in the library, we avoid having to re-do that work every + * time we inline a copy of a function. Reducing basic blocks also helps + * with compile times. + */ + NIR_PASS(_, nir, nir_lower_vars_to_ssa); + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + NIR_PASS(_, nir, nir_copy_prop); + NIR_PASS(_, nir, nir_opt_dce); + NIR_PASS(_, nir, nir_opt_cse); + NIR_PASS(_, nir, nir_opt_gcm, true); + + nir_opt_peephole_select_options peephole_select_options = {}; + peephole_select_options.limit = 1; + NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options); + NIR_PASS(_, nir, nir_opt_dce); + + return nir; +} + +static const uint32_t float32_spv[] = { +#include "float32_spv.h" +}; + +void +tu_init_softfloat32(struct tu_device *dev) +{ + dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv, + ARRAY_SIZE(float32_spv)); +} + +void +tu_destroy_softfloat32(struct tu_device *dev) +{ + ralloc_free(dev->float32_shader); +} + static void lower_load_push_constant(struct tu_device *dev, nir_builder *b, diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h index 02b68471bb8..440c82277b8 100644 --- a/src/freedreno/vulkan/tu_shader.h +++ b/src/freedreno/vulkan/tu_shader.h @@ -132,6 +132,13 @@ struct tu_shader_key { }; extern const struct 
vk_pipeline_cache_object_ops tu_shader_ops; + +void +tu_init_softfloat32(struct tu_device *device); + +void +tu_destroy_softfloat32(struct tu_device *device); + bool tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev); diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index baa54a65668..58340cd4ac8 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -1338,6 +1338,9 @@ TODO: document the other workarounds. -->