tu: Expose preserving fp32 denorms via softfloat32

Microsoft required the ability to preserve fp32 denorms via a shader flag in shader model 6.2, but Adreno does not support this. Instead, Qualcomm's DX12 driver uses soft floats. Implement something similar to expose the equivalent Vulkan feature for vkd3d-proton. In practice, no apps should actually use this, but it lets us go from SM6.0 to SM6.6 with vkd3d-proton. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37608>
2026-05-08 22:08:26 +02:00 · 2025-09-27 00:34:43 -04:00 · 2025-09-27 00:34:43 -04:00 · b92f7c17da
commit b92f7c17da
parent d30ff374a1
11 changed files with 187 additions and 30 deletions
--- a/src/freedreno/ci/deqp-freedreno-a618-vk.toml
+++ b/src/freedreno/ci/deqp-freedreno-a618-vk.toml
@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
 renderer_check = "Turnip Adreno .* 618"
 fraction = 3
 tests_per_group = 10000
+[deqp.env]
+# Enable additional tests that test fp32 denorm preserve.
+tu_enable_softfloat32 = "true"

 # force-gmem testing
 # Autotuner forces sysmem on most CTS tests
--- a/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml
+++ b/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml
@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
 timeout = 300
 renderer_check = "Turnip Adreno .* 660"
 tests_per_group = 10000
+[deqp.env]
+# Enable additional tests that test fp32 denorm preserve.
+tu_enable_softfloat32 = "true"

 # force-gmem testing
 # Autotuner forces sysmem on most CTS tests
--- a/src/freedreno/ci/deqp-freedreno-a660-vk.toml
+++ b/src/freedreno/ci/deqp-freedreno-a660-vk.toml
@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
 renderer_check = "Turnip Adreno .* 660"
 fraction = 3
 tests_per_group = 10000
+[deqp.env]
+# Enable additional tests that test fp32 denorm preserve.
+tu_enable_softfloat32 = "true"

 # force-gmem testing
 # Autotuner forces sysmem on most CTS tests
--- a/src/freedreno/ci/deqp-freedreno-a750-vk.toml
+++ b/src/freedreno/ci/deqp-freedreno-a750-vk.toml
@ -4,6 +4,9 @@ deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
 caselists = ["/deqp-vk/mustpass/vk-main.txt"]
 renderer_check = "Turnip Adreno .* 750"
 tests_per_group = 10000
+[deqp.env]
+# Enable additional tests that test fp32 denorm preserve.
+tu_enable_softfloat32 = "true"

 # force-gmem testing
 # Autotuner forces sysmem on most CTS tests
--- a/src/freedreno/vulkan/meson.build
+++ b/src/freedreno/vulkan/meson.build
@ -51,6 +51,17 @@ libtu_files = files(
  'tu_util.cc',
 )

+# Compile the float32 GLSL source to SPIR-V with glslang and write the
+# result to float32_spv.h, which tu_shader.cc #includes as the initializer
+# of its float32_spv[] array. The generated header is added to the driver
+# sources so that it is built before the files that include it.
+libtu_files += custom_target(
+  'float32_spv.h',
+  input : float32_glsl_file,
+  output : 'float32_spv.h',
+  command : [
+    prog_glslang, '--no-link', '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@',
+    glslang_quiet, glslang_depfile,
+  ],
+  depfile : 'float32_spv.h.d',
+)
+
 subdir('bvh')

 libtu_includes = [
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@ -909,7 +909,14 @@ tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
-   p->shaderDenormPreserveFloat32            = false;
+
+   /* FP32 denorm preserve has to be emulated via soft-float. Normal
+    * applications should not use this, and we don't want to advertise it and
+    * get people confused, but vkd3d-proton cannot emulate it itself so we
+    * have to allow it to use our emulation.
+    */
+   p->shaderDenormPreserveFloat32 = pdevice->instance->enable_softfloat32;
+
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = false;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;
@ -1774,6 +1781,7 @@ static const driOptionDescription tu_dri_options[] = {
      DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
      DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false)
      DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false)
+      DRI_CONF_TU_ENABLE_SOFTFLOAT32(false)
   DRI_CONF_SECTION_END
 };

@ -1800,6 +1808,8 @@ tu_init_dri_options(struct tu_instance *instance)
         driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode");
   instance->ignore_frag_depth_direction =
         driQueryOptionb(&instance->dri_options, "tu_ignore_frag_depth_direction");
+   instance->enable_softfloat32 =
+         driQueryOptionb(&instance->dri_options, "tu_enable_softfloat32");
 }

 static uint32_t instance_count = 0;
@ -2816,6 +2826,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
      goto fail_compiler;
   }

+   tu_init_softfloat32(device);
+
   /* Initialize sparse array for refcounting imported BOs */
   util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);

@ -3079,6 +3091,7 @@ fail_global_bo:
 fail_free_zombie_vma:
   util_sparse_array_finish(&device->bo_map);
   u_vector_finish(&device->zombie_vmas);
+   tu_destroy_softfloat32(device);
   ir3_compiler_destroy(device->compiler);
 fail_compiler:
   vk_meta_device_finish(&device->vk, &device->meta);
@ -3133,6 +3146,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)

   vk_meta_device_finish(&device->vk, &device->meta);

+   tu_destroy_softfloat32(device);
+
   ir3_compiler_destroy(device->compiler);

   vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@ -227,6 +227,11 @@ struct tu_instance

   /* Apps may be accidentally incorrect  */
   bool ignore_frag_depth_direction;
+
+   /* D3D12 SM6.2 requires float32 denorm support which we have to emulate.
+    * However we don't want native Vulkan apps using this.
+    */
+   bool enable_softfloat32;
 };
 VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
@ -316,6 +321,8 @@ struct tu_device

   struct vk_meta_device meta;

+   struct nir_shader *float32_shader;
+
   radix_sort_vk_t *radix_sort;
   mtx_t radix_sort_mutex;

--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@ -35,6 +35,39 @@ init_ir3_nir_options(struct ir3_shader_nir_options *options,
   };
 }

+/* Baseline SPIR-V -> NIR translation options.
+ *
+ * tu_spirv_to_nir() and tu_spirv_to_nir_library() each start from a copy of
+ * this struct and then set the one field that differs per call
+ * (view_index_is_input and create_library respectively).
+ */
+static const struct spirv_to_nir_options tu_spirv_options = {
+   /* Use 16-bit math for RelaxedPrecision ALU ops */
+   .mediump_16bit_alu = true,
+
+   .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+   .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
+
+   /* Accessed via stg/ldg */
+   .phys_ssbo_addr_format = nir_address_format_64bit_global,
+
+   /* Accessed via the const register file */
+   .push_const_addr_format = nir_address_format_logical,
+
+   /* Accessed via ldl/stl */
+   .shared_addr_format = nir_address_format_32bit_offset,
+
+   /* Accessed via stg/ldg (not used with Vulkan?) */
+   .global_addr_format = nir_address_format_64bit_global,
+
+   .min_ubo_alignment = 64,
+   .min_ssbo_alignment = 4,
+};
+
+/* Lower float operations in `nir` using the device's float32 library shader.
+ *
+ * Runs nir_lower_floats with dev->float32_shader and then the ir3
+ * optimization loop over the result. tu_spirv_to_nir() calls this only when
+ * the shader's float-controls execution mode requests 32-bit denorm
+ * preserve.
+ */
+static void
+tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
+{
+   NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader);

+   /* Cleanup the result before linking to minimize shader size. */
+   /* The optimization loop is run with default-initialized options. */
+   struct ir3_optimize_options optimize_options = {};
+   ir3_optimize_loop(dev->compiler, &optimize_options, nir);
+}
+
 nir_shader *
 tu_spirv_to_nir(struct tu_device *dev,
                void *mem_ctx,
@ -43,38 +76,15 @@ tu_spirv_to_nir(struct tu_device *dev,
                const struct tu_shader_key *key,
                mesa_shader_stage stage)
 {
-   /* TODO these are made-up */
-   const struct spirv_to_nir_options spirv_options = {
-      /* ViewID is a sysval in geometry stages and an input in the FS */
-      .view_index_is_input =
-         stage == MESA_SHADER_FRAGMENT &&
-         !key->lower_view_index_to_device_index,
-
-      /* Use 16-bit math for RelaxedPrecision ALU ops */
-      .mediump_16bit_alu = true,
-
-      .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
-      .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
-
-      /* Accessed via stg/ldg */
-      .phys_ssbo_addr_format = nir_address_format_64bit_global,
-
-      /* Accessed via the const register file */
-      .push_const_addr_format = nir_address_format_logical,
-
-      /* Accessed via ldl/stl */
-      .shared_addr_format = nir_address_format_32bit_offset,
-
-      /* Accessed via stg/ldg (not used with Vulkan?) */
-      .global_addr_format = nir_address_format_64bit_global,
-
-      .min_ubo_alignment = 64,
-      .min_ssbo_alignment = 4,
-   };
-
   const nir_shader_compiler_options *nir_options =
      ir3_get_compiler_options(dev->compiler);

+   spirv_to_nir_options spirv_options = tu_spirv_options;
+   /* ViewID is a sysval in geometry stages and an input in the FS */
+   spirv_options.view_index_is_input =
+      stage == MESA_SHADER_FRAGMENT &&
+      !key->lower_view_index_to_device_index;
+
   nir_shader *nir;
   VkResult result =
      vk_pipeline_shader_stage_to_nir(&dev->vk, pipeline_flags, stage_info,
@ -144,9 +154,97 @@ tu_spirv_to_nir(struct tu_device *dev,
   };
   NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);

+   if (nir_is_denorm_preserve(nir->info.float_controls_execution_mode, 32)) {
+      tu_nir_lower_softfloat32(dev, nir);
+   }
+
   return nir;
 }

+/* Translate a SPIR-V module into a NIR function library.
+ *
+ * Starts from a copy of tu_spirv_options with create_library set, parses the
+ * module with spirv_to_nir(), and then lowers and optimizes it once up front
+ * so that this work is not repeated every time a library function is
+ * inlined.
+ *
+ * dev:        device whose ir3 compiler supplies the NIR compiler options
+ * words:      the SPIR-V module as 32-bit words
+ * word_count: number of words in `words`
+ *
+ * Returns the resulting nir_shader. The only caller in this file stores it
+ * in dev->float32_shader and releases it with ralloc_free().
+ *
+ * NOTE(review): the spirv_to_nir() result is used without a NULL check;
+ * presumably parsing the built-in module cannot fail -- confirm.
+ */
+static nir_shader *
+tu_spirv_to_nir_library(struct tu_device *dev,
+                        const uint32_t *words,
+                        size_t word_count)
+{
+   const nir_shader_compiler_options *nir_options =
+      ir3_get_compiler_options(dev->compiler);
+   spirv_to_nir_options spirv_options = tu_spirv_options;
+   spirv_options.create_library = true;

+   /* The compute stage matches the `-S comp` used when the GLSL source is
+    * compiled in meson.build.
+    */
+   nir_shader *nir =
+      spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
+                   "main", &spirv_options, nir_options);

+   NIR_PASS(_, nir, nir_lower_system_values);

+   /* We have to lower away local constant initializers right before we
+    * inline functions.  That way they get properly initialized at the top
+    * of the function and not at the top of its caller.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_lower_returns);
+   NIR_PASS(_, nir, nir_inline_functions);
+   nir_remove_non_exported(nir);
+   NIR_PASS(_, nir, nir_copy_prop);
+   NIR_PASS(_, nir, nir_opt_deref);

+   /* We can't deal with constant data, get rid of it */
+   nir_lower_constant_to_temp(nir);

+   /* We can go ahead and lower the rest of the constant initializers.  We do
+    * this here so that nir_remove_dead_variables and split_per_member_structs
+    * below see the corresponding stores.
+    */
+   NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);

+   NIR_PASS(_, nir, nir_opt_find_array_copies);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_dce);

+   NIR_PASS(_, nir, nir_split_var_copies);
+   NIR_PASS(_, nir, nir_lower_var_copies);

+   NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
+   NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+   NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);

+   /* Do some optimizations to clean up the shader now.  By optimizing the
+    * functions in the library, we avoid having to re-do that work every
+    * time we inline a copy of a function.  Reducing basic blocks also helps
+    * with compile times.
+    */
+   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+   NIR_PASS(_, nir, nir_copy_prop);
+   NIR_PASS(_, nir, nir_opt_dce);
+   NIR_PASS(_, nir, nir_opt_cse);
+   NIR_PASS(_, nir, nir_opt_gcm, true);

+   nir_opt_peephole_select_options peephole_select_options = {};
+   peephole_select_options.limit = 1;
+   NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
+   NIR_PASS(_, nir, nir_opt_dce);

+   return nir;
+}
+
+/* SPIR-V words of the float32 library. The initializer comes from
+ * float32_spv.h, which the build generates from the float32 GLSL source
+ * (see the float32_spv.h custom_target in meson.build).
+ */
+static const uint32_t float32_spv[] = {
+#include "float32_spv.h"
+};
+
+/* Build the NIR library from the embedded float32 SPIR-V and store it in
+ * dev->float32_shader, where tu_nir_lower_softfloat32() picks it up.
+ *
+ * NOTE(review): tu_CreateDevice() calls this unconditionally, so the
+ * library is translated for every device regardless of the
+ * tu_enable_softfloat32 option -- confirm that is intended.
+ */
+void
+tu_init_softfloat32(struct tu_device *dev)
+{
+   dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
+                                                 ARRAY_SIZE(float32_spv));
+}
+
+/* Release the library shader stored in dev->float32_shader by
+ * tu_init_softfloat32(). Called from tu_DestroyDevice() and from the
+ * tu_CreateDevice() failure path.
+ */
+void
+tu_destroy_softfloat32(struct tu_device *dev)
+{
+   ralloc_free(dev->float32_shader);
+}
+
 static void
 lower_load_push_constant(struct tu_device *dev,
                         nir_builder *b,
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@ -132,6 +132,13 @@ struct tu_shader_key {
 };

 extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
+
+void
+tu_init_softfloat32(struct tu_device *device);
+
+void
+tu_destroy_softfloat32(struct tu_device *device);
+
 bool
 tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev);

--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@ -1338,6 +1338,9 @@ TODO: document the other workarounds.
            -->
            <option name="tu_use_tex_coord_round_nearest_even_mode" value="true" />
        </engine>
+        <!-- vkd3d needs fp32 denorm preserve, which Turnip emulates with softfloat32. -->
+        <engine engine_name_match="vkd3d">
+            <option name="tu_enable_softfloat32" value="true" />
+        </engine>
        <application name="Sons Of The Forest" executable="SonsOfTheForest.exe">
            <option name="tu_ignore_frag_depth_direction" value="true" />
        </application>
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@ -647,6 +647,10 @@
   DRI_CONF_OPT_B(tu_ignore_frag_depth_direction, def, \
                  "Ignore direction specified for gl_FragDepth output")

+#define DRI_CONF_TU_ENABLE_SOFTFLOAT32(def) \
+   DRI_CONF_OPT_B(tu_enable_softfloat32, def, \
+                  "Enable softfloat emulation for float32 denormals")
+
 /**
 * \brief Honeykrisp specific configuration options
 */