diff --git a/src/freedreno/ci/deqp-freedreno-a618-vk.toml b/src/freedreno/ci/deqp-freedreno-a618-vk.toml
index 43a19d811f7..e5fb47ec983 100644
--- a/src/freedreno/ci/deqp-freedreno-a618-vk.toml
+++ b/src/freedreno/ci/deqp-freedreno-a618-vk.toml
@@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
renderer_check = "Turnip Adreno .* 618"
fraction = 3
tests_per_group = 10000
+[deqp.env]
+# Advertise fp32 denorm preserve (softfloat) so the CTS tests for it are run.
+tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests
diff --git a/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml b/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml
index 00c2032da93..d74db8fb584 100644
--- a/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml
+++ b/src/freedreno/ci/deqp-freedreno-a660-vk-full.toml
@@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
timeout = 300
renderer_check = "Turnip Adreno .* 660"
tests_per_group = 10000
+[deqp.env]
+# Advertise fp32 denorm preserve (softfloat) so the CTS tests for it are run.
+tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests
diff --git a/src/freedreno/ci/deqp-freedreno-a660-vk.toml b/src/freedreno/ci/deqp-freedreno-a660-vk.toml
index a60bcddec3c..7fd88e03dc0 100644
--- a/src/freedreno/ci/deqp-freedreno-a660-vk.toml
+++ b/src/freedreno/ci/deqp-freedreno-a660-vk.toml
@@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
renderer_check = "Turnip Adreno .* 660"
fraction = 3
tests_per_group = 10000
+[deqp.env]
+# Advertise fp32 denorm preserve (softfloat) so the CTS tests for it are run.
+tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests
diff --git a/src/freedreno/ci/deqp-freedreno-a750-vk.toml b/src/freedreno/ci/deqp-freedreno-a750-vk.toml
index 3e5e0205be9..d37327a8b56 100644
--- a/src/freedreno/ci/deqp-freedreno-a750-vk.toml
+++ b/src/freedreno/ci/deqp-freedreno-a750-vk.toml
@@ -4,6 +4,9 @@ deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
caselists = ["/deqp-vk/mustpass/vk-main.txt"]
renderer_check = "Turnip Adreno .* 750"
tests_per_group = 10000
+[deqp.env]
+# Advertise fp32 denorm preserve (softfloat) so the CTS tests for it are run.
+tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests
diff --git a/src/freedreno/vulkan/meson.build b/src/freedreno/vulkan/meson.build
index 18e6aba1387..0f6daa01e7c 100644
--- a/src/freedreno/vulkan/meson.build
+++ b/src/freedreno/vulkan/meson.build
@@ -51,6 +51,17 @@ libtu_files = files(
'tu_util.cc',
)
+# Run glslang on float32_glsl_file to produce float32_spv.h, and add the
+# generated header to the turnip sources. tu_shader.cc #includes it as the
+# initializer of a uint32_t array.
+# NOTE(review): '-x' is expected to emit the SPIR-V as hex text and
+# '--no-link' to skip linking -- confirm against the glslang in use.
+libtu_files += custom_target(
+ 'float32_spv.h',
+ input : float32_glsl_file,
+ output : 'float32_spv.h',
+ command : [
+ prog_glslang, '--no-link', '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@',
+ glslang_quiet, glslang_depfile,
+ ],
+ depfile : 'float32_spv.h.d',
+)
+
subdir('bvh')
libtu_includes = [
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index abae398f13b..956d8edd24b 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -909,7 +909,14 @@ tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
p->shaderSignedZeroInfNanPreserveFloat16 = true;
p->shaderDenormFlushToZeroFloat32 = true;
- p->shaderDenormPreserveFloat32 = false;
+
+ /* FP32 denorm preserve has to be emulated via soft-float. Normal
+ * applications should not use this, and we don't want to advertise it and
+ * get people confused, but vkd3d-proton cannot emulate it itself so we
+ * have to allow it to use our emulation.
+ */
+ p->shaderDenormPreserveFloat32 = pdevice->instance->enable_softfloat32;
+
p->shaderRoundingModeRTEFloat32 = true;
p->shaderRoundingModeRTZFloat32 = false;
p->shaderSignedZeroInfNanPreserveFloat32 = true;
@@ -1774,6 +1781,7 @@ static const driOptionDescription tu_dri_options[] = {
DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false)
DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false)
+ DRI_CONF_TU_ENABLE_SOFTFLOAT32(false)
DRI_CONF_SECTION_END
};
@@ -1800,6 +1808,8 @@ tu_init_dri_options(struct tu_instance *instance)
driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode");
instance->ignore_frag_depth_direction =
driQueryOptionb(&instance->dri_options, "tu_ignore_frag_depth_direction");
+ instance->enable_softfloat32 =
+ driQueryOptionb(&instance->dri_options, "tu_enable_softfloat32");
}
static uint32_t instance_count = 0;
@@ -2816,6 +2826,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
goto fail_compiler;
}
+ tu_init_softfloat32(device);
+
/* Initialize sparse array for refcounting imported BOs */
util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
@@ -3079,6 +3091,7 @@ fail_global_bo:
fail_free_zombie_vma:
util_sparse_array_finish(&device->bo_map);
u_vector_finish(&device->zombie_vmas);
+ tu_destroy_softfloat32(device);
ir3_compiler_destroy(device->compiler);
fail_compiler:
vk_meta_device_finish(&device->vk, &device->meta);
@@ -3133,6 +3146,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
vk_meta_device_finish(&device->vk, &device->meta);
+ tu_destroy_softfloat32(device);
+
ir3_compiler_destroy(device->compiler);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 2ad3103392f..6a3b7e54195 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -227,6 +227,11 @@ struct tu_instance
/* Apps may be accidentally incorrect */
bool ignore_frag_depth_direction;
+
+ /* D3D12 SM6.2 requires float32 denorm support which we have to emulate.
+ * However we don't want native Vulkan apps using this.
+ */
+ bool enable_softfloat32;
};
VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
VK_OBJECT_TYPE_INSTANCE)
@@ -316,6 +321,8 @@ struct tu_device
struct vk_meta_device meta;
+ struct nir_shader *float32_shader;
+
radix_sort_vk_t *radix_sort;
mtx_t radix_sort_mutex;
diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc
index a88fa3d2059..81db611403b 100644
--- a/src/freedreno/vulkan/tu_shader.cc
+++ b/src/freedreno/vulkan/tu_shader.cc
@@ -35,6 +35,39 @@ init_ir3_nir_options(struct ir3_shader_nir_options *options,
};
}
+/* Base spirv_to_nir options shared by the SPIR-V translations in this file.
+ * Callers copy this struct and then set the fields that differ per use:
+ * view_index_is_input in tu_spirv_to_nir() and create_library in
+ * tu_spirv_to_nir_library().
+ */
+static const struct spirv_to_nir_options tu_spirv_options = {
+ /* Use 16-bit math for RelaxedPrecision ALU ops */
+ .mediump_16bit_alu = true,
+
+ .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+ .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
+
+ /* Accessed via stg/ldg */
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+
+ /* Accessed via the const register file */
+ .push_const_addr_format = nir_address_format_logical,
+
+ /* Accessed via ldl/stl */
+ .shared_addr_format = nir_address_format_32bit_offset,
+
+ /* Accessed via stg/ldg (not used with Vulkan?) */
+ .global_addr_format = nir_address_format_64bit_global,
+
+ .min_ubo_alignment = 64,
+ .min_ssbo_alignment = 4,
+};
+
+/* Run nir_lower_floats on `nir` using the device's float32 library shader,
+ * then run the ir3 optimization loop over the result.
+ */
+static void
+tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
+{
+   struct ir3_optimize_options opts = {};
+   nir_shader *softfloat_lib = dev->float32_shader;
+
+   NIR_PASS(_, nir, nir_lower_floats, softfloat_lib);
+
+   /* Clean up the lowered code before linking to keep the shader small. */
+   ir3_optimize_loop(dev->compiler, &opts, nir);
+}
+
nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
void *mem_ctx,
@@ -43,38 +76,15 @@ tu_spirv_to_nir(struct tu_device *dev,
const struct tu_shader_key *key,
mesa_shader_stage stage)
{
- /* TODO these are made-up */
- const struct spirv_to_nir_options spirv_options = {
- /* ViewID is a sysval in geometry stages and an input in the FS */
- .view_index_is_input =
- stage == MESA_SHADER_FRAGMENT &&
- !key->lower_view_index_to_device_index,
-
- /* Use 16-bit math for RelaxedPrecision ALU ops */
- .mediump_16bit_alu = true,
-
- .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
- .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
-
- /* Accessed via stg/ldg */
- .phys_ssbo_addr_format = nir_address_format_64bit_global,
-
- /* Accessed via the const register file */
- .push_const_addr_format = nir_address_format_logical,
-
- /* Accessed via ldl/stl */
- .shared_addr_format = nir_address_format_32bit_offset,
-
- /* Accessed via stg/ldg (not used with Vulkan?) */
- .global_addr_format = nir_address_format_64bit_global,
-
- .min_ubo_alignment = 64,
- .min_ssbo_alignment = 4,
- };
-
const nir_shader_compiler_options *nir_options =
ir3_get_compiler_options(dev->compiler);
+ spirv_to_nir_options spirv_options = tu_spirv_options;
+ /* ViewID is a sysval in geometry stages and an input in the FS */
+ spirv_options.view_index_is_input =
+ stage == MESA_SHADER_FRAGMENT &&
+ !key->lower_view_index_to_device_index;
+
nir_shader *nir;
VkResult result =
vk_pipeline_shader_stage_to_nir(&dev->vk, pipeline_flags, stage_info,
@@ -144,9 +154,97 @@ tu_spirv_to_nir(struct tu_device *dev,
};
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
+ if (nir_is_denorm_preserve(nir->info.float_controls_execution_mode, 32)) {
+ tu_nir_lower_softfloat32(dev, nir);
+ }
+
return nir;
}
+/* Translate a SPIR-V module into a NIR function library.
+ *
+ * `words` and `word_count` are handed to spirv_to_nir() together with a copy
+ * of tu_spirv_options that has create_library set. The resulting shader is
+ * then run through the fixed sequence of lowering and cleanup passes below
+ * and returned.
+ *
+ * NOTE(review): the result of spirv_to_nir() is used without a NULL check --
+ * confirm it cannot fail for the module passed in.
+ */
+static nir_shader *
+tu_spirv_to_nir_library(struct tu_device *dev,
+ const uint32_t *words,
+ size_t word_count)
+{
+ const nir_shader_compiler_options *nir_options =
+ ir3_get_compiler_options(dev->compiler);
+ spirv_to_nir_options spirv_options = tu_spirv_options;
+ spirv_options.create_library = true;
+
+ nir_shader *nir =
+ spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
+ "main", &spirv_options, nir_options);
+
+ NIR_PASS(_, nir, nir_lower_system_values);
+
+ /* We have to lower away local constant initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+ NIR_PASS(_, nir, nir_lower_returns);
+ NIR_PASS(_, nir, nir_inline_functions);
+ nir_remove_non_exported(nir);
+ NIR_PASS(_, nir, nir_copy_prop);
+ NIR_PASS(_, nir, nir_opt_deref);
+
+ /* We can't deal with constant data, get rid of it */
+ nir_lower_constant_to_temp(nir);
+
+ /* We can go ahead and lower the rest of the constant initializers. We do
+ * this here so that nir_remove_dead_variables and split_per_member_structs
+ * below see the corresponding stores.
+ */
+ NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
+
+ NIR_PASS(_, nir, nir_opt_find_array_copies);
+ NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+ NIR_PASS(_, nir, nir_opt_dce);
+
+ NIR_PASS(_, nir, nir_split_var_copies);
+ NIR_PASS(_, nir, nir_lower_var_copies);
+
+ NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
+ NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+ NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
+
+ /* Do some optimizations to clean up the shader now. By optimizing the
+ * functions in the library, we avoid having to re-do that work every
+ * time we inline a copy of a function. Reducing basic blocks also helps
+ * with compile times.
+ */
+ NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+ NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+ NIR_PASS(_, nir, nir_copy_prop);
+ NIR_PASS(_, nir, nir_opt_dce);
+ NIR_PASS(_, nir, nir_opt_cse);
+ NIR_PASS(_, nir, nir_opt_gcm, true);
+
+ nir_opt_peephole_select_options peephole_select_options = {};
+ peephole_select_options.limit = 1;
+ NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
+ NIR_PASS(_, nir, nir_opt_dce);
+
+ return nir;
+}
+
+/* SPIR-V words of the float32 library, taken from the float32_spv.h header
+ * that the build generates with glslang.
+ */
+static const uint32_t float32_spv[] = {
+#include "float32_spv.h"
+};
+
+/* Build the float32 library shader from the embedded SPIR-V words and store
+ * it in dev->float32_shader.
+ */
+void
+tu_init_softfloat32(struct tu_device *dev)
+{
+   nir_shader *library =
+      tu_spirv_to_nir_library(dev, float32_spv, ARRAY_SIZE(float32_spv));
+
+   dev->float32_shader = library;
+}
+
+/* Release the float32 library shader held in dev->float32_shader. */
+void
+tu_destroy_softfloat32(struct tu_device *dev)
+{
+   nir_shader *library = dev->float32_shader;
+
+   ralloc_free(library);
+}
+
static void
lower_load_push_constant(struct tu_device *dev,
nir_builder *b,
diff --git a/src/freedreno/vulkan/tu_shader.h b/src/freedreno/vulkan/tu_shader.h
index 02b68471bb8..440c82277b8 100644
--- a/src/freedreno/vulkan/tu_shader.h
+++ b/src/freedreno/vulkan/tu_shader.h
@@ -132,6 +132,13 @@ struct tu_shader_key {
};
extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
+
+/* Build the float32 library shader and store it in device->float32_shader.
+ * Called from tu_CreateDevice().
+ */
+void
+tu_init_softfloat32(struct tu_device *device);
+
+/* Free device->float32_shader. Called from tu_DestroyDevice() and from the
+ * tu_CreateDevice() failure path.
+ */
+void
+tu_destroy_softfloat32(struct tu_device *device);
+
bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev);
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index baa54a65668..58340cd4ac8 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -1338,6 +1338,9 @@ TODO: document the other workarounds.
-->
+
+
+
diff --git a/src/util/driconf.h b/src/util/driconf.h
index 8fce2fc9ef0..dc7ca01fa23 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -647,6 +647,10 @@
DRI_CONF_OPT_B(tu_ignore_frag_depth_direction, def, \
"Ignore direction specified for gl_FragDepth output")
+/* Boolean option; turnip reads it into tu_instance::enable_softfloat32, which
+ * controls whether shaderDenormPreserveFloat32 is reported.
+ */
+#define DRI_CONF_TU_ENABLE_SOFTFLOAT32(def) \
+ DRI_CONF_OPT_B(tu_enable_softfloat32, def, \
+ "Enable softfloat emulation for float32 denormals")
+
/**
* \brief Honeykrisp specific configuration options
*/