tu: Expose preserving fp32 denorms via softfloat32
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Microsoft required the ability to preserve fp32 denorms via a shader
flag in shader model 6.2, but Adreno does not support this. Instead,
Qualcomm's DX12 driver uses soft floats. Implement something similar to
expose the equivalent Vulkan feature for vkd3d-proton. In practice, no
apps should actually use this, but it lets us go from SM6.0 to SM6.6 with
vkd3d-proton.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37608>
This commit is contained in:
Connor Abbott 2025-09-27 00:34:43 -04:00 committed by Marge Bot
parent d30ff374a1
commit b92f7c17da
11 changed files with 187 additions and 30 deletions

View file

@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
renderer_check = "Turnip Adreno .* 618"
fraction = 3
tests_per_group = 10000
[deqp.env]
# Enable additional tests that test fp32 denorm preserve.
tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests

View file

@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
timeout = 300
renderer_check = "Turnip Adreno .* 660"
tests_per_group = 10000
[deqp.env]
# Enable additional tests that test fp32 denorm preserve.
tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests

View file

@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
renderer_check = "Turnip Adreno .* 660"
fraction = 3
tests_per_group = 10000
[deqp.env]
# Enable additional tests that test fp32 denorm preserve.
tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests

View file

@ -4,6 +4,9 @@ deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
caselists = ["/deqp-vk/mustpass/vk-main.txt"]
renderer_check = "Turnip Adreno .* 750"
tests_per_group = 10000
[deqp.env]
# Enable additional tests that test fp32 denorm preserve.
tu_enable_softfloat32 = "true"
# force-gmem testing
# Autotuner forces sysmem on most CTS tests

View file

@ -51,6 +51,17 @@ libtu_files = files(
'tu_util.cc',
)
# Build-time generation of float32_spv.h: run glslang on float32_glsl_file
# as a compute-stage shader ('-S', 'comp') and append the generated header to
# libtu_files so it is built together with the driver sources. The header is
# #included inside the float32_spv[] array initializer in the shader code.
# NOTE(review): '-V' / '-x' presumably select Vulkan SPIR-V output written as
# textual hex words, and '--no-link' presumably keeps it as an unlinked
# library module -- confirm against glslang's usage text.
libtu_files += custom_target(
'float32_spv.h',
input : float32_glsl_file,
output : 'float32_spv.h',
command : [
prog_glslang, '--no-link', '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@',
glslang_quiet, glslang_depfile,
],
# Dependency file so the header is rebuilt when the inputs it records change.
depfile : 'float32_spv.h.d',
)
subdir('bvh')
libtu_includes = [

View file

@ -909,7 +909,14 @@ tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
p->shaderSignedZeroInfNanPreserveFloat16 = true;
p->shaderDenormFlushToZeroFloat32 = true;
p->shaderDenormPreserveFloat32 = false;
/* FP32 denorm preserve has to be emulated via soft-float. Normal
* applications should not use this, and we don't want to advertise it and
* get people confused, but vkd3d-proton cannot emulate it itself so we
* have to allow it to use our emulation.
*/
p->shaderDenormPreserveFloat32 = pdevice->instance->enable_softfloat32;
p->shaderRoundingModeRTEFloat32 = true;
p->shaderRoundingModeRTZFloat32 = false;
p->shaderSignedZeroInfNanPreserveFloat32 = true;
@ -1774,6 +1781,7 @@ static const driOptionDescription tu_dri_options[] = {
DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false)
DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false)
DRI_CONF_TU_ENABLE_SOFTFLOAT32(false)
DRI_CONF_SECTION_END
};
@ -1800,6 +1808,8 @@ tu_init_dri_options(struct tu_instance *instance)
driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode");
instance->ignore_frag_depth_direction =
driQueryOptionb(&instance->dri_options, "tu_ignore_frag_depth_direction");
instance->enable_softfloat32 =
driQueryOptionb(&instance->dri_options, "tu_enable_softfloat32");
}
static uint32_t instance_count = 0;
@ -2816,6 +2826,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
goto fail_compiler;
}
tu_init_softfloat32(device);
/* Initialize sparse array for refcounting imported BOs */
util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
@ -3079,6 +3091,7 @@ fail_global_bo:
fail_free_zombie_vma:
util_sparse_array_finish(&device->bo_map);
u_vector_finish(&device->zombie_vmas);
tu_destroy_softfloat32(device);
ir3_compiler_destroy(device->compiler);
fail_compiler:
vk_meta_device_finish(&device->vk, &device->meta);
@ -3133,6 +3146,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
vk_meta_device_finish(&device->vk, &device->meta);
tu_destroy_softfloat32(device);
ir3_compiler_destroy(device->compiler);
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);

View file

@ -227,6 +227,11 @@ struct tu_instance
/* Apps may be accidentally incorrect */
bool ignore_frag_depth_direction;
/* D3D12 SM6.2 requires float32 denorm support which we have to emulate.
* However we don't want native Vulkan apps using this.
*/
bool enable_softfloat32;
};
VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
VK_OBJECT_TYPE_INSTANCE)
@ -316,6 +321,8 @@ struct tu_device
struct vk_meta_device meta;
struct nir_shader *float32_shader;
radix_sort_vk_t *radix_sort;
mtx_t radix_sort_mutex;

View file

@ -35,6 +35,39 @@ init_ir3_nir_options(struct ir3_shader_nir_options *options,
};
}
/* Baseline SPIR-V -> NIR translation options. tu_spirv_to_nir() and
 * tu_spirv_to_nir_library() each start from a copy of this struct and then
 * set their own per-call field (view_index_is_input and create_library
 * respectively), so only stage-independent settings belong here.
 */
static const struct spirv_to_nir_options tu_spirv_options = {
/* Use 16-bit math for RelaxedPrecision ALU ops */
.mediump_16bit_alu = true,
/* UBOs and SSBOs use the same vec2-index + 32-bit-offset address format */
.ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
.ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
/* Accessed via stg/ldg */
.phys_ssbo_addr_format = nir_address_format_64bit_global,
/* Accessed via the const register file */
.push_const_addr_format = nir_address_format_logical,
/* Accessed via ldl/stl */
.shared_addr_format = nir_address_format_32bit_offset,
/* Accessed via stg/ldg (not used with Vulkan?) */
.global_addr_format = nir_address_format_64bit_global,
/* Minimum buffer alignments handed to spirv_to_nir (presumably in bytes --
 * TODO confirm against the spirv_to_nir_options declaration).
 */
.min_ubo_alignment = 64,
.min_ssbo_alignment = 4,
};
static void
tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
{
NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader);
/* Cleanup the result before linking to minimize shader size. */
struct ir3_optimize_options optimize_options = {};
ir3_optimize_loop(dev->compiler, &optimize_options, nir);
}
nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
void *mem_ctx,
@ -43,38 +76,15 @@ tu_spirv_to_nir(struct tu_device *dev,
const struct tu_shader_key *key,
mesa_shader_stage stage)
{
/* TODO these are made-up */
const struct spirv_to_nir_options spirv_options = {
/* ViewID is a sysval in geometry stages and an input in the FS */
.view_index_is_input =
stage == MESA_SHADER_FRAGMENT &&
!key->lower_view_index_to_device_index,
/* Use 16-bit math for RelaxedPrecision ALU ops */
.mediump_16bit_alu = true,
.ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
.ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
/* Accessed via stg/ldg */
.phys_ssbo_addr_format = nir_address_format_64bit_global,
/* Accessed via the const register file */
.push_const_addr_format = nir_address_format_logical,
/* Accessed via ldl/stl */
.shared_addr_format = nir_address_format_32bit_offset,
/* Accessed via stg/ldg (not used with Vulkan?) */
.global_addr_format = nir_address_format_64bit_global,
.min_ubo_alignment = 64,
.min_ssbo_alignment = 4,
};
const nir_shader_compiler_options *nir_options =
ir3_get_compiler_options(dev->compiler);
spirv_to_nir_options spirv_options = tu_spirv_options;
/* ViewID is a sysval in geometry stages and an input in the FS */
spirv_options.view_index_is_input =
stage == MESA_SHADER_FRAGMENT &&
!key->lower_view_index_to_device_index;
nir_shader *nir;
VkResult result =
vk_pipeline_shader_stage_to_nir(&dev->vk, pipeline_flags, stage_info,
@ -144,9 +154,97 @@ tu_spirv_to_nir(struct tu_device *dev,
};
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
if (nir_is_denorm_preserve(nir->info.float_controls_execution_mode, 32)) {
tu_nir_lower_softfloat32(dev, nir);
}
return nir;
}
/* Translate a SPIR-V module into a NIR function library.
 *
 * The module is parsed with the shared tu_spirv_options plus
 * create_library = true, as a compute-stage shader with entry point "main".
 * The result is then lowered, inlined and optimized once up front so that
 * the work does not have to be repeated for every user of the library.
 *
 * \param dev         device whose ir3 compiler supplies the NIR options
 * \param words       SPIR-V words to translate
 * \param word_count  number of 32-bit words in \p words
 * \return the resulting NIR shader. NOTE(review): the result of
 *         spirv_to_nir() is used without a NULL check -- confirm it cannot
 *         fail for the inputs passed here.
 */
static nir_shader *
tu_spirv_to_nir_library(struct tu_device *dev,
const uint32_t *words,
size_t word_count)
{
const nir_shader_compiler_options *nir_options =
ir3_get_compiler_options(dev->compiler);
/* Start from the driver-wide options and only flip on library mode. */
spirv_to_nir_options spirv_options = tu_spirv_options;
spirv_options.create_library = true;
/* No specialization constants are passed (NULL, 0). */
nir_shader *nir =
spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
"main", &spirv_options, nir_options);
NIR_PASS(_, nir, nir_lower_system_values);
/* We have to lower away local constant initializers right before we
* inline functions. That way they get properly initialized at the top
* of the function and not at the top of its caller.
*/
NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
NIR_PASS(_, nir, nir_lower_returns);
NIR_PASS(_, nir, nir_inline_functions);
/* Drop everything that is not exported from the library. */
nir_remove_non_exported(nir);
NIR_PASS(_, nir, nir_copy_prop);
NIR_PASS(_, nir, nir_opt_deref);
/* We can't deal with constant data, get rid of it */
nir_lower_constant_to_temp(nir);
/* We can go ahead and lower the rest of the constant initializers. We do
* this here so that nir_remove_dead_variables and split_per_member_structs
* below see the corresponding stores.
*/
/* NOTE(review): no split_per_member_structs pass is actually run in this
 * function; only nir_remove_dead_variables follows. The mention above
 * looks stale.
 */
NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
NIR_PASS(_, nir, nir_opt_find_array_copies);
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
NIR_PASS(_, nir, nir_opt_dce);
NIR_PASS(_, nir, nir_split_var_copies);
NIR_PASS(_, nir, nir_lower_var_copies);
NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
/* Do some optimizations to clean up the shader now. By optimizing the
* functions in the library, we avoid having to re-do that work every
* time we inline a copy of a function. Reducing basic blocks also helps
* with compile times.
*/
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS(_, nir, nir_copy_prop);
NIR_PASS(_, nir, nir_opt_dce);
NIR_PASS(_, nir, nir_opt_cse);
NIR_PASS(_, nir, nir_opt_gcm, true);
nir_opt_peephole_select_options peephole_select_options = {};
peephole_select_options.limit = 1;
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
/* Final dead-code sweep after peephole select. */
NIR_PASS(_, nir, nir_opt_dce);
return nir;
}
/* SPIR-V words of the softfloat32 library. The array contents come from the
 * float32_spv.h header, which the build generates from the float32 GLSL
 * source; tu_init_softfloat32() translates them to NIR.
 */
static const uint32_t float32_spv[] = {
#include "float32_spv.h"
};
void
tu_init_softfloat32(struct tu_device *dev)
{
dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
ARRAY_SIZE(float32_spv));
}
/* Release the softfloat32 NIR library created by tu_init_softfloat32().
 *
 * The pointer is cleared after freeing so that the device never holds a
 * dangling reference: the device-creation failure path keeps using the
 * device structure after this call, and clearing the field also makes a
 * repeated call harmless (ralloc_free() ignores NULL).
 */
void
tu_destroy_softfloat32(struct tu_device *dev)
{
   ralloc_free(dev->float32_shader);
   dev->float32_shader = NULL;
}
static void
lower_load_push_constant(struct tu_device *dev,
nir_builder *b,

View file

@ -132,6 +132,13 @@ struct tu_shader_key {
};
extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
/* Create the softfloat32 NIR library and store it in
 * device->float32_shader. Called during device creation.
 */
void
tu_init_softfloat32(struct tu_device *device);
/* Free the softfloat32 NIR library held in device->float32_shader. Called
 * on device destruction and on the device-creation failure path.
 */
void
tu_destroy_softfloat32(struct tu_device *device);
bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev);

View file

@ -1338,6 +1338,9 @@ TODO: document the other workarounds.
-->
<option name="tu_use_tex_coord_round_nearest_even_mode" value="true" />
</engine>
<engine engine_name_match="vkd3d">
<option name="tu_enable_softfloat32" value="true" />
</engine>
<application name="Sons Of The Forest" executable="SonsOfTheForest.exe">
<option name="tu_ignore_frag_depth_direction" value="true" />
</application>

View file

@ -647,6 +647,10 @@
DRI_CONF_OPT_B(tu_ignore_frag_depth_direction, def, \
"Ignore direction specified for gl_FragDepth output")
/* Boolean driconf option "tu_enable_softfloat32" (default given by `def`).
 * Turnip reads it into tu_instance::enable_softfloat32, which controls
 * whether shaderDenormPreserveFloat32 is reported to the application.
 */
#define DRI_CONF_TU_ENABLE_SOFTFLOAT32(def) \
DRI_CONF_OPT_B(tu_enable_softfloat32, def, \
"Enable softfloat emulation for float32 denormals")
/**
* \brief Honeykrisp specific configuration options
*/