mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 22:08:26 +02:00
tu: Expose preserving fp32 denorms via softfloat32
Microsoft required the ability to preserve fp32 denorms via a shader flag in shader model 6.2, but Adreno does not support this. Instead Qualcomm's DX12 driver uses soft floats. Implement something similar to expose the equivalent Vulkan feature for vkd3d-proton. In practice no apps should actually use this but it lets us go from SM6.0 to SM6.6 with vkd3d-proton. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37608>
This commit is contained in:
parent
d30ff374a1
commit
b92f7c17da
11 changed files with 187 additions and 30 deletions
|
|
@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
|
|||
renderer_check = "Turnip Adreno .* 618"
|
||||
fraction = 3
|
||||
tests_per_group = 10000
|
||||
[deqp.env]
|
||||
# Enable additional tests that test fp32 denorm preserve.
|
||||
tu_enable_softfloat32 = "true"
|
||||
|
||||
# force-gmem testing
|
||||
# Autotuner forces sysmem on most CTS tests
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
|
|||
timeout = 300
|
||||
renderer_check = "Turnip Adreno .* 660"
|
||||
tests_per_group = 10000
|
||||
[deqp.env]
|
||||
# Enable additional tests that test fp32 denorm preserve.
|
||||
tu_enable_softfloat32 = "true"
|
||||
|
||||
# force-gmem testing
|
||||
# Autotuner forces sysmem on most CTS tests
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ caselists = ["/deqp-vk/mustpass/vk-main.txt"]
|
|||
renderer_check = "Turnip Adreno .* 660"
|
||||
fraction = 3
|
||||
tests_per_group = 10000
|
||||
[deqp.env]
|
||||
# Enable additional tests that test fp32 denorm preserve.
|
||||
tu_enable_softfloat32 = "true"
|
||||
|
||||
# force-gmem testing
|
||||
# Autotuner forces sysmem on most CTS tests
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@ deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
|
|||
caselists = ["/deqp-vk/mustpass/vk-main.txt"]
|
||||
renderer_check = "Turnip Adreno .* 750"
|
||||
tests_per_group = 10000
|
||||
[deqp.env]
|
||||
# Enable additional tests that test fp32 denorm preserve.
|
||||
tu_enable_softfloat32 = "true"
|
||||
|
||||
# force-gmem testing
|
||||
# Autotuner forces sysmem on most CTS tests
|
||||
|
|
|
|||
|
|
@ -51,6 +51,17 @@ libtu_files = files(
|
|||
'tu_util.cc',
|
||||
)
|
||||
|
||||
libtu_files += custom_target(
|
||||
'float32_spv.h',
|
||||
input : float32_glsl_file,
|
||||
output : 'float32_spv.h',
|
||||
command : [
|
||||
prog_glslang, '--no-link', '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@',
|
||||
glslang_quiet, glslang_depfile,
|
||||
],
|
||||
depfile : 'float32_spv.h.d',
|
||||
)
|
||||
|
||||
subdir('bvh')
|
||||
|
||||
libtu_includes = [
|
||||
|
|
|
|||
|
|
@ -909,7 +909,14 @@ tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
|
|||
p->shaderSignedZeroInfNanPreserveFloat16 = true;
|
||||
|
||||
p->shaderDenormFlushToZeroFloat32 = true;
|
||||
p->shaderDenormPreserveFloat32 = false;
|
||||
|
||||
/* FP32 denorm preserve has to be emulated via soft-float. Normal
|
||||
* applications should not use this, and we don't want to advertise it and
|
||||
* get people confused, but vkd3d-proton cannot emulate it itself so we
|
||||
* have to allow it to use our emulation.
|
||||
*/
|
||||
p->shaderDenormPreserveFloat32 = pdevice->instance->enable_softfloat32;
|
||||
|
||||
p->shaderRoundingModeRTEFloat32 = true;
|
||||
p->shaderRoundingModeRTZFloat32 = false;
|
||||
p->shaderSignedZeroInfNanPreserveFloat32 = true;
|
||||
|
|
@ -1774,6 +1781,7 @@ static const driOptionDescription tu_dri_options[] = {
|
|||
DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
|
||||
DRI_CONF_TU_USE_TEX_COORD_ROUND_NEAREST_EVEN_MODE(false)
|
||||
DRI_CONF_TU_IGNORE_FRAG_DEPTH_DIRECTION(false)
|
||||
DRI_CONF_TU_ENABLE_SOFTFLOAT32(false)
|
||||
DRI_CONF_SECTION_END
|
||||
};
|
||||
|
||||
|
|
@ -1800,6 +1808,8 @@ tu_init_dri_options(struct tu_instance *instance)
|
|||
driQueryOptionb(&instance->dri_options, "tu_use_tex_coord_round_nearest_even_mode");
|
||||
instance->ignore_frag_depth_direction =
|
||||
driQueryOptionb(&instance->dri_options, "tu_ignore_frag_depth_direction");
|
||||
instance->enable_softfloat32 =
|
||||
driQueryOptionb(&instance->dri_options, "tu_enable_softfloat32");
|
||||
}
|
||||
|
||||
static uint32_t instance_count = 0;
|
||||
|
|
@ -2816,6 +2826,8 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
goto fail_compiler;
|
||||
}
|
||||
|
||||
tu_init_softfloat32(device);
|
||||
|
||||
/* Initialize sparse array for refcounting imported BOs */
|
||||
util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
|
||||
|
||||
|
|
@ -3079,6 +3091,7 @@ fail_global_bo:
|
|||
fail_free_zombie_vma:
|
||||
util_sparse_array_finish(&device->bo_map);
|
||||
u_vector_finish(&device->zombie_vmas);
|
||||
tu_destroy_softfloat32(device);
|
||||
ir3_compiler_destroy(device->compiler);
|
||||
fail_compiler:
|
||||
vk_meta_device_finish(&device->vk, &device->meta);
|
||||
|
|
@ -3133,6 +3146,8 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
|
||||
vk_meta_device_finish(&device->vk, &device->meta);
|
||||
|
||||
tu_destroy_softfloat32(device);
|
||||
|
||||
ir3_compiler_destroy(device->compiler);
|
||||
|
||||
vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
|
||||
|
|
|
|||
|
|
@ -227,6 +227,11 @@ struct tu_instance
|
|||
|
||||
/* Apps may be accidentally incorrect */
|
||||
bool ignore_frag_depth_direction;
|
||||
|
||||
/* D3D12 SM6.2 requires float32 denorm support which we have to emulate.
|
||||
* However we don't want native Vulkan apps using this.
|
||||
*/
|
||||
bool enable_softfloat32;
|
||||
};
|
||||
VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
|
||||
VK_OBJECT_TYPE_INSTANCE)
|
||||
|
|
@ -316,6 +321,8 @@ struct tu_device
|
|||
|
||||
struct vk_meta_device meta;
|
||||
|
||||
struct nir_shader *float32_shader;
|
||||
|
||||
radix_sort_vk_t *radix_sort;
|
||||
mtx_t radix_sort_mutex;
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,39 @@ init_ir3_nir_options(struct ir3_shader_nir_options *options,
|
|||
};
|
||||
}
|
||||
|
||||
/* Baseline SPIR-V -> NIR translation options. Both tu_spirv_to_nir() and
 * tu_spirv_to_nir_library() start from a copy of this table and then set
 * their own per-call fields (view_index_is_input, create_library) on the
 * copy, so only settings common to every conversion belong here.
 */
static const struct spirv_to_nir_options tu_spirv_options = {
|
||||
/* Use 16-bit math for RelaxedPrecision ALU ops */
|
||||
.mediump_16bit_alu = true,
|
||||
|
||||
.ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
||||
.ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
||||
|
||||
/* Accessed via stg/ldg */
|
||||
.phys_ssbo_addr_format = nir_address_format_64bit_global,
|
||||
|
||||
/* Accessed via the const register file */
|
||||
.push_const_addr_format = nir_address_format_logical,
|
||||
|
||||
/* Accessed via ldl/stl */
|
||||
.shared_addr_format = nir_address_format_32bit_offset,
|
||||
|
||||
/* Accessed via stg/ldg (not used with Vulkan?) */
|
||||
.global_addr_format = nir_address_format_64bit_global,
|
||||
|
||||
.min_ubo_alignment = 64,
|
||||
.min_ssbo_alignment = 4,
|
||||
};
|
||||
|
||||
/* Lower the floating-point operations in `nir` with nir_lower_floats, using
 * the device's float32 library shader (dev->float32_shader) as the source of
 * the replacement routines, then run ir3_optimize_loop() with
 * default-initialized options on the result.
 *
 * tu_spirv_to_nir() calls this only when the shader's float-controls
 * execution mode requests denorm preserve for 32-bit floats.
 */
static void
|
||||
tu_nir_lower_softfloat32(struct tu_device *dev, nir_shader *nir)
|
||||
{
|
||||
NIR_PASS(_, nir, nir_lower_floats, dev->float32_shader);
|
||||
|
||||
/* Cleanup the result before linking to minimize shader size. */
|
||||
struct ir3_optimize_options optimize_options = {};
|
||||
ir3_optimize_loop(dev->compiler, &optimize_options, nir);
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
tu_spirv_to_nir(struct tu_device *dev,
|
||||
void *mem_ctx,
|
||||
|
|
@ -43,38 +76,15 @@ tu_spirv_to_nir(struct tu_device *dev,
|
|||
const struct tu_shader_key *key,
|
||||
mesa_shader_stage stage)
|
||||
{
|
||||
/* TODO these are made-up */
|
||||
const struct spirv_to_nir_options spirv_options = {
|
||||
/* ViewID is a sysval in geometry stages and an input in the FS */
|
||||
.view_index_is_input =
|
||||
stage == MESA_SHADER_FRAGMENT &&
|
||||
!key->lower_view_index_to_device_index,
|
||||
|
||||
/* Use 16-bit math for RelaxedPrecision ALU ops */
|
||||
.mediump_16bit_alu = true,
|
||||
|
||||
.ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
||||
.ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
|
||||
|
||||
/* Accessed via stg/ldg */
|
||||
.phys_ssbo_addr_format = nir_address_format_64bit_global,
|
||||
|
||||
/* Accessed via the const register file */
|
||||
.push_const_addr_format = nir_address_format_logical,
|
||||
|
||||
/* Accessed via ldl/stl */
|
||||
.shared_addr_format = nir_address_format_32bit_offset,
|
||||
|
||||
/* Accessed via stg/ldg (not used with Vulkan?) */
|
||||
.global_addr_format = nir_address_format_64bit_global,
|
||||
|
||||
.min_ubo_alignment = 64,
|
||||
.min_ssbo_alignment = 4,
|
||||
};
|
||||
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
ir3_get_compiler_options(dev->compiler);
|
||||
|
||||
spirv_to_nir_options spirv_options = tu_spirv_options;
|
||||
/* ViewID is a sysval in geometry stages and an input in the FS */
|
||||
spirv_options.view_index_is_input =
|
||||
stage == MESA_SHADER_FRAGMENT &&
|
||||
!key->lower_view_index_to_device_index;
|
||||
|
||||
nir_shader *nir;
|
||||
VkResult result =
|
||||
vk_pipeline_shader_stage_to_nir(&dev->vk, pipeline_flags, stage_info,
|
||||
|
|
@ -144,9 +154,97 @@ tu_spirv_to_nir(struct tu_device *dev,
|
|||
};
|
||||
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
|
||||
|
||||
if (nir_is_denorm_preserve(nir->info.float_controls_execution_mode, 32)) {
|
||||
tu_nir_lower_softfloat32(dev, nir);
|
||||
}
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
||||
/* Translate a SPIR-V module into a NIR function library.
 *
 * `words` / `word_count` give the SPIR-V binary. It is translated with a copy
 * of tu_spirv_options that has create_library set, as a compute-stage module
 * with the entry point name "main" and no specialization constants. The
 * result is then run through the lowering and cleanup passes below and
 * returned to the caller.
 *
 * NOTE(review): the spirv_to_nir() result is used without a NULL check;
 * confirm that translation cannot fail for the modules passed in here.
 */
static nir_shader *
|
||||
tu_spirv_to_nir_library(struct tu_device *dev,
|
||||
const uint32_t *words,
|
||||
size_t word_count)
|
||||
{
|
||||
const nir_shader_compiler_options *nir_options =
|
||||
ir3_get_compiler_options(dev->compiler);
|
||||
spirv_to_nir_options spirv_options = tu_spirv_options;
|
||||
spirv_options.create_library = true;
|
||||
|
||||
nir_shader *nir =
|
||||
spirv_to_nir(words, word_count, NULL, 0, MESA_SHADER_COMPUTE,
|
||||
"main", &spirv_options, nir_options);
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_system_values);
|
||||
|
||||
/* We have to lower away local constant initializers right before we
|
||||
* inline functions. That way they get properly initialized at the top
|
||||
* of the function and not at the top of its caller.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
|
||||
NIR_PASS(_, nir, nir_lower_returns);
|
||||
NIR_PASS(_, nir, nir_inline_functions);
|
||||
nir_remove_non_exported(nir);
|
||||
NIR_PASS(_, nir, nir_copy_prop);
|
||||
NIR_PASS(_, nir, nir_opt_deref);
|
||||
|
||||
/* We can't deal with constant data, get rid of it */
|
||||
nir_lower_constant_to_temp(nir);
|
||||
|
||||
/* We can go ahead and lower the rest of the constant initializers. We do
|
||||
* this here so that nir_remove_dead_variables and split_per_member_structs
|
||||
* below see the corresponding stores.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_lower_variable_initializers, (nir_variable_mode)~0);
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_find_array_copies);
|
||||
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
|
||||
NIR_PASS(_, nir, nir_split_var_copies);
|
||||
NIR_PASS(_, nir, nir_lower_var_copies);
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_mediump_vars, nir_var_function_temp);
|
||||
NIR_PASS(_, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(_, nir, nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
/* Do some optimizations to clean up the shader now. By optimizing the
|
||||
* functions in the library, we avoid having to re-do that work every
|
||||
* time we inline a copy of a function. Reducing basic blocks also helps
|
||||
* with compile times.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
NIR_PASS(_, nir, nir_copy_prop);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
NIR_PASS(_, nir, nir_opt_cse);
|
||||
NIR_PASS(_, nir, nir_opt_gcm, true);
|
||||
|
||||
nir_opt_peephole_select_options peephole_select_options = {};
|
||||
peephole_select_options.limit = 1;
|
||||
NIR_PASS(_, nir, nir_opt_peephole_select, &peephole_select_options);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
||||
static const uint32_t float32_spv[] = {
|
||||
#include "float32_spv.h"
|
||||
};
|
||||
|
||||
/* Build the float32 soft-float NIR library from the embedded float32_spv
 * SPIR-V words and store it in dev->float32_shader. The library is later
 * consumed by tu_nir_lower_softfloat32() and released by
 * tu_destroy_softfloat32().
 */
void
|
||||
tu_init_softfloat32(struct tu_device *dev)
|
||||
{
|
||||
dev->float32_shader = tu_spirv_to_nir_library(dev, float32_spv,
|
||||
ARRAY_SIZE(float32_spv));
|
||||
}
|
||||
|
||||
/* Release the float32 soft-float NIR library held in dev->float32_shader
 * by passing it to ralloc_free().
 */
void
|
||||
tu_destroy_softfloat32(struct tu_device *dev)
|
||||
{
|
||||
ralloc_free(dev->float32_shader);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_load_push_constant(struct tu_device *dev,
|
||||
nir_builder *b,
|
||||
|
|
|
|||
|
|
@ -132,6 +132,13 @@ struct tu_shader_key {
|
|||
};
|
||||
|
||||
extern const struct vk_pipeline_cache_object_ops tu_shader_ops;
|
||||
|
||||
void
|
||||
tu_init_softfloat32(struct tu_device *device);
|
||||
|
||||
void
|
||||
tu_destroy_softfloat32(struct tu_device *device);
|
||||
|
||||
bool
|
||||
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, struct tu_device *dev);
|
||||
|
||||
|
|
|
|||
|
|
@ -1338,6 +1338,9 @@ TODO: document the other workarounds.
|
|||
-->
|
||||
<option name="tu_use_tex_coord_round_nearest_even_mode" value="true" />
|
||||
</engine>
|
||||
<engine engine_name_match="vkd3d">
|
||||
<option name="tu_enable_softfloat32" value="true" />
|
||||
</engine>
|
||||
<application name="Sons Of The Forest" executable="SonsOfTheForest.exe">
|
||||
<option name="tu_ignore_frag_depth_direction" value="true" />
|
||||
</application>
|
||||
|
|
|
|||
|
|
@ -647,6 +647,10 @@
|
|||
DRI_CONF_OPT_B(tu_ignore_frag_depth_direction, def, \
|
||||
"Ignore direction specified for gl_FragDepth output")
|
||||
|
||||
#define DRI_CONF_TU_ENABLE_SOFTFLOAT32(def) \
|
||||
DRI_CONF_OPT_B(tu_enable_softfloat32, def, \
|
||||
"Enable softfloat emulation for float32 denormals")
|
||||
|
||||
/**
|
||||
* \brief Honeykrisp specific configuration options
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue