mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 06:48:06 +02:00
radeonsi: follow shader_info.float_controls_execution_mode (mostly)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17864>
This commit is contained in:
parent
0482ff3158
commit
9e9cc62912
11 changed files with 58 additions and 20 deletions
|
|
@ -138,6 +138,6 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
|
|||
* - denormals break v_mad_f32
|
||||
* - GFX6 & GFX7 would be very slow
|
||||
*/
|
||||
conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
|
||||
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
||||
conf->float_mode &= ~V_00B028_FP_32_DENORMS;
|
||||
conf->float_mode |= V_00B028_FP_16_64_DENORMS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -295,9 +295,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -249,9 +249,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -229,9 +229,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -233,9 +233,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -249,9 +249,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -270,9 +270,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -277,9 +277,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -310,9 +310,10 @@
|
|||
},
|
||||
"FLOAT_MODE": {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192}
|
||||
]
|
||||
},
|
||||
"ForceControl": {
|
||||
|
|
|
|||
|
|
@ -418,9 +418,10 @@ VRSHtileEncoding = {
|
|||
missing_enums_all = {
|
||||
'FLOAT_MODE': {
|
||||
"entries": [
|
||||
{"name": "FP_32_ROUND_TOWARDS_ZERO", "value": 3},
|
||||
{"name": "FP_16_64_ROUND_TOWARDS_ZERO", "value": 12},
|
||||
{"name": "FP_32_DENORMS", "value": 48},
|
||||
{"name": "FP_64_DENORMS", "value": 192},
|
||||
{"name": "FP_ALL_DENORMS", "value": 240}
|
||||
{"name": "FP_16_64_DENORMS", "value": 192},
|
||||
]
|
||||
},
|
||||
'QUANT_MODE': {
|
||||
|
|
|
|||
|
|
@ -1839,6 +1839,33 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
shader->info.uses_instanceid = sel->info.uses_instanceid;
|
||||
shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
|
||||
|
||||
/* Set the FP ALU behavior. */
|
||||
/* By default, we disable denormals for FP32 and enable them for FP16 and FP64
|
||||
* for performance and correctness reasons. FP32 denormals can't be enabled because
|
||||
* they break output modifiers and v_mad_f32 and are very slow on GFX6-7.
|
||||
*
|
||||
* float_controls_execution_mode defines the set of valid behaviors. Contradicting flags
|
||||
* can be set simultaneously, which means we are allowed to choose, but not really because
|
||||
* some options cause GLCTS failures.
|
||||
*/
|
||||
unsigned float_mode = V_00B028_FP_16_64_DENORMS;
|
||||
|
||||
if (!(nir->info.float_controls_execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) &&
|
||||
nir->info.float_controls_execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32)
|
||||
float_mode |= V_00B028_FP_32_ROUND_TOWARDS_ZERO;
|
||||
|
||||
if (!(nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 |
|
||||
FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64)) &&
|
||||
nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 |
|
||||
FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64))
|
||||
float_mode |= V_00B028_FP_16_64_ROUND_TOWARDS_ZERO;
|
||||
|
||||
if (!(nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_DENORM_PRESERVE_FP16 |
|
||||
FLOAT_CONTROLS_DENORM_PRESERVE_FP64)) &&
|
||||
nir->info.float_controls_execution_mode & (FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 |
|
||||
FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64))
|
||||
float_mode &= ~V_00B028_FP_16_64_DENORMS;
|
||||
|
||||
/* TODO: ACO could compile non-monolithic shaders here (starting
|
||||
* with PS and NGG VS), but monolithic shaders should be compiled
|
||||
* by LLVM due to more complicated compilation.
|
||||
|
|
@ -1846,6 +1873,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir))
|
||||
return false;
|
||||
|
||||
shader->config.float_mode = float_mode;
|
||||
|
||||
/* The GS copy shader is compiled next. */
|
||||
if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue