mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 04:50:11 +01:00
nir/range-analysis: Adjust result range of multiplication to account for flush-to-zero
Fixes piglit tests (new in piglit!110):
- fs-underflow-fma-compare-zero.shader_test
- fs-underflow-mul-compare-zero.shader_test
v2: Add back part of comment accidentally deleted. Noticed by
Caio. Remove is_not_zero function as it is no longer used.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111308
Fixes: fa116ce357 ("nir/range-analysis: Range tracking for ffma and flrp")
Fixes: 405de7ccb6 ("nir/range-analysis: Rudimentary value range analysis pass")
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
All Gen7+ platforms** had similar results. (Ice Lake shown)
total instructions in shared programs: 16278465 -> 16279492 (<.01%)
instructions in affected programs: 16765 -> 17792 (6.13%)
helped: 0
HURT: 23
HURT stats (abs) min: 7 max: 275 x̄: 44.65 x̃: 8
HURT stats (rel) min: 1.15% max: 17.51% x̄: 4.23% x̃: 1.62%
95% mean confidence interval for instructions value: 9.57 79.74
95% mean confidence interval for instructions %-change: 1.85% 6.61%
Instructions are HURT.
total cycles in shared programs: 367135159 -> 367154270 (<.01%)
cycles in affected programs: 279306 -> 298417 (6.84%)
helped: 0
HURT: 23
HURT stats (abs) min: 13 max: 6029 x̄: 830.91 x̃: 54
HURT stats (rel) min: 0.17% max: 45.67% x̄: 7.33% x̃: 0.49%
95% mean confidence interval for cycles value: 100.89 1560.94
95% mean confidence interval for cycles %-change: 0.94% 13.71%
Cycles are HURT.
total spills in shared programs: 8870 -> 8869 (-0.01%)
spills in affected programs: 19 -> 18 (-5.26%)
helped: 1
HURT: 0
total fills in shared programs: 21904 -> 21901 (-0.01%)
fills in affected programs: 81 -> 78 (-3.70%)
helped: 1
HURT: 0
LOST: 0
GAINED: 1
** On Broadwell, a shader was hurt for spills / fills instead of
helped.
No changes on any earlier platforms.
This commit is contained in:
parent
33ad2bab4b
commit
ef2e235252
1 changed files with 22 additions and 31 deletions
|
|
@ -31,12 +31,6 @@
|
||||||
* the result.
|
* the result.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool
|
|
||||||
is_not_zero(enum ssa_ranges r)
|
|
||||||
{
|
|
||||||
return r == gt_zero || r == lt_zero || r == ne_zero;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void *
|
static void *
|
||||||
pack_data(const struct ssa_result_range r)
|
pack_data(const struct ssa_result_range r)
|
||||||
{
|
{
|
||||||
|
|
@ -271,7 +265,11 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||||
ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
|
ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
|
||||||
ASSERT_TABLE_IS_DIAGONAL(fadd_table);
|
ASSERT_TABLE_IS_DIAGONAL(fadd_table);
|
||||||
|
|
||||||
/* ge_zero: ge_zero * ge_zero
|
/* Due to flush-to-zero semanatics of floating-point numbers with very
|
||||||
|
* small mangnitudes, we can never really be sure a result will be
|
||||||
|
* non-zero.
|
||||||
|
*
|
||||||
|
* ge_zero: ge_zero * ge_zero
|
||||||
* | ge_zero * gt_zero
|
* | ge_zero * gt_zero
|
||||||
* | ge_zero * eq_zero
|
* | ge_zero * eq_zero
|
||||||
* | le_zero * lt_zero
|
* | le_zero * lt_zero
|
||||||
|
|
@ -280,9 +278,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||||
* | gt_zero * ge_zero # Multiplication is commutative
|
* | gt_zero * ge_zero # Multiplication is commutative
|
||||||
* | eq_zero * ge_zero # Multiplication is commutative
|
* | eq_zero * ge_zero # Multiplication is commutative
|
||||||
* | a * a # Left source == right source
|
* | a * a # Left source == right source
|
||||||
* ;
|
* | gt_zero * gt_zero
|
||||||
*
|
|
||||||
* gt_zero: gt_zero * gt_zero
|
|
||||||
* | lt_zero * lt_zero
|
* | lt_zero * lt_zero
|
||||||
* ;
|
* ;
|
||||||
*
|
*
|
||||||
|
|
@ -291,19 +287,10 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||||
* | lt_zero * ge_zero # Multiplication is commutative
|
* | lt_zero * ge_zero # Multiplication is commutative
|
||||||
* | le_zero * ge_zero # Multiplication is commutative
|
* | le_zero * ge_zero # Multiplication is commutative
|
||||||
* | le_zero * gt_zero
|
* | le_zero * gt_zero
|
||||||
* ;
|
* | lt_zero * gt_zero
|
||||||
*
|
|
||||||
* lt_zero: lt_zero * gt_zero
|
|
||||||
* | gt_zero * lt_zero # Multiplication is commutative
|
* | gt_zero * lt_zero # Multiplication is commutative
|
||||||
* ;
|
* ;
|
||||||
*
|
*
|
||||||
* ne_zero: ne_zero * gt_zero
|
|
||||||
* | ne_zero * lt_zero
|
|
||||||
* | gt_zero * ne_zero # Multiplication is commutative
|
|
||||||
* | lt_zero * ne_zero # Multiplication is commutative
|
|
||||||
* | ne_zero * ne_zero
|
|
||||||
* ;
|
|
||||||
*
|
|
||||||
* eq_zero: eq_zero * <any>
|
* eq_zero: eq_zero * <any>
|
||||||
* <any> * eq_zero # Multiplication is commutative
|
* <any> * eq_zero # Multiplication is commutative
|
||||||
*
|
*
|
||||||
|
|
@ -312,11 +299,11 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||||
static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
|
static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
|
||||||
/* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
|
/* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
|
||||||
/* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
|
/* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
|
||||||
/* lt_zero */ { _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero },
|
/* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
|
||||||
/* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
|
/* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
|
||||||
/* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
|
/* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
|
||||||
/* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
|
/* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
|
||||||
/* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, eq_zero },
|
/* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
|
||||||
/* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
|
/* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -601,11 +588,13 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||||
|
|
||||||
/* x * x => ge_zero */
|
/* x * x => ge_zero */
|
||||||
if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
|
if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
|
||||||
/* x * x => ge_zero or gt_zero depending on the range of x. */
|
/* Even if x > 0, the result of x*x can be zero when x is, for
|
||||||
r.range = is_not_zero(left.range) ? gt_zero : ge_zero;
|
* example, a subnormal number.
|
||||||
|
*/
|
||||||
|
r.range = ge_zero;
|
||||||
} else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
|
} else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
|
||||||
/* -x * x => le_zero or lt_zero depending on the range of x. */
|
/* -x * x => le_zero. */
|
||||||
r.range = is_not_zero(left.range) ? lt_zero : le_zero;
|
r.range = le_zero;
|
||||||
} else
|
} else
|
||||||
r.range = fmul_table[left.range][right.range];
|
r.range = fmul_table[left.range][right.range];
|
||||||
|
|
||||||
|
|
@ -732,11 +721,13 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||||
enum ssa_ranges fmul_range;
|
enum ssa_ranges fmul_range;
|
||||||
|
|
||||||
if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
|
if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
|
||||||
/* x * x => ge_zero or gt_zero depending on the range of x. */
|
/* See handling of nir_op_fmul for explanation of why ge_zero is the
|
||||||
fmul_range = is_not_zero(first.range) ? gt_zero : ge_zero;
|
* range.
|
||||||
|
*/
|
||||||
|
fmul_range = ge_zero;
|
||||||
} else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
|
} else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
|
||||||
/* -x * x => le_zero or lt_zero depending on the range of x. */
|
/* -x * x => le_zero */
|
||||||
fmul_range = is_not_zero(first.range) ? lt_zero : le_zero;
|
fmul_range = le_zero;
|
||||||
} else
|
} else
|
||||||
fmul_range = fmul_table[first.range][second.range];
|
fmul_range = fmul_table[first.range][second.range];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue