mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 04:20:08 +01:00
nir/algebraic: Elide range clamping of f2u sources
There are no shader-db changes on ELK platforms because those platforms don't support 8- or 16-bit integer types. v2: Restrict patterns generated such that the integer limits are exactly representable in the specified floating point format. With the exception of the value 0, this requires that float_sz > int_sz. This had no impact on shader-db or fossil-db on any Intel platform. Noticed by Georg. v3: Add a missing is_a_number. shader-db: All Intel platforms had similar results. (Lunar Lake shown) total cycles in shared programs: 889936056 -> 889934082 (<.01%) cycles in affected programs: 65806 -> 63832 (-3.00%) helped: 2 / HURT: 0 fossil-db: Lunar Lake Totals: Instrs: 233284796 -> 233282917 (-0.00%); split: -0.00%, +0.00% Cycle count: 32756399804 -> 32754972188 (-0.00%); split: -0.01%, +0.00% Spill count: 519861 -> 519813 (-0.01%) Fill count: 663650 -> 663626 (-0.00%); split: -0.01%, +0.01% Max live registers: 71738626 -> 71738696 (+0.00%) Non SSA regs after NIR: 67837902 -> 67837648 (-0.00%) Totals from 1236 (0.16% of 790723) affected shaders: Instrs: 2134504 -> 2132625 (-0.09%); split: -0.09%, +0.01% Cycle count: 604922278 -> 603494662 (-0.24%); split: -0.48%, +0.25% Spill count: 16509 -> 16461 (-0.29%) Fill count: 32760 -> 32736 (-0.07%); split: -0.22%, +0.15% Max live registers: 250112 -> 250182 (+0.03%) Non SSA regs after NIR: 302368 -> 302114 (-0.08%) Meteor Lake, DG2, and Tiger Lake had similar results. 
(Meteor Lake shown) Totals: Instrs: 264095370 -> 264094056 (-0.00%); split: -0.00%, +0.00% Cycle count: 26554146277 -> 26553027268 (-0.00%); split: -0.01%, +0.01% Spill count: 530603 -> 530615 (+0.00%) Fill count: 613231 -> 613273 (+0.01%) Max live registers: 46559041 -> 46559087 (+0.00%) Totals from 1237 (0.14% of 905547) affected shaders: Instrs: 2262517 -> 2261203 (-0.06%); split: -0.07%, +0.01% Cycle count: 518219799 -> 517100790 (-0.22%); split: -0.59%, +0.37% Spill count: 17518 -> 17530 (+0.07%) Fill count: 32273 -> 32315 (+0.13%) Max live registers: 128360 -> 128406 (+0.04%) Ice Lake and Skylake had similar results. (Ice Lake shown) Totals: Instrs: 269849640 -> 269848198 (-0.00%); split: -0.00%, +0.00% Cycle count: 26718329643 -> 26718289020 (-0.00%); split: -0.00%, +0.00% Max live registers: 46878430 -> 46878462 (+0.00%) Totals from 1233 (0.14% of 905427) affected shaders: Instrs: 2324225 -> 2322783 (-0.06%); split: -0.06%, +0.00% Cycle count: 531467501 -> 531426878 (-0.01%); split: -0.11%, +0.10% Max live registers: 130782 -> 130814 (+0.02%) Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37186>
This commit is contained in:
parent
073ffceef6
commit
c49d6e0480
1 changed files with 39 additions and 0 deletions
|
|
@@ -3635,6 +3635,45 @@ late_optimizations = [
|
|||
(('udiv_aligned_4', a), ('ushr', a, 2)),
|
||||
]
|
||||
|
||||
# Elide explicit range clamping / NaN flushing of f2i and f2u sources when the
# driver advertises saturating conversions (has_f2i_sat / has_f2u_sat).  One
# set of patterns is appended to late_optimizations for every supported
# (integer size, float size) combination.
for int_sz in (8, 16, 32):
    # Note: Python's float is 64-bit, so it is able to exactly represent
    # these values for up to 32 bits.
    uintmax = float((1 << int_sz) - 1)
    intmax = float((1 << (int_sz - 1)) - 1)
    # The signed minimum is negative: -(2 ** (int_sz - 1)).
    intmin = -float(1 << (int_sz - 1))

    # Don't generate patterns that try to emit saturating conversion from
    # 64-bit float to 8-bit integer. These are generally not supported by any
    # drivers.
    all_float_sizes = (16, 32, 64) if int_sz > 8 else (16, 32)

    for float_sz in all_float_sizes:
        # The floating point type can only precisely represent the signed
        # integer minimum or maximum if it has enough mantissa and exponent
        # bits.
        if float_sz > int_sz:
            late_optimizations.extend([
                # This requires is_a_number because f2i_sat(NaN) is zero, but
                # fmax(intmin, NaN) is intmin.
                ((f'f2i{int_sz}', ('fmax', f'a@{float_sz}(is_a_number)', intmin)), (f'f2i{int_sz}_sat', a), 'options->has_f2i_sat'),

                # The same reasoning applies to the upper bounds: fmin(NaN,
                # max) is max, while the saturating conversions give zero.
                ((f'f2i{int_sz}', ('fmin', f'a@{float_sz}(is_a_number)', intmax)), (f'f2i{int_sz}_sat', a), 'options->has_f2i_sat'),
                ((f'f2u{int_sz}', ('fmin', f'a@{float_sz}(is_a_number)', uintmax)), (f'f2u{int_sz}_sat', a), 'options->has_f2u_sat'),
            ])

        late_optimizations.extend([
            # This does not require is_a_number because both f2u_sat(NaN) and
            # fmax(NaN, 0) are zero.
            ((f'f2u{int_sz}', ('fmax', f'a@{float_sz}', 0.0)), (f'f2u{int_sz}_sat', a), 'options->has_f2u_sat'),

            # f2i(NaN) and f2u(NaN) are zero.
            ((f'f2i{int_sz}', ('bcsel', ('feq', f'a@{float_sz}', a), a, 0.0)), (f'f2i{int_sz}_sat', a), 'options->has_f2i_sat'),
            ((f'f2u{int_sz}', ('bcsel', ('feq', f'a@{float_sz}', a), a, 0.0)), (f'f2u{int_sz}_sat', a), 'options->has_f2u_sat'),

            ((f'f2i{int_sz}', ('bcsel', ('fneu', f'a@{float_sz}', a), 0.0, a)), (f'f2i{int_sz}_sat', a), 'options->has_f2i_sat'),
            ((f'f2u{int_sz}', ('bcsel', ('fneu', f'a@{float_sz}', a), 0.0, a)), (f'f2u{int_sz}_sat', a), 'options->has_f2u_sat'),
        ])
|
||||
|
||||
# re-combine inexact mul+add to ffma. Do this before fsub so that a * b - c
|
||||
# gets combined to fma(a, b, -c).
|
||||
for sz, mulz in itertools.product([16, 32, 64], [False, True]):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue