mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
intel/brw: Use range analysis to optimize fsign
shader-db:

Meteor Lake, DG2, and Tiger Lake had similar results. (Meteor Lake shown)
total instructions in shared programs: 19674784 -> 19665960 (-0.04%)
instructions in affected programs: 933425 -> 924601 (-0.95%)
helped: 3656 / HURT: 0

total cycles in shared programs: 810343919 -> 810241030 (-0.01%)
cycles in affected programs: 56752034 -> 56649145 (-0.18%)
helped: 3032 / HURT: 434

LOST: 11
GAINED: 0

Ice Lake and Skylake had similar results. (Ice Lake shown)
total instructions in shared programs: 20315795 -> 20305856 (-0.05%)
instructions in affected programs: 979698 -> 969759 (-1.01%)
helped: 3845 / HURT: 0

total cycles in shared programs: 830600281 -> 830534694 (<.01%)
cycles in affected programs: 45675615 -> 45610028 (-0.14%)
helped: 3250 / HURT: 325

total spills in shared programs: 4583 -> 4565 (-0.39%)
spills in affected programs: 180 -> 162 (-10.00%)
helped: 3 / HURT: 0

total fills in shared programs: 5245 -> 5219 (-0.50%)
fills in affected programs: 379 -> 353 (-6.86%)
helped: 3 / HURT: 0

LOST: 14
GAINED: 8

fossil-db:

All Intel platforms except Tiger Lake had similar results. (Meteor Lake shown)
Totals:
Instrs: 154024263 -> 154023814 (-0.00%)
Cycle count: 17463341602 -> 17461726239 (-0.01%); split: -0.01%, +0.00%

Totals from 322 (0.05% of 631440) affected shaders:
Instrs: 199933 -> 199484 (-0.22%)
Cycle count: 168492537 -> 166877174 (-0.96%); split: -0.96%, +0.00%

Tiger Lake
Instrs: 149984723 -> 149984287 (-0.00%)
Cycle count: 15238596937 -> 15239260415 (+0.00%); split: -0.00%, +0.01%
Max dispatch width: 5553408 -> 5553424 (+0.00%)

Totals from 318 (0.05% of 631414) affected shaders:
Instrs: 179624 -> 179188 (-0.24%)
Cycle count: 160724533 -> 161388011 (+0.41%); split: -0.06%, +0.48%
Max dispatch width: 3296 -> 3312 (+0.49%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29095>
This commit is contained in:
parent
e578657313
commit
97e3c6a12a
1 changed file with 16 additions and 0 deletions
|
|
@@ -16,6 +16,16 @@ lower_fsign = [
|
|||
# is_finite.
|
||||
#
|
||||
# NOTE: fcsel opcodes are currently limited to float32 in NIR.
|
||||
(('fmul@32(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('fcsel_gt', a , b , ('fmul', b, 0.0 ))),
|
||||
(('~fmul@32', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('fcsel_gt', a , b , 0.0 )),
|
||||
(('fmul@32(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('fcsel_gt', ('fneg', a), ('fneg', b), ('fmul', b, 0x80000000))),
|
||||
(('~fmul@32', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('fcsel_gt', ('fneg', a), ('fneg', b), 0x80000000 )),
|
||||
|
||||
(('fmul@16(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('bcsel', ('!flt', 0, a ), b , ('fmul', b, 0.0 ))),
|
||||
(('~fmul@16', ('fsign(is_used_once)', 'a(is_not_negative)'), b), ('bcsel', ('!flt', 0, a ), b , 0.0 )),
|
||||
(('fmul@16(is_only_used_as_float)', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('bcsel', ('!flt', 0, ('fneg', a)), ('fneg', b), ('fmul', b, 0x8000))),
|
||||
(('~fmul@16', ('fsign(is_used_once)', 'a(is_not_positive)'), b), ('bcsel', ('!flt', 0, ('fneg', a)), ('fneg', b), 0x8000 )),
|
||||
|
||||
(('fmul@32(is_only_used_as_float,nsz)', ('fsign(is_used_once)', a), 'b(is_finite)'), ('fcsel_gt', a, b, ('fcsel_gt', ('fneg', a), ('fneg', b), 0.0))),
|
||||
(('fmul@32(is_only_used_as_float,nsz,nnan)', ('fsign(is_used_once)', a), b ), ('fcsel_gt', a, b, ('fcsel_gt', ('fneg', a), ('fneg', b), 0.0))),
|
||||
(('~fmul@32', ('fsign(is_used_once)', a), b ), ('fcsel_gt', a, b, ('fcsel_gt', ('fneg', a), ('fneg', b), 0.0))),
|
||||
|
|
@@ -25,6 +35,12 @@ lower_fsign = [
|
|||
# only slight deviation is that it can provide -0 for some NaN inputs.
|
||||
(('fsign@32', a), ('fcsel_gt', ('fabs', a) , ('ior', ('iand', a, 0x80000000), 0x3f800000), ('iand', a, 0x80000000))),
|
||||
(('fsign@16', a), ('bcsel', ('!flt', 0, ('fabs', a)), ('ior', ('iand', a, 0x8000 ), 0x3c00 ), ('iand', a, 0x8000 ))),
|
||||
|
||||
# The only effect a*0.0 should have is when 'a' is infinity, -0.0 or NaN
|
||||
(('fmul(nsz,nnan)', 'a', 0.0), 0.0),
|
||||
(('fmul(nsz)', 'a(is_finite)', 0.0), 0.0),
|
||||
(('fmul(nsz,nnan)', 'a@32', 0x80000000), 0.0),
|
||||
(('fmul(nsz,nnan)', 'a@16', 0x8000 ), 0.0),
|
||||
]
|
||||
|
||||
def main():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue