mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 05:58:05 +02:00
nak: add algebraic patterns to improve MUFU.F16
Doesn't really help many shaders, but I've seen a couple that turn from MUFU into F2F(MUFU.F16(F2F)). Though this might as well be a limitation of related code, e.g. returning F32 from TEX instead of using TEX.F16. Totals: CodeSize: 8662337424 -> 8662336960 (-0.00%) Static cycle count: 4718044491 -> 4718044554 (+0.00%); split: -0.00%, +0.00% Totals from 7 (0.00% of 1163204) affected shaders: CodeSize: 236480 -> 236016 (-0.20%) Static cycle count: 2108061 -> 2108124 (+0.00%); split: -0.01%, +0.01% Reviewed-by: Mel Henning <mhenning@darkrefraction.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40392>
This commit is contained in:
parent
9cc2cd843b
commit
72e9f9a760
1 changed files with 35 additions and 0 deletions
|
|
@ -31,6 +31,7 @@ s = 's'
|
|||
|
||||
# common conditions to improve readability
|
||||
volta = 'nak->sm >= 70 && nak->sm < 73'
|
||||
fp16_round_is_rtz = 'nir_is_rounding_mode_rtz(info->float_controls_execution_mode, 16)'
|
||||
|
||||
algebraic_lowering = [
|
||||
# Volta doesn't have `IMNMX`
|
||||
|
|
@ -56,6 +57,40 @@ for f2f16 in ['f2f16', 'f2f16_rtz', 'f2f16_rtne']:
|
|||
(('vec2', (f2f16 + '(is_used_once)', 'a@32'), (f2f16 + '(is_used_once)', 'b@32')), (f2f16, ('vec2', a, b)), 'nak->sm >= 86')
|
||||
]
|
||||
|
||||
# If we find mufu surrounded by bit_size conversions, just do the op in the
|
||||
# original bit_size.
|
||||
# MUFU.F16 internally appears to operate with the same precision as F32 does
|
||||
# with the result being rounded towards zero to F16. EXP2 and RCP seem to be
|
||||
# off by one around Inf, so it's only safe if we can ignore inf for those.
|
||||
#
|
||||
# This was verified with the `hw_tests::test_op_mufu_f16_down` test.
|
||||
|
||||
# For the ops below, mufu.f16 is identical to mufu.f32 with rtz rounding, except for results around infinity
|
||||
for op in ['fexp2', 'frcp']:
|
||||
algebraic_lowering += [
|
||||
(('f2f16_rtz(ninf)', (op + '(is_used_once)', ('f2f32', 'a@16'))), (op, a), 'nak->sm >= 73'),
|
||||
(('f2f16(ninf)', (op + '(is_used_once)', ('f2f32', 'a@16'))),
|
||||
(op, a),
|
||||
'nak->sm >= 73 && ' + fp16_round_is_rtz),
|
||||
]
|
||||
|
||||
# For the ops below, mufu.f16 is identical to mufu.f32 with rtz rounding
|
||||
for op in ['fcos_normalized_2_pi', 'flog2', 'frsq', 'fsin_normalized_2_pi', 'fsqrt']:
|
||||
algebraic_lowering += [
|
||||
(('f2f16_rtz', (op + '(is_used_once)', ('f2f32', 'a@16'))), (op, a), 'nak->sm >= 73'),
|
||||
(('f2f16', (op + '(is_used_once)', ('f2f32', 'a@16'))),
|
||||
(op, a),
|
||||
'nak->sm >= 73 && ' + fp16_round_is_rtz),
|
||||
]
|
||||
|
||||
# If contract is set, we can always remove the conversions
|
||||
for op in ['fcos_normalized_2_pi', 'fexp2', 'flog2', 'frcp', 'frsq', 'fsin_normalized_2_pi', 'fsqrt']:
|
||||
for f2f16 in ['f2f16_rtz', 'f2f16_rtne', 'f2f16']:
|
||||
algebraic_lowering += [
|
||||
((f2f16 + '(contract)', (op + '(is_used_once)', ('f2f32', 'a@16'))),
|
||||
(op, a), 'nak->sm >= 73'),
|
||||
(('f2f32(contract)', (op + '(is_used_once)', (f2f16, 'a@32'))), (op, a)),
|
||||
]
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue