mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-04 00:08:16 +02:00
nir/algebraic: optimize ishl(iadd(ishl, ishl))
This reduces arithmetic for cooperative matrix loads:

   v_mbcnt_lo_u32_b32 v0, -1, 0
   v_and_b32_e32 v1, 15, v0
   v_lshrrev_b32_e32 v0, 4, v0
   v_lshlrev_b32_e32 v1, 4, v1
   v_lshl_add_u32 v0, v0, 3, v1
   v_lshlrev_b32_e32 v0, 1, v0
->
   v_mbcnt_lo_u32_b32 v0, -1, 0
   v_and_b32_e32 v1, -16, v0
   v_and_b32_e32 v0, 15, v0
   v_lshl_add_u32 v0, v0, 5, v1

fossil-db (gfx1201):
Totals from 38 (0.02% of 208640) affected shaders:
Instrs: 42234 -> 42181 (-0.13%)
CodeSize: 232656 -> 232384 (-0.12%)
Latency: 128807 -> 128759 (-0.04%)
InvThroughput: 20860 -> 20850 (-0.05%)
VALU: 23035 -> 23013 (-0.10%)
SALU: 4790 -> 4784 (-0.13%)

fossil-db (gfx1201, dEQP-VK.compute.pipeline.cooperative_matrix.*):
Totals from 44 (2.71% of 1623) affected shaders:
Instrs: 46834 -> 46802 (-0.07%)
CodeSize: 287536 -> 287272 (-0.09%)
Latency: 100960 -> 100918 (-0.04%); split: -0.10%, +0.06%
InvThroughput: 21808 -> 21796 (-0.06%)
VALU: 19336 -> 19328 (-0.04%)
SALU: 10790 -> 10782 (-0.07%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41653>
This commit is contained in:
parent
b1c40839f2
commit
c3db34a525
1 changed files with 8 additions and 3 deletions
|
|
@ -605,19 +605,24 @@ for s in [8, 16, 32, 64]:
|
|||
mask = s - 1
|
||||
|
||||
ishl = "ishl@{}".format(s)
|
||||
ishl_once = "ishl@{}(is_used_once)".format(s)
|
||||
ishr = "ishr@{}".format(s)
|
||||
ushr = "ushr@{}".format(s)
|
||||
|
||||
in_bounds = ('ult', ('iadd', ('iand', b, mask), ('iand', c, mask)), s)
|
||||
in_bounds = lambda x, y: ('ult', ('iadd', ('iand', x, mask), ('iand', y, mask)), s)
|
||||
|
||||
optimizations.extend([
|
||||
((ishl, (ishl, a, '#b'), '#c'), ('bcsel', in_bounds, (ishl, a, ('iadd', b, c)), 0)),
|
||||
((ushr, (ushr, a, '#b'), '#c'), ('bcsel', in_bounds, (ushr, a, ('iadd', b, c)), 0)),
|
||||
((ishl, (ishl, a, '#b'), '#c'), ('bcsel', in_bounds(b, c), (ishl, a, ('iadd', b, c)), 0)),
|
||||
((ushr, (ushr, a, '#b'), '#c'), ('bcsel', in_bounds(b, c), (ushr, a, ('iadd', b, c)), 0)),
|
||||
|
||||
# To get -1 for large shifts of negative values, ishr must instead
|
||||
# clamp the shift count to the maximum value.
|
||||
((ishr, (ishr, a, '#b'), '#c'),
|
||||
(ishr, a, ('imin', ('iadd', ('iand', b, mask), ('iand', c, mask)), s - 1))),
|
||||
|
||||
((ishl, ('iadd(is_used_once)', (ishl_once, a, '#b'), (ishl_once, c, '#d')), '#e'),
|
||||
('iadd', ('bcsel', in_bounds(b, e), ('ishl', a, ('iand', ('iadd', b, e), mask)), 0),
|
||||
('bcsel', in_bounds(d, e), ('ishl', c, ('iand', ('iadd', d, e), mask)), 0))),
|
||||
])
|
||||
|
||||
# Optimize a pattern of address calculation created by DXVK where the offset is
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue