mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 13:48:06 +02:00
nir/opt_algebraic: use imul24_relaxed for lowered dot4x8_add
Totals from 28 (0.04% of 72819) affected shaders: (Navi10)
MaxWaves: 181 -> 186 (+2.76%)
Instrs: 406735 -> 338360 (-16.81%)
CodeSize: 2913588 -> 2469712 (-15.23%)
VGPRs: 5520 -> 5468 (-0.94%)
SpillVGPRs: 32 -> 0 (-inf%)
LDS: 64512 -> 62464 (-3.17%)
Scratch: 10240 -> 0 (-inf%)
Latency: 11028252 -> 4357120 (-60.49%)
InvThroughput: 11004126 -> 4079018 (-62.93%)
VClause: 1686 -> 2055 (+21.89%); split: -0.89%, +22.78%
SClause: 890 -> 852 (-4.27%)
Copies: 4516 -> 2644 (-41.45%); split: -41.59%, +0.13%
PreSGPRs: 982 -> 974 (-0.81%)
PreVGPRs: 5356 -> 4284 (-20.01%)
VALU: 370529 -> 330201 (-10.88%)
SALU: 28850 -> 1170 (-95.94%)
VMEM: 2616 -> 2560 (-2.14%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41178>
This commit is contained in:
parent
fe067b17d9
commit
708093d830
1 changed file with 16 additions and 16 deletions
|
|
@@ -410,22 +410,22 @@ for sz in (16, 32, 64):
|
|||
|
||||
 # Shorthand for the expansion of just the dot product part of the [iu]dp4a
 # instructions.
-sdot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_i8', a, 0), ('extract_i8', b, 0)),
-                                 ('imul', ('extract_i8', a, 1), ('extract_i8', b, 1))),
-                        ('iadd', ('imul', ('extract_i8', a, 2), ('extract_i8', b, 2)),
-                                 ('imul', ('extract_i8', a, 3), ('extract_i8', b, 3))))
-udot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_u8', a, 0), ('extract_u8', b, 0)),
-                                 ('imul', ('extract_u8', a, 1), ('extract_u8', b, 1))),
-                        ('iadd', ('imul', ('extract_u8', a, 2), ('extract_u8', b, 2)),
-                                 ('imul', ('extract_u8', a, 3), ('extract_u8', b, 3))))
-sudot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_i8', a, 0), ('extract_u8', b, 0)),
-                                  ('imul', ('extract_i8', a, 1), ('extract_u8', b, 1))),
-                         ('iadd', ('imul', ('extract_i8', a, 2), ('extract_u8', b, 2)),
-                                  ('imul', ('extract_i8', a, 3), ('extract_u8', b, 3))))
-sdot_2x16_a_b = ('iadd', ('imul', ('extract_i16', a, 0), ('extract_i16', b, 0)),
-                         ('imul', ('extract_i16', a, 1), ('extract_i16', b, 1)))
-udot_2x16_a_b = ('iadd', ('imul', ('extract_u16', a, 0), ('extract_u16', b, 0)),
-                         ('imul', ('extract_u16', a, 1), ('extract_u16', b, 1)))
+sdot_4x8_a_b = ('iadd', ('iadd', ('imul24_relaxed', ('extract_i8', a, 0), ('extract_i8', b, 0)),
+                                 ('imul24_relaxed', ('extract_i8', a, 1), ('extract_i8', b, 1))),
+                        ('iadd', ('imul24_relaxed', ('extract_i8', a, 2), ('extract_i8', b, 2)),
+                                 ('imul24_relaxed', ('extract_i8', a, 3), ('extract_i8', b, 3))))
+udot_4x8_a_b = ('iadd', ('iadd', ('umul24_relaxed', ('extract_u8', a, 0), ('extract_u8', b, 0)),
+                                 ('umul24_relaxed', ('extract_u8', a, 1), ('extract_u8', b, 1))),
+                        ('iadd', ('umul24_relaxed', ('extract_u8', a, 2), ('extract_u8', b, 2)),
+                                 ('umul24_relaxed', ('extract_u8', a, 3), ('extract_u8', b, 3))))
+sudot_4x8_a_b = ('iadd', ('iadd', ('imul24_relaxed', ('extract_i8', a, 0), ('extract_u8', b, 0)),
+                                  ('imul24_relaxed', ('extract_i8', a, 1), ('extract_u8', b, 1))),
+                         ('iadd', ('imul24_relaxed', ('extract_i8', a, 2), ('extract_u8', b, 2)),
+                                  ('imul24_relaxed', ('extract_i8', a, 3), ('extract_u8', b, 3))))
+sdot_2x16_a_b = ('iadd', ('imul24_relaxed', ('extract_i16', a, 0), ('extract_i16', b, 0)),
+                         ('imul24_relaxed', ('extract_i16', a, 1), ('extract_i16', b, 1)))
+udot_2x16_a_b = ('iadd', ('umul24_relaxed', ('extract_u16', a, 0), ('extract_u16', b, 0)),
+                         ('umul24_relaxed', ('extract_u16', a, 1), ('extract_u16', b, 1)))
 
 optimizations.extend([
     (('sdot_4x8_iadd', a, b, c), ('iadd', sdot_4x8_a_b, c), '!options->has_sdot_4x8'),
|
|
|
|||
Loading…
Add table
Reference in a new issue