mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 05:08:08 +02:00
nir, ir3: Add lower_fmulz_with_abs_min backend option
This commit adds the `lower_fmulz_with_abs_min` option, which lowers `fmulz` -> `min(abs(a), abs(b)) == 0.0 ? 0.0 : a * b` and `ffmaz` -> `min(abs(a), abs(b)) == 0.0 ? c : ffma(a, b, c)`. This is useful for ISAs which have `abs` for free on `min`, such as ir3. Adreno A750 benchmark of 10 runs of 5 DX9 single-frame trimmed captures looped 2048 times, using u_trace to measure from `start_render_pass` to `end_render_pass`. Results: sysmem: -1.91156%, -2.21791%, -2.02533%, -2.21666%, -2.33272%, -2.67349%, -1.75278%, -2.05923%, -2.26892%, -2.10506% Avg: ~ -2.16% ST.S: ~ 0.25% gmem: -3.61496%, -3.66682%, -3.80901%, -3.51198%, -3.72950%, -3.71413%, -3.64467%, -3.67092%, -3.90640%, -3.83888% Avg: ~ -3.71% ST.S: ~ 0.12% Signed-off-by: Karmjit Mahil <karmjit.mahil@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31479>
This commit is contained in:
parent
8d19ffef0a
commit
9c6183604f
3 changed files with 19 additions and 0 deletions
|
|
@ -3954,6 +3954,14 @@ for op in ['ffma', 'ffmaz']:
|
|||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)),
|
||||
]
|
||||
|
||||
# Lower fmulz using min(abs(a), abs(b))==0.0
|
||||
late_optimizations += [
|
||||
(('fmulz@32', a, b),
|
||||
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'),
|
||||
(('ffmaz@32', a, b, c),
|
||||
('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min')
|
||||
]
|
||||
|
||||
# mediump: If an opcode is surrounded by conversions, remove the conversions.
|
||||
# The rationale is that type conversions + the low precision opcode are more
|
||||
# expensive than the same arithmetic opcode at higher precision.
|
||||
|
|
|
|||
|
|
@ -712,6 +712,11 @@ typedef struct nir_shader_compiler_options {
|
|||
uint8_t support_indirect_inputs;
|
||||
uint8_t support_indirect_outputs;
|
||||
|
||||
/**
|
||||
* Lower fmulz to `min(abs(a), abs(b)) == 0.0 ? 0.0 : a * b` and ffmaz to
* `min(abs(a), abs(b)) == 0.0 ? c : ffma(a, b, c)`.
|
||||
*/
|
||||
bool lower_fmulz_with_abs_min;
|
||||
|
||||
/** store the variable offset into the intrinsic range_base instead
|
||||
* of adding it to the image index.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -113,6 +113,12 @@ static const nir_shader_compiler_options ir3_base_options = {
|
|||
.lower_uniforms_to_ubo = true,
|
||||
.max_unroll_iterations = 32,
|
||||
|
||||
/* Not actually supported but we want fmulz to be produced and then be
|
||||
* lowered with the abs min pattern since we have free abs on min.
|
||||
*/
|
||||
.has_fmulz = true,
|
||||
.lower_fmulz_with_abs_min = true,
|
||||
|
||||
.lower_cs_local_index_to_id = true,
|
||||
.lower_wpos_pntc = true,
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue