diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 0e9240fff2f..bca31e57ea2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3954,6 +3954,17 @@ for op in ['ffma', 'ffmaz']:
         (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)),
     ]
 
+# Lower 32-bit fmulz/ffmaz with a min(abs(a), abs(b)) == 0.0 test: when the
+# test passes the product is replaced by 0.0 (so ffmaz yields just c),
+# otherwise the regular fmul/ffma is used. Only applied when the backend sets
+# options->lower_fmulz_with_abs_min.
+late_optimizations += [
+    (('fmulz@32', a, b),
+     ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), 0.0, ('fmul', a, b)), 'options->lower_fmulz_with_abs_min'),
+    (('ffmaz@32', a, b, c),
+     ('bcsel', ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0), c, ('ffma@32', a, b, c)), 'options->lower_fmulz_with_abs_min'),
+]
+
 # mediump: If an opcode is surrounded by conversions, remove the conversions.
 # The rationale is that type conversions + the low precision opcode are more
 # expensive that the same arithmetic opcode at higher precision.
diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h
index eef99759df2..f97df5ac67f 100644
--- a/src/compiler/nir/nir_shader_compiler_options.h
+++ b/src/compiler/nir/nir_shader_compiler_options.h
@@ -712,6 +712,12 @@ typedef struct nir_shader_compiler_options {
    uint8_t support_indirect_inputs;
    uint8_t support_indirect_outputs;
 
+   /**
+    * Lower 32-bit fmulz to `min(abs(a), abs(b)) == 0.0 ? 0.0 : a * b` and
+    * 32-bit ffmaz to `min(abs(a), abs(b)) == 0.0 ? c : ffma(a, b, c)`.
+    */
+   bool lower_fmulz_with_abs_min;
+
    /** store the variable offset into the instrinsic range_base instead
     * of adding it to the image index.
     */
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 51c5cb52698..22bc882114d 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -113,6 +113,12 @@ static const nir_shader_compiler_options ir3_base_options = {
    .lower_uniforms_to_ubo = true,
    .max_unroll_iterations = 32,
 
+   /* Not actually supported but we want fmulz to be produced and then be
+    * lowered with the abs min pattern since we have free abs on min.
+    */
+   .has_fmulz = true,
+   .lower_fmulz_with_abs_min = true,
+
    .lower_cs_local_index_to_id = true,
    .lower_wpos_pntc = true,
 