diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index c0f466b7e4b..d316e805dd7 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -4571,6 +4571,10 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) NIR_PASS(progress, nir, nir_opt_cse); } + /* This opt currently helps on Bifrost but not Valhall */ + if (gpu_id < 0x9000) + NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise); + NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL); NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true); NIR_PASS(progress, nir, nir_opt_vectorize, bi_vectorize_filter, NULL); diff --git a/src/panfrost/bifrost/bifrost_nir.h b/src/panfrost/bifrost/bifrost_nir.h index b94ba6eaf38..ba0208012b8 100644 --- a/src/panfrost/bifrost/bifrost_nir.h +++ b/src/panfrost/bifrost/bifrost_nir.h @@ -27,3 +27,4 @@ bool bifrost_nir_lower_algebraic_late(nir_shader *shader); bool bifrost_nir_lower_xfb(nir_shader *shader); +bool bifrost_nir_opt_boolean_bitwise(nir_shader *shader); diff --git a/src/panfrost/bifrost/bifrost_nir_algebraic.py b/src/panfrost/bifrost/bifrost_nir_algebraic.py index 9a6085815fd..77fad35ff31 100644 --- a/src/panfrost/bifrost/bifrost_nir_algebraic.py +++ b/src/panfrost/bifrost/bifrost_nir_algebraic.py @@ -28,6 +28,20 @@ a = 'a' b = 'b' c = 'c' +# In general, bcsel is cheaper than bitwise arithmetic on Mali. On +# Bifrost, we can implement bcsel as either CSEL or MUX to schedule to either +# execution unit. On Valhall, bitwise arithmetic may be on the SFU whereas MUX +# is on the higher-throughput CVT unit. We get a zero argument for free relative +# to the bitwise op, which would be LSHIFT_* internally taking a zero anyway. +# +# So re-express boolean bitwise ops as bcsel (only enabled on Bifrost for now).
+opt_bool_bitwise = [ + (('iand', 'a@1', 'b@1'), ('bcsel', a, b, False)), + (('ior', 'a@1', 'b@1'), ('bcsel', a, a, b)), + (('iand', 'a@1', ('inot', 'b@1')), ('bcsel', b, False, a)), + (('ior', 'a@1', ('inot', 'b@1')), ('bcsel', b, a, True)), +] + algebraic_late = [ # Canonical form. The scheduler will convert back if it makes sense. (('fmul', a, 2.0), ('fadd', a, a)), @@ -69,6 +83,8 @@ def run(): print('#include "bifrost_nir.h"') + print(nir_algebraic.AlgebraicPass("bifrost_nir_opt_boolean_bitwise", + opt_bool_bitwise).render()) print(nir_algebraic.AlgebraicPass("bifrost_nir_lower_algebraic_late", algebraic_late).render())