diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ec841c43536..cb692846104 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3392,6 +3392,9 @@ typedef struct nir_shader_compiler_options {
     * to imul with masked inputs and iadd */
    bool has_umad24;
 
+   /* Backend supports fused compare against zero and csel */
+   bool has_fused_comp_and_csel;
+
    /** Backend supports fsub, if not set fsub will automatically be lowered to
     * fadd(x, fneg(y)). If true, driver should call nir_opt_algebraic_late(). */
    bool has_fsub;
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 012c7bfea09..e9514e1bce2 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1015,6 +1015,12 @@ opcode("b16csel", 0, tuint, [0, 0, 0],
 opcode("b32csel", 0, tuint, [0, 0, 0],
        [tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
 
+triop("i32csel_gt", tint32, "", "(src0 > 0) ? src1 : src2")
+triop("i32csel_ge", tint32, "", "(src0 >= 0) ? src1 : src2")
+
+triop("fcsel_gt", tfloat32, "", "(src0 > 0.0f) ? src1 : src2")
+triop("fcsel_ge", tfloat32, "", "(src0 >= 0.0f) ? src1 : src2")
+
 # SM5 bfi assembly
 triop("bfi", tuint32, "", """
 unsigned mask = src0, insert = src1, base = src2;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 32d1dab1743..00fcbe81d71 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1662,6 +1662,22 @@ optimizations.extend([
    (('imul24', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'),
    (('imul24', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b)))), '!options->lower_bitops'),
    (('imul24', a, 0), (0)),
+
+   (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
+   (('fcsel', ('slt', a, 0), b, c), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
+   (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
+   (('fcsel', ('sge', 0, a), b, c), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
+
+   (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"),
+   (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"),
+   (('bcsel', ('ige', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, b, c), "options->has_fused_comp_and_csel"),
+   (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"),
+
+   (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"),
+   (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"),
+   (('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"),
+   (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"),
+
 ])

 # bit_size dependent lowerings