lima/ppir: add support and folding for fclamp_pos

Now that ppir uses its own folding for dest modifiers, we can add folding for fclamp_pos, which was previously unused. We borrow the NIR optimization pass from Panfrost to create a nir_op_fclamp_pos_mali where the pattern applies, and reuse the dest modifier lowering to fold it into an output modifier when possible.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30332>
2026-05-08 02:38:04 +02:00 · 2024-08-03 16:17:27 +02:00 · 2024-08-03 16:17:27 +02:00 · 3e2b2c3bdd
commit 3e2b2c3bdd
parent 0286585337
7 changed files with 21 additions and 0 deletions
--- a/src/gallium/drivers/lima/ir/lima_ir.h
+++ b/src/gallium/drivers/lima/ir/lima_ir.h
@ -75,4 +75,6 @@ void lima_nir_duplicate_load_uniforms(nir_shader *shader);

 bool lima_nir_lower_txp(nir_shader *shader);

+bool lima_nir_ppir_algebraic_late(nir_shader *shader);
+
 #endif
--- a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
+++ b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
@ -37,6 +37,11 @@ lower_ftrunc = [
        (('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a')))))
 ]

+# Fuse fmax(a, 0.0) into the PP clamp_positive op (fclamp_pos_mali). Shared with Midgard/Bifrost.
+ppir_algebraic_late = [
+    (('fmax', 'a', 0.0), ('fclamp_pos_mali', 'a')),
+]
+
 def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--import-path', required=True)
@ -54,6 +59,8 @@ def run():
                                      scale_trig).render())
    print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc",
                                      lower_ftrunc).render())
+    print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late",
+                                      ppir_algebraic_late).render())

 if __name__ == '__main__':
    main()
--- a/src/gallium/drivers/lima/ir/pp/lower.c
+++ b/src/gallium/drivers/lima/ir/pp/lower.c
@ -467,6 +467,11 @@ static bool ppir_lower_with_dest_mod(ppir_block *block, ppir_node *node, ppir_ou
   return true;
 }

+static bool ppir_lower_clamp_pos(ppir_block *block, ppir_node *node)
+{
+   return ppir_lower_with_dest_mod(block, node, ppir_outmod_clamp_positive);
+}
+
 static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
 {
   return ppir_lower_with_dest_mod(block, node, ppir_outmod_clamp_fraction);
@ -675,6 +680,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
   [ppir_op_select] = ppir_lower_select,
   [ppir_op_trunc] = ppir_lower_trunc,
   [ppir_op_sat] = ppir_lower_sat,
+   [ppir_op_clamp_pos] = ppir_lower_clamp_pos,
   [ppir_op_branch] = ppir_lower_branch,
   [ppir_op_load_uniform] = ppir_lower_load,
   [ppir_op_load_temp] = ppir_lower_load,
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@ -153,6 +153,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
+   [nir_op_fclamp_pos_mali] = ppir_op_clamp_pos,
 };

 static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@ -49,6 +49,9 @@ const ppir_op_info ppir_op_infos[] = {
   [ppir_op_sat] = {
      .name = "sat",
   },
+   [ppir_op_clamp_pos] = {
+      .name = "clamp_pos",
+   },
   [ppir_op_mul] = {
      .name = "mul",
      .slots = (int []) {
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@ -84,6 +84,7 @@ typedef enum {
   ppir_op_min,
   ppir_op_max,
   ppir_op_trunc,
+   ppir_op_clamp_pos,

   ppir_op_and,
   ppir_op_or,
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@ -262,6 +262,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,

   /* Must be run after optimization loop */
   NIR_PASS_V(s, lima_nir_scale_trig);
+   NIR_PASS_V(s, lima_nir_ppir_algebraic_late);

   NIR_PASS_V(s, nir_copy_prop);
   NIR_PASS_V(s, nir_opt_dce);