From 3e2b2c3bdd99469a6b328882b5516a196d2642bb Mon Sep 17 00:00:00 2001
From: Erico Nunes
Date: Sat, 3 Aug 2024 16:17:27 +0200
Subject: [PATCH] lima/ppir: add support and folding for fclamp_pos

Now that ppir uses its own folding for dest modifiers, we can add
folding for fclamp_pos which was previously unused.
We borrow the nir optimization pass from Panfrost to create a
nir_op_fclamp_pos_mali when possible and reuse the dest modifier
lowering to optimize that to a modifier when possible.

Signed-off-by: Erico Nunes
Reviewed-by: Vasily Khoruzhick
Part-of:
---
 src/gallium/drivers/lima/ir/lima_ir.h             | 2 ++
 src/gallium/drivers/lima/ir/lima_nir_algebraic.py | 7 +++++++
 src/gallium/drivers/lima/ir/pp/lower.c            | 6 ++++++
 src/gallium/drivers/lima/ir/pp/nir.c              | 1 +
 src/gallium/drivers/lima/ir/pp/node.c             | 3 +++
 src/gallium/drivers/lima/ir/pp/ppir.h             | 1 +
 src/gallium/drivers/lima/lima_program.c           | 1 +
 7 files changed, 21 insertions(+)

diff --git a/src/gallium/drivers/lima/ir/lima_ir.h b/src/gallium/drivers/lima/ir/lima_ir.h
index 706804fb8d5..eef815c2567 100644
--- a/src/gallium/drivers/lima/ir/lima_ir.h
+++ b/src/gallium/drivers/lima/ir/lima_ir.h
@@ -75,4 +75,6 @@ void lima_nir_duplicate_load_uniforms(nir_shader *shader);
 
 bool lima_nir_lower_txp(nir_shader *shader);
 
+bool lima_nir_ppir_algebraic_late(nir_shader *shader);
+
 #endif
diff --git a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
index db0ccc75bf5..8719b44d553 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
+++ b/src/gallium/drivers/lima/ir/lima_nir_algebraic.py
@@ -37,6 +37,11 @@ lower_ftrunc = [
         (('ftrunc', 'a'), ('fmul', ('fsign', 'a'), ('ffloor', ('fmax', 'a', ('fneg', 'a')))))
 ]
 
+# PP fuse clamp_positive. Shared with Midgard/Bifrost
+ppir_algebraic_late = [
+    (('fmax', 'a', 0.0), ('fclamp_pos_mali', 'a')),
+]
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('-p', '--import-path', required=True)
@@ -54,6 +59,8 @@ def run():
                                       scale_trig).render())
     print(nir_algebraic.AlgebraicPass("lima_nir_lower_ftrunc",
                                       lower_ftrunc).render())
+    print(nir_algebraic.AlgebraicPass("lima_nir_ppir_algebraic_late",
+                                      ppir_algebraic_late).render())
 
 if __name__ == '__main__':
     main()
diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c
index e344f53ce56..b2ffc7bec04 100644
--- a/src/gallium/drivers/lima/ir/pp/lower.c
+++ b/src/gallium/drivers/lima/ir/pp/lower.c
@@ -467,6 +467,11 @@ static bool ppir_lower_with_dest_mod(ppir_block *block, ppir_node *node, ppir_ou
    return true;
 }
 
+static bool ppir_lower_clamp_pos(ppir_block *block, ppir_node *node)
+{
+   return ppir_lower_with_dest_mod(block, node, ppir_outmod_clamp_positive);
+}
+
 static bool ppir_lower_sat(ppir_block *block, ppir_node *node)
 {
    return ppir_lower_with_dest_mod(block, node, ppir_outmod_clamp_fraction);
@@ -675,6 +680,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
    [ppir_op_select] = ppir_lower_select,
    [ppir_op_trunc] = ppir_lower_trunc,
    [ppir_op_sat] = ppir_lower_sat,
+   [ppir_op_clamp_pos] = ppir_lower_clamp_pos,
    [ppir_op_branch] = ppir_lower_branch,
    [ppir_op_load_uniform] = ppir_lower_load,
    [ppir_op_load_temp] = ppir_lower_load,
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c
index 6d61ab66790..f1b8ad61481 100644
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -153,6 +153,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
    [nir_op_fsat] = ppir_op_sat,
    [nir_op_fddx] = ppir_op_ddx,
    [nir_op_fddy] = ppir_op_ddy,
+   [nir_op_fclamp_pos_mali] = ppir_op_clamp_pos,
 };
 
 static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c
index e22a06ce5ee..abf10dd32d9 100644
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@@ -49,6 +49,9 @@ const ppir_op_info ppir_op_infos[] = {
    [ppir_op_sat] = {
       .name = "sat",
    },
+   [ppir_op_clamp_pos] = {
+      .name = "clamp_pos",
+   },
    [ppir_op_mul] = {
       .name = "mul",
       .slots = (int []) {
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index f9191a1c5d3..3c45d27898a 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -84,6 +84,7 @@ typedef enum {
    ppir_op_min,
    ppir_op_max,
    ppir_op_trunc,
+   ppir_op_clamp_pos,
 
    ppir_op_and,
    ppir_op_or,
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 05576b44dfc..985f7d5e138 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -262,6 +262,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
 
    /* Must be run after optimization loop */
    NIR_PASS_V(s, lima_nir_scale_trig);
+   NIR_PASS_V(s, lima_nir_ppir_algebraic_late);
 
    NIR_PASS_V(s, nir_copy_prop);
    NIR_PASS_V(s, nir_opt_dce);