diff --git a/src/gallium/drivers/lima/ci/lima-fails.txt b/src/gallium/drivers/lima/ci/lima-fails.txt
index c0a2f1ac25f..e07775e7334 100644
--- a/src/gallium/drivers/lima/ci/lima-fails.txt
+++ b/src/gallium/drivers/lima/ci/lima-fails.txt
@@ -402,7 +402,6 @@ spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat2x4-mat2x4,Fail
 spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x2-mat3x2,Fail
 spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat3x4-mat3x4,Fail
 spec@glsl-1.20@execution@built-in-functions@fs-op-div-mat4x3-mat4x3,Fail
-spec@glsl-1.20@execution@built-in-functions@fs-op-mult-mat4x3-mat3x4,Fail
 spec@glsl-1.20@execution@clipping@fixed-clip-enables,Fail
 spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-reject,Fail
 spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail
diff --git a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c
index 23ed6f5cdbe..a8af9bc5709 100644
--- a/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c
+++ b/src/gallium/drivers/lima/ir/lima_nir_duplicate_consts.c
@@ -33,10 +33,19 @@ lima_nir_duplicate_load_const(nir_builder *b, nir_load_const_instr *load)
 
    nir_foreach_use_safe(use_src, &load->def) {
       nir_load_const_instr *dupl;
+      nir_instr *instr = nir_src_parent_instr(use_src);
+      nir_alu_instr *alu = NULL;
+      if (instr->type == nir_instr_type_alu)
+         alu = nir_instr_as_alu(instr);
 
-      if (last_parent_instr != nir_src_parent_instr(use_src)) {
+      /* Always clone consts for FFMA sources as well, since an ffma is
+       * translated into 2 PPIR ops and each may need its own const.
+       * Redundant consts will be dropped by PPIR later.
+       */
+      if (last_parent_instr != instr ||
+          (alu && alu->op == nir_op_ffma)) {
          /* if ssa use, clone for the target block */
-         b->cursor = nir_before_instr(nir_src_parent_instr(use_src));
+         b->cursor = nir_before_instr(instr);
 
          dupl = nir_load_const_instr_create(b->shader, load->def.num_components,
                                             load->def.bit_size);
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c
index 11525409a20..1e29aa2f933 100644
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -123,6 +123,57 @@ static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
    ppir_node_target_assign(ps, child);
 }
 
+static bool ppir_emit_ffma(ppir_block *block, nir_instr *ni)
+{ /* Emit a NIR ffma as two PPIR nodes: a mul whose result feeds an add. */
+   nir_alu_instr *instr = nir_instr_as_alu(ni);
+   nir_def *def = &instr->def;
+   unsigned mask = nir_component_mask(def->num_components);
+   uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+                           PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
+   /* Bail out with false if either the add or the mul node is not created. */
+   ppir_alu_node *add = ppir_node_create_dest(block, ppir_op_add, def, mask);
+   if (!add)
+      return false;
+   ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, mask);
+   if (!mul)
+      return false;
+
+   ppir_dest *mul_dest = &mul->dest;
+   ppir_dest *add_dest = &add->dest;
+   /* mul targets a pipeline reg: fmul if add writes 1 component, else vmul. */
+   mul_dest->type = ppir_target_pipeline;
+   if (util_bitcount(add_dest->write_mask) == 1) {
+      mul_dest->write_mask = 1;
+      mul_dest->pipeline = ppir_pipeline_reg_fmul;
+   } else {
+      mul_dest->write_mask = u_bit_consecutive(0, 4);
+      mul_dest->pipeline = ppir_pipeline_reg_vmul;
+   }
+
+   add->num_src = 2;
+   mul->num_src = 2;
+   /* ffma sources 0 and 1 become the mul sources, keeping their NIR swizzles. */
+   for (int i = 0; i < 2; i++) {
+      nir_alu_src *alu_src = instr->src + i;
+      ppir_src *ps = mul->src + i;
+      memcpy(ps->swizzle, alu_src->swizzle, sizeof(ps->swizzle));
+      ppir_node_add_src(block->comp, &mul->node, ps, &alu_src->src, mask);
+   }
+   /* ffma source 2 becomes the second add source (add->src[1]). */
+   nir_alu_src *alu_src = instr->src + 2;
+   ppir_src *ps = add->src;
+   memcpy(ps[1].swizzle, alu_src->swizzle, sizeof(ps[1].swizzle));
+   ppir_node_add_src(block->comp, &add->node, ps + 1, &alu_src->src, mask);
+   /* First add source is pointed at the mul node, with an identity swizzle. */
+   memcpy(ps[0].swizzle, identity, sizeof(ps[0].swizzle));
+   ppir_node_target_assign(&ps[0], &mul->node);
+   ppir_node_add_dep(&add->node, &mul->node, ppir_dep_src);
+   /* Append both nodes to the block's node list, add first. */
+   list_addtail(&add->node.list, &block->node_list);
+   list_addtail(&mul->node.list, &block->node_list);
+   return true;
+}
+
 static int nir_to_ppir_opcodes[nir_num_opcodes] = {
    [nir_op_mov] = ppir_op_mov,
    [nir_op_fmul] = ppir_op_mul,
@@ -152,6 +203,7 @@ static int nir_to_ppir_opcodes[nir_num_opcodes] = {
    [nir_op_ftrunc] = ppir_op_trunc,
    [nir_op_fsat] = ppir_op_sat,
    [nir_op_fclamp_pos] = ppir_op_clamp_pos,
+   [nir_op_ffma] = ppir_op_ffma,
 };
 
 static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
@@ -164,6 +216,11 @@ static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
       ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
       return false;
    }
+
+   if (op == ppir_op_ffma) {
+      return ppir_emit_ffma(block, ni);
+   }
+
    unsigned mask = nir_component_mask(def->num_components);
    ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask);
    if (!node)
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index 3c45d27898a..a349c294005 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -118,6 +118,8 @@ typedef enum {
    ppir_op_undef,
    ppir_op_dummy,
 
+   ppir_op_ffma,
+
    ppir_op_num,
 } ppir_op;
 
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 5f623b2e7d9..952c1151a5d 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -69,9 +69,9 @@ static const nir_shader_compiler_options vs_nir_options = {
 };
 
 static const nir_shader_compiler_options fs_nir_options = {
-   .lower_ffma16 = true,
-   .lower_ffma32 = true,
-   .lower_ffma64 = true,
+   .fuse_ffma16 = true,
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
    .lower_fpow = true,
    .lower_fdiv = true,
    .lower_fmod = true,
@@ -266,6 +266,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
 
    /* Must be run after optimization loop */
    NIR_PASS_V(s, lima_nir_scale_trig);
+   NIR_PASS_V(s, nir_opt_algebraic_late);
    NIR_PASS_V(s, lima_nir_ppir_algebraic_late);
 
    NIR_PASS_V(s, nir_copy_prop);