diff --git a/src/broadcom/compiler/meson.build b/src/broadcom/compiler/meson.build
index 2d68fb41138..95156140ad9 100644
--- a/src/broadcom/compiler/meson.build
+++ b/src/broadcom/compiler/meson.build
@@ -23,6 +23,7 @@ libbroadcom_compiler_files = files(
   'vir.c',
   'vir_dump.c',
   'vir_live_variables.c',
+  'vir_opt_constant_alu.c',
   'vir_opt_copy_propagate.c',
   'vir_opt_dead_code.c',
   'vir_opt_redundant_flags.c',
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 4bec511346a..8e83a8ed90c 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -988,6 +988,7 @@ bool vir_opt_peephole_sf(struct v3d_compile *c);
 bool vir_opt_redundant_flags(struct v3d_compile *c);
 bool vir_opt_small_immediates(struct v3d_compile *c);
 bool vir_opt_vpm(struct v3d_compile *c);
+bool vir_opt_constant_alu(struct v3d_compile *c);
 void v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c);
 void v3d_nir_lower_line_smooth(nir_shader *shader);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 3d816cbb2b5..7f915b6b910 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -1497,6 +1497,7 @@ vir_optimize(struct v3d_compile *c)
                 OPTPASS(vir_opt_redundant_flags);
                 OPTPASS(vir_opt_dead_code);
                 OPTPASS(vir_opt_small_immediates);
+                OPTPASS(vir_opt_constant_alu);
 
                 if (!progress)
                         break;
diff --git a/src/broadcom/compiler/vir_opt_constant_alu.c b/src/broadcom/compiler/vir_opt_constant_alu.c
new file mode 100644
index 00000000000..561b7af6b8a
--- /dev/null
+++ b/src/broadcom/compiler/vir_opt_constant_alu.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vir_opt_constant_alu.c
+ *
+ * Identifies sequences of ALU instructions that operate on constant operands
+ * and reduces them to a uniform load.
+ *
+ * Currently, this is useful to optimize the result of removing leading ldunifa
+ * instructions in the DCE pass, which can leave a series of constant additions
+ * that increment the unifa address by 4 for each leading ldunifa removed.
+ * It helps turn this:
+ *
+ * nop t1; ldunif (0x00000004 / 0.000000)
+ * nop t2; ldunif (0x00000004 / 0.000000)
+ * add t3, t1, t2
+ *
+ * into:
+ *
+ * nop t1; ldunif (0x00000004 / 0.000000)
+ * nop t2; ldunif (0x00000004 / 0.000000)
+ * nop t4; ldunif (0x00000008 / 0.000000)
+ * mov t3, t4
+ *
+ * For best results we want to run copy propagation between this pass and the
+ * combine constants pass: every time we convert an ALU instruction into a
+ * uniform load, we move the uniform into the original ALU destination. By
+ * running copy propagation immediately afterwards we can reuse that uniform
+ * as a source in follow-up ALU instructions, making them constant as well
+ * and allowing this pass to keep making progress. However, if the small
+ * immediates optimization runs before that, it can convert some of the MOVs
+ * to use small immediates instead of uniforms, preventing us from getting
+ * the most out of this pass, since small immediates are not copy propagated.
+ */
+
+#include "v3d_compiler.h"
+
+static bool
+opt_constant_add(struct v3d_compile *c, struct qinst *inst, uint32_t *values)
+{
+        /* FIXME: handle more add operations */
+        struct qreg unif = { };
+        switch (inst->qpu.alu.add.op) {
+        case V3D_QPU_A_ADD:
+                c->cursor = vir_after_inst(inst);
+                unif = vir_uniform_ui(c, values[0] + values[1]);
+                break;
+        default:
+                return false;
+        }
+
+        /* Remove the original ALU instruction and replace it with a uniform
+         * load.
+         */
+        struct qreg dst = inst->dst;
+        struct qinst *mov = vir_MOV_dest(c, dst, unif);
+        vir_remove_instruction(c, inst);
+        if (dst.file == QFILE_TEMP)
+                c->defs[dst.index] = mov;
+        return true;
+}
+
+static bool
+try_opt_constant_alu(struct v3d_compile *c, struct qinst *inst)
+{
+        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        /* If the instruction does anything other than writing the result
+         * directly to the destination, skip it.
+         */
+        if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
+            inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
+                return false;
+        }
+
+        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
+            inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
+                return false;
+        }
+
+        assert(vir_get_nsrc(inst) <= 2);
+        uint32_t values[2];
+        for (int i = 0; i < vir_get_nsrc(inst); i++) {
+                if (inst->src[i].file == QFILE_SMALL_IMM &&
+                    v3d_qpu_small_imm_unpack(c->devinfo,
+                                             inst->qpu.raddr_b,
+                                             &values[i])) {
+                        continue;
+                }
+
+                if (inst->src[i].file == QFILE_TEMP) {
+                        struct qinst *def = c->defs[inst->src[i].index];
+                        if (!def)
+                                return false;
+
+                        if ((def->qpu.sig.ldunif || def->qpu.sig.ldunifrf) &&
+                            c->uniform_contents[def->uniform] == QUNIFORM_CONSTANT) {
+                                values[i] = c->uniform_data[def->uniform];
+                                continue;
+                        }
+                }
+
+                return false;
+        }
+
+        /* FIXME: handle mul operations */
+        if (vir_is_add(inst))
+                return opt_constant_add(c, inst, values);
+
+        return false;
+}
+
+bool
+vir_opt_constant_alu(struct v3d_compile *c)
+{
+        bool progress = false;
+        vir_for_each_block(block, c) {
+                vir_for_each_inst_safe(inst, block) {
+                        progress = try_opt_constant_alu(c, inst) || progress;
+                }
+        }
+
+        return progress;
+}
diff --git a/src/broadcom/compiler/vir_opt_dead_code.c b/src/broadcom/compiler/vir_opt_dead_code.c
index 59b458c6967..b40b25b20c0 100644
--- a/src/broadcom/compiler/vir_opt_dead_code.c
+++ b/src/broadcom/compiler/vir_opt_dead_code.c
@@ -160,10 +160,6 @@ increment_unifa_address(struct v3d_compile *c, struct qinst *unifa)
                 return true;
         }
 
-        /* FIXME: we can optimize this further by implementing a constant
-         * ALU pass in the backend, for the case where we are skipping
-         * multiple leading ldunifa.
-         */
         if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
             unifa->qpu.alu.add.op == V3D_QPU_A_ADD) {
                 c->cursor = vir_after_inst(unifa);
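Note on the ordering comment in the new file: vir_opt_constant_alu() returns whether it
changed anything so that the OPTPASS hook added to vir_optimize() can keep iterating until
no pass makes further progress. The sketch below is only an illustration of that
progress-driven fixed-point pattern, using the pass declarations from v3d_compiler.h; the
function name run_backend_opt_loop is hypothetical, the pass ordering is only indicative,
and the real vir_optimize() uses an OPTPASS() macro that does more than shown here.

/* Illustrative sketch only (not Mesa code): a progress-driven optimization
 * loop that reruns the passes until none of them reports a change, which is
 * what makes the bool return value of vir_opt_constant_alu() meaningful.
 */
#include "v3d_compiler.h"

static void
run_backend_opt_loop(struct v3d_compile *c)
{
        bool progress;

        do {
                progress = false;

                /* Each pass returns true if it modified the program, forcing
                 * another round so later passes can clean up what earlier
                 * ones exposed (e.g. copy propagation feeding new constant
                 * operands into vir_opt_constant_alu()).
                 */
                progress |= vir_opt_copy_propagate(c);
                progress |= vir_opt_dead_code(c);
                progress |= vir_opt_small_immediates(c);
                progress |= vir_opt_constant_alu(c);
        } while (progress);
}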