diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 90c3f4a51ae..3ceaa5a3f75 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -168,6 +168,9 @@ typedef enum ir3_register_flags { * Note: This effectively has the same semantics as IR3_REG_KILL. */ IR3_REG_LAST_USE = BIT(18), + + /* Predicate register (p0.c). Cannot be combined with half or shared. */ + IR3_REG_PREDICATE = BIT(19), } ir3_register_flags; struct ir3_register { @@ -563,9 +566,6 @@ struct ir3 { /* same for a1.x: */ DECLARE_ARRAY(struct ir3_instruction *, a1_users); - /* and same for instructions that consume predicate register: */ - DECLARE_ARRAY(struct ir3_instruction *, predicates); - /* Track texture sample instructions which need texture state * patched in (for astc-srgb workaround): */ @@ -952,7 +952,7 @@ is_same_type_mov(struct ir3_instruction *instr) dst = instr->dsts[0]; /* mov's that write to a0 or p0.x are special: */ - if (dst->num == regid(REG_P0, 0)) + if (dst->flags & IR3_REG_PREDICATE) return false; if (reg_num(dst) == REG_A0) return false; @@ -1295,7 +1295,9 @@ is_dest_gpr(struct ir3_register *dst) { if (dst->wrmask == 0) return false; - if ((reg_num(dst) == REG_A0) || (dst->num == regid(REG_P0, 0))) + if (reg_num(dst) == REG_A0) + return false; + if (dst->flags & IR3_REG_PREDICATE) return false; return true; } @@ -1333,10 +1335,10 @@ writes_addr1(struct ir3_instruction *instr) static inline bool writes_pred(struct ir3_instruction *instr) { - /* Note: only the first dest can write to p0.x */ + /* Note: only the first dest can write to p0 */ if (instr->dsts_count > 0) { struct ir3_register *dst = instr->dsts[0]; - return reg_num(dst) == REG_P0; + return !!(dst->flags & IR3_REG_PREDICATE); } return false; } @@ -1349,8 +1351,8 @@ writes_pred(struct ir3_instruction *instr) static inline bool is_reg_special(const struct ir3_register *reg) { - return (reg->flags & IR3_REG_SHARED) || (reg_num(reg) == REG_A0) || - (reg_num(reg) == REG_P0); + return (reg->flags & (IR3_REG_SHARED | IR3_REG_PREDICATE) || + (reg_num(reg) == REG_A0)); } /* Same as above but in cases where we don't have a register. 
r48.x and above @@ -1381,9 +1383,9 @@ conflicts(struct ir3_register *a, struct ir3_register *b) static inline bool reg_gpr(struct ir3_register *r) { - if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) + if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_PREDICATE)) return false; - if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) + if (reg_num(r) == REG_A0) return false; return true; } @@ -1999,6 +2001,7 @@ bool ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v); /* register assignment: */ int ir3_ra(struct ir3_shader_variant *v); +void ir3_ra_predicates(struct ir3_shader_variant *v); /* lower subgroup ops: */ bool ir3_lower_subgroups(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 5a254be1fe1..5f651a7b622 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -156,6 +156,8 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->local_mem_size = dev_info->cs_shared_mem_size; + compiler->num_predicates = 1; + if (compiler->gen >= 6) { compiler->samgq_workaround = true; /* a6xx split the pipeline state into geometry and fragment state, in @@ -212,6 +214,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->stsc_duplication_quirk = dev_info->a7xx.stsc_duplication_quirk; compiler->load_shader_consts_via_preamble = dev_info->a7xx.load_shader_consts_via_preamble; compiler->load_inline_uniforms_via_preamble_ldgk = dev_info->a7xx.load_inline_uniforms_via_preamble_ldgk; + compiler->num_predicates = 4; } else { compiler->max_const_pipeline = 512; compiler->max_const_geom = 512; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 23eb3eeb161..9703052fd5d 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -214,6 +214,9 @@ struct ir3_compiler { */ bool has_getfiberid; + /* Number of available predicate registers (p0.c) */ + uint32_t num_predicates; + /* MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB */ uint32_t max_variable_workgroup_size; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 1c5d253e433..6b3b46df509 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -2614,8 +2614,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) cond->cat2.condition = IR3_COND_NE; /* condition always goes in predicate register: */ - cond->dsts[0]->num = regid(REG_P0, 0); - cond->dsts[0]->flags &= ~IR3_REG_SSA; + cond->dsts[0]->flags |= IR3_REG_PREDICATE; if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_demote_if) { @@ -2631,8 +2630,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) IR3_BARRIER_ACTIVE_FIBERS_W; kill->barrier_conflict = IR3_BARRIER_IMAGE_W | IR3_BARRIER_BUFFER_W | IR3_BARRIER_ACTIVE_FIBERS_R; - kill->srcs[0]->num = regid(REG_P0, 0); - array_insert(ctx->ir, ctx->ir->predicates, kill); + kill->srcs[0]->flags |= IR3_REG_PREDICATE; array_insert(b, b->keeps, kill); ctx->so->has_kill = true; @@ -2653,14 +2651,13 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) cond->cat2.condition = IR3_COND_NE; /* condition always goes in predicate register: */ - cond->dsts[0]->num = regid(REG_P0, 0); + cond->dsts[0]->flags |= IR3_REG_PREDICATE; - kill = ir3_PREDT(b, cond, 0); + kill = ir3_PREDT(b, cond, IR3_REG_PREDICATE); kill->barrier_class = IR3_BARRIER_EVERYTHING; kill->barrier_conflict = 
IR3_BARRIER_EVERYTHING; - array_insert(ctx->ir, ctx->ir->predicates, kill); array_insert(b, b->keeps, kill); break; } @@ -2673,8 +2670,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst[0] = ir3_ANY_MACRO(ctx->block, pred, 0); else dst[0] = ir3_ALL_MACRO(ctx->block, pred, 0); - dst[0]->srcs[0]->num = regid(REG_P0, 0); - array_insert(ctx->ir, ctx->ir->predicates, dst[0]); + dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE; break; } case nir_intrinsic_elect: @@ -2690,8 +2686,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst[0] = ir3_READ_COND_MACRO(ctx->block, ir3_get_predicate(ctx, cond), 0, src, 0); dst[0]->dsts[0]->flags |= IR3_REG_SHARED; - dst[0]->srcs[0]->num = regid(REG_P0, 0); - array_insert(ctx->ir, ctx->ir->predicates, dst[0]); + dst[0]->srcs[0]->flags |= IR3_REG_PREDICATE; break; } @@ -2712,8 +2707,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0]; struct ir3_instruction *pred = ir3_get_predicate(ctx, src); ballot = ir3_BALLOT_MACRO(ctx->block, pred, components); - ballot->srcs[0]->num = regid(REG_P0, 0); - array_insert(ctx->ir, ctx->ir->predicates, ballot); + ballot->srcs[0]->flags |= IR3_REG_PREDICATE; } ballot->barrier_class = IR3_BARRIER_ACTIVE_FIBERS_R; @@ -3820,11 +3814,11 @@ emit_if(struct ir3_context *ctx, nir_if *nif) if (condition->opc == OPC_ANY_MACRO && condition->block == ctx->block) { struct ir3_instruction *pred = ssa(condition->srcs[0]); - ir3_BANY(ctx->block, pred, 0); + ir3_BANY(ctx->block, pred, IR3_REG_PREDICATE); } else if (condition->opc == OPC_ALL_MACRO && condition->block == ctx->block) { struct ir3_instruction *pred = ssa(condition->srcs[0]); - ir3_BALL(ctx->block, pred, 0); + ir3_BALL(ctx->block, pred, IR3_REG_PREDICATE); } else if (condition->opc == OPC_ELECT_MACRO && condition->block == ctx->block) { ir3_GETONE(ctx->block); @@ -3837,7 +3831,7 @@ emit_if(struct ir3_context *ctx, nir_if *nif) ir3_SHPS(ctx->block); } else { struct ir3_instruction *pred = ir3_get_predicate(ctx, condition); - ir3_BR(ctx->block, pred, 0); + ir3_BR(ctx->block, pred, IR3_REG_PREDICATE); } emit_cf_list(ctx, &nif->then_list); @@ -3959,15 +3953,14 @@ emit_stream_out(struct ir3_context *ctx) /* setup 'if (vtxcnt < maxvtxcnt)' condition: */ cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0); - cond->dsts[0]->num = regid(REG_P0, 0); - cond->dsts[0]->flags &= ~IR3_REG_SSA; + cond->dsts[0]->flags |= IR3_REG_PREDICATE; cond->cat2.condition = IR3_COND_LT; /* condition goes on previous block to the conditional, * since it is used to pick which of the two successor * paths to take: */ - ir3_BR(orig_end_block, cond, 0); + ir3_BR(orig_end_block, cond, IR3_REG_PREDICATE); /* switch to stream_out_block to generate the stream-out * instructions: diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index f57f8118020..5618188f03a 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -463,15 +463,14 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) struct ir3_block *b = ctx->block; struct ir3_instruction *cond; - /* NOTE: only cmps.*.* can write p0.x: */ + /* NOTE: we use cmps.s.ne x, 0 to move x into a predicate register */ struct ir3_instruction *zero = create_immed_typed(b, 0, is_half(src) ?
TYPE_U16 : TYPE_U32); cond = ir3_CMPS_S(b, src, 0, zero, 0); cond->cat2.condition = IR3_COND_NE; /* condition always goes in predicate register: */ - cond->dsts[0]->num = regid(REG_P0, 0); - cond->dsts[0]->flags &= ~IR3_REG_SSA; + cond->dsts[0]->flags |= IR3_REG_PREDICATE; return cond; } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 6a872dc904b..bdf8628cee6 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -100,7 +100,7 @@ static bool is_foldable_double_cmp(struct ir3_instruction *cmp) { struct ir3_instruction *cond = ssa(cmp->srcs[0]); - return (cmp->dsts[0]->num == regid(REG_P0, 0)) && cond && + return (cmp->dsts[0]->flags & IR3_REG_PREDICATE) && cond && (cmp->srcs[1]->flags & IR3_REG_IMMED) && (cmp->srcs[1]->iim_val == 0) && (cmp->cat2.condition == IR3_COND_NE) && diff --git a/src/freedreno/ir3/ir3_dce.c b/src/freedreno/ir3/ir3_dce.c index c640f6e5c35..456a1c805c4 100644 --- a/src/freedreno/ir3/ir3_dce.c +++ b/src/freedreno/ir3/ir3_dce.c @@ -133,12 +133,6 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) struct ir3_instruction *terminator = ir3_block_get_terminator(block); if (terminator) { instr_dce(terminator, false); - - /* Temporary workaround for predicates not being SSA. Won't be - * necessary anymore once we have RA for predicates. - */ - foreach_src (src, terminator) - instr_dce(src->def->instr, false); } } @@ -181,12 +175,6 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) ir->a1_users[i] = NULL; } - for (i = 0; i < ir->predicates_count; i++) { - struct ir3_instruction *instr = ir->predicates[i]; - if (instr && (instr->flags & IR3_INSTR_UNUSED)) - ir->predicates[i] = NULL; - } - /* cleanup unused inputs: */ foreach_input_n (in, n, ir) if (in->flags & IR3_INSTR_UNUSED) diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index e9ad732eadb..69a59b67fe6 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -727,9 +727,6 @@ invert_branch(struct ir3_instruction *branch) * Initially this is done naively, without considering if the successor * block immediately follows the current block (ie. so no jump required), * but that is cleaned up in opt_jump(). - * - * TODO what ensures that the last write to p0.x in a block is the - * branch condition? Have we been getting lucky all this time? */ static void block_sched(struct ir3 *ir) diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index eb3732be9fa..6549e4bbe56 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -845,11 +845,11 @@ instr: iflags cat0_instr label: T_IDENTIFIER ':' { new_label($1); } -cat0_src1: '!' T_P0 { instr->cat0.inv1 = true; $$ = new_src((62 << 3) + $2, 0); } -| T_P0 { $$ = new_src((62 << 3) + $1, 0); } +cat0_src1: '!' T_P0 { instr->cat0.inv1 = true; $$ = new_src((62 << 3) + $2, IR3_REG_PREDICATE); } +| T_P0 { $$ = new_src((62 << 3) + $1, IR3_REG_PREDICATE); } -cat0_src2: '!' T_P0 { instr->cat0.inv2 = true; $$ = new_src((62 << 3) + $2, 0); } -| T_P0 { $$ = new_src((62 << 3) + $1, 0); } +cat0_src2: '!' 
T_P0 { instr->cat0.inv2 = true; $$ = new_src((62 << 3) + $2, IR3_REG_PREDICATE); } +| T_P0 { $$ = new_src((62 << 3) + $1, IR3_REG_PREDICATE); } cat0_immed: '#' integer { instr->cat0.immed = $2; } | '#' T_IDENTIFIER { ralloc_steal(instr, (void *)$2); instr->cat0.target_label = $2; } @@ -1434,12 +1434,12 @@ meta_print: meta_print_start meta_print_regs { src: T_REGISTER { $$ = new_src($1, 0); } | T_A0 { $$ = new_src((61 << 3), IR3_REG_HALF); } | T_A1 { $$ = new_src((61 << 3) + 1, IR3_REG_HALF); } -| T_P0 { $$ = new_src((62 << 3) + $1, 0); } +| T_P0 { $$ = new_src((62 << 3) + $1, IR3_REG_PREDICATE); } dst: T_REGISTER { $$ = new_dst($1, 0); } | T_A0 { $$ = new_dst((61 << 3), IR3_REG_HALF); } | T_A1 { $$ = new_dst((61 << 3) + 1, IR3_REG_HALF); } -| T_P0 { $$ = new_dst((62 << 3) + $1, 0); } +| T_P0 { $$ = new_dst((62 << 3) + $1, IR3_REG_PREDICATE); } const: T_CONSTANT { $$ = new_src($1, IR3_REG_CONST); } diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index bd5d0cf0452..92e55bf9a4b 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -257,9 +257,18 @@ print_ssa_name(struct log_stream *stream, struct ir3_register *reg, bool dst) print_ssa_def_name(stream, reg); } - if (reg->num != INVALID_REG && !(reg->flags & IR3_REG_ARRAY)) - mesa_log_stream_printf(stream, "(" SYN_REG("r%u.%c") ")", reg_num(reg), + if (reg->num != INVALID_REG && !(reg->flags & IR3_REG_ARRAY)) { + const char *prefix = "r"; + unsigned num = reg_num(reg); + + if (reg->flags & IR3_REG_PREDICATE) { + prefix = "p"; + num = 0; + } + + mesa_log_stream_printf(stream, "(" SYN_REG("%s%u.%c") ")", prefix, num, "xyzw"[reg_comp(reg)]); + } } static void @@ -296,6 +305,8 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr, mesa_log_stream_printf(stream, "s"); if (reg->flags & IR3_REG_HALF) mesa_log_stream_printf(stream, "h"); + if (reg->flags & IR3_REG_PREDICATE) + mesa_log_stream_printf(stream, "p"); if (reg->flags & IR3_REG_IMMED) { mesa_log_stream_printf(stream, SYN_IMMED("imm[%f,%d,0x%x]"), reg->fim_val, @@ -325,6 +336,9 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr, if (reg->flags & IR3_REG_CONST) mesa_log_stream_printf(stream, SYN_CONST("c%u.%c"), reg_num(reg), "xyzw"[reg_comp(reg)]); + else if (reg->flags & IR3_REG_PREDICATE) + mesa_log_stream_printf(stream, SYN_REG("p0.%c"), + "xyzw"[reg_comp(reg)]); else mesa_log_stream_printf(stream, SYN_REG("r%u.%c"), reg_num(reg), "xyzw"[reg_comp(reg)]); diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index ddc3da4b76b..2debdeab167 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -2544,6 +2544,9 @@ ir3_ra(struct ir3_shader_variant *v) { ir3_calc_dominance(v->ir); + /* Predicate RA needs dominance. */ + ir3_ra_predicates(v); + ir3_create_parallel_copies(v->ir); struct ra_ctx *ctx = rzalloc(NULL, struct ra_ctx); diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h index bc194627834..6dea545c3ce 100644 --- a/src/freedreno/ir3/ir3_ra.h +++ b/src/freedreno/ir3/ir3_ra.h @@ -87,7 +87,7 @@ ra_reg_get_physreg(const struct ir3_register *reg) static inline bool def_is_gpr(const struct ir3_register *reg) { - return reg_num(reg) != REG_A0 && reg_num(reg) != REG_P0; + return reg_num(reg) != REG_A0 && !(reg->flags & IR3_REG_PREDICATE); } /* Note: don't count undef as a source. 
@@ -105,6 +105,12 @@ ra_reg_is_dst(const struct ir3_register *reg) ((reg->flags & IR3_REG_ARRAY) || reg->wrmask); } +static inline bool +ra_reg_is_predicate(const struct ir3_register *reg) +{ + return (reg->flags & IR3_REG_SSA) && (reg->flags & IR3_REG_PREDICATE); +} + /* Iterators for sources and destinations which: * - Don't include fake sources (irrelevant for RA) * - Don't include non-SSA sources (immediates and constants, also irrelevant) diff --git a/src/freedreno/ir3/ir3_ra_predicates.c b/src/freedreno/ir3/ir3_ra_predicates.c new file mode 100644 index 00000000000..9c3e553275b --- /dev/null +++ b/src/freedreno/ir3/ir3_ra_predicates.c @@ -0,0 +1,467 @@ +/* + * Copyright 2024 Igalia S.L. + * SPDX-License-Identifier: MIT + */ + +#include "ir3.h" +#include "ir3_ra.h" +#include "ir3_shader.h" + +/* Represents a def that is currently live. We keep track of both the pre-RA def + * a register refers to and, in case of spilling and reloading, the def of the + * reloaded instruction. This allows us to assign reloaded defs to sources and + * prevents additional reloads. + */ +struct live_def { + /* The pre-RA def. */ + struct ir3_register *def; + + /* The reloaded def. NULL if def was not reloaded. */ + struct ir3_register *reloaded_def; + + /* Set when used for a src marked first-kill. We cannot immediately free the + * register because then it might get reused for another src in the same + * instruction. Instead, we free it after an instruction's sources have been + * processed. + */ + bool killed; +}; + +/* Per-block liveness information. Stores live defs per supported register, + * indexed by register component. + */ +struct block_liveness { + /* Live-in defs taken from the intersection of the block's predecessors' + * live-out defs. + */ + struct live_def *live_in_defs; + + /* Currently live defs. Starts from live-in and is updated while processing + * the instructions in a block. Contains the live-out defs after the whole + * block has been processed. + */ + struct live_def *live_defs; +}; + +struct ra_predicates_ctx { + struct ir3 *ir; + unsigned num_regs; + struct ir3_liveness *liveness; + struct block_liveness *blocks_liveness; + + /* True once we spilled a register. This allows us to postpone the + * calculation of SSA uses and instruction counting until the first time we + * need to spill. This is useful since spilling is rare in general. + */ + bool spilled; +}; + +static bool +has_free_regs(struct ra_predicates_ctx *ctx, struct block_liveness *live) +{ + for (unsigned i = 0; i < ctx->num_regs; ++i) { + if (live->live_defs[i].def == NULL) + return true; + } + + return false; +} + +static unsigned +alloc_reg_comp(struct ra_predicates_ctx *ctx, struct block_liveness *live) +{ + for (unsigned i = 0; i < ctx->num_regs; ++i) { + if (live->live_defs[i].def == NULL) + return i; + } + + unreachable("Reg availability should have been checked before"); +} + +static struct live_def * +assign_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_register *def, struct ir3_register *reloaded_def, + unsigned comp) +{ + assert(comp < ctx->num_regs); + + struct ir3_register *current_def = + (reloaded_def == NULL) ?
def : reloaded_def; + + current_def->num = regid(REG_P0, comp); + + struct live_def *live_def = &live->live_defs[comp]; + assert((live_def->def == NULL) && (live_def->reloaded_def == NULL)); + + live_def->def = def; + live_def->reloaded_def = reloaded_def; + return live_def; +} + +static struct live_def * +alloc_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_register *def, struct ir3_register *reloaded_def) +{ + unsigned comp = alloc_reg_comp(ctx, live); + return assign_reg(ctx, live, def, reloaded_def, comp); +} + +static void +free_reg(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_register *reg) +{ + assert((reg->flags & IR3_REG_PREDICATE) && (reg->num != INVALID_REG)); + + unsigned comp = reg_comp(reg); + assert(comp < ctx->num_regs); + + struct live_def *reg_live_def = &live->live_defs[comp]; + assert((reg_live_def->def == reg) || (reg_live_def->reloaded_def == reg)); + + reg_live_def->def = NULL; + reg_live_def->reloaded_def = NULL; + reg_live_def->killed = false; +} + +static struct ir3_instruction * +first_non_allocated_use_after(struct ir3_register *def, + struct ir3_instruction *after) +{ + uint32_t first_ip = UINT32_MAX; + struct ir3_instruction *first = NULL; + + foreach_ssa_use (use, def->instr) { + if (!ir3_block_dominates(after->block, use->block)) + continue; + + /* Do not filter out after itself. This ensures that if after is a use of + * def, def will not get selected to get spilled because there must be + * another register with a further first use. We have to ensure that def + * doesn't get spilled in this case because otherwise, we might spill a + * register used by an earlier source of after. + */ + if (use->ip < after->ip) + continue; + + foreach_ssa_src_n (src, n, use) { + if (__is_false_dep(use, n)) + continue; + + struct ir3_register *src_reg = use->srcs[n]; + if (!ra_reg_is_predicate(src_reg) || src_reg->def != def) + continue; + if (use->ip >= first_ip) + continue; + + first_ip = use->ip; + first = use; + } + } + + return first; +} + +static bool +is_predicate_use(struct ir3_instruction *instr, unsigned src_n) +{ + if (__is_false_dep(instr, src_n)) + return false; + return ra_reg_is_predicate(instr->srcs[src_n]); +} + +/* Spill a register by simply removing one from the live defs. We don't need to + * store its value anywhere since it can be rematerialized (see reload). We + * choose the register whose def's first use is the furthest.
+ */ +static void +spill(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_instruction *spill_location) +{ + if (!ctx->spilled) { + ir3_count_instructions_ra(ctx->ir); + ir3_find_ssa_uses_for(ctx->ir, ctx, is_predicate_use); + ctx->spilled = true; + } + + unsigned furthest_first_use = 0; + unsigned spill_reg = ~0; + + for (unsigned i = 0; i < ctx->num_regs; ++i) { + struct ir3_register *candidate = live->live_defs[i].def; + assert(candidate != NULL); + + struct ir3_instruction *first_use = + first_non_allocated_use_after(candidate, spill_location); + + if (first_use == NULL) { + spill_reg = i; + break; + } + + if (first_use->ip > furthest_first_use) { + furthest_first_use = first_use->ip; + spill_reg = i; + } + } + + assert(spill_reg != ~0); + + live->live_defs[spill_reg].def = NULL; + live->live_defs[spill_reg].reloaded_def = NULL; +} + +static struct live_def * +find_live_def(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_register *def) +{ + for (unsigned i = 0; i < ctx->num_regs; ++i) { + struct live_def *live_def = &live->live_defs[i]; + if (live_def->def == def) + return live_def; + } + + return NULL; +} + +static struct ir3_register * +get_def(struct live_def *live_def) +{ + return live_def->reloaded_def == NULL ? live_def->def + : live_def->reloaded_def; +} + +/* Reload a def into a specific register, which must be free. Reloading is + * implemented by cloning the instruction that produced the def and moving it in + * front of the use. + */ +static struct live_def * +reload_into(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_register *def, struct ir3_instruction *use, + unsigned comp) +{ + struct ir3_instruction *reloaded_instr = NULL; + bool def_is_allocated = !(def->flags & IR3_REG_UNUSED); + + if (!def_is_allocated && use->block == def->instr->block) { + /* If def has not been allocated a register yet, no source is currently + * using it. If it's in the same block as the current use, just move it in + * front of it. + */ + reloaded_instr = def->instr; + } else { + /* If the def is either 1) already allocated or 2) in a different block + * than the current use, we have to clone it. For 1) because its allocated + * register isn't currently live (we wouldn't be reloading it otherwise). + * For 2) because it might have other uses in blocks that aren't + * successors of the use. + */ + reloaded_instr = ir3_instr_clone(def->instr); + } + + reloaded_instr->block = use->block; + ir3_instr_move_before(reloaded_instr, use); + struct ir3_register *reloaded_def = reloaded_instr->dsts[0]; + return assign_reg(ctx, live, def, reloaded_def, comp); +} + +/* Reload a def into a register, spilling one if necessary. */ +static struct live_def * +reload(struct ra_predicates_ctx *ctx, struct block_liveness *live, + struct ir3_register *def, struct ir3_instruction *use) +{ + if (!has_free_regs(ctx, live)) + spill(ctx, live, use); + + unsigned comp = alloc_reg_comp(ctx, live); + return reload_into(ctx, live, def, use, comp); +} + +static int +ra_block(struct ra_predicates_ctx *ctx, struct ir3_block *block) +{ + struct block_liveness *live = &ctx->blocks_liveness[block->index]; + + foreach_instr (instr, &block->instr_list) { + /* Assign registers to sources based on their defs.
*/ + foreach_src (src, instr) { + if (!ra_reg_is_predicate(src)) + continue; + + struct live_def *live_def = find_live_def(ctx, live, src->def); + if (live_def == NULL) + live_def = reload(ctx, live, src->def, instr); + + assert(live_def != NULL); + + struct ir3_register *def = get_def(live_def); + src->num = def->num; + src->def = def; + + /* Mark the def as used to make sure we won't move it anymore. */ + def->flags &= ~IR3_REG_UNUSED; + + /* If this source kills the def, don't free the register right away to + * prevent it being reused for another source of this instruction. We + * can free it after all sources of this instruction have been + * processed. + */ + if (src->flags & IR3_REG_FIRST_KILL) + live_def->killed = true; + } + + /* After all sources of an instruction have been processed, we can free + * the registers that were killed by a source. + */ + for (unsigned reg = 0; reg < ctx->num_regs; ++reg) { + struct live_def *live_def = &live->live_defs[reg]; + if (live_def->def == NULL) + continue; + + if (live_def->killed) + free_reg(ctx, live, get_def(live_def)); + } + + /* Allocate registers for new defs. */ + foreach_dst (dst, instr) { + if (!ra_reg_is_predicate(dst)) + continue; + + /* Mark it as unused until we encounter the first use. This allows us + * to know when it is legal to move the instruction. + */ + dst->flags |= IR3_REG_UNUSED; + + /* If we don't have any free registers, ignore the def for now. If we + * start spilling right away, we might end up with a cascade of spills + * when there are a lot of defs before their first uses. + */ + if (!has_free_regs(ctx, live)) + continue; + + alloc_reg(ctx, live, dst, NULL); + } + } + + /* Process loop back edges. Since we ignore them while calculating a block's + * live-in defs in init_block_liveness, we now make sure that we satisfy our + * successor's live-in requirements by producing the correct defs in the + * required registers. + */ + for (unsigned i = 0; i < 2; ++i) { + struct ir3_block *succ = block->successors[i]; + if (!succ) + continue; + + struct live_def *succ_live_in = + ctx->blocks_liveness[succ->index].live_in_defs; + + /* If live_in_defs has not been set yet, it's not a back edge. */ + if (!succ_live_in) + continue; + + for (unsigned reg = 0; reg < ctx->num_regs; ++reg) { + struct live_def *succ_def = &succ_live_in[reg]; + if (!succ_def->def) + continue; + + struct live_def *cur_def = &live->live_defs[reg]; + + /* Same def in the same register, nothing to be done. */ + if (cur_def->def == succ_def->def) + continue; + + /* Different def in the same register, free it first. */ + if (cur_def->def) + free_reg(ctx, live, cur_def->def); + + /* Reload the def in the required register right before the block's + * terminator. + */ + struct ir3_instruction *use = ir3_block_get_terminator(block); + reload_into(ctx, live, succ_def->def, use, reg); + } + } + + return 0; +} + +/* Propagate live-out defs of a block's predecessors to the block's live-in + * defs. This takes the intersection of all predecessors' live-out defs. That is, + * a def will be live-in if it's live-out in the same register in all + * predecessors.
+ */ +static void +init_block_liveness(struct ra_predicates_ctx *ctx, struct ir3_block *block) +{ + struct block_liveness *live = &ctx->blocks_liveness[block->index]; + live->live_defs = rzalloc_array(ctx, struct live_def, ctx->num_regs); + BITSET_WORD *live_in = ctx->liveness->live_in[block->index]; + + for (unsigned i = 0; i < block->predecessors_count; ++i) { + struct ir3_block *pred = block->predecessors[i]; + assert(pred != NULL); + + struct block_liveness *pred_live = &ctx->blocks_liveness[pred->index]; + + /* If the predecessor has not been processed yet it means it's the back + * edge of a loop. We ignore it now, take the live-out defs of the block's + * other predecessors, and make sure the live-out defs of the back edge + * match this block's live-in defs after processing the back edge. + */ + if (pred_live->live_defs == NULL) + continue; + + for (unsigned reg = 0; reg < ctx->num_regs; ++reg) { + struct live_def *cur_def = &live->live_defs[reg]; + struct live_def *pred_def = &pred_live->live_defs[reg]; + + if (i == 0 && pred_def->def != NULL) { + /* If the first predecessor has a def in reg, use it if it's live-in + * in this block. + */ + if (BITSET_TEST(live_in, pred_def->def->name)) + *cur_def = *pred_def; + } else if (cur_def->def != pred_def->def) { + /* Different predecessors have different live-out defs in reg so we + * cannot use it as live-in. + */ + cur_def->def = NULL; + cur_def->reloaded_def = NULL; + } + } + } + + live->live_in_defs = rzalloc_array(ctx, struct live_def, ctx->num_regs); + memcpy(live->live_in_defs, live->live_defs, + sizeof(struct live_def) * ctx->num_regs); +} + +void +ir3_ra_predicates(struct ir3_shader_variant *v) +{ + struct ra_predicates_ctx *ctx = rzalloc(NULL, struct ra_predicates_ctx); + ctx->ir = v->ir; + ctx->num_regs = v->compiler->num_predicates; + ctx->liveness = ir3_calc_liveness_for(ctx, v->ir, ra_reg_is_predicate, + ra_reg_is_predicate); + ctx->blocks_liveness = + rzalloc_array(ctx, struct block_liveness, ctx->liveness->block_count); + + foreach_block (block, &v->ir->block_list) { + init_block_liveness(ctx, block); + ra_block(ctx, block); + } + + /* Remove instructions that became unused. This happens when a def was never + * used directly but only through its reloaded clones. + * Note that index 0 in the liveness definitions is always NULL. 
+ */ + for (unsigned i = 1; i < ctx->liveness->definitions_count; ++i) { + struct ir3_register *def = ctx->liveness->definitions[i]; + + if (def->flags & IR3_REG_UNUSED) + list_delinit(&def->instr->node); + } + + ralloc_free(ctx); +} diff --git a/src/freedreno/ir3/ir3_sched.c b/src/freedreno/ir3/ir3_sched.c index 3991c0c6861..aad52b194a3 100644 --- a/src/freedreno/ir3/ir3_sched.c +++ b/src/freedreno/ir3/ir3_sched.c @@ -97,9 +97,8 @@ struct ir3_sched_ctx { struct ir3_instruction *scheduled; /* last scheduled instr */ struct ir3_instruction *addr0; /* current a0.x user, if any */ struct ir3_instruction *addr1; /* current a1.x user, if any */ - struct ir3_instruction *pred; /* current p0.x user, if any */ - struct ir3_instruction *split; /* most-recently-split a0/a1/p0 producer */ + struct ir3_instruction *split; /* most-recently-split a0/a1 producer */ int remaining_kills; int remaining_tex; @@ -277,11 +276,6 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) ctx->addr1 = instr; } - if (writes_pred(instr)) { - assert(ctx->pred == NULL); - ctx->pred = instr; - } - instr->flags |= IR3_INSTR_MARK; di(instr, "schedule"); @@ -366,9 +360,9 @@ struct ir3_sched_notes { */ bool blocked_kill; /* there is at least one instruction that could be scheduled, - * except for conflicting address/predicate register usage: + * except for conflicting address register usage: */ - bool addr0_conflict, addr1_conflict, pred_conflict; + bool addr0_conflict, addr1_conflict; }; static bool @@ -485,12 +479,6 @@ check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes, return false; } - if (writes_pred(instr) && ctx->pred) { - assert(ctx->pred != instr); - notes->pred_conflict = true; - return false; - } - /* if the instruction is a kill, we need to ensure *every* * bary.f is scheduled. The hw seems unhappy if the thread * gets killed before the end-input (ei) flag is hit. @@ -955,58 +943,6 @@ split_addr(struct ir3_sched_ctx *ctx, struct ir3_instruction **addr, return new_addr; } -/* "spill" the predicate register by remapping any unscheduled - * instructions which depend on the current predicate register - * to a clone of the instruction which wrote the address reg. - */ -static struct ir3_instruction * -split_pred(struct ir3_sched_ctx *ctx) -{ - struct ir3 *ir; - struct ir3_instruction *new_pred = NULL; - unsigned i; - - assert(ctx->pred); - - ir = ctx->pred->block->shader; - - for (i = 0; i < ir->predicates_count; i++) { - struct ir3_instruction *predicated = ir->predicates[i]; - - if (!predicated) - continue; - - /* skip instructions already scheduled: */ - if (is_scheduled(predicated)) - continue; - - /* remap remaining instructions using current pred - * to new pred: - * - * TODO is there ever a case when pred isn't first - * (and only) src? 
- */ - if (ssa(predicated->srcs[0]) == ctx->pred) { - if (!new_pred) { - new_pred = split_instr(ctx, ctx->pred); - /* original pred is scheduled, but new one isn't: */ - new_pred->flags &= ~IR3_INSTR_MARK; - } - predicated->srcs[0]->def->instr = new_pred; - /* don't need to remove old dag edge since old pred is - * already scheduled: - */ - sched_node_add_dep(predicated, new_pred, 0); - di(predicated, "new predicate"); - } - } - - /* all remaining predicated remapped to new pred: */ - ctx->pred = NULL; - - return new_pred; -} - static void sched_node_init(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr) { @@ -1182,7 +1118,6 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) /* addr/pred writes are per-block: */ ctx->addr0 = NULL; ctx->addr1 = NULL; - ctx->pred = NULL; ctx->sy_delay = 0; ctx->ss_delay = 0; ctx->sy_index = ctx->first_outstanding_sy_index = 0; @@ -1267,8 +1202,6 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) } else if (notes.addr1_conflict) { new_instr = split_addr(ctx, &ctx->addr1, ir->a1_users, ir->a1_users_count); - } else if (notes.pred_conflict) { - new_instr = split_pred(ctx); } else { d("unscheduled_list:"); foreach_instr (instr, &ctx->unscheduled_list) diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 73a65c1916e..c8113f79197 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -65,7 +65,7 @@ validate_error(struct ir3_validate_ctx *ctx, const char *condstr) static unsigned reg_class_flags(struct ir3_register *reg) { - return reg->flags & (IR3_REG_HALF | IR3_REG_SHARED); + return reg->flags & (IR3_REG_HALF | IR3_REG_SHARED | IR3_REG_PREDICATE); } static void @@ -78,6 +78,9 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr, if (!(reg->flags & IR3_REG_SSA) || !reg->def) return; + if (reg->flags & IR3_REG_PREDICATE) + validate_assert(ctx, !(reg->flags & (IR3_REG_SHARED | IR3_REG_HALF))); + struct ir3_register *src = reg->def; validate_assert(ctx, _mesa_set_search(ctx->defs, src->instr)); diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 3578bbaf9b8..b67d4b9a2de 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -108,6 +108,7 @@ libfreedreno_ir3_files = files( 'ir3_print.c', 'ir3_ra.c', 'ir3_ra.h', + 'ir3_ra_predicates.c', 'ir3_ra_validate.c', 'ir3_reconvergence.c', 'ir3_remove_unreachable.c',