vc4: Use the ra_alloc_contig_reg_class() function to speed up RA.

It means we don't need to do the n^2 loop over the regs to set up the pq values, nor do we need to allocate conflict lists.

Acked-by: Erico Nunes <nunes.erico@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9437>
2026-05-04 22:49:13 +02:00 · 2021-03-04 16:50:26 -08:00 · 2021-03-04 16:50:26 -08:00 · 15aa8e9189
commit 15aa8e9189
parent 2d7bcdaf6b
1 changed files with 7 additions and 7 deletions
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -115,20 +115,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
        if (vc4->regs)
                return;

-        vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
+        vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), false);

        /* The physical regfiles split us into two classes, with [0] being the
         * whole space and [1] being the bottom half (for threaded fragment
         * shaders).
         */
        for (int i = 0; i < 2; i++) {
-                vc4->reg_class_any[i] = ra_alloc_reg_class(vc4->regs);
-                vc4->reg_class_a_or_b[i] = ra_alloc_reg_class(vc4->regs);
-                vc4->reg_class_a_or_b_or_acc[i] = ra_alloc_reg_class(vc4->regs);
-                vc4->reg_class_r4_or_a[i] = ra_alloc_reg_class(vc4->regs);
-                vc4->reg_class_a[i] = ra_alloc_reg_class(vc4->regs);
+                vc4->reg_class_any[i] = ra_alloc_contig_reg_class(vc4->regs, 1);
+                vc4->reg_class_a_or_b[i] = ra_alloc_contig_reg_class(vc4->regs, 1);
+                vc4->reg_class_a_or_b_or_acc[i] = ra_alloc_contig_reg_class(vc4->regs, 1);
+                vc4->reg_class_r4_or_a[i] = ra_alloc_contig_reg_class(vc4->regs, 1);
+                vc4->reg_class_a[i] = ra_alloc_contig_reg_class(vc4->regs, 1);
        }
-        vc4->reg_class_r0_r3 = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_r0_r3 = ra_alloc_contig_reg_class(vc4->regs, 1);

        /* r0-r3 */
        for (uint32_t i = ACC_INDEX; i < ACC_INDEX + 4; i++) {