From 1bf3d6c8e6eb54aa835d1b3df44ac6b0bdaf9da2 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Wed, 26 Jun 2024 08:06:33 +0200
Subject: [PATCH] broadcom/compiler: don't spill in between multop and umul24
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The multop instruction implicitly writes rtop which is not preserved
acrosss thread switches. We can spill the sources of the multop
(since these would happen before multop) and the destination of
umul24 (since that would happen after umul24).

Fixes some OpenCL tests when V3D_DEBUG=opt_compile_time is used to
choose a different compile configuration.

cc: mesa-stable

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29909>
(cherry picked from commit 38b7f411a1fb26ef4b2714852c7f950f0a3a710c)
---
 .pick_status.json                             |  2 +-
 src/broadcom/compiler/vir_register_allocate.c | 22 +++++++++++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 4d0360e92ce..23276e568ef 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -354,7 +354,7 @@
         "description": "broadcom/compiler: don't spill in between multop and umul24",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null,
         "notes": null
diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c
index 54c3da72baf..4fdf134a2ed 100644
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -271,6 +271,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
         float block_scale = 1.0;
         float spill_costs[c->num_temps];
         bool in_tmu_operation = false;
+        bool rtop_hazard = false;
         bool started_last_seg = false;
 
         for (unsigned i = 0; i < c->num_temps; i++)
@@ -279,6 +280,20 @@ v3d_choose_spill_node(struct v3d_compile *c)
         /* XXX: Scale the cost up when inside of a loop. */
         vir_for_each_block(block, c) {
                 vir_for_each_inst(inst, block) {
+                        /* RTOP is not preserved across thread switches, so
+                         * we can't spill in the middle of multop + umul24.
+                         */
+                        bool is_multop = false;
+                        bool is_umul24 = false;
+                        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+                                if (inst->qpu.alu.mul.op == V3D_QPU_M_MULTOP) {
+                                    is_multop = true;
+                                    rtop_hazard = true;
+                                } else if (inst->qpu.alu.mul.op == V3D_QPU_M_UMUL24) {
+                                    is_umul24 = true;
+                                }
+                        }
+
                         /* We can't insert new thread switches after
                          * starting output writes.
                          */
@@ -297,7 +312,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
 
                                 if (spill_type != SPILL_TYPE_TMU) {
                                         spill_costs[temp] += block_scale;
-                                } else if (!no_spilling) {
+                                } else if (!no_spilling && (!rtop_hazard || is_multop)) {
                                         float tmu_op_scale = in_tmu_operation ?
                                                 3.0 : 1.0;
                                         spill_costs[temp] += (block_scale *
@@ -315,7 +330,7 @@ v3d_choose_spill_node(struct v3d_compile *c)
 
                                 if (spill_type != SPILL_TYPE_TMU) {
                                         /* We just rematerialize it later */
-                                } else if (!no_spilling) {
+                                } else if (!no_spilling && (!rtop_hazard || is_umul24)) {
                                         spill_costs[temp] += (block_scale *
                                                               tmu_scale);
                                 } else {
@@ -344,6 +359,9 @@ v3d_choose_spill_node(struct v3d_compile *c)
 
                         if (qinst_writes_tmu(c->devinfo, inst))
                                 in_tmu_operation = true;
+
+                        if (is_umul24)
+                                rtop_hazard = false;
                 }
         }