mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 04:08:10 +02:00
Implement a simple pre-RA bottom-up list scheduler with the goal of decreasing register pressure. On Xe2, this significantly reduces spilling. SSA form allows us to estimate register demand cheaply and accurately, which theoretically [1] gives this algorithm the two Hippocratic properties: 1. Shaders with low register pressure are unaffected. 2. Register pressure can only be decreased, never increased. In other words: first, do no harm. The heuristic itself is very simple: greedily choose instructions that decrease liveness using a backwards list scheduler. This is far from optimal! But thanks to the above properties, even a heuristic that picked random instructions would be a win overall - by construction, we can only ever win. In other words: this scheduler is your older brother powering off the game console any time he's about to lose a game, maintaining a 100% win rate. [1] In reality, neither property is strictly satisfied due to the messy details of mapping our clean logical model onto Intel's many weird physical register files. Nevertheless, the algorithm is well-motivated and the empirical results on Xe2 are excellent. SIMD16: Totals: Instrs: 2754194 -> 2753957 (-0.01%); split: -0.23%, +0.22% CodeSize: 41094768 -> 41092768 (-0.00%); split: -0.23%, +0.23% Number of spill instructions: 1724 -> 1129 (-34.51%) Number of fill instructions: 1912 -> 1119 (-41.47%) Totals from 168 (6.35% of 2647) affected shaders: Instrs: 850994 -> 850757 (-0.03%); split: -0.75%, +0.73% CodeSize: 12825680 -> 12823680 (-0.02%); split: -0.74%, +0.73% Number of spill instructions: 1724 -> 1129 (-34.51%) Number of fill instructions: 1912 -> 1119 (-41.47%) SIMD32: Totals: Instrs: 4688858 -> 4557800 (-2.80%); split: -3.53%, +0.74% CodeSize: 70177200 -> 68214816 (-2.80%); split: -3.53%, +0.74% Number of spill instructions: 50316 -> 45795 (-8.99%); split: -9.56%, +0.57% Number of fill instructions: 51526 -> 45075 (-12.52%); split: -13.23%, +0.71% Totals from 819 (30.94% of 2647) affected shaders: Instrs: 3810182 -> 3679124 (-3.44%); split: -4.35%, +0.91% CodeSize: 57044000 -> 55081616 (-3.44%); split: -4.35%, +0.91% Number of spill instructions: 49264 -> 44743 (-9.18%); split: -9.76%, +0.58% Number of fill instructions: 50182 -> 43731 (-12.86%); split: -13.58%, +0.73% Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41688>
85 lines
2.1 KiB
C
85 lines
2.1 KiB
C
/*
|
|
* Copyright 2026 Intel Corporation
|
|
* Copyright 2019 Broadcom
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "jay_dag.h"
|
|
#include <stdint.h>
|
|
#include "util/ralloc.h"
|
|
#include "util/u_dynarray.h"
|
|
|
|
void
|
|
jay_dag_init(struct jay_dag *dag, void *memctx, uint32_t node_count)
|
|
{
|
|
assert(node_count >= 1 && "node 0 is reserved and always present");
|
|
|
|
*dag = (struct jay_dag) {
|
|
.adjacency = rzalloc_array(memctx, uint32_t, node_count),
|
|
.parent_counts = rzalloc_array(memctx, uint32_t, node_count),
|
|
.node_count = node_count,
|
|
.node = 1,
|
|
};
|
|
|
|
util_dynarray_init(&dag->heads, memctx);
|
|
util_dynarray_init(&dag->edges, memctx);
|
|
}
|
|
|
|
void
|
|
jay_dag_add_edge(struct jay_dag *dag, uint32_t child)
|
|
{
|
|
if (child && child != dag->node) {
|
|
assert(child < dag->node_count);
|
|
|
|
/* We have to prune degenerate or duplicate edges */
|
|
for (uint32_t i = (dag->node > 0 ? dag->adjacency[dag->node - 1] : 0);
|
|
i < util_dynarray_num_elements(&dag->edges, uint32_t); ++i) {
|
|
if (*util_dynarray_element(&dag->edges, uint32_t, i) == child)
|
|
return;
|
|
}
|
|
|
|
util_dynarray_append(&dag->edges, child);
|
|
dag->parent_counts[child]++;
|
|
}
|
|
}
|
|
|
|
void
|
|
jay_dag_next_node(struct jay_dag *dag)
|
|
{
|
|
assert(dag->node < dag->node_count);
|
|
|
|
dag->adjacency[dag->node++] =
|
|
util_dynarray_num_elements(&dag->edges, uint32_t);
|
|
}
|
|
|
|
void
|
|
jay_dag_finalize(struct jay_dag *dag, uint32_t first_node)
|
|
{
|
|
for (uint32_t i = dag->node - 1; i >= first_node; --i) {
|
|
if (dag->parent_counts[i] == 0) {
|
|
util_dynarray_append(&dag->heads, i);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Removes a DAG head from the graph, and moves any new dag heads into the
|
|
* heads list.
|
|
*/
|
|
void
|
|
jay_dag_prune_head(struct jay_dag *dag, uint32_t head)
|
|
{
|
|
assert(!dag->parent_counts[head]);
|
|
util_dynarray_delete_unordered(&dag->heads, uint32_t, head);
|
|
uint32_t first = head > 0 ? dag->adjacency[head - 1] : 0;
|
|
|
|
for (unsigned i = first; i < dag->adjacency[head]; ++i) {
|
|
uint32_t *it = util_dynarray_element(&dag->edges, uint32_t, i);
|
|
|
|
if ((--dag->parent_counts[*it]) == 0) {
|
|
util_dynarray_append(&dag->heads, *it);
|
|
}
|
|
}
|
|
}
|
|
|
|
|