lima: ppir: reuse load_temp/store_temp nodes if possible

Currently the spilling code operates on individual ops rather than on
instructions, and as a result it may create a redundant load_temp op if
an instruction references the spilled register several times.

Similarly, it creates multiple stores if there are multiple ops in the
instruction that write different components of the register.

Check whether the instruction already contains the necessary load_temp or
store_temp and reuse it if possible.

shader-db:

total instructions in shared programs: 27718 -> 27673 (-0.16%)
instructions in affected programs: 2786 -> 2741 (-1.62%)
helped: 18
HURT: 0
helped stats (abs) min: 1 max: 8 x̄: 2.50 x̃: 1
helped stats (rel) min: 0.39% max: 5.33% x̄: 2.05% x̃: 0.80%
95% mean confidence interval for instructions value: -3.70 -1.30
95% mean confidence interval for instructions %-change: -3.09% -1.01%
Instructions are helped.

total loops in shared programs: 4 -> 4 (0.00%)
loops in affected programs: 0 -> 0
helped: 0
HURT: 0

total spills in shared programs: 390 -> 381 (-2.31%)
spills in affected programs: 145 -> 136 (-6.21%)
helped: 9
HURT: 0

total fills in shared programs: 1210 -> 1174 (-2.98%)
fills in affected programs: 149 -> 113 (-24.16%)
helped: 9
HURT: 0

LOST:   0
GAINED: 0

Reviewed-by: Erico Nunes <nunes.erico@gmail.com>
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33753>
This commit is contained in:
Vasily Khoruzhick 2025-02-23 23:42:11 -08:00 committed by Marge Bot
parent 7200cf8827
commit 6528ee4228

View file

@ -185,14 +185,24 @@ static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
}
static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
ppir_node *node, ppir_src *src,
ppir_node **fill_node)
ppir_node *node, ppir_src *src)
{
/* nodes might have multiple references to the same value.
* avoid creating unnecessary loads for the same fill by
* saving the node resulting from the temporary load */
if (*fill_node)
goto update_src;
int temp_index = -comp->prog->state.stack_size; /* indices are negative */
/* Check if we already have a load for this temporary in node instruction
* and reuse it if possible */
if (node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
ppir_load_node *load = ppir_node_to_load(node->instr->slots[PPIR_INSTR_SLOT_UNIFORM]);
if (load->node.op == ppir_op_load_temp &&
load->index == temp_index &&
load->num_components == src->reg->num_components) {
/* Just update source */
ppir_node_target_assign(src, &load->node);
return true;
}
}
int num_components = src->reg->num_components;
@ -205,7 +215,7 @@ static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
ppir_load_node *load = ppir_node_to_load(load_node);
load->index = -comp->prog->state.stack_size; /* index sizes are negative */
load->index = temp_index;
load->num_components = num_components;
ppir_dest *ld_dest = &load->dest;
@ -220,7 +230,6 @@ static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
ppir_node_target_assign(src, load_node);
*fill_node = load_node;
return ppir_instr_insert_node(node->instr, load_node);
}
@ -263,11 +272,8 @@ static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
ppir_node_add_dep(node, move_node, ppir_dep_src);
ppir_node_add_dep(move_node, load_node, ppir_dep_src);
*fill_node = move_node;
update_src:
/* switch node src to use the fill node dest */
ppir_node_target_assign(src, *fill_node);
ppir_node_target_assign(src, move_node);
return true;
}
@ -340,20 +346,38 @@ static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
ppir_dest *dest = ppir_node_get_dest(node);
assert(dest != NULL);
ppir_reg *reg = ppir_dest_get_reg(dest);
bool reused = false;
int temp_index = -comp->prog->state.stack_size; /* indices are negative */
ppir_node *store_node = NULL;
/* Check if we already have a store for this reg in the node instruction
* and reuse it */
if (node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE) {
ppir_store_node *store = ppir_node_to_store(node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]);
if (store->index == temp_index &&
store->num_components == reg->num_components) {
store_node = &store->node;
reused = true;
ppir_node_target_assign(&store->src, node);
}
}
/* alloc new node to store value */
ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
if (!store_node)
return false;
list_addtail(&store_node->list, &node->list);
comp->num_spills++;
if (!store_node) {
store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
if (!store_node)
return false;
list_addtail(&store_node->list, &node->list);
comp->num_spills++;
ppir_store_node *store = ppir_node_to_store(store_node);
ppir_store_node *store = ppir_node_to_store(store_node);
store->index = -comp->prog->state.stack_size; /* index sizes are negative */
ppir_node_target_assign(&store->src, node);
store->num_components = reg->num_components;
store->index = temp_index;
store->num_components = reg->num_components;
ppir_node_target_assign(&store->src, node);
}
/* insert the new node as successor */
ppir_node_foreach_succ_safe(node, dep) {
@ -363,6 +387,9 @@ static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
}
ppir_node_add_dep(store_node, node, ppir_dep_src);
if (reused)
return true;
/* If the store temp slot is empty, we can insert the store_temp
* there and use it directly. Exceptionally, if the node is in the
* combine slot, this doesn't work. */
@ -391,15 +418,11 @@ static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
return false;
}
ppir_node *fill_node = NULL;
/* nodes might have multiple references to the same value.
* avoid creating unnecessary loads for the same fill by
* saving the node resulting from the temporary load */
for (int i = 0; i < ppir_node_get_src_num(node); i++) {
ppir_src *src = ppir_node_get_src(node, i);
ppir_reg *reg = ppir_src_get_reg(src);
if (reg == chosen) {
if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
if (!ppir_update_spilled_src(comp, block, node, src))
return false;
}
}