diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c index 151f94c8666..2423aa73344 100644 --- a/src/gallium/drivers/lima/ir/pp/regalloc.c +++ b/src/gallium/drivers/lima/ir/pp/regalloc.c @@ -185,14 +185,24 @@ static bool create_new_instr_before(ppir_block *block, ppir_instr *ref, } static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block, - ppir_node *node, ppir_src *src, - ppir_node **fill_node) + ppir_node *node, ppir_src *src) { - /* nodes might have multiple references to the same value. - * avoid creating unnecessary loads for the same fill by - * saving the node resulting from the temporary load */ - if (*fill_node) - goto update_src; + int temp_index = -comp->prog->state.stack_size; /* indices are negative */ + + /* Check if we already have a load for this temporary in node instruction + * and reuse it if possible */ + if (node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] && + node->instr_pos != PPIR_INSTR_SLOT_VARYING && + node->instr_pos != PPIR_INSTR_SLOT_TEXLD) { + ppir_load_node *load = ppir_node_to_load(node->instr->slots[PPIR_INSTR_SLOT_UNIFORM]); + if (load->node.op == ppir_op_load_temp && + load->index == temp_index && + load->num_components == src->reg->num_components) { + /* Just update source */ + ppir_node_target_assign(src, &load->node); + return true; + } + } int num_components = src->reg->num_components; @@ -205,7 +215,7 @@ static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block, ppir_load_node *load = ppir_node_to_load(load_node); - load->index = -comp->prog->state.stack_size; /* index sizes are negative */ + load->index = temp_index; load->num_components = num_components; ppir_dest *ld_dest = &load->dest; @@ -220,7 +230,6 @@ static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block, node->instr_pos != PPIR_INSTR_SLOT_VARYING && node->instr_pos != PPIR_INSTR_SLOT_TEXLD) { ppir_node_target_assign(src, load_node); - *fill_node = load_node; return ppir_instr_insert_node(node->instr, load_node); } @@ -263,11 +272,8 @@ static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block, ppir_node_add_dep(node, move_node, ppir_dep_src); ppir_node_add_dep(move_node, load_node, ppir_dep_src); - *fill_node = move_node; - -update_src: /* switch node src to use the fill node dest */ - ppir_node_target_assign(src, *fill_node); + ppir_node_target_assign(src, move_node); return true; } @@ -340,20 +346,38 @@ static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block, ppir_dest *dest = ppir_node_get_dest(node); assert(dest != NULL); ppir_reg *reg = ppir_dest_get_reg(dest); + bool reused = false; + int temp_index = -comp->prog->state.stack_size; /* indices are negative */ + + ppir_node *store_node = NULL; + + /* Check if we already have a store for this reg in the node instruction + * and reuse it */ + if (node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] && + node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE) { + ppir_store_node *store = ppir_node_to_store(node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]); + if (store->index == temp_index && + store->num_components == reg->num_components) { + store_node = &store->node; + reused = true; + ppir_node_target_assign(&store->src, node); + } + } /* alloc new node to store value */ - ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0); - if (!store_node) - return false; - list_addtail(&store_node->list, &node->list); - comp->num_spills++; + if (!store_node) { + store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0); + if (!store_node) + return false; + list_addtail(&store_node->list, &node->list); + comp->num_spills++; - ppir_store_node *store = ppir_node_to_store(store_node); + ppir_store_node *store = ppir_node_to_store(store_node); - store->index = -comp->prog->state.stack_size; /* index sizes are negative */ - - ppir_node_target_assign(&store->src, node); - store->num_components = reg->num_components; + store->index = temp_index; + store->num_components = reg->num_components; + ppir_node_target_assign(&store->src, node); + } /* insert the new node as successor */ ppir_node_foreach_succ_safe(node, dep) { @@ -363,6 +387,9 @@ static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block, } ppir_node_add_dep(store_node, node, ppir_dep_src); + if (reused) + return true; + /* If the store temp slot is empty, we can insert the store_temp * there and use it directly. Exceptionally, if the node is in the * combine slot, this doesn't work. */ @@ -391,15 +418,11 @@ static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen) return false; } - ppir_node *fill_node = NULL; - /* nodes might have multiple references to the same value. - * avoid creating unnecessary loads for the same fill by - * saving the node resulting from the temporary load */ for (int i = 0; i < ppir_node_get_src_num(node); i++) { ppir_src *src = ppir_node_get_src(node, i); ppir_reg *reg = ppir_src_get_reg(src); if (reg == chosen) { - if (!ppir_update_spilled_src(comp, block, node, src, &fill_node)) + if (!ppir_update_spilled_src(comp, block, node, src)) return false; } }