diff --git a/src/gallium/drivers/lima/ir/gp/gpir.h b/src/gallium/drivers/lima/ir/gp/gpir.h
index de571c3c1c4..e7707814b7c 100644
--- a/src/gallium/drivers/lima/ir/gp/gpir.h
+++ b/src/gallium/drivers/lima/ir/gp/gpir.h
@@ -120,6 +120,7 @@ typedef struct {
    int *slots;
    gpir_node_type type;
    bool spillless;
+   bool schedule_first;
    bool may_consume_two_slots;
 } gpir_op_info;
 
@@ -299,14 +300,20 @@ typedef struct gpir_instr {
     *
     * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
     *                          alu_num_slot_needed_by_max +
-    *                          alu_num_slot_needed_by_next_max
+    *                          max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
     * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
     *     alu_num_slot_neede_by_non_cplx_store
+    *
+    * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
+    * nodes for the next instruction) but when there is a complex1 node in
+    * this instruction it reduces to 4 to reserve a slot for complex2 in the
+    * next instruction.
     */
    int alu_num_slot_needed_by_store;
    int alu_num_slot_needed_by_non_cplx_store;
    int alu_num_slot_needed_by_max;
-   int alu_num_slot_needed_by_next_max;
+   int alu_num_unscheduled_next_max;
+   int alu_max_allowed_next_max;
 
    /* Used to communicate to the scheduler how many slots need to be cleared
    * up in order to satisfy the invariants.
diff --git a/src/gallium/drivers/lima/ir/gp/instr.c b/src/gallium/drivers/lima/ir/gp/instr.c
index e07a2c9b7c2..45e9d817143 100644
--- a/src/gallium/drivers/lima/ir/gp/instr.c
+++ b/src/gallium/drivers/lima/ir/gp/instr.c
@@ -37,6 +37,7 @@ gpir_instr *gpir_instr_create(gpir_block *block)
    instr->index = block->sched.instr_index++;
    instr->alu_num_slot_free = 6;
    instr->alu_non_cplx_slot_free = 5;
+   instr->alu_max_allowed_next_max = 5;
 
    list_add(&instr->list, &block->instr_list);
    return instr;
@@ -96,6 +97,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
    int non_cplx_store_reduce_slot = 0;
    int max_reduce_slot = node->sched.max_node ? 1 : 0;
    int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;
+   int alu_new_max_allowed_next_max =
+      node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max;
 
    /* check if this node is child of one store node.
     * complex1 won't be any of this instr's store node's child,
@@ -117,7 +120,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
 
    int slot_difference = instr->alu_num_slot_needed_by_store - store_reduce_slot +
       instr->alu_num_slot_needed_by_max - max_reduce_slot +
-      MAX2(instr->alu_num_slot_needed_by_next_max - next_max_reduce_slot, 0) -
+      MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot -
+           alu_new_max_allowed_next_max, 0) -
       (instr->alu_num_slot_free - consume_slot);
    if (slot_difference > 0) {
       gpir_debug("failed %d because of alu slot\n", node->index);
@@ -141,7 +145,8 @@ static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)
    instr->alu_num_slot_needed_by_store -= store_reduce_slot;
    instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot;
    instr->alu_num_slot_needed_by_max -= max_reduce_slot;
-   instr->alu_num_slot_needed_by_next_max -= next_max_reduce_slot;
+   instr->alu_num_unscheduled_next_max -= next_max_reduce_slot;
+   instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max;
 
    return true;
 }
@@ -165,7 +170,9 @@ static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)
    if (node->sched.max_node)
       instr->alu_num_slot_needed_by_max++;
    if (node->sched.next_max_node)
-      instr->alu_num_slot_needed_by_next_max++;
+      instr->alu_num_unscheduled_next_max++;
+   if (node->op == gpir_op_complex1)
+      instr->alu_max_allowed_next_max = 5;
 }
 
 static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)
@@ -312,7 +319,7 @@ static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)
     */
    int slot_difference = instr->alu_num_slot_needed_by_store + 1 +
       instr->alu_num_slot_needed_by_max +
-      MAX2(instr->alu_num_slot_needed_by_next_max, 0) -
+      MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) -
       instr->alu_num_slot_free;
    if (slot_difference > 0) {
       instr->slot_difference = slot_difference;
diff --git a/src/gallium/drivers/lima/ir/gp/node.c b/src/gallium/drivers/lima/ir/gp/node.c
index decda5f1246..a8706627f38 100644
--- a/src/gallium/drivers/lima/ir/gp/node.c
+++ b/src/gallium/drivers/lima/ir/gp/node.c
@@ -58,6 +58,7 @@ const gpir_op_info gpir_op_infos[] = {
       .name = "complex2",
       .slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },
       .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_add] = {
       .name = "add",
@@ -154,11 +155,13 @@ const gpir_op_info gpir_op_infos[] = {
       .name = "rcp_impl",
       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
       .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_rsqrt_impl] = {
       .name = "rsqrt_impl",
       .slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },
       .spillless = true,
+      .schedule_first = true,
    },
    [gpir_op_load_uniform] = {
       .name = "ld_uni",
diff --git a/src/gallium/drivers/lima/ir/gp/scheduler.c b/src/gallium/drivers/lima/ir/gp/scheduler.c
index 35925a1af51..f06089b7992 100644
--- a/src/gallium/drivers/lima/ir/gp/scheduler.c
+++ b/src/gallium/drivers/lima/ir/gp/scheduler.c
@@ -441,7 +441,8 @@ static void schedule_insert_ready_list(sched_ctx *ctx,
 
    struct list_head *insert_pos = &ctx->ready_list;
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
-      if (insert_node->sched.dist > node->sched.dist) {
+      if (insert_node->sched.dist > node->sched.dist ||
+          gpir_op_infos[insert_node->op].schedule_first) {
          insert_pos = &node->list;
          break;
       }
@@ -916,7 +917,7 @@ static void spill_node(sched_ctx *ctx, gpir_node *node, gpir_store_node *store)
       }
       if (node->sched.next_max_node) {
          node->sched.next_max_node = false;
-         ctx->instr->alu_num_slot_needed_by_next_max--;
+         ctx->instr->alu_num_unscheduled_next_max--;
       }
    }
 }
@@ -1153,7 +1154,7 @@ static bool can_use_complex(gpir_node *node)
 
 static void sched_find_max_nodes(sched_ctx *ctx)
 {
-   ctx->instr->alu_num_slot_needed_by_next_max = -5;
+   ctx->instr->alu_num_unscheduled_next_max = 0;
    ctx->instr->alu_num_slot_needed_by_max = 0;
 
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
@@ -1169,7 +1170,7 @@ static void sched_find_max_nodes(sched_ctx *ctx)
       if (node->sched.max_node)
          ctx->instr->alu_num_slot_needed_by_max++;
       if (node->sched.next_max_node)
-         ctx->instr->alu_num_slot_needed_by_next_max++;
+         ctx->instr->alu_num_unscheduled_next_max++;
    }
 }
 
@@ -1179,9 +1180,10 @@ static void sched_find_max_nodes(sched_ctx *ctx)
 static void verify_max_nodes(sched_ctx *ctx)
 {
    int alu_num_slot_needed_by_max = 0;
-   int alu_num_slot_needed_by_next_max = -5;
+   int alu_num_unscheduled_next_max = 0;
    int alu_num_slot_needed_by_store = 0;
    int alu_num_slot_needed_by_non_cplx_store = 0;
+   int alu_max_allowed_next_max = 5;
 
    list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
       if (!gpir_is_input_node(node))
@@ -1190,7 +1192,7 @@ static void verify_max_nodes(sched_ctx *ctx)
       if (node->sched.max_node)
          alu_num_slot_needed_by_max++;
       if (node->sched.next_max_node)
-         alu_num_slot_needed_by_next_max++;
+         alu_num_unscheduled_next_max++;
       if (used_by_store(node, ctx->instr)) {
          alu_num_slot_needed_by_store++;
         if (node->sched.next_max_node && !node->sched.complex_allowed)
@@ -1198,12 +1200,17 @@ static void verify_max_nodes(sched_ctx *ctx)
       }
    }
 
+   if (ctx->instr->slots[GPIR_INSTR_SLOT_MUL0] &&
+       ctx->instr->slots[GPIR_INSTR_SLOT_MUL0]->op == gpir_op_complex1)
+      alu_max_allowed_next_max = 4;
+
    assert(ctx->instr->alu_num_slot_needed_by_max == alu_num_slot_needed_by_max);
-   assert(ctx->instr->alu_num_slot_needed_by_next_max == alu_num_slot_needed_by_next_max);
+   assert(ctx->instr->alu_num_unscheduled_next_max == alu_num_unscheduled_next_max);
+   assert(ctx->instr->alu_max_allowed_next_max == alu_max_allowed_next_max);
    assert(ctx->instr->alu_num_slot_needed_by_store == alu_num_slot_needed_by_store);
    assert(ctx->instr->alu_num_slot_needed_by_non_cplx_store ==
           alu_num_slot_needed_by_non_cplx_store);
-   assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_slot_needed_by_next_max, 0));
+   assert(ctx->instr->alu_num_slot_free >= alu_num_slot_needed_by_store + alu_num_slot_needed_by_max + MAX2(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0));
    assert(ctx->instr->alu_non_cplx_slot_free >= alu_num_slot_needed_by_max + alu_num_slot_needed_by_non_cplx_store);
 }
 
@@ -1237,6 +1244,13 @@ static bool try_node(sched_ctx *ctx)
          score = schedule_try_node(ctx, node, true);
       }
 
+      /* schedule_first nodes must be scheduled if possible */
+      if (gpir_op_infos[node->op].schedule_first && score != INT_MIN) {
+         best_node = node;
+         best_score = score;
+         break;
+      }
+
       if (score > best_score) {
          best_score = score;
          best_node = node;
@@ -1382,7 +1396,8 @@ static bool sched_move(sched_ctx *ctx)
     * need to insert the move.
     */
 
-   if (ctx->instr->alu_num_slot_needed_by_next_max > 0) {
+   if (ctx->instr->alu_num_unscheduled_next_max >
+       ctx->instr->alu_max_allowed_next_max) {
       list_for_each_entry(gpir_node, node, &ctx->ready_list, list) {
          if (!can_place_move(ctx, node))
            continue;
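
Note (illustration, not part of the patch): the behavioral core of the change is that invariant (1) now reserves MAX2(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0) ALU slots instead of relying on the old "-5" initialization of alu_num_slot_needed_by_next_max, and alu_max_allowed_next_max drops from 5 to 4 once complex1 sits in the current instruction so that complex2 is guaranteed a slot in the next one. The standalone C sketch below mirrors that arithmetic; the simplified struct fake_instr, the helper invariant_1_holds(), and the local MAX2 definition are made up for illustration, while the real fields live in gpir_instr and the real check is gpir_instr_insert_alu_check().

#include <stdbool.h>
#include <stdio.h>

/* Local stand-in for Mesa's MAX2() macro. */
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Made-up mirror of the gpir_instr bookkeeping fields touched by the patch. */
struct fake_instr {
   int alu_num_slot_free;
   int alu_num_slot_needed_by_store;
   int alu_num_slot_needed_by_max;
   int alu_num_unscheduled_next_max;
   int alu_max_allowed_next_max; /* 5 normally, 4 once complex1 is scheduled */
};

/* Invariant (1) from the updated comment in gpir.h: the free ALU slots must
 * cover the stores, the max nodes, and however many next-max nodes exceed
 * what the next instruction can still accept. */
static bool invariant_1_holds(const struct fake_instr *i)
{
   int needed = i->alu_num_slot_needed_by_store +
                i->alu_num_slot_needed_by_max +
                MAX2(i->alu_num_unscheduled_next_max -
                     i->alu_max_allowed_next_max, 0);
   return i->alu_num_slot_free >= needed;
}

int main(void)
{
   /* Six unscheduled next-max nodes and all five next-instruction max slots
    * still usable: only one ALU slot has to stay free, and it is. */
   struct fake_instr no_complex1 = { 1, 0, 0, 6, 5 };

   /* Same pressure, but complex1 is in this instruction, so only four
    * next-max nodes may be deferred and two slots must stay free. */
   struct fake_instr with_complex1 = { 1, 0, 0, 6, 4 };

   printf("without complex1: %s\n",
          invariant_1_holds(&no_complex1) ? "ok" : "violated");
   printf("with complex1:    %s\n",
          invariant_1_holds(&with_complex1) ? "ok" : "violated");
   return 0;
}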