freedreno/ir3: Reduce choose_instr_dec() and _inc() overhead.

If you didn't have a freed+ready instruction, you'd redo the live_effect
and check_instr() logic multiple times per instr.  Replace the multiple
loops in each function with a ranking that I think is more readable,
reducing the overhead in the process.

debugoptimized dEQP-GLES31.functional.ubo.random.all_per_block_buffers.20
runtime goes from ~3.5s -> ~3.0s on my lazor.  No shader-db change.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11855>
This commit is contained in:
Emma Anholt 2021-07-13 13:39:36 -07:00 committed by Marge Bot
parent 56dd1adf3b
commit bda26dfcfc

View file

@ -593,6 +593,30 @@ static struct ir3_sched_node *choose_instr_inc(struct ir3_sched_ctx *ctx,
struct ir3_sched_notes *notes,
bool defer, bool avoid_output);
enum choose_instr_dec_rank {
DEC_NEUTRAL,
DEC_NEUTRAL_READY,
DEC_FREED,
DEC_FREED_READY,
};
static const char *
dec_rank_name(enum choose_instr_dec_rank rank)
{
switch (rank) {
case DEC_NEUTRAL:
return "neutral";
case DEC_NEUTRAL_READY:
return "neutral+ready";
case DEC_FREED:
return "freed";
case DEC_FREED_READY:
return "freed+ready";
default:
return NULL;
}
}
/**
* Chooses an instruction to schedule using the Goodman/Hsu (1988) CSR (Code
* Scheduling for Register pressure) heuristic.
@ -606,8 +630,8 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
{
const char *mode = defer ? "-d" : "";
struct ir3_sched_node *chosen = NULL;
enum choose_instr_dec_rank chosen_rank = DEC_NEUTRAL;
/* Find a ready inst with regs freed and pick the one with max cost. */
foreach_sched_node (n, &ctx->dag->heads) {
if (defer && should_defer(ctx, n->instr))
continue;
@ -615,99 +639,72 @@ choose_instr_dec(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
/* Note: mergedregs is only used post-RA, just set it to false */
unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
if (d > 0)
continue;
if (live_effect(n->instr) > -1)
int live = live_effect(n->instr);
if (live > 0)
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
if (!chosen || chosen->max_delay < n->max_delay) {
enum choose_instr_dec_rank rank;
if (live < 0) {
/* Prioritize instrs which free up regs and can be scheduled with no
* delay.
*/
if (d == 0)
rank = DEC_FREED_READY;
else
rank = DEC_FREED;
} else {
/* Contra the paper, pick a leader with no effect on used regs. This
* may open up new opportunities, as otherwise a single-operand instr
* consuming a value will tend to block finding freeing that value.
* This had a massive effect on reducing spilling on V3D.
*
* XXX: Should this prioritize ready?
*/
if (d == 0)
rank = DEC_NEUTRAL_READY;
else
rank = DEC_NEUTRAL;
}
/* Prefer higher-ranked instructions, or in the case of a rank tie, the
* highest latency-to-end-of-program instruction.
*/
if (!chosen || rank > chosen_rank ||
(rank == chosen_rank && chosen->max_delay < n->max_delay)) {
chosen = n;
chosen_rank = rank;
}
}
if (chosen) {
di(chosen->instr, "dec%s: chose (freed+ready)", mode);
return chosen;
}
/* Find a leader with regs freed and pick the one with max cost. */
foreach_sched_node (n, &ctx->dag->heads) {
if (defer && should_defer(ctx, n->instr))
continue;
if (live_effect(n->instr) > -1)
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
if (!chosen || chosen->max_delay < n->max_delay) {
chosen = n;
}
}
if (chosen) {
di(chosen->instr, "dec%s: chose (freed)", mode);
return chosen;
}
/* Contra the paper, pick a leader with no effect on used regs. This may
* open up new opportunities, as otherwise a single-operand instr consuming
* a value will tend to block finding freeing that value. This had a
* massive effect on reducing spilling on V3D.
*
* XXX: Should this prioritize ready?
*/
foreach_sched_node (n, &ctx->dag->heads) {
if (defer && should_defer(ctx, n->instr))
continue;
unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
if (d > 0)
continue;
if (live_effect(n->instr) > 0)
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
if (!chosen || chosen->max_delay < n->max_delay)
chosen = n;
}
if (chosen) {
di(chosen->instr, "dec%s: chose (neutral+ready)", mode);
return chosen;
}
foreach_sched_node (n, &ctx->dag->heads) {
if (defer && should_defer(ctx, n->instr))
continue;
if (live_effect(n->instr) > 0)
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
if (!chosen || chosen->max_delay < n->max_delay)
chosen = n;
}
if (chosen) {
di(chosen->instr, "dec%s: chose (neutral)", mode);
di(chosen->instr, "dec%s: chose (%s)", mode, dec_rank_name(chosen_rank));
return chosen;
}
return choose_instr_inc(ctx, notes, defer, true);
}
enum choose_instr_inc_rank {
INC_DISTANCE,
INC_DISTANCE_READY,
};
static const char *
inc_rank_name(enum choose_instr_inc_rank rank)
{
switch (rank) {
case INC_DISTANCE:
return "distance";
case INC_DISTANCE_READY:
return "distance+ready";
default:
return NULL;
}
}
/**
* When we can't choose an instruction that reduces register pressure or
* is neutral, we end up here to try and pick the least bad option.
@ -718,6 +715,7 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
{
const char *mode = defer ? "-d" : "";
struct ir3_sched_node *chosen = NULL;
enum choose_instr_inc_rank chosen_rank = INC_DISTANCE;
/*
* From hear on out, we are picking something that increases
@ -734,48 +732,29 @@ choose_instr_inc(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
if (defer && should_defer(ctx, n->instr))
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
unsigned d = ir3_delay_calc_prera(ctx->block, n->instr);
if (d > 0)
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
enum choose_instr_inc_rank rank;
if (d == 0)
rank = INC_DISTANCE_READY;
else
rank = INC_DISTANCE;
unsigned distance = nearest_use(n->instr);
if (!chosen || distance < chosen_distance) {
if (!chosen || rank > chosen_rank ||
(rank == chosen_rank && distance < chosen_distance)) {
chosen = n;
chosen_distance = distance;
chosen_rank = rank;
}
}
if (chosen) {
di(chosen->instr, "inc%s: chose (distance+ready)", mode);
return chosen;
}
/* Pick the max delay of the remaining leaders. */
foreach_sched_node (n, &ctx->dag->heads) {
if (avoid_output && n->output)
continue;
if (defer && should_defer(ctx, n->instr))
continue;
if (!check_instr(ctx, notes, n->instr))
continue;
unsigned distance = nearest_use(n->instr);
if (!chosen || distance < chosen_distance) {
chosen = n;
chosen_distance = distance;
}
}
if (chosen) {
di(chosen->instr, "inc%s: chose (distance)", mode);
di(chosen->instr, "inc%s: chose (%s)", mode, inc_rank_name(chosen_rank));
return chosen;
}