mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 02:40:11 +01:00
broadcom/compiler: try harder to merge thread switch earlier
We have been stopping as soon as we find a conflict, but that doesn't mean we can't merge the thrsw in an earlier slot, so keep going.

Going by shader-db, this sometimes allows us to merge the final thrsw a bit earlier and avoid emitting NOP instructions at the program end to make up for its delay slots. I have not observed cases where this helps with regular thrsw, but it doesn't hurt to try with those too.

total instructions in shared programs: 11526876 -> 11526354 (<.01%)
instructions in affected programs: 10760 -> 10238 (-4.85%)
helped: 236
HURT: 0
Instructions are helped.

total max-temps in shared programs: 2231705 -> 2231677 (<.01%)
max-temps in affected programs: 276 -> 248 (-10.14%)
helped: 27
HURT: 0
Max-temps are helped.

total inst-and-stalls in shared programs: 11545177 -> 11544655 (<.01%)
inst-and-stalls in affected programs: 10777 -> 10255 (-4.84%)
helped: 236
HURT: 0
Inst-and-stalls are helped.

total nops in shared programs: 321624 -> 321152 (-0.15%)
nops in affected programs: 751 -> 279 (-62.85%)
helped: 236
HURT: 0
Nops are helped.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22679>
This commit is contained in:
parent
13f258fae3
commit
0468ce3791
1 changed files with 22 additions and 8 deletions
|
|
@ -1766,12 +1766,6 @@ valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard
|
|||
struct qinst *qinst, int instructions_in_sequence,
|
||||
bool is_thrend)
|
||||
{
|
||||
/* No emitting our thrsw while the previous thrsw hasn't happened yet. */
|
||||
if (scoreboard->last_thrsw_tick + 3 >
|
||||
scoreboard->tick - instructions_in_sequence) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int slot = 0; slot < instructions_in_sequence; slot++) {
|
||||
if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot))
|
||||
return false;
|
||||
|
|
@ -1825,13 +1819,28 @@ emit_thrsw(struct v3d_compile *c,
|
|||
/* Find how far back into previous instructions we can put the THRSW. */
|
||||
int slots_filled = 0;
|
||||
int invalid_sig_count = 0;
|
||||
int invalid_seq_count = 0;
|
||||
bool last_thrsw_after_invalid_ok = false;
|
||||
struct qinst *merge_inst = NULL;
|
||||
vir_for_each_inst_rev(prev_inst, block) {
|
||||
/* No emitting our thrsw while the previous thrsw hasn't
|
||||
* happened yet.
|
||||
*/
|
||||
if (scoreboard->last_thrsw_tick + 3 >
|
||||
scoreboard->tick - (slots_filled + 1)) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (!valid_thrsw_sequence(c, scoreboard,
|
||||
prev_inst, slots_filled + 1,
|
||||
is_thrend)) {
|
||||
break;
|
||||
/* Even if the current sequence isn't valid, we may
|
||||
* be able to get a valid sequence by trying to move the
|
||||
* thrsw earlier, so keep going.
|
||||
*/
|
||||
invalid_seq_count++;
|
||||
goto cont_block;
|
||||
}
|
||||
|
||||
struct v3d_qpu_sig sig = prev_inst->qpu.sig;
|
||||
|
|
@ -1858,8 +1867,10 @@ emit_thrsw(struct v3d_compile *c,
|
|||
goto cont_block;
|
||||
}
|
||||
|
||||
/* We can merge the thrsw in this instruction */
|
||||
last_thrsw_after_invalid_ok = false;
|
||||
invalid_sig_count = 0;
|
||||
invalid_seq_count = 0;
|
||||
merge_inst = prev_inst;
|
||||
|
||||
cont_block:
|
||||
|
|
@ -1871,9 +1882,12 @@ cont_block:
|
|||
* merge the thrsw in the end, we need to adjust slots filled to match
|
||||
* the last valid merge point.
|
||||
*/
|
||||
assert(invalid_sig_count == 0 || slots_filled >= invalid_sig_count);
|
||||
assert((invalid_sig_count == 0 && invalid_seq_count == 0) ||
|
||||
slots_filled >= invalid_sig_count + invalid_seq_count);
|
||||
if (invalid_sig_count > 0)
|
||||
slots_filled -= invalid_sig_count;
|
||||
if (invalid_seq_count > 0)
|
||||
slots_filled -= invalid_seq_count;
|
||||
|
||||
bool needs_free = false;
|
||||
if (merge_inst) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue