mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 05:58:05 +02:00
broadcom/compiler: improve thrsw merge
Instead of stopping the merge process when we find an instruction with an incompatible signal (such as a small immediate), keep going and see if we can merge the thrsw in a previous instruction that is compatible. total instructions in shared programs: 13409835 -> 13356648 (-0.40%) instructions in affected programs: 3556860 -> 3503673 (-1.50%) helped: 17457 HURT: 18 Instructions are helped. total max-temps in shared programs: 2353971 -> 2352956 (-0.04%) max-temps in affected programs: 13960 -> 12945 (-7.27%) helped: 703 HURT: 0 Max-temps are helped. total spills in shared programs: 12301 -> 12301 (0.00%) total sfu-stalls in shared programs: 32596 -> 32499 (-0.30%) sfu-stalls in affected programs: 225 -> 128 (-43.11%) helped: 79 HURT: 3 Sfu-stalls are helped. total nops in shared programs: 347204 -> 325234 (-6.33%) nops in affected programs: 99834 -> 77864 (-22.01%) helped: 11515 HURT: 158 Nops are helped. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14172>
This commit is contained in:
parent
d4a4cd20d5
commit
2630c8f546
1 changed files with 38 additions and 7 deletions
|
|
@ -1763,26 +1763,57 @@ emit_thrsw(struct v3d_compile *c,
|
|||
|
||||
/* Find how far back into previous instructions we can put the THRSW. */
|
||||
int slots_filled = 0;
|
||||
int invalid_sig_count = 0;
|
||||
bool last_thrsw_after_invalid_ok = false;
|
||||
struct qinst *merge_inst = NULL;
|
||||
vir_for_each_inst_rev(prev_inst, block) {
|
||||
struct v3d_qpu_sig sig = prev_inst->qpu.sig;
|
||||
sig.thrsw = true;
|
||||
uint32_t packed_sig;
|
||||
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig))
|
||||
break;
|
||||
|
||||
if (!valid_thrsw_sequence(c, scoreboard,
|
||||
prev_inst, slots_filled + 1,
|
||||
is_thrend)) {
|
||||
break;
|
||||
}
|
||||
|
||||
struct v3d_qpu_sig sig = prev_inst->qpu.sig;
|
||||
sig.thrsw = true;
|
||||
uint32_t packed_sig;
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig)) {
|
||||
/* If we can't merge the thrsw here because of signal
|
||||
* incompatibility, keep going, we might be able to
|
||||
* merge it in an earlier instruction.
|
||||
*/
|
||||
invalid_sig_count++;
|
||||
goto cont_block;
|
||||
}
|
||||
|
||||
/* For last thrsw we need 2 consecutive slots that are
|
||||
* thrsw compatible, so if we have previously jumped over
|
||||
* an incompatible signal, flag that we have found the first
|
||||
* valid slot here and keep going.
|
||||
*/
|
||||
if (inst->is_last_thrsw && invalid_sig_count > 0 &&
|
||||
!last_thrsw_after_invalid_ok) {
|
||||
last_thrsw_after_invalid_ok = true;
|
||||
invalid_sig_count++;
|
||||
goto cont_block;
|
||||
}
|
||||
|
||||
last_thrsw_after_invalid_ok = false;
|
||||
invalid_sig_count = 0;
|
||||
merge_inst = prev_inst;
|
||||
|
||||
cont_block:
|
||||
if (++slots_filled == 3)
|
||||
break;
|
||||
}
|
||||
|
||||
/* If we jumped over a signal incompatibility and did not manage to
|
||||
* merge the thrsw in the end, we need to adjust slots filled to match
|
||||
* the last valid merge point.
|
||||
*/
|
||||
assert(invalid_sig_count == 0 || slots_filled >= invalid_sig_count);
|
||||
if (invalid_sig_count > 0)
|
||||
slots_filled -= invalid_sig_count;
|
||||
|
||||
bool needs_free = false;
|
||||
if (merge_inst) {
|
||||
merge_inst->qpu.sig.thrsw = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue