mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
nir/from_ssa: Move the loop bounds check in resolve_parallel_copy
We loop, effectively, over two stacks: ready and to_do and finish only when both are empty. In the case where ready is empty, we pull one off of to_do, add a copy to a temporary, and push it onto the ready stack. Previously, we assumed that we would never get to the temporary copy case if to_do has exactly one entry because that would imply that there was only one copy left which means there can't possibly be a cycle to break. This was true untilc7fc44f9eb("nir/from_ssa: Respect and populate divergence information") which changed things such that temporary copies sometimes get added in the case where a convergent value is copied both to convergent and divergent destinations. This patch adjusts our loop iteration to always attempt to clear the ready stack before checking if there's anything left on the to_do stack. I also added an assert to make the exit condition more clear. Fixes:c7fc44f9eb("nir/from_ssa: Respect and populate divergence information") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8037 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21315> (cherry picked from commit4e09d37f3b)
This commit is contained in:
parent
8eb3e6d1a3
commit
07b9046128
2 changed files with 17 additions and 2 deletions
|
|
@ -4,7 +4,7 @@
|
|||
"description": "nir/from_ssa: Move the loop bounds check in resolve_parallel_copy",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "c7fc44f9ebbe93f7aefc010aea4e13e29d0d67fd"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -767,7 +767,7 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
|||
ready[++ready_idx] = i;
|
||||
}
|
||||
|
||||
while (to_do_idx >= 0) {
|
||||
while (1) {
|
||||
while (ready_idx >= 0) {
|
||||
int b = ready[ready_idx--];
|
||||
int a = pred[b];
|
||||
|
|
@ -793,6 +793,11 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(ready_idx < 0);
|
||||
if (to_do_idx < 0)
|
||||
break;
|
||||
|
||||
int b = to_do[to_do_idx--];
|
||||
if (pred[b] == -1)
|
||||
continue;
|
||||
|
|
@ -805,6 +810,16 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
|
|||
* allocation, so we would rather not create extra register
|
||||
* dependencies for the backend to deal with. If it wants, the
|
||||
* backend can coalesce the (possibly multiple) temporaries.
|
||||
*
|
||||
* We can also get here in the case where there is no cycle but our
|
||||
* source value is convergent, is also used as a destination by another
|
||||
* element of the parallel copy, and all the destinations of the
|
||||
* parallel copy which copy from it are divergent. In this case, the
|
||||
* above loop cannot detect that the value has moved due to all the
|
||||
* divergent destinations and we'll end up emitting a copy to a
|
||||
* temporary which never gets used. We can avoid this with additional
|
||||
* tracking or we can just trust the back-end to dead-code the unused
|
||||
* temporary (which is trivial).
|
||||
*/
|
||||
assert(num_vals < num_copies * 2);
|
||||
nir_register *reg = nir_local_reg_create(state->builder.impl);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue