diff --git a/.pick_status.json b/.pick_status.json index 19e1328bc40..9adb3eebae7 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4,7 +4,7 @@ "description": "nir/from_ssa: Move the loop bounds check in resolve_parallel_copy", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "c7fc44f9ebbe93f7aefc010aea4e13e29d0d67fd" }, diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c index 01266516967..8f2fe62e70c 100644 --- a/src/compiler/nir/nir_from_ssa.c +++ b/src/compiler/nir/nir_from_ssa.c @@ -767,7 +767,7 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, ready[++ready_idx] = i; } - while (to_do_idx >= 0) { + while (1) { while (ready_idx >= 0) { int b = ready[ready_idx--]; int a = pred[b]; @@ -793,6 +793,11 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, } } } + + assert(ready_idx < 0); + if (to_do_idx < 0) + break; + int b = to_do[to_do_idx--]; if (pred[b] == -1) continue; @@ -805,6 +810,16 @@ resolve_parallel_copy(nir_parallel_copy_instr *pcopy, * allocation, so we would rather not create extra register * dependencies for the backend to deal with. If it wants, the * backend can coalesce the (possibly multiple) temporaries. + * + * We can also get here in the case where there is no cycle but our + * source value is convergent, is also used as a destination by another + * element of the parallel copy, and all the destinations of the + * parallel copy which copy from it are divergent. In this case, the + * above loop cannot detect that the value has moved due to all the + * divergent destinations and we'll end up emitting a copy to a + * temporary which never gets used. We can avoid this with additional + * tracking or we can just trust the back-end to dead-code the unused + * temporary (which is trivial). */ assert(num_vals < num_copies * 2); nir_register *reg = nir_local_reg_create(state->builder.impl);