mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 20:58:04 +02:00
aco: only consider cost of memory loads at waitcnt
We don't run this code before waitcnt insertion, so this isn't necessary.
This change improves accuracy in these two situations, because the waitcnt
insertion pass is more aware of divergent control flow:
   v0 = valu
   if (divergent) {
      v0 = vmem
   } else {
      use(v0)
   }

   v0 = vmem
   if (divergent) {
      wait vmcnt(0)
   } else {
      wait vmcnt(0)
   }
   use(v0)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38262>
This commit is contained in:
parent
bca5aab2be
commit
88b6b6db17
1 changed files with 1 additions and 8 deletions
|
|
@@ -441,14 +441,7 @@ BlockCycleEstimator::add(aco_ptr<Instruction>& instr)
|
|||
mem_ops[i].push_back(cur_cycle + wait_info[i]);
|
||||
}
|
||||
|
||||
/* This is inaccurate but shouldn't affect anything after waitcnt insertion.
|
||||
* Before waitcnt insertion, this is necessary to consider memory operations.
|
||||
*/
|
||||
unsigned latency = 0;
|
||||
for (unsigned i = 0; i < wait_type_num; i++)
|
||||
latency = MAX2(latency, i == wait_type_vs ? 0 : wait_info[i]);
|
||||
int32_t result_available = start + MAX2(perf.latency, (int32_t)latency);
|
||||
|
||||
int32_t result_available = start + perf.latency;
|
||||
for (Definition& def : instr->definitions) {
|
||||
int32_t* available = &reg_available[def.physReg().reg()];
|
||||
for (unsigned i = 0; i < def.size(); i++)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue