From 3b3ae477f3fe9cff421bb11f272cbc099c7ede56 Mon Sep 17 00:00:00 2001 From: Dhruv Mark Collins Date: Thu, 9 Oct 2025 13:56:56 +0000 Subject: [PATCH] tu/autotune: Add render mode locking to PROFILED algorithm There are certain scenarios where switching to another render mode, even for a single invocation, has significant negative implications for performance. We now try to heuristically pick out these cases and lock them into the optimal mode. At the moment the heuristic is fairly conservative, but it manages to lock RPs in under a minute in most cases. Signed-off-by: Dhruv Mark Collins Part-of: --- src/freedreno/vulkan/tu_autotune.cc | 49 +++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/src/freedreno/vulkan/tu_autotune.cc b/src/freedreno/vulkan/tu_autotune.cc index f3b60c4bc5c..77cdce10468 100644 --- a/src/freedreno/vulkan/tu_autotune.cc +++ b/src/freedreno/vulkan/tu_autotune.cc @@ -1056,6 +1056,7 @@ struct tu_autotune::rp_history { std::atomic sysmem_probability = PROBABILITY_MID; bool should_reset = false; /* If true, will reset sysmem_probability before next update. */ + bool locked = false; /* If true, the probability will no longer be updated. */ uint64_t seed[2] { 0x3bffb83978e24f88, 0x9238d5d56c71cd35 }; public: @@ -1066,6 +1067,9 @@ struct tu_autotune::rp_history { void update(rp_history &history, bool immediate) { + if (locked) + return; + auto &sysmem_ema = history.sysmem_rp_average; auto &gmem_ema = history.gmem_rp_average; uint32_t sysmem_prob = sysmem_probability.load(std::memory_order_relaxed); @@ -1075,15 +1079,13 @@ struct tu_autotune::rp_history { * scenario for autotune performance, since we know the optimal decisions. */ - if (sysmem_prob == 0 || sysmem_prob == 100) - return; /* Already resolved, no further updates are necessary. 
*/ - if (sysmem_ema.count < 1) { sysmem_prob = PROBABILITY_MAX; } else if (gmem_ema.count < 1) { sysmem_prob = 0; } else { sysmem_prob = gmem_ema.get() < sysmem_ema.get() ? 0 : PROBABILITY_MAX; + locked = true; } } else { if (sysmem_ema.count < MIN_PROFILE_DURATION_COUNT || gmem_ema.count < MIN_PROFILE_DURATION_COUNT) { @@ -1097,14 +1099,41 @@ struct tu_autotune::rp_history { } /* Adjust probability based on timing results. */ - constexpr uint32_t STEP_DELTA = 5, MIN_PROBABILITY = 5, MAX_PROBABILITY = 95; + constexpr uint32_t STEP_DELTA = 5; /* 5% */ + constexpr uint32_t MIN_PROB = 5, MAX_PROB = 95; uint64_t avg_sysmem = sysmem_ema.get(); uint64_t avg_gmem = gmem_ema.get(); - if (avg_gmem < avg_sysmem && sysmem_prob > MIN_PROBABILITY) { - sysmem_prob = MAX2(sysmem_prob - STEP_DELTA, MIN_PROBABILITY); - } else if (avg_sysmem < avg_gmem && sysmem_prob < MAX_PROBABILITY) { - sysmem_prob = MIN2(sysmem_prob + STEP_DELTA, MAX_PROBABILITY); + + if (avg_gmem < avg_sysmem && sysmem_prob > MIN_PROB) { + sysmem_prob = MAX2(sysmem_prob - STEP_DELTA, MIN_PROB); + } else if (avg_sysmem < avg_gmem && sysmem_prob < MAX_PROB) { + sysmem_prob = MIN2(sysmem_prob + STEP_DELTA, MAX_PROB); + } + + /* If the RP duration exceeds a certain minimum duration threshold (i.e. has a large impact on frametime) + * and the percentage difference between the modes is large enough, we lock into the optimal mode. This + * avoids performance hazards from switching to an extremely suboptimal mode even if done very rarely. + * Note: Due to the potentially huge negative impact of a bad lock, this is a very conservative check. 
+ */ + constexpr uint32_t MIN_LOCK_DURATION_COUNT = 15; + constexpr uint64_t MIN_LOCK_THRESHOLD = GPU_TICKS_PER_US * 1'000; /* 1ms */ + constexpr uint32_t LOCK_PERCENT_DIFF = 30; + + bool has_resolved = sysmem_prob == MAX_PROB || sysmem_prob == MIN_PROB; + bool enough_samples = + sysmem_ema.count >= MIN_LOCK_DURATION_COUNT && gmem_ema.count >= MIN_LOCK_DURATION_COUNT; + uint64_t min_avg = MIN2(avg_sysmem, avg_gmem); + uint64_t max_avg = MAX2(avg_sysmem, avg_gmem); + uint64_t percent_diff = (100 * (max_avg - min_avg)) / min_avg; + + if (has_resolved && enough_samples && max_avg >= MIN_LOCK_THRESHOLD && + percent_diff >= LOCK_PERCENT_DIFF) { + if (avg_gmem < avg_sysmem) + sysmem_prob = 0; + else + sysmem_prob = 100; + locked = true; } } } @@ -1112,9 +1141,9 @@ struct tu_autotune::rp_history { sysmem_probability.store(sysmem_prob, std::memory_order_relaxed); at_log_profiled_h("update%s avg_gmem: %" PRIu64 " us (%" PRIu64 " samples) avg_sysmem: %" PRIu64 - " us (%" PRIu64 " samples) = sysmem_probability: %" PRIu32, + " us (%" PRIu64 " samples) = sysmem_probability: %" PRIu32 " locked: %u", history.hash, immediate ? "-imm" : "", ticks_to_us(gmem_ema.get()), gmem_ema.count, - ticks_to_us(sysmem_ema.get()), sysmem_ema.count, sysmem_prob); + ticks_to_us(sysmem_ema.get()), sysmem_ema.count, sysmem_prob, locked); } public: