From b8e9e83bbe6b2501576ce50243d6fc8da9e0d7e6 Mon Sep 17 00:00:00 2001 From: Dhruv Mark Collins Date: Thu, 16 Apr 2026 21:53:54 +0000 Subject: [PATCH] tu/query_pool: Avoid CP counter conflict with autotune With autotune allocating counters low-to-high, the conflict with PERFORMANCE_QUERY_KHR will happen if any CP-based counters are used. This is a temporary workaround which just drops the first two CP counters from being usable for performance queries. Cc: mesa-stable Signed-off-by: Dhruv Mark Collins Assisted-by: OpenAI Codex (GPT-5.4) (cherry picked from commit 78e2bbc70f55f1cf6ef922e2052b1ce6b879b952) Part-of: --- .pick_status.json | 2 +- src/freedreno/vulkan/tu_query_pool.cc | 38 ++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 066c0702417..747d7bd58c9 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2104,7 +2104,7 @@ "description": "tu/query_pool: Avoid CP counter conflict with autotune", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc index 09ae7656dbd..3c65ff81a45 100644 --- a/src/freedreno/vulkan/tu_query_pool.cc +++ b/src/freedreno/vulkan/tu_query_pool.cc @@ -249,6 +249,21 @@ perfcntr_index(const struct fd_perfcntr_group *group, uint32_t group_count, assert(i < group_count); } +static uint32_t +perfcntr_reserved_counters(const struct fd_perfcntr_group *group) +{ + /* Keep raw perf queries off the CP slots reserved by autotune latency optimization. + * TODO: We need to do this in a more robust way. + */ + return strcmp(group->name, "CP") == 0 ? 2 : 0; +} + +static uint32_t +perfcntr_available_counters(const struct fd_perfcntr_group *group) +{ + return group->num_counters - MIN2(group->num_counters, perfcntr_reserved_counters(group)); +} + static int compare_perfcntr_pass(const void *a, const void *b) { @@ -360,15 +375,26 @@ tu_CreateQueryPool(VkDevice _device, perf_query->data[i].cid = cid; perf_query->data[i].app_idx = i; + const struct fd_perfcntr_group *group = &perf_query->perf_group[gid]; + uint32_t reserved_counters = perfcntr_reserved_counters(group); + uint32_t available_counters = perfcntr_available_counters(group); + + if (available_counters == 0) { + vk_query_pool_destroy(&device->vk, pAllocator, &pool->vk); + return vk_errorf(device, VK_ERROR_FEATURE_NOT_PRESENT, "No raw perf counters available in group %s", + group->name); + } + /* When a counter register is over the capacity(num_counters), * reset it for next pass. */ - if (regs[gid] < perf_query->perf_group[gid].num_counters) { - perf_query->data[i].cntr_reg = regs[gid]++; + if (regs[gid] < available_counters) { + perf_query->data[i].cntr_reg = reserved_counters + regs[gid]++; perf_query->data[i].pass = pass[gid]; } else { perf_query->data[i].pass = ++pass[gid]; - perf_query->data[i].cntr_reg = regs[gid] = 0; + perf_query->data[i].cntr_reg = reserved_counters; + regs[gid] = 0; regs[gid]++; } } @@ -2299,7 +2325,11 @@ tu_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( } for (uint32_t i = 0; i < group_count; i++) { - n_passes = DIV_ROUND_UP(counters_requested[i], group[i].num_counters); + uint32_t available_counters = perfcntr_available_counters(&group[i]); + if (available_counters == 0) + continue; + + n_passes = DIV_ROUND_UP(counters_requested[i], available_counters); *pNumPasses = MAX2(*pNumPasses, n_passes); } } else {