Merge branch 'turnip/feature/disable-cb-by-default' into 'main'

tu: Disable concurrent binning by default due to perf regressions

See merge request mesa/mesa!41394
This commit is contained in:
Danylo Piliaiev 2026-05-08 00:12:39 +00:00
commit 3b19fe5ef7
7 changed files with 27 additions and 4 deletions

View file

@ -31,6 +31,18 @@ tests_per_group = 10000
[deqp.env]
TU_DEBUG = "gmem,unaligned_store"
# force-gmem testing with concurrent binning explicitly enabled, to exercise the concurrent binning path
[[deqp]]
deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"
caselists = ["/deqp-vk/mustpass/vk-main.txt"]
include = ["dEQP-VK.renderpass2.*"]
prefix = "gmem-cb-"
fraction = 20
tests_per_group = 10000
[deqp.env]
TU_DEBUG = "gmem"
tu_allow_concurrent_binning = "true"
# force-sysmem testing
[[deqp]]
deqp = "/deqp-vk/external/vulkancts/modules/vulkan/deqp-vk"

View file

@ -343,6 +343,7 @@ a750-vk:
variables:
CI_TRON_TIMEOUT__OVERALL__MINUTES: 16
DEQP_SUITE: freedreno-a750-vk
DEQP_FRACTION: 2
# A750 VK traces are disabled because they time out -- we have a lot of:
# [ 465.720843] adreno 3d00000.gpu: CP | protected mode error | WRITE | addr=0x0000930A | status=0x0060930A

View file

@ -489,7 +489,7 @@ tu_emit_cache_flush(struct tu_cmd_buffer *cmd_buffer)
if ((flushes & TU_CMD_FLAG_WAIT_FOR_BR) && CHIP >= A7XX &&
!(cmd_buffer->state.pass && cmd_buffer->state.renderpass_cb_disabled) &&
!TU_DEBUG(NO_CONCURRENT_BINNING)) {
cmd_buffer->device->instance->allow_concurrent_binning) {
trace_start_concurrent_binning_barrier(&cmd_buffer->trace, cs, cmd_buffer);
/* Wait-for-BR when repeated a lot of times per frame can add up
@ -3109,8 +3109,8 @@ tu7_emit_concurrent_binning_start(struct tu_cmd_buffer *cmd,
tu7_cb_disable_reason(
(!cmd->state.lrz.fast_clear && cmd->state.lrz.image_view), cmd,
"LRZ fast clear disabled") ||
tu7_cb_disable_reason(TU_DEBUG(NO_CONCURRENT_BINNING), cmd,
"TU_DEBUG(NO_CONCURRENT_BINNING)")) {
tu7_cb_disable_reason(!cmd->device->instance->allow_concurrent_binning, cmd,
"globally disabled")) {
tu_cs_emit_pkt7(cs, CP_THREAD_CONTROL, 1);
tu_cs_emit(cs, CP_THREAD_CONTROL_0_THREAD(CP_SET_THREAD_BR) |
CP_THREAD_CONTROL_0_CONCURRENT_BIN_DISABLE);

View file

@ -1830,6 +1830,7 @@ static const driOptionDescription tu_dri_options[] = {
DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
DRI_CONF_TU_ALLOW_CONCURRENT_BINNING(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
@ -1884,6 +1885,9 @@ tu_init_dri_options(struct tu_instance *instance)
driQueryOptionb(&instance->dri_options, "tu_emulate_alpha_to_coverage");
instance->autotune_algo =
driQueryOptionstr(&instance->dri_options, "tu_autotune_algorithm");
instance->allow_concurrent_binning =
(driQueryOptionb(&instance->dri_options, "tu_allow_concurrent_binning") && !TU_DEBUG(NO_CONCURRENT_BINNING)) ||
TU_DEBUG(FORCE_CONCURRENT_BINNING);
}
static uint32_t instance_count = 0;

View file

@ -240,6 +240,8 @@ struct tu_instance
/* Configuration option to use a specific autotune algorithm by default. */
const char *autotune_algo;
bool allow_concurrent_binning;
};
VK_DEFINE_HANDLE_CASTS(tu_instance, vk.base, VkInstance,
VK_OBJECT_TYPE_INSTANCE)

View file

@ -182,7 +182,7 @@ resolve_vis_stream_patchpoints(struct tu_queue *queue,
* streams and therefore should be avoided.
*/
uint32_t min_vis_stream_count =
(TU_DEBUG(NO_CONCURRENT_BINNING) || dev->physical_device->info->chip < 7) ?
(!dev->instance->allow_concurrent_binning || dev->physical_device->info->chip < 7) ?
1 : MIN2(MAX2(rp_count, 1), TU_MAX_VIS_STREAMS);
uint32_t vis_stream_count;

View file

@ -672,6 +672,10 @@
DRI_CONF_OPT_S_NODEF(tu_autotune_algorithm, \
"Set the preferred autotune algorithm")
/* Declares the "tu_allow_concurrent_binning" driconf option by expanding to
 * DRI_CONF_OPT_B (presumably the boolean option helper -- confirm against its
 * definition).  `def` is forwarded unchanged as the option's default value;
 * the string is the user-visible description of the option.
 */
#define DRI_CONF_TU_ALLOW_CONCURRENT_BINNING(def) \
DRI_CONF_OPT_B(tu_allow_concurrent_binning, def, \
"Allow concurrent binning on A7XX+, the CB is disabled by default because it regresses performance on desktop games")
/**
* \brief Honeykrisp specific configuration options
*/